2016-12-29 07:15:17 +00:00
|
|
|
.\" Copyright (c) 2016, IBM Corporation.
|
|
|
|
.\" Written by Mike Rapoport <rppt@linux.vnet.ibm.com>
|
2017-01-04 22:29:03 +00:00
|
|
|
.\" and Copyright (C) 2017 Michael Kerrisk <mtk.manpages@gmail.com>
|
2016-12-29 07:15:17 +00:00
|
|
|
.\"
|
|
|
|
.\" %%%LICENSE_START(VERBATIM)
|
|
|
|
.\" Permission is granted to make and distribute verbatim copies of this
|
|
|
|
.\" manual provided the copyright notice and this permission notice are
|
|
|
|
.\" preserved on all copies.
|
|
|
|
.\"
|
|
|
|
.\" Permission is granted to copy and distribute modified versions of this
|
|
|
|
.\" manual under the conditions for verbatim copying, provided that the
|
|
|
|
.\" entire resulting derived work is distributed under the terms of a
|
|
|
|
.\" permission notice identical to this one.
|
|
|
|
.\"
|
|
|
|
.\" Since the Linux kernel and libraries are constantly changing, this
|
|
|
|
.\" manual page may be incorrect or out-of-date. The author(s) assume no
|
|
|
|
.\" responsibility for errors or omissions, or for damages resulting from
|
|
|
|
.\" the use of the information contained herein. The author(s) may not
|
|
|
|
.\" have taken the same level of care in the production of this manual,
|
|
|
|
.\" which is licensed free of charge, as they might when working
|
|
|
|
.\" professionally.
|
|
|
|
.\"
|
|
|
|
.\" Formatted or processed versions of this manual, if unaccompanied by
|
|
|
|
.\" the source, must acknowledge the copyright and authors of this work.
|
|
|
|
.\" %%%LICENSE_END
|
|
|
|
.\"
|
iconv.1, ldd.1, locale.1, localedef.1, memusage.1, memusagestat.1, mtrace.1, pldd.1, sprof.1, time.1, _syscall.2, add_key.2, alloc_hugepages.2, arch_prctl.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chown.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, fanotify_init.2, fcntl.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_console.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_getfsmap.2, ioctl_iflags.2, ioctl_list.2, ioctl_ns.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, ipc.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readlink.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, signal.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscall.2, syscalls.2, sysctl.2, sysfs.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, uname.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, ustat.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2, CPU_SET.3, INFINITY.3, __ppc_get_timebase.3, __ppc_set_ppr_med.3, __ppc_yield.3, __setfpucw.3, acos.3, acosh.3, adjtime.3, aio_fsync.3, aio_init.3, aio_read.3, aio_return.3, aio_suspend.3, aio_write.3, alloca.3, argz_add.3, asin.3, asinh.3, asprintf.3, assert.3, assert_perror.3, atan.3, atan2.3, atanh.3, atexit.3, backtrace.3, basename.3, bindresvport.3, bsd_signal.3, bsearch.3, bswap.3, btree.3, byteorder.3, bzero.3, canonicalize_file_name.3, carg.3, cbrt.3, ccos.3, ccosh.3, ceil.3, cexp.3, cfree.3, clearenv.3, clock.3, clock_getcpuclockid.3, clog.3, clog10.3, clog2.3, cmsg.3, confstr.3, copysign.3, cos.3, cosh.3, crypt.3, csin.3, csinh.3, csqrt.3, ctan.3, ctanh.3, ctime.3, dbopen.3, dl_iterate_phdr.3, dladdr.3, dlerror.3, dlinfo.3, dlopen.3, dlsym.3, drand48.3, drand48_r.3, duplocale.3, encrypt.3, end.3, endian.3, envz_add.3, erf.3, erfc.3, err.3, errno.3, error.3, ether_aton.3, euidaccess.3, exec.3, exit.3, exp.3, exp10.3, exp2.3, expm1.3, fabs.3, fcloseall.3, fdim.3, fenv.3, ferror.3, fexecve.3, fflush.3, ffs.3, fgetc.3, fgetgrent.3, fgetpwent.3, finite.3, floor.3, fma.3, fmax.3, fmemopen.3, fmin.3, fmod.3, fmtmsg.3, fopen.3, fopencookie.3, fpclassify.3, fpurge.3, fputwc.3, fputws.3, frexp.3, fseek.3, fseeko.3, ftime.3, fts.3, ftw.3, futimes.3, gamma.3, gcvt.3, get_nprocs_conf.3, get_phys_pages.3, getaddrinfo.3, getaddrinfo_a.3, getauxval.3, getcontext.3, getcwd.3, getdate.3, getentropy.3, getenv.3, getfsent.3, getgrent.3, getgrent_r.3, getgrnam.3, getgrouplist.3, gethostbyname.3, gethostid.3, getifaddrs.3, getipnodebyname.3, getline.3, getlogin.3, getmntent.3, getnameinfo.3, getnetent.3, getnetent_r.3, getopt.3, getprotoent.3, getprotoent_r.3, getpw.3, getpwent.3, getpwent_r.3, getpwnam.3, getrpcent.3, getrpcent_r.3, getrpcport.3, gets.3, getservent.3, getservent_r.3, getspnam.3, getsubopt.3, getttyent.3, getumask.3, getutent.3, getwchar.3, glob.3, gnu_get_libc_version.3, grantpt.3, gsignal.3, hash.3, hsearch.3, hypot.3, iconv.3, iconv_close.3, iconv_open.3, if_nameindex.3, if_nametoindex.3, ilogb.3, inet.3, inet_net_pton.3, inet_ntop.3, inet_pton.3, initgroups.3, insque.3, intro.3, isalpha.3, isgreater.3, j0.3, key_setsecret.3, killpg.3, ldexp.3, lgamma.3, lio_listio.3, lockf.3, log.3, log10.3, log1p.3, log2.3, logb.3, login.3, lrint.3, lround.3, lsearch.3, lseek64.3, makecontext.3, makedev.3, mallinfo.3, malloc.3, malloc_get_state.3, malloc_info.3, malloc_stats.3, malloc_trim.3, malloc_usable_size.3, mallopt.3, matherr.3, mbsnrtowcs.3, mbsrtowcs.3, mbstowcs.3, mcheck.3, memccpy.3, memchr.3, memcmp.3, memcpy.3, mkfifo.3, mkstemp.3, mktemp.3, modf.3, mpool.3, mq_close.3, mq_getattr.3, mq_notify.3, mq_open.3, mq_receive.3, mq_send.3, mtrace.3, newlocale.3, nextafter.3, nextup.3, nl_langinfo.3, ntp_gettime.3, offsetof.3, on_exit.3, open_memstream.3, opendir.3, openpty.3, perror.3, popen.3, posix_fallocate.3, posix_madvise.3, posix_memalign.3, posix_openpt.3, posix_spawn.3, pow.3, pow10.3, printf.3, profil.3, program_invocation_name.3, psignal.3, pthread_atfork.3, pthread_attr_init.3, pthread_attr_setaffinity_np.3, pthread_attr_setdetachstate.3, pthread_attr_setguardsize.3, pthread_attr_setinheritsched.3, pthread_attr_setschedparam.3, pthread_attr_setschedpolicy.3, pthread_attr_setscope.3, pthread_attr_setstack.3, pthread_attr_setstackaddr.3, pthread_attr_setstacksize.3, pthread_cancel.3, pthread_cleanup_push.3, pthread_cleanup_push_defer_np.3, pthread_create.3, pthread_detach.3, pthread_exit.3, pthread_getattr_default_np.3, pthread_getattr_np.3, pthread_getcpuclockid.3, pthread_join.3, pthread_kill.3, pthread_kill_other_threads_np.3, pthread_self.3, pthread_setaffinity_np.3, pthread_setcancelstate.3, pthread_setconcurrency.3, pthread_setname_np.3, pthread_setschedparam.3, pthread_sigmask.3, pthread_sigqueue.3, pthread_testcancel.3, pthread_tryjoin_np.3, ptsname.3, putgrent.3, putpwent.3, qsort.3, random.3, random_r.3, rcmd.3, re_comp.3, readdir.3, realpath.3, recno.3, regex.3, remainder.3, remove.3, remquo.3, resolver.3, rexec.3, rint.3, round.3, rpc.3, rpmatch.3, rtime.3, scalb.3, scalbln.3, scandir.3, scanf.3, sched_getcpu.3, sem_close.3, sem_destroy.3, sem_getvalue.3, sem_init.3, sem_open.3, sem_wait.3, setaliasent.3, setbuf.3, setenv.3, setlocale.3, setlogmask.3, setnetgrent.3, shm_open.3, signbit.3, significand.3, sigpause.3, sigqueue.3, sigset.3, sigvec.3, sin.3, sincos.3, sinh.3, sleep.3, sockatmark.3, sqrt.3, statvfs.3, stdarg.3, stdin.3, strcasecmp.3, strcat.3, strchr.3, strcoll.3, strcpy.3, strdup.3, strerror.3, strfmon.3, strfromd.3, strftime.3, strptime.3, strsignal.3, strstr.3, strtod.3, strtok.3, strtol.3, strtoul.3, strverscmp.3, syslog.3, system.3, sysv_signal.3, tan.3, tanh.3, telldir.3, tempnam.3, termios.3, tgamma.3, timeradd.3, tmpnam.3, toupper.3, towlower.3, towupper.3, trunc.3, ttyslot.3, tzset.3, ualarm.3, ulimit.3, undocumented.3, unlocked_stdio.3, updwtmp.3, uselocale.3, usleep.3, wcrtomb.3, wcsdup.3, wcsnrtombs.3, wcsrtombs.3, wcstombs.3, wctob.3, wcwidth.3, wordexp.3, wprintf.3, xcrypt.3, xdr.3, y0.3, cciss.4, console_codes.4, dsp56k.4, fuse.4, hd.4, hpsa.4, initrd.4, intro.4, loop.4, random.4, rtc.4, sd.4, sk98lin.4, st.4, wavelan.4, acct.5, core.5, elf.5, filesystems.5, host.conf.5, hosts.5, locale.5, nologin.5, proc.5, resolv.conf.5, rpc.5, slabinfo.5, utmp.5, aio.7, arp.7, bootparam.7, capabilities.7, cgroup_namespaces.7, cgroups.7, charsets.7, cpuset.7, ddp.7, environ.7, epoll.7, fanotify.7, feature_test_macros.7, futex.7, inode.7, inotify.7, ip.7, ipv6.7, keyrings.7, locale.7, man-pages.7, man.7, math_error.7, mount_namespaces.7, mq_overview.7, namespaces.7, netdevice.7, netlink.7, packet.7, pipe.7, pkeys.7, pthreads.7, pty.7, raw.7, rtld-audit.7, rtnetlink.7, sched.7, session-keyring.7, signal.7, sock_diag.7, socket.7, spufs.7, suffixes.7, tcp.7, udp.7, udplite.7, unicode.7, units.7, unix.7, uri.7, user_namespaces.7, vdso.7, x25.7, xattr.7, iconvconfig.8, ld.so.8, ldconfig.8, sln.8: Update timestamps
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-09-15 10:44:56 +00:00
|
|
|
.TH USERFAULTFD 2 2017-09-15 "Linux" "Linux Programmer's Manual"
|
2016-12-29 07:15:17 +00:00
|
|
|
.SH NAME
|
2017-01-04 22:44:30 +00:00
|
|
|
userfaultfd \- create a file descriptor for handling page faults in user space
|
2016-12-29 07:15:17 +00:00
|
|
|
.SH SYNOPSIS
|
|
|
|
.nf
|
|
|
|
.B #include <sys/types.h>
|
2017-01-06 03:55:33 +00:00
|
|
|
.B #include <linux/userfaultfd.h>
|
_exit.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, close.2, connect.2, create_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, exit_group.2, fanotify_mark.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpid.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, getxattr.2, idle.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, ioctl.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_getfsmap.2, ioctl_tty.2, ioperm.2, iopl.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, mount.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, pause.2, pciconfig_read.2, perf_event_open.2, perfmonctl.2, personality.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, remap_file_pages.2, removexattr.2, rename.2, request_key.2, rmdir.2, rt_sigqueueinfo.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, set_mempolicy.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, setup.2, setxattr.2, sgetmask.2, shmctl.2, shmget.2, shutdown.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, stat.2, statfs.2, stime.2, swapon.2, symlink.2, sync.2, sysctl.2, sysinfo.2, syslog.2, time.2, timer_create.2, timer_delete.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, uname.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, ustat.2, utime.2, utimensat.2, vfork.2, vhangup.2, vm86.2, wait.2, wait4.2, write.2, CPU_SET.3, INFINITY.3, __ppc_get_timebase.3, __ppc_set_ppr_med.3, __ppc_yield.3, __setfpucw.3, a64l.3, abort.3, abs.3, acos.3, acosh.3, addseverity.3, adjtime.3, aio_cancel.3, aio_error.3, aio_fsync.3, aio_init.3, aio_read.3, aio_return.3, aio_suspend.3, aio_write.3, alloca.3, argz_add.3, asin.3, asinh.3, asprintf.3, assert.3, assert_perror.3, atan.3, atan2.3, atanh.3, atexit.3, atof.3, atoi.3, basename.3, bcmp.3, bcopy.3, bsd_signal.3, bsearch.3, bstring.3, btowc.3, byteorder.3, cabs.3, cacos.3, cacosh.3, canonicalize_file_name.3, carg.3, casin.3, casinh.3, catan.3, catanh.3, catopen.3, cbrt.3, ccos.3, ccosh.3, ceil.3, cexp.3, cexp2.3, cfree.3, cimag.3, clearenv.3, clock.3, clock_getcpuclockid.3, clog.3, clog10.3, clog2.3, closedir.3, cmsg.3, confstr.3, conj.3, copysign.3, cos.3, cosh.3, cpow.3, cproj.3, creal.3, crypt.3, csin.3, csinh.3, csqrt.3, ctan.3, ctanh.3, ctermid.3, ctime.3, daemon.3, difftime.3, dirfd.3, div.3, dlerror.3, dlopen.3, dlsym.3, drand48.3, drand48_r.3, duplocale.3, dysize.3, ecvt.3, ecvt_r.3, encrypt.3, endian.3, erf.3, erfc.3, err.3, errno.3, ether_aton.3, euidaccess.3, exec.3, exit.3, exp.3, exp10.3, exp2.3, expm1.3, fabs.3, fclose.3, fcloseall.3, fdim.3, fenv.3, ferror.3, fexecve.3, fflush.3, ffs.3, fgetc.3, fgetgrent.3, fgetpwent.3, fgetwc.3, fgetws.3, finite.3, flockfile.3, floor.3, fma.3, fmax.3, fmemopen.3, fmin.3, fmod.3, fmtmsg.3, fnmatch.3, fopen.3, fpathconf.3, fpclassify.3, fpurge.3, fputwc.3, fputws.3, fread.3, frexp.3, fseek.3, fseeko.3, ftime.3, ftok.3, fts.3, ftw.3, futimes.3, fwide.3, gamma.3, gcvt.3, get_nprocs_conf.3, get_phys_pages.3, getaddrinfo.3, getaddrinfo_a.3, getauxval.3, getcontext.3, getcwd.3, getdate.3, getdirentries.3, getdtablesize.3, getentropy.3, getenv.3, getfsent.3, getgrent.3, getgrent_r.3, getgrnam.3, getgrouplist.3, gethostbyname.3, gethostid.3, getifaddrs.3, getipnodebyname.3, getline.3, getloadavg.3, getlogin.3, getmntent.3, getnameinfo.3, getnetent.3, getnetent_r.3, getopt.3, getpass.3, getprotoent.3, getprotoent_r.3, getpt.3, getpw.3, getpwent.3, getpwent_r.3, getpwnam.3, getrpcent_r.3, getrpcport.3, gets.3, getservent.3, getservent_r.3, getspnam.3, getsubopt.3, getttyent.3, getumask.3, getusershell.3, getutent.3, getw.3, getwchar.3, glob.3, grantpt.3, group_member.3, gsignal.3, hsearch.3, hypot.3, iconv.3, iconv_close.3, iconv_open.3, if_nameindex.3, if_nametoindex.3, ilogb.3, index.3, inet.3, inet_net_pton.3, inet_ntop.3, infnan.3, initgroups.3, insque.3, isalpha.3, isatty.3, isfdtype.3, isgreater.3, iswalnum.3, iswalpha.3, iswblank.3, iswcntrl.3, iswctype.3, iswdigit.3, iswgraph.3, iswlower.3, iswprint.3, iswpunct.3, iswspace.3, iswupper.3, iswxdigit.3, j0.3, key_setsecret.3, killpg.3, ldexp.3, lgamma.3, localeconv.3, lockf.3, log.3, log10.3, log1p.3, log2.3, logb.3, login.3, lrint.3, lround.3, lsearch.3, lseek64.3, makecontext.3, malloc.3, malloc_get_state.3, malloc_hook.3, malloc_info.3, matherr.3, mblen.3, mbrlen.3, mbrtowc.3, mbsinit.3, mbsnrtowcs.3, mbsrtowcs.3, mbstowcs.3, mbtowc.3, mcheck.3, memccpy.3, memchr.3, memcmp.3, memcpy.3, memfrob.3, memmem.3, memmove.3, mempcpy.3, memset.3, mkdtemp.3, mkfifo.3, mkstemp.3, mktemp.3, modf.3, mpool.3, mq_close.3, mq_getattr.3, mq_notify.3, mq_open.3, mq_receive.3, mq_send.3, mq_unlink.3, mtrace.3, nan.3, netlink.3, newlocale.3, nextafter.3, nl_langinfo.3, offsetof.3, on_exit.3, open_memstream.3, opendir.3, openpty.3, perror.3, popen.3, posix_fallocate.3, posix_madvise.3, posix_memalign.3, posix_openpt.3, pow.3, pow10.3, printf.3, profil.3, psignal.3, pthread_atfork.3, pthread_attr_init.3, pthread_attr_setaffinity_np.3, pthread_attr_setdetachstate.3, pthread_attr_setguardsize.3, pthread_attr_setinheritsched.3, pthread_attr_setschedparam.3, pthread_attr_setschedpolicy.3, pthread_attr_setscope.3, pthread_attr_setstack.3, pthread_attr_setstackaddr.3, pthread_attr_setstacksize.3, pthread_cancel.3, pthread_cleanup_push.3, pthread_cleanup_push_defer_np.3, pthread_create.3, pthread_detach.3, pthread_equal.3, pthread_exit.3, pthread_getattr_default_np.3, pthread_getattr_np.3, pthread_getcpuclockid.3, pthread_join.3, pthread_kill.3, pthread_rwlockattr_setkind_np.3, pthread_self.3, pthread_setaffinity_np.3, pthread_setcancelstate.3, pthread_setconcurrency.3, pthread_setname_np.3, pthread_setschedparam.3, pthread_setschedprio.3, pthread_sigmask.3, pthread_sigqueue.3, pthread_testcancel.3, pthread_tryjoin_np.3, pthread_yield.3, ptsname.3, putenv.3, putgrent.3, putpwent.3, puts.3, putwchar.3, qecvt.3, qsort.3, raise.3, rand.3, random.3, random_r.3, rcmd.3, re_comp.3, readdir.3, realpath.3, remainder.3, remove.3, remquo.3, resolver.3, rewinddir.3, rexec.3, rint.3, round.3, rpmatch.3, rtime.3, rtnetlink.3, scalb.3, scalbln.3, scandir.3, scanf.3, sched_getcpu.3, seekdir.3, sem_close.3, sem_destroy.3, sem_getvalue.3, sem_init.3, sem_open.3, sem_post.3, sem_unlink.3, sem_wait.3, setaliasent.3, setbuf.3, setenv.3, setlocale.3, setlogmask.3, setnetgrent.3, shm_open.3, siginterrupt.3, signbit.3, significand.3, sigpause.3, sigqueue.3, sigset.3, sigvec.3, sigwait.3, sin.3, sincos.3, sinh.3, sleep.3, sockatmark.3, sqrt.3, statvfs.3, stdarg.3, stdio.3, stdio_ext.3, stpcpy.3, stpncpy.3, strcasecmp.3, strcat.3, strchr.3, strcmp.3, strcoll.3, strcpy.3, strdup.3, strerror.3, strfmon.3, strfromd.3, strfry.3, strftime.3, strlen.3, strnlen.3, strpbrk.3, strptime.3, strsep.3, strsignal.3, strspn.3, strstr.3, strtod.3, strtoimax.3, strtok.3, strtol.3, strtoul.3, strverscmp.3, strxfrm.3, swab.3, sysconf.3, syslog.3, system.3, sysv_signal.3, tan.3, tanh.3, tcgetpgrp.3, tcgetsid.3, telldir.3, tempnam.3, termios.3, tgamma.3, timegm.3, timeradd.3, tmpfile.3, tmpnam.3, toascii.3, toupper.3, towctrans.3, towlower.3, towupper.3, trunc.3, tsearch.3, ttyname.3, ttyslot.3, tzset.3, ualarm.3, ulimit.3, ungetwc.3, unlocked_stdio.3, unlockpt.3, updwtmp.3, uselocale.3, usleep.3, wcpcpy.3, wcpncpy.3, wcrtomb.3, wcscasecmp.3, wcscat.3, wcschr.3, wcscmp.3, wcscpy.3, wcscspn.3, wcsdup.3, wcslen.3, wcsncasecmp.3, wcsncat.3, wcsncmp.3, wcsncpy.3, wcsnlen.3, wcsnrtombs.3, wcspbrk.3, wcsrchr.3, wcsrtombs.3, wcsspn.3, wcsstr.3, wcstoimax.3, wcstok.3, wcstombs.3, wcswidth.3, wctob.3, wctomb.3, wctrans.3, wctype.3, wcwidth.3, wmemchr.3, wmemcmp.3, wmemcpy.3, wmemmove.3, wmemset.3, wordexp.3, wprintf.3, xcrypt.3, y0.3, dsp56k.4, random.4, rtc.4, st.4, ddp.7, ip.7, ipv6.7, packet.7, rtnetlink.7, socket.7, tcp.7, udp.7, udplite.7, x25.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-15 15:16:29 +00:00
|
|
|
.PP
|
2016-12-29 07:15:17 +00:00
|
|
|
.BI "int userfaultfd(int " flags );
|
|
|
|
.fi
|
|
|
|
.PP
|
|
|
|
.IR Note :
|
|
|
|
There is no glibc wrapper for this system call; see NOTES.
|
|
|
|
.SH DESCRIPTION
|
2016-12-29 11:50:09 +00:00
|
|
|
.BR userfaultfd ()
|
|
|
|
creates a new userfaultfd object that can be used for delegation of page-fault
|
|
|
|
handling to a user-space application,
|
|
|
|
and returns a file descriptor that refers to the new object.
|
|
|
|
The new userfaultfd object is configured using
|
2016-12-29 07:15:17 +00:00
|
|
|
.BR ioctl (2).
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2016-12-29 11:50:09 +00:00
|
|
|
Once the userfaultfd object is configured, the application can use
|
2016-12-29 07:15:17 +00:00
|
|
|
.BR read (2)
|
|
|
|
to receive userfaultfd notifications.
|
2016-12-29 11:50:09 +00:00
|
|
|
The reads from userfaultfd may be blocking or non-blocking,
|
|
|
|
depending on the value of
|
2016-12-29 07:15:17 +00:00
|
|
|
.I flags
|
|
|
|
used for the creation of the userfaultfd or subsequent calls to
|
2016-12-29 11:50:09 +00:00
|
|
|
.BR fcntl (2).
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2016-12-29 07:15:17 +00:00
|
|
|
The following values may be bitwise ORed in
|
|
|
|
.IR flags
|
|
|
|
to change the behavior of
|
|
|
|
.BR userfaultfd ():
|
|
|
|
.TP
|
|
|
|
.BR O_CLOEXEC
|
2016-12-29 11:50:09 +00:00
|
|
|
Enable the close-on-exec flag for the new userfaultfd file descriptor.
|
2016-12-29 07:15:17 +00:00
|
|
|
See the description of the
|
|
|
|
.B O_CLOEXEC
|
|
|
|
flag in
|
2016-12-29 11:50:09 +00:00
|
|
|
.BR open (2).
|
2016-12-29 07:15:17 +00:00
|
|
|
.TP
|
|
|
|
.BR O_NONBLOCK
|
2016-12-29 11:50:09 +00:00
|
|
|
Enables non-blocking operation for the userfaultfd object.
|
2016-12-29 07:15:17 +00:00
|
|
|
See the description of the
|
|
|
|
.BR O_NONBLOCK
|
|
|
|
flag in
|
|
|
|
.BR open (2).
|
2017-03-26 15:49:51 +00:00
|
|
|
.PP
|
|
|
|
When the last file descriptor referring to a userfaultfd object is closed,
|
|
|
|
all memory ranges that were registered with the object are unregistered
|
2017-04-27 14:14:33 +00:00
|
|
|
and unread events are flushed.
|
2016-12-29 07:15:17 +00:00
|
|
|
.\"
|
2017-01-04 22:29:03 +00:00
|
|
|
.SS Usage
|
|
|
|
The userfaultfd mechanism is designed to allow a thread in a multithreaded
|
|
|
|
program to perform user-space paging for the other threads in the process.
|
|
|
|
When a page fault occurs for one of the regions registered
|
|
|
|
to the userfaultfd object,
|
|
|
|
the faulting thread is put to sleep and
|
|
|
|
an event is generated that can be read via the userfaultfd file descriptor.
|
|
|
|
The fault-handling thread reads events from this file descriptor and services
|
|
|
|
them using the operations described in
|
|
|
|
.BR ioctl_userfaultfd (2).
|
|
|
|
When servicing the page fault events,
|
|
|
|
the fault-handling thread can trigger a wake-up for the sleeping thread.
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-04-25 16:29:08 +00:00
|
|
|
It is possible for the faulting threads and the fault-handling threads
|
|
|
|
to run in the context of different processes.
|
|
|
|
In this case, these threads may belong to different programs,
|
|
|
|
and the program that executes the faulting threads
|
|
|
|
will not necessarily cooperate with the program that handles the page faults.
|
|
|
|
In such non-cooperative mode,
|
2017-04-26 07:18:00 +00:00
|
|
|
the process that monitors userfaultfd and handles page faults
|
2017-04-25 16:29:08 +00:00
|
|
|
needs to be aware of the changes in the virtual memory layout
|
|
|
|
of the faulting process to avoid memory corruption.
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-04-27 14:14:33 +00:00
|
|
|
Starting from Linux 4.11,
|
2017-04-27 17:19:20 +00:00
|
|
|
userfaultfd can also notify the fault-handling threads about changes
|
2017-04-27 14:14:33 +00:00
|
|
|
in the virtual memory layout of the faulting process.
|
|
|
|
In addition, if the faulting process invokes
|
2017-04-27 17:19:20 +00:00
|
|
|
.BR fork (2),
|
2017-04-27 14:14:33 +00:00
|
|
|
the userfaultfd objects associated with the parent may be duplicated
|
|
|
|
into the child process and the userfaultfd monitor will be notified
|
2017-05-01 12:14:37 +00:00
|
|
|
(via the
|
|
|
|
.B UFFD_EVENT_FORK
|
|
|
|
described below)
|
2017-04-27 14:14:33 +00:00
|
|
|
about the file descriptor associated with the userfault objects
|
|
|
|
created for the child process,
|
2017-05-01 12:14:37 +00:00
|
|
|
which allows the userfaultfd monitor to perform user-space paging
|
2017-04-27 14:14:33 +00:00
|
|
|
for the child process.
|
2017-05-01 11:32:56 +00:00
|
|
|
Unlike page faults which have to be synchronous and require an
|
2017-05-01 05:43:18 +00:00
|
|
|
explicit or implicit wakeup,
|
|
|
|
all other events are delivered asynchronously and
|
|
|
|
the non-cooperative process resumes execution as
|
2017-05-01 11:32:56 +00:00
|
|
|
soon as the userfaultfd manager executes
|
|
|
|
.BR read (2).
|
|
|
|
The userfaultfd manager should carefully synchronize calls to
|
|
|
|
.B UFFDIO_COPY
|
|
|
|
with the processing of events.
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-05-01 05:43:18 +00:00
|
|
|
The current asynchronous model of the event delivery is optimal for
|
|
|
|
single threaded non-cooperative userfaultfd manager implementations.
|
2017-05-02 20:27:07 +00:00
|
|
|
.\" Regarding the preceding sentence, Mike Rapoport says:
|
|
|
|
.\" The major point here is that current events delivery model could be
|
|
|
|
.\" problematic for multi-threaded monitor. I even suspect that it would be
|
|
|
|
.\" impossible to ensure synchronization between page faults and non-page
|
|
|
|
.\" fault events in multi-threaded monitor.
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-04-25 16:29:08 +00:00
|
|
|
.\" FIXME elaborate about non-cooperating mode, describe its limitations
|
2017-04-26 07:18:00 +00:00
|
|
|
.\" for kernels before 4.11, features added in 4.11
|
2017-04-25 16:29:08 +00:00
|
|
|
.\" and limitations remaining in 4.11
|
|
|
|
.\" Maybe it's worth adding a dedicated sub-section...
|
2017-01-04 22:29:03 +00:00
|
|
|
.\"
|
2016-12-29 07:15:17 +00:00
|
|
|
.SS Userfaultfd operation
|
|
|
|
After the userfaultfd object is created with
|
2016-12-29 11:50:09 +00:00
|
|
|
.BR userfaultfd (),
|
|
|
|
the application must enable it using the
|
|
|
|
.B UFFDIO_API
|
|
|
|
.BR ioctl (2)
|
|
|
|
operation.
|
|
|
|
This operation allows a handshake between the kernel and user space
|
|
|
|
to determine the API version and supported features.
|
2016-12-29 12:11:53 +00:00
|
|
|
This operation must be performed before any of the other
|
|
|
|
.BR ioctl (2)
|
|
|
|
operations described below (or those operations fail with the
|
|
|
|
.BR EINVAL
|
|
|
|
error).
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2016-12-29 11:50:09 +00:00
|
|
|
After a successful
|
|
|
|
.B UFFDIO_API
|
|
|
|
operation,
|
|
|
|
the application then registers memory address ranges using the
|
|
|
|
.B UFFDIO_REGISTER
|
|
|
|
.BR ioctl (2)
|
|
|
|
operation.
|
|
|
|
After successful completion of a
|
|
|
|
.B UFFDIO_REGISTER
|
|
|
|
operation,
|
|
|
|
a page fault occurring in the requested memory range, and satisfying
|
|
|
|
the mode defined at the registration time, will be forwarded by the kernel to
|
|
|
|
the user-space application.
|
|
|
|
The application can then use the
|
|
|
|
.B UFFDIO_COPY
|
2016-12-29 07:15:17 +00:00
|
|
|
or
|
2016-12-29 11:50:09 +00:00
|
|
|
.B UFFDIO_ZERO
|
|
|
|
.BR ioctl (2)
|
|
|
|
operations to resolve the page fault.
|
2017-10-09 22:45:51 +00:00
|
|
|
.PP
|
|
|
|
Starting from Linux 4.14, if application sets
|
|
|
|
.B UFFD_FEATURE_SIGBUS
|
|
|
|
feature bit using
|
|
|
|
.B UFFDIO_API
|
|
|
|
.BR ioctl (2),
|
|
|
|
no page fault notification will be forwarded to
|
|
|
|
the user-space, instead a
|
|
|
|
.B SIGBUS
|
|
|
|
signal is delivered to the faulting process. With this feature,
|
|
|
|
userfaultfd can be used for robustness purpose to simply catch
|
|
|
|
any access to areas within the registered address range that do not
|
|
|
|
have pages allocated, without having to listen to userfaultfd events.
|
|
|
|
No userfaultfd monitor will be required for dealing with such memory
|
|
|
|
accesses. For example, this feature can be useful for applications that
|
|
|
|
want to prevent the kernel from automatically allocating pages and filling
|
|
|
|
holes in sparse files when the hole is accessed thru mapped address.
|
|
|
|
.PP
|
|
|
|
The
|
|
|
|
.B UFFD_FEATURE_SIGBUS
|
|
|
|
feature is implicitly inherited through fork() if used in combination with
|
|
|
|
.BR UFFD_FEATURE_FORK .
|
|
|
|
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2016-12-30 10:06:33 +00:00
|
|
|
Details of the various
|
2016-12-29 11:50:09 +00:00
|
|
|
.BR ioctl (2)
|
2016-12-30 10:06:33 +00:00
|
|
|
operations can be found in
|
|
|
|
.BR ioctl_userfaultfd (2).
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-04-27 14:14:33 +00:00
|
|
|
Since Linux 4.11, events other than page-fault may enabled during
|
|
|
|
.B UFFDIO_API
|
|
|
|
operation.
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-04-25 16:29:04 +00:00
|
|
|
Up to Linux 4.11,
|
|
|
|
userfaultfd can be used only with anonymous private memory mappings.
|
|
|
|
Since Linux 4.11,
|
|
|
|
userfaultfd can be also used with hugetlbfs and shared memory mappings.
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-01-04 22:29:03 +00:00
|
|
|
.\"
|
|
|
|
.SS Reading from the userfaultfd structure
|
|
|
|
Each
|
|
|
|
.BR read (2)
|
|
|
|
from the userfaultfd file descriptor returns one or more
|
|
|
|
.I uffd_msg
|
2017-04-27 14:14:33 +00:00
|
|
|
structures, each of which describes a page-fault event
|
|
|
|
or an event required for the non-cooperative userfaultfd usage:
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-01-04 22:29:03 +00:00
|
|
|
.in +4n
|
execve.2, ioctl_console.2, ioctl_iflags.2, ioctl_ns.2, ioctl_userfaultfd.2, kcmp.2, kexec_load.2, keyctl.2, link.2, listxattr.2, membarrier.2, memfd_create.2, mmap.2, modify_ldt.2, mprotect.2, msgctl.2, nanosleep.2, open_by_handle_at.2, perf_event_open.2, poll.2, posix_fadvise.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, readdir.2, readv.2, recv.2, recvmmsg.2, request_key.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setscheduler.2, seccomp.2, select.2, select_tut.2, semctl.2, semop.2, send.2, sendmmsg.2, set_thread_area.2, setns.2, shmctl.2, shmget.2, sigaction.2, sigaltstack.2, signal.2, sigwaitinfo.2, stat.2, statfs.2, statx.2, sync_file_range.2, syscall.2, sysctl.2, sysinfo.2, tee.2, timer_create.2, timer_settime.2, timerfd_create.2, unshare.2, userfaultfd.2, ustat.2, utime.2, utimensat.2, vmsplice.2, wait.2, adjtime.3, aio_init.3, backtrace.3, basename.3, bswap.3, btree.3, clock_getcpuclockid.3, cmsg.3, confstr.3, dbopen.3, dl_iterate_phdr.3, dladdr.3, dlinfo.3, dlopen.3, duplocale.3, encrypt.3, end.3, endian.3, err.3, errno.3, ether_aton.3, fgetgrent.3, fgetpwent.3, fmemopen.3, frexp.3, ftime.3, fts.3, getaddrinfo.3, getaddrinfo_a.3, getdate.3, getfsent.3, getgrent.3, getgrent_r.3, getgrnam.3, getgrouplist.3, gethostbyname.3, getifaddrs.3, getipnodebyname.3, getmntent.3, getnameinfo.3, getnetent.3, getopt.3, getprotoent.3, getprotoent_r.3, getpw.3, getpwent.3, getpwent_r.3, getpwnam.3, getrpcent.3, getservent.3, getservent_r.3, getspnam.3, getttyent.3, glob.3, gnu_get_libc_version.3, hash.3, hsearch.3, if_nameindex.3, inet.3, inet_net_pton.3, inet_pton.3, insque.3, isalpha.3, makecontext.3, mallinfo.3, malloc_info.3, mallopt.3, matherr.3, mbstowcs.3, mcheck.3, memchr.3, mq_getattr.3, mq_open.3, mq_receive.3, mq_send.3, mtrace.3, newlocale.3, ntp_gettime.3, posix_openpt.3, printf.3, pthread_attr_init.3, pthread_attr_setschedparam.3, pthread_cancel.3, pthread_cleanup_push.3, pthread_cleanup_push_defer_np.3, pthread_create.3, pthread_getattr_default_np.3, pthread_getattr_np.3, pthread_getcpuclockid.3, pthread_setname_np.3, pthread_setschedparam.3, pthread_sigmask.3, pthread_tryjoin_np.3, readdir.3, realpath.3, recno.3, regex.3, rpc.3, scanf.3, sched_getcpu.3, sem_wait.3, setaliasent.3, sigqueue.3, statvfs.3, strcat.3, strcpy.3, strftime.3, strtok.3, strtol.3, strverscmp.3, toupper.3, ttyslot.3, xdr.3, fuse.4, loop.4, rtc.4, st.4, acct.5, core.5, elf.5, slabinfo.5, aio.7, arp.7, capabilities.7, cgroup_namespaces.7, cgroups.7, ddp.7, fanotify.7, feature_test_macros.7, inode.7, inotify.7, ip.7, keyrings.7, locale.7, mount_namespaces.7, namespaces.7, netdevice.7, netlink.7, packet.7, pkeys.7, pthreads.7, sched.7, session-keyring.7, sock_diag.7, socket.7, spufs.7, udplite.7, unix.7, user_namespaces.7, vdso.7, x25.7, ld.so.8: Use consistent markup for code snippets
Change .nf/.fi to .EX/.EE
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-18 19:52:46 +00:00
|
|
|
.EX
|
2017-01-04 22:29:03 +00:00
|
|
|
struct uffd_msg {
|
2017-04-27 17:19:20 +00:00
|
|
|
__u8 event; /* Type of event */
|
2017-01-04 22:29:03 +00:00
|
|
|
...
|
|
|
|
union {
|
2017-01-26 19:33:26 +00:00
|
|
|
struct {
|
2017-04-27 17:19:20 +00:00
|
|
|
__u64 flags; /* Flags describing fault */
|
|
|
|
__u64 address; /* Faulting address */
|
2017-01-04 22:29:03 +00:00
|
|
|
} pagefault;
|
2017-04-27 17:19:20 +00:00
|
|
|
|
|
|
|
struct { /* Since Linux 4.11 */
|
|
|
|
__u32 ufd; /* Userfault file descriptor
|
|
|
|
of the child process */
|
|
|
|
} fork;
|
|
|
|
|
|
|
|
struct { /* Since Linux 4.11 */
|
|
|
|
__u64 from; /* Old address of remapped area */
|
|
|
|
__u64 to; /* New address of remapped area */
|
|
|
|
__u64 len; /* Original mapping length */
|
|
|
|
} remap;
|
|
|
|
|
|
|
|
struct { /* Since Linux 4.11 */
|
|
|
|
__u64 start; /* Start address of removed area */
|
|
|
|
__u64 end; /* End address of removed area */
|
|
|
|
} remove;
|
2017-01-06 04:11:26 +00:00
|
|
|
...
|
2017-01-04 22:29:03 +00:00
|
|
|
} arg;
|
|
|
|
|
|
|
|
/* Padding fields omitted */
|
|
|
|
} __packed;
|
execve.2, ioctl_console.2, ioctl_iflags.2, ioctl_ns.2, ioctl_userfaultfd.2, kcmp.2, kexec_load.2, keyctl.2, link.2, listxattr.2, membarrier.2, memfd_create.2, mmap.2, modify_ldt.2, mprotect.2, msgctl.2, nanosleep.2, open_by_handle_at.2, perf_event_open.2, poll.2, posix_fadvise.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, readdir.2, readv.2, recv.2, recvmmsg.2, request_key.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setscheduler.2, seccomp.2, select.2, select_tut.2, semctl.2, semop.2, send.2, sendmmsg.2, set_thread_area.2, setns.2, shmctl.2, shmget.2, sigaction.2, sigaltstack.2, signal.2, sigwaitinfo.2, stat.2, statfs.2, statx.2, sync_file_range.2, syscall.2, sysctl.2, sysinfo.2, tee.2, timer_create.2, timer_settime.2, timerfd_create.2, unshare.2, userfaultfd.2, ustat.2, utime.2, utimensat.2, vmsplice.2, wait.2, adjtime.3, aio_init.3, backtrace.3, basename.3, bswap.3, btree.3, clock_getcpuclockid.3, cmsg.3, confstr.3, dbopen.3, dl_iterate_phdr.3, dladdr.3, dlinfo.3, dlopen.3, duplocale.3, encrypt.3, end.3, endian.3, err.3, errno.3, ether_aton.3, fgetgrent.3, fgetpwent.3, fmemopen.3, frexp.3, ftime.3, fts.3, getaddrinfo.3, getaddrinfo_a.3, getdate.3, getfsent.3, getgrent.3, getgrent_r.3, getgrnam.3, getgrouplist.3, gethostbyname.3, getifaddrs.3, getipnodebyname.3, getmntent.3, getnameinfo.3, getnetent.3, getopt.3, getprotoent.3, getprotoent_r.3, getpw.3, getpwent.3, getpwent_r.3, getpwnam.3, getrpcent.3, getservent.3, getservent_r.3, getspnam.3, getttyent.3, glob.3, gnu_get_libc_version.3, hash.3, hsearch.3, if_nameindex.3, inet.3, inet_net_pton.3, inet_pton.3, insque.3, isalpha.3, makecontext.3, mallinfo.3, malloc_info.3, mallopt.3, matherr.3, mbstowcs.3, mcheck.3, memchr.3, mq_getattr.3, mq_open.3, mq_receive.3, mq_send.3, mtrace.3, newlocale.3, ntp_gettime.3, posix_openpt.3, printf.3, pthread_attr_init.3, pthread_attr_setschedparam.3, pthread_cancel.3, pthread_cleanup_push.3, pthread_cleanup_push_defer_np.3, pthread_create.3, pthread_getattr_default_np.3, pthread_getattr_np.3, pthread_getcpuclockid.3, pthread_setname_np.3, pthread_setschedparam.3, pthread_sigmask.3, pthread_tryjoin_np.3, readdir.3, realpath.3, recno.3, regex.3, rpc.3, scanf.3, sched_getcpu.3, sem_wait.3, setaliasent.3, sigqueue.3, statvfs.3, strcat.3, strcpy.3, strftime.3, strtok.3, strtol.3, strverscmp.3, toupper.3, ttyslot.3, xdr.3, fuse.4, loop.4, rtc.4, st.4, acct.5, core.5, elf.5, slabinfo.5, aio.7, arp.7, capabilities.7, cgroup_namespaces.7, cgroups.7, ddp.7, fanotify.7, feature_test_macros.7, inode.7, inotify.7, ip.7, keyrings.7, locale.7, mount_namespaces.7, namespaces.7, netdevice.7, netlink.7, packet.7, pkeys.7, pthreads.7, sched.7, session-keyring.7, sock_diag.7, socket.7, spufs.7, udplite.7, unix.7, user_namespaces.7, vdso.7, x25.7, ld.so.8: Use consistent markup for code snippets
Change .nf/.fi to .EX/.EE
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-18 19:52:46 +00:00
|
|
|
.EE
|
ioctl_console.2, ioctl_getfsmap.2, ioctl_iflags.2, ioctl_list.2, ioctl_ns.2, kcmp.2, kexec_load.2, keyctl.2, link.2, mmap.2, modify_ldt.2, msgctl.2, poll.2, query_module.2, quotactl.2, recv.2, recvmmsg.2, sched_setscheduler.2, seccomp.2, select.2, semctl.2, semop.2, send.2, set_thread_area.2, setns.2, shmctl.2, shmget.2, sigaction.2, sysinfo.2, timer_create.2, timerfd_create.2, uname.2, unshare.2, userfaultfd.2, ustat.2, utimensat.2, vmsplice.2, wait.2, adjtime.3, backtrace.3, bswap.3, btree.3, clock_getcpuclockid.3, confstr.3, dbopen.3, dl_iterate_phdr.3, dlinfo.3, duplocale.3, encrypt.3, end.3, endian.3, err.3, errno.3, fmemopen.3, fopencookie.3, frexp.3, fts.3, ftw.3, getaddrinfo.3, getaddrinfo_a.3, getcontext.3, getgrouplist.3, getifaddrs.3, getipnodebyname.3, getnameinfo.3, getopt.3, getprotoent_r.3, getpwent_r.3, getrpcent.3, getservent_r.3, getttyent.3, getumask.3, glob.3, gnu_get_libc_version.3, hash.3, hsearch.3, inet.3, inet_pton.3, insque.3, isalpha.3, makecontext.3, mallopt.3, mbstowcs.3, mcheck.3, memchr.3, mq_getattr.3, mq_open.3, mtrace.3, newlocale.3, ntp_gettime.3, offsetof.3, posix_openpt.3, printf.3, pthread_setname_np.3, pthread_setschedparam.3, rpc.3, scanf.3, sched_getcpu.3, sem_wait.3, setaliasent.3, sigqueue.3, sigvec.3, stdarg.3, strcat.3, strcpy.3, strftime.3, strtol.3, toupper.3, ttyslot.3, fuse.4, loop.4, st.4, elf.5, cgroup_namespaces.7, cgroups.7, feature_test_macros.7, inode.7, inotify.7, keyrings.7, man-pages.7, math_error.7, mount_namespaces.7, mq_overview.7, pthreads.7, sched.7, session-keyring.7, udplite.7, unix.7, vdso.7: Use consistent markup for code snippets
The preferred form is
.PP/.IP
.in +4n
.EX
<code>
.EE
.in
.PP/.IP
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-18 19:37:55 +00:00
|
|
|
.in
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-01-04 22:29:03 +00:00
|
|
|
If multiple events are available and the supplied buffer is large enough,
|
|
|
|
.BR read (2)
|
|
|
|
returns as many events as will fit in the supplied buffer.
|
|
|
|
If the buffer supplied to
|
|
|
|
.BR read (2)
|
|
|
|
is smaller than the size of the
|
|
|
|
.I uffd_msg
|
|
|
|
structure, the
|
|
|
|
.BR read (2)
|
|
|
|
fails with the error
|
|
|
|
.BR EINVAL .
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-01-04 22:29:03 +00:00
|
|
|
The fields set in the
|
|
|
|
.I uffd_msg
|
|
|
|
structure are as follows:
|
|
|
|
.TP
|
|
|
|
.I event
|
|
|
|
The type of event.
|
2017-04-27 14:14:33 +00:00
|
|
|
Depending of the event type,
|
|
|
|
different fields of the
|
|
|
|
.I arg
|
|
|
|
union represent details required for the event processing.
|
|
|
|
The non-page-fault events are generated only when appropriate feature
|
|
|
|
is enabled during API handshake with
|
|
|
|
.B UFFDIO_API
|
|
|
|
.BR ioctl (2).
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.IP
|
2017-04-27 14:14:33 +00:00
|
|
|
The following values can appear in the
|
|
|
|
.I event
|
|
|
|
field:
|
|
|
|
.RS
|
|
|
|
.TP
|
2017-04-27 15:59:00 +00:00
|
|
|
.BR UFFD_EVENT_PAGEFAULT " (since Linux 4.3)"
|
2017-04-27 14:14:33 +00:00
|
|
|
A page-fault event.
|
|
|
|
The page-fault details are available in the
|
|
|
|
.I pagefault
|
|
|
|
field.
|
2017-01-04 22:29:03 +00:00
|
|
|
.TP
|
2017-04-27 15:59:00 +00:00
|
|
|
.BR UFFD_EVENT_FORK " (since Linux 4.11)"
|
2017-04-27 14:14:33 +00:00
|
|
|
Generated when the faulting process invokes
|
2017-05-01 12:14:37 +00:00
|
|
|
.BR fork (2)
|
|
|
|
(or
|
|
|
|
.BR clone (2)
|
|
|
|
without the
|
|
|
|
.BR CLONE_VM
|
|
|
|
flag).
|
2017-04-27 14:14:33 +00:00
|
|
|
The event details are available in the
|
|
|
|
.I fork
|
|
|
|
field.
|
2017-04-27 18:49:30 +00:00
|
|
|
.\" FIXME describe duplication of userfault file descriptor during fork
|
2017-04-27 14:14:33 +00:00
|
|
|
.TP
|
2017-04-27 15:59:00 +00:00
|
|
|
.BR UFFD_EVENT_REMAP " (since Linux 4.11)"
|
2017-04-27 14:14:33 +00:00
|
|
|
Generated when the faulting process invokes
|
2017-04-27 17:19:20 +00:00
|
|
|
.BR mremap (2).
|
2017-04-27 14:14:33 +00:00
|
|
|
The event details are available in the
|
|
|
|
.I remap
|
|
|
|
field.
|
|
|
|
.TP
|
2017-04-27 15:59:00 +00:00
|
|
|
.BR UFFD_EVENT_REMOVE " (since Linux 4.11)"
|
2017-04-27 14:14:33 +00:00
|
|
|
Generated when the faulting process invokes
|
|
|
|
.BR madvise (2)
|
2017-04-27 17:19:20 +00:00
|
|
|
with
|
2017-04-27 14:14:33 +00:00
|
|
|
.BR MADV_DONTNEED
|
|
|
|
or
|
|
|
|
.BR MADV_REMOVE
|
|
|
|
advice.
|
|
|
|
The event details are available in the
|
|
|
|
.I remove
|
|
|
|
field.
|
|
|
|
.TP
|
2017-04-27 15:59:00 +00:00
|
|
|
.BR UFFD_EVENT_UNMAP " (since Linux 4.11)"
|
2017-04-27 14:14:33 +00:00
|
|
|
Generated when the faulting process unmaps a memory range,
|
|
|
|
either explicitly using
|
|
|
|
.BR munmap (2)
|
2017-04-27 17:19:20 +00:00
|
|
|
or implicitly during
|
2017-04-27 14:14:33 +00:00
|
|
|
.BR mmap (2)
|
|
|
|
or
|
2017-04-27 17:19:20 +00:00
|
|
|
.BR mremap (2).
|
2017-04-27 14:14:33 +00:00
|
|
|
The event details are available in the
|
|
|
|
.I remove
|
|
|
|
field.
|
|
|
|
.RE
|
|
|
|
.TP
|
|
|
|
.I pagefault.address
|
2017-01-04 22:29:03 +00:00
|
|
|
The address that triggered the page fault.
|
|
|
|
.TP
|
2017-04-27 14:14:33 +00:00
|
|
|
.I pagefault.flags
|
2017-01-04 22:29:03 +00:00
|
|
|
A bit mask of flags that describe the event.
|
|
|
|
For
|
|
|
|
.BR UFFD_EVENT_PAGEFAULT ,
|
|
|
|
the following flag may appear:
|
|
|
|
.RS
|
|
|
|
.TP
|
|
|
|
.B UFFD_PAGEFAULT_FLAG_WRITE
|
|
|
|
If the address is in a range that was registered with the
|
|
|
|
.B UFFDIO_REGISTER_MODE_MISSING
|
|
|
|
flag (see
|
|
|
|
.BR ioctl_userfaultfd (2))
|
|
|
|
and this flag is set, this a write fault;
|
|
|
|
otherwise it is a read fault.
|
|
|
|
.\"
|
|
|
|
.\" UFFD_PAGEFAULT_FLAG_WP is not yet supported.
|
|
|
|
.RE
|
2017-04-27 14:14:33 +00:00
|
|
|
.TP
|
|
|
|
.I fork.ufd
|
|
|
|
The file descriptor associated with the userfault object
|
2017-05-01 12:14:37 +00:00
|
|
|
created for the child created by
|
|
|
|
.BR fork (2).
|
2017-04-27 14:14:33 +00:00
|
|
|
.TP
|
|
|
|
.I remap.from
|
|
|
|
The original address of the memory range that was remapped using
|
|
|
|
.BR mremap (2).
|
|
|
|
.TP
|
|
|
|
.I remap.to
|
|
|
|
The new address of the memory range that was remapped using
|
|
|
|
.BR mremap (2).
|
|
|
|
.TP
|
|
|
|
.I remap.len
|
2017-04-27 18:49:30 +00:00
|
|
|
The original length of the memory range that was remapped using
|
2017-04-27 14:14:33 +00:00
|
|
|
.BR mremap (2).
|
|
|
|
.TP
|
|
|
|
.I remove.start
|
|
|
|
The start address of the memory range that was freed using
|
|
|
|
.BR madvise (2)
|
|
|
|
or unmapped
|
|
|
|
.TP
|
|
|
|
.I remove.end
|
|
|
|
The end address of the memory range that was freed using
|
|
|
|
.BR madvise (2)
|
|
|
|
or unmapped
|
2017-01-04 22:29:03 +00:00
|
|
|
.PP
|
2017-01-05 18:53:41 +00:00
|
|
|
A
|
|
|
|
.BR read (2)
|
|
|
|
on a userfaultfd file descriptor can fail with the following errors:
|
|
|
|
.TP
|
|
|
|
.B EINVAL
|
|
|
|
The userfaultfd object has not yet been enabled using the
|
|
|
|
.BR UFFDIO_API
|
|
|
|
.BR ioctl (2)
|
|
|
|
operation
|
|
|
|
.PP
|
2017-03-26 15:22:27 +00:00
|
|
|
If the
|
|
|
|
.B O_NONBLOCK
|
|
|
|
flag is enabled in the associated open file description,
|
|
|
|
the userfaultfd file descriptor can be monitored with
|
2017-01-04 22:29:03 +00:00
|
|
|
.BR poll (2),
|
|
|
|
.BR select (2),
|
|
|
|
and
|
|
|
|
.BR epoll (7).
|
|
|
|
When events are available, the file descriptor indicates as readable.
|
2017-03-26 15:22:27 +00:00
|
|
|
If the
|
|
|
|
.B O_NONBLOCK
|
|
|
|
flag is not enabled, then
|
|
|
|
.BR poll (2)
|
|
|
|
(always) indicates the file as having a
|
|
|
|
.BR POLLERR
|
|
|
|
condition, and
|
|
|
|
.BR select (2)
|
|
|
|
indicates the file descriptor as both readable and writable.
|
|
|
|
.\" FIXME What is the reason for this seemingly odd behavior with respect
|
|
|
|
.\" to the O_NONBLOCK flag? (see userfaultfd_poll() in fs/userfaultfd.c).
|
|
|
|
.\" Something needs to be said about this.
|
2016-12-29 07:15:17 +00:00
|
|
|
.SH RETURN VALUE
|
2016-12-29 11:50:09 +00:00
|
|
|
On success,
|
|
|
|
.BR userfaultfd ()
|
|
|
|
returns a new file descriptor that refers to the userfaultfd object.
|
2016-12-29 07:15:17 +00:00
|
|
|
On error, \-1 is returned, and
|
|
|
|
.I errno
|
|
|
|
is set appropriately.
|
|
|
|
.SH ERRORS
|
|
|
|
.TP
|
|
|
|
.B EINVAL
|
|
|
|
An unsupported value was specified in
|
|
|
|
.IR flags .
|
|
|
|
.TP
|
|
|
|
.BR EMFILE
|
|
|
|
The per-process limit on the number of open file descriptors has been
|
|
|
|
reached
|
|
|
|
.TP
|
|
|
|
.B ENFILE
|
|
|
|
The system-wide limit on the total number of open files has been
|
|
|
|
reached.
|
|
|
|
.TP
|
|
|
|
.B ENOMEM
|
|
|
|
Insufficient kernel memory was available.
|
2017-01-04 08:00:29 +00:00
|
|
|
.SH VERSIONS
|
|
|
|
The
|
|
|
|
.BR userfaultfd ()
|
|
|
|
system call first appeared in Linux 4.3.
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-05-01 05:43:19 +00:00
|
|
|
The support for hugetlbfs and shared memory areas and
|
|
|
|
non-page-fault events was added in Linux 4.11
|
2016-12-29 07:15:17 +00:00
|
|
|
.SH CONFORMING TO
|
|
|
|
.BR userfaultfd ()
|
|
|
|
is Linux-specific and should not be used in programs intended to be
|
|
|
|
portable.
|
|
|
|
.SH NOTES
|
|
|
|
Glibc does not provide a wrapper for this system call; call it using
|
|
|
|
.BR syscall (2).
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-01-11 19:30:06 +00:00
|
|
|
The userfaultfd mechanism can be used as an alternative to
|
|
|
|
traditional user-space paging techniques based on the use of the
|
|
|
|
.BR SIGSEGV
|
|
|
|
signal and
|
|
|
|
.BR mmap (2).
|
|
|
|
It can also be used to implement lazy restore
|
|
|
|
for checkpoint/restore mechanisms,
|
|
|
|
as well as post-copy migration to allow (nearly) uninterrupted execution
|
2017-04-25 16:29:07 +00:00
|
|
|
when transferring virtual machines and Linux containers
|
|
|
|
from one host to another.
|
2017-01-06 04:11:26 +00:00
|
|
|
.SH EXAMPLE
|
|
|
|
The program below demonstrates the use of the userfaultfd mechanism.
|
|
|
|
The program creates two threads, one of which acts as the
|
|
|
|
page-fault handler for the process, for the pages in a demand-page zero
|
|
|
|
region created using
|
|
|
|
.BR mmap (2).
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-01-06 04:11:26 +00:00
|
|
|
The program takes one command-line argument,
|
|
|
|
which is the number of pages that will be created in a mapping
|
|
|
|
whose page faults will be handled via userfaultfd.
|
|
|
|
After creating a userfaultfd object,
|
|
|
|
the program then creates an anonymous private mapping of the specified size
|
|
|
|
and registers the address range of that mapping using the
|
|
|
|
.B UFFDIO_REGISTER
|
|
|
|
.BR ioctl (2)
|
|
|
|
operation.
|
2017-01-26 19:33:26 +00:00
|
|
|
The program then creates a second thread that will perform the
|
2017-01-06 04:11:26 +00:00
|
|
|
task of handling page faults.
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-01-06 04:11:26 +00:00
|
|
|
The main thread then walks through the pages of the mapping fetching
|
|
|
|
bytes from successive pages.
|
|
|
|
Because the pages have not yet been accessed,
|
|
|
|
the first access of a byte in each page will trigger a page-fault event
|
|
|
|
on the userfaultfd file descriptor.
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-01-06 04:11:26 +00:00
|
|
|
Each of the page-fault events is handled by the second thread,
|
|
|
|
which sits in a loop processing input from the userfaultfd file descriptor.
|
|
|
|
In each loop iteration, the second thread first calls
|
|
|
|
.BR poll (2)
|
|
|
|
to check the state of the file descriptor,
|
|
|
|
and then reads an event from the file descriptor.
|
|
|
|
All such events should be
|
|
|
|
.B UFFD_EVENT_PAGEFAULT
|
|
|
|
events,
|
|
|
|
which the thread handles by copying a page of data into
|
|
|
|
the faulting region using the
|
2017-01-26 19:33:26 +00:00
|
|
|
.B UFFDIO_COPY
|
2017-01-06 04:11:26 +00:00
|
|
|
.BR ioctl (2)
|
|
|
|
operation.
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-01-06 04:11:26 +00:00
|
|
|
The following is an example of what we see when running the program:
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2017-01-06 04:11:26 +00:00
|
|
|
.in +4n
|
execve.2, ioctl_console.2, ioctl_iflags.2, ioctl_ns.2, ioctl_userfaultfd.2, kcmp.2, kexec_load.2, keyctl.2, link.2, listxattr.2, membarrier.2, memfd_create.2, mmap.2, modify_ldt.2, mprotect.2, msgctl.2, nanosleep.2, open_by_handle_at.2, perf_event_open.2, poll.2, posix_fadvise.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, readdir.2, readv.2, recv.2, recvmmsg.2, request_key.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setscheduler.2, seccomp.2, select.2, select_tut.2, semctl.2, semop.2, send.2, sendmmsg.2, set_thread_area.2, setns.2, shmctl.2, shmget.2, sigaction.2, sigaltstack.2, signal.2, sigwaitinfo.2, stat.2, statfs.2, statx.2, sync_file_range.2, syscall.2, sysctl.2, sysinfo.2, tee.2, timer_create.2, timer_settime.2, timerfd_create.2, unshare.2, userfaultfd.2, ustat.2, utime.2, utimensat.2, vmsplice.2, wait.2, adjtime.3, aio_init.3, backtrace.3, basename.3, bswap.3, btree.3, clock_getcpuclockid.3, cmsg.3, confstr.3, dbopen.3, dl_iterate_phdr.3, dladdr.3, dlinfo.3, dlopen.3, duplocale.3, encrypt.3, end.3, endian.3, err.3, errno.3, ether_aton.3, fgetgrent.3, fgetpwent.3, fmemopen.3, frexp.3, ftime.3, fts.3, getaddrinfo.3, getaddrinfo_a.3, getdate.3, getfsent.3, getgrent.3, getgrent_r.3, getgrnam.3, getgrouplist.3, gethostbyname.3, getifaddrs.3, getipnodebyname.3, getmntent.3, getnameinfo.3, getnetent.3, getopt.3, getprotoent.3, getprotoent_r.3, getpw.3, getpwent.3, getpwent_r.3, getpwnam.3, getrpcent.3, getservent.3, getservent_r.3, getspnam.3, getttyent.3, glob.3, gnu_get_libc_version.3, hash.3, hsearch.3, if_nameindex.3, inet.3, inet_net_pton.3, inet_pton.3, insque.3, isalpha.3, makecontext.3, mallinfo.3, malloc_info.3, mallopt.3, matherr.3, mbstowcs.3, mcheck.3, memchr.3, mq_getattr.3, mq_open.3, mq_receive.3, mq_send.3, mtrace.3, newlocale.3, ntp_gettime.3, posix_openpt.3, printf.3, pthread_attr_init.3, pthread_attr_setschedparam.3, pthread_cancel.3, pthread_cleanup_push.3, pthread_cleanup_push_defer_np.3, pthread_create.3, pthread_getattr_default_np.3, pthread_getattr_np.3, pthread_getcpuclockid.3, pthread_setname_np.3, pthread_setschedparam.3, pthread_sigmask.3, pthread_tryjoin_np.3, readdir.3, realpath.3, recno.3, regex.3, rpc.3, scanf.3, sched_getcpu.3, sem_wait.3, setaliasent.3, sigqueue.3, statvfs.3, strcat.3, strcpy.3, strftime.3, strtok.3, strtol.3, strverscmp.3, toupper.3, ttyslot.3, xdr.3, fuse.4, loop.4, rtc.4, st.4, acct.5, core.5, elf.5, slabinfo.5, aio.7, arp.7, capabilities.7, cgroup_namespaces.7, cgroups.7, ddp.7, fanotify.7, feature_test_macros.7, inode.7, inotify.7, ip.7, keyrings.7, locale.7, mount_namespaces.7, namespaces.7, netdevice.7, netlink.7, packet.7, pkeys.7, pthreads.7, sched.7, session-keyring.7, sock_diag.7, socket.7, spufs.7, udplite.7, unix.7, user_namespaces.7, vdso.7, x25.7, ld.so.8: Use consistent markup for code snippets
Change .nf/.fi to .EX/.EE
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-18 19:52:46 +00:00
|
|
|
.EX
|
2017-01-06 04:11:26 +00:00
|
|
|
$ \fB./userfaultfd_demo 3\fP
|
|
|
|
Address returned by mmap() = 0x7fd30106c000
|
|
|
|
|
|
|
|
fault_handler_thread():
|
|
|
|
poll() returns: nready = 1; POLLIN = 1; POLLERR = 0
|
|
|
|
UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fd30106c00f
|
|
|
|
(uffdio_copy.copy returned 4096)
|
|
|
|
Read address 0x7fd30106c00f in main(): A
|
|
|
|
Read address 0x7fd30106c40f in main(): A
|
|
|
|
Read address 0x7fd30106c80f in main(): A
|
|
|
|
Read address 0x7fd30106cc0f in main(): A
|
|
|
|
|
|
|
|
fault_handler_thread():
|
|
|
|
poll() returns: nready = 1; POLLIN = 1; POLLERR = 0
|
|
|
|
UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fd30106d00f
|
|
|
|
(uffdio_copy.copy returned 4096)
|
|
|
|
Read address 0x7fd30106d00f in main(): B
|
|
|
|
Read address 0x7fd30106d40f in main(): B
|
|
|
|
Read address 0x7fd30106d80f in main(): B
|
|
|
|
Read address 0x7fd30106dc0f in main(): B
|
|
|
|
|
|
|
|
fault_handler_thread():
|
|
|
|
poll() returns: nready = 1; POLLIN = 1; POLLERR = 0
|
|
|
|
UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fd30106e00f
|
|
|
|
(uffdio_copy.copy returned 4096)
|
|
|
|
Read address 0x7fd30106e00f in main(): C
|
|
|
|
Read address 0x7fd30106e40f in main(): C
|
|
|
|
Read address 0x7fd30106e80f in main(): C
|
|
|
|
Read address 0x7fd30106ec0f in main(): C
|
execve.2, ioctl_console.2, ioctl_iflags.2, ioctl_ns.2, ioctl_userfaultfd.2, kcmp.2, kexec_load.2, keyctl.2, link.2, listxattr.2, membarrier.2, memfd_create.2, mmap.2, modify_ldt.2, mprotect.2, msgctl.2, nanosleep.2, open_by_handle_at.2, perf_event_open.2, poll.2, posix_fadvise.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, readdir.2, readv.2, recv.2, recvmmsg.2, request_key.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setscheduler.2, seccomp.2, select.2, select_tut.2, semctl.2, semop.2, send.2, sendmmsg.2, set_thread_area.2, setns.2, shmctl.2, shmget.2, sigaction.2, sigaltstack.2, signal.2, sigwaitinfo.2, stat.2, statfs.2, statx.2, sync_file_range.2, syscall.2, sysctl.2, sysinfo.2, tee.2, timer_create.2, timer_settime.2, timerfd_create.2, unshare.2, userfaultfd.2, ustat.2, utime.2, utimensat.2, vmsplice.2, wait.2, adjtime.3, aio_init.3, backtrace.3, basename.3, bswap.3, btree.3, clock_getcpuclockid.3, cmsg.3, confstr.3, dbopen.3, dl_iterate_phdr.3, dladdr.3, dlinfo.3, dlopen.3, duplocale.3, encrypt.3, end.3, endian.3, err.3, errno.3, ether_aton.3, fgetgrent.3, fgetpwent.3, fmemopen.3, frexp.3, ftime.3, fts.3, getaddrinfo.3, getaddrinfo_a.3, getdate.3, getfsent.3, getgrent.3, getgrent_r.3, getgrnam.3, getgrouplist.3, gethostbyname.3, getifaddrs.3, getipnodebyname.3, getmntent.3, getnameinfo.3, getnetent.3, getopt.3, getprotoent.3, getprotoent_r.3, getpw.3, getpwent.3, getpwent_r.3, getpwnam.3, getrpcent.3, getservent.3, getservent_r.3, getspnam.3, getttyent.3, glob.3, gnu_get_libc_version.3, hash.3, hsearch.3, if_nameindex.3, inet.3, inet_net_pton.3, inet_pton.3, insque.3, isalpha.3, makecontext.3, mallinfo.3, malloc_info.3, mallopt.3, matherr.3, mbstowcs.3, mcheck.3, memchr.3, mq_getattr.3, mq_open.3, mq_receive.3, mq_send.3, mtrace.3, newlocale.3, ntp_gettime.3, posix_openpt.3, printf.3, pthread_attr_init.3, pthread_attr_setschedparam.3, pthread_cancel.3, pthread_cleanup_push.3, pthread_cleanup_push_defer_np.3, pthread_create.3, pthread_getattr_default_np.3, pthread_getattr_np.3, pthread_getcpuclockid.3, pthread_setname_np.3, pthread_setschedparam.3, pthread_sigmask.3, pthread_tryjoin_np.3, readdir.3, realpath.3, recno.3, regex.3, rpc.3, scanf.3, sched_getcpu.3, sem_wait.3, setaliasent.3, sigqueue.3, statvfs.3, strcat.3, strcpy.3, strftime.3, strtok.3, strtol.3, strverscmp.3, toupper.3, ttyslot.3, xdr.3, fuse.4, loop.4, rtc.4, st.4, acct.5, core.5, elf.5, slabinfo.5, aio.7, arp.7, capabilities.7, cgroup_namespaces.7, cgroups.7, ddp.7, fanotify.7, feature_test_macros.7, inode.7, inotify.7, ip.7, keyrings.7, locale.7, mount_namespaces.7, namespaces.7, netdevice.7, netlink.7, packet.7, pkeys.7, pthreads.7, sched.7, session-keyring.7, sock_diag.7, socket.7, spufs.7, udplite.7, unix.7, user_namespaces.7, vdso.7, x25.7, ld.so.8: Use consistent markup for code snippets
Change .nf/.fi to .EX/.EE
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-18 19:52:46 +00:00
|
|
|
.EE
|
ioctl_console.2, ioctl_getfsmap.2, ioctl_iflags.2, ioctl_list.2, ioctl_ns.2, kcmp.2, kexec_load.2, keyctl.2, link.2, mmap.2, modify_ldt.2, msgctl.2, poll.2, query_module.2, quotactl.2, recv.2, recvmmsg.2, sched_setscheduler.2, seccomp.2, select.2, semctl.2, semop.2, send.2, set_thread_area.2, setns.2, shmctl.2, shmget.2, sigaction.2, sysinfo.2, timer_create.2, timerfd_create.2, uname.2, unshare.2, userfaultfd.2, ustat.2, utimensat.2, vmsplice.2, wait.2, adjtime.3, backtrace.3, bswap.3, btree.3, clock_getcpuclockid.3, confstr.3, dbopen.3, dl_iterate_phdr.3, dlinfo.3, duplocale.3, encrypt.3, end.3, endian.3, err.3, errno.3, fmemopen.3, fopencookie.3, frexp.3, fts.3, ftw.3, getaddrinfo.3, getaddrinfo_a.3, getcontext.3, getgrouplist.3, getifaddrs.3, getipnodebyname.3, getnameinfo.3, getopt.3, getprotoent_r.3, getpwent_r.3, getrpcent.3, getservent_r.3, getttyent.3, getumask.3, glob.3, gnu_get_libc_version.3, hash.3, hsearch.3, inet.3, inet_pton.3, insque.3, isalpha.3, makecontext.3, mallopt.3, mbstowcs.3, mcheck.3, memchr.3, mq_getattr.3, mq_open.3, mtrace.3, newlocale.3, ntp_gettime.3, offsetof.3, posix_openpt.3, printf.3, pthread_setname_np.3, pthread_setschedparam.3, rpc.3, scanf.3, sched_getcpu.3, sem_wait.3, setaliasent.3, sigqueue.3, sigvec.3, stdarg.3, strcat.3, strcpy.3, strftime.3, strtol.3, toupper.3, ttyslot.3, fuse.4, loop.4, st.4, elf.5, cgroup_namespaces.7, cgroups.7, feature_test_macros.7, inode.7, inotify.7, keyrings.7, man-pages.7, math_error.7, mount_namespaces.7, mq_overview.7, pthreads.7, sched.7, session-keyring.7, udplite.7, unix.7, vdso.7: Use consistent markup for code snippets
The preferred form is
.PP/.IP
.in +4n
.EX
<code>
.EE
.in
.PP/.IP
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-18 19:37:55 +00:00
|
|
|
.in
|
2017-01-06 04:11:26 +00:00
|
|
|
.SS Program source
|
|
|
|
\&
|
memusage.1, clone.2, eventfd.2, futex.2, getdents.2, ioctl_fat.2, ioctl_ns.2, kcmp.2, keyctl.2, mmap.2, mprotect.2, msgop.2, recvmmsg.2, request_key.2, sched_setaffinity.2, seccomp.2, setns.2, tee.2, timer_create.2, timerfd_create.2, unshare.2, userfaultfd.2, wait.2, __ppc_get_timebase.3, backtrace.3, bswap.3, clock_getcpuclockid.3, dl_iterate_phdr.3, dlinfo.3, dlopen.3, duplocale.3, end.3, endian.3, fmemopen.3, fopencookie.3, frexp.3, ftw.3, getdate.3, getgrouplist.3, getifaddrs.3, getprotoent_r.3, getservent_r.3, gnu_get_libc_version.3, if_nameindex.3, inet.3, inet_net_pton.3, inet_pton.3, insque.3, makecontext.3, mallinfo.3, malloc_info.3, mallopt.3, matherr.3, mbstowcs.3, mcheck.3, mq_getattr.3, mq_notify.3, newlocale.3, offsetof.3, posix_spawn.3, pthread_attr_init.3, pthread_cancel.3, pthread_cleanup_push.3, pthread_create.3, pthread_getattr_default_np.3, pthread_getattr_np.3, pthread_getcpuclockid.3, pthread_setname_np.3, pthread_setschedparam.3, pthread_sigmask.3, scandir.3, sem_wait.3, strcat.3, strftime.3, strtok.3, strtol.3, strverscmp.3, loop.4, core.5, aio.7, fanotify.7, feature_test_macros.7, inotify.7, pkeys.7, unix.7, user_namespaces.7: Use .EX/.EE for EXAMPLE programs
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 08:44:33 +00:00
|
|
|
.EX
|
2017-01-06 04:11:26 +00:00
|
|
|
/* userfaultfd_demo.c
|
2017-01-26 19:33:26 +00:00
|
|
|
|
2017-01-06 04:11:26 +00:00
|
|
|
Licensed under the GNU General Public License version 2 or later.
|
|
|
|
*/
|
|
|
|
#define _GNU_SOURCE
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <linux/userfaultfd.h>
|
|
|
|
#include <pthread.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <signal.h>
|
|
|
|
#include <poll.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <sys/syscall.h>
|
|
|
|
#include <sys/ioctl.h>
|
|
|
|
#include <poll.h>
|
|
|
|
|
|
|
|
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
static int page_size;
|
|
|
|
|
|
|
|
static void *
|
|
|
|
fault_handler_thread(void *arg)
|
|
|
|
{
|
|
|
|
static struct uffd_msg msg; /* Data read from userfaultfd */
|
|
|
|
static int fault_cnt = 0; /* Number of faults so far handled */
|
|
|
|
long uffd; /* userfaultfd file descriptor */
|
|
|
|
static char *page = NULL;
|
|
|
|
struct uffdio_copy uffdio_copy;
|
|
|
|
ssize_t nread;
|
|
|
|
|
|
|
|
uffd = (long) arg;
|
|
|
|
|
|
|
|
/* Create a page that will be copied into the faulting region */
|
|
|
|
|
|
|
|
if (page == NULL) {
|
|
|
|
page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
|
|
|
|
MAP_PRIVATE | MAP_ANONYMOUS, \-1, 0);
|
|
|
|
if (page == MAP_FAILED)
|
|
|
|
errExit("mmap");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Loop, handling incoming events on the userfaultfd
|
|
|
|
file descriptor */
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
|
|
|
|
/* See what poll() tells us about the userfaultfd */
|
|
|
|
|
|
|
|
struct pollfd pollfd;
|
|
|
|
int nready;
|
|
|
|
pollfd.fd = uffd;
|
|
|
|
pollfd.events = POLLIN;
|
|
|
|
nready = poll(&pollfd, 1, \-1);
|
|
|
|
if (nready == \-1)
|
|
|
|
errExit("poll");
|
|
|
|
|
|
|
|
printf("\\nfault_handler_thread():\\n");
|
|
|
|
printf(" poll() returns: nready = %d; "
|
|
|
|
"POLLIN = %d; POLLERR = %d\\n", nready,
|
|
|
|
(pollfd.revents & POLLIN) != 0,
|
|
|
|
(pollfd.revents & POLLERR) != 0);
|
|
|
|
|
|
|
|
/* Read an event from the userfaultfd */
|
|
|
|
|
|
|
|
nread = read(uffd, &msg, sizeof(msg));
|
|
|
|
if (nread == 0) {
|
|
|
|
printf("EOF on userfaultfd!\\n");
|
|
|
|
exit(EXIT_FAILURE);
|
2017-01-26 19:33:26 +00:00
|
|
|
}
|
2017-01-06 04:11:26 +00:00
|
|
|
|
|
|
|
if (nread == \-1)
|
|
|
|
errExit("read");
|
2017-01-26 19:33:26 +00:00
|
|
|
|
2017-01-06 04:11:26 +00:00
|
|
|
/* We expect only one kind of event; verify that assumption */
|
|
|
|
|
|
|
|
if (msg.event != UFFD_EVENT_PAGEFAULT) {
|
|
|
|
fprintf(stderr, "Unexpected event on userfaultfd\\n");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Display info about the page\-fault event */
|
|
|
|
|
|
|
|
printf(" UFFD_EVENT_PAGEFAULT event: ");
|
|
|
|
printf("flags = %llx; ", msg.arg.pagefault.flags);
|
|
|
|
printf("address = %llx\\n", msg.arg.pagefault.address);
|
|
|
|
|
|
|
|
/* Copy the page pointed to by \(aqpage\(aq into the faulting
|
|
|
|
region. Vary the contents that are copied in, so that it
|
|
|
|
is more obvious that each fault is handled separately. */
|
|
|
|
|
|
|
|
memset(page, \(aqA\(aq + fault_cnt % 20, page_size);
|
|
|
|
fault_cnt++;
|
|
|
|
|
|
|
|
uffdio_copy.src = (unsigned long) page;
|
|
|
|
|
|
|
|
/* We need to handle page faults in units of pages(!).
|
|
|
|
So, round faulting address down to page boundary */
|
|
|
|
|
|
|
|
uffdio_copy.dst = (unsigned long) msg.arg.pagefault.address &
|
|
|
|
~(page_size \- 1);
|
|
|
|
uffdio_copy.len = page_size;
|
|
|
|
uffdio_copy.mode = 0;
|
|
|
|
uffdio_copy.copy = 0;
|
|
|
|
if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == \-1)
|
|
|
|
errExit("ioctl\-UFFDIO_COPY");
|
|
|
|
|
|
|
|
printf(" (uffdio_copy.copy returned %lld)\\n",
|
|
|
|
uffdio_copy.copy);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
{
|
|
|
|
long uffd; /* userfaultfd file descriptor */
|
|
|
|
char *addr; /* Start of region handled by userfaultfd */
|
|
|
|
unsigned long len; /* Length of region handled by userfaultfd */
|
|
|
|
pthread_t thr; /* ID of thread that handles page faults */
|
|
|
|
struct uffdio_api uffdio_api;
|
|
|
|
struct uffdio_register uffdio_register;
|
|
|
|
int s;
|
|
|
|
|
|
|
|
if (argc != 2) {
|
|
|
|
fprintf(stderr, "Usage: %s num\-pages\\n", argv[0]);
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
page_size = sysconf(_SC_PAGE_SIZE);
|
|
|
|
len = strtoul(argv[1], NULL, 0) * page_size;
|
|
|
|
|
|
|
|
/* Create and enable userfaultfd object */
|
|
|
|
|
|
|
|
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
|
|
|
|
if (uffd == \-1)
|
|
|
|
errExit("userfaultfd");
|
|
|
|
|
|
|
|
uffdio_api.api = UFFD_API;
|
|
|
|
uffdio_api.features = 0;
|
|
|
|
if (ioctl(uffd, UFFDIO_API, &uffdio_api) == \-1)
|
|
|
|
errExit("ioctl\-UFFDIO_API");
|
|
|
|
|
|
|
|
/* Create a private anonymous mapping. The memory will be
|
|
|
|
demand\-zero paged\-\-that is, not yet allocated. When we
|
|
|
|
actually touch the memory, it will be allocated via
|
|
|
|
the userfaultfd. */
|
|
|
|
|
|
|
|
addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
|
|
|
|
MAP_PRIVATE | MAP_ANONYMOUS, \-1, 0);
|
|
|
|
if (addr == MAP_FAILED)
|
|
|
|
errExit("mmap");
|
|
|
|
|
|
|
|
printf("Address returned by mmap() = %p\\n", addr);
|
|
|
|
|
|
|
|
/* Register the memory range of the mapping we just created for
|
|
|
|
handling by the userfaultfd object. In mode, we request to track
|
|
|
|
missing pages (i.e., pages that have not yet been faulted in). */
|
|
|
|
|
|
|
|
uffdio_register.range.start = (unsigned long) addr;
|
|
|
|
uffdio_register.range.len = len;
|
|
|
|
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
|
|
|
|
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == \-1)
|
|
|
|
errExit("ioctl\-UFFDIO_REGISTER");
|
|
|
|
|
|
|
|
/* Create a thread that will process the userfaultfd events */
|
|
|
|
|
|
|
|
s = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
|
|
|
|
if (s != 0) {
|
|
|
|
errno = s;
|
|
|
|
errExit("pthread_create");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Main thread now touches memory in the mapping, touching
|
|
|
|
locations 1024 bytes apart. This will trigger userfaultfd
|
|
|
|
events for all pages in the region. */
|
|
|
|
|
|
|
|
int l;
|
|
|
|
l = 0xf; /* Ensure that faulting address is not on a page
|
|
|
|
boundary, in order to test that we correctly
|
|
|
|
handle that case in fault_handling_thread() */
|
|
|
|
while (l < len) {
|
|
|
|
char c = addr[l];
|
|
|
|
printf("Read address %p in main(): ", addr + l);
|
|
|
|
printf("%c\\n", c);
|
|
|
|
l += 1024;
|
|
|
|
usleep(100000); /* Slow things down a little */
|
|
|
|
}
|
|
|
|
|
|
|
|
exit(EXIT_SUCCESS);
|
|
|
|
}
|
memusage.1, clone.2, eventfd.2, futex.2, getdents.2, ioctl_fat.2, ioctl_ns.2, kcmp.2, keyctl.2, mmap.2, mprotect.2, msgop.2, recvmmsg.2, request_key.2, sched_setaffinity.2, seccomp.2, setns.2, tee.2, timer_create.2, timerfd_create.2, unshare.2, userfaultfd.2, wait.2, __ppc_get_timebase.3, backtrace.3, bswap.3, clock_getcpuclockid.3, dl_iterate_phdr.3, dlinfo.3, dlopen.3, duplocale.3, end.3, endian.3, fmemopen.3, fopencookie.3, frexp.3, ftw.3, getdate.3, getgrouplist.3, getifaddrs.3, getprotoent_r.3, getservent_r.3, gnu_get_libc_version.3, if_nameindex.3, inet.3, inet_net_pton.3, inet_pton.3, insque.3, makecontext.3, mallinfo.3, malloc_info.3, mallopt.3, matherr.3, mbstowcs.3, mcheck.3, mq_getattr.3, mq_notify.3, newlocale.3, offsetof.3, posix_spawn.3, pthread_attr_init.3, pthread_cancel.3, pthread_cleanup_push.3, pthread_create.3, pthread_getattr_default_np.3, pthread_getattr_np.3, pthread_getcpuclockid.3, pthread_setname_np.3, pthread_setschedparam.3, pthread_sigmask.3, scandir.3, sem_wait.3, strcat.3, strftime.3, strtok.3, strtol.3, strverscmp.3, loop.4, core.5, aio.7, fanotify.7, feature_test_macros.7, inotify.7, pkeys.7, unix.7, user_namespaces.7: Use .EX/.EE for EXAMPLE programs
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 08:44:33 +00:00
|
|
|
.EE
|
2016-12-29 07:15:17 +00:00
|
|
|
.SH SEE ALSO
|
|
|
|
.BR fcntl (2),
|
2016-12-29 11:50:09 +00:00
|
|
|
.BR ioctl (2),
|
2016-12-30 10:06:33 +00:00
|
|
|
.BR ioctl_userfaultfd (2),
|
2017-01-09 00:04:19 +00:00
|
|
|
.BR madvise (2),
|
2016-12-29 11:50:09 +00:00
|
|
|
.BR mmap (2)
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|
2016-12-29 07:15:17 +00:00
|
|
|
.IR Documentation/vm/userfaultfd.txt
|
|
|
|
in the Linux kernel source tree
|
_syscall.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscalls.2, sysctl.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2: Formatting fix: replace blank lines with .PP/.IP
Blank lines shouldn't generally appear in *roff source (other
than in code examples), since they create large vertical
spaces between text blocks.
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-16 07:30:51 +00:00
|
|
|
.PP
|