mirror of https://github.com/mkerrisk/man-pages
399 lines
10 KiB
Groff
399 lines
10 KiB
Groff
.\" Copyright (C) 2008 Michael Kerrisk <mtk.manpages@gmail.com>
|
|
.\" starting from a version by Davide Libenzi <davidel@xmailserver.org>
|
|
.\"
|
|
.\" This program is free software; you can redistribute it and/or modify
|
|
.\" it under the terms of the GNU General Public License as published by
|
|
.\" the Free Software Foundation; either version 2 of the License, or
|
|
.\" (at your option) any later version.
|
|
.\"
|
|
.\" This program is distributed in the hope that it will be useful,
|
|
.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
.\" GNU General Public License for more details.
|
|
.\"
|
|
.\" You should have received a copy of the GNU General Public License
|
|
.\" along with this program; if not, write to the Free Software
|
|
.\" Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
|
.\" MA 02111-1307 USA
|
|
.\"
|
|
.\" 2008-10-10, mtk: describe eventfd2(), and EFD_NONBLOCK and EFD_CLOEXEC
|
|
.\"
|
|
.TH EVENTFD 2 2009-01-26 Linux "Linux Programmer's Manual"
|
|
.SH NAME
|
|
eventfd \- create a file descriptor for event notification
|
|
.SH SYNOPSIS
|
|
.B #include <sys/eventfd.h>
|
|
.sp
|
|
.BI "int eventfd(unsigned int " initval ", int " flags );
|
|
.SH DESCRIPTION
|
|
.BR eventfd ()
|
|
creates an "eventfd object" that can be used as
|
|
an event wait/notify mechanism by userspace applications,
|
|
and by the kernel to notify userspace applications of events.
|
|
The object contains an unsigned 64-bit integer
|
|
.RI ( uint64_t )
|
|
counter that is maintained by the kernel.
|
|
This counter is initialized with the value specified in the argument
|
|
.IR initval .
|
|
|
|
Starting with Linux 2.6.27, the following values may be bitwise ORed in
|
|
.IR flags
|
|
to change the behaviour of
|
|
.BR eventfd ():
|
|
.TP 14
|
|
.B EFD_NONBLOCK
|
|
Set the
|
|
.BR O_NONBLOCK
|
|
file status flag on the new open file description.
|
|
Using this flag saves extra calls to
|
|
.BR fcntl (2)
|
|
to achieve the same result.
|
|
.TP
|
|
.B EFD_CLOEXEC
|
|
Set the close-on-exec
|
|
.RB ( FD_CLOEXEC )
|
|
flag on the new file descriptor.
|
|
See the description of the
|
|
.B O_CLOEXEC
|
|
flag in
|
|
.BR open (2)
|
|
for reasons why this may be useful.
|
|
.PP
|
|
In Linux up to version 2.6.26, the
|
|
.I flags
|
|
argument is unused, and must be specified as zero.
|
|
|
|
As its return value,
|
|
.BR eventfd ()
|
|
returns a new file descriptor that can be used to refer to the
|
|
eventfd object.
|
|
The following operations can be performed on the file descriptor:
|
|
.TP
|
|
.BR read (2)
|
|
If the eventfd counter has a nonzero value, then a
|
|
.BR read (2)
|
|
returns 8 bytes containing that value,
|
|
and the counter's value is reset to zero.
|
|
(The returned value is in host byte order,
|
|
i.e., the native byte order for integers on the host machine.)
|
|
.IP
|
|
If the counter is zero at the time of the
|
|
.BR read (2),
|
|
then the call either blocks until the counter becomes nonzero,
|
|
or fails with the error
|
|
.B EAGAIN
|
|
if the file descriptor has been made nonblocking.
|
|
.IP
|
|
A
|
|
.BR read (2)
|
|
will fail with the error
|
|
.B EINVAL
|
|
if the size of the supplied buffer is less than 8 bytes.
|
|
.TP
|
|
.BR write (2)
|
|
A
|
|
.BR write (2)
|
|
call adds the 8-byte integer value supplied in its
|
|
buffer to the counter.
|
|
The maximum value that may be stored in the counter is the largest
|
|
unsigned 64-bit value minus 1 (i.e., 0xfffffffffffffffe).
|
|
If the addition would cause the counter's value to exceed
|
|
the maximum, then the
|
|
.BR write (2)
|
|
either blocks until a
|
|
.BR read (2)
|
|
is performed on the file descriptor,
|
|
or fails with the error
|
|
.B EAGAIN
|
|
if the file descriptor has been made nonblocking.
|
|
.IP
|
|
A
|
|
.BR write (2)
|
|
will fail with the error
|
|
.B EINVAL
|
|
if the size of the supplied buffer is less than 8 bytes,
|
|
or if an attempt is made to write the value 0xffffffffffffffff.
|
|
.TP
|
|
.BR poll "(2), " select "(2) (and similar)"
|
|
The returned file descriptor supports
|
|
.BR poll (2)
|
|
(and analogously
|
|
.BR epoll (7))
|
|
and
|
|
.BR select (2),
|
|
as follows:
|
|
.RS
|
|
.IP * 3
|
|
The file descriptor is readable
|
|
(the
|
|
.BR select (2)
|
|
.I readfds
|
|
argument; the
|
|
.BR poll (2)
|
|
.B POLLIN
|
|
flag)
|
|
if the counter has a value greater than 0.
|
|
.IP *
|
|
The file descriptor is writable
|
|
(the
|
|
.BR select (2)
|
|
.I writefds
|
|
argument; the
|
|
.BR poll (2)
|
|
.B POLLOUT
|
|
flag)
|
|
if it is possible to write a value of at least "1" without blocking.
|
|
.IP *
|
|
If an overflow of the counter value was detected,
|
|
then
|
|
.BR select (2)
|
|
indicates the file descriptor as being both readable and writable, and
|
|
.BR poll (2)
|
|
returns a
|
|
.B POLLERR
|
|
event.
|
|
As noted above,
|
|
.BR write (2)
|
|
can never overflow the counter.
|
|
However, an overflow can occur if 2^64
|
|
eventfd "signal posts" were performed by the KAIO
|
|
subsystem (theoretically possible, but practically unlikely).
|
|
If an overflow has occurred, then
|
|
.BR read (2)
|
|
will return that maximum
|
|
.I uint64_t
|
|
value (i.e., 0xffffffffffffffff).
|
|
.RE
|
|
.IP
|
|
The eventfd file descriptor also supports the other file-descriptor
|
|
multiplexing APIs:
|
|
.BR pselect (2),
|
|
.BR ppoll (2),
|
|
and
|
|
.BR epoll (7).
|
|
.TP
|
|
.BR close (2)
|
|
When the file descriptor is no longer required it should be closed.
|
|
When all file descriptors associated with the same eventfd object
|
|
have been closed, the resources for the object are freed by the kernel.
|
|
.PP
|
|
A copy of the file descriptor created by
|
|
.BR eventfd ()
|
|
is inherited by the child produced by
|
|
.BR fork (2).
|
|
The duplicate file descriptor is associated with the same
|
|
eventfd object.
|
|
File descriptors created by
|
|
.BR eventfd ()
|
|
are preserved across
|
|
.BR execve (2).
|
|
.SH "RETURN VALUE"
|
|
On success,
|
|
.BR eventfd ()
|
|
returns a new eventfd file descriptor.
|
|
On error, \-1 is returned and
|
|
.I errno
|
|
is set to indicate the error.
|
|
.SH ERRORS
|
|
.TP
|
|
.B EINVAL
|
|
.I flags
|
|
is invalid;
|
|
or, in Linux 2.6.26 or earlier,
|
|
.I flags
|
|
is nonzero.
|
|
.TP
|
|
.B EMFILE
|
|
The per-process limit on open file descriptors has been reached.
|
|
.TP
|
|
.B ENFILE
|
|
The system-wide limit on the total number of open files has been
|
|
reached.
|
|
.TP
|
|
.B ENODEV
|
|
.\" Note from Davide:
|
|
.\" The ENODEV error is basically never going to happen if
|
|
.\" the kernel boots correctly. That error happen only if during
|
|
.\" the kernel initialization, some error occur in the anonymous
|
|
.\" inode source initialization.
|
|
Could not mount (internal) anonymous inode device.
|
|
.TP
|
|
.B ENOMEM
|
|
There was insufficient memory to create a new
|
|
eventfd file descriptor.
|
|
.SH VERSIONS
|
|
.BR eventfd ()
|
|
is available on Linux since kernel 2.6.22.
|
|
Working support is provided in glibc since version 2.8.
|
|
.\" eventfd() is in glibc 2.7, but reportedly does not build
|
|
The
|
|
.BR eventfd2 ()
|
|
system call (see NOTES) is available on Linux since kernel 2.6.27.
|
|
Since version 2.9, the glibc
|
|
.BR eventfd ()
|
|
wrapper will employ the
|
|
.BR eventfd2 ()
|
|
system call, if it is supported by the kernel.
|
|
.SH "CONFORMING TO"
|
|
.BR eventfd ()
|
|
and
|
|
.BR eventfd2 ()
|
|
are Linux-specific.
|
|
.SH NOTES
|
|
Applications can use an eventfd file descriptor instead of a pipe (see
|
|
.BR pipe (2))
|
|
in all cases where a pipe is used simply to signal events.
|
|
The kernel overhead of an eventfd file descriptor
|
|
is much lower than that of a pipe,
|
|
and only one file descriptor is
|
|
required (versus the two required for a pipe).
|
|
|
|
When used in the kernel, an eventfd
|
|
file descriptor can provide a kernel-userspace bridge allowing,
|
|
for example, functionalities like KAIO (kernel AIO)
|
|
.\" or eventually syslets/threadlets
|
|
to signal to a file descriptor that some operation is complete.
|
|
|
|
A key point about an eventfd file descriptor is that it can be
|
|
monitored just like any other file descriptor using
|
|
.BR select (2),
|
|
.BR poll (2),
|
|
or
|
|
.BR epoll (7).
|
|
This means that an application can simultaneously monitor the
|
|
readiness of "traditional" files and the readiness of other
|
|
kernel mechanisms that support the eventfd interface.
|
|
(Without the
|
|
.BR eventfd ()
|
|
interface, these mechanisms could not be multiplexed via
|
|
.BR select (2),
|
|
.BR poll (2),
|
|
or
|
|
.BR epoll (7).)
|
|
.SS Underlying Linux system calls
|
|
There are two underlying Linux system calls:
|
|
.BR eventfd ()
|
|
and the more recent
|
|
.BR eventfd2 ().
|
|
The former system call does not implement a
|
|
.I flags
|
|
argument.
|
|
The latter system call implements the
|
|
.I flags
|
|
values described above.
|
|
The glibc wrapper function will use
|
|
.BR eventfd2 ()
|
|
where it is available.
|
|
.SS Additional glibc features
|
|
The GNU C library defines an additional type,
|
|
and two functions that attempt to abstract some of the details of
|
|
reading and writing on an eventfd file descriptor:
|
|
.in +4n
|
|
.nf
|
|
|
|
typedef uint64_t eventfd_t;
|
|
|
|
int eventfd_read(int fd, eventfd_t *value);
|
|
int eventfd_write(int fd, eventfd_t value);
|
|
.fi
|
|
.in
|
|
|
|
The functions perform the read and write operations on an
|
|
eventfd file descriptor,
|
|
returning 0 if the correct number of bytes was transferred,
|
|
or \-1 otherwise.
|
|
.SH EXAMPLE
|
|
.PP
|
|
The following program creates an eventfd file descriptor
|
|
and then forks to create a child process.
|
|
While the parent briefly sleeps,
|
|
the child writes each of the integers supplied in the program's
|
|
command-line arguments to the eventfd file descriptor.
|
|
When the parent has finished sleeping,
|
|
it reads from the eventfd file descriptor.
|
|
|
|
The following shell session shows a sample run of the program:
|
|
.in +4n
|
|
.nf
|
|
|
|
.RB "$" " ./a.out 1 2 4 7 14"
|
|
Child writing 1 to efd
|
|
Child writing 2 to efd
|
|
Child writing 4 to efd
|
|
Child writing 7 to efd
|
|
Child writing 14 to efd
|
|
Child completed write loop
|
|
Parent about to read
|
|
Parent read 28 (0x1c) from efd
|
|
.fi
|
|
.in
|
|
.SS Program source
|
|
\&
|
|
.nf
|
|
#include <sys/eventfd.h>
|
|
#include <unistd.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <stdint.h> /* Definition of uint64_t */
|
|
|
|
#define handle_error(msg) \\
|
|
do { perror(msg); exit(EXIT_FAILURE); } while (0)
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
int efd, j;
|
|
uint64_t u;
|
|
ssize_t s;
|
|
|
|
if (argc < 2) {
|
|
fprintf(stderr, "Usage: %s <num>...\\n", argv[0]);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
efd = eventfd(0, 0);
|
|
if (efd == \-1)
|
|
handle_error("eventfd");
|
|
|
|
switch (fork()) {
|
|
case 0:
|
|
for (j = 1; j < argc; j++) {
|
|
printf("Child writing %s to efd\\n", argv[j]);
|
|
u = strtoull(argv[j], NULL, 0);
|
|
/* strtoull() allows various bases */
|
|
s = write(efd, &u, sizeof(uint64_t));
|
|
if (s != sizeof(uint64_t))
|
|
handle_error("write");
|
|
}
|
|
printf("Child completed write loop\\n");
|
|
|
|
exit(EXIT_SUCCESS);
|
|
|
|
default:
|
|
sleep(2);
|
|
|
|
printf("Parent about to read\\n");
|
|
s = read(efd, &u, sizeof(uint64_t));
|
|
if (s != sizeof(uint64_t))
|
|
handle_error("read");
|
|
printf("Parent read %llu (0x%llx) from efd\\n",
|
|
(unsigned long long) u, (unsigned long long) u);
|
|
exit(EXIT_SUCCESS);
|
|
|
|
case \-1:
|
|
handle_error("fork");
|
|
}
|
|
}
|
|
.fi
|
|
.SH "SEE ALSO"
|
|
.BR futex (2),
|
|
.BR pipe (2),
|
|
.BR poll (2),
|
|
.BR read (2),
|
|
.BR select (2),
|
|
.BR signalfd (2),
|
|
.BR timerfd_create (2),
|
|
.BR write (2),
|
|
.BR epoll (7),
|
|
.BR sem_overview (7)
|