mirror of https://github.com/mkerrisk/man-pages
868 lines
23 KiB
Groff
868 lines
23 KiB
Groff
.\" This manpage is copyright (C) 2001 Paul Sheer.
|
|
.\"
|
|
.\" Permission is granted to make and distribute verbatim copies of this
|
|
.\" manual provided the copyright notice and this permission notice are
|
|
.\" preserved on all copies.
|
|
.\"
|
|
.\" Permission is granted to copy and distribute modified versions of this
|
|
.\" manual under the conditions for verbatim copying, provided that the
|
|
.\" entire resulting derived work is distributed under the terms of a
|
|
.\" permission notice identical to this one.
|
|
.\"
|
|
.\" Since the Linux kernel and libraries are constantly changing, this
|
|
.\" manual page may be incorrect or out-of-date. The author(s) assume no
|
|
.\" responsibility for errors or omissions, or for damages resulting from
|
|
.\" the use of the information contained herein. The author(s) may not
|
|
.\" have taken the same level of care in the production of this manual,
|
|
.\" which is licensed free of charge, as they might when working
|
|
.\" professionally.
|
|
.\"
|
|
.\" Formatted or processed versions of this manual, if unaccompanied by
|
|
.\" the source, must acknowledge the copyright and authors of this work.
|
|
.\"
|
|
.\" very minor changes, aeb
|
|
.\"
|
|
.\" Modified 5 June 2002, Michael Kerrisk <mtk.manpages@gmail.com>
|
|
.\" 2006-05-13, mtk, removed much material that is redundant with select.2
|
|
.\" various other changes
|
|
.\"
|
|
.TH SELECT_TUT 2 2007-12-18 "Linux" "Linux Programmer's Manual"
|
|
.SH NAME
|
|
select, pselect, FD_CLR, FD_ISSET, FD_SET, FD_ZERO \-
|
|
synchronous I/O multiplexing
|
|
.SH SYNOPSIS
|
|
.nf
|
|
/* According to POSIX.1-2001 */
|
|
.br
|
|
.B #include <sys/select.h>
|
|
.sp
|
|
/* According to earlier standards */
|
|
.br
|
|
.B #include <sys/time.h>
|
|
.br
|
|
.B #include <sys/types.h>
|
|
.br
|
|
.B #include <unistd.h>
|
|
.sp
|
|
.BI "int select(int " nfds ", fd_set *" readfds ", fd_set *" writefds ,
|
|
.BI " fd_set *" exceptfds ", struct timeval *" utimeout );
|
|
.sp
|
|
.BI "void FD_CLR(int " fd ", fd_set *" set );
|
|
.br
|
|
.BI "int FD_ISSET(int " fd ", fd_set *" set );
|
|
.br
|
|
.BI "void FD_SET(int " fd ", fd_set *" set );
|
|
.br
|
|
.BI "void FD_ZERO(fd_set *" set );
|
|
.sp
|
|
.B #include <sys/select.h>
|
|
.sp
|
|
.BI "int pselect(int " nfds ", fd_set *" readfds ", fd_set *" writefds ,
|
|
.BI " fd_set *" exceptfds ", const struct timespec *" ntimeout ,
|
|
.BI " const sigset_t *" sigmask );
|
|
.fi
|
|
.sp
|
|
.in -4n
|
|
Feature Test Macro Requirements for glibc (see
|
|
.BR feature_test_macros (7)):
|
|
.in
|
|
.sp
|
|
.BR pselect ():
|
|
_POSIX_C_SOURCE\ >=\ 200112L || _XOPEN_SOURCE\ >=\ 600
|
|
.SH DESCRIPTION
|
|
.BR select ()
|
|
(or
|
|
.BR pselect ())
|
|
is the pivot function of
|
|
most C programs that
|
|
handle more than one simultaneous file descriptor (or socket handle)
|
|
in an efficient
|
|
manner.
|
|
Its principal arguments are three arrays of file descriptors:
|
|
\fIreadfds\fP, \fIwritefds\fP, and \fIexceptfds\fP.
|
|
The way that
|
|
.BR select ()
|
|
is usually used is to block while waiting for a "change of
|
|
status" on one or more of the file descriptors.
|
|
A "change of status" is
|
|
when more characters become available from the file descriptor, \fIor\fP
|
|
when space becomes available within the kernel's internal buffers for
|
|
more to be written to the file descriptor, \fIor\fP when a file
|
|
descriptor goes into error (in the case of a socket or pipe this is
|
|
when the other end of the connection is closed).
|
|
|
|
In summary,
|
|
.BR select ()
|
|
just watches multiple file descriptors,
|
|
and is the standard Unix call to do so.
|
|
|
|
The arrays of file descriptors are called \fIfile descriptor sets\fP.
|
|
Each set is declared as type \fBfd_set\fP, and its contents can be
|
|
altered with the macros
|
|
.BR FD_CLR (),
|
|
.BR FD_ISSET (),
|
|
.BR FD_SET (),
|
|
and
|
|
.BR FD_ZERO ().
|
|
.BR FD_ZERO ()
|
|
is usually the first function to be used on
|
|
a newly declared set.
|
|
Thereafter, the individual file descriptors that
|
|
you are interested in can be added one by one with
|
|
.BR FD_SET ().
|
|
.BR select ()
|
|
modifies the contents of the sets according to the rules
|
|
described below; after calling
|
|
.BR select ()
|
|
you can test if your file
|
|
descriptor is still present in the set with the
|
|
.BR FD_ISSET ()
|
|
macro.
|
|
.BR FD_ISSET ()
|
|
returns non-zero if the descriptor is present and zero if
|
|
it is not.
|
|
.BR FD_CLR ()
|
|
removes a file descriptor from the set.
|
|
.SS Arguments
|
|
.TP
|
|
\fIreadfds\fP
|
|
This set is watched to see if data is available for reading from any of
|
|
its file descriptors.
|
|
After
|
|
.BR select ()
|
|
has returned, \fIreadfds\fP will be
|
|
cleared of all file descriptors except for those that
|
|
are immediately available for reading with a
|
|
.BR recv (2)
|
|
(for sockets) or
|
|
.BR read (2)
|
|
(for pipes, files, and sockets) call.
|
|
.TP
|
|
\fIwritefds\fP
|
|
This set is watched to see if there is space to write data to any of
|
|
its file descriptors.
|
|
After
|
|
.BR select ()
|
|
has returned, \fIwritefds\fP will be
|
|
cleared of all file descriptors except for those that
|
|
are immediately available for writing with a
|
|
.BR send (2)
|
|
(for sockets) or
|
|
.BR write (2)
|
|
(for pipes, files, and sockets) call.
|
|
.TP
|
|
\fIexceptfds\fP
|
|
This set is watched for exceptions or errors on any of the file
|
|
descriptors.
|
|
However, that is actually just a rumor.
|
|
How you use
|
|
\fIexceptfds\fP is to watch for \fIout-of-band\fP (OOB) data.
|
|
OOB data
|
|
is data sent on a socket using the \fBMSG_OOB\fP flag, and hence
|
|
\fIexceptfds\fP only really applies to sockets.
|
|
See
|
|
.BR recv (2)
|
|
and
|
|
.BR send (2)
|
|
about this.
|
|
After
|
|
.BR select ()
|
|
has returned,
|
|
\fIexceptfds\fP will be cleared of all file descriptors except for those
|
|
that are available for reading OOB data.
|
|
You can only ever
|
|
read one byte of OOB data though (which is done with
|
|
.BR recv (2)),
|
|
and
|
|
writing OOB data (done with
|
|
.BR send (2))
|
|
can be done at any time and will
|
|
not block.
|
|
Hence there is no need for a fourth set to check if a socket
|
|
is available for writing OOB data.
|
|
.TP
|
|
\fInfds\fP
|
|
This is an integer one more than the maximum of any file descriptor in
|
|
any of the sets.
|
|
In other words, while you are busy adding file descriptors
|
|
to your sets, you must calculate the maximum integer value of all of
|
|
them, then increment this value by one, and then pass this as \fInfds\fP to
|
|
.BR select ().
|
|
.TP
|
|
\fIutimeout\fP
|
|
This is the longest time
|
|
.BR select ()
|
|
may wait before returning, even
|
|
if nothing interesting happened.
|
|
If this value is passed as NULL,
|
|
then
|
|
.BR select ()
|
|
blocks indefinitely waiting for an event.
|
|
\fIutimeout\fP can be set to zero seconds, which causes
|
|
.BR select ()
|
|
to
|
|
return immediately.
|
|
The structure \fIstruct timeval\fP is defined as:
|
|
.IP
|
|
.in +4n
|
|
.nf
|
|
struct timeval {
|
|
time_t tv_sec; /* seconds */
|
|
long tv_usec; /* microseconds */
|
|
};
|
|
.fi
|
|
.in
|
|
.TP
|
|
\fIntimeout\fP
|
|
This argument has the same meaning as \fIutimeout\fP but \fIstruct timespec\fP
|
|
has nanosecond precision as follows:
|
|
.IP
|
|
.in +4n
|
|
.nf
|
|
struct timespec {
|
|
time_t tv_sec; /* seconds */
|
|
long tv_nsec; /* nanoseconds */
|
|
};
|
|
.fi
|
|
.in
|
|
.TP
|
|
\fIsigmask\fP
|
|
This argument points to the signal mask that temporarily replaces the current one during a
|
|
.BR pselect ()
|
|
call (see
|
|
.BR sigaddset (3)
|
|
and
|
|
.BR sigprocmask (2)).
|
|
It can be passed
|
|
as NULL, in which case it does not modify the set of allowed signals on
|
|
entry and exit to the function.
|
|
It will then behave just like
|
|
.BR select ().
|
|
.SS Combining Signal and Data Events
|
|
.BR pselect ()
|
|
must be used if you are waiting for a signal as well as
|
|
data from a file descriptor.
|
|
Programs that receive signals as events
|
|
normally use the signal handler only to raise a global flag.
|
|
The global
|
|
flag will indicate that the event must be processed in the main loop of
|
|
the program.
|
|
A signal will cause the
|
|
.BR select ()
|
|
(or
|
|
.BR pselect ())
|
|
call to return with \fIerrno\fP set to \fBEINTR\fP.
|
|
This behavior is
|
|
essential so that signals can be processed in the main loop of the
|
|
program, otherwise
|
|
.BR select ()
|
|
would block indefinitely.
|
|
Now, somewhere
|
|
in the main loop will be a conditional to check the global flag.
|
|
So we
|
|
must ask: what if a signal arrives after the conditional, but before the
|
|
.BR select ()
|
|
call?
|
|
The answer is that
|
|
.BR select ()
|
|
would block
|
|
indefinitely, even though an event is actually pending.
|
|
This race
|
|
condition is solved by the
|
|
.BR pselect ()
|
|
call.
|
|
This call can be used to
|
|
mask out signals that are not to be received except within the
|
|
.BR pselect ()
|
|
call.
|
|
For instance, let us say that the event in question
|
|
was the exit of a child process.
|
|
Before the start of the main loop, we
|
|
would block \fBSIGCHLD\fP using
|
|
.BR sigprocmask (2).
|
|
Our
|
|
.BR pselect ()
|
|
call would enable \fBSIGCHLD\fP by using the original signal mask.
|
|
Our
|
|
program would look like:
|
|
.PP
|
|
.nf
|
|
int child_events = 0;
|
|
|
|
void
|
|
child_sig_handler(int x)
|
|
{
|
|
child_events++;
|
|
signal(SIGCHLD, child_sig_handler);
|
|
}
|
|
|
|
int
|
|
main(int argc, char **argv)
|
|
{
|
|
sigset_t sigmask, orig_sigmask;
|
|
|
|
sigemptyset(&sigmask);
|
|
sigaddset(&sigmask, SIGCHLD);
|
|
sigprocmask(SIG_BLOCK, &sigmask, &orig_sigmask);
|
|
|
|
signal(SIGCHLD, child_sig_handler);
|
|
|
|
for (;;) { /* main loop */
|
|
for (; child_events > 0; child_events\-\-) {
|
|
/* do event work here */
|
|
}
|
|
r = pselect(nfds, &rd, &wr, &er, 0, &orig_sigmask);
|
|
|
|
/* main body of program */
|
|
}
|
|
}
|
|
.fi
|
|
.SS Practical
|
|
So what is the point of
|
|
.BR select ()?
|
|
Can't I just read and write to my
|
|
descriptors whenever I want?
|
|
The point of
|
|
.BR select ()
|
|
is that it watches
|
|
multiple descriptors at the same time and properly puts the process to
|
|
sleep if there is no activity.
|
|
It does this while enabling you to handle
|
|
multiple simultaneous pipes and sockets.
|
|
Unix programmers often find
|
|
themselves in a position where they have to handle I/O from more than one
|
|
file descriptor where the data flow may be intermittent.
|
|
If you were to
|
|
merely create a sequence of
|
|
.BR read (2)
|
|
and
|
|
.BR write (2)
|
|
calls, you would
|
|
find that one of your calls may block waiting for data from/to a file
|
|
descriptor, while another file descriptor is unused though available
|
|
for data.
|
|
.BR select ()
|
|
efficiently copes with this situation.
|
|
|
|
A simple example of the use of
|
|
.BR select ()
|
|
can be found in the
|
|
.BR select (2)
|
|
manual page.
|
|
.SS Select Law
|
|
Many people who try to use
|
|
.BR select ()
|
|
come across behavior that is
|
|
difficult to understand and produces non-portable or borderline
|
|
results.
|
|
For instance, the example program below is carefully written not to
|
|
block at any point, even though it does not set its file descriptors to
|
|
non-blocking mode at all (see
|
|
.BR ioctl (2)).
|
|
It is easy to introduce
|
|
subtle errors that will remove the advantage of using
|
|
.BR select (),
|
|
hence I will present a list of essentials to watch for when using the
|
|
.BR select ()
|
|
call.
|
|
.TP 4
|
|
1.
|
|
You should always try to use
|
|
.BR select ()
|
|
without a timeout.
|
|
Your program
|
|
should have nothing to do if there is no data available.
|
|
Code that
|
|
depends on timeouts is not usually portable and is difficult to debug.
|
|
.TP
|
|
2.
|
|
The value \fInfds\fP must be properly calculated for efficiency as
|
|
explained above.
|
|
.TP
|
|
3.
|
|
No file descriptor must be added to any set if you do not intend
|
|
to check its result after the
|
|
.BR select ()
|
|
call, and respond
|
|
appropriately.
|
|
See next rule.
|
|
.TP
|
|
4.
|
|
After
|
|
.BR select ()
|
|
returns, all file descriptors in all sets
|
|
should be checked to see if they are ready.
|
|
.\" mtk, May 2006: the following isn't really true.
|
|
.\" Any file descriptor that is available
|
|
.\" for writing \fImust\fP be written to, and any file descriptor
|
|
.\" available for reading \fImust\fP be read, etc.
|
|
.TP
|
|
5.
|
|
The functions
|
|
.BR read (2),
|
|
.BR recv (2),
|
|
.BR write (2),
|
|
and
|
|
.BR send (2)
|
|
do \fInot\fP necessarily read/write the full amount of data
|
|
that you have requested.
|
|
If they do read/write the full amount, it's
|
|
because you have a low traffic load and a fast stream.
|
|
This is not
|
|
always going to be the case.
|
|
You should cope with the case of your
|
|
functions only managing to send or receive a single byte.
|
|
.TP
|
|
6.
|
|
Never read/write only in single bytes at a time unless you are really
|
|
sure that you have a small amount of data to process.
|
|
It is extremely
|
|
inefficient not to read/write as much data as you can buffer each time.
|
|
The buffers in the example below are 1024 bytes although they could
|
|
easily be made larger.
|
|
.TP
|
|
7.
|
|
The functions
|
|
.BR read (2),
|
|
.BR recv (2),
|
|
.BR write (2),
|
|
and
|
|
.BR send (2)
|
|
as well as the
|
|
.BR select ()
|
|
call can return \-1 with
|
|
.I errno
|
|
set to \fBEINTR\fP,
|
|
or with
|
|
.I errno
|
|
set to \fBEAGAIN\fP (\fBEWOULDBLOCK\fP).
|
|
These results must be properly managed (not done properly
|
|
in the example below).
|
|
If your program is not going to receive any signals, then
|
|
it is unlikely you will get \fBEINTR\fP.
|
|
If your program does not
|
|
set non-blocking I/O, you will not get \fBEAGAIN\fP.
|
|
Nonetheless
|
|
you should still cope with these errors for completeness.
|
|
.TP
|
|
8.
|
|
Never call
|
|
.BR read (2),
|
|
.BR recv (2),
|
|
.BR write (2),
|
|
or
|
|
.BR send (2)
|
|
with a buffer length of zero.
|
|
.TP
|
|
9.
|
|
If the functions
|
|
.BR read (2),
|
|
.BR recv (2),
|
|
.BR write (2),
|
|
and
|
|
.BR send (2)
|
|
fail
|
|
with errors other than those listed in \fB7.\fP,
|
|
or one of the input functions returns 0, indicating end of file,
|
|
then you should \fInot\fP pass that descriptor to
|
|
.BR select ()
|
|
again.
|
|
In the example below,
|
|
I close the descriptor immediately, and then set it to \-1
|
|
to prevent it being included in a set.
|
|
.TP
|
|
10.
|
|
The timeout value must be initialized with each new call to
|
|
.BR select (),
|
|
since some operating systems modify the structure.
|
|
.BR pselect ()
|
|
however does not modify its timeout structure.
|
|
.TP
|
|
11.
|
|
I have heard that the Windows socket layer does not cope with OOB data
|
|
properly.
|
|
It also does not cope with
|
|
.BR select ()
|
|
calls when no file
|
|
descriptors are set at all.
|
|
Having no file descriptors set is a useful
|
|
way to sleep the process with sub-second precision by using the timeout.
|
|
(See further on.)
|
|
.SS Usleep Emulation
|
|
On systems that do not have a
|
|
.BR usleep (3)
|
|
function, you can call
|
|
.BR select ()
|
|
with a finite timeout and no file descriptors as
|
|
follows:
|
|
.PP
|
|
.nf
|
|
struct timeval tv;
|
|
tv.tv_sec = 0;
|
|
tv.tv_usec = 200000; /* 0.2 seconds */
|
|
select(0, NULL, NULL, NULL, &tv);
|
|
.fi
|
|
.PP
|
|
This is only guaranteed to work on Unix systems, however.
|
|
.SH RETURN VALUE
|
|
On success,
|
|
.BR select ()
|
|
returns the total number of file descriptors
|
|
still present in the file descriptor sets.
|
|
|
|
If
|
|
.BR select ()
|
|
timed out, then
|
|
the return value will be zero.
|
|
The file descriptor sets should all be
|
|
empty (but may not be on some systems).
|
|
|
|
A return value of \-1 indicates an error, with \fIerrno\fP being
|
|
set appropriately.
|
|
In the case of an error, the contents of the returned sets and
|
|
the \fIstruct timeval\fP contents are undefined and should not be used.
|
|
.BR pselect ()
|
|
however never modifies \fIntimeout\fP.
|
|
.SH NOTES
|
|
Generally speaking, all operating systems that support sockets, also
|
|
support
|
|
.BR select ().
|
|
Many types of programs become
|
|
extremely complicated without the use of
|
|
.BR select ().
|
|
.BR select ()
|
|
can be used to solve
|
|
many problems in a portable and efficient way that naive programmers try
|
|
to solve in a more complicated manner using
|
|
threads, forking, IPCs, signals, memory sharing, and so on.
|
|
.PP
|
|
The
|
|
.BR poll (2)
|
|
system call has the same functionality as
|
|
.BR select (),
|
|
and is somewhat more efficient when monitoring sparse
|
|
file descriptor sets.
|
|
It is nowadays widely available,
|
|
but historically was less portable than
|
|
.BR select ().
|
|
.PP
|
|
The Linux-specific
|
|
.BR epoll (7)
|
|
API provides an interface that is more efficient than
|
|
.BR select (2)
|
|
and
|
|
.BR poll (2)
|
|
when monitoring large numbers of file descriptors.
|
|
.SH EXAMPLE
|
|
Here is an example that better demonstrates the true utility of
|
|
.BR select ().
|
|
The listing below is a TCP forwarding program that forwards
|
|
from one TCP port to another.
|
|
.PP
|
|
.nf
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <unistd.h>
|
|
#include <sys/time.h>
|
|
#include <sys/types.h>
|
|
#include <string.h>
|
|
#include <signal.h>
|
|
#include <sys/socket.h>
|
|
#include <netinet/in.h>
|
|
#include <arpa/inet.h>
|
|
#include <errno.h>
|
|
|
|
static int forward_port;
|
|
|
|
#undef max
|
|
#define max(x,y) ((x) > (y) ? (x) : (y))
|
|
|
|
static int
|
|
listen_socket(int listen_port)
|
|
{
|
|
struct sockaddr_in a;
|
|
int s;
|
|
int yes;
|
|
|
|
if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
|
|
perror("socket");
|
|
return \-1;
|
|
}
|
|
yes = 1;
|
|
if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,
|
|
(char *) &yes, sizeof(yes)) < 0) {
|
|
perror("setsockopt");
|
|
close(s);
|
|
return \-1;
|
|
}
|
|
memset(&a, 0, sizeof(a));
|
|
a.sin_port = htons(listen_port);
|
|
a.sin_family = AF_INET;
|
|
if (bind(s, (struct sockaddr *) &a, sizeof(a)) < 0) {
|
|
perror("bind");
|
|
close(s);
|
|
return \-1;
|
|
}
|
|
printf("accepting connections on port %d\\n", listen_port);
|
|
listen(s, 10);
|
|
return s;
|
|
}
|
|
|
|
static int
|
|
connect_socket(int connect_port, char *address)
|
|
{
|
|
struct sockaddr_in a;
|
|
int s;
|
|
|
|
if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
|
|
perror("socket");
|
|
close(s);
|
|
return \-1;
|
|
}
|
|
|
|
memset(&a, 0, sizeof(a));
|
|
a.sin_port = htons(connect_port);
|
|
a.sin_family = AF_INET;
|
|
|
|
if (!inet_aton(address, (struct in_addr *) &a.sin_addr.s_addr)) {
|
|
perror("bad IP address format");
|
|
close(s);
|
|
return \-1;
|
|
}
|
|
|
|
if (connect(s, (struct sockaddr *) &a, sizeof(a)) < 0) {
|
|
perror("connect()");
|
|
shutdown(s, SHUT_RDWR);
|
|
close(s);
|
|
return \-1;
|
|
}
|
|
return s;
|
|
}
|
|
|
|
#define SHUT_FD1 { \\
|
|
if (fd1 >= 0) { \\
|
|
shutdown(fd1, SHUT_RDWR); \\
|
|
close(fd1); \\
|
|
fd1 = \-1; \\
|
|
} \\
|
|
}
|
|
|
|
#define SHUT_FD2 { \\
|
|
if (fd2 >= 0) { \\
|
|
shutdown(fd2, SHUT_RDWR); \\
|
|
close(fd2); \\
|
|
fd2 = \-1; \\
|
|
} \\
|
|
}
|
|
|
|
#define BUF_SIZE 1024
|
|
|
|
int
|
|
main(int argc, char **argv)
|
|
{
|
|
int h;
|
|
int fd1 = \-1, fd2 = \-1;
|
|
char buf1[BUF_SIZE], buf2[BUF_SIZE];
|
|
int buf1_avail = 0, buf1_written = 0;
|
|
int buf2_avail = 0, buf2_written = 0;
|
|
|
|
if (argc != 4) {
|
|
fprintf(stderr,
|
|
"Usage\\n\\tfwd <listen-port> "
|
|
"<forward-to-port> <forward-to-ip-address>\\n");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
signal(SIGPIPE, SIG_IGN);
|
|
|
|
forward_port = atoi(argv[2]);
|
|
|
|
h = listen_socket(atoi(argv[1]));
|
|
if (h < 0)
|
|
exit(EXIT_FAILURE);
|
|
|
|
for (;;) {
|
|
int r, nfds = 0;
|
|
fd_set rd, wr, er;
|
|
FD_ZERO(&rd);
|
|
FD_ZERO(&wr);
|
|
FD_ZERO(&er);
|
|
FD_SET(h, &rd);
|
|
nfds = max(nfds, h);
|
|
if (fd1 > 0 && buf1_avail < BUF_SIZE) {
|
|
FD_SET(fd1, &rd);
|
|
nfds = max(nfds, fd1);
|
|
}
|
|
if (fd2 > 0 && buf2_avail < BUF_SIZE) {
|
|
FD_SET(fd2, &rd);
|
|
nfds = max(nfds, fd2);
|
|
}
|
|
if (fd1 > 0
|
|
&& buf2_avail \- buf2_written > 0) {
|
|
FD_SET(fd1, &wr);
|
|
nfds = max(nfds, fd1);
|
|
}
|
|
if (fd2 > 0
|
|
&& buf1_avail \- buf1_written > 0) {
|
|
FD_SET(fd2, &wr);
|
|
nfds = max(nfds, fd2);
|
|
}
|
|
if (fd1 > 0) {
|
|
FD_SET(fd1, &er);
|
|
nfds = max(nfds, fd1);
|
|
}
|
|
if (fd2 > 0) {
|
|
FD_SET(fd2, &er);
|
|
nfds = max(nfds, fd2);
|
|
}
|
|
|
|
r = select(nfds + 1, &rd, &wr, &er, NULL);
|
|
|
|
if (r == \-1 && errno == EINTR)
|
|
continue;
|
|
if (r < 0) {
|
|
perror("select()");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
if (FD_ISSET(h, &rd)) {
|
|
unsigned int l;
|
|
struct sockaddr_in client_address;
|
|
memset(&client_address, 0, l = sizeof(client_address));
|
|
r = accept(h, (struct sockaddr *) &client_address, &l);
|
|
if (r < 0) {
|
|
perror("accept()");
|
|
} else {
|
|
SHUT_FD1;
|
|
SHUT_FD2;
|
|
buf1_avail = buf1_written = 0;
|
|
buf2_avail = buf2_written = 0;
|
|
fd1 = r;
|
|
fd2 =
|
|
connect_socket(forward_port, argv[3]);
|
|
if (fd2 < 0) {
|
|
SHUT_FD1;
|
|
} else
|
|
printf("connect from %s\\n",
|
|
inet_ntoa(client_address.sin_addr));
|
|
}
|
|
}
|
|
/* NB: read oob data before normal reads */
|
|
if (fd1 > 0)
|
|
if (FD_ISSET(fd1, &er)) {
|
|
char c;
|
|
errno = 0;
|
|
r = recv(fd1, &c, 1, MSG_OOB);
|
|
if (r < 1) {
|
|
SHUT_FD1;
|
|
} else
|
|
send(fd2, &c, 1, MSG_OOB);
|
|
}
|
|
if (fd2 > 0)
|
|
if (FD_ISSET(fd2, &er)) {
|
|
char c;
|
|
errno = 0;
|
|
r = recv(fd2, &c, 1, MSG_OOB);
|
|
if (r < 1) {
|
|
SHUT_FD2;
|
|
} else
|
|
send(fd1, &c, 1, MSG_OOB);
|
|
}
|
|
if (fd1 > 0)
|
|
if (FD_ISSET(fd1, &rd)) {
|
|
r =
|
|
read(fd1, buf1 + buf1_avail,
|
|
BUF_SIZE \- buf1_avail);
|
|
if (r < 1) {
|
|
SHUT_FD1;
|
|
} else
|
|
buf1_avail += r;
|
|
}
|
|
if (fd2 > 0)
|
|
if (FD_ISSET(fd2, &rd)) {
|
|
r =
|
|
read(fd2, buf2 + buf2_avail,
|
|
BUF_SIZE \- buf2_avail);
|
|
if (r < 1) {
|
|
SHUT_FD2;
|
|
} else
|
|
buf2_avail += r;
|
|
}
|
|
if (fd1 > 0)
|
|
if (FD_ISSET(fd1, &wr)) {
|
|
r =
|
|
write(fd1, buf2 + buf2_written,
|
|
buf2_avail \- buf2_written);
|
|
if (r < 1) {
|
|
SHUT_FD1;
|
|
} else
|
|
buf2_written += r;
|
|
}
|
|
if (fd2 > 0)
|
|
if (FD_ISSET(fd2, &wr)) {
|
|
r =
|
|
write(fd2, buf1 + buf1_written,
|
|
buf1_avail \- buf1_written);
|
|
if (r < 1) {
|
|
SHUT_FD2;
|
|
} else
|
|
buf1_written += r;
|
|
}
|
|
/* check if write data has caught read data */
|
|
if (buf1_written == buf1_avail)
|
|
buf1_written = buf1_avail = 0;
|
|
if (buf2_written == buf2_avail)
|
|
buf2_written = buf2_avail = 0;
|
|
/* one side has closed the connection, keep
|
|
writing to the other side until empty */
|
|
if (fd1 < 0 && buf1_avail \- buf1_written == 0) {
|
|
SHUT_FD2;
|
|
}
|
|
if (fd2 < 0 && buf2_avail \- buf2_written == 0) {
|
|
SHUT_FD1;
|
|
}
|
|
}
|
|
exit(EXIT_SUCCESS);
|
|
}
|
|
.fi
|
|
.PP
|
|
The above program properly forwards most kinds of TCP connections
|
|
including OOB signal data transmitted by \fBtelnet\fP servers.
|
|
It
|
|
handles the tricky problem of having data flow in both directions
|
|
simultaneously.
|
|
You might think it more efficient to use a
|
|
.BR fork (2)
|
|
call and devote a process to each stream.
|
|
This becomes more tricky than
|
|
you might suspect.
|
|
Another idea is to set non-blocking I/O using an
|
|
.BR ioctl (2)
|
|
call.
|
|
This also has its problems because you end up using
|
|
inefficient timeouts.
|
|
|
|
The program does not handle more than one connection at a
|
|
time, although it could easily be extended to do this with a linked list
|
|
of buffers \(em one for each connection.
|
|
At the moment, new
|
|
connections cause the current connection to be dropped.
|
|
.SH SEE ALSO
|
|
.BR accept (2),
|
|
.BR connect (2),
|
|
.BR ioctl (2),
|
|
.BR poll (2),
|
|
.BR read (2),
|
|
.BR recv (2),
|
|
.BR select (2),
|
|
.BR send (2),
|
|
.BR sigprocmask (2),
|
|
.BR write (2),
|
|
.BR sigaddset (3),
|
|
.BR sigdelset (3),
|
|
.BR sigemptyset (3),
|
|
.BR sigfillset (3),
|
|
.BR sigismember (3),
|
|
.BR epoll (7)
|
|
.\" .SH AUTHORS
|
|
.\" This man page was written by Paul Sheer.
|