mirror of https://github.com/mkerrisk/man-pages
1438 lines
41 KiB
Groff
1438 lines
41 KiB
Groff
.\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>.
|
|
.\"
|
|
.\" %%%LICENSE_START(VERBATIM_ONE_PARA)
|
|
.\" Permission is granted to distribute possibly modified copies
|
|
.\" of this page provided the header is included verbatim,
|
|
.\" and in case of nontrivial modification author and date
|
|
.\" of the modification is added to the header.
|
|
.\" %%%LICENSE_END
|
|
.\"
|
|
.\" $Id: ip.7,v 1.19 2000/12/20 18:10:31 ak Exp $
|
|
.\"
|
|
.\" FIXME The following socket options are yet to be documented
|
|
.\"
|
|
.\" IP_XFRM_POLICY (2.5.48)
|
|
.\" Needs CAP_NET_ADMIN
|
|
.\"
|
|
.\" IP_IPSEC_POLICY (2.5.47)
|
|
.\" Needs CAP_NET_ADMIN
|
|
.\"
|
|
.\" IP_MINTTL (2.6.34)
|
|
.\" commit d218d11133d888f9745802146a50255a4781d37a
|
|
.\" Author: Stephen Hemminger <shemminger@vyatta.com>
|
|
.\"
|
|
.\" MCAST_JOIN_GROUP (2.4.22 / 2.6)
|
|
.\"
|
|
.\" MCAST_BLOCK_SOURCE (2.4.22 / 2.6)
|
|
.\"
|
|
.\" MCAST_UNBLOCK_SOURCE (2.4.22 / 2.6)
|
|
.\"
|
|
.\" MCAST_LEAVE_GROUP (2.4.22 / 2.6)
|
|
.\"
|
|
.\" MCAST_JOIN_SOURCE_GROUP (2.4.22 / 2.6)
|
|
.\"
|
|
.\" MCAST_LEAVE_SOURCE_GROUP (2.4.22 / 2.6)
|
|
.\"
|
|
.\" MCAST_MSFILTER (2.4.22 / 2.6)
|
|
.\"
|
|
.\" IP_UNICAST_IF (3.4)
|
|
.\" commit 76e21053b5bf33a07c76f99d27a74238310e3c71
|
|
.\" Author: Erich E. Hoover <ehoover@mines.edu>
|
|
.\"
|
|
.TH IP 7 2020-11-01 "Linux" "Linux Programmer's Manual"
|
|
.SH NAME
|
|
ip \- Linux IPv4 protocol implementation
|
|
.SH SYNOPSIS
|
|
.nf
|
|
.B #include <sys/socket.h>
|
|
.\" .B #include <net/netinet.h> -- does not exist anymore
|
|
.\" .B #include <linux/errqueue.h> -- never include <linux/foo.h>
|
|
.B #include <netinet/in.h>
|
|
.B #include <netinet/ip.h> \fR/* superset of previous */
|
|
.PP
|
|
.IB tcp_socket " = socket(AF_INET, SOCK_STREAM, 0);"
|
|
.IB udp_socket " = socket(AF_INET, SOCK_DGRAM, 0);"
|
|
.IB raw_socket " = socket(AF_INET, SOCK_RAW, " protocol ");"
|
|
.fi
|
|
.SH DESCRIPTION
|
|
Linux implements the Internet Protocol, version 4,
|
|
described in RFC\ 791 and RFC\ 1122.
|
|
.B ip
|
|
contains a level 2 multicasting implementation conforming to RFC\ 1112.
|
|
It also contains an IP router including a packet filter.
|
|
.PP
|
|
The programming interface is BSD-sockets compatible.
|
|
For more information on sockets, see
|
|
.BR socket (7).
|
|
.PP
|
|
An IP socket is created using
|
|
.BR socket (2):
|
|
.PP
|
|
socket(AF_INET, socket_type, protocol);
|
|
.PP
|
|
Valid socket types include
|
|
.B SOCK_STREAM
|
|
to open a stream socket,
|
|
.B SOCK_DGRAM
|
|
to open a datagram socket, and
|
|
.B SOCK_RAW
|
|
to open a
|
|
.BR raw (7)
|
|
socket to access the IP protocol directly.
|
|
.PP
|
|
.I protocol
|
|
is the IP protocol in the IP header to be received or sent.
|
|
Valid values for
|
|
.I protocol
|
|
include:
|
|
.IP \(bu 2
|
|
0 and
|
|
.B IPPROTO_TCP
|
|
for
|
|
.BR tcp (7)
|
|
stream sockets;
|
|
.IP \(bu
|
|
0 and
|
|
.B IPPROTO_UDP
|
|
for
|
|
.BR udp (7)
|
|
datagram sockets;
|
|
.IP \(bu
|
|
.B IPPROTO_SCTP
|
|
for
|
|
.BR sctp (7)
|
|
stream sockets; and
|
|
.IP \(bu
|
|
.B IPPROTO_UDPLITE
|
|
for
|
|
.BR udplite (7)
|
|
datagram sockets.
|
|
.PP
|
|
For
|
|
.B SOCK_RAW
|
|
you may specify a valid IANA IP protocol defined in
|
|
RFC\ 1700 assigned numbers.
|
|
.PP
|
|
When a process wants to receive new incoming packets or connections, it
|
|
should bind a socket to a local interface address using
|
|
.BR bind (2).
|
|
In this case, only one IP socket may be bound to any given local
|
|
(address, port) pair.
|
|
When
|
|
.B INADDR_ANY
|
|
is specified in the bind call, the socket will be bound to
|
|
.I all
|
|
local interfaces.
|
|
When
|
|
.BR listen (2)
|
|
is called on an unbound socket, the socket is automatically bound
|
|
to a random free port with the local address set to
|
|
.BR INADDR_ANY .
|
|
When
|
|
.BR connect (2)
|
|
is called on an unbound socket, the socket is automatically bound
|
|
to a random free port or to a usable shared port with the local address
|
|
set to
|
|
.BR INADDR_ANY .
|
|
.PP
|
|
A TCP local socket address that has been bound is unavailable for
|
|
some time after closing, unless the
|
|
.B SO_REUSEADDR
|
|
flag has been set.
|
|
Care should be taken when using this flag as it makes TCP less reliable.
|
|
.SS Address format
|
|
An IP socket address is defined as a combination of an IP interface
|
|
address and a 16-bit port number.
|
|
The basic IP protocol does not supply port numbers; they
|
|
are implemented by higher level protocols like
|
|
.BR udp (7)
|
|
and
|
|
.BR tcp (7).
|
|
On raw sockets
|
|
.I sin_port
|
|
is set to the IP protocol.
|
|
.PP
|
|
.in +4n
|
|
.EX
|
|
struct sockaddr_in {
|
|
sa_family_t sin_family; /* address family: AF_INET */
|
|
in_port_t sin_port; /* port in network byte order */
|
|
struct in_addr sin_addr; /* internet address */
|
|
};
|
|
|
|
/* Internet address */
|
|
struct in_addr {
|
|
uint32_t s_addr; /* address in network byte order */
|
|
};
|
|
.EE
|
|
.in
|
|
.PP
|
|
.I sin_family
|
|
is always set to
|
|
.BR AF_INET .
|
|
This is required; in Linux 2.2 most networking functions return
|
|
.B EINVAL
|
|
when this setting is missing.
|
|
.I sin_port
|
|
contains the port in network byte order.
|
|
The port numbers below 1024 are called
|
|
.IR "privileged ports"
|
|
(or sometimes:
|
|
.IR "reserved ports" ).
|
|
Only a privileged process
|
|
(on Linux: a process that has the
|
|
.B CAP_NET_BIND_SERVICE
|
|
capability in the user namespace governing its network namespace) may
|
|
.BR bind (2)
|
|
to these ports.
|
|
Note that the raw IPv4 protocol as such has no concept of a
|
|
port; ports are implemented only by higher-level protocols like
|
|
.BR tcp (7)
|
|
and
|
|
.BR udp (7).
|
|
.PP
|
|
.I sin_addr
|
|
is the IP host address.
|
|
The
|
|
.I s_addr
|
|
member of
|
|
.I struct in_addr
|
|
contains the host interface address in network byte order.
|
|
.I in_addr
|
|
should be assigned one of the
|
|
.BR INADDR_*
|
|
values
|
|
(e.g.,
|
|
.BR INADDR_LOOPBACK )
|
|
using
|
|
.BR htonl (3)
|
|
or set using the
|
|
.BR inet_aton (3),
|
|
.BR inet_addr (3),
|
|
.BR inet_makeaddr (3)
|
|
library functions or directly with the name resolver (see
|
|
.BR gethostbyname (3)).
|
|
.PP
|
|
IPv4 addresses are divided into unicast, broadcast,
|
|
and multicast addresses.
|
|
Unicast addresses specify a single interface of a host,
|
|
broadcast addresses specify all hosts on a network, and multicast
|
|
addresses address all hosts in a multicast group.
|
|
Datagrams to broadcast addresses can be sent or received only when the
|
|
.B SO_BROADCAST
|
|
socket flag is set.
|
|
In the current implementation, connection-oriented sockets are allowed
|
|
to use only unicast addresses.
|
|
.\" Leave a loophole for XTP @)
|
|
.PP
|
|
Note that the address and the port are always stored in
|
|
network byte order.
|
|
In particular, this means that you need to call
|
|
.BR htons (3)
|
|
on the number that is assigned to a port.
|
|
All address/port manipulation
|
|
functions in the standard library work in network byte order.
|
|
.PP
|
|
There are several special addresses:
|
|
.B INADDR_LOOPBACK
|
|
(127.0.0.1)
|
|
always refers to the local host via the loopback device;
|
|
.B INADDR_ANY
|
|
(0.0.0.0)
|
|
means any address for binding;
|
|
.B INADDR_BROADCAST
|
|
(255.255.255.255)
|
|
means any host and has the same effect on bind as
|
|
.B INADDR_ANY
|
|
for historical reasons.
|
|
.SS Socket options
|
|
IP supports some protocol-specific socket options that can be set with
|
|
.BR setsockopt (2)
|
|
and read with
|
|
.BR getsockopt (2).
|
|
The socket option level for IP is
|
|
.BR IPPROTO_IP .
|
|
.\" or SOL_IP on Linux
|
|
A boolean integer flag is zero when it is false, otherwise true.
|
|
.PP
|
|
When an invalid socket option is specified,
|
|
.BR getsockopt (2)
|
|
and
|
|
.BR setsockopt (2)
|
|
fail with the error
|
|
.BR ENOPROTOOPT .
|
|
.TP
|
|
.BR IP_ADD_MEMBERSHIP " (since Linux 1.2)"
|
|
Join a multicast group.
|
|
Argument is an
|
|
.I ip_mreqn
|
|
structure.
|
|
.PP
|
|
.in +4n
|
|
.EX
|
|
struct ip_mreqn {
|
|
struct in_addr imr_multiaddr; /* IP multicast group
|
|
address */
|
|
struct in_addr imr_address; /* IP address of local
|
|
interface */
|
|
int imr_ifindex; /* interface index */
|
|
};
|
|
.EE
|
|
.in
|
|
.PP
|
|
.I imr_multiaddr
|
|
contains the address of the multicast group the application
|
|
wants to join or leave.
|
|
It must be a valid multicast address
|
|
.\" (i.e., within the 224.0.0.0-239.255.255.255 range)
|
|
(or
|
|
.BR setsockopt (2)
|
|
fails with the error
|
|
.BR EINVAL ).
|
|
.I imr_address
|
|
is the address of the local interface with which the system
|
|
should join the multicast group; if it is equal to
|
|
.BR INADDR_ANY ,
|
|
an appropriate interface is chosen by the system.
|
|
.I imr_ifindex
|
|
is the interface index of the interface that should join/leave the
|
|
.I imr_multiaddr
|
|
group, or 0 to indicate any interface.
|
|
.IP
|
|
The
|
|
.I ip_mreqn
|
|
structure is available only since Linux 2.2.
|
|
For compatibility, the old
|
|
.I ip_mreq
|
|
structure (present since Linux 1.2) is still supported;
|
|
it differs from
|
|
.I ip_mreqn
|
|
only by not including the
|
|
.I imr_ifindex
|
|
field.
|
|
(The kernel determines which structure is being passed based
|
|
on the size passed in
|
|
.IR optlen .)
|
|
.IP
|
|
.B IP_ADD_MEMBERSHIP
|
|
is valid only for
|
|
.BR setsockopt (2).
|
|
.\"
|
|
.TP
|
|
.BR IP_ADD_SOURCE_MEMBERSHIP " (since Linux 2.4.22 / 2.5.68)"
|
|
Join a multicast group and allow receiving data only
|
|
from a specified source.
|
|
Argument is an
|
|
.I ip_mreq_source
|
|
structure.
|
|
.PP
|
|
.in +4n
|
|
.EX
|
|
struct ip_mreq_source {
|
|
struct in_addr imr_multiaddr; /* IP multicast group
|
|
address */
|
|
struct in_addr imr_interface; /* IP address of local
|
|
interface */
|
|
struct in_addr imr_sourceaddr; /* IP address of
|
|
multicast source */
|
|
};
|
|
.EE
|
|
.in
|
|
.PP
|
|
The
|
|
.I ip_mreq_source
|
|
structure is similar to
|
|
.I ip_mreqn
|
|
described under
|
|
.BR IP_ADD_MEMBERSHIP .
|
|
The
|
|
.I imr_multiaddr
|
|
field contains the address of the multicast group the application
|
|
wants to join or leave.
|
|
The
|
|
.I imr_interface
|
|
field is the address of the local interface with which
|
|
the system should join the multicast group.
|
|
Finally, the
|
|
.I imr_sourceaddr
|
|
field contains the address of the source the
|
|
application wants to receive data from.
|
|
.IP
|
|
This option can be used multiple times to allow
|
|
receiving data from more than one source.
|
|
.TP
|
|
.BR IP_BIND_ADDRESS_NO_PORT " (since Linux 4.2)"
|
|
.\" commit 90c337da1524863838658078ec34241f45d8394d
|
|
Inform the kernel to not reserve an ephemeral port when using
|
|
.BR bind (2)
|
|
with a port number of 0.
|
|
The port will later be automatically chosen at
|
|
.BR connect (2)
|
|
time,
|
|
in a way that allows sharing a source port as long as the 4-tuple is unique.
|
|
.TP
|
|
.BR IP_BLOCK_SOURCE " (since Linux 2.4.22 / 2.5.68)"
|
|
Stop receiving multicast data from a specific source in a given group.
|
|
This is valid only after the application has subscribed
|
|
to the multicast group using either
|
|
.BR IP_ADD_MEMBERSHIP
|
|
or
|
|
.BR IP_ADD_SOURCE_MEMBERSHIP .
|
|
.IP
|
|
Argument is an
|
|
.I ip_mreq_source
|
|
structure as described under
|
|
.BR IP_ADD_SOURCE_MEMBERSHIP .
|
|
.TP
|
|
.BR IP_DROP_MEMBERSHIP " (since Linux 1.2)"
|
|
Leave a multicast group.
|
|
Argument is an
|
|
.I ip_mreqn
|
|
or
|
|
.I ip_mreq
|
|
structure similar to
|
|
.BR IP_ADD_MEMBERSHIP .
|
|
.TP
|
|
.BR IP_DROP_SOURCE_MEMBERSHIP " (since Linux 2.4.22 / 2.5.68)"
|
|
Leave a source-specific group\(emthat is, stop receiving data from
|
|
a given multicast group that come from a given source.
|
|
If the application has subscribed to multiple sources within
|
|
the same group, data from the remaining sources will still be delivered.
|
|
To stop receiving data from all sources at once, use
|
|
.BR IP_DROP_MEMBERSHIP .
|
|
.IP
|
|
Argument is an
|
|
.I ip_mreq_source
|
|
structure as described under
|
|
.BR IP_ADD_SOURCE_MEMBERSHIP .
|
|
.TP
|
|
.BR IP_FREEBIND " (since Linux 2.4)"
|
|
.\" Precisely: 2.4.0-test10
|
|
If enabled, this boolean option allows binding to an IP address
|
|
that is nonlocal or does not (yet) exist.
|
|
This permits listening on a socket,
|
|
without requiring the underlying network interface or the
|
|
specified dynamic IP address to be up at the time that
|
|
the application is trying to bind to it.
|
|
This option is the per-socket equivalent of the
|
|
.IR ip_nonlocal_bind
|
|
.I /proc
|
|
interface described below.
|
|
.TP
|
|
.BR IP_HDRINCL " (since Linux 2.0)"
|
|
If enabled,
|
|
the user supplies an IP header in front of the user data.
|
|
Valid only for
|
|
.B SOCK_RAW
|
|
sockets; see
|
|
.BR raw (7)
|
|
for more information.
|
|
When this flag is enabled, the values set by
|
|
.BR IP_OPTIONS ,
|
|
.BR IP_TTL ,
|
|
and
|
|
.B IP_TOS
|
|
are ignored.
|
|
.TP
|
|
.BR IP_MSFILTER " (since Linux 2.4.22 / 2.5.68)"
|
|
This option provides access to the advanced full-state filtering API.
|
|
Argument is an
|
|
.I ip_msfilter
|
|
structure.
|
|
.PP
|
|
.in +4n
|
|
.EX
|
|
struct ip_msfilter {
|
|
struct in_addr imsf_multiaddr; /* IP multicast group
|
|
address */
|
|
struct in_addr imsf_interface; /* IP address of local
|
|
interface */
|
|
uint32_t imsf_fmode; /* Filter-mode */
|
|
|
|
uint32_t imsf_numsrc; /* Number of sources in
|
|
the following array */
|
|
struct in_addr imsf_slist[1]; /* Array of source
|
|
addresses */
|
|
};
|
|
.EE
|
|
.in
|
|
.PP
|
|
There are two macros,
|
|
.BR MCAST_INCLUDE
|
|
and
|
|
.BR MCAST_EXCLUDE ,
|
|
which can be used to specify the filtering mode.
|
|
Additionally, the
|
|
.BR IP_MSFILTER_SIZE (n)
|
|
macro exists to determine how much memory is needed to store an
|
|
.I ip_msfilter
|
|
structure with
|
|
.I n
|
|
sources in the source list.
|
|
.IP
|
|
For the full description of multicast source filtering
|
|
refer to RFC 3376.
|
|
.TP
|
|
.BR IP_MTU " (since Linux 2.2)"
|
|
.\" Precisely: 2.1.124
|
|
Retrieve the current known path MTU of the current socket.
|
|
Returns an integer.
|
|
.IP
|
|
.B IP_MTU
|
|
is valid only for
|
|
.BR getsockopt (2)
|
|
and can be employed only when the socket has been connected.
|
|
.TP
|
|
.BR IP_MTU_DISCOVER " (since Linux 2.2)"
|
|
.\" Precisely: 2.1.124
|
|
Set or retrieve the Path MTU Discovery setting for a socket.
|
|
When enabled, Linux will perform Path MTU Discovery
|
|
as defined in RFC\ 1191 on
|
|
.B SOCK_STREAM
|
|
sockets.
|
|
For
|
|
.RB non- SOCK_STREAM
|
|
sockets,
|
|
.B IP_PMTUDISC_DO
|
|
forces the don't-fragment flag to be set on all outgoing packets.
|
|
It is the user's responsibility to packetize the data
|
|
in MTU-sized chunks and to do the retransmits if necessary.
|
|
The kernel will reject (with
|
|
.BR EMSGSIZE )
|
|
datagrams that are bigger than the known path MTU.
|
|
.B IP_PMTUDISC_WANT
|
|
will fragment a datagram if needed according to the path MTU,
|
|
or will set the don't-fragment flag otherwise.
|
|
.IP
|
|
The system-wide default can be toggled between
|
|
.B IP_PMTUDISC_WANT
|
|
and
|
|
.B IP_PMTUDISC_DONT
|
|
by writing (respectively, zero and nonzero values) to the
|
|
.I /proc/sys/net/ipv4/ip_no_pmtu_disc
|
|
file.
|
|
.TS
|
|
tab(:);
|
|
c l
|
|
l l.
|
|
Path MTU discovery value:Meaning
|
|
IP_PMTUDISC_WANT:Use per-route settings.
|
|
IP_PMTUDISC_DONT:Never do Path MTU Discovery.
|
|
IP_PMTUDISC_DO:Always do Path MTU Discovery.
|
|
IP_PMTUDISC_PROBE:Set DF but ignore Path MTU.
|
|
.TE
|
|
.sp 1
|
|
When PMTU discovery is enabled, the kernel automatically keeps track of
|
|
the path MTU per destination host.
|
|
When the socket is connected to a specific peer with
|
|
.BR connect (2),
|
|
the currently known path MTU can be retrieved conveniently using the
|
|
.B IP_MTU
|
|
socket option (e.g., after an
|
|
.B EMSGSIZE
|
|
error occurred).
|
|
The path MTU may change over time.
|
|
For connectionless sockets with many destinations,
|
|
the new MTU for a given destination can also be accessed using the
|
|
error queue (see
|
|
.BR IP_RECVERR ).
|
|
A new error will be queued for every incoming MTU update.
|
|
.IP
|
|
While MTU discovery is in progress, initial packets from datagram sockets
|
|
may be dropped.
|
|
Applications using UDP should be aware of this and not
|
|
take it into account for their packet retransmit strategy.
|
|
.IP
|
|
To bootstrap the path MTU discovery process on unconnected sockets, it
|
|
is possible to start with a big datagram size
|
|
(up to 64 kilobytes long, including headers) and let it shrink by updates of the path MTU.
|
|
.IP
|
|
To get an initial estimate of the
|
|
path MTU, connect a datagram socket to the destination address using
|
|
.BR connect (2)
|
|
and retrieve the MTU by calling
|
|
.BR getsockopt (2)
|
|
with the
|
|
.B IP_MTU
|
|
option.
|
|
.IP
|
|
It is possible to implement RFC 4821 MTU probing with
|
|
.B SOCK_DGRAM
|
|
or
|
|
.B SOCK_RAW
|
|
sockets by setting a value of
|
|
.BR IP_PMTUDISC_PROBE
|
|
(available since Linux 2.6.22).
|
|
This is also particularly useful for diagnostic tools such as
|
|
.BR tracepath (8)
|
|
that wish to deliberately send probe packets larger than
|
|
the observed Path MTU.
|
|
.TP
|
|
.BR IP_MULTICAST_ALL " (since Linux 2.6.31)"
|
|
This option can be used to modify the delivery policy of multicast messages
|
|
to sockets bound to the wildcard
|
|
.B INADDR_ANY
|
|
address.
|
|
The argument is a boolean integer (defaults to 1).
|
|
If set to 1,
|
|
the socket will receive messages from all the groups that have been joined
|
|
globally on the whole system.
|
|
Otherwise, it will deliver messages only from
|
|
the groups that have been explicitly joined (for example via the
|
|
.B IP_ADD_MEMBERSHIP
|
|
option) on this particular socket.
|
|
.TP
|
|
.BR IP_MULTICAST_IF " (since Linux 1.2)"
|
|
Set the local device for a multicast socket.
|
|
The argument for
|
|
.BR setsockopt (2)
|
|
is an
|
|
.I ip_mreqn
|
|
or
|
|
.\" net: IP_MULTICAST_IF setsockopt now recognizes struct mreq
|
|
.\" Commit: 3a084ddb4bf299a6e898a9a07c89f3917f0713f7
|
|
(since Linux 3.5)
|
|
.I ip_mreq
|
|
structure similar to
|
|
.BR IP_ADD_MEMBERSHIP ,
|
|
or an
|
|
.I in_addr
|
|
structure.
|
|
(The kernel determines which structure is being passed based
|
|
on the size passed in
|
|
.IR optlen .)
|
|
For
|
|
.BR getsockopt (2),
|
|
the argument is an
|
|
.I in_addr
|
|
structure.
|
|
.TP
|
|
.BR IP_MULTICAST_LOOP " (since Linux 1.2)"
|
|
Set or read a boolean integer argument that determines whether
|
|
sent multicast packets should be looped back to the local sockets.
|
|
.TP
|
|
.BR IP_MULTICAST_TTL " (since Linux 1.2)"
|
|
Set or read the time-to-live value of outgoing multicast packets for this
|
|
socket.
|
|
It is very important for multicast packets to set the smallest TTL possible.
|
|
The default is 1 which means that multicast packets don't leave the local
|
|
network unless the user program explicitly requests it.
|
|
Argument is an integer.
|
|
.TP
|
|
.BR IP_NODEFRAG " (since Linux 2.6.36)"
|
|
If enabled (argument is nonzero),
|
|
the reassembly of outgoing packets is disabled in the netfilter layer.
|
|
The argument is an integer.
|
|
.IP
|
|
This option is valid only for
|
|
.B SOCK_RAW
|
|
sockets.
|
|
.TP
|
|
.BR IP_OPTIONS " (since Linux 2.0)"
|
|
.\" Precisely: 1.3.30
|
|
Set or get the IP options to be sent with every packet from this socket.
|
|
The arguments are a pointer to a memory buffer containing the options
|
|
and the option length.
|
|
The
|
|
.BR setsockopt (2)
|
|
call sets the IP options associated with a socket.
|
|
The maximum option size for IPv4 is 40 bytes.
|
|
See RFC\ 791 for the allowed options.
|
|
When the initial connection request packet for a
|
|
.B SOCK_STREAM
|
|
socket contains IP options, the IP options will be set automatically
|
|
to the options from the initial packet with routing headers reversed.
|
|
Incoming packets are not allowed to change options after the connection
|
|
is established.
|
|
The processing of all incoming source routing options
|
|
is disabled by default and can be enabled by using the
|
|
.I accept_source_route
|
|
.I /proc
|
|
interface.
|
|
Other options like timestamps are still handled.
|
|
For datagram sockets, IP options can be set only by the local user.
|
|
Calling
|
|
.BR getsockopt (2)
|
|
with
|
|
.B IP_OPTIONS
|
|
puts the current IP options used for sending into the supplied buffer.
|
|
.TP
|
|
.BR IP_PASSSEC " (since Linux 2.6.17)"
|
|
.\" commit 2c7946a7bf45ae86736ab3b43d0085e43947945c
|
|
If labeled IPSEC or NetLabel is configured on the sending and receiving
|
|
hosts, this option enables receiving of the security context of the peer
|
|
socket in an ancillary message of type
|
|
.B SCM_SECURITY
|
|
retrieved using
|
|
.BR recvmsg (2).
|
|
This option is supported only for UDP sockets; for TCP or SCTP sockets,
|
|
see the description of the
|
|
.B SO_PEERSEC
|
|
option below.
|
|
.IP
|
|
The value given as an argument to
|
|
.BR setsockopt (2)
|
|
and returned as the result of
|
|
.BR getsockopt (2)
|
|
is an integer boolean flag.
|
|
.IP
|
|
The security context returned in the
|
|
.B SCM_SECURITY
|
|
ancillary message
|
|
is of the same format as the one described under the
|
|
.B SO_PEERSEC
|
|
option below.
|
|
.IP
|
|
Note: the reuse of the
|
|
.B SCM_SECURITY
|
|
message type for the
|
|
.B IP_PASSSEC
|
|
socket option was likely a mistake, since other IP control messages use
|
|
their own numbering scheme in the IP namespace and often use the
|
|
socket option value as the message type.
|
|
There is no conflict currently since the IP option with the same value as
|
|
.B SCM_SECURITY
|
|
is
|
|
.B IP_HDRINCL
|
|
and this is never used for a control message type.
|
|
.TP
|
|
.BR IP_PKTINFO " (since Linux 2.2)"
|
|
.\" Precisely: 2.1.68
|
|
Pass an
|
|
.B IP_PKTINFO
|
|
ancillary message that contains a
|
|
.I in_pktinfo
|
|
structure that supplies some information about the incoming packet.
|
|
This works only for datagram oriented sockets.
|
|
The argument is a flag that tells the socket whether the
|
|
.B IP_PKTINFO
|
|
message should be passed or not.
|
|
The message itself can be sent/retrieved
|
|
only as a control message with a packet using
|
|
.BR recvmsg (2)
|
|
or
|
|
.BR sendmsg (2).
|
|
.IP
|
|
.in +4n
|
|
.EX
|
|
struct in_pktinfo {
|
|
unsigned int ipi_ifindex; /* Interface index */
|
|
struct in_addr ipi_spec_dst; /* Local address */
|
|
struct in_addr ipi_addr; /* Header Destination
|
|
address */
|
|
};
|
|
.EE
|
|
.in
|
|
.IP
|
|
.I ipi_ifindex
|
|
is the unique index of the interface the packet was received on.
|
|
.I ipi_spec_dst
|
|
is the local address of the packet and
|
|
.I ipi_addr
|
|
is the destination address in the packet header.
|
|
If
|
|
.B IP_PKTINFO
|
|
is passed to
|
|
.BR sendmsg (2)
|
|
and
|
|
.\" This field is grossly misnamed
|
|
.I ipi_spec_dst
|
|
is not zero, then it is used as the local source address for the routing
|
|
table lookup and for setting up IP source route options.
|
|
When
|
|
.I ipi_ifindex
|
|
is not zero, the primary local address of the interface specified by the
|
|
index overwrites
|
|
.I ipi_spec_dst
|
|
for the routing table lookup.
|
|
.TP
|
|
.BR IP_RECVERR " (since Linux 2.2)"
|
|
.\" Precisely: 2.1.15
|
|
Enable extended reliable error message passing.
|
|
When enabled on a datagram socket, all
|
|
generated errors will be queued in a per-socket error queue.
|
|
When the user receives an error from a socket operation,
|
|
the errors can be received by calling
|
|
.BR recvmsg (2)
|
|
with the
|
|
.B MSG_ERRQUEUE
|
|
flag set.
|
|
The
|
|
.I sock_extended_err
|
|
structure describing the error will be passed in an ancillary message with
|
|
the type
|
|
.B IP_RECVERR
|
|
and the level
|
|
.BR IPPROTO_IP .
|
|
.\" or SOL_IP on Linux
|
|
This is useful for reliable error handling on unconnected sockets.
|
|
The received data portion of the error queue contains the error packet.
|
|
.IP
|
|
The
|
|
.B IP_RECVERR
|
|
control message contains a
|
|
.I sock_extended_err
|
|
structure:
|
|
.IP
|
|
.in +4n
|
|
.EX
|
|
#define SO_EE_ORIGIN_NONE 0
|
|
#define SO_EE_ORIGIN_LOCAL 1
|
|
#define SO_EE_ORIGIN_ICMP 2
|
|
#define SO_EE_ORIGIN_ICMP6 3
|
|
|
|
struct sock_extended_err {
|
|
uint32_t ee_errno; /* error number */
|
|
uint8_t ee_origin; /* where the error originated */
|
|
uint8_t ee_type; /* type */
|
|
uint8_t ee_code; /* code */
|
|
uint8_t ee_pad;
|
|
uint32_t ee_info; /* additional information */
|
|
uint32_t ee_data; /* other data */
|
|
/* More data may follow */
|
|
};
|
|
|
|
struct sockaddr *SO_EE_OFFENDER(struct sock_extended_err *);
|
|
.EE
|
|
.in
|
|
.IP
|
|
.I ee_errno
|
|
contains the
|
|
.I errno
|
|
number of the queued error.
|
|
.I ee_origin
|
|
is the origin code of where the error originated.
|
|
The other fields are protocol-specific.
|
|
The macro
|
|
.B SO_EE_OFFENDER
|
|
returns a pointer to the address of the network object
|
|
where the error originated from given a pointer to the ancillary message.
|
|
If this address is not known, the
|
|
.I sa_family
|
|
member of the
|
|
.I sockaddr
|
|
contains
|
|
.B AF_UNSPEC
|
|
and the other fields of the
|
|
.I sockaddr
|
|
are undefined.
|
|
.IP
|
|
IP uses the
|
|
.I sock_extended_err
|
|
structure as follows:
|
|
.I ee_origin
|
|
is set to
|
|
.B SO_EE_ORIGIN_ICMP
|
|
for errors received as an ICMP packet, or
|
|
.B SO_EE_ORIGIN_LOCAL
|
|
for locally generated errors.
|
|
Unknown values should be ignored.
|
|
.I ee_type
|
|
and
|
|
.I ee_code
|
|
are set from the type and code fields of the ICMP header.
|
|
.I ee_info
|
|
contains the discovered MTU for
|
|
.B EMSGSIZE
|
|
errors.
|
|
The message also contains the
|
|
.I sockaddr_in
|
|
of the node that caused the error, which can be accessed with the
|
|
.B SO_EE_OFFENDER
|
|
macro.
|
|
The
|
|
.I sin_family
|
|
field of the
|
|
.B SO_EE_OFFENDER
|
|
address is
|
|
.B AF_UNSPEC
|
|
when the source was unknown.
|
|
When the error originated from the network, all IP options
|
|
.RB ( IP_OPTIONS ", " IP_TTL ,
|
|
etc.) enabled on the socket and contained in the
|
|
error packet are passed as control messages.
|
|
The payload of the packet causing the error is returned as normal payload.
|
|
.\" FIXME . Is it a good idea to document that? It is a dubious feature.
|
|
.\" On
|
|
.\" .B SOCK_STREAM
|
|
.\" sockets,
|
|
.\" .B IP_RECVERR
|
|
.\" has slightly different semantics. Instead of
|
|
.\" saving the errors for the next timeout, it passes all incoming
|
|
.\" errors immediately to the user.
|
|
.\" This might be useful for very short-lived TCP connections which
|
|
.\" need fast error handling. Use this option with care:
|
|
.\" it makes TCP unreliable
|
|
.\" by not allowing it to recover properly from routing
|
|
.\" shifts and other normal
|
|
.\" conditions and breaks the protocol specification.
|
|
Note that TCP has no error queue;
|
|
.B MSG_ERRQUEUE
|
|
is not permitted on
|
|
.B SOCK_STREAM
|
|
sockets.
|
|
.B IP_RECVERR
|
|
is valid for TCP, but all errors are returned by socket function return or
|
|
.B SO_ERROR
|
|
only.
|
|
.IP
|
|
For raw sockets,
|
|
.B IP_RECVERR
|
|
enables passing of all received ICMP errors to the
|
|
application, otherwise errors are reported only on connected sockets.
|
|
.IP
|
|
It sets or retrieves an integer boolean flag.
|
|
.B IP_RECVERR
|
|
defaults to off.
|
|
.TP
|
|
.BR IP_RECVOPTS " (since Linux 2.2)"
|
|
.\" Precisely: 2.1.15
|
|
Pass all incoming IP options to the user in a
|
|
.B IP_OPTIONS
|
|
control message.
|
|
The routing header and other options are already filled in
|
|
for the local host.
|
|
Not supported for
|
|
.B SOCK_STREAM
|
|
sockets.
|
|
.TP
|
|
.BR IP_RECVORIGDSTADDR " (since Linux 2.6.29)"
|
|
.\" commit e8b2dfe9b4501ed0047459b2756ba26e5a940a69
|
|
This boolean option enables the
|
|
.B IP_ORIGDSTADDR
|
|
ancillary message in
|
|
.BR recvmsg (2),
|
|
in which the kernel returns the original destination address
|
|
of the datagram being received.
|
|
The ancillary message contains a
|
|
.IR "struct sockaddr_in" .
|
|
.TP
|
|
.BR IP_RECVTOS " (since Linux 2.2)"
|
|
.\" Precisely: 2.1.68
|
|
If enabled, the
|
|
.B IP_TOS
|
|
ancillary message is passed with incoming packets.
|
|
It contains a byte which specifies the Type of Service/Precedence
|
|
field of the packet header.
|
|
Expects a boolean integer flag.
|
|
.TP
|
|
.BR IP_RECVTTL " (since Linux 2.2)"
|
|
.\" Precisely: 2.1.68
|
|
When this flag is set, pass a
|
|
.B IP_TTL
|
|
control message with the time-to-live
|
|
field of the received packet as a 32-bit integer.
|
|
Not supported for
|
|
.B SOCK_STREAM
|
|
sockets.
|
|
.TP
|
|
.BR IP_RETOPTS " (since Linux 2.2)"
|
|
.\" Precisely: 2.1.15
|
|
Identical to
|
|
.BR IP_RECVOPTS ,
|
|
but returns raw unprocessed options with timestamp and route record
|
|
options not filled in for this hop.
|
|
.TP
|
|
.BR IP_ROUTER_ALERT " (since Linux 2.2)"
|
|
.\" Precisely: 2.1.68
|
|
Pass all to-be forwarded packets with the
|
|
IP Router Alert option set to this socket.
|
|
Valid only for raw sockets.
|
|
This is useful, for instance, for user-space RSVP daemons.
|
|
The tapped packets are not forwarded by the kernel; it is
|
|
the user's responsibility to send them out again.
|
|
Socket binding is ignored,
|
|
such packets are filtered only by protocol.
|
|
Expects an integer flag.
|
|
.TP
|
|
.BR IP_TOS " (since Linux 1.0)"
|
|
Set or receive the Type-Of-Service (TOS) field that is sent
|
|
with every IP packet originating from this socket.
|
|
It is used to prioritize packets on the network.
|
|
TOS is a byte.
|
|
There are some standard TOS flags defined:
|
|
.B IPTOS_LOWDELAY
|
|
to minimize delays for interactive traffic,
|
|
.B IPTOS_THROUGHPUT
|
|
to optimize throughput,
|
|
.B IPTOS_RELIABILITY
|
|
to optimize for reliability,
|
|
.B IPTOS_MINCOST
|
|
should be used for "filler data" where slow transmission doesn't matter.
|
|
At most one of these TOS values can be specified.
|
|
Other bits are invalid and shall be cleared.
|
|
Linux sends
|
|
.B IPTOS_LOWDELAY
|
|
datagrams first by default,
|
|
but the exact behavior depends on the configured queueing discipline.
|
|
.\" FIXME elaborate on this
|
|
Some high-priority levels may require superuser privileges (the
|
|
.B CAP_NET_ADMIN
|
|
capability).
|
|
.\" The priority can also be set in a protocol-independent way by the
|
|
.\" .RB ( SOL_SOCKET ", " SO_PRIORITY )
|
|
.\" socket option (see
|
|
.\" .BR socket (7)).
|
|
.TP
|
|
.BR IP_TRANSPARENT " (since Linux 2.6.24)"
|
|
.\" commit f5715aea4564f233767ea1d944b2637a5fd7cd2e
|
|
.\" This patch introduces the IP_TRANSPARENT socket option: enabling that
|
|
.\" will make the IPv4 routing omit the non-local source address check on
|
|
.\" output. Setting IP_TRANSPARENT requires NET_ADMIN capability.
|
|
.\" http://lwn.net/Articles/252545/
|
|
Setting this boolean option enables transparent proxying on this socket.
|
|
This socket option allows
|
|
the calling application to bind to a nonlocal IP address and operate
|
|
both as a client and a server with the foreign address as the local endpoint.
|
|
NOTE: this requires that routing be set up in a way that
|
|
packets going to the foreign address are routed through the TProxy box
|
|
(i.e., the system hosting the application that employs the
|
|
.B IP_TRANSPARENT
|
|
socket option).
|
|
Enabling this socket option requires superuser privileges
|
|
(the
|
|
.BR CAP_NET_ADMIN
|
|
capability).
|
|
.IP
|
|
TProxy redirection with the iptables TPROXY target also requires that
|
|
this option be set on the redirected socket.
|
|
.TP
|
|
.BR IP_TTL " (since Linux 1.0)"
|
|
Set or retrieve the current time-to-live field that is used in every packet
|
|
sent from this socket.
|
|
.TP
|
|
.BR IP_UNBLOCK_SOURCE " (since Linux 2.4.22 / 2.5.68)"
|
|
Unblock a previously blocked multicast source.
|
|
Returns
|
|
.BR EADDRNOTAVAIL
|
|
when the given source is not being blocked.
|
|
.IP
|
|
Argument is an
|
|
.I ip_mreq_source
|
|
structure as described under
|
|
.BR IP_ADD_SOURCE_MEMBERSHIP .
|
|
.TP
|
|
.BR SO_PEERSEC " (since Linux 2.6.17)"
|
|
If labeled IPSEC or NetLabel is configured on both the sending and
|
|
receiving hosts, this read-only socket option returns the security
|
|
context of the peer socket connected to this socket.
|
|
By default,
|
|
this will be the same as the security context of the process that created
|
|
the peer socket unless overridden by the policy or by a process with
|
|
the required permissions.
|
|
.IP
|
|
The argument to
|
|
.BR getsockopt (2)
|
|
is a pointer to a buffer of the specified length in bytes
|
|
into which the security context string will be copied.
|
|
If the buffer length is less than the length of the security
|
|
context string, then
|
|
.BR getsockopt (2)
|
|
returns \-1, sets
|
|
.I errno
|
|
to
|
|
.BR ERANGE ,
|
|
and returns the required length via
|
|
.IR optlen .
|
|
The caller should allocate at least
|
|
.BR NAME_MAX
|
|
bytes for the buffer initially, although this is not guaranteed
|
|
to be sufficient.
|
|
Resizing the buffer to the returned length
|
|
and retrying may be necessary.
|
|
.IP
|
|
The security context string may include a terminating null character
|
|
in the returned length, but is not guaranteed to do so: a security
|
|
context "foo" might be represented as either {'f','o','o'} of length 3
|
|
or {'f','o','o','\\0'} of length 4, which are considered to be
|
|
interchangeable.
|
|
The string is printable, does not contain non-terminating null characters,
|
|
and is in an unspecified encoding (in particular, it
|
|
is not guaranteed to be ASCII or UTF-8).
|
|
.IP
|
|
The use of this option for sockets in the
|
|
.B AF_INET
|
|
address family is supported since Linux 2.6.17
|
|
.\" commit 2c7946a7bf45ae86736ab3b43d0085e43947945c
|
|
for TCP sockets, and since Linux 4.17
|
|
.\" commit d452930fd3b9031e59abfeddb2fa383f1403d61a
|
|
for SCTP sockets.
|
|
.IP
|
|
For SELinux, NetLabel conveys only the MLS portion of the security
|
|
context of the peer across the wire, defaulting the rest of the
|
|
security context to the values defined in the policy for the
|
|
netmsg initial security identifier (SID).
|
|
However, NetLabel can
|
|
be configured to pass full security contexts over loopback.
|
|
Labeled IPSEC always passes full security contexts as part of establishing
|
|
the security association (SA) and looks them up based on the association
|
|
for each packet.
|
|
.\"
|
|
.SS /proc interfaces
|
|
The IP protocol
|
|
supports a set of
|
|
.I /proc
|
|
interfaces to configure some global parameters.
|
|
The parameters can be accessed by reading or writing files in the directory
|
|
.IR /proc/sys/net/ipv4/ .
|
|
.\" FIXME As at 2.6.12, 14 Jun 2005, the following are undocumented:
|
|
.\" ip_queue_maxlen
|
|
.\" ip_conntrack_max
|
|
Interfaces described as
|
|
.I Boolean
|
|
take an integer value, with a nonzero value ("true") meaning that
|
|
the corresponding option is enabled, and a zero value ("false")
|
|
meaning that the option is disabled.
|
|
.\"
|
|
.TP
|
|
.IR ip_always_defrag " (Boolean; since Linux 2.2.13)"
|
|
[New with kernel 2.2.13; in earlier kernel versions this feature
|
|
was controlled at compile time by the
|
|
.B CONFIG_IP_ALWAYS_DEFRAG
|
|
option; this option is not present in 2.4.x and later]
|
|
.IP
|
|
When this boolean flag is enabled (not equal to 0), incoming fragments
|
|
(parts of IP packets
|
|
that arose when some host between origin and destination decided
|
|
that the packets were too large and cut them into pieces) will be
|
|
reassembled (defragmented) before being processed, even if they are
|
|
about to be forwarded.
|
|
.IP
|
|
Enable only if running either a firewall that is the sole link
|
|
to your network or a transparent proxy; never ever use it for a
|
|
normal router or host.
|
|
Otherwise, fragmented communication can be disturbed
|
|
if the fragments travel over different links.
|
|
Defragmentation also has a large memory and CPU time cost.
|
|
.IP
|
|
This is automagically turned on when masquerading or transparent
|
|
proxying are configured.
|
|
.\"
|
|
.TP
|
|
.IR ip_autoconfig " (Linux 2.2 to 2.6.17)"
|
|
.\" Precisely: since 2.1.68
|
|
.\" FIXME document ip_autoconfig
|
|
Not documented.
|
|
.\"
|
|
.TP
|
|
.IR ip_default_ttl " (integer; default: 64; since Linux 2.2)"
|
|
.\" Precisely: 2.1.15
|
|
Set the default time-to-live value of outgoing packets.
|
|
This can be changed per socket with the
|
|
.B IP_TTL
|
|
option.
|
|
.\"
|
|
.TP
|
|
.IR ip_dynaddr " (Boolean; default: disabled; since Linux 2.0.31)"
|
|
Enable dynamic socket address and masquerading entry rewriting on interface
|
|
address change.
|
|
This is useful for dialup interfaces with changing IP addresses.
|
|
0 means no rewriting, 1 turns it on and 2 enables verbose mode.
|
|
.\"
|
|
.TP
|
|
.IR ip_forward " (Boolean; default: disabled; since Linux 1.2)"
|
|
Enable IP forwarding with a boolean flag.
|
|
IP forwarding can also be set on a per-interface basis.
|
|
.\"
|
|
.TP
|
|
.IR ip_local_port_range " (since Linux 2.2)"
|
|
.\" Precisely: since 2.1.68
|
|
This file contains two integers that define the default local port range
|
|
allocated to sockets that are not explicitly bound to a port number\(emthat
|
|
is, the range used for
|
|
.IR "ephemeral ports" .
|
|
An ephemeral port is allocated to a socket in the following circumstances:
|
|
.RS
|
|
.IP * 3
|
|
the port number in a socket address is specified as 0 when calling
|
|
.BR bind (2);
|
|
.IP *
|
|
.BR listen (2)
|
|
is called on a stream socket that was not previously bound;
|
|
.IP *
|
|
.BR connect (2)
|
|
was called on a socket that was not previously bound;
|
|
.IP *
|
|
.BR sendto (2)
|
|
is called on a datagram socket that was not previously bound.
|
|
.RE
|
|
.IP
|
|
Allocation of ephemeral ports starts with the first number in
|
|
.IR ip_local_port_range
|
|
and ends with the second number.
|
|
If the range of ephemeral ports is exhausted,
|
|
then the relevant system call returns an error (but see BUGS).
|
|
.IP
|
|
Note that the port range in
|
|
.IR ip_local_port_range
|
|
should not conflict with the ports used by masquerading
|
|
(although the case is handled).
|
|
Also, arbitrary choices may cause problems with some firewall packet
|
|
filters that make assumptions about the local ports in use.
|
|
The first number should be at least greater than 1024,
|
|
or better, greater than 4096, to avoid clashes
|
|
with well-known ports and to minimize firewall problems.
|
|
.\"
|
|
.TP
|
|
.IR ip_no_pmtu_disc " (Boolean; default: disabled; since Linux 2.2)"
|
|
.\" Precisely: 2.1.15
|
|
If enabled, don't do Path MTU Discovery for TCP sockets by default.
|
|
Path MTU discovery may fail if misconfigured firewalls (that drop
|
|
all ICMP packets) or misconfigured interfaces (e.g., a point-to-point
|
|
link where both ends don't agree on the MTU) are on the path.
|
|
It is better to fix the broken routers on the path than to turn off
|
|
Path MTU Discovery globally, because not doing it incurs a high cost
|
|
to the network.
|
|
.\"
|
|
.\" The following is from 2.6.12: Documentation/networking/ip-sysctl.txt
|
|
.TP
|
|
.IR ip_nonlocal_bind " (Boolean; default: disabled; since Linux 2.4)"
|
|
.\" Precisely: patch-2.4.0-test10
|
|
If set, allows processes to
|
|
.BR bind (2)
|
|
to nonlocal IP addresses,
|
|
which can be quite useful, but may break some applications.
|
|
.\"
|
|
.\" The following is from 2.6.12: Documentation/networking/ip-sysctl.txt
|
|
.TP
|
|
.IR ip6frag_time " (integer; default: 30)"
|
|
Time in seconds to keep an IPv6 fragment in memory.
|
|
.\"
|
|
.\" The following is from 2.6.12: Documentation/networking/ip-sysctl.txt
|
|
.TP
|
|
.IR ip6frag_secret_interval " (integer; default: 600)"
|
|
Regeneration interval (in seconds) of the hash secret (or lifetime
|
|
for the hash secret) for IPv6 fragments.
|
|
.TP
|
|
.IR ipfrag_high_thresh " (integer), " ipfrag_low_thresh " (integer)"
|
|
If the amount of queued IP fragments reaches
|
|
.IR ipfrag_high_thresh ,
|
|
the queue is pruned down to
|
|
.IR ipfrag_low_thresh .
|
|
Contains an integer with the number of bytes.
|
|
.TP
|
|
.I neigh/*
|
|
See
|
|
.BR arp (7).
|
|
.\" FIXME Document the conf/*/* interfaces
|
|
.\"
|
|
.\" FIXME Document the route/* interfaces
|
|
.SS Ioctls
|
|
All ioctls described in
|
|
.BR socket (7)
|
|
apply to
|
|
.BR ip .
|
|
.PP
|
|
Ioctls to configure generic device parameters are described in
|
|
.BR netdevice (7).
|
|
.\" FIXME Add a discussion of multicasting
|
|
.SH ERRORS
|
|
.\" FIXME document all errors.
|
|
.\" We should really fix the kernels to give more uniform
|
|
.\" error returns (ENOMEM vs ENOBUFS, EPERM vs EACCES etc.)
|
|
.TP
|
|
.B EACCES
|
|
The user tried to execute an operation without the necessary permissions.
|
|
These include:
|
|
sending a packet to a broadcast address without having the
|
|
.B SO_BROADCAST
|
|
flag set;
|
|
sending a packet via a
|
|
.I prohibit
|
|
route;
|
|
modifying firewall settings without superuser privileges (the
|
|
.B CAP_NET_ADMIN
|
|
capability);
|
|
binding to a privileged port without superuser privileges (the
|
|
.B CAP_NET_BIND_SERVICE
|
|
capability).
|
|
.TP
|
|
.B EADDRINUSE
|
|
Tried to bind to an address already in use.
|
|
.TP
|
|
.B EADDRNOTAVAIL
|
|
A nonexistent interface was requested or the requested source
|
|
address was not local.
|
|
.TP
|
|
.B EAGAIN
|
|
Operation on a nonblocking socket would block.
|
|
.TP
|
|
.B EALREADY
|
|
A connection operation on a nonblocking socket is already in progress.
|
|
.TP
|
|
.B ECONNABORTED
|
|
A connection was closed during an
|
|
.BR accept (2).
|
|
.TP
|
|
.B EHOSTUNREACH
|
|
No valid routing table entry matches the destination address.
|
|
This error can be caused by an ICMP message from a remote router or
|
|
by the local routing table.
|
|
.TP
|
|
.B EINVAL
|
|
Invalid argument passed.
|
|
For send operations this can be caused by sending to a
|
|
.I blackhole
|
|
route.
|
|
.TP
|
|
.B EISCONN
|
|
.BR connect (2)
|
|
was called on an already connected socket.
|
|
.TP
|
|
.B EMSGSIZE
|
|
Datagram is bigger than an MTU on the path and it cannot be fragmented.
|
|
.TP
|
|
.BR ENOBUFS ", " ENOMEM
|
|
Not enough free memory.
|
|
This often means that the memory allocation is limited by the socket
|
|
buffer limits, not by the system memory, but this is not 100% consistent.
|
|
.TP
|
|
.B ENOENT
|
|
.B SIOCGSTAMP
|
|
was called on a socket where no packet arrived.
|
|
.TP
|
|
.B ENOPKG
|
|
A kernel subsystem was not configured.
|
|
.TP
|
|
.BR ENOPROTOOPT " and " EOPNOTSUPP
|
|
Invalid socket option passed.
|
|
.TP
|
|
.B ENOTCONN
|
|
The operation is defined only on a connected socket, but the socket wasn't
|
|
connected.
|
|
.TP
|
|
.B EPERM
|
|
User doesn't have permission to set high priority, change configuration,
|
|
or send signals to the requested process or group.
|
|
.TP
|
|
.B EPIPE
|
|
The connection was unexpectedly closed or shut down by the other end.
|
|
.TP
|
|
.B ESOCKTNOSUPPORT
|
|
The socket is not configured or an unknown socket type was requested.
|
|
.PP
|
|
Other errors may be generated by the overlaying protocols; see
|
|
.BR tcp (7),
|
|
.BR raw (7),
|
|
.BR udp (7),
|
|
and
|
|
.BR socket (7).
|
|
.SH NOTES
|
|
.BR IP_FREEBIND ,
|
|
.BR IP_MSFILTER ,
|
|
.BR IP_MTU ,
|
|
.BR IP_MTU_DISCOVER ,
|
|
.BR IP_RECVORIGDSTADDR ,
|
|
.BR IP_PASSSEC ,
|
|
.BR IP_PKTINFO ,
|
|
.BR IP_RECVERR ,
|
|
.BR IP_ROUTER_ALERT ,
|
|
and
|
|
.BR IP_TRANSPARENT
|
|
are Linux-specific.
|
|
.\" IP_XFRM_POLICY is Linux-specific
|
|
.\" IP_IPSEC_POLICY is a nonstandard extension, also present on some BSDs
|
|
.PP
|
|
Be very careful with the
|
|
.B SO_BROADCAST
|
|
option \- it is not privileged in Linux.
|
|
It is easy to overload the network
|
|
with careless broadcasts.
|
|
For new application protocols
|
|
it is better to use a multicast group instead of broadcasting.
|
|
Broadcasting is discouraged.
|
|
.PP
|
|
Some other BSD sockets implementations provide
|
|
.B IP_RCVDSTADDR
|
|
and
|
|
.B IP_RECVIF
|
|
socket options to get the destination address and the interface of
|
|
received datagrams.
|
|
Linux has the more general
|
|
.B IP_PKTINFO
|
|
for the same task.
|
|
.PP
|
|
Some BSD sockets implementations also provide an
|
|
.B IP_RECVTTL
|
|
option, but an ancillary message with type
|
|
.B IP_RECVTTL
|
|
is passed with the incoming packet.
|
|
This is different from the
|
|
.B IP_TTL
|
|
option used in Linux.
|
|
.PP
|
|
Using the
|
|
.B SOL_IP
|
|
socket options level isn't portable; BSD-based stacks use the
|
|
.B IPPROTO_IP
|
|
level.
|
|
.PP
|
|
.B INADDR_ANY
|
|
(0.0.0.0) and
|
|
.B INADDR_BROADCAST
|
|
(255.255.255.255) are byte-order-neutral.
|
|
This means
|
|
.BR htonl (3)
|
|
has no effect on them.
|
|
.SS Compatibility
|
|
For compatibility with Linux 2.0, the obsolete
|
|
.BI "socket(AF_INET, SOCK_PACKET, " protocol )
|
|
syntax is still supported to open a
|
|
.BR packet (7)
|
|
socket.
|
|
This is deprecated and should be replaced by
|
|
.BI "socket(AF_PACKET, SOCK_RAW, " protocol )
|
|
instead.
|
|
The main difference is the new
|
|
.I sockaddr_ll
|
|
address structure for generic link layer information instead of the old
|
|
.IR sockaddr_pkt .
|
|
.SH BUGS
|
|
There are too many inconsistent error values.
|
|
.PP
|
|
The error used to diagnose exhaustion of the ephemeral port range differs
|
|
across the various system calls
|
|
.RB ( connect (2),
|
|
.BR bind (2),
|
|
.BR listen (2),
|
|
.BR sendto (2))
|
|
that can assign ephemeral ports.
|
|
.PP
|
|
The ioctls to configure IP-specific interface options and ARP tables are
|
|
not described.
|
|
.\" .PP
|
|
.\" Some versions of glibc forget to declare
|
|
.\" .IR in_pktinfo .
|
|
.\" Workaround currently is to copy it into your program from this man page.
|
|
.PP
|
|
Receiving the original destination address with
|
|
.B MSG_ERRQUEUE
|
|
in
|
|
.I msg_name
|
|
by
|
|
.BR recvmsg (2)
|
|
does not work in some 2.2 kernels.
|
|
.\" .SH AUTHORS
|
|
.\" This man page was written by Andi Kleen.
|
|
.SH SEE ALSO
|
|
.BR recvmsg (2),
|
|
.BR sendmsg (2),
|
|
.BR byteorder (3),
|
|
.BR capabilities (7),
|
|
.BR icmp (7),
|
|
.BR ipv6 (7),
|
|
.BR netdevice (7),
|
|
.BR netlink (7),
|
|
.BR raw (7),
|
|
.BR socket (7),
|
|
.BR tcp (7),
|
|
.BR udp (7),
|
|
.BR ip (8)
|
|
.PP
|
|
The kernel source file
|
|
.IR Documentation/networking/ip-sysctl.txt .
|
|
.PP
|
|
RFC\ 791 for the original IP specification.
|
|
RFC\ 1122 for the IPv4 host requirements.
|
|
RFC\ 1812 for the IPv4 router requirements.
|