bpf.2: ffix (multiple)

Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2015-05-24 21:31:01 +02:00 · 2015-05-24 21:31:01 +02:00 · 1148d9343f
parent 9a5215bfad
commit 1148d9343f
1 changed files with 231 additions and 109 deletions
--- a/man2/bpf.2
+++ b/man2/bpf.2
@ -72,19 +72,22 @@ execution of the program which may store information about the event in the maps
 Beyond storing data the programs may call into in-kernel helper functions.
 The same program can be attached to multiple events and different programs can
 access the same map:
+
+.in +4n
 .nf
-  tracing     tracing     tracing     packet     packet
-  event A     event B     event C     on eth0    on eth1
-   |             |          |           |          |
-   |             |          |           |          |
-   --> tracing <--      tracing       socket     socket
-        prog_1           prog_2       prog_3     prog_4
-        |  |               |            |
-     |---  -----|  |-------|           map_3
-   map_1       map_2
+tracing     tracing     tracing     packet     packet
+event A     event B     event C     on eth0    on eth1
+ |             |          |           |          |
+ |             |          |           |          |
+ --> tracing <--      tracing       socket     socket
+      prog_1           prog_2       prog_3     prog_4
+      |  |               |            |
+   |---  -----|  |-------|           map_3
+ map_1       map_2
 .fi
+.in
 .SS Syscall Arguments
-.B bpf()
+.BR bpf ()
 syscall operation is determined by
 .I cmd
 which can be one of the following:
@ -106,40 +109,43 @@ Lookup element by key in a given map and return key of next element
 .TP
 .B BPF_PROG_LOAD
 Verify and load BPF program
-.TP
-.B attr
-is a pointer to a union of type bpf_attr as defined below.
-.TP
-.B size
+.PP
+.I attr
+is a pointer to a union of type
+.I bpf_attr
+as defined below.
+
+.I size
 is the size of the union.
 .P
 .nf
 union bpf_attr {
    struct { /* anonymous struct used by BPF_MAP_CREATE command */
-        __u32             map_type;
-        __u32             key_size;    /* size of key in bytes */
-        __u32             value_size;  /* size of value in bytes */
-        __u32             max_entries; /* max number of entries in a map */
+        __u32          map_type;
+        __u32          key_size;    /* size of key in bytes */
+        __u32          value_size;  /* size of value in bytes */
+        __u32          max_entries; /* maximum number of entries
+                                       in a map */
    };

    struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
-        __u32             map_fd;
-        __aligned_u64     key;
+        __u32          map_fd;
+        __aligned_u64  key;
        union {
            __aligned_u64 value;
            __aligned_u64 next_key;
        };
-	__u64             flags;
+        __u64          flags;
    };

    struct { /* anonymous struct used by BPF_PROG_LOAD command */
-        __u32         prog_type;
-        __u32         insn_cnt;
-        __aligned_u64 insns;     /* 'const struct bpf_insn *' */
-        __aligned_u64 license;   /* 'const char *' */
-        __u32         log_level; /* verbosity level of verifier */
-        __u32         log_size;  /* size of user buffer */
-        __aligned_u64 log_buf;   /* user supplied 'char *' buffer */
+        __u32          prog_type;
+        __u32          insn_cnt;
+        __aligned_u64  insns;      /* 'const struct bpf_insn *' */
+        __aligned_u64  license;    /* 'const char *' */
+        __u32          log_level;  /* verbosity level of verifier */
+        __u32          log_size;   /* size of user buffer */
+        __aligned_u64  log_buf;    /* user supplied 'char *' buffer */
    };
 } __attribute__((aligned(8)));
 .fi
@ -148,6 +154,7 @@ maps are a generic data structure for storage of different types
 and sharing data between kernel and userspace.

 Any map type has the following attributes:
+
  . type
  . max number of elements
  . key size in bytes
@ -174,47 +181,79 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size,
    return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 }
 .fi
-bpf() syscall creates a map of
+
+.BR bpf ()
+syscall creates a map of
 .I map_type
 type and given attributes
-.I key_size, value_size, max_entries.
+.IR key_size ,
+.IR value_size ,
+.IR max_entries .
 On success it returns a process-local file descriptor.
 On error, \-1 is returned and
 .I errno
-is set to EINVAL or EPERM or ENOMEM.
+is set to
+.BR EINVAL ,
+.BR EPERM ,
+or
+.BR ENOMEM .

 The attributes
 .I key_size
 and
 .I value_size
 will be used by the verifier during program loading to check that the program
-is calling bpf_map_*_elem() helper functions with a correctly initialized
+is calling
+.BR bpf_map_*_elem ()
+helper functions with a correctly initialized
 .I key
 and that the program doesn't access map element
 .I value
 beyond the specified
 .IR value_size .
-For example, when a map is created with key_size = 8 and the program calls
+For example, when a map is created with
+.I "key_size = 8"
+and the program calls
+
+.in +4n
 .nf
 bpf_map_lookup_elem(map_fd, fp - 4)
 .fi
-the program will be rejected,
-since the in-kernel helper function bpf_map_lookup_elem(map_fd, void *key) expects
-to read 8 bytes from 'key' pointer, but 'fp - 4' starting address will cause
-out of bounds stack access.
+.in

-Similarly, when a map is created with value_size = 1 and the program calls
+the program will be rejected,
+since the in-kernel helper function
+
+     bpf_map_lookup_elem(map_fd, void *key)
+
+expects to read 8 bytes from
+.I key
+pointer, but
+.I "fp\ -\ 4"
+starting address will cause out-of-bounds stack access.
+
+Similarly, when a map is created with
+.I "value_size = 1"
+and the program calls
+
+.in +4n
 .nf
 value = bpf_map_lookup_elem(...);
-*(u32 *)value = 1;
+*(u32 *) value = 1;
 .fi
+.in
+
 the program will be rejected, since it accesses the
 .I value
-pointer beyond the specified 1 byte value_size limit.
+pointer beyond the specified 1 byte
+.I value_size
+limit.

 Currently two
 .I map_type
 are supported:
+
+.in +4n
 .nf
 enum bpf_map_type {
   BPF_MAP_TYPE_UNSPEC,
@ -222,10 +261,14 @@ enum bpf_map_type {
   BPF_MAP_TYPE_ARRAY,
 };
 .fi
+.in
+
 .I map_type
 selects one of the available map implementations in the kernel.
 For all map_types
-programs access maps with the same bpf_map_lookup_elem()/bpf_map_update_elem()
+programs access maps with the same
+.BR bpf_map_lookup_elem ()/
+.BR bpf_map_update_elem ()
 helper functions.
 .TP
 .B BPF_MAP_LOOKUP_ELEM
@ -241,15 +284,18 @@ int bpf_lookup_elem(int fd, void *key, void *value)
    return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 }
 .fi
-bpf() syscall looks up an element with a given
+
+.BR bpf ()
+syscall looks up an element with a given
 .I key
 in a map
-.I fd.
+.IR fd .
 If an element is found it returns zero and stores the element's value into
 .IR value .
 If no element is found it returns \-1 and sets
 .I errno
-to ENOENT.
+to
+.BR ENOENT .
 .TP
 .B BPF_MAP_UPDATE_ELEM
 .nf
@ -265,6 +311,7 @@ int bpf_update_elem(int fd, void *key, void *value, __u64 flags)
    return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
 }
 .fi
+
 The call creates or updates an element with a given
 .I key/value
 in a map
@ -272,25 +319,42 @@ in a map
 according to
 .I flags
 which can have one of 3 possible values:
+
 .nf
-#define BPF_ANY         0 /* create new element or update existing */
-#define BPF_NOEXIST     1 /* create new element if it didn't exist */
-#define BPF_EXIST       2 /* update existing element */
+#define BPF_ANY      0 /* create new element or update existing */
+#define BPF_NOEXIST  1 /* create new element if it didn't exist */
+#define BPF_EXIST    2 /* update existing element */
 .fi
+
 On success it returns zero.
 On error, \-1 is returned and
 .I errno
-is set to EINVAL, EPERM, ENOMEM or E2BIG.
+is set to
+.BR EINVAL ,
+.BR EPERM ,
+.BR ENOMEM ,
+or
+.BR E2BIG .
 .B E2BIG
 indicates that the number of elements in the map reached
 .I max_entries
 limit specified at map creation time.
 .B EEXIST
-will be returned from a call to bpf_update_elem(fd, key, value, BPF_NOEXIST) if
-the element with 'key' already exists in the map.
+will be returned from a call to
+
+    bpf_update_elem(fd, key, value, BPF_NOEXIST)
+
+if the element with
+.I key
+already exists in the map.
 .B ENOENT
-will be returned from a call to bpf_update_elem(fd, key, value, BPF_EXIST) if
-the element with 'key' doesn't exist in the map.
+will be returned from a call to
+
+    bpf_update_elem(fd, key, value, BPF_EXIST)
+
+if the element with
+.I key
+doesn't exist in the map.
 .TP
 .B BPF_MAP_DELETE_ELEM
 .nf
@ -304,14 +368,16 @@ int bpf_delete_elem(int fd, void *key)
    return bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
 }
 .fi
+
 The call deletes an element in a map
 .I fd
 with a given
-.I key.
+.IR key .
 Returns zero on success.
 If the element is not found it returns \-1 and sets
 .I errno
-to ENOENT.
+to
+.BR ENOENT .
 .TP
 .B BPF_MAP_GET_NEXT_KEY
 .nf
@ -326,6 +392,7 @@ int bpf_get_next_key(int fd, void *key, void *next_key)
    return bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
 }
 .fi
+
 The call looks up an element by
 .I key
 in a given map
@ -342,19 +409,24 @@ If
 .I key
 is the last element, it returns \-1 and sets
 .I errno
-to ENOENT.
+to
+.BR ENOENT .
 Other possible
 .I errno
-values are ENOMEM, EFAULT, EPERM and EINVAL.
+values are
+.BR ENOMEM ,
+.BR EFAULT ,
+.BR EPERM ,
+and
+.BR EINVAL .
 This method can be used to iterate over all elements in the map.
 .TP
 .B close(map_fd)
 will delete the map
-.I map_fd.
+.IR map_fd .
 When the user space program that created maps exits all maps will
 be deleted automatically.

-.P
 .SS BPF programs

 .TP
@ -383,15 +455,20 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
    return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
 }
 .fi
-.B prog_type
+
+.I prog_type
 is one of the available program types:
+
+.in +4n
 .nf
 enum bpf_prog_type {
-        BPF_PROG_TYPE_UNSPEC,
-        BPF_PROG_TYPE_SOCKET_FILTER,
-        BPF_PROG_TYPE_SCHED_CLS,
+    BPF_PROG_TYPE_UNSPEC,
+    BPF_PROG_TYPE_SOCKET_FILTER,
+    BPF_PROG_TYPE_SCHED_CLS,
 };
 .fi
+.in
+
 By picking
 .I prog_type
 the program author selects a set of helper functions callable from
@ -399,8 +476,12 @@ the program and the corresponding format of
 .I struct bpf_context
 (which is the data blob passed into the program as the first argument).
 For example, the programs loaded with
-.I prog_type
-= BPF_PROG_TYPE_SOCKET_FILTER may call bpf_map_lookup_elem() helper,
+
+    prog_type = BPF_PROG_TYPE_SOCKET_FILTER
+
+may call the
+.BR bpf_map_lookup_elem ()
+helper,
 whereas some future types may not.
 The set of functions available to the programs under a given type may increase
 in the future.
@ -408,31 +489,48 @@ in the future.
 Currently the set of functions for
 .B BPF_PROG_TYPE_SOCKET_FILTER
 is:
-.nf
-bpf_map_lookup_elem(map_fd, void *key)              // lookup key in a map_fd
-bpf_map_update_elem(map_fd, void *key, void *value) // update key/value
-bpf_map_delete_elem(map_fd, void *key)              // delete key in a map_fd
-.fi

-and bpf_context is a pointer to 'struct sk_buff'.
-Programs cannot access fields of 'sk_buff' directly.
+.in +4n
+.nf
+bpf_map_lookup_elem(map_fd, void *key)
+                    /* look up key in a map_fd */
+bpf_map_update_elem(map_fd, void *key, void *value)
+                    /* update key/value */
+bpf_map_delete_elem(map_fd, void *key)
+                    /* delete key in a map_fd */
+.fi
+.in
+
+and
+.I bpf_context
+is a pointer to
+.IR "struct sk_buff" .
+Programs cannot access fields of
+.I sk_buff
+directly.

 More program types may be added in the future.
 Like
 .B BPF_PROG_TYPE_KPROBE
-and bpf_context for it may be defined as a pointer to 'struct pt_regs'.
+and
+.I bpf_context
+for it may be defined as a pointer to a
+.IR "struct pt_regs" .

-.B insns
-array of "struct bpf_insn" instructions.
+.I insns
+array of
+.I "struct bpf_insn"
+instructions.

-.B insn_cnt
+.I insn_cnt
 number of instructions in the program.

-.B license
+.I license
 license string, which must be GPL compatible to call helper functions
-marked gpl_only.
+marked
+.IR gpl_only .

-.B log_buf
+.I log_buf
 user supplied buffer that the in-kernel verifier is using to store the
 verification log.
 This log is a multi-line string that can be checked by
@ -440,14 +538,15 @@ the program author in order to understand how the verifier came to
 the conclusion that the BPF program is unsafe.
 The format of the output can change at any time as the verifier evolves.

-.B log_size
+.I log_size
 size of user buffer.
 If the size of the buffer is not large enough to store all
 verifier messages, \-1 is returned and
 .I errno
-is set to ENOSPC.
+is set to
+.BR ENOSPC .

-.B log_level
+.I log_level
 verbosity level of the verifier.
 A value of zero means that the verifier will
 not provide a log.
@ -464,30 +563,39 @@ User space fetches data from the maps.
 Either the same or a different map may be used by user space as a configuration
 space to alter program behavior on the fly.
 .SS Events
-.P
 Once a program is loaded, it can be attached to an event.
 Various kernel
 subsystems have different ways to do so.
 For example:

+.in +4n
 .nf
-setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd));
+setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF,
+           &prog_fd, sizeof(prog_fd));
 .fi
+.in
+
 will attach the program
 .I prog_fd
 to socket
 .I sock
-which was received from a prior call to socket().
+which was received from a prior call to
+.BR socket (2).

 In the future
+
+.in +4n
 .nf
 ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
 .fi
+.in
+
 may attach the program
 .I prog_fd
 to perf event
 .I event_fd
-which was received by prior call to perf_event_open().
+which was received by a prior call to
+.BR perf_event_open (2).

 .SH EXAMPLES
 .nf
@ -504,7 +612,8 @@ int main(int ac, char **av)
    int sock, map_fd, prog_fd, key;
    long long value = 0, tcp_cnt, udp_cnt;

-    map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), 256);
+    map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key),
+                            sizeof(value), 256);
    if (map_fd < 0) {
        printf("failed to create map '%s'\\n", strerror(errno));
        /* likely not run as root */
@ -512,25 +621,32 @@ int main(int ac, char **av)
    }

    struct bpf_insn prog[] = {
-        BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),           /* r6 = r1 */
-        BPF_LD_ABS(BPF_B, ETH_HLEN + offsetof(struct iphdr, protocol)), /* r0 = ip->proto */
-        BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
-        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),          /* r2 = fp */
-        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),         /* r2 = r2 - 4 */
-        BPF_LD_MAP_FD(BPF_REG_1, map_fd),              /* r1 = map_fd */
-        BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),       /* r0 = map_lookup(r1, r2) */
-        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),         /* if (r0 == 0) goto pc+2 */
-        BPF_MOV64_IMM(BPF_REG_1, 1),                   /* r1 = 1 */
-        BPF_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0),  /* lock *(u64 *)r0 += r1 */
-        BPF_MOV64_IMM(BPF_REG_0, 0),                   /* r0 = 0 */
-        BPF_EXIT_INSN(),                               /* return r0 */
+        BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),        /* r6 = r1 */
+        BPF_LD_ABS(BPF_B, ETH_HLEN + offsetof(struct iphdr, protocol)),
+                                /* r0 = ip->proto */
+        BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4),
+                                /* *(u32 *)(fp - 4) = r0 */
+        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),       /* r2 = fp */
+        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),      /* r2 = r2 - 4 */
+        BPF_LD_MAP_FD(BPF_REG_1, map_fd),           /* r1 = map_fd */
+        BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
+                                /* r0 = map_lookup(r1, r2) */
+        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+                                /* if (r0 == 0) goto pc+2 */
+        BPF_MOV64_IMM(BPF_REG_1, 1),                /* r1 = 1 */
+        BPF_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0),
+                                /* lock *(u64 *) r0 += r1 */
+        BPF_MOV64_IMM(BPF_REG_0, 0),                /* r0 = 0 */
+        BPF_EXIT_INSN(),                            /* return r0 */
    };

-    prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), "GPL");
+    prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+                            sizeof(prog), "GPL");

    sock = open_raw_sock("lo");

-    assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd)) == 0);
+    assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
+                      sizeof(prog_fd)) == 0);

    for (;;) {
        key = IPPROTO_TCP;
@ -562,7 +678,8 @@ is set appropriately.
 .SH ERRORS
 .TP
 .B EPERM
-bpf() syscall was made without sufficient privilege
+.BR bpf ()
+syscall was made without sufficient privilege
 (without the
 .B CAP_SYS_ADMIN
 capability).
@ -575,15 +692,15 @@ Cannot allocate sufficient memory.
 is not an open file descriptor.
 .TP
 .B EFAULT
-One of the pointers (
-.I key
+One of the pointers
+.RI ( key
 or
 .I value
 or
 .I log_buf
 or
-.I insns
-) is outside the accessible address space.
+.IR insns )
+is outside the accessible address space.
 .TP
 .B EINVAL
 The value specified in
@ -601,7 +718,9 @@ or attributes are invalid.
 For
 .B BPF_MAP_*_ELEM
 commands,
-some of the fields of "union bpf_attr" that are not used by this command
+some of the fields of
+.I "union bpf_attr"
+that are not used by this command
 are not set to zero.
 .TP
 .B EINVAL
@ -621,7 +740,9 @@ This may be because it may have
 accessed a disallowed memory region or an uninitialized stack/register or
 because the function constraints don't match the actual types or because
 there was a misaligned memory access.
-In such case it is recommended to call bpf() again with
+In such a case, it is recommended to call
+.BR bpf ()
+again with
 .I log_level = 1
 and examine
 .I log_buf
@ -646,4 +767,5 @@ These commands may be used only by a privileged process (one having the
 .B CAP_SYS_ADMIN
 capability).
 .SH SEE ALSO
-Both classic and extended BPF are explained in Documentation/networking/filter.txt
+Both classic and extended BPF are explained in
+.IR Documentation/networking/filter.txt .