seccomp.2: Changes after review feedback by Kees Cook

Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
This commit is contained in:
Michael Kerrisk 2014-12-30 21:25:02 +01:00
parent 36931cfc80
commit 068653012c
1 changed file with 11 additions and 39 deletions

View File

@@ -206,7 +206,6 @@ struct sock_filter { /* Filter block */
.fi
.in
.\" FIXME I reworded/enhanced the following sentence. Is it okay?
When executing the instructions, the BPF program operates on the
system call information made available (i.e., use the
.BR BPF_ABS
@@ -277,8 +276,6 @@ and
will indicate which system call was attempted.
.IP *
.I si_code
.\" FIXME Why is the constant thus named? All of the other 'si_code'
.\" constants are prefixed 'SI_'. Why the inconsistency?
will contain
.BR SYS_SECCOMP .
.IP *
@@ -291,10 +288,7 @@ portion of the filter return value.
The program counter will be as though the system call happened
(i.e., it will not point to the system call instruction).
The return value register will contain an architecture\-dependent value;
if resuming execution, set it to something sensible.
.\" FIXME Regarding the preceding line, can you give an example(s)
.\" of "something sensible"? (Depending on the answer, maybe it
.\" might be useful to add some text on this point.)
if resuming execution, set it to something appropriate for the system call.
.\"
.\" FIXME Please check:
.\" In an attempt to make the text clearer, I changed
@@ -346,11 +340,7 @@ allow use of
.BR ptrace (2)\(emeven
of other
sandboxed processes\(emwithout extreme care;
.\" FIXME Below, I think it would be helpful to add some words after
.\" "to escape", as in "to escape [what?]" I suppose the wording
.\" would be something like "to escape the seccomp sandbox mechanism"
.\" but perhaps you have a better wording.
ptracers can use this mechanism to escape.)
ptracers can use this mechanism to escape from the seccomp sandbox.)
.TP
.BR SECCOMP_RET_ALLOW
This value results in the system call being executed.
@@ -392,7 +382,6 @@ is unknown; or
.IR flags
are invalid for the given
.IR operation .
.\" FIXME Please review the following
.TP
.BR EINVAL
.I operation
@@ -400,14 +389,12 @@ included
.BR BPF_ABS ,
but the specified offset was not aligned to a 32-bit boundary or exceeded
.IR "sizeof(struct\ seccomp_data)" .
.\" FIXME Please review the following
.TP
.BR EINVAL
.\" See kernel/seccomp.c::seccomp_may_assign_mode() in 3.18 sources
A secure computing mode has already been set, and
.I operation
differs from the existing setting.
.\" FIXME Please review the following
.TP
.BR EINVAL
.\" See stub kernel/seccomp.c::seccomp_set_mode_filter() in 3.18 sources
@@ -417,7 +404,6 @@ specified
but the kernel was not built with
.B CONFIG_SECCOMP_FILTER
enabled.
.\" FIXME Please review the following
.TP
.BR EINVAL
.I operation
@@ -432,7 +418,6 @@ was not valid or the length of the filter program was zero or exceeded
.TP
.BR ENOMEM
Out of memory.
.\" FIXME Please review the following
.TP
.BR ENOMEM
.\" ENOMEM in kernel/seccomp.c::seccomp_attach_filter() in 3.18 sources
@@ -463,7 +448,6 @@ provides a superset of the functionality provided by the
.BR PR_SET_SECCOMP
operation (which does not support
.IR flags ).
.\" FIXME Please review the following new subsection {{{
.SS Seccomp-specific BPF details
Note the following BPF details specific to seccomp filters:
.IP * 3
@@ -490,9 +474,6 @@ addressing mode modifier yields an immediate mode operand
whose value is the size of the
.IR seccomp_data
buffer.
.\" FIXME Any other seccomp-specific BPF details that should be added here?
.\"
.\" FIXME End of new subsection for review }}}
.SH EXAMPLE
The program below accepts four or more arguments.
The first three arguments are a system call number,
@@ -528,7 +509,7 @@ $ \fBuname -m\fP
x86_64
$ \fBsyscall_nr() {
cat /usr/src/linux/arch/x86/syscalls/syscall_64.tbl | \\
awk '$2 != "x32" && $3 == "'$1'" { print $1 }'
awk '$2 != "x32" && $3 == "'$1'" { print $1 }'
}\fP
.in
.fi
@@ -554,7 +535,7 @@ system call, so that the command is not even executed:
.nf
.in +4n
$ \fBsyscall_nr execve\fP
59
59
$ \fB./a.out\fP
Usage: ./a.out <syscall_nr> <arch> <errno> <prog> [<args>]
Hint for <arch>: AUDIT_ARCH_I386: 0x40000003
@@ -607,45 +588,36 @@ cecilia
static int
install_filter(int syscall_nr, int t_arch, int f_errno)
{
.\" FIXME In the BPF program below, you use '+' to build the instructions.
.\" However, most other BPF example code I see uses '|'. While I
.\" assume it's equivalent (i.e., the bit fields are nonoverlapping),
.\" was there a reason to use '+' rather than '|'? (To me, the
.\" latter is a little clearer in its intent.)
.\"
.\" FIXME I expanded comments [0], [1], [2], [3], [4] a little.
.\" Are they okay? */
.\"
struct sock_filter filter[] = {
/* [0] Load architecture from 'seccomp_data' buffer into
accumulator */
BPF_STMT(BPF_LD + BPF_W + BPF_ABS,
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
(offsetof(struct seccomp_data, arch))),
/* [1] Jump forward 4 instructions if architecture does not
match 't_arch' */
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, t_arch, 0, 4),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, t_arch, 0, 4),
/* [2] Load system call number from 'seccomp_data' buffer into
accumulator */
BPF_STMT(BPF_LD + BPF_W + BPF_ABS,
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
(offsetof(struct seccomp_data, nr))),
/* [3] Jump forward 1 instruction if system call number
does not match 'syscall_nr' */
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, syscall_nr, 0, 1),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, syscall_nr, 0, 1),
/* [4] Matching architecture and system call: don't execute
the system call, and return 'f_errno' in 'errno' */
BPF_STMT(BPF_RET + BPF_K,
BPF_STMT(BPF_RET | BPF_K,
SECCOMP_RET_ERRNO | (f_errno & SECCOMP_RET_DATA)),
/* [5] Destination of system call number mismatch: allow other
system calls */
BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW),
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/* [6] Destination of architecture mismatch: kill process */
BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL),
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),
};
struct sock_fprog prog = {