user_namespaces.7: Add an example program

Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
This commit is contained in:
Michael Kerrisk 2013-03-01 08:52:14 +01:00
parent df23ae04d6
commit 8d36d80cc3
1 changed files with 333 additions and 1 deletions

View File

@ -396,6 +396,337 @@ because of their potential to confuse set-user-ID-root applications.
In general, it becomes safe to allow the root user in a user namespace to
use those features because it is impossible, while in a user namespace,
to gain more privilege than the root user of a user namespace has.
.SH EXAMPLE
The program below is designed to allow experimenting with
user namespaces, as well as other types of namespaces.
It creates namespaces as specified by command-line options and then executes
a command inside those namespaces.
The comments and
.I usage()
function inside the program provide a full explanation of the program.
The following shell session demonstrates its use:
.in +4n
.nf
$ \fBuname -rs\fP # Need Linux 3.8 or later
Linux 3.8.0
$ \fBid -u\fP # Running as unprivileged user
1000
$ \fBid -g\fP
1000
.fi
.in
Now start a shell in new user
.RI ( \-U ),
mount
.RI ( \-m ),
and PID
.RI ( \-p )
namespaces, with user ID
.RI ( \-M )
and group ID
.RI ( \-G )
1000 mapped to 0 inside the user namespace:
.in +4n
.nf
$ \fB./userns_child_exec -p -m -U -M '0 1000 1' -G '0 1000 1' bash\fP
.fi
.in
The shell has PID 1, because it is the first process in the new
PID namespace:
.in +4n
.nf
bash$ \fBecho $$\fP
1
.fi
.in
Inside the user namespace, the shell has user and group ID 0,
and a full set of permitted and effective capabilities:
.in +4n
.nf
bash$ \fBcat /proc/$$/status | egrep '^[UG]id'\fP
Uid: 0 0 0 0
Gid: 0 0 0 0
bash$ \fBcat /proc/$$/status | egrep '^Cap(Prm|Inh|Eff)'\fP
CapInh: 0000000000000000
CapPrm: 0000001fffffffff
CapEff: 0000001fffffffff
.fi
.in
Mounting a new
.I /proc
file system and listing all of the processes visible
in the new PID namespace shows that the shell can't see
any processes outside the PID namespace:
.in +4n
.nf
bash$ \fBmount -t proc proc /proc\fP
bash$ \fBps ax\fP
PID TTY STAT TIME COMMAND
1 pts/3 S 0:00 bash
22 pts/3 R+ 0:00 ps ax
.fi
.in
.SS Program source
\&
.nf
/* userns_child_exec.c
Licensed under GNU General Public License v2 or later
Create a child process that executes a shell command in new
namespace(s); allow UID and GID mappings to be specified when
creating a user namespace.
*/
#define _GNU_SOURCE
#include <sched.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <signal.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
/* A simple error\-handling macro: print \(aqmsg\(aq together with the
description of the current \(aqerrno\(aq value (via perror()) and
terminate the calling process with status EXIT_FAILURE. The body is
wrapped in do { } while (0) so that a use followed by a semicolon
forms exactly one statement. */
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
} while (0)
/* Arguments handed by main() to the start function of the cloned
child (childFunc()) */
struct child_args {
char **argv; /* Command to be executed by child, with args */
int pipe_fd[2]; /* Pipe used to synchronize parent and child */
};
static int verbose; /* Set to 1 by the \-v option; enables the
                       progress messages printed by main() */
/* Print a usage message for this program on stderr and terminate
   the process with status EXIT_FAILURE; this function does not return.
   \(aqpname\(aq is the program name shown in the synopsis line. */
static void
usage(char *pname)
{
    fprintf(stderr, "Usage: %s [options] cmd [arg...]\\n\\n", pname);
    fprintf(stderr, "Create a child process that executes a shell "
            "command in a new user namespace,\\n"
            "and possibly also other new namespace(s).\\n\\n");
    fprintf(stderr, "Options can be:\\n\\n");

    /* The macro body deliberately has no trailing semicolon: each
       use below supplies its own, so fpe() expands to exactly one
       statement */
#define fpe(str) fprintf(stderr, " %s", str)
    fpe("\-i New IPC namespace\\n");
    fpe("\-m New mount namespace\\n");
    fpe("\-n New network namespace\\n");
    fpe("\-p New PID namespace\\n");
    fpe("\-u New UTS namespace\\n");
    fpe("\-U New user namespace\\n");
    fpe("\-M uid_map Specify UID map for user namespace\\n");
    fpe("\-G gid_map Specify GID map for user namespace\\n");
    fpe("\-z Map user\(aqs UID and GID to 0 in user namespace\\n");
    fpe(" (equivalent to: \-M \(aq0 <uid> 1\(aq \-G \(aq0 <gid> 1\(aq)\\n");
    fpe("\-v Display verbose messages\\n");
    fpe("\\n");
    fpe("If \-z, \-M, or \-G is specified, \-U is required.\\n");
    fpe("It is not permitted to specify both \-z and either \-M or \-G.\\n");
    fpe("\\n");
    fpe("Map strings for \-M and \-G consist of records of the form:\\n");
    fpe("\\n");
    fpe(" ID\-inside\-ns ID\-outside\-ns len\\n");
    fpe("\\n");
    fpe("A map string can contain multiple records, separated"
        " by commas;\\n");
    fpe("the commas are replaced by newlines before writing"
        " to map files.\\n");
    exit(EXIT_FAILURE);
}
/* Update the mapping file \(aqmap_file\(aq, with the value provided in
   \(aqmapping\(aq, a string that defines a UID or GID mapping. A UID or
   GID mapping consists of one or more newline\-delimited records
   of the form:

       ID\-inside\-ns    ID\-outside\-ns    length

   Requiring the user to supply a string that contains newlines is
   of course inconvenient for command\-line use. Thus, we permit the
   use of commas to delimit records in this string, and replace them
   with newlines before writing the string to the file. Note that
   \(aqmapping\(aq is modified in place.

   A failure to open or write the file is reported on stderr but is
   not fatal: the function returns and the caller carries on. */
static void
update_map(char *mapping, char *map_file)
{
    int fd;
    size_t j, map_len;      /* Index into, and length of, \(aqmapping\(aq */
    ssize_t nwritten;       /* Result of write() */

    /* Replace commas in mapping string with newlines */

    map_len = strlen(mapping);
    for (j = 0; j < map_len; j++)
        if (mapping[j] == \(aq,\(aq)
            mapping[j] = \(aq\\n\(aq;

    fd = open(map_file, O_RDWR);
    if (fd == \-1) {
        fprintf(stderr, "ERROR: open %s: %s\\n", map_file, strerror(errno));
        return;
    }

    /* \(aqerrno\(aq is meaningful only when write() fails outright, so a
       short write is reported with its own message */

    nwritten = write(fd, mapping, map_len);
    if (nwritten == \-1)
        fprintf(stderr, "ERROR: write %s: %s\\n", map_file, strerror(errno));
    else if ((size_t) nwritten != map_len)
        fprintf(stderr, "ERROR: write %s: partial write\\n", map_file);

    close(fd);
}
/* Start function for the cloned child. \(aqarg\(aq points to a
   \(aqstruct child_args\(aq filled in by main(). The child blocks until the
   parent closes the write end of the synchronization pipe, and then
   executes the command in args\->argv. Returns only by terminating the
   process: either execvp() succeeds, or errExit() exits with
   EXIT_FAILURE. */
static int
childFunc(void *arg)
{
    struct child_args *args = (struct child_args *) arg;
    char ch;

    /* Wait until the parent has updated the UID and GID mappings.
       See the comment in main(). We wait for end of file on a
       pipe that will be closed by the parent process once it has
       updated the mappings. */

    close(args\->pipe_fd[1]);    /* Close our descriptor for the write
                                   end of the pipe so that we see EOF
                                   when parent closes its descriptor */
    if (read(args\->pipe_fd[0], &ch, 1) != 0) {
        fprintf(stderr,
                "Failure in child: read from pipe returned != 0\\n");
        exit(EXIT_FAILURE);
    }

    /* The pipe has served its purpose; close the read end so that the
       descriptor is not inherited by the command we are about to
       execute */

    close(args\->pipe_fd[0]);

    /* Execute a shell command */

    printf("About to exec %s\\n", args\->argv[0]);
    execvp(args\->argv[0], args\->argv);
    errExit("execvp");
}
/* Size in bytes of the buffer that serves as the stack of the cloned
child. main() passes child_stack + STACK_SIZE, that is, the end of
the buffer, as the stack argument of clone(). */
#define STACK_SIZE (1024 * 1024)
static char child_stack[STACK_SIZE]; /* Space for child\(aqs stack */
/* Parse the command line, create a child in the requested new
   namespace(s) using clone(), write any requested UID and GID maps
   for the child, release the child so that it executes the command,
   and wait for the child to terminate. Invalid option combinations
   and a missing command are rejected via usage(). */
int
main(int argc, char *argv[])
{
    int flags, opt, map_zero;
    pid_t child_pid;
    struct child_args args;
    char *uid_map, *gid_map;
    enum { MAP_BUF_SIZE = 100 };    /* An enumeration constant, so that
                                       map_buf is an ordinary array
                                       rather than a variable\-length
                                       array */
    char map_buf[MAP_BUF_SIZE];
    char map_path[PATH_MAX];

    /* Parse command\-line options. The initial \(aq+\(aq character in
       the final getopt() argument prevents GNU\-style permutation
       of command\-line options. That\(aqs useful, since sometimes
       the \(aqcommand\(aq to be executed by this program itself
       has command\-line options. We don\(aqt want getopt() to treat
       those as options to this program. */

    flags = 0;
    verbose = 0;
    gid_map = NULL;
    uid_map = NULL;
    map_zero = 0;
    while ((opt = getopt(argc, argv, "+imnpuUM:G:zv")) != \-1) {
        switch (opt) {
        case \(aqi\(aq: flags |= CLONE_NEWIPC;        break;
        case \(aqm\(aq: flags |= CLONE_NEWNS;         break;
        case \(aqn\(aq: flags |= CLONE_NEWNET;        break;
        case \(aqp\(aq: flags |= CLONE_NEWPID;        break;
        case \(aqu\(aq: flags |= CLONE_NEWUTS;        break;
        case \(aqv\(aq: verbose = 1;                  break;
        case \(aqz\(aq: map_zero = 1;                 break;
        case \(aqM\(aq: uid_map = optarg;             break;
        case \(aqG\(aq: gid_map = optarg;             break;
        case \(aqU\(aq: flags |= CLONE_NEWUSER;       break;
        default:  usage(argv[0]);
        }
    }

    /* \-M, \-G, or \-z without \-U is nonsensical, and \-z cannot be
       combined with \-M or \-G */

    if (((uid_map != NULL || gid_map != NULL || map_zero) &&
                !(flags & CLONE_NEWUSER)) ||
            (map_zero && (uid_map != NULL || gid_map != NULL)))
        usage(argv[0]);

    /* A command is mandatory: without one, argv[optind] is NULL and
       the child would have nothing to execute */

    if (optind >= argc)
        usage(argv[0]);

    args.argv = &argv[optind];

    /* We use a pipe to synchronize the parent and child, in order to
       ensure that the parent sets the UID and GID maps before the child
       calls execve(). This ensures that the child maintains its
       capabilities during the execve() in the common case where we
       want to map the child\(aqs effective user ID to 0 in the new user
       namespace. Without this synchronization, the child would lose
       its capabilities if it performed an execve() with nonzero
       user IDs (see the capabilities(7) man page for details of the
       transformation of a process\(aqs capabilities during execve()). */

    if (pipe(args.pipe_fd) == \-1)
        errExit("pipe");

    /* Create the child in new namespace(s) */

    child_pid = clone(childFunc, child_stack + STACK_SIZE,
                      flags | SIGCHLD, &args);
    if (child_pid == \-1)
        errExit("clone");

    /* Parent falls through to here */

    if (verbose)
        printf("%s: PID of child created by clone() is %ld\\n",
                argv[0], (long) child_pid);

    /* Update the UID and GID maps in the child. With \-z, the map
       string is built in map_buf, which is reused for both maps. */

    if (uid_map != NULL || map_zero) {
        snprintf(map_path, PATH_MAX, "/proc/%ld/uid_map",
                (long) child_pid);
        if (map_zero) {
            snprintf(map_buf, MAP_BUF_SIZE, "0 %ld 1", (long) getuid());
            uid_map = map_buf;
        }
        update_map(uid_map, map_path);
    }
    if (gid_map != NULL || map_zero) {
        snprintf(map_path, PATH_MAX, "/proc/%ld/gid_map",
                (long) child_pid);
        if (map_zero) {
            snprintf(map_buf, MAP_BUF_SIZE, "0 %ld 1", (long) getgid());
            gid_map = map_buf;
        }
        update_map(gid_map, map_path);
    }

    /* Close the write end of the pipe, to signal to the child that we
       have updated the UID and GID maps */

    close(args.pipe_fd[1]);

    if (waitpid(child_pid, NULL, 0) == \-1)      /* Wait for child */
        errExit("waitpid");

    if (verbose)
        printf("%s: terminating\\n", argv[0]);

    exit(EXIT_SUCCESS);
}
.fi
.SH SEE ALSO
.BR unshare (1),
.BR clone (2),
@ -404,4 +735,5 @@ to gain more privilege than the root user of a user namespace has.
.BR proc (5),
.BR credentials (7),
.BR capabilities (7),
.BR namespaces (7),
.BR pid_namespaces (7)