mirror of https://github.com/mkerrisk/man-pages
user_namespaces.7: Add an example program
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
This commit is contained in:
parent
df23ae04d6
commit
8d36d80cc3
|
@ -396,6 +396,337 @@ because of their potential to confuse set-user-ID-root applications.
|
|||
In general, it becomes safe to allow the root user in a user namespace to
|
||||
use those features because it is impossible, while in a user namespace,
|
||||
to gain more privilege than the root user of a user namespace has.
|
||||
.SH EXAMPLE
|
||||
The program below is designed to allow experimenting with
|
||||
user namespaces, as well as other types of namespaces.
|
||||
It creates namespaces as specified by command-line options and then executes
|
||||
a command inside those namespaces.
|
||||
The comments and
|
||||
.I usage()
|
||||
function inside the program provide a full explanation of the program.
|
||||
The following shell session demonstrates its use:
|
||||
|
||||
.in +4n
|
||||
.nf
|
||||
$ \fBuname -rs\fP # Need Linux 3.8 or later
|
||||
Linux 3.8.0
|
||||
$ \fBid -u\fP # Running as unprivileged user
|
||||
1000
|
||||
$ \fBid -g\fP
|
||||
1000
|
||||
.fi
|
||||
.in
|
||||
|
||||
Now start a shell in new user
|
||||
.RI ( \-U ),
|
||||
mount
|
||||
.RI ( \-m ),
|
||||
and PID
|
||||
.RI ( \-p )
|
||||
namespaces, with user ID
|
||||
.RI ( \-M )
|
||||
and group ID
|
||||
.RI ( \-G )
|
||||
1000 mapped to 0 inside the user namespace:
|
||||
|
||||
.in +4n
|
||||
.nf
|
||||
$ \fB./userns_child_exec -p -m -U -M '0 1000 1' -G '0 1000 1' bash\fP
|
||||
.fi
|
||||
.in
|
||||
|
||||
The shell has PID 1, because it is the first process in the new
|
||||
PID namespace:
|
||||
|
||||
.in +4n
|
||||
.nf
|
||||
bash$ \fBecho $$\fP
|
||||
1
|
||||
.fi
|
||||
.in
|
||||
|
||||
Inside the user namespace, the shell has user and group ID 0,
|
||||
and a full set of permitted and effective capabilities:
|
||||
|
||||
.in +4n
|
||||
.nf
|
||||
bash$ \fBcat /proc/$$/status | egrep '^[UG]id'\fP
|
||||
Uid: 0 0 0 0
|
||||
Gid: 0 0 0 0
|
||||
bash$ \fBcat /proc/$$/status | egrep '^Cap(Prm|Inh|Eff)'\fP
|
||||
CapInh: 0000000000000000
|
||||
CapPrm: 0000001fffffffff
|
||||
CapEff: 0000001fffffffff
|
||||
.fi
|
||||
.in
|
||||
|
||||
Mounting a new
|
||||
.I /proc
|
||||
file system and listing all of the processes visible
|
||||
in the new PID namespace shows that the shell can't see
|
||||
any processes outside the PID namespace:
|
||||
|
||||
.in +4n
|
||||
.nf
|
||||
bash$ \fBmount -t proc proc /proc\fP
|
||||
bash$ \fBps ax\fP
|
||||
PID TTY STAT TIME COMMAND
|
||||
1 pts/3 S 0:00 bash
|
||||
22 pts/3 R+ 0:00 ps ax
|
||||
.fi
|
||||
.in
|
||||
.SS Program source
|
||||
\&
|
||||
.nf
|
||||
/* userns_child_exec.c
|
||||
|
||||
Licensed under GNU General Public License v2 or later
|
||||
|
||||
Create a child process that executes a shell command in new
|
||||
namespace(s); allow UID and GID mappings to be specified when
|
||||
creating a user namespace.
|
||||
*/
|
||||
#define _GNU_SOURCE
|
||||
#include <sched.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/wait.h>
|
||||
#include <signal.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <errno.h>
|
||||
|
||||
/* A simple error\-handling function: print an error message based
|
||||
on the value in \(aqerrno\(aq and terminate the calling process */
|
||||
|
||||
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
|
||||
} while (0)
|
||||
|
||||
struct child_args {
|
||||
char **argv; /* Command to be executed by child, with args */
|
||||
int pipe_fd[2]; /* Pipe used to synchronize parent and child */
|
||||
};
|
||||
|
||||
static int verbose;
|
||||
|
||||
/* Print a usage message for this program on stderr and terminate
   the process with status EXIT_FAILURE. \(aqpname\(aq is the program
   name shown in the synopsis line. This function does not return. */

static void
usage(char *pname)
{
    /* Option summary and notes. Each entry is written to stderr
       preceded by a single space. */

    static const char *const help[] = {
        "\-i New IPC namespace\\n",
        "\-m New mount namespace\\n",
        "\-n New network namespace\\n",
        "\-p New PID namespace\\n",
        "\-u New UTS namespace\\n",
        "\-U New user namespace\\n",
        "\-M uid_map Specify UID map for user namespace\\n",
        "\-G gid_map Specify GID map for user namespace\\n",
        "\-z Map user\(aqs UID and GID to 0 in user namespace\\n",
        " (equivalent to: \-M \(aq0 <uid> 1\(aq \-G \(aq0 <gid> 1\(aq)\\n",
        "\-v Display verbose messages\\n",
        "\\n",
        "If \-z, \-M, or \-G is specified, \-U is required.\\n",
        "It is not permitted to specify both \-z and either \-M or \-G.\\n",
        "\\n",
        "Map strings for \-M and \-G consist of records of the form:\\n",
        "\\n",
        " ID\-inside\-ns ID\-outside\-ns len\\n",
        "\\n",
        "A map string can contain multiple records, separated"
            " by commas;\\n",
        "the commas are replaced by newlines before writing"
            " to map files.\\n",
    };
    size_t j;

    fprintf(stderr, "Usage: %s [options] cmd [arg...]\\n\\n", pname);
    fprintf(stderr, "Create a child process that executes a shell "
            "command in a new user namespace,\\n"
            "and possibly also other new namespace(s).\\n\\n");
    fprintf(stderr, "Options can be:\\n\\n");

    for (j = 0; j < sizeof(help) / sizeof(help[0]); j++)
        fprintf(stderr, " %s", help[j]);

    exit(EXIT_FAILURE);
}
|
||||
|
||||
/* Update the mapping file \(aqmap_file\(aq, with the value provided in
   \(aqmapping\(aq, a string that defines a UID or GID mapping. A UID or
   GID mapping consists of one or more newline\-delimited records
   of the form:

       ID\-inside\-ns    ID\-outside\-ns    length

   Requiring the user to supply a string that contains newlines is
   of course inconvenient for command\-line use. Thus, we permit the
   use of commas to delimit records in this string, and replace them
   with newlines before writing the string to the file.

   Note that \(aqmapping\(aq is modified in place. A failure to open or
   write the map file is reported on stderr but is not fatal: the
   function just returns to the caller. */

static void
update_map(char *mapping, char *map_file)
{
    int fd;
    size_t j;
    size_t map_len;     /* Length of \(aqmapping\(aq */
    ssize_t nwritten;   /* Result of write() */

    /* Replace commas in mapping string with newlines */

    map_len = strlen(mapping);
    for (j = 0; j < map_len; j++)
        if (mapping[j] == \(aq,\(aq)
            mapping[j] = \(aq\\n\(aq;

    fd = open(map_file, O_RDWR);
    if (fd == \-1) {
        fprintf(stderr, "ERROR: open %s: %s\\n", map_file, strerror(errno));
        return;
    }

    /* \(aqerrno\(aq is meaningful only if write() failed outright, so a
       short write is reported separately */

    nwritten = write(fd, mapping, map_len);
    if (nwritten == \-1)
        fprintf(stderr, "ERROR: write %s: %s\\n", map_file, strerror(errno));
    else if ((size_t) nwritten != map_len)
        fprintf(stderr, "ERROR: write %s: short write (%zd of %zu bytes)\\n",
                map_file, nwritten, map_len);

    close(fd);
}
|
||||
|
||||
/* Start function for the cloned child. \(aqarg\(aq points to a
   \(aqstruct child_args\(aq that holds the command to execute and the
   pipe used to synchronize with the parent. The function does not
   return: it either executes the command or terminates the child
   with status EXIT_FAILURE. */

static int
childFunc(void *arg)
{
    struct child_args *args = (struct child_args *) arg;
    char ch;

    /* Wait until the parent has updated the UID and GID mappings.
       See the comment in main(). We wait for end of file on a
       pipe that will be closed by the parent process once it has
       updated the mappings. */

    close(args\->pipe_fd[1]);    /* Close our descriptor for the write
                                   end of the pipe so that we see EOF
                                   when parent closes its descriptor */
    if (read(args\->pipe_fd[0], &ch, 1) != 0) {
        fprintf(stderr,
                "Failure in child: read from pipe returned != 0\\n");
        exit(EXIT_FAILURE);
    }

    close(args\->pipe_fd[0]);    /* Synchronization is complete; don\(aqt
                                   pass the read end of the pipe on to
                                   the executed command */

    /* Execute a shell command */

    printf("About to exec %s\\n", args\->argv[0]);
    fflush(stdout);             /* Output still sitting in the stdio
                                   buffer would be lost when the
                                   process image is replaced */
    execvp(args\->argv[0], args\->argv);
    errExit("execvp");          /* Reached only if execvp() failed */
}
|
||||
|
||||
#define STACK_SIZE (1024 * 1024)
|
||||
|
||||
static char child_stack[STACK_SIZE]; /* Space for child\(aqs stack */
|
||||
|
||||
/* Parse the command\-line options, create a child in the requested
   new namespace(s), write the child\(aqs UID and GID maps if any were
   requested, and then wait for the child to terminate. The exit
   status is EXIT_SUCCESS once the child has been waited for; the
   child\(aqs own termination status is not examined. */

int
main(int argc, char *argv[])
{
    int flags, opt, map_zero;
    pid_t child_pid;
    struct child_args args;
    char *uid_map, *gid_map;
    char map_buf[100];          /* Holds the map string built for \-z */
    char map_path[PATH_MAX];

    /* Parse command\-line options. The initial \(aq+\(aq character in
       the final getopt() argument prevents GNU\-style permutation
       of command\-line options. That\(aqs useful, since sometimes
       the \(aqcommand\(aq to be executed by this program itself
       has command\-line options. We don\(aqt want getopt() to treat
       those as options to this program. */

    flags = 0;
    verbose = 0;
    gid_map = NULL;
    uid_map = NULL;
    map_zero = 0;
    while ((opt = getopt(argc, argv, "+imnpuUM:G:zv")) != \-1) {
        switch (opt) {
        case \(aqi\(aq: flags |= CLONE_NEWIPC;        break;
        case \(aqm\(aq: flags |= CLONE_NEWNS;         break;
        case \(aqn\(aq: flags |= CLONE_NEWNET;        break;
        case \(aqp\(aq: flags |= CLONE_NEWPID;        break;
        case \(aqu\(aq: flags |= CLONE_NEWUTS;        break;
        case \(aqv\(aq: verbose = 1;                  break;
        case \(aqz\(aq: map_zero = 1;                 break;
        case \(aqM\(aq: uid_map = optarg;             break;
        case \(aqG\(aq: gid_map = optarg;             break;
        case \(aqU\(aq: flags |= CLONE_NEWUSER;       break;
        default:  usage(argv[0]);
        }
    }

    /* \-M, \-G, or \-z without \-U is nonsensical, and \-z can\(aqt be
       combined with \-M or \-G */

    if (((uid_map != NULL || gid_map != NULL || map_zero) &&
                !(flags & CLONE_NEWUSER)) ||
            (map_zero && (uid_map != NULL || gid_map != NULL)))
        usage(argv[0]);

    /* A command to execute is mandatory; without this check the child
       would be handed a NULL command name */

    if (optind >= argc)
        usage(argv[0]);

    args.argv = &argv[optind];

    /* We use a pipe to synchronize the parent and child, in order to
       ensure that the parent sets the UID and GID maps before the child
       calls execve(). This ensures that the child maintains its
       capabilities during the execve() in the common case where we
       want to map the child\(aqs effective user ID to 0 in the new user
       namespace. Without this synchronization, the child would lose
       its capabilities if it performed an execve() with nonzero
       user IDs (see the capabilities(7) man page for details of the
       transformation of a process\(aqs capabilities during execve()). */

    if (pipe(args.pipe_fd) == \-1)
        errExit("pipe");

    /* Create the child in new namespace(s) */

    child_pid = clone(childFunc, child_stack + STACK_SIZE,
                      flags | SIGCHLD, &args);
    if (child_pid == \-1)
        errExit("clone");

    /* Parent falls through to here */

    if (verbose)
        printf("%s: PID of child created by clone() is %ld\\n",
                argv[0], (long) child_pid);

    /* Update the UID and GID maps in the child. With \-z, the map
       string is built in \(aqmap_buf\(aq; the buffer is reused for the
       GID map once the UID map has been written. */

    if (uid_map != NULL || map_zero) {
        snprintf(map_path, sizeof(map_path), "/proc/%ld/uid_map",
                (long) child_pid);
        if (map_zero) {
            snprintf(map_buf, sizeof(map_buf), "0 %ld 1", (long) getuid());
            uid_map = map_buf;
        }
        update_map(uid_map, map_path);
    }
    if (gid_map != NULL || map_zero) {
        snprintf(map_path, sizeof(map_path), "/proc/%ld/gid_map",
                (long) child_pid);
        if (map_zero) {
            snprintf(map_buf, sizeof(map_buf), "0 %ld 1", (long) getgid());
            gid_map = map_buf;
        }
        update_map(gid_map, map_path);
    }

    /* Close the write end of the pipe, to signal to the child that we
       have updated the UID and GID maps */

    close(args.pipe_fd[1]);

    if (waitpid(child_pid, NULL, 0) == \-1)      /* Wait for child */
        errExit("waitpid");

    if (verbose)
        printf("%s: terminating\\n", argv[0]);

    exit(EXIT_SUCCESS);
}
|
||||
.fi
|
||||
.SH SEE ALSO
|
||||
.BR unshare (1),
|
||||
.BR clone (2),
|
||||
|
@ -404,4 +735,5 @@ to gain more privilege than the root user of a user namespace has.
|
|||
.BR proc (5),
|
||||
.BR credentials (7),
|
||||
.BR capabilities (7),
|
||||
.BR namespaces (7)
|
||||
.BR namespaces (7),
|
||||
.BR pid_namespaces (7)
|
||||
|
|
Loading…
Reference in New Issue