mirror of https://github.com/mkerrisk/man-pages
568 lines
15 KiB
Groff
568 lines
15 KiB
Groff
.\" Copyright (C) 2006, Janak Desai <janak@us.ibm.com>
|
|
.\" and Copyright (C) 2006, 2012 Michael Kerrisk <mtk.manpages@gmail.com>
|
|
.\"
|
|
.\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
|
|
.\" Licensed under the GPL
|
|
.\" %%%LICENSE_END
|
|
.\"
|
|
.\" Patch Justification:
|
|
.\" unshare system call is needed to implement, using PAM,
|
|
.\" per-security_context and/or per-user namespace to provide
|
|
.\" polyinstantiated directories. Using unshare and bind mounts, a
|
|
.\" PAM module can create private namespace with appropriate
|
|
.\" directories(based on user's security context) bind mounted on
|
|
.\" public directories such as /tmp, thus providing an instance of
|
|
.\" /tmp that is based on user's security context. Without the
|
|
.\" unshare system call, namespace separation can only be achieved
|
|
.\" by clone, which would require porting and maintaining all commands
|
|
.\" such as login, and su, that establish a user session.
|
|
.\"
|
|
.TH UNSHARE 2 2021-03-22 "Linux" "Linux Programmer's Manual"
|
|
.SH NAME
|
|
unshare \- disassociate parts of the process execution context
|
|
.SH SYNOPSIS
|
|
.nf
|
|
.B #define _GNU_SOURCE
|
|
.B #include <sched.h>
|
|
.PP
|
|
.BI "int unshare(int " flags );
|
|
.fi
|
|
.SH DESCRIPTION
|
|
.BR unshare ()
|
|
allows a process (or thread) to disassociate parts of its execution
|
|
context that are currently being shared with other processes (or threads).
|
|
Part of the execution context, such as the mount namespace, is shared
|
|
implicitly when a new process is created using
|
|
.BR fork (2)
|
|
or
|
|
.BR vfork (2),
|
|
while other parts, such as virtual memory, may be
|
|
shared by explicit request when creating a process or thread using
|
|
.BR clone (2).
|
|
.PP
|
|
The main use of
|
|
.BR unshare ()
|
|
is to allow a process to control its
|
|
shared execution context without creating a new process.
|
|
.PP
|
|
The
|
|
.I flags
|
|
argument is a bit mask that specifies which parts of
|
|
the execution context should be unshared.
|
|
This argument is specified by ORing together zero or more
|
|
of the following constants:
|
|
.TP
|
|
.B CLONE_FILES
|
|
Reverse the effect of the
|
|
.BR clone (2)
|
|
.B CLONE_FILES
|
|
flag.
|
|
Unshare the file descriptor table, so that the calling process
|
|
no longer shares its file descriptors with any other process.
|
|
.TP
|
|
.B CLONE_FS
|
|
Reverse the effect of the
|
|
.BR clone (2)
|
|
.B CLONE_FS
|
|
flag.
|
|
Unshare filesystem attributes, so that the calling process
|
|
no longer shares its root directory
|
|
.RB ( chroot (2)),
|
|
current directory
|
|
.RB ( chdir (2)),
|
|
or umask
|
|
.RB ( umask (2))
|
|
attributes with any other process.
|
|
.TP
|
|
.BR CLONE_NEWCGROUP " (since Linux 4.6)"
|
|
This flag has the same effect as the
|
|
.BR clone (2)
|
|
.B CLONE_NEWCGROUP
|
|
flag.
|
|
Unshare the cgroup namespace.
|
|
Use of
|
|
.BR CLONE_NEWCGROUP
|
|
requires the
|
|
.BR CAP_SYS_ADMIN
|
|
capability.
|
|
.TP
|
|
.BR CLONE_NEWIPC " (since Linux 2.6.19)"
|
|
This flag has the same effect as the
|
|
.BR clone (2)
|
|
.B CLONE_NEWIPC
|
|
flag.
|
|
Unshare the IPC namespace,
|
|
so that the calling process has a private copy of the
|
|
IPC namespace which is not shared with any other process.
|
|
Specifying this flag automatically implies
|
|
.BR CLONE_SYSVSEM
|
|
as well.
|
|
Use of
|
|
.BR CLONE_NEWIPC
|
|
requires the
|
|
.BR CAP_SYS_ADMIN
|
|
capability.
|
|
.TP
|
|
.BR CLONE_NEWNET " (since Linux 2.6.24)"
|
|
This flag has the same effect as the
|
|
.BR clone (2)
|
|
.B CLONE_NEWNET
|
|
flag.
|
|
Unshare the network namespace,
|
|
so that the calling process is moved into a
|
|
new network namespace which is not shared
|
|
with any previously existing process.
|
|
Use of
|
|
.BR CLONE_NEWNET
|
|
requires the
|
|
.BR CAP_SYS_ADMIN
|
|
capability.
|
|
.TP
|
|
.B CLONE_NEWNS
|
|
.\" These flag name are inconsistent:
|
|
.\" CLONE_NEWNS does the same thing in clone(), but CLONE_VM,
|
|
.\" CLONE_FS, and CLONE_FILES reverse the action of the clone()
|
|
.\" flags of the same name.
|
|
This flag has the same effect as the
|
|
.BR clone (2)
|
|
.B CLONE_NEWNS
|
|
flag.
|
|
Unshare the mount namespace,
|
|
so that the calling process has a private copy of
|
|
its namespace which is not shared with any other process.
|
|
Specifying this flag automatically implies
|
|
.B CLONE_FS
|
|
as well.
|
|
Use of
|
|
.BR CLONE_NEWNS
|
|
requires the
|
|
.BR CAP_SYS_ADMIN
|
|
capability.
|
|
For further information, see
|
|
.BR mount_namespaces (7).
|
|
.TP
|
|
.BR CLONE_NEWPID " (since Linux 3.8)"
|
|
This flag has the same effect as the
|
|
.BR clone (2)
|
|
.B CLONE_NEWPID
|
|
flag.
|
|
Unshare the PID namespace,
|
|
so that the calling process has a new PID namespace for its children
|
|
which is not shared with any previously existing process.
|
|
The calling process is
|
|
.I not
|
|
moved into the new namespace.
|
|
The first child created by the calling process will have
|
|
the process ID 1 and will assume the role of
|
|
.BR init (1)
|
|
in the new namespace.
|
|
.BR CLONE_NEWPID
|
|
automatically implies
|
|
.BR CLONE_THREAD
|
|
as well.
|
|
Use of
|
|
.BR CLONE_NEWPID
|
|
requires the
|
|
.BR CAP_SYS_ADMIN
|
|
capability.
|
|
For further information, see
|
|
.BR pid_namespaces (7).
|
|
.TP
|
|
.BR CLONE_NEWTIME " (since Linux 5.6)"
|
|
Unshare the time namespace,
|
|
so that the calling process has a new time namespace for its children
|
|
which is not shared with any previously existing process.
|
|
The calling process is
|
|
.I not
|
|
moved into the new namespace.
|
|
Use of
|
|
.BR CLONE_NEWTIME
|
|
requires the
|
|
.BR CAP_SYS_ADMIN
|
|
capability.
|
|
For further information, see
|
|
.BR time_namespaces (7).
|
|
.TP
|
|
.BR CLONE_NEWUSER " (since Linux 3.8)"
|
|
This flag has the same effect as the
|
|
.BR clone (2)
|
|
.B CLONE_NEWUSER
|
|
flag.
|
|
Unshare the user namespace,
|
|
so that the calling process is moved into a new user namespace
|
|
which is not shared with any previously existing process.
|
|
As with the child process created by
|
|
.BR clone (2)
|
|
with the
|
|
.B CLONE_NEWUSER
|
|
flag, the caller obtains a full set of capabilities in the new namespace.
|
|
.IP
|
|
.BR CLONE_NEWUSER
|
|
requires that the calling process is not threaded; specifying
|
|
.BR CLONE_NEWUSER
|
|
automatically implies
|
|
.BR CLONE_THREAD .
|
|
Since Linux 3.9,
|
|
.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
|
|
.\" https://lwn.net/Articles/543273/
|
|
.BR CLONE_NEWUSER
|
|
also automatically implies
|
|
.BR CLONE_FS .
|
|
.BR CLONE_NEWUSER
|
|
requires that the user ID and group ID
|
|
of the calling process are mapped to user IDs and group IDs in the
|
|
user namespace of the calling process at the time of the call.
|
|
.IP
|
|
For further information on user namespaces, see
|
|
.BR user_namespaces (7).
|
|
.TP
|
|
.BR CLONE_NEWUTS " (since Linux 2.6.19)"
|
|
This flag has the same effect as the
|
|
.BR clone (2)
|
|
.B CLONE_NEWUTS
|
|
flag.
|
|
Unshare the UTS namespace,
|
|
so that the calling process has a private copy of the
|
|
UTS namespace which is not shared with any other process.
|
|
Use of
|
|
.BR CLONE_NEWUTS
|
|
requires the
|
|
.BR CAP_SYS_ADMIN
|
|
capability.
|
|
.TP
|
|
.BR CLONE_SYSVSEM " (since Linux 2.6.26)"
|
|
.\" commit 9edff4ab1f8d82675277a04e359d0ed8bf14a7b7
|
|
This flag reverses the effect of the
|
|
.BR clone (2)
|
|
.B CLONE_SYSVSEM
|
|
flag.
|
|
Unshare System\ V semaphore adjustment
|
|
.RI ( semadj )
|
|
values,
|
|
so that the calling process has a new empty
|
|
.I semadj
|
|
list that is not shared with any other process.
|
|
If this is the last process that has a reference to the process's current
|
|
.I semadj
|
|
list, then the adjustments in that list are applied
|
|
to the corresponding semaphores, as described in
|
|
.BR semop (2).
|
|
.\" CLONE_NEWNS If CLONE_SIGHAND is set and signals are also being shared
|
|
.\" (i.e., current->signal->count > 1), force CLONE_THREAD.
|
|
.PP
|
|
In addition,
|
|
.BR CLONE_THREAD ,
|
|
.BR CLONE_SIGHAND ,
|
|
and
|
|
.BR CLONE_VM
|
|
can be specified in
|
|
.I flags
|
|
if the caller is single threaded (i.e., it is not sharing
|
|
its address space with another process or thread).
|
|
In this case, these flags have no effect.
|
|
(Note also that specifying
|
|
.BR CLONE_THREAD
|
|
automatically implies
|
|
.BR CLONE_VM ,
|
|
and specifying
|
|
.BR CLONE_VM
|
|
automatically implies
|
|
.BR CLONE_SIGHAND .)
|
|
.\" As at 3.9, the following forced implications also apply,
|
|
.\" although the relevant flags are not yet implemented.
|
|
.\" If CLONE_THREAD is set force CLONE_VM.
|
|
.\" If CLONE_VM is set, force CLONE_SIGHAND.
|
|
.\"
|
|
If the process is multithreaded, then
|
|
the use of these flags results in an error.
|
|
.\" See kernel/fork.c::check_unshare_flags()
|
|
.PP
|
|
If
|
|
.I flags
|
|
is specified as zero, then
|
|
.BR unshare ()
|
|
is a no-op;
|
|
no changes are made to the calling process's execution context.
|
|
.SH RETURN VALUE
|
|
On success, zero is returned.
|
|
On failure, \-1 is returned and
|
|
.I errno
|
|
is set to indicate the error.
|
|
.SH ERRORS
|
|
.TP
|
|
.B EINVAL
|
|
An invalid bit was specified in
|
|
.IR flags .
|
|
.TP
|
|
.B EINVAL
|
|
.BR CLONE_THREAD ,
|
|
.BR CLONE_SIGHAND ,
|
|
or
|
|
.BR CLONE_VM
|
|
was specified in
|
|
.IR flags ,
|
|
and the caller is multithreaded.
|
|
.TP
|
|
.B EINVAL
|
|
.BR CLONE_NEWIPC
|
|
was specified in
|
|
.IR flags ,
|
|
but the kernel was not configured with the
|
|
.B CONFIG_SYSVIPC
|
|
and
|
|
.BR CONFIG_IPC_NS
|
|
options.
|
|
.TP
|
|
.B EINVAL
|
|
.BR CLONE_NEWNET
|
|
was specified in
|
|
.IR flags ,
|
|
but the kernel was not configured with the
|
|
.B CONFIG_NET_NS
|
|
option.
|
|
.TP
|
|
.B EINVAL
|
|
.BR CLONE_NEWPID
|
|
was specified in
|
|
.IR flags ,
|
|
but the kernel was not configured with the
|
|
.B CONFIG_PID_NS
|
|
option.
|
|
.TP
|
|
.B EINVAL
|
|
.BR CLONE_NEWUSER
|
|
was specified in
|
|
.IR flags ,
|
|
but the kernel was not configured with the
|
|
.B CONFIG_USER_NS
|
|
option.
|
|
.TP
|
|
.B EINVAL
|
|
.BR CLONE_NEWUTS
|
|
was specified in
|
|
.IR flags ,
|
|
but the kernel was not configured with the
|
|
.B CONFIG_UTS_NS
|
|
option.
|
|
.TP
|
|
.B EINVAL
|
|
.BR CLONE_NEWPID
|
|
was specified in
|
|
.IR flags ,
|
|
but the process has previously called
|
|
.BR unshare ()
|
|
with the
|
|
.BR CLONE_NEWPID
|
|
flag.
|
|
.TP
|
|
.B ENOMEM
|
|
Cannot allocate sufficient memory to copy parts of caller's
|
|
context that need to be unshared.
|
|
.TP
|
|
.BR ENOSPC " (since Linux 3.7)"
|
|
.\" commit f2302505775fd13ba93f034206f1e2a587017929
|
|
.B CLONE_NEWPID
|
|
was specified in
.IR flags ,
|
|
but the limit on the nesting depth of PID namespaces
|
|
would have been exceeded; see
|
|
.BR pid_namespaces (7).
|
|
.TP
|
|
.BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
|
|
.B CLONE_NEWUSER
|
|
was specified in
|
|
.IR flags ,
|
|
and the call would cause the limit on the number of
|
|
nested user namespaces to be exceeded.
|
|
See
|
|
.BR user_namespaces (7).
|
|
.IP
|
|
From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
|
|
.BR EUSERS .
|
|
.TP
|
|
.BR ENOSPC " (since Linux 4.9)"
|
|
One of the values in
|
|
.I flags
|
|
specified the creation of a new user namespace,
|
|
but doing so would have caused the limit defined by the corresponding file in
|
|
.IR /proc/sys/user
|
|
to be exceeded.
|
|
For further details, see
|
|
.BR namespaces (7).
|
|
.TP
|
|
.B EPERM
|
|
The calling process did not have the required privileges for this operation.
|
|
.TP
|
|
.B EPERM
|
|
.BR CLONE_NEWUSER
|
|
was specified in
|
|
.IR flags ,
|
|
but either the effective user ID or the effective group ID of the caller
|
|
does not have a mapping in the parent namespace (see
|
|
.BR user_namespaces (7)).
|
|
.TP
|
|
.BR EPERM " (since Linux 3.9)"
|
|
.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
|
|
.B CLONE_NEWUSER
|
|
was specified in
|
|
.I flags
|
|
and the caller is in a chroot environment
|
|
.\" FIXME What is the rationale for this restriction?
|
|
(i.e., the caller's root directory does not match the root directory
|
|
of the mount namespace in which it resides).
|
|
.TP
|
|
.BR EUSERS " (from Linux 3.11 to Linux 4.8)"
|
|
.B CLONE_NEWUSER
|
|
was specified in
|
|
.IR flags ,
|
|
and the limit on the number of nested user namespaces would be exceeded.
|
|
See the discussion of the
|
|
.BR ENOSPC
|
|
error above.
|
|
.SH VERSIONS
|
|
The
|
|
.BR unshare ()
|
|
system call was added to Linux in kernel 2.6.16.
|
|
.SH CONFORMING TO
|
|
The
|
|
.BR unshare ()
|
|
system call is Linux-specific.
|
|
.SH NOTES
|
|
Not all of the process attributes that can be shared when
|
|
a new process is created using
|
|
.BR clone (2)
|
|
can be unshared using
|
|
.BR unshare ().
|
|
In particular, as at kernel 3.8,
|
|
.\" FIXME all of the following needs to be reviewed for the current kernel
|
|
.BR unshare ()
|
|
does not implement flags that reverse the effects of
|
|
.BR CLONE_SIGHAND ,
|
|
.\" However, we can do unshare(CLONE_SIGHAND) if CLONE_SIGHAND
|
|
.\" was not specified when doing clone(); i.e., unsharing
|
|
.\" signal handlers is permitted if we are not actually
|
|
.\" sharing signal handlers. mtk
|
|
.BR CLONE_THREAD ,
|
|
or
|
|
.BR CLONE_VM .
|
|
.\" However, we can do unshare(CLONE_VM) if CLONE_VM
|
|
.\" was not specified when doing clone(); i.e., unsharing
|
|
.\" virtual memory is permitted if we are not actually
|
|
.\" sharing virtual memory. mtk
|
|
Such functionality may be added in the future, if required.
|
|
.\"
|
|
.\"9) Future Work
|
|
.\"--------------
|
|
.\"The current implementation of unshare does not allow unsharing of
|
|
.\"signals and signal handlers. Signals are complex to begin with and
|
|
.\"to unshare signals and/or signal handlers of a currently running
|
|
.\"process is even more complex. If in the future there is a specific
|
|
.\"need to allow unsharing of signals and/or signal handlers, it can
|
|
.\"be incrementally added to unshare without affecting legacy
|
|
.\"applications using unshare.
|
|
.\"
|
|
.SH EXAMPLES
|
|
The program below provides a simple implementation of the
|
|
.BR unshare (1)
|
|
command, which unshares one or more namespaces and executes the
|
|
command supplied in its command-line arguments.
|
|
Here's an example of the use of this program,
|
|
running a shell in a new mount namespace,
|
|
and verifying that the original shell and the
|
|
new shell are in separate mount namespaces:
|
|
.PP
|
|
.in +4n
|
|
.EX
|
|
$ \fBreadlink /proc/$$/ns/mnt\fP
|
|
mnt:[4026531840]
|
|
$ \fBsudo ./unshare \-m /bin/bash\fP
|
|
# \fBreadlink /proc/$$/ns/mnt\fP
|
|
mnt:[4026532325]
|
|
.EE
|
|
.in
|
|
.PP
|
|
The differing output of the two
|
|
.BR readlink (1)
|
|
commands shows that the two shells are in different mount namespaces.
|
|
.SS Program source
|
|
\&
|
|
.EX
|
|
/* unshare.c
|
|
|
|
A simple implementation of the unshare(1) command: unshare
|
|
namespaces and execute a command.
|
|
*/
|
|
#define _GNU_SOURCE
|
|
#include <sched.h>
|
|
#include <unistd.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
|
|
/* A simple error\-handling function: print an error message based
|
|
on the value in \(aqerrno\(aq and terminate the calling process. */
|
|
|
|
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
|
|
} while (0)
|
|
|
|
static void
|
|
usage(char *pname)
|
|
{
|
|
fprintf(stderr, "Usage: %s [options] program [arg...]\en", pname);
|
|
fprintf(stderr, "Options can be:\en");
|
|
fprintf(stderr, " \-C unshare cgroup namespace\en");
|
|
fprintf(stderr, " \-i unshare IPC namespace\en");
|
|
fprintf(stderr, " \-m unshare mount namespace\en");
|
|
fprintf(stderr, " \-n unshare network namespace\en");
|
|
fprintf(stderr, " \-p unshare PID namespace\en");
|
|
fprintf(stderr, " \-t unshare time namespace\en");
|
|
fprintf(stderr, " \-u unshare UTS namespace\en");
|
|
fprintf(stderr, " \-U unshare user namespace\en");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
int flags, opt;
|
|
|
|
flags = 0;
|
|
|
|
while ((opt = getopt(argc, argv, "CimnptuU")) != \-1) {
|
|
switch (opt) {
|
|
case \(aqC\(aq: flags |= CLONE_NEWCGROUP; break;
|
|
case \(aqi\(aq: flags |= CLONE_NEWIPC; break;
|
|
case \(aqm\(aq: flags |= CLONE_NEWNS; break;
|
|
case \(aqn\(aq: flags |= CLONE_NEWNET; break;
|
|
case \(aqp\(aq: flags |= CLONE_NEWPID; break;
|
|
case \(aqt\(aq: flags |= CLONE_NEWTIME; break;
|
|
case \(aqu\(aq: flags |= CLONE_NEWUTS; break;
|
|
case \(aqU\(aq: flags |= CLONE_NEWUSER; break;
|
|
default: usage(argv[0]);
|
|
}
|
|
}
|
|
|
|
if (optind >= argc)
|
|
usage(argv[0]);
|
|
|
|
if (unshare(flags) == \-1)
|
|
errExit("unshare");
|
|
|
|
execvp(argv[optind], &argv[optind]);
|
|
errExit("execvp");
|
|
}
|
|
.EE
|
|
.SH SEE ALSO
|
|
.BR unshare (1),
|
|
.BR clone (2),
|
|
.BR fork (2),
|
|
.BR kcmp (2),
|
|
.BR setns (2),
|
|
.BR vfork (2),
|
|
.BR namespaces (7)
|
|
.PP
|
|
.I Documentation/userspace\-api/unshare.rst
|
|
in the Linux kernel source tree
|
|
.\" commit f504d47be5e8fa7ecf2bf660b18b42e6960c0eb2
|
|
(or
|
|
.I Documentation/unshare.txt
|
|
before Linux 4.12)
|