2013-02-27 06:49:48 +00:00
|
|
|
.\" Copyright (c) 2013 by Michael Kerrisk <mtk.manpages@gmail.com>
|
|
|
|
.\" and Copyright (c) 2012 by Eric W. Biederman <ebiederm@xmission.com>
|
|
|
|
.\"
|
2014-09-16 07:05:40 +00:00
|
|
|
.\" %%%LICENSE_START(VERBATIM)
|
2013-02-27 06:49:48 +00:00
|
|
|
.\" Permission is granted to make and distribute verbatim copies of this
|
|
|
|
.\" manual provided the copyright notice and this permission notice are
|
|
|
|
.\" preserved on all copies.
|
|
|
|
.\"
|
|
|
|
.\" Permission is granted to copy and distribute modified versions of this
|
|
|
|
.\" manual under the conditions for verbatim copying, provided that the
|
|
|
|
.\" entire resulting derived work is distributed under the terms of a
|
|
|
|
.\" permission notice identical to this one.
|
|
|
|
.\"
|
|
|
|
.\" Since the Linux kernel and libraries are constantly changing, this
|
|
|
|
.\" manual page may be incorrect or out-of-date. The author(s) assume no
|
|
|
|
.\" responsibility for errors or omissions, or for damages resulting from
|
|
|
|
.\" the use of the information contained herein. The author(s) may not
|
|
|
|
.\" have taken the same level of care in the production of this manual,
|
|
|
|
.\" which is licensed free of charge, as they might when working
|
|
|
|
.\" professionally.
|
|
|
|
.\"
|
|
|
|
.\" Formatted or processed versions of this manual, if unaccompanied by
|
|
|
|
.\" the source, must acknowledge the copyright and authors of this work.
|
2014-09-16 07:05:40 +00:00
|
|
|
.\" %%%LICENSE_END
|
2013-02-27 06:49:48 +00:00
|
|
|
.\"
|
|
|
|
.\"
|
getent.1, iconv.1, ldd.1, locale.1, localedef.1, memusage.1, memusagestat.1, pldd.1, sprof.1, time.1, _syscall.2, accept.2, add_key.2, adjtimex.2, bind.2, bpf.2, capget.2, chown.2, chroot.2, clock_getres.2, clone.2, connect.2, copy_file_range.2, epoll_ctl.2, epoll_wait.2, eventfd.2, fanotify_init.2, fanotify_mark.2, fcntl.2, fsync.2, futex.2, getcpu.2, getdents.2, getgid.2, getgroups.2, getpid.2, gettid.2, gettimeofday.2, getuid.2, getxattr.2, inotify_add_watch.2, inotify_init.2, ioctl_fat.2, ioctl_ns.2, ioctl_userfaultfd.2, ioprio_set.2, kcmp.2, kexec_load.2, keyctl.2, listxattr.2, lseek.2, madvise.2, memfd_create.2, migrate_pages.2, mount.2, mprotect.2, mremap.2, msgctl.2, msgop.2, nfsservctl.2, open_by_handle_at.2, perf_event_open.2, pipe.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, readahead.2, readdir.2, readlink.2, reboot.2, recvmmsg.2, removexattr.2, rename.2, request_key.2, s390_guarded_storage.2, s390_runtime_instr.2, s390_sthyi.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, seccomp.2, select.2, select_tut.2, semctl.2, sendmmsg.2, set_thread_area.2, setgid.2, setns.2, setuid.2, setxattr.2, shmctl.2, sigaction.2, signalfd.2, sigsuspend.2, socket.2, socketpair.2, spu_run.2, stat.2, statx.2, subpage_prot.2, syscalls.2, sysctl.2, tee.2, timer_create.2, timerfd_create.2, truncate.2, uname.2, unshare.2, userfaultfd.2, ustat.2, vmsplice.2, write.2, CPU_SET.3, __ppc_get_timebase.3, alloca.3, argz_add.3, asprintf.3, backtrace.3, basename.3, bsd_signal.3, bstring.3, bswap.3, bzero.3, cacos.3, cacosh.3, catan.3, catanh.3, catgets.3, clock_getcpuclockid.3, cmsg.3, confstr.3, ctermid.3, ctime.3, des_crypt.3, dl_iterate_phdr.3, dlinfo.3, dlsym.3, duplocale.3, end.3, endian.3, errno.3, exec.3, exit.3, ferror.3, fgetws.3, fmemopen.3, fnmatch.3, fopencookie.3, fputws.3, frexp.3, ftw.3, get_nprocs_conf.3, get_phys_pages.3, getaddrinfo.3, getaddrinfo_a.3, getdate.3, getgrouplist.3, getifaddrs.3, getline.3, getlogin.3, getmntent.3, 
getnameinfo.3, getopt.3, getpass.3, getprotoent_r.3, getpwnam.3, getservent_r.3, getsubopt.3, glob.3, gnu_get_libc_version.3, hsearch.3, if_nameindex.3, index.3, inet.3, inet_net_pton.3, inet_pton.3, insque.3, isatty.3, iswblank.3, iswspace.3, lockf.3, makecontext.3, mallinfo.3, malloc.3, malloc_hook.3, malloc_info.3, mallopt.3, matherr.3, mbrtowc.3, mbsnrtowcs.3, mbsrtowcs.3, mbstowcs.3, mbtowc.3, mcheck.3, memchr.3, mq_getattr.3, mq_notify.3, newlocale.3, nl_langinfo.3, offsetof.3, perror.3, posix_spawn.3, printf.3, pthread_attr_init.3, pthread_cancel.3, pthread_cleanup_push.3, pthread_getattr_default_np.3, pthread_getattr_np.3, pthread_getcpuclockid.3, pthread_mutexattr_setrobust.3, pthread_rwlockattr_setkind_np.3, pthread_setaffinity_np.3, pthread_setname_np.3, pthread_setschedparam.3, pthread_sigmask.3, putenv.3, qsort.3, rand.3, random.3, readdir.3, regex.3, resolver.3, rpmatch.3, rtime.3, scanf.3, sem_wait.3, setaliasent.3, setbuf.3, stpcpy.3, stpncpy.3, strcat.3, strchr.3, strcmp.3, strcpy.3, strdup.3, strerror.3, strfromd.3, strfry.3, strftime.3, string.3, strlen.3, strnlen.3, strsep.3, strstr.3, strtok.3, strtol.3, strtoul.3, strverscmp.3, strxfrm.3, system.3, termios.3, trunc.3, wcpcpy.3, wcpncpy.3, wcrtomb.3, wcscat.3, wcscpy.3, wcslen.3, wcsncat.3, wcsncmp.3, wcsncpy.3, wcsnlen.3, wcsnrtombs.3, wcsrtombs.3, wcsstr.3, wcstok.3, wcstombs.3, wcwidth.3, wprintf.3, xcrypt.3, console_codes.4, dsp56k.4, full.4, initrd.4, lirc.4, loop.4, st.4, tty.4, vcs.4, charmap.5, core.5, host.conf.5, locale.5, proc.5, repertoiremap.5, resolv.conf.5, termcap.5, tmpfs.5, tzfile.5, aio.7, capabilities.7, cgroup_namespaces.7, cgroups.7, charsets.7, complex.7, epoll.7, fanotify.7, feature_test_macros.7, inotify.7, ip.7, locale.7, man-pages.7, man.7, namespaces.7, pid_namespaces.7, pkeys.7, pthreads.7, rtld-audit.7, sched.7, signal.7, sock_diag.7, socket.7, tcp.7, udp.7, unicode.7, user_namespaces.7, utf-8.7, zdump.8, zic.8: tstamp
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2019-03-06 14:11:03 +00:00
|
|
|
.TH PID_NAMESPACES 7 2019-03-06 "Linux" "Linux Programmer's Manual"
|
2013-02-27 06:49:48 +00:00
|
|
|
.SH NAME
|
|
|
|
pid_namespaces \- overview of Linux PID namespaces
|
|
|
|
.SH DESCRIPTION
|
|
|
|
For an overview of namespaces, see
|
|
|
|
.BR namespaces (7).
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2013-02-27 06:49:48 +00:00
|
|
|
PID namespaces isolate the process ID number space,
|
|
|
|
meaning that processes in different PID namespaces can have the same PID.
|
2013-03-01 09:00:34 +00:00
|
|
|
PID namespaces allow containers to provide functionality
|
|
|
|
such as suspending/resuming the set of processes in the container and
|
|
|
|
migrating the container to a new host
|
2013-02-27 06:49:48 +00:00
|
|
|
while the processes inside the container maintain the same PIDs.
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2013-02-27 06:49:48 +00:00
|
|
|
PIDs in a new PID namespace start at 1,
|
|
|
|
somewhat like a standalone system, and calls to
|
|
|
|
.BR fork (2),
|
|
|
|
.BR vfork (2),
|
|
|
|
or
|
|
|
|
.BR clone (2)
|
|
|
|
will produce processes with PIDs that are unique within the namespace.
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2013-02-27 07:04:38 +00:00
|
|
|
Use of PID namespaces requires a kernel that is configured with the
|
|
|
|
.B CONFIG_PID_NS
|
|
|
|
option.
|
2013-02-28 10:10:47 +00:00
|
|
|
.\"
|
|
|
|
.\" ============================================================
|
|
|
|
.\"
|
2013-02-27 07:04:38 +00:00
|
|
|
.SS The namespace "init" process
|
2013-02-27 06:49:48 +00:00
|
|
|
The first process created in a new namespace
|
|
|
|
(i.e., the process created using
|
|
|
|
.BR clone (2)
|
|
|
|
with the
|
|
|
|
.BR CLONE_NEWPID
|
|
|
|
flag, or the first child created by a process after a call to
|
|
|
|
.BR unshare (2)
|
|
|
|
using the
|
|
|
|
.BR CLONE_NEWPID
|
|
|
|
flag) has the PID 1, and is the "init" process for the namespace (see
|
|
|
|
.BR init (1)).
|
2018-11-19 15:23:28 +00:00
|
|
|
This process becomes the parent of any child processes that are orphaned
|
|
|
|
because a process that resides in this PID namespace terminated
|
|
|
|
(see below for further details).
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2013-02-27 06:49:48 +00:00
|
|
|
If the "init" process of a PID namespace terminates,
|
|
|
|
the kernel terminates all of the processes in the namespace via a
|
|
|
|
.BR SIGKILL
|
|
|
|
signal.
|
|
|
|
This behavior reflects the fact that the "init" process
|
|
|
|
is essential for the correct operation of a PID namespace.
|
2013-02-28 10:40:50 +00:00
|
|
|
In this case, a subsequent
|
2013-02-27 06:49:48 +00:00
|
|
|
.BR fork (2)
|
access.2, delete_module.2, eventfd.2, fallocate.2, fcntl.2, getrandom.2, init_module.2, open.2, seccomp.2, timerfd_create.2, openpty.3, pthread_spin_lock.3, shm_open.3, tempnam.3, fifo.7, keyrings.7, pid_namespaces.7, sched.7, thread-keyring.7: wfix (will fail --> fail/fails)
Reported-by: Pedro Alves <palves@redhat.com>
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-10-20 14:55:10 +00:00
|
|
|
into this PID namespace fail with the error
|
2013-02-27 06:49:48 +00:00
|
|
|
.BR ENOMEM ;
|
|
|
|
it is not possible to create a new process in a PID namespace whose "init"
|
|
|
|
process has terminated.
|
2013-03-01 10:19:14 +00:00
|
|
|
Such scenarios can occur when, for example,
|
|
|
|
a process uses an open file descriptor for a
|
|
|
|
.I /proc/[pid]/ns/pid
|
|
|
|
file corresponding to a process that was in a namespace to
|
|
|
|
.BR setns (2)
|
|
|
|
into that namespace after the "init" process has terminated.
|
|
|
|
Another possible scenario can occur after a call to
|
|
|
|
.BR unshare (2):
|
|
|
|
if the first child subsequently created by a
|
|
|
|
.BR fork (2)
|
|
|
|
terminates, then subsequent calls to
|
|
|
|
.BR fork (2)
|
access.2, delete_module.2, eventfd.2, fallocate.2, fcntl.2, getrandom.2, init_module.2, open.2, seccomp.2, timerfd_create.2, openpty.3, pthread_spin_lock.3, shm_open.3, tempnam.3, fifo.7, keyrings.7, pid_namespaces.7, sched.7, thread-keyring.7: wfix (will fail --> fail/fails)
Reported-by: Pedro Alves <palves@redhat.com>
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-10-20 14:55:10 +00:00
|
|
|
fail with
|
2013-03-01 10:19:14 +00:00
|
|
|
.BR ENOMEM .
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2013-02-27 06:49:48 +00:00
|
|
|
Only signals for which the "init" process has established a signal handler
|
|
|
|
can be sent to the "init" process by other members of the PID namespace.
|
|
|
|
This restriction applies even to privileged processes,
|
|
|
|
and prevents other members of the PID namespace from
|
|
|
|
accidentally killing the "init" process.
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2013-02-27 06:49:48 +00:00
|
|
|
Likewise, a process in an ancestor namespace
|
|
|
|
can\(emsubject to the usual permission checks described in
|
|
|
|
.BR kill (2)\(emsend
|
2013-02-28 10:40:50 +00:00
|
|
|
signals to the "init" process of a child PID namespace only
|
2013-02-27 06:49:48 +00:00
|
|
|
if the "init" process has established a handler for that signal.
|
|
|
|
(Within the handler, the
|
|
|
|
.I siginfo_t
|
|
|
|
.I si_pid
|
|
|
|
field described in
|
|
|
|
.BR sigaction (2)
|
|
|
|
will be zero.)
|
|
|
|
.B SIGKILL
|
|
|
|
or
|
|
|
|
.B SIGSTOP
|
|
|
|
are treated exceptionally:
|
|
|
|
these signals are forcibly delivered when sent from an ancestor PID namespace.
|
|
|
|
Neither of these signals can be caught by the "init" process,
|
|
|
|
and so will result in the usual actions associated with those signals
|
|
|
|
(respectively, terminating and stopping the process).
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2013-03-14 10:15:08 +00:00
|
|
|
Starting with Linux 3.4, the
|
2013-03-06 23:32:21 +00:00
|
|
|
.BR reboot (2)
|
2014-10-27 20:42:53 +00:00
|
|
|
system call causes a signal to be sent to the namespace "init" process.
|
2013-03-06 23:32:21 +00:00
|
|
|
See
|
2014-03-14 18:57:08 +00:00
|
|
|
.BR reboot (2)
|
2013-03-06 23:32:21 +00:00
|
|
|
for more details.
|
2013-02-28 10:10:47 +00:00
|
|
|
.\"
|
|
|
|
.\" ============================================================
|
|
|
|
.\"
|
2013-02-27 07:04:38 +00:00
|
|
|
.SS Nesting PID namespaces
|
2013-02-28 10:40:26 +00:00
|
|
|
PID namespaces can be nested:
|
|
|
|
each PID namespace has a parent,
|
|
|
|
except for the initial ("root") PID namespace.
|
|
|
|
The parent of a PID namespace is the PID namespace of the process that
|
|
|
|
created the namespace using
|
|
|
|
.BR clone (2)
|
|
|
|
or
|
|
|
|
.BR unshare (2).
|
|
|
|
PID namespaces thus form a tree,
|
|
|
|
with all namespaces ultimately tracing their ancestry to the root namespace.
|
2017-04-17 07:33:05 +00:00
|
|
|
Since Linux 3.7,
|
|
|
|
.\" commit f2302505775fd13ba93f034206f1e2a587017929
|
|
|
|
.\" The kernel constant MAX_PID_NS_LEVEL
|
|
|
|
the kernel limits the maximum nesting depth for PID namespaces to 32.
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2013-02-28 10:40:26 +00:00
|
|
|
A process is visible to other processes in its PID namespace,
|
|
|
|
and to the processes in each direct ancestor PID namespace
|
|
|
|
going back to the root PID namespace.
|
|
|
|
In this context, "visible" means that one process
|
|
|
|
can be the target of operations by another process using
|
|
|
|
system calls that specify a process ID.
|
|
|
|
Conversely, the processes in a child PID namespace can't see
|
2014-10-27 20:42:53 +00:00
|
|
|
processes in the parent and further removed ancestor namespaces.
|
2013-02-27 06:49:48 +00:00
|
|
|
More succinctly: a process can see (e.g., send signals with
|
2014-03-14 18:57:08 +00:00
|
|
|
.BR kill (2),
|
2013-02-28 10:40:26 +00:00
|
|
|
set nice values with
|
|
|
|
.BR setpriority (2),
|
|
|
|
etc.) only processes contained in its own PID namespace
|
|
|
|
and in descendants of that namespace.
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2013-02-28 10:40:26 +00:00
|
|
|
A process has one process ID in each of the layers of the PID
|
|
|
|
namespace hierarchy in which it is visible,
|
|
|
|
and walking back through each direct ancestor namespace
|
2013-02-27 06:49:48 +00:00
|
|
|
through to the root PID namespace.
|
2013-02-28 10:40:26 +00:00
|
|
|
System calls that operate on process IDs always
|
|
|
|
operate using the process ID that is visible in the
|
|
|
|
PID namespace of the caller.
|
2013-02-27 06:49:48 +00:00
|
|
|
A call to
|
|
|
|
.BR getpid (2)
|
|
|
|
always returns the PID associated with the namespace in which
|
2013-02-28 10:40:26 +00:00
|
|
|
the process was created.
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2013-02-27 06:49:48 +00:00
|
|
|
Some processes in a PID namespace may have parents
|
|
|
|
that are outside of the namespace.
|
|
|
|
For example, the parent of the initial process in the namespace
|
2013-02-28 10:40:26 +00:00
|
|
|
(i.e., the
|
2013-02-27 06:49:48 +00:00
|
|
|
.BR init (1)
|
|
|
|
process with PID 1) is necessarily in another namespace.
|
|
|
|
Likewise, the direct children of a process that uses
|
|
|
|
.BR setns (2)
|
|
|
|
to cause its children to join a PID namespace are in a different
|
|
|
|
PID namespace from the caller of
|
|
|
|
.BR setns (2).
|
|
|
|
Calls to
|
|
|
|
.BR getppid (2)
|
|
|
|
for such processes return 0.
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2015-01-06 14:23:51 +00:00
|
|
|
While processes may freely descend into child PID namespaces
|
|
|
|
(e.g., using
|
2015-01-02 18:04:42 +00:00
|
|
|
.BR setns (2)
|
2017-05-23 19:38:56 +00:00
|
|
|
with a PID namespace file descriptor),
|
2015-01-02 18:04:42 +00:00
|
|
|
they may not move in the other direction.
|
|
|
|
That is to say, processes may not enter any ancestor namespaces
|
|
|
|
(parent, grandparent, etc.).
|
2016-12-11 14:45:08 +00:00
|
|
|
Changing PID namespaces is a one-way operation.
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2016-12-11 14:49:29 +00:00
|
|
|
The
|
|
|
|
.BR NS_GET_PARENT
|
|
|
|
.BR ioctl (2)
|
|
|
|
operation can be used to discover the parental relationship
|
|
|
|
between PID namespaces; see
|
2017-01-08 18:24:49 +00:00
|
|
|
.BR ioctl_ns (2).
|
2013-02-28 10:10:47 +00:00
|
|
|
.\"
|
|
|
|
.\" ============================================================
|
|
|
|
.\"
|
2013-02-27 07:04:38 +00:00
|
|
|
.SS setns(2) and unshare(2) semantics
|
2013-02-27 06:49:48 +00:00
|
|
|
Calls to
|
|
|
|
.BR setns (2)
|
|
|
|
that specify a PID namespace file descriptor
|
|
|
|
and calls to
|
|
|
|
.BR unshare (2)
|
|
|
|
with the
|
|
|
|
.BR CLONE_NEWPID
|
|
|
|
flag cause children subsequently created
|
|
|
|
by the caller to be placed in a different PID namespace from the caller.
|
2017-05-23 19:41:42 +00:00
|
|
|
(Since Linux 4.12, that PID namespace is shown via the
|
|
|
|
.IR /proc/[pid]/ns/pid_for_children
|
|
|
|
file, as described in
|
|
|
|
.BR namespaces (7).)
|
2013-02-27 06:49:48 +00:00
|
|
|
These calls do not, however,
|
|
|
|
change the PID namespace of the calling process,
|
|
|
|
because doing so would change the caller's idea of its own PID
|
|
|
|
(as reported by
|
|
|
|
.BR getpid (2)),
|
|
|
|
which would break many applications and libraries.
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2013-02-27 06:49:48 +00:00
|
|
|
To put things another way:
|
|
|
|
a process's PID namespace membership is determined when the process is created
|
|
|
|
and cannot be changed thereafter.
|
2013-02-28 10:53:18 +00:00
|
|
|
Among other things, this means that the parental relationship
|
2013-03-01 09:02:22 +00:00
|
|
|
between processes mirrors the parental relationship between PID namespaces:
|
2013-02-28 10:53:18 +00:00
|
|
|
the parent of a process is either in the same namespace
|
|
|
|
or resides in the immediate parent PID namespace.
|
2018-10-01 12:42:07 +00:00
|
|
|
.PP
|
|
|
|
A process may call
|
|
|
|
.BR unshare (2)
|
|
|
|
with the
|
|
|
|
.B CLONE_NEWPID
|
|
|
|
flag only once.
|
2018-10-01 12:47:49 +00:00
|
|
|
After it has performed this operation, its
|
|
|
|
.IR /proc/PID/ns/pid_for_children
|
|
|
|
symbolic link will be empty until the first child is created in the namespace.
|
2018-10-01 12:42:07 +00:00
|
|
|
.\"
|
2018-11-19 15:23:28 +00:00
|
|
|
.\" ============================================================
|
|
|
|
.\"
|
|
|
|
.SS Adoption of orphaned children
|
|
|
|
When a child process becomes orphaned, it is reparented to the "init"
|
|
|
|
process in the PID namespace of its parent
|
|
|
|
(unless one of the nearer ancestors of the parent employed the
|
|
|
|
.BR prctl (2)
|
|
|
|
.B PR_SET_CHILD_SUBREAPER
|
|
|
|
command to mark itself as the reaper of orphaned descendant processes).
|
|
|
|
Note that because of the
|
|
|
|
.BR setns (2)
|
|
|
|
and
|
|
|
|
.BR unshare (2)
|
|
|
|
semantics described above, this may be the "init" process in the PID
|
|
|
|
namespace that is the
|
|
|
|
.I parent
|
|
|
|
of the child's PID namespace,
|
|
|
|
rather than the "init" process in the child's own PID namespace.
|
|
|
|
.\" Furthermore, by definition, the parent of the "init" process
|
|
|
|
.\" of a PID namespace resides in the parent PID namespace.
|
|
|
|
.\"
|
|
|
|
.\" ============================================================
|
|
|
|
.\"
|
2013-03-06 09:21:12 +00:00
|
|
|
.SS Compatibility of CLONE_NEWPID with other CLONE_* flags
|
2016-12-28 14:56:05 +00:00
|
|
|
In current versions of Linux,
|
2013-03-06 09:21:12 +00:00
|
|
|
.BR CLONE_NEWPID
|
2016-12-27 19:34:08 +00:00
|
|
|
can't be combined with
|
|
|
|
.BR CLONE_THREAD .
|
|
|
|
Threads are required to be in the same PID namespace such that
|
2013-03-06 09:21:12 +00:00
|
|
|
the threads in a process can send signals to each other.
|
|
|
|
Similarly, it must be possible to see all of the threads
|
|
|
|
of a process in the
|
|
|
|
.BR proc (5)
|
2016-12-28 14:56:05 +00:00
|
|
|
filesystem.
|
|
|
|
Additionally, if two threads were in different PID
|
2016-12-27 19:34:08 +00:00
|
|
|
namespaces, the process ID of the process sending a signal
|
2013-03-06 09:21:12 +00:00
|
|
|
could not be meaningfully encoded when a signal is sent
|
|
|
|
(see the description of the
|
|
|
|
.I siginfo_t
|
|
|
|
type in
|
|
|
|
.BR sigaction (2)).
|
2016-12-28 14:56:05 +00:00
|
|
|
Since this is computed when a signal is enqueued,
|
2016-12-27 19:34:08 +00:00
|
|
|
a signal queue shared by processes in multiple PID namespaces
|
|
|
|
would defeat that.
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2016-12-27 19:34:08 +00:00
|
|
|
.\" Note these restrictions were all introduced in
|
|
|
|
.\" 8382fcac1b813ad0a4e68a838fc7ae93fa39eda0
|
|
|
|
.\" when CLONE_NEWPID|CLONE_VM was disallowed
|
2016-12-28 14:56:05 +00:00
|
|
|
In earlier versions of Linux,
|
2016-12-27 19:34:08 +00:00
|
|
|
.BR CLONE_NEWPID
|
2016-12-28 14:56:05 +00:00
|
|
|
was additionally disallowed (failing with the error
|
|
|
|
.BR EINVAL )
|
|
|
|
in combination with
|
2016-12-27 19:34:08 +00:00
|
|
|
.BR CLONE_SIGHAND
|
|
|
|
.\" (restriction lifted in faf00da544045fdc1454f3b9e6d7f65c841de302)
|
2016-12-28 14:56:05 +00:00
|
|
|
(before Linux 4.3) as well as
|
2016-12-27 19:34:08 +00:00
|
|
|
.\" (restriction lifted in e79f525e99b04390ca4d2366309545a836c03bf1)
|
|
|
|
.BR CLONE_VM
|
2016-12-28 14:56:05 +00:00
|
|
|
(before Linux 3.12).
|
|
|
|
The changes that lifted these restrictions have also been ported to
|
|
|
|
earlier stable kernels.
|
2013-02-28 10:10:47 +00:00
|
|
|
.\"
|
|
|
|
.\" ============================================================
|
|
|
|
.\"
|
2013-03-01 08:32:51 +00:00
|
|
|
.SS /proc and PID namespaces
|
2013-03-01 08:49:21 +00:00
|
|
|
A
|
|
|
|
.I /proc
|
2014-03-14 18:54:00 +00:00
|
|
|
filesystem shows (in the
|
getrusage.2, madvise.2, memfd_create.2, mlock.2, mount.2, getauxval.3, core.5, capabilities.7, pid_namespaces.7, symlink.7, user_namespaces.7: Consistently use /proc/[pid] (not /proc/PID)
Reported-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2016-10-07 20:42:07 +00:00
|
|
|
.I /proc/[pid]
|
2013-03-01 08:49:21 +00:00
|
|
|
directories) only processes visible in the PID namespace
|
|
|
|
of the process that performed the mount, even if the
|
|
|
|
.I /proc
|
2014-03-14 18:54:00 +00:00
|
|
|
filesystem is viewed from processes in other namespaces.
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2013-02-27 07:04:38 +00:00
|
|
|
After creating a new PID namespace,
|
|
|
|
it is useful for the child to change its root directory
|
|
|
|
and mount a new procfs instance at
|
|
|
|
.I /proc
|
|
|
|
so that tools such as
|
|
|
|
.BR ps (1)
|
|
|
|
work correctly.
|
2013-03-01 08:32:51 +00:00
|
|
|
If a new mount namespace is simultaneously created by including
|
2013-02-27 07:04:38 +00:00
|
|
|
.BR CLONE_NEWNS
|
|
|
|
in the
|
2013-02-28 10:40:50 +00:00
|
|
|
.IR flags
|
2013-02-27 07:04:38 +00:00
|
|
|
argument of
|
|
|
|
.BR clone (2)
|
|
|
|
or
|
2013-03-01 08:55:58 +00:00
|
|
|
.BR unshare (2),
|
2013-02-27 07:04:38 +00:00
|
|
|
then it isn't necessary to change the root directory:
|
|
|
|
a new procfs instance can be mounted directly over
|
2013-03-01 08:32:51 +00:00
|
|
|
.IR /proc .
|
aio.7, arp.7, attributes.7, boot.7, cgroups.7, cpuset.7, credentials.7, fanotify.7, fifo.7, glob.7, hier.7, hostname.7, icmp.7, inode.7, inotify.7, keyrings.7, libc.7, mailaddr.7, mount_namespaces.7, mq_overview.7, nptl.7, numa.7, path_resolution.7, persistent-keyring.7, pid_namespaces.7, pipe.7, pkeys.7, process-keyring.7, pthreads.7, pty.7, random.7, sched.7, sem_overview.7, session-keyring.7, shm_overview.7, signal-safety.7, signal.7, spufs.7, standards.7, symlink.7, termio.7, thread-keyring.7, time.7, unicode.7, user-keyring.7, user-session-keyring.7, user_namespaces.7, utf-8.7, xattr.7: ffix
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-17 22:59:04 +00:00
|
|
|
.PP
|
2013-03-01 08:49:21 +00:00
|
|
|
From a shell, the command to mount
|
|
|
|
.I /proc
|
|
|
|
is:
|
2017-08-15 14:15:21 +00:00
|
|
|
.PP
|
|
|
|
.in +4n
|
|
|
|
.EX
|
|
|
|
$ mount \-t proc proc /proc
|
|
|
|
.EE
|
|
|
|
.in
|
|
|
|
.PP
|
2013-02-28 11:02:28 +00:00
|
|
|
Calling
|
|
|
|
.BR readlink (2)
|
|
|
|
on the path
|
|
|
|
.I /proc/self
|
|
|
|
yields the process ID of the caller in the PID namespace of the procfs mount
|
|
|
|
(i.e., the PID namespace of the process that mounted the procfs).
|
2013-03-01 09:51:13 +00:00
|
|
|
This can be useful for introspection purposes,
|
|
|
|
when a process wants to discover its PID in other namespaces.
|
2013-03-01 08:32:51 +00:00
|
|
|
.\"
|
|
|
|
.\" ============================================================
|
|
|
|
.\"
|
2017-10-23 15:27:58 +00:00
|
|
|
.SS /proc files
|
|
|
|
.TP
|
|
|
|
.BR /proc/sys/kernel/ns_last_pid " (since Linux 3.3)"
|
|
|
|
.\" commit b8f566b04d3cddd192cfd2418ae6d54ac6353792
|
|
|
|
This file displays the last PID that was allocated in this PID namespace.
|
|
|
|
When the next PID is allocated,
|
|
|
|
the kernel will search for the lowest unallocated PID
|
|
|
|
that is greater than this value,
|
|
|
|
and when this file is subsequently read it will show that PID.
|
|
|
|
.IP
|
|
|
|
This file is writable by a process that has the
|
|
|
|
.B CAP_SYS_ADMIN
|
|
|
|
capability inside its user namespace.
|
2017-10-23 15:43:45 +00:00
|
|
|
.\" This ability is necessary to support checkpoint restore in user-space
|
2017-10-23 15:27:58 +00:00
|
|
|
This makes it possible to determine the PID that is allocated
|
2017-10-23 15:43:45 +00:00
|
|
|
to the next process that is created inside this PID namespace.
|
2017-10-23 15:27:58 +00:00
|
|
|
.\"
|
|
|
|
.\" ============================================================
|
|
|
|
.\"
|
2013-03-01 08:32:51 +00:00
|
|
|
.SS Miscellaneous
|
2013-02-28 10:40:50 +00:00
|
|
|
When a process ID is passed over a UNIX domain socket to a
|
2013-02-27 06:49:48 +00:00
|
|
|
process in a different PID namespace (see the description of
|
|
|
|
.B SCM_CREDENTIALS
|
|
|
|
in
|
|
|
|
.BR unix (7)),
|
|
|
|
it is translated into the corresponding PID value in
|
|
|
|
the receiving process's PID namespace.
|
|
|
|
.SH CONFORMING TO
|
|
|
|
Namespaces are a Linux-specific feature.
|
2013-03-01 07:53:55 +00:00
|
|
|
.SH EXAMPLE
|
|
|
|
See
|
|
|
|
.BR user_namespaces (7).
|
2013-02-27 06:49:48 +00:00
|
|
|
.SH SEE ALSO
|
|
|
|
.BR clone (2),
|
2017-09-25 08:34:24 +00:00
|
|
|
.BR reboot (2),
|
2013-02-27 06:49:48 +00:00
|
|
|
.BR setns (2),
|
|
|
|
.BR unshare (2),
|
|
|
|
.BR proc (5),
|
|
|
|
.BR capabilities (7),
|
2016-08-07 18:44:38 +00:00
|
|
|
.BR credentials (7),
|
2017-12-08 09:13:42 +00:00
|
|
|
.BR mount_namespaces (7),
|
2016-05-18 17:56:13 +00:00
|
|
|
.BR namespaces (7),
|
2013-02-27 06:49:48 +00:00
|
|
|
.BR user_namespaces (7),
|
|
|
|
.BR switch_root (8)
|