2016-05-06 14:09:14 +00:00
|
|
|
.\" Copyright (c) 2016 by Michael Kerrisk <mtk.manpages@gmail.com>
|
|
|
|
.\"
|
|
|
|
.\" %%%LICENSE_START(VERBATIM)
|
|
|
|
.\" Permission is granted to make and distribute verbatim copies of this
|
|
|
|
.\" manual provided the copyright notice and this permission notice are
|
|
|
|
.\" preserved on all copies.
|
|
|
|
.\"
|
|
|
|
.\" Permission is granted to copy and distribute modified versions of this
|
|
|
|
.\" manual under the conditions for verbatim copying, provided that the
|
|
|
|
.\" entire resulting derived work is distributed under the terms of a
|
|
|
|
.\" permission notice identical to this one.
|
|
|
|
.\"
|
|
|
|
.\" Since the Linux kernel and libraries are constantly changing, this
|
|
|
|
.\" manual page may be incorrect or out-of-date. The author(s) assume no
|
|
|
|
.\" responsibility for errors or omissions, or for damages resulting from
|
|
|
|
.\" the use of the information contained herein. The author(s) may not
|
|
|
|
.\" have taken the same level of care in the production of this manual,
|
|
|
|
.\" which is licensed free of charge, as they might when working
|
|
|
|
.\" professionally.
|
|
|
|
.\"
|
|
|
|
.\" Formatted or processed versions of this manual, if unaccompanied by
|
|
|
|
.\" the source, must acknowledge the copyright and authors of this work.
|
|
|
|
.\" %%%LICENSE_END
|
|
|
|
.\"
|
|
|
|
.\"
|
iconv.1, ldd.1, locale.1, localedef.1, memusage.1, memusagestat.1, mtrace.1, pldd.1, sprof.1, time.1, _syscall.2, add_key.2, alloc_hugepages.2, arch_prctl.2, bpf.2, cacheflush.2, capget.2, chdir.2, chmod.2, chown.2, chroot.2, clock_getres.2, clock_nanosleep.2, clone.2, close.2, connect.2, copy_file_range.2, create_module.2, delete_module.2, dup.2, epoll_create.2, epoll_ctl.2, epoll_wait.2, eventfd.2, execve.2, execveat.2, fallocate.2, fanotify_init.2, fcntl.2, flock.2, fork.2, fsync.2, futex.2, futimesat.2, get_kernel_syms.2, get_mempolicy.2, get_robust_list.2, getcpu.2, getdents.2, getdomainname.2, getgid.2, getgroups.2, gethostname.2, getitimer.2, getpagesize.2, getpeername.2, getpriority.2, getrandom.2, getresuid.2, getrlimit.2, getrusage.2, getsid.2, getsockname.2, getsockopt.2, gettid.2, gettimeofday.2, getuid.2, getunwind.2, init_module.2, inotify_add_watch.2, inotify_init.2, inotify_rm_watch.2, intro.2, io_cancel.2, io_destroy.2, io_getevents.2, io_setup.2, io_submit.2, ioctl_console.2, ioctl_fat.2, ioctl_ficlonerange.2, ioctl_fideduperange.2, ioctl_getfsmap.2, ioctl_iflags.2, ioctl_list.2, ioctl_ns.2, ioctl_tty.2, ioctl_userfaultfd.2, ioperm.2, iopl.2, ioprio_set.2, ipc.2, kcmp.2, kexec_load.2, keyctl.2, kill.2, link.2, listen.2, listxattr.2, llseek.2, lookup_dcookie.2, lseek.2, madvise.2, mbind.2, membarrier.2, memfd_create.2, migrate_pages.2, mincore.2, mkdir.2, mknod.2, mlock.2, mmap.2, mmap2.2, modify_ldt.2, move_pages.2, mprotect.2, mq_getsetattr.2, mremap.2, msgctl.2, msgget.2, msgop.2, msync.2, nanosleep.2, nfsservctl.2, nice.2, open.2, open_by_handle_at.2, outb.2, perf_event_open.2, perfmonctl.2, personality.2, pivot_root.2, pkey_alloc.2, poll.2, posix_fadvise.2, prctl.2, pread.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, read.2, readahead.2, readdir.2, readlink.2, readv.2, reboot.2, recv.2, recvmmsg.2, remap_file_pages.2, rename.2, request_key.2, restart_syscall.2, rt_sigqueueinfo.2, s390_pci_mmio_write.2, s390_runtime_instr.2, 
sched_get_priority_max.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setparam.2, sched_setscheduler.2, sched_yield.2, seccomp.2, select.2, select_tut.2, semctl.2, semget.2, semop.2, send.2, sendfile.2, sendmmsg.2, set_mempolicy.2, set_thread_area.2, set_tid_address.2, seteuid.2, setfsgid.2, setfsuid.2, setgid.2, setns.2, setpgid.2, setresuid.2, setreuid.2, setsid.2, setuid.2, sgetmask.2, shmctl.2, shmget.2, shmop.2, sigaction.2, sigaltstack.2, signal.2, sigpending.2, sigprocmask.2, sigreturn.2, sigsuspend.2, sigwaitinfo.2, socket.2, socketcall.2, socketpair.2, splice.2, spu_create.2, spu_run.2, stat.2, statfs.2, statx.2, subpage_prot.2, swapon.2, symlink.2, sync.2, sync_file_range.2, syscall.2, syscalls.2, sysctl.2, sysfs.2, sysinfo.2, syslog.2, tee.2, time.2, timer_create.2, timer_getoverrun.2, timer_settime.2, timerfd_create.2, times.2, tkill.2, truncate.2, umask.2, umount.2, uname.2, unimplemented.2, unlink.2, unshare.2, uselib.2, userfaultfd.2, ustat.2, utime.2, utimensat.2, vfork.2, vmsplice.2, wait.2, wait4.2, write.2, CPU_SET.3, INFINITY.3, __ppc_get_timebase.3, __ppc_set_ppr_med.3, __ppc_yield.3, __setfpucw.3, acos.3, acosh.3, adjtime.3, aio_fsync.3, aio_init.3, aio_read.3, aio_return.3, aio_suspend.3, aio_write.3, alloca.3, argz_add.3, asin.3, asinh.3, asprintf.3, assert.3, assert_perror.3, atan.3, atan2.3, atanh.3, atexit.3, backtrace.3, basename.3, bindresvport.3, bsd_signal.3, bsearch.3, bswap.3, btree.3, byteorder.3, bzero.3, canonicalize_file_name.3, carg.3, cbrt.3, ccos.3, ccosh.3, ceil.3, cexp.3, cfree.3, clearenv.3, clock.3, clock_getcpuclockid.3, clog.3, clog10.3, clog2.3, cmsg.3, confstr.3, copysign.3, cos.3, cosh.3, crypt.3, csin.3, csinh.3, csqrt.3, ctan.3, ctanh.3, ctime.3, dbopen.3, dl_iterate_phdr.3, dladdr.3, dlerror.3, dlinfo.3, dlopen.3, dlsym.3, drand48.3, drand48_r.3, duplocale.3, encrypt.3, end.3, endian.3, envz_add.3, erf.3, erfc.3, err.3, errno.3, error.3, ether_aton.3, euidaccess.3, exec.3, exit.3, exp.3, 
exp10.3, exp2.3, expm1.3, fabs.3, fcloseall.3, fdim.3, fenv.3, ferror.3, fexecve.3, fflush.3, ffs.3, fgetc.3, fgetgrent.3, fgetpwent.3, finite.3, floor.3, fma.3, fmax.3, fmemopen.3, fmin.3, fmod.3, fmtmsg.3, fopen.3, fopencookie.3, fpclassify.3, fpurge.3, fputwc.3, fputws.3, frexp.3, fseek.3, fseeko.3, ftime.3, fts.3, ftw.3, futimes.3, gamma.3, gcvt.3, get_nprocs_conf.3, get_phys_pages.3, getaddrinfo.3, getaddrinfo_a.3, getauxval.3, getcontext.3, getcwd.3, getdate.3, getentropy.3, getenv.3, getfsent.3, getgrent.3, getgrent_r.3, getgrnam.3, getgrouplist.3, gethostbyname.3, gethostid.3, getifaddrs.3, getipnodebyname.3, getline.3, getlogin.3, getmntent.3, getnameinfo.3, getnetent.3, getnetent_r.3, getopt.3, getprotoent.3, getprotoent_r.3, getpw.3, getpwent.3, getpwent_r.3, getpwnam.3, getrpcent.3, getrpcent_r.3, getrpcport.3, gets.3, getservent.3, getservent_r.3, getspnam.3, getsubopt.3, getttyent.3, getumask.3, getutent.3, getwchar.3, glob.3, gnu_get_libc_version.3, grantpt.3, gsignal.3, hash.3, hsearch.3, hypot.3, iconv.3, iconv_close.3, iconv_open.3, if_nameindex.3, if_nametoindex.3, ilogb.3, inet.3, inet_net_pton.3, inet_ntop.3, inet_pton.3, initgroups.3, insque.3, intro.3, isalpha.3, isgreater.3, j0.3, key_setsecret.3, killpg.3, ldexp.3, lgamma.3, lio_listio.3, lockf.3, log.3, log10.3, log1p.3, log2.3, logb.3, login.3, lrint.3, lround.3, lsearch.3, lseek64.3, makecontext.3, makedev.3, mallinfo.3, malloc.3, malloc_get_state.3, malloc_info.3, malloc_stats.3, malloc_trim.3, malloc_usable_size.3, mallopt.3, matherr.3, mbsnrtowcs.3, mbsrtowcs.3, mbstowcs.3, mcheck.3, memccpy.3, memchr.3, memcmp.3, memcpy.3, mkfifo.3, mkstemp.3, mktemp.3, modf.3, mpool.3, mq_close.3, mq_getattr.3, mq_notify.3, mq_open.3, mq_receive.3, mq_send.3, mtrace.3, newlocale.3, nextafter.3, nextup.3, nl_langinfo.3, ntp_gettime.3, offsetof.3, on_exit.3, open_memstream.3, opendir.3, openpty.3, perror.3, popen.3, posix_fallocate.3, posix_madvise.3, posix_memalign.3, posix_openpt.3, posix_spawn.3, 
pow.3, pow10.3, printf.3, profil.3, program_invocation_name.3, psignal.3, pthread_atfork.3, pthread_attr_init.3, pthread_attr_setaffinity_np.3, pthread_attr_setdetachstate.3, pthread_attr_setguardsize.3, pthread_attr_setinheritsched.3, pthread_attr_setschedparam.3, pthread_attr_setschedpolicy.3, pthread_attr_setscope.3, pthread_attr_setstack.3, pthread_attr_setstackaddr.3, pthread_attr_setstacksize.3, pthread_cancel.3, pthread_cleanup_push.3, pthread_cleanup_push_defer_np.3, pthread_create.3, pthread_detach.3, pthread_exit.3, pthread_getattr_default_np.3, pthread_getattr_np.3, pthread_getcpuclockid.3, pthread_join.3, pthread_kill.3, pthread_kill_other_threads_np.3, pthread_self.3, pthread_setaffinity_np.3, pthread_setcancelstate.3, pthread_setconcurrency.3, pthread_setname_np.3, pthread_setschedparam.3, pthread_sigmask.3, pthread_sigqueue.3, pthread_testcancel.3, pthread_tryjoin_np.3, ptsname.3, putgrent.3, putpwent.3, qsort.3, random.3, random_r.3, rcmd.3, re_comp.3, readdir.3, realpath.3, recno.3, regex.3, remainder.3, remove.3, remquo.3, resolver.3, rexec.3, rint.3, round.3, rpc.3, rpmatch.3, rtime.3, scalb.3, scalbln.3, scandir.3, scanf.3, sched_getcpu.3, sem_close.3, sem_destroy.3, sem_getvalue.3, sem_init.3, sem_open.3, sem_wait.3, setaliasent.3, setbuf.3, setenv.3, setlocale.3, setlogmask.3, setnetgrent.3, shm_open.3, signbit.3, significand.3, sigpause.3, sigqueue.3, sigset.3, sigvec.3, sin.3, sincos.3, sinh.3, sleep.3, sockatmark.3, sqrt.3, statvfs.3, stdarg.3, stdin.3, strcasecmp.3, strcat.3, strchr.3, strcoll.3, strcpy.3, strdup.3, strerror.3, strfmon.3, strfromd.3, strftime.3, strptime.3, strsignal.3, strstr.3, strtod.3, strtok.3, strtol.3, strtoul.3, strverscmp.3, syslog.3, system.3, sysv_signal.3, tan.3, tanh.3, telldir.3, tempnam.3, termios.3, tgamma.3, timeradd.3, tmpnam.3, toupper.3, towlower.3, towupper.3, trunc.3, ttyslot.3, tzset.3, ualarm.3, ulimit.3, undocumented.3, unlocked_stdio.3, updwtmp.3, uselocale.3, usleep.3, wcrtomb.3, wcsdup.3, 
wcsnrtombs.3, wcsrtombs.3, wcstombs.3, wctob.3, wcwidth.3, wordexp.3, wprintf.3, xcrypt.3, xdr.3, y0.3, cciss.4, console_codes.4, dsp56k.4, fuse.4, hd.4, hpsa.4, initrd.4, intro.4, loop.4, random.4, rtc.4, sd.4, sk98lin.4, st.4, wavelan.4, acct.5, core.5, elf.5, filesystems.5, host.conf.5, hosts.5, locale.5, nologin.5, proc.5, resolv.conf.5, rpc.5, slabinfo.5, utmp.5, aio.7, arp.7, bootparam.7, capabilities.7, cgroup_namespaces.7, cgroups.7, charsets.7, cpuset.7, ddp.7, environ.7, epoll.7, fanotify.7, feature_test_macros.7, futex.7, inode.7, inotify.7, ip.7, ipv6.7, keyrings.7, locale.7, man-pages.7, man.7, math_error.7, mount_namespaces.7, mq_overview.7, namespaces.7, netdevice.7, netlink.7, packet.7, pipe.7, pkeys.7, pthreads.7, pty.7, raw.7, rtld-audit.7, rtnetlink.7, sched.7, session-keyring.7, signal.7, sock_diag.7, socket.7, spufs.7, suffixes.7, tcp.7, udp.7, udplite.7, unicode.7, units.7, unix.7, uri.7, user_namespaces.7, vdso.7, x25.7, xattr.7, iconvconfig.8, ld.so.8, ldconfig.8, sln.8: Update timestamps
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-09-15 10:44:56 +00:00
|
|
|
.TH CGROUP_NAMESPACES 7 2017-09-15 "Linux" "Linux Programmer's Manual"
|
2016-05-06 14:09:14 +00:00
|
|
|
.SH NAME
|
|
|
|
cgroup_namespaces \- overview of Linux cgroup namespaces
|
|
|
|
.SH DESCRIPTION
|
|
|
|
For an overview of namespaces, see
|
|
|
|
.BR namespaces (7).
|
2017-06-13 09:58:40 +00:00
|
|
|
.PP
|
2016-05-06 14:09:14 +00:00
|
|
|
Cgroup namespaces virtualize the view of a process's cgroups (see
|
|
|
|
.BR cgroups (7))
|
|
|
|
as seen via
|
|
|
|
.IR /proc/[pid]/cgroup
|
|
|
|
and
|
|
|
|
.IR /proc/[pid]/mountinfo .
|
2017-06-13 09:58:40 +00:00
|
|
|
.PP
|
2017-06-13 08:53:16 +00:00
|
|
|
Each cgroup namespace has its own set of cgroup root directories.
|
|
|
|
These root directories are the base points for the relative
|
|
|
|
locations displayed in the corresponding records in the
|
|
|
|
.IR /proc/[pid]/cgroup
|
|
|
|
file.
|
2016-05-06 14:09:14 +00:00
|
|
|
When a process creates a new cgroup namespace using
|
|
|
|
.BR clone (2)
|
|
|
|
or
|
|
|
|
.BR unshare (2)
|
|
|
|
with the
|
|
|
|
.BR CLONE_NEWCGROUP
|
2016-05-06 21:34:43 +00:00
|
|
|
flag, it enters a new cgroup namespace in which its current
|
|
|
|
cgroups directories become the cgroup root directories
|
|
|
|
of the new namespace.
|
2016-05-06 14:09:14 +00:00
|
|
|
(This applies both for the cgroups version 1 hierarchies
|
|
|
|
and the cgroups version 2 unified hierarchy.)
|
2017-06-13 09:58:40 +00:00
|
|
|
.PP
|
2016-05-06 14:09:14 +00:00
|
|
|
When viewing
|
|
|
|
.IR /proc/[pid]/cgroup ,
|
|
|
|
the pathname shown in the third field of each record will be
|
2017-06-13 08:53:16 +00:00
|
|
|
relative to the reading process's root directory
|
|
|
|
for the corresponding cgroup hierarchy.
|
2016-05-06 14:09:14 +00:00
|
|
|
If the cgroup directory of the target process lies outside
|
|
|
|
the root directory of the reading process's cgroup namespace,
|
|
|
|
then the pathname will show
|
|
|
|
.I ../
|
|
|
|
entries for each ancestor level in the cgroup hierarchy.
|
2017-06-13 09:58:40 +00:00
|
|
|
.PP
|
2016-05-06 14:09:14 +00:00
|
|
|
The following shell session demonstrates the effect of creating
|
|
|
|
a new cgroup namespace.
|
|
|
|
First (as superuser), we create a child cgroup in the
|
|
|
|
.I freezer
|
|
|
|
hierarchy, and put the shell into that cgroup:
|
2017-06-13 09:58:40 +00:00
|
|
|
.PP
|
2016-05-06 14:09:14 +00:00
|
|
|
.in +4n
|
execve.2, ioctl_console.2, ioctl_iflags.2, ioctl_ns.2, ioctl_userfaultfd.2, kcmp.2, kexec_load.2, keyctl.2, link.2, listxattr.2, membarrier.2, memfd_create.2, mmap.2, modify_ldt.2, mprotect.2, msgctl.2, nanosleep.2, open_by_handle_at.2, perf_event_open.2, poll.2, posix_fadvise.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, readdir.2, readv.2, recv.2, recvmmsg.2, request_key.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setscheduler.2, seccomp.2, select.2, select_tut.2, semctl.2, semop.2, send.2, sendmmsg.2, set_thread_area.2, setns.2, shmctl.2, shmget.2, sigaction.2, sigaltstack.2, signal.2, sigwaitinfo.2, stat.2, statfs.2, statx.2, sync_file_range.2, syscall.2, sysctl.2, sysinfo.2, tee.2, timer_create.2, timer_settime.2, timerfd_create.2, unshare.2, userfaultfd.2, ustat.2, utime.2, utimensat.2, vmsplice.2, wait.2, adjtime.3, aio_init.3, backtrace.3, basename.3, bswap.3, btree.3, clock_getcpuclockid.3, cmsg.3, confstr.3, dbopen.3, dl_iterate_phdr.3, dladdr.3, dlinfo.3, dlopen.3, duplocale.3, encrypt.3, end.3, endian.3, err.3, errno.3, ether_aton.3, fgetgrent.3, fgetpwent.3, fmemopen.3, frexp.3, ftime.3, fts.3, getaddrinfo.3, getaddrinfo_a.3, getdate.3, getfsent.3, getgrent.3, getgrent_r.3, getgrnam.3, getgrouplist.3, gethostbyname.3, getifaddrs.3, getipnodebyname.3, getmntent.3, getnameinfo.3, getnetent.3, getopt.3, getprotoent.3, getprotoent_r.3, getpw.3, getpwent.3, getpwent_r.3, getpwnam.3, getrpcent.3, getservent.3, getservent_r.3, getspnam.3, getttyent.3, glob.3, gnu_get_libc_version.3, hash.3, hsearch.3, if_nameindex.3, inet.3, inet_net_pton.3, inet_pton.3, insque.3, isalpha.3, makecontext.3, mallinfo.3, malloc_info.3, mallopt.3, matherr.3, mbstowcs.3, mcheck.3, memchr.3, mq_getattr.3, mq_open.3, mq_receive.3, mq_send.3, mtrace.3, newlocale.3, ntp_gettime.3, posix_openpt.3, printf.3, pthread_attr_init.3, pthread_attr_setschedparam.3, pthread_cancel.3, pthread_cleanup_push.3, pthread_cleanup_push_defer_np.3, 
pthread_create.3, pthread_getattr_default_np.3, pthread_getattr_np.3, pthread_getcpuclockid.3, pthread_setname_np.3, pthread_setschedparam.3, pthread_sigmask.3, pthread_tryjoin_np.3, readdir.3, realpath.3, recno.3, regex.3, rpc.3, scanf.3, sched_getcpu.3, sem_wait.3, setaliasent.3, sigqueue.3, statvfs.3, strcat.3, strcpy.3, strftime.3, strtok.3, strtol.3, strverscmp.3, toupper.3, ttyslot.3, xdr.3, fuse.4, loop.4, rtc.4, st.4, acct.5, core.5, elf.5, slabinfo.5, aio.7, arp.7, capabilities.7, cgroup_namespaces.7, cgroups.7, ddp.7, fanotify.7, feature_test_macros.7, inode.7, inotify.7, ip.7, keyrings.7, locale.7, mount_namespaces.7, namespaces.7, netdevice.7, netlink.7, packet.7, pkeys.7, pthreads.7, sched.7, session-keyring.7, sock_diag.7, socket.7, spufs.7, udplite.7, unix.7, user_namespaces.7, vdso.7, x25.7, ld.so.8: Use consistent markup for code snippets
Change .nf/.fi to .EX/.EE
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-18 19:52:46 +00:00
|
|
|
.EX
|
2016-05-06 14:09:14 +00:00
|
|
|
# \fBmkdir \-p /sys/fs/cgroup/freezer/sub\fP
|
|
|
|
# \fBecho $$\fP # Show PID of this shell
|
|
|
|
30655
|
2017-06-13 10:04:08 +00:00
|
|
|
# \fBsh \-c \(aqecho 30655 > /sys/fs/cgroup/freezer/sub/cgroup.procs\(aq\fP
|
2016-05-06 14:09:14 +00:00
|
|
|
# \fBcat /proc/self/cgroup | grep freezer\fP
|
|
|
|
7:freezer:/sub
|
execve.2, ioctl_console.2, ioctl_iflags.2, ioctl_ns.2, ioctl_userfaultfd.2, kcmp.2, kexec_load.2, keyctl.2, link.2, listxattr.2, membarrier.2, memfd_create.2, mmap.2, modify_ldt.2, mprotect.2, msgctl.2, nanosleep.2, open_by_handle_at.2, perf_event_open.2, poll.2, posix_fadvise.2, process_vm_readv.2, ptrace.2, query_module.2, quotactl.2, readdir.2, readv.2, recv.2, recvmmsg.2, request_key.2, sched_rr_get_interval.2, sched_setaffinity.2, sched_setattr.2, sched_setscheduler.2, seccomp.2, select.2, select_tut.2, semctl.2, semop.2, send.2, sendmmsg.2, set_thread_area.2, setns.2, shmctl.2, shmget.2, sigaction.2, sigaltstack.2, signal.2, sigwaitinfo.2, stat.2, statfs.2, statx.2, sync_file_range.2, syscall.2, sysctl.2, sysinfo.2, tee.2, timer_create.2, timer_settime.2, timerfd_create.2, unshare.2, userfaultfd.2, ustat.2, utime.2, utimensat.2, vmsplice.2, wait.2, adjtime.3, aio_init.3, backtrace.3, basename.3, bswap.3, btree.3, clock_getcpuclockid.3, cmsg.3, confstr.3, dbopen.3, dl_iterate_phdr.3, dladdr.3, dlinfo.3, dlopen.3, duplocale.3, encrypt.3, end.3, endian.3, err.3, errno.3, ether_aton.3, fgetgrent.3, fgetpwent.3, fmemopen.3, frexp.3, ftime.3, fts.3, getaddrinfo.3, getaddrinfo_a.3, getdate.3, getfsent.3, getgrent.3, getgrent_r.3, getgrnam.3, getgrouplist.3, gethostbyname.3, getifaddrs.3, getipnodebyname.3, getmntent.3, getnameinfo.3, getnetent.3, getopt.3, getprotoent.3, getprotoent_r.3, getpw.3, getpwent.3, getpwent_r.3, getpwnam.3, getrpcent.3, getservent.3, getservent_r.3, getspnam.3, getttyent.3, glob.3, gnu_get_libc_version.3, hash.3, hsearch.3, if_nameindex.3, inet.3, inet_net_pton.3, inet_pton.3, insque.3, isalpha.3, makecontext.3, mallinfo.3, malloc_info.3, mallopt.3, matherr.3, mbstowcs.3, mcheck.3, memchr.3, mq_getattr.3, mq_open.3, mq_receive.3, mq_send.3, mtrace.3, newlocale.3, ntp_gettime.3, posix_openpt.3, printf.3, pthread_attr_init.3, pthread_attr_setschedparam.3, pthread_cancel.3, pthread_cleanup_push.3, pthread_cleanup_push_defer_np.3, 
pthread_create.3, pthread_getattr_default_np.3, pthread_getattr_np.3, pthread_getcpuclockid.3, pthread_setname_np.3, pthread_setschedparam.3, pthread_sigmask.3, pthread_tryjoin_np.3, readdir.3, realpath.3, recno.3, regex.3, rpc.3, scanf.3, sched_getcpu.3, sem_wait.3, setaliasent.3, sigqueue.3, statvfs.3, strcat.3, strcpy.3, strftime.3, strtok.3, strtol.3, strverscmp.3, toupper.3, ttyslot.3, xdr.3, fuse.4, loop.4, rtc.4, st.4, acct.5, core.5, elf.5, slabinfo.5, aio.7, arp.7, capabilities.7, cgroup_namespaces.7, cgroups.7, ddp.7, fanotify.7, feature_test_macros.7, inode.7, inotify.7, ip.7, keyrings.7, locale.7, mount_namespaces.7, namespaces.7, netdevice.7, netlink.7, packet.7, pkeys.7, pthreads.7, sched.7, session-keyring.7, sock_diag.7, socket.7, spufs.7, udplite.7, unix.7, user_namespaces.7, vdso.7, x25.7, ld.so.8: Use consistent markup for code snippets
Change .nf/.fi to .EX/.EE
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-18 19:52:46 +00:00
|
|
|
.EE
|
ioctl_console.2, ioctl_getfsmap.2, ioctl_iflags.2, ioctl_list.2, ioctl_ns.2, kcmp.2, kexec_load.2, keyctl.2, link.2, mmap.2, modify_ldt.2, msgctl.2, poll.2, query_module.2, quotactl.2, recv.2, recvmmsg.2, sched_setscheduler.2, seccomp.2, select.2, semctl.2, semop.2, send.2, set_thread_area.2, setns.2, shmctl.2, shmget.2, sigaction.2, sysinfo.2, timer_create.2, timerfd_create.2, uname.2, unshare.2, userfaultfd.2, ustat.2, utimensat.2, vmsplice.2, wait.2, adjtime.3, backtrace.3, bswap.3, btree.3, clock_getcpuclockid.3, confstr.3, dbopen.3, dl_iterate_phdr.3, dlinfo.3, duplocale.3, encrypt.3, end.3, endian.3, err.3, errno.3, fmemopen.3, fopencookie.3, frexp.3, fts.3, ftw.3, getaddrinfo.3, getaddrinfo_a.3, getcontext.3, getgrouplist.3, getifaddrs.3, getipnodebyname.3, getnameinfo.3, getopt.3, getprotoent_r.3, getpwent_r.3, getrpcent.3, getservent_r.3, getttyent.3, getumask.3, glob.3, gnu_get_libc_version.3, hash.3, hsearch.3, inet.3, inet_pton.3, insque.3, isalpha.3, makecontext.3, mallopt.3, mbstowcs.3, mcheck.3, memchr.3, mq_getattr.3, mq_open.3, mtrace.3, newlocale.3, ntp_gettime.3, offsetof.3, posix_openpt.3, printf.3, pthread_setname_np.3, pthread_setschedparam.3, rpc.3, scanf.3, sched_getcpu.3, sem_wait.3, setaliasent.3, sigqueue.3, sigvec.3, stdarg.3, strcat.3, strcpy.3, strftime.3, strtol.3, toupper.3, ttyslot.3, fuse.4, loop.4, st.4, elf.5, cgroup_namespaces.7, cgroups.7, feature_test_macros.7, inode.7, inotify.7, keyrings.7, man-pages.7, math_error.7, mount_namespaces.7, mq_overview.7, pthreads.7, sched.7, session-keyring.7, udplite.7, unix.7, vdso.7: Use consistent markup for code snippets
The preferred form is
.PP/.IP
.in +4n
.EX
<code>
.EE
.in
.PP/.IP
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
2017-08-18 19:37:55 +00:00
|
|
|
.in
|
2017-06-13 09:58:40 +00:00
|
|
|
.PP
|
2016-05-06 14:09:14 +00:00
|
|
|
Next, we use
|
|
|
|
.BR unshare (1)
|
|
|
|
to create a process running a new shell in new cgroup and mount namespaces:
|
2017-06-13 09:58:40 +00:00
|
|
|
.PP
|
2017-06-13 10:19:56 +00:00
|
|
|
.EX
|
2016-05-06 14:09:14 +00:00
|
|
|
.in +4n
|
|
|
|
# \fBunshare \-Cm bash\fP
|
|
|
|
.in
|
2017-06-13 10:19:56 +00:00
|
|
|
.EE
|
2017-06-13 09:58:40 +00:00
|
|
|
.PP
|
2016-05-06 14:09:14 +00:00
|
|
|
We then inspect the
|
|
|
|
.IR /proc/[pid]/cgroup
|
|
|
|
files of, respectively, the new shell process started by the
|
|
|
|
.BR unshare (1)
|
|
|
|
command, a process that is in the original cgroup namespace
|
|
|
|
.RI ( init ,
|
2017-06-13 08:53:16 +00:00
|
|
|
with PID 1), and a process in a sibling cgroup
|
|
|
|
.RI ( sub2 ):
|
2017-06-13 09:58:40 +00:00
|
|
|
.PP
|
2017-06-13 10:19:56 +00:00
|
|
|
.EX
|
2016-05-06 14:09:14 +00:00
|
|
|
.in +4n
|
|
|
|
$ \fBcat /proc/self/cgroup | grep freezer\fP
|
|
|
|
7:freezer:/
|
|
|
|
$ \fBcat /proc/1/cgroup | grep freezer\fP
|
|
|
|
7:freezer:/..
|
|
|
|
$ \fBcat /proc/20124/cgroup | grep freezer\fP
|
|
|
|
7:freezer:/../sub2
|
|
|
|
.in
|
2017-06-13 10:19:56 +00:00
|
|
|
.EE
|
2017-06-13 09:17:16 +00:00
|
|
|
.PP
|
|
|
|
From the output of the first command,
|
|
|
|
we see that the freezer cgroup membership of the new shell
|
|
|
|
(which is in the same cgroup as the initial shell)
|
|
|
|
is shown relative to the freezer cgroup root directory
|
|
|
|
that was established when the new cgroup namespace was created.
|
|
|
|
(In absolute terms,
|
|
|
|
the new shell is in the
|
|
|
|
.I /sub
|
|
|
|
freezer cgroup,
|
|
|
|
and the root directory of the freezer cgroup hierarchy
|
|
|
|
in the new cgroup namespace is also
|
|
|
|
.IR /sub .
|
|
|
|
Thus, the new shell's cgroup membership is displayed as \(aq/\(aq.)
|
|
|
|
.PP
|
2016-05-06 14:09:14 +00:00
|
|
|
However, when we look in
|
|
|
|
.IR /proc/self/mountinfo
|
|
|
|
we see the following anomaly:
|
2017-06-13 09:58:40 +00:00
|
|
|
.PP
|
2017-06-13 10:19:56 +00:00
|
|
|
.EX
|
2016-05-06 14:09:14 +00:00
|
|
|
.in +4n
|
|
|
|
# \fBcat /proc/self/mountinfo | grep freezer\fP
|
|
|
|
155 145 0:32 /.. /sys/fs/cgroup/freezer ...
|
|
|
|
.in
|
2017-06-13 10:19:56 +00:00
|
|
|
.EE
|
2017-06-13 09:58:40 +00:00
|
|
|
.PP
|
2017-06-13 08:53:16 +00:00
|
|
|
The fourth field of this line
|
|
|
|
.RI ( /.. )
|
|
|
|
should show the
|
2016-05-06 14:09:14 +00:00
|
|
|
directory in the cgroup filesystem which forms the root of this mount.
|
|
|
|
Since, by the definition of cgroup namespaces, the process's current
|
|
|
|
freezer cgroup directory became its root freezer cgroup directory,
|
|
|
|
we should see \(aq/\(aq in this field.
|
|
|
|
The problem here is that we are seeing a mount entry for the cgroup
|
|
|
|
filesystem corresponding to our initial shell process's cgroup namespace
|
|
|
|
(whose cgroup filesystem is indeed rooted in the parent directory of
|
|
|
|
.IR sub ).
|
|
|
|
We need to remount the freezer cgroup filesystem
|
|
|
|
inside this cgroup namespace, after which we see the expected results:
|
2017-06-13 09:58:40 +00:00
|
|
|
.PP
|
2017-06-13 10:19:56 +00:00
|
|
|
.EX
|
2016-05-06 14:09:14 +00:00
|
|
|
.in +4n
|
2016-05-07 06:06:54 +00:00
|
|
|
# \fBmount \-\-make\-rslave /\fP # Don't propagate mount events
|
2016-05-06 14:09:14 +00:00
|
|
|
# to other namespaces
|
2016-05-07 06:06:54 +00:00
|
|
|
# \fBumount /sys/fs/cgroup/freezer\fP
|
|
|
|
# \fBmount \-t cgroup \-o freezer freezer /sys/fs/cgroup/freezer\fP
|
|
|
|
# \fBcat /proc/self/mountinfo | grep freezer\fP
|
2016-05-06 14:09:14 +00:00
|
|
|
155 145 0:32 / /sys/fs/cgroup/freezer rw,relatime ...
|
|
|
|
.in
|
2017-06-13 10:19:56 +00:00
|
|
|
.EE
|
2017-06-13 09:58:40 +00:00
|
|
|
.PP
|
2016-05-06 14:09:14 +00:00
|
|
|
Use of cgroup namespaces requires a kernel that is configured with the
|
|
|
|
.B CONFIG_CGROUPS
|
|
|
|
option.
|
|
|
|
.\"
|
2016-08-07 19:21:01 +00:00
|
|
|
.SH CONFORMING TO
|
|
|
|
Namespaces are a Linux-specific feature.
|
2016-05-06 14:09:14 +00:00
|
|
|
.SH NOTES
|
|
|
|
Among the purposes served by the
|
|
|
|
virtualization provided by cgroup namespaces are the following:
|
|
|
|
.IP * 2
|
|
|
|
It prevents information leaks whereby cgroup directory paths outside of
|
|
|
|
a container would otherwise be visible to processes in the container.
|
|
|
|
Such leakages could, for example,
|
|
|
|
reveal information about the container framework
|
|
|
|
to containerized applications.
|
|
|
|
.IP *
|
2016-05-07 07:15:19 +00:00
|
|
|
It eases tasks such as container migration.
|
|
|
|
The virtualization provided by cgroup namespaces
|
|
|
|
allows containers to be isolated from knowledge of
|
|
|
|
the pathnames of ancestor cgroups.
|
2016-05-07 20:42:45 +00:00
|
|
|
Without such isolation, the full cgroup pathnames (displayed in
|
|
|
|
.IR /proc/self/cgroup )
|
|
|
|
would need to be replicated on the target system when migrating a container;
|
2016-05-07 07:15:19 +00:00
|
|
|
those pathnames would also need to be unique,
|
|
|
|
so that they don't conflict with other pathnames on the target system.
|
|
|
|
.IP *
|
2016-05-07 20:41:34 +00:00
|
|
|
It allows better confinement of containerized processes,
|
2016-05-07 07:06:21 +00:00
|
|
|
because it is possible to mount the container's cgroup filesystems such that
|
|
|
|
the container processes can't gain access to ancestor cgroup directories.
|
2016-05-06 14:09:14 +00:00
|
|
|
Consider, for example, the following scenario:
|
|
|
|
.RS 4
|
|
|
|
.IP \(bu 2
|
|
|
|
We have a cgroup directory,
|
|
|
|
.IR /cg/1 ,
|
|
|
|
that is owned by user ID 9000.
|
|
|
|
.IP \(bu
|
|
|
|
We have a process,
|
|
|
|
.IR X ,
|
|
|
|
also owned by user ID 9000,
|
|
|
|
that is namespaced under the cgroup
|
|
|
|
.IR /cg/1/2
|
|
|
|
(i.e.,
|
|
|
|
.I X
|
|
|
|
was placed in a new cgroup namespace via
|
|
|
|
.BR clone (2)
|
|
|
|
or
|
|
|
|
.BR unshare (2)
|
|
|
|
with the
|
|
|
|
.BR CLONE_NEWCGROUP
|
|
|
|
flag).
|
|
|
|
.RE
|
|
|
|
.IP
|
|
|
|
In the absence of cgroup namespacing, because the cgroup directory
|
|
|
|
.IR /cg/1
|
2016-05-07 20:29:28 +00:00
|
|
|
is owned (and writable) by UID 9000 and process
|
2016-05-07 20:29:03 +00:00
|
|
|
.I X
|
|
|
|
is also owned by user ID 9000, process
|
|
|
|
.I X
|
|
|
|
would be able to modify the contents of cgroups files
|
|
|
|
(i.e., change cgroup settings) not only in
|
2016-05-06 14:09:14 +00:00
|
|
|
.IR /cg/1/2
|
|
|
|
but also in the ancestor cgroup directory
|
|
|
|
.IR /cg/1 .
|
|
|
|
Namespacing process
|
|
|
|
.IR X
|
|
|
|
under the cgroup directory
|
2016-05-07 06:10:07 +00:00
|
|
|
.IR /cg/1/2 ,
|
|
|
|
in combination with suitable mount operations
|
|
|
|
for the cgroup filesystem (as shown above),
|
2016-05-06 14:09:14 +00:00
|
|
|
prevents it from modifying files in
|
|
|
|
.IR /cg/1 ,
|
|
|
|
since it cannot even see the contents of that directory
|
|
|
|
(or of further removed cgroup ancestor directories).
|
|
|
|
Combined with correct enforcement of hierarchical limits,
|
2016-05-17 00:23:09 +00:00
|
|
|
this prevents process
|
|
|
|
.I X
|
|
|
|
from escaping the limits imposed by ancestor cgroups.
|
2016-05-06 14:09:14 +00:00
|
|
|
.SH SEE ALSO
|
|
|
|
.BR unshare (1),
|
|
|
|
.BR clone (2),
|
|
|
|
.BR setns (2),
|
|
|
|
.BR unshare (2),
|
|
|
|
.BR proc (5),
|
|
|
|
.BR cgroups (7),
|
|
|
|
.BR credentials (7),
|
2016-06-21 08:25:38 +00:00
|
|
|
.BR namespaces (7),
|
2016-05-06 14:09:14 +00:00
|
|
|
.BR user_namespaces (7)
|