mirror of https://github.com/mkerrisk/man-pages
Compare commits
23 Commits
c2d505de65
...
c61a1c396e
Author | SHA1 | Date |
---|---|---|
Jakub Wilk | c61a1c396e | |
Michael Kerrisk | d96bf5f5bf | |
Eric W. Biederman | 8e5918c2ec | |
Michael Kerrisk | bb75585de6 | |
Michael Kerrisk | 3e3764560d | |
Michael Kerrisk | 0e8a773e53 | |
Michael Kerrisk | 538a491e06 | |
Michael Kerrisk | 38635f0bc4 | |
Michael Kerrisk | 30397d7dd0 | |
Michael Kerrisk | 91ce7d5f0a | |
Michael Kerrisk | f606879ab1 | |
Michael Kerrisk | 8c67481023 | |
Michael Kerrisk | 5c3a06ed01 | |
Michael Kerrisk | 133e6b161c | |
Michael Kerrisk | 3643106e2c | |
Michael Kerrisk | 736498624f | |
Michael Kerrisk | 03cd41e922 | |
Alejandro Colomar | 63097cb7be | |
Christian Brauner | f3a5ba3f01 | |
Michael Kerrisk | 69bc3836cc | |
Alejandro Colomar | eeeee81162 | |
Kurt Kanzenbach | e79977aeec | |
Alejandro Colomar | 7fc5fc967d |
111
man2/futex.2
111
man2/futex.2
|
@ -222,9 +222,9 @@ This allows the kernel to make some additional performance optimizations.
|
|||
.\" taking reference counts on file backing store, and so on.
|
||||
.IP
|
||||
As a convenience,
|
||||
.IR <linux/futex.h>
|
||||
.I <linux/futex.h>
|
||||
defines a set of constants with the suffix
|
||||
.BR _PRIVATE
|
||||
.B _PRIVATE
|
||||
that are equivalents of all of the operations listed below,
|
||||
.\" except the obsolete FUTEX_FD, for which the "private" flag was
|
||||
.\" meaningless
|
||||
|
@ -241,22 +241,25 @@ and so on.
|
|||
This option bit can be employed only with the
|
||||
.BR FUTEX_WAIT_BITSET ,
|
||||
.BR FUTEX_WAIT_REQUEUE_PI ,
|
||||
and
|
||||
(since Linux 4.5)
|
||||
.\" commit 337f13046ff03717a9e99675284a817527440a49
|
||||
.BR FUTEX_WAIT
|
||||
.BR FUTEX_WAIT ,
|
||||
and
|
||||
(since Linux 5.14)
|
||||
.\" commit bf22a6976897977b0a3f1aeba6823c959fc4fdae
|
||||
.B FUTEX_LOCK_PI2
|
||||
operations.
|
||||
.IP
|
||||
If this option is set, the kernel measures the
|
||||
.I timeout
|
||||
against the
|
||||
.BR CLOCK_REALTIME
|
||||
.B CLOCK_REALTIME
|
||||
clock.
|
||||
.IP
|
||||
If this option is not set, the kernel measures the
|
||||
.I timeout
|
||||
against the
|
||||
.BR CLOCK_MONOTONIC
|
||||
.B CLOCK_MONOTONIC
|
||||
clock.
|
||||
.PP
|
||||
The operation specified in
|
||||
|
@ -904,7 +907,9 @@ value to 0 if the previous value was the expected TID.
|
|||
If a futex is already acquired (i.e., has a nonzero value),
|
||||
waiters must employ the
|
||||
.B FUTEX_LOCK_PI
|
||||
operation to acquire the lock.
|
||||
or
|
||||
.B FUTEX_LOCK_PI2
|
||||
operations to acquire the lock.
|
||||
If other threads are waiting for the lock, then the
|
||||
.B FUTEX_WAITERS
|
||||
bit is set in the futex value;
|
||||
|
@ -963,7 +968,8 @@ PI futexes are operated on by specifying one of the values listed below in
|
|||
Note that the PI futex operations must be used as paired operations
|
||||
and are subject to some additional requirements:
|
||||
.IP * 3
|
||||
.B FUTEX_LOCK_PI
|
||||
.BR FUTEX_LOCK_PI ,
|
||||
.BR FUTEX_LOCK_PI2 ,
|
||||
and
|
||||
.B FUTEX_TRYLOCK_PI
|
||||
pair with
|
||||
|
@ -1116,12 +1122,34 @@ The
|
|||
.IR uaddr2 ,
|
||||
.IR val ,
|
||||
and
|
||||
.IR val3
|
||||
.I val3
|
||||
arguments are ignored.
|
||||
.\"
|
||||
.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
||||
.\"
|
||||
.TP
|
||||
.BR FUTEX_LOCK_PI2 " (since Linux 5.14)"
|
||||
.\" commit bf22a6976897977b0a3f1aeba6823c959fc4fdae
|
||||
This operation is the same as
|
||||
.BR FUTEX_LOCK_PI ,
|
||||
except that the clock against which
|
||||
.I timeout
|
||||
is measured is selectable.
|
||||
By default, the (absolute) timeout specified in
|
||||
.I timeout
|
||||
is measured against the
|
||||
.B CLOCK_MONOTONIC
|
||||
clock, but if the
|
||||
.B FUTEX_CLOCK_REALTIME
|
||||
flag is specified in
|
||||
.IR futex_op ,
|
||||
then the timeout is measured against the
|
||||
.B CLOCK_REALTIME
|
||||
clock.
|
||||
.\"
|
||||
.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
||||
.\"
|
||||
.TP
|
||||
.BR FUTEX_TRYLOCK_PI " (since Linux 2.6.18)"
|
||||
.\" commit c87e2837be82df479a6bae9f155c43516d2feebc
|
||||
This operation tries to acquire the lock at
|
||||
|
@ -1168,6 +1196,8 @@ arguments are ignored.
|
|||
.\" commit c87e2837be82df479a6bae9f155c43516d2feebc
|
||||
This operation wakes the top priority waiter that is waiting in
|
||||
.B FUTEX_LOCK_PI
|
||||
or
|
||||
.B FUTEX_LOCK_PI2
|
||||
on the futex address provided by the
|
||||
.I uaddr
|
||||
argument.
|
||||
|
@ -1379,6 +1409,9 @@ Returns the number of waiters that were woken up.
|
|||
.B FUTEX_LOCK_PI
|
||||
Returns 0 if the futex was successfully locked.
|
||||
.TP
|
||||
.B FUTEX_LOCK_PI2
|
||||
Returns 0 if the futex was successfully locked.
|
||||
.TP
|
||||
.B FUTEX_TRYLOCK_PI
|
||||
Returns 0 if the futex was successfully locked.
|
||||
.TP
|
||||
|
@ -1433,8 +1466,9 @@ The value pointed to by
|
|||
is not equal to the expected value
|
||||
.IR val3 .
|
||||
.TP
|
||||
.BR EAGAIN
|
||||
.B EAGAIN
|
||||
.RB ( FUTEX_LOCK_PI ,
|
||||
.BR FUTEX_LOCK_PI2 ,
|
||||
.BR FUTEX_TRYLOCK_PI ,
|
||||
.BR FUTEX_CMP_REQUEUE_PI )
|
||||
The futex owner thread ID of
|
||||
|
@ -1446,8 +1480,9 @@ is about to exit,
|
|||
but has not yet handled the internal state cleanup.
|
||||
Try again.
|
||||
.TP
|
||||
.BR EDEADLK
|
||||
.B EDEADLK
|
||||
.RB ( FUTEX_LOCK_PI ,
|
||||
.BR FUTEX_LOCK_PI2 ,
|
||||
.BR FUTEX_TRYLOCK_PI ,
|
||||
.BR FUTEX_CMP_REQUEUE_PI )
|
||||
The futex word at
|
||||
|
@ -1487,18 +1522,18 @@ a spurious wakeup; since Linux 2.6.22, this no longer happens.
|
|||
.TP
|
||||
.B EINVAL
|
||||
The operation in
|
||||
.IR futex_op
|
||||
.I futex_op
|
||||
is one of those that employs a timeout, but the supplied
|
||||
.I timeout
|
||||
argument was invalid
|
||||
.RI ( tv_sec
|
||||
was less than zero, or
|
||||
.IR tv_nsec
|
||||
.I tv_nsec
|
||||
was not less than 1,000,000,000).
|
||||
.TP
|
||||
.B EINVAL
|
||||
The operation specified in
|
||||
.IR futex_op
|
||||
.I futex_op
|
||||
employs one or both of the pointers
|
||||
.I uaddr
|
||||
and
|
||||
|
@ -1510,17 +1545,17 @@ the address is not four-byte-aligned.
|
|||
.RB ( FUTEX_WAIT_BITSET ,
|
||||
.BR FUTEX_WAKE_BITSET )
|
||||
The bit mask supplied in
|
||||
.IR val3
|
||||
.I val3
|
||||
is zero.
|
||||
.TP
|
||||
.B EINVAL
|
||||
.RB ( FUTEX_CMP_REQUEUE_PI )
|
||||
.I uaddr
|
||||
equals
|
||||
.IR uaddr2
|
||||
.I uaddr2
|
||||
(i.e., an attempt was made to requeue to the same futex).
|
||||
.TP
|
||||
.BR EINVAL
|
||||
.B EINVAL
|
||||
.RB ( FUTEX_FD )
|
||||
The signal number supplied in
|
||||
.I val
|
||||
|
@ -1535,12 +1570,15 @@ is invalid.
|
|||
The kernel detected an inconsistency between the user-space state at
|
||||
.I uaddr
|
||||
and the kernel state\(emthat is, it detected a waiter which waits in
|
||||
.BR FUTEX_LOCK_PI
|
||||
.B FUTEX_LOCK_PI
|
||||
or
|
||||
.B FUTEX_LOCK_PI2
|
||||
on
|
||||
.IR uaddr .
|
||||
.TP
|
||||
.B EINVAL
|
||||
.RB ( FUTEX_LOCK_PI ,
|
||||
.BR FUTEX_LOCK_PI2 ,
|
||||
.BR FUTEX_TRYLOCK_PI ,
|
||||
.BR FUTEX_UNLOCK_PI )
|
||||
The kernel detected an inconsistency between the user-space state at
|
||||
|
@ -1550,7 +1588,7 @@ This indicates either state corruption
|
|||
or that the kernel found a waiter on
|
||||
.I uaddr
|
||||
which is waiting via
|
||||
.BR FUTEX_WAIT
|
||||
.B FUTEX_WAIT
|
||||
or
|
||||
.BR FUTEX_WAIT_BITSET .
|
||||
.TP
|
||||
|
@ -1563,9 +1601,9 @@ and the kernel state;
|
|||
.\" The kernel sees: I have non PI state for a futex you tried to
|
||||
.\" tell me was PI
|
||||
that is, the kernel detected a waiter which waits via
|
||||
.BR FUTEX_WAIT
|
||||
.B FUTEX_WAIT
|
||||
or
|
||||
.BR FUTEX_WAIT_BITSET
|
||||
.B FUTEX_WAIT_BITSET
|
||||
on
|
||||
.IR uaddr2 .
|
||||
.TP
|
||||
|
@ -1575,9 +1613,9 @@ The kernel detected an inconsistency between the user-space state at
|
|||
.I uaddr
|
||||
and the kernel state;
|
||||
that is, the kernel detected a waiter which waits via
|
||||
.BR FUTEX_WAIT
|
||||
.B FUTEX_WAIT
|
||||
or
|
||||
.BR FUTEX_WAIT_BITSET
|
||||
.B FUTEX_WAIT_BITSET
|
||||
on
|
||||
.IR uaddr .
|
||||
.TP
|
||||
|
@ -1589,7 +1627,9 @@ and the kernel state;
|
|||
that is, the kernel detected a waiter which waits on
|
||||
.I uaddr
|
||||
via
|
||||
.BR FUTEX_LOCK_PI
|
||||
.B FUTEX_LOCK_PI
|
||||
or
|
||||
.B FUTEX_LOCK_PI2
|
||||
(instead of
|
||||
.BR FUTEX_WAIT_REQUEUE_PI ).
|
||||
.TP
|
||||
|
@ -1616,8 +1656,9 @@ Invalid argument.
|
|||
.RB ( FUTEX_FD )
|
||||
The system-wide limit on the total number of open files has been reached.
|
||||
.TP
|
||||
.BR ENOMEM
|
||||
.B ENOMEM
|
||||
.RB ( FUTEX_LOCK_PI ,
|
||||
.BR FUTEX_LOCK_PI2 ,
|
||||
.BR FUTEX_TRYLOCK_PI ,
|
||||
.BR FUTEX_CMP_REQUEUE_PI )
|
||||
The kernel could not allocate memory to hold state information.
|
||||
|
@ -1628,17 +1669,19 @@ Invalid operation specified in
|
|||
.TP
|
||||
.B ENOSYS
|
||||
The
|
||||
.BR FUTEX_CLOCK_REALTIME
|
||||
.B FUTEX_CLOCK_REALTIME
|
||||
option was specified in
|
||||
.IR futex_op ,
|
||||
but the accompanying operation was neither
|
||||
.BR FUTEX_WAIT ,
|
||||
.BR FUTEX_WAIT_BITSET ,
|
||||
.BR FUTEX_WAIT_REQUEUE_PI ,
|
||||
nor
|
||||
.BR FUTEX_WAIT_REQUEUE_PI .
|
||||
.BR FUTEX_LOCK_PI2 .
|
||||
.TP
|
||||
.BR ENOSYS
|
||||
.B ENOSYS
|
||||
.RB ( FUTEX_LOCK_PI ,
|
||||
.BR FUTEX_LOCK_PI2 ,
|
||||
.BR FUTEX_TRYLOCK_PI ,
|
||||
.BR FUTEX_UNLOCK_PI ,
|
||||
.BR FUTEX_CMP_REQUEUE_PI ,
|
||||
|
@ -1647,8 +1690,9 @@ A run-time check determined that the operation is not available.
|
|||
The PI-futex operations are not implemented on all architectures and
|
||||
are not supported on some CPU variants.
|
||||
.TP
|
||||
.BR EPERM
|
||||
.B EPERM
|
||||
.RB ( FUTEX_LOCK_PI ,
|
||||
.BR FUTEX_LOCK_PI2 ,
|
||||
.BR FUTEX_TRYLOCK_PI ,
|
||||
.BR FUTEX_CMP_REQUEUE_PI )
|
||||
The caller is not allowed to attach itself to the futex at
|
||||
|
@ -1659,19 +1703,20 @@ the futex at
|
|||
.IR uaddr2 ).
|
||||
(This may be caused by a state corruption in user space.)
|
||||
.TP
|
||||
.BR EPERM
|
||||
.B EPERM
|
||||
.RB ( FUTEX_UNLOCK_PI )
|
||||
The caller does not own the lock represented by the futex word.
|
||||
.TP
|
||||
.BR ESRCH
|
||||
.B ESRCH
|
||||
.RB ( FUTEX_LOCK_PI ,
|
||||
.BR FUTEX_LOCK_PI2 ,
|
||||
.BR FUTEX_TRYLOCK_PI ,
|
||||
.BR FUTEX_CMP_REQUEUE_PI )
|
||||
The thread ID in the futex word at
|
||||
.I uaddr
|
||||
does not exist.
|
||||
.TP
|
||||
.BR ESRCH
|
||||
.B ESRCH
|
||||
.RB ( FUTEX_CMP_REQUEUE_PI )
|
||||
The thread ID in the futex word at
|
||||
.I uaddr2
|
||||
|
@ -1679,7 +1724,7 @@ does not exist.
|
|||
.TP
|
||||
.B ETIMEDOUT
|
||||
The operation in
|
||||
.IR futex_op
|
||||
.I futex_op
|
||||
employed the timeout specified in
|
||||
.IR timeout ,
|
||||
and the timeout expired before the operation completed.
|
||||
|
|
|
@ -0,0 +1,999 @@
|
|||
.\" Copyright (c) 2021 by Christian Brauner <christian.brauner@ubuntu.com>
|
||||
.\"
|
||||
.\" %%%LICENSE_START(VERBATIM)
|
||||
.\" Permission is granted to make and distribute verbatim copies of this
|
||||
.\" manual provided the copyright notice and this permission notice are
|
||||
.\" preserved on all copies.
|
||||
.\"
|
||||
.\" Permission is granted to copy and distribute modified versions of this
|
||||
.\" manual under the conditions for verbatim copying, provided that the
|
||||
.\" entire resulting derived work is distributed under the terms of a
|
||||
.\" permission notice identical to this one.
|
||||
.\"
|
||||
.\" Since the Linux kernel and libraries are constantly changing, this
|
||||
.\" manual page may be incorrect or out-of-date. The author(s) assume no
|
||||
.\" responsibility for errors or omissions, or for damages resulting from
|
||||
.\" the use of the information contained herein. The author(s) may not
|
||||
.\" have taken the same level of care in the production of this manual,
|
||||
.\" which is licensed free of charge, as they might when working
|
||||
.\" professionally.
|
||||
.\"
|
||||
.\" Formatted or processed versions of this manual, if unaccompanied by
|
||||
.\" the source, must acknowledge the copyright and authors of this work.
|
||||
.\" %%%LICENSE_END
|
||||
.\"
|
||||
.TH MOUNT_SETATTR 2 2021-03-22 "Linux" "Linux Programmer's Manual"
|
||||
.SH NAME
|
||||
mount_setattr \- change mount properties of a mount or mount tree
|
||||
.SH SYNOPSIS
|
||||
.nf
|
||||
|
||||
.PP
|
||||
.BR "#include <linux/fcntl.h>" " /* Definition of " AT_* " constants */"
|
||||
.BR "#include <linux/mount.h>" " /* Definition of " MOUNT_ATTR_* " constants */"
|
||||
.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
|
||||
.B #include <unistd.h>
|
||||
.PP
|
||||
.BI "int syscall(SYS_mount_setattr, int " dirfd ", const char *" path ,
|
||||
.BI " unsigned int " flags ", struct mount_attr *" attr \
|
||||
", size_t " size );
|
||||
.fi
|
||||
.PP
|
||||
.IR Note :
|
||||
glibc provides no wrapper for
|
||||
.BR mount_setattr (),
|
||||
necessitating the use of
|
||||
.BR syscall (2).
|
||||
.SH DESCRIPTION
|
||||
The
|
||||
.BR mount_setattr ()
|
||||
system call changes the mount properties of a mount or an entire mount tree.
|
||||
If
|
||||
.I path
|
||||
is a relative pathname,
|
||||
then it is interpreted relative to
|
||||
the directory referred to by the file descriptor
|
||||
.IR dirfd .
|
||||
If
|
||||
.I dirfd
|
||||
is the special value
|
||||
.BR AT_FDCWD ,
|
||||
then
|
||||
.I path
|
||||
is interpreted relative to
|
||||
the current working directory of the calling process.
|
||||
If
|
||||
.I path
|
||||
is the empty string and
|
||||
.B AT_EMPTY_PATH
|
||||
is specified in
|
||||
.IR flags ,
|
||||
then the mount properties of the mount identified by
|
||||
.I dirfd
|
||||
are changed.
|
||||
.PP
|
||||
The
|
||||
.BR mount_setattr ()
|
||||
system call uses an extensible structure
|
||||
.RI ( "struct mount_attr" )
|
||||
to allow for future extensions.
|
||||
Any non-flag extensions to
|
||||
.BR mount_setattr ()
|
||||
will be implemented as new fields appended to this structure,
|
||||
with a zero value in a new field resulting in the kernel behaving
|
||||
as though that extension field was not present.
|
||||
Therefore,
|
||||
the caller
|
||||
.I must
|
||||
zero-fill this structure on initialization.
|
||||
See the "Extensibility" subsection under
|
||||
.B NOTES
|
||||
for more details.
|
||||
.PP
|
||||
The
|
||||
.I size
|
||||
argument should usually be specified as
|
||||
.IR "sizeof(struct mount_attr)" .
|
||||
However,
|
||||
if the caller does not intend to make use of features that
|
||||
got introduced after the initial version of
|
||||
.IR "struct mount_attr" ,
|
||||
it is possible to pass
|
||||
the size of the initial struct together with the larger struct.
|
||||
This allows the kernel to not copy later parts of the struct
|
||||
that aren't used anyway.
|
||||
With each extension that changes the size of
|
||||
.IR "struct mount_attr" ,
|
||||
the kernel will expose a definition of the form
|
||||
.BI MOUNT_ATTR_SIZE_VER number\c
|
||||
\&.
|
||||
For example, the macro for the size of the initial version of
|
||||
.I struct mount_attr
|
||||
is
|
||||
.BR MOUNT_ATTR_SIZE_VER0 .
|
||||
.PP
|
||||
The
|
||||
.I flags
|
||||
argument can be used to alter the path resolution behavior.
|
||||
The supported values are:
|
||||
.TP
|
||||
.B AT_EMPTY_PATH
|
||||
If
|
||||
.I path
|
||||
is the empty string,
|
||||
change the mount properties on
|
||||
.I dirfd
|
||||
itself.
|
||||
.TP
|
||||
.B AT_RECURSIVE
|
||||
Change the mount properties of the entire mount tree.
|
||||
.TP
|
||||
.B AT_SYMLINK_NOFOLLOW
|
||||
Don't follow trailing symbolic links.
|
||||
.TP
|
||||
.B AT_NO_AUTOMOUNT
|
||||
Don't trigger automounts.
|
||||
.PP
|
||||
The
|
||||
.I attr
|
||||
argument of
|
||||
.BR mount_setattr ()
|
||||
is a structure of the following form:
|
||||
.PP
|
||||
.in +4n
|
||||
.EX
|
||||
struct mount_attr {
|
||||
__u64 attr_set; /* Mount properties to set */
|
||||
__u64 attr_clr; /* Mount properties to clear */
|
||||
__u64 propagation; /* Mount propagation type */
|
||||
__u64 userns_fd; /* User namespace file descriptor */
|
||||
};
|
||||
.EE
|
||||
.in
|
||||
.PP
|
||||
The
|
||||
.I attr_set
|
||||
and
|
||||
.I attr_clr
|
||||
members are used to specify the mount properties that
|
||||
are supposed to be set or cleared for a mount or mount tree.
|
||||
Flags set in
|
||||
.I attr_set
|
||||
enable a property on a mount or mount tree,
|
||||
and flags set in
|
||||
.I attr_clr
|
||||
remove a property from a mount or mount tree.
|
||||
.PP
|
||||
When changing mount properties,
|
||||
the kernel will first clear the flags specified
|
||||
in the
|
||||
.I attr_clr
|
||||
field,
|
||||
and then set the flags specified in the
|
||||
.I attr_set
|
||||
field:
|
||||
.PP
|
||||
.in +4n
|
||||
.EX
|
||||
struct mount_attr attr = {
|
||||
.attr_clr = MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV,
|
||||
.attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
|
||||
};
|
||||
unsigned int current_mnt_flags = mnt->mnt_flags;
|
||||
|
||||
/*
|
||||
* Clear all flags set in .attr_clr,
|
||||
* clearing MOUNT_ATTR_NOEXEC and MOUNT_ATTR_NODEV.
|
||||
*/
|
||||
current_mnt_flags &= ~attr->attr_clr;
|
||||
|
||||
/*
|
||||
* Now set all flags set in .attr_set,
|
||||
* applying MOUNT_ATTR_RDONLY and MOUNT_ATTR_NOSUID.
|
||||
*/
|
||||
current_mnt_flags |= attr->attr_set;
|
||||
|
||||
mnt->mnt_flags = current_mnt_flags;
|
||||
.EE
|
||||
.in
|
||||
.PP
|
||||
As a result of this change, the mount or mount tree (a) is read-only;
|
||||
(b) blocks the execution of set-user-ID and set-group-ID programs;
|
||||
(c) allows execution of programs; and (d) allows access to devices.
|
||||
.PP
|
||||
Multiple changes with the same set of flags requested
|
||||
in
|
||||
.I attr_clr
|
||||
and
|
||||
.I attr_set
|
||||
are guaranteed to be idempotent after the changes have been applied.
|
||||
.PP
|
||||
The following mount attributes can be specified in the
|
||||
.I attr_set
|
||||
or
|
||||
.I attr_clr
|
||||
fields:
|
||||
.TP
|
||||
.B MOUNT_ATTR_RDONLY
|
||||
If set in
|
||||
.IR attr_set ,
|
||||
makes the mount read-only.
|
||||
If set in
|
||||
.IR attr_clr ,
|
||||
removes the read-only setting if set on the mount.
|
||||
.TP
|
||||
.B MOUNT_ATTR_NOSUID
|
||||
If set in
|
||||
.IR attr_set ,
|
||||
causes the mount not to honor the set-user-ID and set-group-ID mode bits and
|
||||
file capabilities when executing programs.
|
||||
If set in
|
||||
.IR attr_clr ,
|
||||
clears the set-user-ID, set-group-ID,
|
||||
and file capability restriction if set on this mount.
|
||||
.TP
|
||||
.B MOUNT_ATTR_NODEV
|
||||
If set in
|
||||
.IR attr_set ,
|
||||
prevents access to devices on this mount.
|
||||
If set in
|
||||
.IR attr_clr ,
|
||||
removes the restriction that prevented accessing devices on this mount.
|
||||
.TP
|
||||
.B MOUNT_ATTR_NOEXEC
|
||||
If set in
|
||||
.IR attr_set ,
|
||||
prevents executing programs on this mount.
|
||||
If set in
|
||||
.IR attr_clr ,
|
||||
removes the restriction that prevented executing programs on this mount.
|
||||
.TP
|
||||
.B MOUNT_ATTR_NOSYMFOLLOW
|
||||
If set in
|
||||
.IR attr_set ,
|
||||
prevents following symbolic links on this mount.
|
||||
If set in
|
||||
.IR attr_clr ,
|
||||
removes the restriction that prevented following symbolic links on this mount.
|
||||
.TP
|
||||
.B MOUNT_ATTR_NODIRATIME
|
||||
If set in
|
||||
.IR attr_set ,
|
||||
prevents updating access time for directories on this mount.
|
||||
If set in
|
||||
.IR attr_clr ,
|
||||
removes the restriction that prevented updating access time for directories.
|
||||
Note that
|
||||
.B MOUNT_ATTR_NODIRATIME
|
||||
can be combined with other access-time settings
|
||||
and is implied by the noatime setting.
|
||||
All other access-time settings are mutually exclusive.
|
||||
.TP
|
||||
.BR MOUNT_ATTR__ATIME " - changing access-time settings"
|
||||
In the new mount API, the access-time values are an enum starting from 0.
|
||||
Even though they are an enum (in contrast to the other mount flags such as
|
||||
.BR MOUNT_ATTR_NOEXEC ),
|
||||
they are nonetheless passed in
|
||||
.I attr_set
|
||||
and
|
||||
.I attr_clr
|
||||
for consistency with
|
||||
.BR fsmount (2),
|
||||
which introduced this behavior.
|
||||
.IP
|
||||
Note that,
|
||||
since access times are an enum
|
||||
not a bit map,
|
||||
users wanting to transition to a different access-time setting cannot simply
|
||||
specify the access-time setting in
|
||||
.I attr_set
|
||||
but must also set
|
||||
.B MOUNT_ATTR__ATIME
|
||||
in the
|
||||
.I attr_clr
|
||||
field.
|
||||
The kernel will verify that
|
||||
.B MOUNT_ATTR__ATIME
|
||||
isn't partially set in
|
||||
.IR attr_clr ,
|
||||
and that
|
||||
.I attr_set
|
||||
doesn't have any access-time bits set if
|
||||
.B MOUNT_ATTR__ATIME
|
||||
isn't set in
|
||||
.IR attr_clr .
|
||||
.RS
|
||||
.TP
|
||||
.B MOUNT_ATTR_RELATIME
|
||||
When a file is accessed via this mount,
|
||||
update the file's last access time (atime)
|
||||
only if the current value of atime is less than or equal to
|
||||
the file's last modification time (mtime) or last status change time (ctime).
|
||||
.IP
|
||||
To enable this access-time setting on a mount or mount tree,
|
||||
.B MOUNT_ATTR_RELATIME
|
||||
must be set in
|
||||
.I attr_set
|
||||
and
|
||||
.B MOUNT_ATTR__ATIME
|
||||
must be set in the
|
||||
.I attr_clr
|
||||
field.
|
||||
.TP
|
||||
.B MOUNT_ATTR_NOATIME
|
||||
Do not update access times for (all types of) files on this mount.
|
||||
.IP
|
||||
To enable this access-time setting on a mount or mount tree,
|
||||
.B MOUNT_ATTR_NOATIME
|
||||
must be set in
|
||||
.I attr_set
|
||||
and
|
||||
.B MOUNT_ATTR__ATIME
|
||||
must be set in the
|
||||
.I attr_clr
|
||||
field.
|
||||
.TP
|
||||
.B MOUNT_ATTR_STRICTATIME
|
||||
Always update the last access time (atime)
|
||||
when files are accessed on this mount.
|
||||
.IP
|
||||
To enable this access-time setting on a mount or mount tree,
|
||||
.B MOUNT_ATTR_STRICTATIME
|
||||
must be set in
|
||||
.I attr_set
|
||||
and
|
||||
.B MOUNT_ATTR__ATIME
|
||||
must be set in the
|
||||
.I attr_clr
|
||||
field.
|
||||
.RE
|
||||
.TP
|
||||
.B MOUNT_ATTR_IDMAP
|
||||
If set in
|
||||
.IR attr_set ,
|
||||
creates an ID-mapped mount.
|
||||
The ID mapping is taken from the user namespace specified in
|
||||
.I userns_fd
|
||||
and attached to the mount.
|
||||
.IP
|
||||
Since it is not supported to
|
||||
change the ID mapping of a mount after it has been ID mapped,
|
||||
it is invalid to specify
|
||||
.B MOUNT_ATTR_IDMAP
|
||||
in
|
||||
.IR attr_clr .
|
||||
.IP
|
||||
For further details, see the subsection "ID-mapped mounts" under NOTES.
|
||||
.PP
|
||||
The
|
||||
.I propagation
|
||||
field is used to specify the propagation type of the mount or mount tree.
|
||||
Mount propagation options are mutually exclusive;
|
||||
that is,
|
||||
the propagation values behave like an enum.
|
||||
The supported mount propagation types are:
|
||||
.TP
|
||||
.B MS_PRIVATE
|
||||
Turn all mounts into private mounts.
|
||||
Mount and unmount events do not propagate into or out of this mount point.
|
||||
.TP
|
||||
.B MS_SHARED
|
||||
Turn all mounts into shared mounts.
|
||||
Mount points share events with members of a peer group.
|
||||
Mount and unmount events immediately under this mount point
|
||||
will propagate to the other mount points that are members of the peer group.
|
||||
Propagation here means that the same mount or unmount will automatically occur
|
||||
under all of the other mount points in the peer group.
|
||||
Conversely,
|
||||
mount and unmount events that take place under peer mount points
|
||||
will propagate to this mount point.
|
||||
.TP
|
||||
.B MS_SLAVE
|
||||
Turn all mounts into dependent mounts.
|
||||
Mount and unmount events propagate into this mount point
|
||||
from a shared peer group.
|
||||
Mount and unmount events under this mount point do not propagate to any peer.
|
||||
.TP
|
||||
.B MS_UNBINDABLE
|
||||
This is like a private mount,
|
||||
and in addition this mount can't be bind mounted.
|
||||
Attempts to bind mount this mount will fail.
|
||||
When a recursive bind mount is performed on a directory subtree,
|
||||
any bind mounts within the subtree are automatically pruned
|
||||
(i.e., not replicated)
|
||||
when replicating that subtree to produce the target subtree.
|
||||
.PP
|
||||
For further details on propagation types, see
|
||||
.BR mount_namespaces (7).
|
||||
.SH RETURN VALUE
|
||||
On success,
|
||||
.BR mount_setattr ()
|
||||
returns zero.
|
||||
On error,
|
||||
\-1 is returned and
|
||||
.I errno
|
||||
is set to indicate the cause of the error.
|
||||
.SH ERRORS
|
||||
.TP
|
||||
.B EBADF
|
||||
.I dirfd
|
||||
is not a valid file descriptor.
|
||||
.TP
|
||||
.B EBADF
|
||||
.I userns_fd
|
||||
is not a valid file descriptor.
|
||||
.TP
|
||||
.B EBUSY
|
||||
The caller tried to change the mount to
|
||||
.BR MOUNT_ATTR_RDONLY ,
|
||||
but the mount still holds files open for writing.
|
||||
.TP
|
||||
.B EINVAL
|
||||
The path specified via the
|
||||
.I dirfd
|
||||
and
|
||||
.I path
|
||||
arguments to
|
||||
.BR mount_setattr ()
|
||||
isn't a mount point.
|
||||
.TP
|
||||
.B EINVAL
|
||||
An unsupported value was set in
|
||||
.IR flags .
|
||||
.TP
|
||||
.B EINVAL
|
||||
An unsupported value was specified in the
|
||||
.I attr_set
|
||||
field of
|
||||
.IR mount_attr .
|
||||
.TP
|
||||
.B EINVAL
|
||||
An unsupported value was specified in the
|
||||
.I attr_clr
|
||||
field of
|
||||
.IR mount_attr .
|
||||
.TP
|
||||
.B EINVAL
|
||||
An unsupported value was specified in the
|
||||
.I propagation
|
||||
field of
|
||||
.IR mount_attr .
|
||||
.TP
|
||||
.B EINVAL
|
||||
More than one of
|
||||
.BR MS_SHARED ,
|
||||
.BR MS_SLAVE ,
|
||||
.BR MS_PRIVATE ,
|
||||
or
|
||||
.B MS_UNBINDABLE
|
||||
was set in the
|
||||
.I propagation
|
||||
field of
|
||||
.IR mount_attr .
|
||||
.TP
|
||||
.B EINVAL
|
||||
An access-time setting was specified in the
|
||||
.I attr_set
|
||||
field without
|
||||
.B MOUNT_ATTR__ATIME
|
||||
being set in the
|
||||
.I attr_clr
|
||||
field.
|
||||
.TP
|
||||
.B EINVAL
|
||||
.B MOUNT_ATTR_IDMAP
|
||||
was specified in
|
||||
.IR attr_clr .
|
||||
.TP
|
||||
.B EINVAL
|
||||
A file descriptor value was specified in
|
||||
.I userns_fd
|
||||
which exceeds
|
||||
.BR INT_MAX .
|
||||
.TP
|
||||
.B EINVAL
|
||||
A valid file descriptor value was specified in
|
||||
.IR userns_fd ,
|
||||
but the file descriptor wasn't a namespace file descriptor
|
||||
or did not refer to a user namespace.
|
||||
.TP
|
||||
.B EINVAL
|
||||
The underlying filesystem does not support ID-mapped mounts.
|
||||
.TP
|
||||
.B EINVAL
|
||||
The mount that is to be ID mapped is not a detached/anonymous mount;
|
||||
that is, the mount is already visible in the filesystem.
|
||||
.TP
|
||||
.B EINVAL
|
||||
A partial access-time setting was specified in
|
||||
.I attr_clr
|
||||
instead of
|
||||
.B MOUNT_ATTR__ATIME
|
||||
being set.
|
||||
.TP
|
||||
.B EINVAL
|
||||
The mount is located outside the caller's mount namespace.
|
||||
.TP
|
||||
.B EINVAL
|
||||
The underlying filesystem is mounted in a user namespace.
|
||||
.TP
|
||||
.B ENOENT
|
||||
A pathname was empty or had a nonexistent component.
|
||||
.TP
|
||||
.B ENOMEM
|
||||
When changing mount propagation to
|
||||
.BR MS_SHARED ,
|
||||
a new peer group ID needs to be allocated for all mounts without a peer group
|
||||
ID set.
|
||||
Allocation of this peer group ID has failed.
|
||||
.TP
|
||||
.B ENOSPC
|
||||
When changing mount propagation to
|
||||
.BR MS_SHARED ,
|
||||
a new peer group ID needs to be allocated for all mounts without a peer group
|
||||
ID set.
|
||||
Allocation of this peer group ID can fail.
|
||||
Note that technically further error codes are possible that are specific to the
|
||||
ID allocation implementation used.
|
||||
.TP
|
||||
.B EPERM
|
||||
One of the mounts had at least one of
|
||||
.BR MOUNT_ATTR_NOATIME ,
|
||||
.BR MOUNT_ATTR_NODEV ,
|
||||
.BR MOUNT_ATTR_NODIRATIME ,
|
||||
.BR MOUNT_ATTR_NOEXEC ,
|
||||
.BR MOUNT_ATTR_NOSUID ,
|
||||
or
|
||||
.B MOUNT_ATTR_RDONLY
|
||||
set and the flag is locked.
|
||||
Mount attributes become locked on a mount if:
|
||||
.RS
|
||||
.IP \(bu 3
|
||||
A new mount or mount tree is created causing mount propagation across user
|
||||
namespaces.
|
||||
The kernel will lock the aforementioned flags to protect these sensitive
|
||||
properties from being altered.
|
||||
.IP \(bu
|
||||
A new mount and user namespace pair is created.
|
||||
This happens for example when specifying
|
||||
.B CLONE_NEWUSER | CLONE_NEWNS
|
||||
in
|
||||
.BR unshare (2),
|
||||
.BR clone (2),
|
||||
or
|
||||
.BR clone3 (2).
|
||||
The aforementioned flags become locked to protect user namespaces from altering
|
||||
sensitive mount properties.
|
||||
.RE
|
||||
.TP
|
||||
.B EPERM
|
||||
A valid file descriptor value was specified in
|
||||
.IR userns_fd ,
|
||||
but the file descriptor refers to the initial user namespace.
|
||||
.TP
|
||||
.B EPERM
|
||||
An attempt was made to ID map a mount that is already ID mapped.
|
||||
.TP
|
||||
.B EPERM
|
||||
The caller does not have
|
||||
.B CAP_SYS_ADMIN
|
||||
in the initial user namespace.
|
||||
.SH VERSIONS
|
||||
.BR mount_setattr ()
|
||||
first appeared in Linux 5.12.
|
||||
.\" commit 7d6beb71da3cc033649d641e1e608713b8220290
|
||||
.\" commit 2a1867219c7b27f928e2545782b86daaf9ad50bd
|
||||
.\" commit 9caccd41541a6f7d6279928d9f971f6642c361af
|
||||
.SH CONFORMING TO
|
||||
.BR mount_setattr ()
|
||||
is Linux-specific.
|
||||
.SH NOTES
|
||||
.SS ID-mapped mounts
|
||||
Creating an ID-mapped mount makes it possible to
|
||||
change the ownership of all files located under a mount.
|
||||
Thus, ID-mapped mounts make it possible to
|
||||
change ownership in a temporary and localized way.
|
||||
It is a localized change because
|
||||
ownership changes are restricted to a specific mount.
|
||||
All other users and locations where the filesystem is exposed are unaffected.
|
||||
And it is a temporary change because
|
||||
ownership changes are tied to the lifetime of the mount.
|
||||
.PP
|
||||
Whenever callers interact with the filesystem through an ID-mapped mount,
|
||||
the ID mapping of the mount will be applied to
|
||||
user and group IDs associated with filesystem objects.
|
||||
This encompasses the user and group IDs associated with inodes
|
||||
and also the following
|
||||
.BR xattr (7)
|
||||
keys:
|
||||
.IP \(bu 3
|
||||
.IR security.capability ,
|
||||
whenever filesystem capabilities
|
||||
are stored or returned in the
|
||||
.B VFS_CAP_REVISION_3
|
||||
format,
|
||||
which stores a root user ID alongside the capabilities
|
||||
(see
|
||||
.BR capabilities (7)).
|
||||
.IP \(bu
|
||||
.I system.posix_acl_access
|
||||
and
|
||||
.IR system.posix_acl_default ,
|
||||
whenever user IDs or group IDs are stored in
|
||||
.B ACL_USER
|
||||
or
|
||||
.B ACL_GROUP
|
||||
entries.
|
||||
.PP
|
||||
The following conditions must be met in order to create an ID-mapped mount:
|
||||
.IP \(bu 3
|
||||
The caller must have the
|
||||
.B CAP_SYS_ADMIN
|
||||
capability in the initial user namespace.
|
||||
.IP \(bu
|
||||
The filesystem must be mounted in the initial user namespace.
|
||||
.IP \(bu
|
||||
The underlying filesystem must support ID-mapped mounts.
|
||||
Currently, the
|
||||
.BR xfs (5),
|
||||
.BR ext4 (5),
|
||||
and
|
||||
.B FAT
|
||||
filesystems support ID-mapped mounts
|
||||
with more filesystems being actively worked on.
|
||||
.IP \(bu
|
||||
The mount must not already be ID-mapped.
|
||||
This also implies that the ID mapping of a mount cannot be altered.
|
||||
.IP \(bu
|
||||
The mount must be a detached/anonymous mount;
|
||||
that is,
|
||||
it must have been created by calling
|
||||
.BR open_tree (2)
|
||||
with the
|
||||
.B OPEN_TREE_CLONE
|
||||
flag and it must not already have been visible in the filesystem.
|
||||
.PP
|
||||
ID mappings can be created for user IDs, group IDs, and project IDs.
|
||||
An ID mapping is essentially a mapping of a range of user or group IDs into
|
||||
another or the same range of user or group IDs.
|
||||
ID mappings are usually written as three numbers
|
||||
either separated by white space or a full stop.
|
||||
The first two numbers specify the starting user or group ID
|
||||
in each of the two user namespaces.
|
||||
The third number specifies the range of the ID mapping.
|
||||
For example, a mapping for user IDs such as "1000 1001 1" would indicate that
|
||||
user ID 1000 in the caller's user namespace is mapped to
|
||||
user ID 1001 in its ancestor user namespace.
|
||||
Since the map range is 1,
|
||||
only user ID 1000 is mapped.
|
||||
.PP
|
||||
It is possible to specify up to 340 ID mappings for each ID mapping type.
|
||||
If any user IDs or group IDs are not mapped,
|
||||
all files owned by that unmapped user or group ID will appear as
|
||||
being owned by the overflow user ID or overflow group ID respectively.
|
||||
.PP
|
||||
Further details and instructions for setting up ID mappings can be found in the
|
||||
.BR user_namespaces (7)
|
||||
man page.
|
||||
.PP
|
||||
In the common case, the user namespace passed in
|
||||
.I userns_fd
|
||||
together with
|
||||
.B MOUNT_ATTR_IDMAP
|
||||
in
|
||||
.I attr_set
|
||||
to create an ID-mapped mount will be the user namespace of a container.
|
||||
In other scenarios it will be a dedicated user namespace associated with
|
||||
a user's login session as is the case for portable home directories in
|
||||
.BR systemd-homed.service (8).
|
||||
It is also perfectly fine to create a dedicated user namespace
|
||||
for the sake of ID mapping a mount.
|
||||
.PP
|
||||
ID-mapped mounts can be useful in the following
|
||||
and a variety of other scenarios:
|
||||
.IP \(bu 3
|
||||
Sharing files between multiple users or multiple machines,
|
||||
especially in complex scenarios.
|
||||
For example,
|
||||
ID-mapped mounts are used to implement portable home directories in
|
||||
.BR systemd-homed.service (8),
|
||||
where they allow users to move their home directory
|
||||
to an external storage device
|
||||
and use it on multiple computers
|
||||
where they are assigned different user IDs and group IDs.
|
||||
This effectively makes it possible to
|
||||
assign random user IDs and group IDs at login time.
|
||||
.IP \(bu
|
||||
Sharing files from the host with unprivileged containers.
|
||||
This allows a user to avoid having to change ownership permanently through
|
||||
.BR chown (2).
|
||||
.IP \(bu
|
||||
ID mapping a container's root filesystem.
|
||||
Users don't need to change ownership permanently through
|
||||
.BR chown (2).
|
||||
Especially for large root filesystems, using
|
||||
.BR chown (2)
|
||||
can be prohibitively expensive.
|
||||
.IP \(bu
|
||||
Sharing files between containers with non-overlapping ID mappings.
|
||||
.IP \(bu
|
||||
Implementing discretionary access control (DAC) permission checking
|
||||
for filesystems lacking a concept of ownership.
|
||||
.IP \(bu
|
||||
Efficiently changing ownership on a per-mount basis.
|
||||
In contrast to
|
||||
.BR chown (2),
|
||||
changing ownership of large sets of files is instantaneous with
|
||||
ID-mapped mounts.
|
||||
This is especially useful when ownership of
|
||||
an entire root filesystem of a virtual machine or container
|
||||
is to be changed as mentioned above.
|
||||
With ID-mapped mounts,
|
||||
a single
|
||||
.BR mount_setattr ()
|
||||
system call will be sufficient to change the ownership of all files.
|
||||
.IP \(bu
|
||||
Taking the current ownership into account.
|
||||
ID mappings specify precisely
|
||||
what a user or group ID is supposed to be mapped to.
|
||||
This contrasts with the
|
||||
.BR chown (2)
|
||||
system call which cannot by itself
|
||||
take the current ownership of the files it changes into account.
|
||||
It simply changes the ownership to the specified user ID and group ID.
|
||||
.IP \(bu
|
||||
Locally and temporarily restricted ownership changes.
|
||||
ID-mapped mounts make it possible to change ownership locally,
|
||||
restricting it to specific mounts,
|
||||
and temporarily as the ownership changes only apply as long as the mount exists.
|
||||
By contrast,
|
||||
changing ownership via the
|
||||
.BR chown (2)
|
||||
system call changes the ownership globally and permanently.
|
||||
.\"
|
||||
.SS Extensibility
|
||||
In order to allow for future extensibility,
|
||||
.BR mount_setattr ()
|
||||
requires the user-space application to specify the size of the
|
||||
.I mount_attr
|
||||
structure that it is passing.
|
||||
By providing this information, it is possible for
|
||||
.BR mount_setattr ()
|
||||
to provide both forwards- and backwards-compatibility, with
|
||||
.I size
|
||||
acting as an implicit version number.
|
||||
(Because new extension fields will always
|
||||
be appended, the structure size will always increase.)
|
||||
This extensibility design is very similar to other system calls such as
|
||||
.BR sched_setattr (2),
|
||||
.BR perf_event_open (2),
|
||||
.BR clone3 (2)
|
||||
and
|
||||
.BR openat2 (2).
|
||||
.PP
|
||||
Let
|
||||
.I usize
|
||||
be the size of the structure as specified by the user-space application,
|
||||
and let
|
||||
.I ksize
|
||||
be the size of the structure which the kernel supports,
|
||||
then there are three cases to consider:
|
||||
.IP \(bu 3
|
||||
If
|
||||
.I ksize
|
||||
equals
|
||||
.IR usize ,
|
||||
then there is no version mismatch and
|
||||
.I attr
|
||||
can be used verbatim.
|
||||
.IP \(bu
|
||||
If
|
||||
.I ksize
|
||||
is larger than
|
||||
.IR usize ,
|
||||
then there are some extension fields that the kernel supports
|
||||
which the user-space application is unaware of.
|
||||
Because a zero value in any added extension field signifies a no-op,
|
||||
the kernel treats all of the extension fields
|
||||
not provided by the user-space application
|
||||
as having zero values.
|
||||
This provides backwards-compatibility.
|
||||
.IP \(bu
|
||||
If
|
||||
.I ksize
|
||||
is smaller than
|
||||
.IR usize ,
|
||||
then there are some extension fields which the user-space application is aware
|
||||
of but which the kernel does not support.
|
||||
Because any extension field must have its zero values signify a no-op,
|
||||
the kernel can safely ignore the unsupported extension fields
|
||||
if they are all zero.
|
||||
If any unsupported extension fields are non-zero,
|
||||
then \-1 is returned and
|
||||
.I errno
|
||||
is set to
|
||||
.BR E2BIG .
|
||||
This provides forwards-compatibility.
|
||||
.PP
|
||||
Because the definition of
|
||||
.I struct mount_attr
|
||||
may change in the future
|
||||
(with new fields being added when system headers are updated),
|
||||
user-space applications should zero-fill
|
||||
.I struct mount_attr
|
||||
to ensure that recompiling the program with new headers will not result in
|
||||
spurious errors at runtime.
|
||||
The simplest way is to use a designated initializer:
|
||||
.PP
|
||||
.in +4n
|
||||
.EX
|
||||
struct mount_attr attr = {
|
||||
.attr_set = MOUNT_ATTR_RDONLY,
|
||||
.attr_clr = MOUNT_ATTR_NODEV
|
||||
};
|
||||
.EE
|
||||
.in
|
||||
.PP
|
||||
Alternatively, the structure can be zero-filled using
|
||||
.BR memset (3)
|
||||
or similar functions:
|
||||
.PP
|
||||
.in +4n
|
||||
.EX
|
||||
struct mount_attr attr;
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
attr.attr_set = MOUNT_ATTR_RDONLY;
|
||||
attr.attr_clr = MOUNT_ATTR_NODEV;
|
||||
.EE
|
||||
.in
|
||||
.PP
|
||||
A user-space application that wishes to determine which extensions the running
|
||||
kernel supports can do so by conducting a binary search on
|
||||
.I size
|
||||
with a structure which has every byte nonzero
|
||||
(to find the largest value which doesn't produce an error of
|
||||
.BR E2BIG ).
|
||||
.SH EXAMPLES
|
||||
.EX
|
||||
/*
|
||||
* This program allows the caller to create a new detached mount
|
||||
* and set various properties on it.
|
||||
*/
|
||||
#define _GNU_SOURCE
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <getopt.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/types.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static inline int
|
||||
mount_setattr(int dirfd, const char *path, unsigned int flags,
|
||||
struct mount_attr *attr, size_t size)
|
||||
{
|
||||
return syscall(SYS_mount_setattr, dirfd, path, flags, attr, size);
|
||||
}
|
||||
|
||||
static inline int
|
||||
open_tree(int dirfd, const char *filename, unsigned int flags)
|
||||
{
|
||||
return syscall(SYS_open_tree, dirfd, filename, flags);
|
||||
}
|
||||
|
||||
static inline int
|
||||
move_mount(int from_dirfd, const char *from_pathname,
|
||||
int to_dirfd, const char *to_pathname, unsigned int flags)
|
||||
{
|
||||
return syscall(SYS_move_mount, from_dirfd, from_pathname,
|
||||
to_dirfd, to_pathname, flags);
|
||||
}
|
||||
|
||||
static const struct option longopts[] = {
|
||||
{"map\-mount", required_argument, NULL, 'a'},
|
||||
{"recursive", no_argument, NULL, 'b'},
|
||||
{"read\-only", no_argument, NULL, 'c'},
|
||||
{"block\-setid", no_argument, NULL, 'd'},
|
||||
{"block\-devices", no_argument, NULL, 'e'},
|
||||
{"block\-exec", no_argument, NULL, 'f'},
|
||||
{"no\-access\-time", no_argument, NULL, 'g'},
|
||||
{ NULL, 0, NULL, 0 },
|
||||
};
|
||||
|
||||
#define exit_log(format, ...) do \e
|
||||
{ \e
|
||||
fprintf(stderr, format, ##__VA_ARGS__); \e
|
||||
exit(EXIT_FAILURE); \e
|
||||
} while (0)
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
struct mount_attr *attr = &(struct mount_attr){};
|
||||
int fd_userns = \-EBADF;
|
||||
bool recursive = false;
|
||||
int index = 0;
|
||||
int ret;
|
||||
|
||||
while ((ret = getopt_long_only(argc, argv, "",
|
||||
longopts, &index)) != \-1) {
|
||||
switch (ret) {
|
||||
case 'a':
|
||||
fd_userns = open(optarg, O_RDONLY | O_CLOEXEC);
|
||||
if (fd_userns == \-1)
|
||||
exit_log("%m \- Failed to open %s\en", optarg);
|
||||
break;
|
||||
case 'b':
|
||||
recursive = true;
|
||||
break;
|
||||
case 'c':
|
||||
attr\->attr_set |= MOUNT_ATTR_RDONLY;
|
||||
break;
|
||||
case 'd':
|
||||
attr\->attr_set |= MOUNT_ATTR_NOSUID;
|
||||
break;
|
||||
case 'e':
|
||||
attr\->attr_set |= MOUNT_ATTR_NODEV;
|
||||
break;
|
||||
case 'f':
|
||||
attr\->attr_set |= MOUNT_ATTR_NOEXEC;
|
||||
break;
|
||||
case 'g':
|
||||
attr\->attr_set |= MOUNT_ATTR_NOATIME;
|
||||
attr\->attr_clr |= MOUNT_ATTR__ATIME;
|
||||
break;
|
||||
default:
|
||||
exit_log("Invalid argument specified\en");
|
||||
}
|
||||
}
|
||||
|
||||
if ((argc \- optind) < 2)
|
||||
exit_log("Missing source or target mount point\en");
|
||||
|
||||
const char *source = argv[optind];
|
||||
const char *target = argv[optind + 1];
|
||||
|
||||
int fd_tree = open_tree(\-EBADF, source,
|
||||
OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC |
|
||||
AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0));
|
||||
if (fd_tree == \-1)
|
||||
exit_log("%m \- Failed to open %s\en", source);
|
||||
|
||||
if (fd_userns >= 0) {
|
||||
attr\->attr_set |= MOUNT_ATTR_IDMAP;
|
||||
attr\->userns_fd = fd_userns;
|
||||
}
|
||||
|
||||
ret = mount_setattr(fd_tree, "",
|
||||
AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
|
||||
attr, sizeof(struct mount_attr));
|
||||
if (ret == \-1)
|
||||
exit_log("%m \- Failed to change mount attributes\en");
|
||||
|
||||
close(fd_userns);
|
||||
|
||||
ret = move_mount(fd_tree, "", \-EBADF, target,
|
||||
MOVE_MOUNT_F_EMPTY_PATH);
|
||||
if (ret == \-1)
|
||||
exit_log("%m \- Failed to attach mount to %s\en", target);
|
||||
|
||||
close(fd_tree);
|
||||
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
.EE
|
||||
.SH SEE ALSO
|
||||
.BR newuidmap (1),
|
||||
.BR newgidmap (1),
|
||||
.BR clone (2),
|
||||
.BR mount (2),
|
||||
.BR unshare (2),
|
||||
.BR proc (5),
|
||||
.BR mount_namespaces (7),
|
||||
.BR capabilities (7),
|
||||
.BR user_namespaces (7),
|
||||
.BR xattr (7)
|
|
@ -69,9 +69,10 @@ The only system calls that the calling thread is permitted to make are
|
|||
.BR exit_group (2)),
|
||||
and
|
||||
.BR sigreturn (2).
|
||||
Other system calls result in the delivery of a
|
||||
Other system calls result in the termination of the calling thread,
|
||||
or termination of the entire process with the
|
||||
.BR SIGKILL
|
||||
signal.
|
||||
signal when there is only one thread.
|
||||
Strict secure computing mode is useful for number-crunching
|
||||
applications that may need to execute untrusted byte code, perhaps
|
||||
obtained by reading from a pipe or socket.
|
||||
|
|
|
@ -46,7 +46,7 @@ signal \- ANSI C signal handling
|
|||
.fi
|
||||
.SH DESCRIPTION
|
||||
.BR WARNING :
|
||||
the behavior of
|
||||
the behavior of
|
||||
.BR signal ()
|
||||
varies across UNIX versions,
|
||||
and has also varied historically across different versions of Linux.
|
||||
|
|
|
@ -108,9 +108,9 @@ The following code loops over the tokens contained in a wide-character string.
|
|||
wchar_t *wcs = ...;
|
||||
wchar_t *token;
|
||||
wchar_t *state;
|
||||
for (token = wcstok(wcs, " \et\en", &state);
|
||||
for (token = wcstok(wcs, L" \et\en", &state);
|
||||
token != NULL;
|
||||
token = wcstok(NULL, " \et\en", &state)) {
|
||||
token = wcstok(NULL, L" \et\en", &state)) {
|
||||
...
|
||||
}
|
||||
.EE
|
||||
|
|
|
@ -902,7 +902,7 @@ relevant trademarks that are sometimes misspelled:
|
|||
HP-UX
|
||||
UNIX
|
||||
UnixWare
|
||||
.SS NULL, NUL, null pointer, and null character
|
||||
.SS NULL, NUL, null pointer, and null byte
|
||||
A
|
||||
.IR "null pointer"
|
||||
is a pointer that points to nothing,
|
||||
|
|
|
@ -179,8 +179,8 @@ flag set (though note that this also restricts bind mount traversal).
|
|||
If a pathname ends in a \(aq/\(aq, that forces resolution of the preceding
|
||||
component as in Step 2:
|
||||
the component preceding the slash either exists and resolves to a directory
|
||||
of it names a directory that is to be created immediately after the
|
||||
pathname is resolved.
|
||||
or it names a directory that is to be created
|
||||
immediately after the pathname is resolved.
|
||||
Otherwise, a trailing \(aq/\(aq is ignored.
|
||||
.SS Final symlink
|
||||
If the last component of a pathname is a symbolic link, then it
|
||||
|
|
|
@ -220,7 +220,7 @@ Set (UCS) \(em Part 1: Architecture and Basic Multilingual Plane.
|
|||
International Standard ISO/IEC 10646-1, International Organization
|
||||
for Standardization, Geneva, 2000.
|
||||
.IP
|
||||
This is the official specification of UCS .
|
||||
This is the official specification of UCS.
|
||||
Available from
|
||||
.UR http://www.iso.ch/
|
||||
.UE .
|
||||
|
|
Loading…
Reference in New Issue