mirror of https://github.com/mkerrisk/man-pages
428 lines
12 KiB
Groff
428 lines
12 KiB
Groff
.\" Copyright (C) 2002 Robert Love
|
|
.\" and Copyright (C) 2006, 2015 Michael Kerrisk
|
|
.\"
|
|
.\" %%%LICENSE_START(GPLv2+_DOC_FULL)
|
|
.\" This is free documentation; you can redistribute it and/or
|
|
.\" modify it under the terms of the GNU General Public License as
|
|
.\" published by the Free Software Foundation; either version 2 of
|
|
.\" the License, or (at your option) any later version.
|
|
.\"
|
|
.\" The GNU General Public License's references to "object code"
|
|
.\" and "executables" are to be interpreted as the output of any
|
|
.\" document formatting or typesetting system, including
|
|
.\" intermediate and printed output.
|
|
.\"
|
|
.\" This manual is distributed in the hope that it will be useful,
|
|
.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
.\" GNU General Public License for more details.
|
|
.\"
|
|
.\" You should have received a copy of the GNU General Public
|
|
.\" License along with this manual; if not, see
|
|
.\" <http://www.gnu.org/licenses/>.
|
|
.\" %%%LICENSE_END
|
|
.\"
|
|
.\" 2002-11-19 Robert Love <rml@tech9.net> - initial version
|
|
.\" 2004-04-20 mtk - fixed description of return value
|
|
.\" 2004-04-22 aeb - added glibc prototype history
|
|
.\" 2005-05-03 mtk - noted that sched_setaffinity may cause thread
|
|
.\" migration and that CPU affinity is a per-thread attribute.
|
|
.\" 2006-02-03 mtk -- Major rewrite
|
|
.\" 2008-11-12, mtk, moved CPU_*() macro descriptions to a
|
|
.\" separate CPU_SET(3) page.
|
|
.\"
|
|
.TH SCHED_SETAFFINITY 2 2016-10-08 "Linux" "Linux Programmer's Manual"
|
|
.SH NAME
|
|
sched_setaffinity, sched_getaffinity \- \
|
|
set and get a thread's CPU affinity mask
|
|
.SH SYNOPSIS
|
|
.nf
|
|
.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
|
|
.B #include <sched.h>
|
|
.sp
|
|
.BI "int sched_setaffinity(pid_t " pid ", size_t " cpusetsize ,
|
|
.BI " const cpu_set_t *" mask );
|
|
.sp
|
|
.BI "int sched_getaffinity(pid_t " pid ", size_t " cpusetsize ,
|
|
.BI " cpu_set_t *" mask );
|
|
.fi
|
|
.SH DESCRIPTION
|
|
A thread's CPU affinity mask determines the set of CPUs on which
|
|
it is eligible to run.
|
|
On a multiprocessor system, setting the CPU affinity mask
|
|
can be used to obtain performance benefits.
|
|
For example,
|
|
by dedicating one CPU to a particular thread
|
|
(i.e., setting the affinity mask of that thread to specify a single CPU,
|
|
and setting the affinity mask of all other threads to exclude that CPU),
|
|
it is possible to ensure maximum execution speed for that thread.
|
|
Restricting a thread to run on a single CPU also avoids
|
|
the performance cost caused by the cache invalidation that occurs
|
|
when a thread ceases to execute on one CPU and then
|
|
recommences execution on a different CPU.
|
|
|
|
A CPU affinity mask is represented by the
|
|
.I cpu_set_t
|
|
structure, a "CPU set", pointed to by
|
|
.IR mask .
|
|
A set of macros for manipulating CPU sets is described in
|
|
.BR CPU_SET (3).
|
|
|
|
.BR sched_setaffinity ()
|
|
sets the CPU affinity mask of the thread whose ID is
|
|
.I pid
|
|
to the value specified by
|
|
.IR mask .
|
|
If
|
|
.I pid
|
|
is zero, then the calling thread is used.
|
|
The argument
|
|
.I cpusetsize
|
|
is the length (in bytes) of the data pointed to by
|
|
.IR mask .
|
|
Normally this argument would be specified as
|
|
.IR "sizeof(cpu_set_t)" .
|
|
|
|
If the thread specified by
|
|
.I pid
|
|
is not currently running on one of the CPUs specified in
|
|
.IR mask ,
|
|
then that thread is migrated to one of the CPUs specified in
|
|
.IR mask .
|
|
|
|
.BR sched_getaffinity ()
|
|
writes the affinity mask of the thread whose ID is
|
|
.I pid
|
|
into the
|
|
.I cpu_set_t
|
|
structure pointed to by
|
|
.IR mask .
|
|
The
|
|
.I cpusetsize
|
|
argument specifies the size (in bytes) of
|
|
.IR mask .
|
|
If
|
|
.I pid
|
|
is zero, then the mask of the calling thread is returned.
|
|
.SH RETURN VALUE
|
|
On success,
|
|
.BR sched_setaffinity ()
|
|
and
|
|
.BR sched_getaffinity ()
|
|
return 0.
|
|
On error, \-1 is returned, and
|
|
.I errno
|
|
is set appropriately.
|
|
.SH ERRORS
|
|
.TP
|
|
.B EFAULT
|
|
A supplied memory address was invalid.
|
|
.TP
|
|
.B EINVAL
|
|
The affinity bit mask
|
|
.I mask
|
|
contains no processors that are currently physically on the system
|
|
and permitted to the thread according to any restrictions that
|
|
may be imposed by the "cpuset" mechanism described in
|
|
.BR cpuset (7).
|
|
.TP
|
|
.B EINVAL
|
|
.RB ( sched_getaffinity ()
|
|
and, in kernels before 2.6.9,
|
|
.BR sched_setaffinity ())
|
|
.I cpusetsize
|
|
is smaller than the size of the affinity mask used by the kernel.
|
|
.TP
|
|
.B EPERM
|
|
.RB ( sched_setaffinity ())
|
|
The calling thread does not have appropriate privileges.
|
|
The caller needs an effective user ID equal to the real user ID
|
|
or effective user ID of the thread identified by
|
|
.IR pid ,
|
|
or it must possess the
|
|
.B CAP_SYS_NICE
|
|
capability in the user namespace of the thread
|
|
.IR pid .
|
|
.TP
|
|
.B ESRCH
|
|
The thread whose ID is \fIpid\fP could not be found.
|
|
.SH VERSIONS
|
|
The CPU affinity system calls were introduced in Linux kernel 2.5.8.
|
|
The system call wrappers were introduced in glibc 2.3.
|
|
Initially, the glibc interfaces included a
|
|
.I cpusetsize
|
|
argument, typed as
|
|
.IR "unsigned int" .
|
|
In glibc 2.3.3, the
|
|
.I cpusetsize
|
|
argument was removed, but was then restored in glibc 2.3.4, with type
|
|
.IR size_t .
|
|
.SH CONFORMING TO
|
|
These system calls are Linux-specific.
|
|
.SH NOTES
|
|
After a call to
|
|
.BR sched_setaffinity (),
|
|
the set of CPUs on which the thread will actually run is
|
|
the intersection of the set specified in the
|
|
.I mask
|
|
argument and the set of CPUs actually present on the system.
|
|
The system may further restrict the set of CPUs on which the thread
|
|
runs if the "cpuset" mechanism described in
|
|
.BR cpuset (7)
|
|
is being used.
|
|
These restrictions on the actual set of CPUs on which the thread
|
|
will run are silently imposed by the kernel.
|
|
|
|
There are various ways of determining the number of CPUs
|
|
available on the system, including: inspecting the contents of
|
|
.IR /proc/cpuinfo ;
|
|
using
|
|
.BR sysconf (3)
|
|
to obtain the values of the
|
|
.B _SC_NPROCESSORS_CONF
|
|
and
|
|
.B _SC_NPROCESSORS_ONLN
|
|
parameters; and inspecting the list of CPU directories under
|
|
.IR /sys/devices/system/cpu/ .
|
|
|
|
.BR sched (7)
|
|
has a description of the Linux scheduling scheme.
|
|
.PP
|
|
The affinity mask is a per-thread attribute that can be
|
|
adjusted independently for each of the threads in a thread group.
|
|
The value returned from a call to
|
|
.BR gettid (2)
|
|
can be passed in the argument
|
|
.IR pid .
|
|
Specifying
|
|
.I pid
|
|
as 0 will set the attribute for the calling thread,
|
|
and passing the value returned from a call to
|
|
.BR getpid (2)
|
|
will set the attribute for the main thread of the thread group.
|
|
(If you are using the POSIX threads API, then use
|
|
.BR pthread_setaffinity_np (3)
|
|
instead of
|
|
.BR sched_setaffinity ().)
|
|
|
|
The
|
|
.I isolcpus
|
|
boot option can be used to isolate one or more CPUs at boot time,
|
|
so that no processes are scheduled onto those CPUs.
|
|
Following the use of this boot option,
|
|
the only way to schedule processes onto the isolated CPUs is via
|
|
.BR sched_setaffinity ()
|
|
or the
|
|
.BR cpuset (7)
|
|
mechanism.
|
|
For further information, see the kernel source file
|
|
.IR Documentation/admin-guide/kernel-parameters.txt .
|
|
As noted in that file,
|
|
.I isolcpus
|
|
is the preferred mechanism of isolating CPUs
|
|
(versus the alternative of manually setting the CPU affinity
|
|
of all processes on the system).
|
|
|
|
A child created via
|
|
.BR fork (2)
|
|
inherits its parent's CPU affinity mask.
|
|
The affinity mask is preserved across an
|
|
.BR execve (2).
|
|
.SS C library/kernel differences
|
|
This manual page describes the glibc interface for the CPU affinity calls.
|
|
The actual system call interface is slightly different, with the
|
|
.I mask
|
|
being typed as
|
|
.IR "unsigned long\ *" ,
|
|
reflecting the fact that the underlying implementation of CPU
|
|
sets is a simple bit mask.
|
|
On success, the raw
|
|
.BR sched_getaffinity ()
|
|
system call returns the size (in bytes) of the
|
|
.I cpumask_t
|
|
data type that is used internally by the kernel to
|
|
represent the CPU set bit mask.
|
|
.SS Handling systems with large CPU affinity masks
|
|
The underlying system calls (which represent CPU masks as bit masks of type
|
|
.IR "unsigned long\ *" )
|
|
impose no restriction on the size of the CPU mask.
|
|
However, the
|
|
.I cpu_set_t
|
|
data type used by glibc has a fixed size of 128 bytes,
|
|
meaning that the maximum CPU number that can be represented is 1023.
|
|
.\" FIXME . See https://sourceware.org/bugzilla/show_bug.cgi?id=15630
|
|
.\" and https://sourceware.org/ml/libc-alpha/2013-07/msg00288.html
|
|
If the kernel CPU affinity mask is larger than 1024 bits,
|
|
then calls of the form:
|
|
|
|
sched_getaffinity(pid, sizeof(cpu_set_t), &mask);
|
|
|
|
will fail with the error
|
|
.BR EINVAL ,
|
|
the error produced by the underlying system call for the case where the
|
|
.I mask
|
|
size specified in
|
|
.I cpusetsize
|
|
is smaller than the size of the affinity mask used by the kernel.
|
|
(Depending on the system CPU topology, the kernel affinity mask can
|
|
be substantially larger than the number of active CPUs in the system.)
|
|
.PP
|
|
When working on systems with large kernel CPU affinity masks,
|
|
one must dynamically allocate the
|
|
.I mask
|
|
argument (see
|
|
.BR CPU_ALLOC (3)).
|
|
Currently, the only way to do this is by probing for the size
|
|
of the required mask using
|
|
.BR sched_getaffinity ()
|
|
calls with increasing mask sizes (until the call does not fail with the error
|
|
.BR EINVAL ).
|
|
|
|
Be aware that
|
|
.BR CPU_ALLOC (3)
|
|
may allocate a slightly larger CPU set than requested
|
|
(because CPU sets are implemented as bit masks allocated in units of
|
|
.IR sizeof(long) ).
|
|
Consequently,
|
|
.BR sched_getaffinity ()
|
|
can set bits beyond the requested allocation size, because the kernel
|
|
sees a few additional bits.
|
|
Therefore, the caller should iterate over the bits in the returned set,
|
|
counting those which are set, and stop upon reaching the value returned by
|
|
.BR CPU_COUNT (3)
|
|
(rather than iterating over the number of bits
|
|
requested to be allocated).
|
|
.SH EXAMPLE
|
|
The program below creates a child process.
|
|
The parent and child then each assign themselves to a specified CPU
|
|
and execute identical loops that consume some CPU time.
|
|
Before terminating, the parent waits for the child to complete.
|
|
The program takes three command-line arguments:
|
|
the CPU number for the parent,
|
|
the CPU number for the child,
|
|
and the number of loop iterations that both processes should perform.
|
|
|
|
As the sample runs below demonstrate, the amount of real and CPU time
|
|
consumed when running the program will depend on intra-core caching effects
|
|
and whether the processes are using the same CPU.
|
|
|
|
We first employ
|
|
.BR lscpu (1)
|
|
to determine that this (x86)
|
|
system has two cores, each with two CPUs:
|
|
|
|
.in +4n
|
|
.nf
|
|
$ \fBlscpu | grep \-E \-i 'core.*:|socket'\fP
|
|
Thread(s) per core: 2
|
|
Core(s) per socket: 2
|
|
Socket(s): 1
|
|
.fi
|
|
.in
|
|
|
|
We then time the operation of the example program for three cases:
|
|
both processes running on the same CPU;
|
|
both processes running on different CPUs on the same core;
|
|
and both processes running on different CPUs on different cores.
|
|
|
|
.in +4n
|
|
.nf
|
|
$ \fBtime \-p ./a.out 0 0 100000000\fP
|
|
real 14.75
|
|
user 3.02
|
|
sys 11.73
|
|
$ \fBtime \-p ./a.out 0 1 100000000\fP
|
|
real 11.52
|
|
user 3.98
|
|
sys 19.06
|
|
$ \fBtime \-p ./a.out 0 3 100000000\fP
|
|
real 7.89
|
|
user 3.29
|
|
sys 12.07
|
|
.fi
|
|
.in
|
|
.SS Program source
|
|
\&
|
|
.nf
|
|
#define _GNU_SOURCE
|
|
#include <sched.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#include <sys/wait.h>
|
|
|
|
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
|
|
} while (0)
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
cpu_set_t set;
|
|
int parentCPU, childCPU;
|
|
int nloops, j;
|
|
|
|
if (argc != 4) {
|
|
fprintf(stderr, "Usage: %s parent\-cpu child\-cpu num\-loops\\n",
|
|
argv[0]);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
parentCPU = atoi(argv[1]);
|
|
childCPU = atoi(argv[2]);
|
|
nloops = atoi(argv[3]);
|
|
|
|
CPU_ZERO(&set);
|
|
|
|
switch (fork()) {
|
|
case \-1: /* Error */
|
|
errExit("fork");
|
|
|
|
case 0: /* Child */
|
|
CPU_SET(childCPU, &set);
|
|
|
|
if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1)
|
|
errExit("sched_setaffinity");
|
|
|
|
for (j = 0; j < nloops; j++)
|
|
getppid();
|
|
|
|
exit(EXIT_SUCCESS);
|
|
|
|
default: /* Parent */
|
|
CPU_SET(parentCPU, &set);
|
|
|
|
if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1)
|
|
errExit("sched_setaffinity");
|
|
|
|
for (j = 0; j < nloops; j++)
|
|
getppid();
|
|
|
|
wait(NULL); /* Wait for child to terminate */
|
|
exit(EXIT_SUCCESS);
|
|
}
|
|
}
|
|
.fi
|
|
.SH SEE ALSO
|
|
.ad l
|
|
.nh
|
|
.BR lscpu (1),
|
|
.BR nproc (1),
|
|
.BR taskset (1),
|
|
.BR clone (2),
|
|
.BR getcpu (2),
|
|
.BR getpriority (2),
|
|
.BR gettid (2),
|
|
.BR nice (2),
|
|
.BR sched_get_priority_max (2),
|
|
.BR sched_get_priority_min (2),
|
|
.BR sched_getscheduler (2),
|
|
.BR sched_setscheduler (2),
|
|
.BR setpriority (2),
|
|
.BR CPU_SET (3),
|
|
.BR get_nprocs (3),
|
|
.BR pthread_setaffinity_np (3),
|
|
.BR sched_getcpu (3),
|
|
.BR capabilities (7),
|
|
.BR cpuset (7),
|
|
.BR sched (7)
|