From ce88f77b91f0b6cf3694c44544405d08fcb96a11 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Thu, 17 Apr 2014 08:44:50 +0200 Subject: [PATCH] perf_event_open.2: Minor grammar, formatting, wording, and typo fixes Signed-off-by: Michael Kerrisk --- man2/perf_event_open.2 | 159 ++++++++++++++++++++++------------------- 1 file changed, 86 insertions(+), 73 deletions(-) diff --git a/man2/perf_event_open.2 b/man2/perf_event_open.2 index 00842df3f..72e265f8a 100644 --- a/man2/perf_event_open.2 +++ b/man2/perf_event_open.2 @@ -97,13 +97,13 @@ when running on the specified CPU. .TP .BR "pid == \-1" " and " "cpu >= 0" This measures all processes/threads on the specified CPU. -Measurements such as this require the +This requires .B CAP_SYS_ADMIN capability or a .I /proc/sys/kernel/perf_event_paranoid value of less than 1. .TP -.BR pid==\-1 " and " cpu==\-1 +.BR "pid == \-1" " and " "cpu == \-1" This setting is invalid and will return an error. .P The @@ -124,7 +124,7 @@ An event group is scheduled onto the CPU as a unit: it will be put onto the CPU only if all of the events in the group can be put onto the CPU. This means that the values of the member events can be -meaningfully compared, added, divided (to get ratios), and so on, with each +meaningfully compared\(emadded, divided (to get ratios), and so on\(emwith each other, since they have counted events for the same set of executed instructions. .P @@ -161,7 +161,7 @@ This flag reroutes the output from an event to the group leader. .BR PERF_FLAG_PID_CGROUP " (since Linux 2.6.39)." This flag activates per-container system-wide monitoring. A container -is an abstraction that isolates a set of resources for finer grain +is an abstraction that isolates a set of resources for finer-grained control (CPUs, memory, etc.). In this mode, the event is measured only if the thread running on the monitored CPU belongs to the designated @@ -189,43 +189,43 @@ for the event being created. .in +4n .nf struct perf_event_attr { - __u32 type; /* Type of event */ - __u32 size; /* Size of attribute structure */ - __u64 config; /* Type-specific configuration */ + __u32 type; /* Type of event */ + __u32 size; /* Size of attribute structure */ + __u64 config; /* Type-specific configuration */ union { __u64 sample_period; /* Period of sampling */ __u64 sample_freq; /* Frequency of sampling */ }; - __u64 sample_type; /* Specifies values included in sample */ - __u64 read_format; /* Specifies values returned in read */ + __u64 sample_type; /* Specifies values included in sample */ + __u64 read_format; /* Specifies values returned in read */ - __u64 disabled : 1, /* off by default */ - inherit : 1, /* children inherit it */ - pinned : 1, /* must always be on PMU */ - exclusive : 1, /* only group on PMU */ - exclude_user : 1, /* don't count user */ - exclude_kernel : 1, /* don't count kernel */ - exclude_hv : 1, /* don't count hypervisor */ - exclude_idle : 1, /* don't count when idle */ - mmap : 1, /* include mmap data */ - comm : 1, /* include comm data */ - freq : 1, /* use freq, not period */ - inherit_stat : 1, /* per task counts */ - enable_on_exec : 1, /* next exec enables */ - task : 1, /* trace fork/exit */ - watermark : 1, /* wakeup_watermark */ - precise_ip : 2, /* skid constraint */ - mmap_data : 1, /* non-exec mmap data */ - sample_id_all : 1, /* sample_type all events */ - exclude_host : 1, /* don't count in host */ - exclude_guest : 1, /* don't count in guest */ - exclude_callchain_kernel : 1, - /* exclude kernel callchains */ - exclude_callchain_user : 1, - /* exclude user callchains */ - __reserved_1 : 41; + __u64 disabled : 1, /* off by default */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* don't count kernel */ + exclude_hv : 1, /* don't count hypervisor */ + exclude_idle : 1, /* don't count when idle */ + mmap : 1, /* include mmap data */ + comm : 1, /* include comm data */ + freq : 1, /* use freq, not period */ + inherit_stat : 1, /* per task counts */ + enable_on_exec : 1, /* next exec enables */ + task : 1, /* trace fork/exit */ + watermark : 1, /* wakeup_watermark */ + precise_ip : 2, /* skid constraint */ + mmap_data : 1, /* non-exec mmap data */ + sample_id_all : 1, /* sample_type all events */ + exclude_host : 1, /* don't count in host */ + exclude_guest : 1, /* don't count in guest */ + exclude_callchain_kernel : 1, + /* exclude kernel callchains */ + exclude_callchain_user : 1, + /* exclude user callchains */ + __reserved_1 : 41; union { __u32 wakeup_events; /* wakeup every n events */ @@ -243,11 +243,11 @@ struct perf_event_attr { __u64 bp_len; /* breakpoint length */ __u64 config2; /* extension of config1 */ }; - __u64 branch_sample_type; /* enum perf_branch_sample_type */ - __u64 sample_regs_user; /* user regs to dump on samples */ - __u32 sample_stack_user; /* size of stack to dump on + __u64 branch_sample_type; /* enum perf_branch_sample_type */ + __u64 sample_regs_user; /* user regs to dump on samples */ + __u32 sample_stack_user; /* size of stack to dump on samples */ - __u32 __reserved_2; /* Align to u64 */ + __u32 __reserved_2; /* Align to u64 */ }; .fi @@ -1234,35 +1234,35 @@ The structure of the first metadata mmap page is as follows: .in +4n .nf struct perf_event_mmap_page { - __u32 version; /* version number of this structure */ - __u32 compat_version; /* lowest version this is compat with */ - __u32 lock; /* seqlock for synchronization */ - __u32 index; /* hardware counter identifier */ - __s64 offset; /* add to hardware counter value */ - __u64 time_enabled; /* time event active */ - __u64 time_running; /* time event on CPU */ + __u32 version; /* version number of this structure */ + __u32 compat_version; /* lowest version this is compat with */ + __u32 lock; /* seqlock for synchronization */ + __u32 index; /* hardware counter identifier */ + __s64 offset; /* add to hardware counter value */ + __u64 time_enabled; /* time event active */ + __u64 time_running; /* time event on CPU */ union { __u64 capabilities; struct { - __u64 cap_usr_time / cap_usr_rdpmc / cap_bit0 : 1, - cap_bit0_is_deprecated : 1, - cap_user_rdpmc : 1, - cap_user_time : 1, - cap_user_time_zero : 1, + __u64 cap_usr_time / cap_usr_rdpmc / cap_bit0 : 1, + cap_bit0_is_deprecated : 1, + cap_user_rdpmc : 1, + cap_user_time : 1, + cap_user_time_zero : 1, }; }; - __u16 pmc_width; - __u16 time_shift; - __u32 time_mult; - __u64 time_offset; - __u64 __reserved[120]; /* Pad to 1k */ - __u64 data_head; /* head in the data section */ - __u64 data_tail; /* user-space written tail */ + __u16 pmc_width; + __u16 time_shift; + __u32 time_mult; + __u64 time_offset; + __u64 __reserved[120]; /* Pad to 1k */ + __u64 data_head; /* head in the data section */ + __u64 data_tail; /* user-space written tail */ } .fi .in -The following looks at the fields in the +The following list describes the fields in the .I perf_event_mmap_page structure in more detail: .TP @@ -1432,13 +1432,16 @@ If is set, then the hardware clock (the TSC timestamp counter on x86) can be calculated from the .IR time_zero ", " time_mult ", and " time_shift " values:" + .nf time = timestamp - time_zero; quot = time / time_mult; rem = time % time_mult; cyc = (quot << time_shift) + (rem << time_shift) / time_mult; .fi + And vice versa: + .nf quot = cyc >> time_shift; rem = cyc & ((1 << time_shift) - 1); @@ -1452,7 +1455,9 @@ The value continuously increases, it does not wrap. The value needs to be manually wrapped by the size of the mmap buffer before accessing the samples. -On SMP-capable platforms, after reading the data_head value, +On SMP-capable platforms, after reading the +.I data_head +value, user space should issue an rmb(). .TP .I data_tail @@ -2146,10 +2151,10 @@ to calculate event values can be found in that section. .PP Various ioctls act on .BR perf_event_open () -file descriptors +file descriptors: .TP .B PERF_EVENT_IOC_ENABLE -Enables the individual event or event group specified by the +This enables the individual event or event group specified by the file descriptor argument. If the @@ -2159,7 +2164,7 @@ enabled, even if the event specified is not the group leader (but see BUGS). .TP .B PERF_EVENT_IOC_DISABLE -Disables the individual counter or event group specified by the +This disables the individual counter or event group specified by the file descriptor argument. Enabling or disabling the leader of a group enables or disables the @@ -2234,7 +2239,7 @@ This adds an ftrace filter to this event. The argument is a pointer to the desired ftrace filter. .TP .BR PERF_EVENT_IOC_ID " (since Linux 3.12)" -Returns the event ID value for the given event fd. +This returns the event ID value for the given event fd. The argument is a pointer to a 64-bit unsigned integer to hold the result. @@ -2288,14 +2293,15 @@ The default value is .TP .I /proc/sys/kernel/perf_event_mlock_kb -Maximum number of pages an unprivileged user can mlock (2) . +Maximum number of pages an unprivileged user can +.BR mlock (2). The default is 516 (kB). .RE Files in .I /sys/bus/event_source/devices/ .RS 4 -Since Linux 2.6.34 the kernel supports having multiple PMUs +Since Linux 2.6.34, the kernel supports having multiple PMUs available for monitoring. Information on how to program these PMUs can be found under .IR /sys/bus/event_source/devices/ . @@ -2304,7 +2310,9 @@ Each subdirectory corresponds to a different PMU. .IR /sys/bus/event_source/devices/*/type " (since Linux 2.6.38)" This contains an integer that can be used in the .I type -field of perf_event_attr to indicate you wish to use this PMU. +field of +.I perf_event_attr +to indicate that you wish to use this PMU. .TP .IR /sys/bus/event_source/devices/*/rdpmc " (since Linux 3.4)" If this file is 1, then direct user-space access to the @@ -2315,7 +2323,9 @@ This can be disabled by echoing 0 to the file. This subdirectory contains information on the architecture-specific subfields available for programming the various .I config -fields in the perf_event_attr struct. +fields in the +.I perf_event_attr +struct. The content of each file is the name of the config field, followed by a colon, followed by a series of integer bit ranges separated by @@ -2325,7 +2335,8 @@ For example, the file may contain the value .I config1:1,6-10,44 which indicates that event is an attribute that occupies bits 1,6-10, and 44 -of perf_event_attr::config1. +of +.IR perf_event_attr::config1 . .TP .IR /sys/bus/event_source/devices/*/events/ " (since Linux 3.4)" This subdirectory contains files with predefined events. @@ -2370,7 +2381,8 @@ can be inconsistent, and may vary across processor architectures and performance monitoring units. .TP .B E2BIG -Returned if the perf_event_attr +Returned if the +.I perf_event_attr .I size value is too small (smaller than @@ -2379,7 +2391,8 @@ too big (larger than the page size), or larger than the kernel supports and the extra bytes are not zero. When .B E2BIG -is returned, the perf_event_attr +is returned, the +.I perf_event_attr .I size field is overwritten by the kernel to be the size of the structure it was expecting. @@ -2522,7 +2535,7 @@ option to is needed to properly get overflow signals in threads. This was introduced in Linux 2.6.32. -Prior to Linux 2.6.33 (at least for x86) the kernel did not check +Prior to Linux 2.6.33 (at least for x86), the kernel did not check if events could be scheduled together until read time. The same happens on all known kernels if the NMI watchdog is enabled. This means to see if a given set of events works you have to @@ -2530,11 +2543,11 @@ This means to see if a given set of events works you have to start, then read before you know for sure you can get valid measurements. -Prior to Linux 2.6.34 event constraints were not enforced by the kernel. +Prior to Linux 2.6.34, event constraints were not enforced by the kernel. In that case, some events would silently return "0" if the kernel scheduled them in an improper counter slot. -Prior to Linux 2.6.34 there was a bug when multiplexing where the +Prior to Linux 2.6.34, there was a bug when multiplexing where the wrong results could be returned. Kernels from Linux 2.6.35 to Linux 2.6.39 can quickly crash the kernel if