mirror of https://github.com/mkerrisk/man-pages
perf_event_open.2: Minor fixes
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
This commit is contained in:
parent
f2b1d7209e
commit
7db515ef59
|
@ -203,33 +203,35 @@ struct perf_event_attr {
|
|||
__u64 sample_type; /* Specifies values included in sample */
|
||||
__u64 read_format; /* Specifies values returned in read */
|
||||
|
||||
__u64 disabled : 1, /* off by default */
|
||||
inherit : 1, /* children inherit it */
|
||||
pinned : 1, /* must always be on PMU */
|
||||
exclusive : 1, /* only group on PMU */
|
||||
exclude_user : 1, /* don't count user */
|
||||
exclude_kernel : 1, /* don't count kernel */
|
||||
__u64 disabled : 1, /* off by default */
|
||||
inherit : 1, /* children inherit it */
|
||||
pinned : 1, /* must always be on PMU */
|
||||
exclusive : 1, /* only group on PMU */
|
||||
exclude_user : 1, /* don't count user */
|
||||
exclude_kernel : 1, /* don't count kernel */
|
||||
exclude_hv : 1, /* don't count hypervisor */
|
||||
exclude_idle : 1, /* don't count when idle */
|
||||
mmap : 1, /* include mmap data */
|
||||
comm : 1, /* include comm data */
|
||||
freq : 1, /* use freq, not period */
|
||||
inherit_stat : 1, /* per task counts */
|
||||
enable_on_exec : 1, /* next exec enables */
|
||||
task : 1, /* trace fork/exit */
|
||||
watermark : 1, /* wakeup_watermark */
|
||||
precise_ip : 2, /* skid constraint */
|
||||
mmap_data : 1, /* non-exec mmap data */
|
||||
exclude_idle : 1, /* don't count when idle */
|
||||
mmap : 1, /* include mmap data */
|
||||
comm : 1, /* include comm data */
|
||||
freq : 1, /* use freq, not period */
|
||||
inherit_stat : 1, /* per task counts */
|
||||
enable_on_exec : 1, /* next exec enables */
|
||||
task : 1, /* trace fork/exit */
|
||||
watermark : 1, /* wakeup_watermark */
|
||||
precise_ip : 2, /* skid constraint */
|
||||
mmap_data : 1, /* non-exec mmap data */
|
||||
sample_id_all : 1, /* sample_type all events */
|
||||
exclude_host : 1, /* don't count in host */
|
||||
exclude_guest : 1, /* don't count in guest */
|
||||
exclude_callchain_kernel : 1, /* exclude kernel callchains */
|
||||
exclude_callchain_user : 1, /* exclude user callchains */
|
||||
exclude_host : 1, /* don't count in host */
|
||||
exclude_guest : 1, /* don't count in guest */
|
||||
exclude_callchain_kernel : 1,
|
||||
/* exclude kernel callchains */
|
||||
exclude_callchain_user : 1,
|
||||
/* exclude user callchains */
|
||||
__reserved_1 : 41;
|
||||
|
||||
union {
|
||||
__u32 wakeup_events; /* wakeup every n events */
|
||||
__u32 wakeup_watermark; /* bytes before wakeup */
|
||||
__u32 wakeup_watermark; /* bytes before wakeup */
|
||||
};
|
||||
|
||||
__u32 bp_type; /* breakpoint type */
|
||||
|
@ -243,10 +245,11 @@ struct perf_event_attr {
|
|||
__u64 bp_len; /* breakpoint length */
|
||||
__u64 config2; /* extension of config1 */
|
||||
};
|
||||
__u64 branch_sample_type; /* enum perf_branch_sample_type */
|
||||
__u64 sample_regs_user; /* user regs to dump on samples */
|
||||
__u32 sample_stack_user; /* size of stack to dump on samples */
|
||||
__u32 __reserved_2; /* Align to u64. */
|
||||
__u64 branch_sample_type; /* enum perf_branch_sample_type */
|
||||
__u64 sample_regs_user; /* user regs to dump on samples */
|
||||
__u32 sample_stack_user; /* size of stack to dump on
|
||||
samples */
|
||||
__u32 __reserved_2; /* Align to u64 */
|
||||
|
||||
};
|
||||
.fi
|
||||
|
@ -294,7 +297,7 @@ execution of an instruction address.
|
|||
.TP
|
||||
.RB "dynamic PMU"
|
||||
Since Linux 2.6.39,
|
||||
.BR perf_event_open()
|
||||
.BR perf_event_open ()
|
||||
can support multiple PMUs.
|
||||
To enable this, a value exported by the kernel can be used in the
|
||||
.I type
|
||||
|
@ -462,7 +465,7 @@ This only happens on some architectures (never on x86).
|
|||
.BR PERF_COUNT_SW_EMULATION_FAULTS " (Since Linux 2.6.33)"
|
||||
This counts the number of emulation faults.
|
||||
The kernel sometimes traps on unimplemented instructions
|
||||
and emulates them for userspace.
|
||||
and emulates them for user space.
|
||||
This can negatively impact performance.
|
||||
.RE
|
||||
.RE
|
||||
|
@ -501,7 +504,7 @@ value use the following equation:
|
|||
where
|
||||
.I perf_hw_cache_id
|
||||
is one of:
|
||||
.RS
|
||||
.RS 4
|
||||
.TP
|
||||
.B PERF_COUNT_HW_CACHE_L1D
|
||||
for measuring Level 1 Data Cache
|
||||
|
@ -529,7 +532,7 @@ for measuring local memory accesses
|
|||
and
|
||||
.I perf_hw_cache_op_id
|
||||
is one of
|
||||
.RS
|
||||
.RS 4
|
||||
.TP
|
||||
.B PERF_COUNT_HW_CACHE_OP_READ
|
||||
for read accesses
|
||||
|
@ -545,7 +548,7 @@ for prefetch accesses
|
|||
and
|
||||
.I perf_hw_cache_op_result_id
|
||||
is one of
|
||||
.RS
|
||||
.RS 4
|
||||
.TP
|
||||
.B PERF_COUNT_HW_CACHE_RESULT_ACCESS
|
||||
to measure accesses
|
||||
|
@ -622,7 +625,7 @@ order.
|
|||
Records instruction pointer.
|
||||
.TP
|
||||
.B PERF_SAMPLE_TID
|
||||
Records the process and thread ids.
|
||||
Records the process and thread IDs.
|
||||
.TP
|
||||
.B PERF_SAMPLE_TIME
|
||||
Records a timestamp.
|
||||
|
@ -657,7 +660,8 @@ Records additional data, if applicable.
|
|||
Usually returned by tracepoint events.
|
||||
.TP
|
||||
.BR PERF_SAMPLE_BRANCH_STACK " (Since Linux 3.4)"
|
||||
Records the branch stack. See branch_sample_type.
|
||||
Records the branch stack.
|
||||
See branch_sample_type.
|
||||
.TP
|
||||
.BR PERF_SAMPLE_REGS_USER " (Since Linux 3.7)"
|
||||
Records the current register state.
|
||||
|
@ -671,7 +675,7 @@ Records the current register state.
|
|||
This field specifies the format of the data returned by
|
||||
.BR read (2)
|
||||
on a
|
||||
.BR perf_event_open()
|
||||
.BR perf_event_open ()
|
||||
file descriptor.
|
||||
.RS
|
||||
.TP
|
||||
|
@ -915,7 +919,7 @@ count when we read or write the memory location
|
|||
count when we execute code at the memory location
|
||||
|
||||
.LP
|
||||
The values can be combined via a bitwsie or, but the
|
||||
The values can be combined via a bitwise or, but the
|
||||
combination of
|
||||
.B HW_BREAKPOINT_R
|
||||
or
|
||||
|
@ -998,7 +1002,9 @@ User, kernel, and hv
|
|||
.TP
|
||||
.IR "sample_regs_user" " (Since Linux 3.7)"
|
||||
This defines the set of user registers to dump on samples.
|
||||
See asm/perf_regs.h.
|
||||
See
|
||||
.\" FIXME: The following reference seems to be not quite right:
|
||||
.IR asm/perf_regs.h .
|
||||
|
||||
.TP
|
||||
.IR "sample_stack_user" " (Since Linux 3.7)"
|
||||
|
@ -1008,12 +1014,14 @@ This defines the size of the user stack to dump on sample.
|
|||
|
||||
.SS "Reading Results"
|
||||
Once a
|
||||
.BR perf_event_open()
|
||||
.BR perf_event_open ()
|
||||
file descriptor has been opened, the values
|
||||
of the events can be read from the file descriptor.
|
||||
The values that are there are specified by the
|
||||
.I read_format
|
||||
field in the attr structure at open time.
|
||||
field in the
|
||||
.I attr
|
||||
structure at open time.
|
||||
|
||||
If you attempt to read into a buffer that is not big enough to hold the
|
||||
data
|
||||
|
@ -1097,7 +1105,7 @@ was specified in read_format.
|
|||
.SS "MMAP Layout"
|
||||
|
||||
When using
|
||||
.BR perf_event_open()
|
||||
.BR perf_event_open ()
|
||||
in sampled mode, asynchronous events
|
||||
(like counter overflow or
|
||||
.B PROT_EXEC
|
||||
|
@ -1120,13 +1128,13 @@ The structure of the first metadata mmap page is as follows:
|
|||
.in +4n
|
||||
.nf
|
||||
struct perf_event_mmap_page {
|
||||
__u32 version; /* version number of this structure */
|
||||
__u32 version; /* version number of this structure */
|
||||
__u32 compat_version; /* lowest version this is compat with */
|
||||
__u32 lock; /* seqlock for synchronization */
|
||||
__u32 index; /* hardware counter identifier */
|
||||
__s64 offset; /* add to hardware counter value */
|
||||
__u64 time_enabled; /* time event active */
|
||||
__u64 time_running; /* time event on CPU */
|
||||
__u32 lock; /* seqlock for synchronization */
|
||||
__u32 index; /* hardware counter identifier */
|
||||
__s64 offset; /* add to hardware counter value */
|
||||
__u64 time_enabled; /* time event active */
|
||||
__u64 time_running; /* time event on CPU */
|
||||
union {
|
||||
__u64 capabilities;
|
||||
__u64 cap_usr_time : 1,
|
||||
|
@ -1136,9 +1144,9 @@ struct perf_event_mmap_page {
|
|||
__u16 time_shift;
|
||||
__u32 time_mult;
|
||||
__u64 time_offset;
|
||||
__u64 __reserved[120]; /* Pad to 1k */
|
||||
__u64 __reserved[120]; /* Pad to 1k */
|
||||
__u64 data_head; /* head in the data section */
|
||||
__u64 data_tail; /* user-space written tail */
|
||||
__u64 data_tail; /* user-space written tail */
|
||||
}
|
||||
.fi
|
||||
.in
|
||||
|
@ -1149,7 +1157,7 @@ The following looks at the fields in the
|
|||
.I perf_event_mmap_page
|
||||
structure in more detail.
|
||||
|
||||
.RS
|
||||
.RS 4
|
||||
|
||||
.TP
|
||||
.I version
|
||||
|
@ -1248,7 +1256,7 @@ count += pmc;
|
|||
If
|
||||
.IR cap_usr_time ,
|
||||
these fields can be used to compute the time
|
||||
delta since time_enabled (in ns) using rdtsc or similar.
|
||||
delta since time_enabled (in nanoseconds) using rdtsc or similar.
|
||||
.nf
|
||||
|
||||
u64 quot, rem;
|
||||
|
@ -1259,7 +1267,13 @@ delta since time_enabled (in ns) using rdtsc or similar.
|
|||
((rem * time_mult) >> time_shift);
|
||||
.fi
|
||||
|
||||
Where time_offset,time_mult,time_shift and cyc are read in the
|
||||
Where
|
||||
.IR time_offset ,
|
||||
.IR time_mult ,
|
||||
.IR time_shift ,
|
||||
and
|
||||
.IR cyc
|
||||
are read in the
|
||||
seqcount loop described above.
|
||||
This delta can then be added to
|
||||
enabled and possibly running (if idx), improving the scaling:
|
||||
|
@ -1276,8 +1290,8 @@ enabled and possible running (if idx), improving the scaling:
|
|||
.TP
|
||||
.I data_head
|
||||
This points to the head of the data section.
|
||||
The value continuously increases, it does not wrap. The value
|
||||
needs to be manually wrapped by the size of the mmap buffer
|
||||
The value continuously increases; it does not wrap.
|
||||
The value needs to be manually wrapped by the size of the mmap buffer
|
||||
before accessing the samples.
|
||||
|
||||
On SMP-capable platforms, after reading the data_head value,
|
||||
|
@ -1287,8 +1301,9 @@ user-space should issue an rmb().
|
|||
.I data_tail
|
||||
When the mapping is
|
||||
.BR PROT_WRITE ,
|
||||
the data_tail value should be written by
|
||||
userspace to reflect the last read data.
|
||||
the
|
||||
.I data_tail
|
||||
value should be written by user space to reflect the last read data.
|
||||
In this case the kernel will not over-write unread data.
|
||||
|
||||
.RE
|
||||
|
@ -1303,7 +1318,8 @@ have the sample_type selected fields related to where/when (identity)
|
|||
an event took place (TID, TIME, ID, CPU, STREAM_ID) described in
|
||||
.B PERF_RECORD_SAMPLE
|
||||
below, it will be stashed just after the
|
||||
perf_event_header and the fields already present for the existing
|
||||
.I perf_event_header
|
||||
and the fields already present for the existing
|
||||
fields, i.e., at the end of the payload.
|
||||
That way a newer perf.data
|
||||
file will be supported by older perf tools, with these new optional
|
||||
|
@ -1336,12 +1352,12 @@ depend on the
|
|||
selected as shown.
|
||||
|
||||
.RS
|
||||
.TP
|
||||
.TP 4
|
||||
.B PERF_RECORD_MMAP
|
||||
The MMAP events record the
|
||||
.B PROT_EXEC
|
||||
mappings so that we can correlate
|
||||
userspace IPs to code.
|
||||
user space IPs to code.
|
||||
They have the following structure:
|
||||
|
||||
.in +4n
|
||||
|
@ -1461,96 +1477,124 @@ This record indicates a sample.
|
|||
.nf
|
||||
struct {
|
||||
struct perf_event_header header;
|
||||
u64 ip; /* if PERF_SAMPLE_IP */
|
||||
u32 pid, tid; /* if PERF_SAMPLE_TID */
|
||||
u64 time; /* if PERF_SAMPLE_TIME */
|
||||
u64 addr; /* if PERF_SAMPLE_ADDR */
|
||||
u64 id; /* if PERF_SAMPLE_ID */
|
||||
u64 stream_id; /* if PERF_SAMPLE_STREAM_ID */
|
||||
u32 cpu, res; /* if PERF_SAMPLE_CPU */
|
||||
u64 period; /* if PERF_SAMPLE_PERIOD */
|
||||
u64 ip; /* if PERF_SAMPLE_IP */
|
||||
u32 pid, tid; /* if PERF_SAMPLE_TID */
|
||||
u64 time; /* if PERF_SAMPLE_TIME */
|
||||
u64 addr; /* if PERF_SAMPLE_ADDR */
|
||||
u64 id; /* if PERF_SAMPLE_ID */
|
||||
u64 stream_id; /* if PERF_SAMPLE_STREAM_ID */
|
||||
u32 cpu, res; /* if PERF_SAMPLE_CPU */
|
||||
u64 period; /* if PERF_SAMPLE_PERIOD */
|
||||
struct read_format v; /* if PERF_SAMPLE_READ */
|
||||
u64 nr; /* if PERF_SAMPLE_CALLCHAIN */
|
||||
u64 ips[nr]; /* if PERF_SAMPLE_CALLCHAIN */
|
||||
u32 size; /* if PERF_SAMPLE_RAW */
|
||||
char data[size]; /* if PERF_SAMPLE_RAW */
|
||||
u64 bnr; /* if PERF_SAMPLE_BRANCH_STACK */
|
||||
struct perf_branch_entry
|
||||
lbr[bnr]; /* if PERF_SAMPLE_BRANCH_STACK */
|
||||
u64 abi; /* if PERF_SAMPLE_REGS_USER */
|
||||
u64 regs[weight(mask)]; /* if PERF_SAMPLE_REGS_USER */
|
||||
u64 size; /* if PERF_SAMPLE_STACK_USER */
|
||||
char data[size]; /* if PERF_SAMPLE_STACK_USER */
|
||||
u64 dyn_size; /* if PERF_SAMPLE_STACK_USER */
|
||||
u64 nr; /* if PERF_SAMPLE_CALLCHAIN */
|
||||
u64 ips[nr]; /* if PERF_SAMPLE_CALLCHAIN */
|
||||
u32 size; /* if PERF_SAMPLE_RAW */
|
||||
char data[size]; /* if PERF_SAMPLE_RAW */
|
||||
u64 bnr; /* if PERF_SAMPLE_BRANCH_STACK */
|
||||
struct perf_branch_entry lbr[bnr];
|
||||
/* if PERF_SAMPLE_BRANCH_STACK */
|
||||
u64 abi; /* if PERF_SAMPLE_REGS_USER */
|
||||
u64 regs[weight(mask)];
|
||||
/* if PERF_SAMPLE_REGS_USER */
|
||||
u64 size; /* if PERF_SAMPLE_STACK_USER */
|
||||
char data[size]; /* if PERF_SAMPLE_STACK_USER */
|
||||
u64 dyn_size; /* if PERF_SAMPLE_STACK_USER */
|
||||
};
|
||||
.fi
|
||||
|
||||
.RS
|
||||
.TP
|
||||
.I ip
|
||||
If PERF_SAMPLE_IP is enabled then a 64-bit instruction
|
||||
If
|
||||
.B PERF_SAMPLE_IP
|
||||
is enabled, then a 64-bit instruction
|
||||
pointer value is included.
|
||||
|
||||
.TP
|
||||
.IR pid , tid
|
||||
If PERF_SAMPLE_TID is enabled then a 32-bit process id
|
||||
and 32-bit thread id are included.
|
||||
.IR pid ", " tid
|
||||
If
|
||||
.B PERF_SAMPLE_TID
|
||||
is enabled, then a 32-bit process ID
|
||||
and 32-bit thread ID are included.
|
||||
|
||||
.TP
|
||||
.I time
|
||||
If PERF_SAMPLE_TIME is enabled then a 64-bit timestamp
|
||||
If
|
||||
.B PERF_SAMPLE_TIME
|
||||
is enabled, then a 64-bit timestamp
|
||||
is included.
|
||||
This is obtained via local_clock() which is a hardware timestamp
|
||||
if available and the jiffies value if not.
|
||||
|
||||
.TP
|
||||
.I addr
|
||||
If PERF_SAMPLE_ADDR is enabled than a 64-bit address is included.
|
||||
If
|
||||
.B PERF_SAMPLE_ADDR
|
||||
is enabled, then a 64-bit address is included.
|
||||
This is usually the address of a tracepoint,
|
||||
breakpoint, or software event; otherwise the value is 0.
|
||||
|
||||
.TP
|
||||
.I id
|
||||
If PERF_SAMPLE_ID is enabled a 64-bit unique ID is included.
|
||||
If
|
||||
.B PERF_SAMPLE_ID
|
||||
is enabled, a 64-bit unique ID is included.
|
||||
If the event is a member of an event group, the group leader ID is returned.
|
||||
This ID is the same as the one returned by PERF_FORMAT_ID.
|
||||
This ID is the same as the one returned by
|
||||
.BR PERF_FORMAT_ID .
|
||||
|
||||
.TP
|
||||
.I stream_id
|
||||
If PERF_SAMPLE_STREAM_ID is enabled a 64-bit unique ID is included.
|
||||
If
|
||||
.B PERF_SAMPLE_STREAM_ID
|
||||
is enabled, a 64-bit unique ID is included.
|
||||
Unlike
|
||||
.B PERF_SAMPLE_ID
|
||||
the actual ID is returned, not the group leader.
|
||||
This ID is the same as the one returned by PERF_FORMAT_ID.
|
||||
This ID is the same as the one returned by
|
||||
.BR PERF_FORMAT_ID .
|
||||
|
||||
.TP
|
||||
.IR cpu , res
|
||||
If PERF_SAMPLE_CPU is enabled this is a 32-bit value indicating
|
||||
.IR cpu ", " res
|
||||
If
|
||||
.B PERF_SAMPLE_CPU
|
||||
is enabled, this is a 32-bit value indicating
|
||||
which CPU was being used, in addition to a reserved (unused)
|
||||
32-bit value.
|
||||
|
||||
.TP
|
||||
.I period
|
||||
If PERF_SAMPLE_PERIOD is enabled a 64-bit value indicating
|
||||
If
|
||||
.B PERF_SAMPLE_PERIOD
|
||||
is enabled, a 64-bit value indicating
|
||||
the current sampling period is written.
|
||||
|
||||
.TP
|
||||
.I v
|
||||
If PERF_SAMPLE_READ is enabled a structure of type read_format
|
||||
If
|
||||
.B PERF_SAMPLE_READ
|
||||
is enabled, a structure of type read_format
|
||||
is included which has values for all events in the event group.
|
||||
The values included depend on the
|
||||
.I read_format
|
||||
value used at perf_event_open() time.
|
||||
value used at
|
||||
.BR perf_event_open ()
|
||||
time.
|
||||
|
||||
.TP
|
||||
.IR nr , ips[nr]
|
||||
If PERF_SAMPLE_CALLCHAIN is enabled then a 64-bit number is included
|
||||
.IR nr ", " ips[nr]
|
||||
If
|
||||
.B PERF_SAMPLE_CALLCHAIN
|
||||
is enabled, then a 64-bit number is included
|
||||
which indicates how many 64-bit instruction pointers will
|
||||
follow. This is the current callchain.
|
||||
follow.
|
||||
This is the current callchain.
|
||||
|
||||
.TP
|
||||
.IR size , data
|
||||
If PERF_SAMPLE_RAW is enabled then a 32-bit value indicating size
|
||||
.IR size ", " data
|
||||
If
|
||||
.B PERF_SAMPLE_RAW
|
||||
is enabled, then a 32-bit value indicating size
|
||||
is included followed by an array of 8-bit values of length size.
|
||||
The values are padded with 0 to have 64-bit alignment.
|
||||
|
||||
|
@ -1560,26 +1604,35 @@ of its content, it may vary depending
|
|||
on event, hardware, and kernel version.
|
||||
|
||||
.TP
|
||||
.IR bnr , lbr[bnr]
|
||||
If PERF_SAMPLE_BRANCH_STACK is enabled then a 64-bit value indicating
|
||||
the number of records is included, followed by bnr perf_branch_entry
|
||||
structures. These structures have from, to, and flags values indicating
|
||||
.IR bnr ", " lbr[bnr]
|
||||
If
|
||||
.B PERF_SAMPLE_BRANCH_STACK
|
||||
is enabled, then a 64-bit value indicating
|
||||
the number of records is included, followed by
|
||||
.I bnr
|
||||
.I perf_branch_entry
|
||||
structures.
|
||||
These structures have from, to, and flags values indicating
|
||||
the from and to addresses from the branches on the callstack.
|
||||
|
||||
.TP
|
||||
.IR abi , regs[weight(mask)]
|
||||
If PERF_SAMPLE_REGS_USER is enabled then
|
||||
.IR abi ", " regs[weight(mask)]
|
||||
If
|
||||
.B PERF_SAMPLE_REGS_USER
|
||||
is enabled, then
|
||||
[to be documented].
|
||||
|
||||
The
|
||||
.I abi
|
||||
field is one of
|
||||
.BR PERF_SAMPLE_REGS_ABI_NONE ", " PERF_SAMPLE_REGS_ABI_32 " or "
|
||||
.BR PERF_SAMPLE_REGS_ABI_64 ". "
|
||||
.BR PERF_SAMPLE_REGS_ABI_64 .
|
||||
|
||||
.TP
|
||||
.IR size , data[size] , dyn_size
|
||||
If PERF_SAMPLE_STACK_USER is enabled then
|
||||
.IR size ", " data[size] ", " dyn_size
|
||||
If
|
||||
.B PERF_SAMPLE_STACK_USER
|
||||
is enabled, then
|
||||
[to be documented].
|
||||
|
||||
.RE
|
||||
|
@ -1618,7 +1671,7 @@ Sample happened in the guest kernel.
|
|||
Sample happened in guest user code.
|
||||
.RE
|
||||
|
||||
In addition one of the following bits can be set:
|
||||
In addition, one of the following bits can be set:
|
||||
.RS
|
||||
.TP
|
||||
.B PERF_RECORD_MISC_EXACT_IP
|
||||
|
@ -1664,18 +1717,24 @@ or
|
|||
.I wakeup_watermark
|
||||
value that will generate a signal if a certain number of samples
|
||||
or bytes have been written to the mmap ring buffer.
|
||||
In this case a signal of type POLL_IN is sent.
|
||||
In this case a signal of type
|
||||
.B POLL_IN
|
||||
is sent.
|
||||
|
||||
The other way is by use of the
|
||||
.I PERF_EVENT_IOC_REFRESH
|
||||
.B PERF_EVENT_IOC_REFRESH
|
||||
ioctl.
|
||||
This ioctl adds to a counter that decrements each time the event overflows.
|
||||
When non-zero, a POLL_IN signal is sent on overflow, but
|
||||
once the value reaches 0, a signal is sent of type POLL_HUP and
|
||||
When non-zero, a
|
||||
.B POLL_IN
|
||||
signal is sent on overflow, but
|
||||
once the value reaches 0, a signal is sent of type
|
||||
.B POLL_HUP
|
||||
and
|
||||
the underlying event is disabled.
|
||||
|
||||
Note: on newer kernels (definitely noticed with 3.2)
|
||||
.\" FIXME : Find out when this was introduced
|
||||
.\" FIXME(Vince) : Find out when this was introduced
|
||||
a signal is provided for every overflow, even if
|
||||
.I wakeup_events
|
||||
is not set.
|
||||
|
@ -1696,18 +1755,20 @@ to calculate event values can be found in that section.
|
|||
.SS "perf_event ioctl calls"
|
||||
.PP
|
||||
Various ioctls act on
|
||||
.BR perf_event_open()
|
||||
.BR perf_event_open ()
|
||||
file descriptors
|
||||
|
||||
.TP
|
||||
.B PERF_EVENT_IOC_ENABLE
|
||||
Enables the individual event or event group specified by the fd.
|
||||
Enables the individual event or event group specified by the
|
||||
file descriptor argument.
|
||||
|
||||
The ioctl argument is ignored.
|
||||
|
||||
.TP
|
||||
.B PERF_EVENT_IOC_DISABLE
|
||||
Disables the individual counter or event group specified by the fd.
|
||||
Disables the individual counter or event group specified by the
|
||||
file descriptor argument.
|
||||
|
||||
Enabling or disabling the leader of a group enables or disables the
|
||||
entire group; that is, while the group leader is disabled, none of the
|
||||
|
@ -1725,14 +1786,18 @@ to enable a counter for a number of overflows specified by the argument,
|
|||
after which it is disabled.
|
||||
Subsequent calls of this ioctl add the argument value to the current
|
||||
count.
|
||||
A signal with POLL_IN set will happen on each overflow until the
|
||||
count reaches 0; when that happens a signal with POLL_HUP set is
|
||||
sent and the event is disabled.
|
||||
A signal with
|
||||
.B POLL_IN
|
||||
set will happen on each overflow until the
|
||||
count reaches 0; when that happens a signal with
|
||||
.B POLL_HUP
|
||||
set is sent and the event is disabled.
|
||||
Using an argument of 0 is considered undefined behavior.
|
||||
|
||||
.TP
|
||||
.B PERF_EVENT_IOC_RESET
|
||||
Reset the event count specified by the fd to zero.
|
||||
Reset the event count specified by the
|
||||
file descriptor argument to zero.
|
||||
This only resets the counts; there is no way to reset the
|
||||
multiplexing
|
||||
.I time_enabled
|
||||
|
@ -1783,18 +1848,18 @@ the group leaders, not any other members in the groups.
|
|||
|
||||
.SS perf_event related configuration files
|
||||
|
||||
Files in /proc/sys/kernel/
|
||||
Files in
|
||||
.I /proc/sys/kernel/
|
||||
|
||||
.RS
|
||||
.RS 4
|
||||
.TP
|
||||
.I
|
||||
/proc/sys/kernel/perf_event_paranoid
|
||||
.I /proc/sys/kernel/perf_event_paranoid
|
||||
|
||||
The
|
||||
.I perf_event_paranoid
|
||||
file can be set to restrict access to the performance counters.
|
||||
|
||||
2 - only allow userspace measurements
|
||||
2 - only allow user-space measurements
|
||||
|
||||
1 - (default) allow both kernel and user measurements
|
||||
|
||||
|
@ -1805,32 +1870,35 @@ file can be set to restrict access to the performance counters.
|
|||
The existence of the
|
||||
.I perf_event_paranoid
|
||||
file is the official method for determining if a kernel supports
|
||||
.BR perf_event_open().
|
||||
.BR perf_event_open ().
|
||||
|
||||
.TP
|
||||
.I /proc/sys/kernel/perf_event_max_sample_rate
|
||||
|
||||
This sets the maximum sample rate. Setting this too high can allow
|
||||
This sets the maximum sample rate.
|
||||
Setting this too high can allow
|
||||
users to sample at a rate that impacts overall machine performance
|
||||
and potentially lock up the machine. The default value is
|
||||
and potentially lock up the machine.
|
||||
The default value is
|
||||
100000 (samples per second).
|
||||
|
||||
.TP
|
||||
.I /proc/sys/kernel/perf_event_mlock_kb
|
||||
|
||||
Maximum number of pages an unprivledged user can mlock (2) .
|
||||
Maximum number of pages an unprivileged user can
.BR mlock (2).
|
||||
The default is 516 (kB).
|
||||
.RE
|
||||
|
||||
Files in /sys/bus/event_source/devices/
|
||||
Files in
|
||||
.I /sys/bus/event_source/devices/
|
||||
|
||||
.RS 4
|
||||
Since Linux 2.6.34 the kernel supports having multiple PMUs
|
||||
available for monitoring.
|
||||
Information on how to program these PMUs can be found under
|
||||
.IR /sys/bus/event_source/devices/ .
|
||||
Each subdirectory corresponds to a different PMU.
|
||||
|
||||
.RS
|
||||
.TP
|
||||
.I /sys/bus/event_source/devices/*/type
|
||||
This contains an integer that can be used in the
|
||||
|
@ -1894,25 +1962,28 @@ It was renamed in Linux 2.6.32.
|
|||
|
||||
.SH CONFORMING TO
|
||||
|
||||
This call is specific to Linux
|
||||
This
|
||||
.BR perf_event_open ()
|
||||
system call is Linux-specific
|
||||
and should not be used in programs intended to be portable.
|
||||
|
||||
.SH NOTES
|
||||
Glibc does not provide a wrapper for this system call; call it using
|
||||
.BR syscall (2).
|
||||
See the example below.
|
||||
|
||||
The official way of knowing if
|
||||
.BR perf_event_open()
|
||||
.BR perf_event_open ()
|
||||
support is enabled is checking
|
||||
for the existence of the file
|
||||
.I /proc/sys/kernel/perf_event_paranoid
|
||||
.IR /proc/sys/kernel/perf_event_paranoid .
|
||||
|
||||
.SH BUGS
|
||||
|
||||
The
|
||||
.B F_SETOWN_EX
|
||||
option to
|
||||
.IR fcntl (2)
|
||||
.BR fcntl (2)
|
||||
is needed to properly get overflow signals in threads.
|
||||
This was introduced in Linux 2.6.32.
|
||||
|
||||
|
@ -1949,14 +2020,15 @@ Linux 2.6.36 and Linux 3.0 that ignores the
|
|||
was chosen if the union has a
|
||||
non-zero value in it.
|
||||
|
||||
Always double-check your results! Various generalized events
|
||||
have had wrong values.
|
||||
Always double-check your results!
|
||||
Various generalized events have had wrong values.
|
||||
For example, retired branches measured
|
||||
the wrong thing on AMD machines until Linux 2.6.35.
|
||||
|
||||
.SH EXAMPLE
|
||||
The following is a short example that measures the total
|
||||
instruction count of a call to printf().
|
||||
instruction count of a call to
|
||||
.BR printf (3).
|
||||
.nf
|
||||
|
||||
#include <stdlib.h>
|
||||
|
@ -1967,13 +2039,14 @@ instruction count of a call to printf().
|
|||
#include <linux/perf_event.h>
|
||||
#include <asm/unistd.h>
|
||||
|
||||
long perf_event_open( struct perf_event_attr *hw_event, pid_t pid,
|
||||
int cpu, int group_fd, unsigned long flags )
|
||||
long
|
||||
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
|
||||
int cpu, int group_fd, unsigned long flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = syscall( __NR_perf_event_open, hw_event, pid, cpu,
|
||||
group_fd, flags );
|
||||
ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
|
||||
group_fd, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1995,8 +2068,9 @@ main(int argc, char **argv)
|
|||
pe.exclude_hv = 1;
|
||||
|
||||
fd = perf_event_open(&pe, 0, \-1, \-1, 0);
|
||||
if (fd < 0) {
|
||||
if (fd == \-1) {
|
||||
fprintf(stderr, "Error opening leader %llx\\n", pe.config);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
|
||||
|
|
Loading…
Reference in New Issue