498 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			498 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| perf.data format
 | |
| 
 | |
| Up to date as of v4.7
 | |
| 
 | |
| This document describes the on-disk perf.data format, generated by perf record
 | |
| or perf inject and consumed by the other perf tools.
 | |
| 
 | |
| On a high level perf.data contains the events generated by the PMUs, plus metadata.
 | |
| 
 | |
| All fields are in the native endianness of the machine that generated the perf.data.
 | |
| 
 | |
| When perf is writing to a pipe it uses a special version of the file
 | |
| format that does not rely on seeking to adjust data offsets.  This
 | |
| format is described in "Pipe-mode data" section. The pipe data version can be
 | |
| augmented with additional events using perf inject.
 | |
| 
 | |
| The file starts with a perf_header:
 | |
| 
 | |
| struct perf_header {
 | |
| 	char magic[8];		/* PERFILE2 */
 | |
| 	uint64_t size;		/* size of the header */
 | |
| 	uint64_t attr_size;	/* size of an attribute in attrs */
 | |
| 	struct perf_file_section attrs;
 | |
| 	struct perf_file_section data;
 | |
| 	struct perf_file_section event_types;
 | |
| 	uint64_t flags;
 | |
| 	uint64_t flags1[3];
 | |
| };
 | |
| 
 | |
| The magic number identifies the perf file and the version. Current perf versions
 | |
| use PERFILE2. Old perf versions generated a version 1 format (PERFFILE). Version 1
 | |
| is not described here. The magic number also identifies the endian. When the
 | |
| magic value is 64bit byte swapped compared to the expected value, the file is in non-native
 | |
| endian.
 | |
| 
 | |
| A perf_file_section contains a pointer to another section of the perf file.
 | |
| The header contains three such pointers: for attributes, data and event types.
 | |
| 
 | |
| struct perf_file_section {
 | |
| 	uint64_t offset;	/* offset from start of file */
 | |
| 	uint64_t size;		/* size of the section */
 | |
| };
 | |
| 
 | |
| Flags section:
 | |
| 
 | |
| The header is followed by different optional headers, described by the bits set
 | |
| in flags. Only headers for which the bit is set are included. Each header
 | |
| consists of a perf_file_section located after the initial header.
 | |
| The respective perf_file_section points to the data of the additional
 | |
| header and defines its size.
 | |
| 
 | |
| Some headers consist of strings, which are defined like this:
 | |
| 
 | |
| struct perf_header_string {
 | |
|        uint32_t len;
 | |
|        char string[len]; /* zero terminated */
 | |
| };
 | |
| 
 | |
| Some headers consist of a sequence of strings, which starts with a count of the strings:
 | |
| 
 | |
| struct perf_header_string_list {
 | |
|      uint32_t nr;
 | |
|      struct perf_header_string strings[nr]; /* variable length records */
 | |
| };
 | |
| 
 | |
| The bits are the flags bits in a 256 bit bitmap starting with
 | |
| flags. These define the valid bits:
 | |
| 
 | |
| 	HEADER_RESERVED		= 0,	/* always cleared */
 | |
| 	HEADER_FIRST_FEATURE	= 1,
 | |
| 	HEADER_TRACING_DATA	= 1,
 | |
| 
 | |
| Describe me.
 | |
| 
 | |
| 	HEADER_BUILD_ID = 2,
 | |
| 
 | |
| The header consists of a sequence of build_id_event. The size of each record
 | |
| is defined by header.size (see perf_event.h). Each event defines an ELF build id
 | |
| for an executable file name for a pid. An ELF build id is a unique identifier
 | |
| assigned by the linker to an executable.
 | |
| 
 | |
| struct build_id_event {
 | |
| 	struct perf_event_header header;
 | |
| 	pid_t			 pid;
 | |
| 	uint8_t			 build_id[24];
 | |
| 	char			 filename[header.size - offsetof(struct build_id_event, filename)];
 | |
| };
 | |
| 
 | |
| 	HEADER_HOSTNAME = 3,
 | |
| 
 | |
| A perf_header_string with the hostname where the data was collected
 | |
| (uname -n)
 | |
| 
 | |
| 	HEADER_OSRELEASE = 4,
 | |
| 
 | |
| A perf_header_string with the os release where the data was collected
 | |
| (uname -r)
 | |
| 
 | |
| 	HEADER_VERSION = 5,
 | |
| 
 | |
| A perf_header_string with the perf user tool version where the
 | |
| data was collected. This is the same as the version of the source tree
 | |
| the perf tool was built from.
 | |
| 
 | |
| 	HEADER_ARCH = 6,
 | |
| 
 | |
| A perf_header_string with the CPU architecture (uname -m)
 | |
| 
 | |
| 	HEADER_NRCPUS = 7,
 | |
| 
 | |
| A structure defining the number of CPUs.
 | |
| 
 | |
| struct nr_cpus {
 | |
|        uint32_t nr_cpus_available; /* CPUs not yet onlined */
 | |
|        uint32_t nr_cpus_online;
 | |
| };
 | |
| 
 | |
| 	HEADER_CPUDESC = 8,
 | |
| 
 | |
| A perf_header_string with description of the CPU. On x86 this is the model name
 | |
| in /proc/cpuinfo
 | |
| 
 | |
| 	HEADER_CPUID = 9,
 | |
| 
 | |
| A perf_header_string with the exact CPU type. On x86 this is
 | |
| vendor,family,model,stepping. For example: GenuineIntel,6,69,1
 | |
| 
 | |
| 	HEADER_TOTAL_MEM = 10,
 | |
| 
 | |
| A uint64_t with the total memory in bytes.
 | |
| 
 | |
| 	HEADER_CMDLINE = 11,
 | |
| 
 | |
| A perf_header_string with the perf command line used to collect the data.
 | |
| 
 | |
| 	HEADER_EVENT_DESC = 12,
 | |
| 
 | |
| Another description of the perf_event_attrs, more detailed than header.attrs
 | |
| including IDs and names. See perf_event.h or the man page for a description
 | |
| of a struct perf_event_attr.
 | |
| 
 | |
| struct {
 | |
|        uint32_t nr; /* number of events */
 | |
|        uint32_t attr_size; /* size of each perf_event_attr */
 | |
|        struct {
 | |
| 	      struct perf_event_attr attr;  /* size of attr_size */
 | |
| 	      uint32_t nr_ids;
 | |
| 	      struct perf_header_string event_string;
 | |
| 	      uint64_t ids[nr_ids];
 | |
|        } events[nr]; /* Variable length records */
 | |
| };
 | |
| 
 | |
| 	HEADER_CPU_TOPOLOGY = 13,
 | |
| 
 | |
| String lists defining the core and CPU threads topology.
 | |
| The string lists are followed by a variable length array
 | |
| which contains core_id and socket_id of each cpu.
 | |
| The number of entries can be determined by the size of the
 | |
| section minus the sizes of both string lists.
 | |
| 
 | |
| struct {
 | |
|        struct perf_header_string_list cores; /* Variable length */
 | |
|        struct perf_header_string_list threads; /* Variable length */
 | |
|        struct {
 | |
| 	      uint32_t core_id;
 | |
| 	      uint32_t socket_id;
 | |
|        } cpus[nr]; /* Variable length records */
 | |
| };
 | |
| 
 | |
| Example:
 | |
| 	sibling cores   : 0-3
 | |
| 	sibling threads : 0-1
 | |
| 	sibling threads : 2-3
 | |
| 
 | |
| 	HEADER_NUMA_TOPOLOGY = 14,
 | |
| 
 | |
| 	A list of NUMA node descriptions
 | |
| 
 | |
| struct {
 | |
|        uint32_t nr;
 | |
|        struct {
 | |
| 	      uint32_t nodenr;
 | |
| 	      uint64_t mem_total;
 | |
| 	      uint64_t mem_free;
 | |
| 	      struct perf_header_string cpus;
 | |
|        } nodes[nr]; /* Variable length records */
 | |
| };
 | |
| 
 | |
| 	HEADER_BRANCH_STACK = 15,
 | |
| 
 | |
| Not implemented in perf.
 | |
| 
 | |
| 	HEADER_PMU_MAPPINGS = 16,
 | |
| 
 | |
| 	A list of PMU structures, defining the different PMUs supported by perf.
 | |
| 
 | |
| struct {
 | |
|        uint32_t nr;
 | |
|        struct pmu {
 | |
| 	      uint32_t pmu_type;
 | |
| 	      struct perf_header_string pmu_name;
 | |
|        } [nr]; /* Variable length records */
 | |
| };
 | |
| 
 | |
| 	HEADER_GROUP_DESC = 17,
 | |
| 
 | |
| 	Description of counter groups ({...} in perf syntax)
 | |
| 
 | |
| struct {
 | |
|          uint32_t nr;
 | |
|          struct {
 | |
| 		struct perf_header_string string;
 | |
| 		uint32_t leader_idx;
 | |
| 		uint32_t nr_members;
 | |
| 	 } [nr]; /* Variable length records */
 | |
| };
 | |
| 
 | |
| 	HEADER_AUXTRACE = 18,
 | |
| 
 | |
| Define additional auxtrace areas in the perf.data. auxtrace is used to store
 | |
| undecoded hardware tracing information, such as Intel Processor Trace data.
 | |
| 
 | |
| /**
 | |
|  * struct auxtrace_index_entry - indexes an AUX area tracing event within a
 | |
|  *                               perf.data file.
 | |
|  * @file_offset: offset within the perf.data file
 | |
|  * @sz: size of the event
 | |
|  */
 | |
| struct auxtrace_index_entry {
 | |
| 	u64			file_offset;
 | |
| 	u64			sz;
 | |
| };
 | |
| 
 | |
| #define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
 | |
| 
 | |
| /**
 | |
|  * struct auxtrace_index - index of AUX area tracing events within a perf.data
 | |
|  *                         file.
 | |
|  * @list: linking a number of arrays of entries
 | |
|  * @nr: number of entries
 | |
|  * @entries: array of entries
 | |
|  */
 | |
| struct auxtrace_index {
 | |
| 	struct list_head	list;
 | |
| 	size_t			nr;
 | |
| 	struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
 | |
| };
 | |
| 
 | |
| 	HEADER_STAT = 19,
 | |
| 
 | |
| This is merely a flag signifying that the data section contains data
 | |
| recorded from perf stat record.
 | |
| 
 | |
| 	HEADER_CACHE = 20,
 | |
| 
 | |
| Description of the cache hierarchy. Based on the Linux sysfs format
 | |
| in /sys/devices/system/cpu/cpu*/cache/
 | |
| 
 | |
| 	u32 version	Currently always 1
 | |
| 	u32 number_of_cache_levels
 | |
| 
 | |
| struct {
 | |
| 	u32	level;
 | |
| 	u32	line_size;
 | |
| 	u32	sets;
 | |
| 	u32	ways;
 | |
| 	struct perf_header_string type;
 | |
| 	struct perf_header_string size;
 | |
| 	struct perf_header_string map;
 | |
| }[number_of_cache_levels];
 | |
| 
 | |
| 	HEADER_SAMPLE_TIME = 21,
 | |
| 
 | |
| Two uint64_t for the time of first sample and the time of last sample.
 | |
| 
 | |
| 	other bits are reserved and should be ignored for now
 | |
| 	HEADER_FEAT_BITS	= 256,
 | |
| 
 | |
| Attributes
 | |
| 
 | |
| This is an array of perf_event_attrs, each attr_size bytes long, which defines
 | |
| each event collected. See perf_event.h or the man page for a detailed
 | |
| description.
 | |
| 
 | |
| Data
 | |
| 
 | |
| This section is the bulk of the file. It consists of a stream of perf_events
 | |
| describing events. This matches the format generated by the kernel.
 | |
| See perf_event.h or the manpage for a detailed description.
 | |
| 
 | |
| Some notes on parsing:
 | |
| 
 | |
| Ordering
 | |
| 
 | |
| The events are not necessarily in time stamp order, as they can be
 | |
| collected in parallel on different CPUs. If the events should be
 | |
| processed in time order they need to be sorted first. It is possible
 | |
| to only do a partial sort using the FINISHED_ROUND event header (see
 | |
| below). perf record guarantees that there is no reordering over a
 | |
| FINISHED_ROUND.
 | |
| 
 | |
| ID vs IDENTIFIER
 | |
| 
 | |
| When the event stream contains multiple events each event is identified
 | |
| by an ID. This can be either through the PERF_SAMPLE_ID or the
 | |
| PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is
 | |
| at a fixed offset from the event header, which allows reliable
 | |
| parsing of the header. Relying on ID may be ambiguous.
 | |
| IDENTIFIER is only supported by newer Linux kernels.
 | |
| 
 | |
| Perf record specific events:
 | |
| 
 | |
| In addition to the kernel generated event types perf record adds its
 | |
| own event types (in addition it also synthesizes some kernel events,
 | |
| for example MMAP events)
 | |
| 
 | |
| 	PERF_RECORD_USER_TYPE_START		= 64,
 | |
| 	PERF_RECORD_HEADER_ATTR			= 64,
 | |
| 
 | |
| struct attr_event {
 | |
| 	struct perf_event_header header;
 | |
| 	struct perf_event_attr attr;
 | |
| 	uint64_t id[];
 | |
| };
 | |
| 
 | |
| 	PERF_RECORD_HEADER_EVENT_TYPE		= 65, /* deprecated */
 | |
| 
 | |
| #define MAX_EVENT_NAME 64
 | |
| 
 | |
| struct perf_trace_event_type {
 | |
| 	uint64_t	event_id;
 | |
| 	char	name[MAX_EVENT_NAME];
 | |
| };
 | |
| 
 | |
| struct event_type_event {
 | |
| 	struct perf_event_header header;
 | |
| 	struct perf_trace_event_type event_type;
 | |
| };
 | |
| 
 | |
| 
 | |
| 	PERF_RECORD_HEADER_TRACING_DATA		= 66,
 | |
| 
 | |
| Describe me
 | |
| 
 | |
| struct tracing_data_event {
 | |
| 	struct perf_event_header header;
 | |
| 	uint32_t size;
 | |
| };
 | |
| 
 | |
| 	PERF_RECORD_HEADER_BUILD_ID		= 67,
 | |
| 
 | |
| Defines an ELF build ID for a referenced executable.
 | |
| 
 | |
|        struct build_id_event;   /* See above */
 | |
| 
 | |
| 	PERF_RECORD_FINISHED_ROUND		= 68,
 | |
| 
 | |
| No event reordering over this header. No payload.
 | |
| 
 | |
| 	PERF_RECORD_ID_INDEX			= 69,
 | |
| 
 | |
| Map event ids to CPUs and TIDs.
 | |
| 
 | |
| struct id_index_entry {
 | |
| 	uint64_t id;
 | |
| 	uint64_t idx;
 | |
| 	uint64_t cpu;
 | |
| 	uint64_t tid;
 | |
| };
 | |
| 
 | |
| struct id_index_event {
 | |
| 	struct perf_event_header header;
 | |
| 	uint64_t nr;
 | |
| 	struct id_index_entry entries[nr];
 | |
| };
 | |
| 
 | |
| 	PERF_RECORD_AUXTRACE_INFO		= 70,
 | |
| 
 | |
| Auxtrace type specific information. Describe me
 | |
| 
 | |
| struct auxtrace_info_event {
 | |
| 	struct perf_event_header header;
 | |
| 	uint32_t type;
 | |
| 	uint32_t reserved__; /* For alignment */
 | |
| 	uint64_t priv[];
 | |
| };
 | |
| 
 | |
| 	PERF_RECORD_AUXTRACE			= 71,
 | |
| 
 | |
| Defines auxtrace data. Followed by the actual data. The contents of
 | |
| the auxtrace data are dependent on the event and the CPU. For example
 | |
| for Intel Processor Trace it contains Processor Trace data generated
 | |
| by the CPU.
 | |
| 
 | |
| struct auxtrace_event {
 | |
| 	struct perf_event_header header;
 | |
| 	uint64_t size;
 | |
| 	uint64_t offset;
 | |
| 	uint64_t reference;
 | |
| 	uint32_t idx;
 | |
| 	uint32_t tid;
 | |
| 	uint32_t cpu;
 | |
| 	uint32_t reserved__; /* For alignment */
 | |
| };
 | |
| 
 | |
| struct aux_event {
 | |
| 	struct perf_event_header header;
 | |
| 	uint64_t	aux_offset;
 | |
| 	uint64_t	aux_size;
 | |
| 	uint64_t	flags;
 | |
| };
 | |
| 
 | |
| 	PERF_RECORD_AUXTRACE_ERROR		= 72,
 | |
| 
 | |
| Describes an error in hardware tracing
 | |
| 
 | |
| enum auxtrace_error_type {
 | |
| 	PERF_AUXTRACE_ERROR_ITRACE  = 1,
 | |
| 	PERF_AUXTRACE_ERROR_MAX
 | |
| };
 | |
| 
 | |
| #define MAX_AUXTRACE_ERROR_MSG 64
 | |
| 
 | |
| struct auxtrace_error_event {
 | |
| 	struct perf_event_header header;
 | |
| 	uint32_t type;
 | |
| 	uint32_t code;
 | |
| 	uint32_t cpu;
 | |
| 	uint32_t pid;
 | |
| 	uint32_t tid;
 | |
| 	uint32_t reserved__; /* For alignment */
 | |
| 	uint64_t ip;
 | |
| 	char msg[MAX_AUXTRACE_ERROR_MSG];
 | |
| };
 | |
| 
 | |
| 	PERF_RECORD_HEADER_FEATURE		= 80,
 | |
| 
 | |
| Describes a header feature. These are records used in pipe-mode that
 | |
| contain information that otherwise would be in perf.data file's header.
 | |
| 
 | |
| Event types
 | |
| 
 | |
| Define the event attributes with their IDs.
 | |
| 
 | |
| An array bound by the perf_file_section size.
 | |
| 
 | |
| 	struct {
 | |
| 		struct perf_event_attr attr;   /* Size defined by header.attr_size */
 | |
| 		struct perf_file_section ids;
 | |
| 	}
 | |
| 
 | |
| ids points to an array of uint64_t defining the ids for event attr attr.
 | |
| 
 | |
| Pipe-mode data
 | |
| 
 | |
| Pipe-mode avoids seeks in the file by removing the perf_file_section and flags
 | |
| from the struct perf_header. The trimmed header is:
 | |
| 
 | |
| struct perf_pipe_file_header {
 | |
| 	u64				magic;
 | |
| 	u64				size;
 | |
| };
 | |
| 
 | |
| The information about attrs, data, and event_types is instead in the
 | |
| synthesized events PERF_RECORD_HEADER_ATTR, PERF_RECORD_HEADER_TRACING_DATA,
 | |
| PERF_RECORD_HEADER_EVENT_TYPE, and PERF_RECORD_HEADER_FEATURE
 | |
| that are generated by perf record in pipe-mode.
 | |
| 
 | |
| 
 | |
| References:
 | |
| 
 | |
| include/uapi/linux/perf_event.h
 | |
| 
 | |
| This is the canonical description of the kernel generated perf_events
 | |
| and the perf_event_attrs.
 | |
| 
 | |
| perf_events manpage
 | |
| 
 | |
| A manpage describing perf_event and perf_event_attr is here:
 | |
| http://web.eece.maine.edu/~vweaver/projects/perf_events/programming.html
 | |
| This tends to be slightly behind the kernel include, but has better
 | |
| descriptions.  A (typically older) version of the man page may be
 | |
| included with the standard Linux man pages, available with "man
 | |
| perf_events"
 | |
| 
 | |
| pmu-tools
 | |
| 
 | |
| https://github.com/andikleen/pmu-tools/tree/master/parser
 | |
| 
 | |
| A definition of the perf.data format in python "construct" format is available
 | |
| in pmu-tools parser. This allows reading perf.data from Python and dumping it.
 | |
| 
 | |
| quipper
 | |
| 
 | |
| The quipper C++ parser is available at
 | |
| http://github.com/google/perf_data_converter/tree/master/src/quipper
 | |
| 
 | 
