cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

perf_event.h (49178B)


      1/*
      2 * Performance events:
      3 *
      4 *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
      5 *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
      6 *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
      7 *
      8 * Data type definitions, declarations, prototypes.
      9 *
     10 *    Started by: Thomas Gleixner and Ingo Molnar
     11 *
     12 * For licensing details see kernel-base/COPYING
     13 */
     14#ifndef _LINUX_PERF_EVENT_H
     15#define _LINUX_PERF_EVENT_H
     16
     17#include <uapi/linux/perf_event.h>
     18#include <uapi/linux/bpf_perf_event.h>
     19
     20/*
     21 * Kernel-internal data types and definitions:
     22 */
     23
     24#ifdef CONFIG_PERF_EVENTS
     25# include <asm/perf_event.h>
     26# include <asm/local64.h>
     27#endif
     28
     29#define PERF_GUEST_ACTIVE	0x01
     30#define PERF_GUEST_USER	0x02
     31
     32struct perf_guest_info_callbacks {
     33	unsigned int			(*state)(void);
     34	unsigned long			(*get_ip)(void);
     35	unsigned int			(*handle_intel_pt_intr)(void);
     36};
     37
     38#ifdef CONFIG_HAVE_HW_BREAKPOINT
     39#include <asm/hw_breakpoint.h>
     40#endif
     41
     42#include <linux/list.h>
     43#include <linux/mutex.h>
     44#include <linux/rculist.h>
     45#include <linux/rcupdate.h>
     46#include <linux/spinlock.h>
     47#include <linux/hrtimer.h>
     48#include <linux/fs.h>
     49#include <linux/pid_namespace.h>
     50#include <linux/workqueue.h>
     51#include <linux/ftrace.h>
     52#include <linux/cpu.h>
     53#include <linux/irq_work.h>
     54#include <linux/static_key.h>
     55#include <linux/jump_label_ratelimit.h>
     56#include <linux/atomic.h>
     57#include <linux/sysfs.h>
     58#include <linux/perf_regs.h>
     59#include <linux/cgroup.h>
     60#include <linux/refcount.h>
     61#include <linux/security.h>
     62#include <linux/static_call.h>
     63#include <asm/local.h>
     64
     65struct perf_callchain_entry {
     66	__u64				nr;
     67	__u64				ip[]; /* /proc/sys/kernel/perf_event_max_stack */
     68};
     69
     70struct perf_callchain_entry_ctx {
     71	struct perf_callchain_entry *entry;
     72	u32			    max_stack;
     73	u32			    nr;
     74	short			    contexts;
     75	bool			    contexts_maxed;
     76};
     77
     78typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
     79				     unsigned long off, unsigned long len);
     80
     81struct perf_raw_frag {
     82	union {
     83		struct perf_raw_frag	*next;
     84		unsigned long		pad;
     85	};
     86	perf_copy_f			copy;
     87	void				*data;
     88	u32				size;
     89} __packed;
     90
     91struct perf_raw_record {
     92	struct perf_raw_frag		frag;
     93	u32				size;
     94};
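
/*
 * Usage sketch (not part of the upstream header), assuming a caller-provided
 * @data/@size pair: a raw record built from a single fragment, in the style
 * of the tracepoint/BPF paths.  perf_prepare_sample() computes the outer
 * size when the sample is emitted.
 *
 *	struct perf_raw_record raw = {
 *		.frag = {
 *			.size	= size,
 *			.data	= data,
 *		},
 *	};
 *
 *	perf_sample_data_init(&sample, 0, 0);
 *	sample.raw = &raw;
 */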
     95
     96/*
     97 * branch stack layout:
     98 *  nr: number of taken branches stored in entries[]
     99 *  hw_idx: The low level index of raw branch records
    100 *          for the most recent branch.
    101 *          -1ULL means invalid/unknown.
    102 *
    103 * Note that nr can vary from sample to sample.
    104 * Branches (to, from) are stored from most recent
    105 * to least recent, i.e., entries[0] contains the most
    106 * recent branch.
    107 * The entries[] is an abstraction of raw branch records,
    108 * which may not be stored in age order in HW, e.g. Intel LBR.
    109 * The hw_idx is to expose the low level index of raw
    110 * branch record for the most recent branch aka entries[0].
    111 * The hw_idx index is between -1 (unknown) and max depth,
    112 * which can be retrieved in /sys/devices/cpu/caps/branches.
    113 * For the architectures whose raw branch records are
    114 * already stored in age order, the hw_idx should be 0.
    115 */
    116struct perf_branch_stack {
    117	__u64				nr;
    118	__u64				hw_idx;
    119	struct perf_branch_entry	entries[];
    120};
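
/*
 * Usage sketch (not part of the upstream header): entries[] is ordered from
 * most recent to least recent, so a consumer simply walks it front to back.
 * @bs and process_branch() are hypothetical.
 *
 *	u64 i;
 *
 *	for (i = 0; i < bs->nr; i++)
 *		process_branch(bs->entries[i].from, bs->entries[i].to);
 */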
    121
    122struct task_struct;
    123
    124/*
    125 * extra PMU register associated with an event
    126 */
    127struct hw_perf_event_extra {
    128	u64		config;	/* register value */
    129	unsigned int	reg;	/* register address or index */
    130	int		alloc;	/* extra register already allocated */
    131	int		idx;	/* index in shared_regs->regs[] */
    132};
    133
    134/**
    135 * hw_perf_event::flag values
    136 *
    137 * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
    138 * usage.
    139 */
    140#define PERF_EVENT_FLAG_ARCH			0x0000ffff
    141#define PERF_EVENT_FLAG_USER_READ_CNT		0x80000000
    142
    143/**
    144 * struct hw_perf_event - performance event hardware details:
    145 */
    146struct hw_perf_event {
    147#ifdef CONFIG_PERF_EVENTS
    148	union {
    149		struct { /* hardware */
    150			u64		config;
    151			u64		last_tag;
    152			unsigned long	config_base;
    153			unsigned long	event_base;
    154			int		event_base_rdpmc;
    155			int		idx;
    156			int		last_cpu;
    157			int		flags;
    158
    159			struct hw_perf_event_extra extra_reg;
    160			struct hw_perf_event_extra branch_reg;
    161		};
    162		struct { /* software */
    163			struct hrtimer	hrtimer;
    164		};
    165		struct { /* tracepoint */
    166			/* for tp_event->class */
    167			struct list_head	tp_list;
    168		};
    169		struct { /* amd_power */
    170			u64	pwr_acc;
    171			u64	ptsc;
    172		};
    173#ifdef CONFIG_HAVE_HW_BREAKPOINT
    174		struct { /* breakpoint */
    175			/*
    176			 * Crufty hack to avoid the chicken and egg
    177			 * problem hw_breakpoint has with context
    178			 * creation and event initialization.
    179			 */
    180			struct arch_hw_breakpoint	info;
    181			struct list_head		bp_list;
    182		};
    183#endif
    184		struct { /* amd_iommu */
    185			u8	iommu_bank;
    186			u8	iommu_cntr;
    187			u16	padding;
    188			u64	conf;
    189			u64	conf1;
    190		};
    191	};
    192	/*
    193	 * If the event is a per task event, this will point to the task in
    194	 * question. See the comment in perf_event_alloc().
    195	 */
    196	struct task_struct		*target;
    197
    198	/*
    199	 * PMU would store hardware filter configuration
    200	 * here.
    201	 */
    202	void				*addr_filters;
    203
    204	/* Last sync'ed generation of filters */
    205	unsigned long			addr_filters_gen;
    206
    207/*
    208 * hw_perf_event::state flags; used to track the PERF_EF_* state.
    209 */
    210#define PERF_HES_STOPPED	0x01 /* the counter is stopped */
    211#define PERF_HES_UPTODATE	0x02 /* event->count up-to-date */
    212#define PERF_HES_ARCH		0x04
    213
    214	int				state;
    215
    216	/*
    217	 * The last observed hardware counter value, updated with a
    218	 * local64_cmpxchg() such that pmu::read() can be called nested.
    219	 */
    220	local64_t			prev_count;
    221
    222	/*
    223	 * The period to start the next sample with.
    224	 */
    225	u64				sample_period;
    226
    227	union {
    228		struct { /* Sampling */
    229			/*
    230			 * The period we started this sample with.
    231			 */
    232			u64				last_period;
    233
    234			/*
    235			 * However much is left of the current period;
    236			 * note that this is a full 64bit value and
    237			 * allows for generation of periods longer
    238			 * than hardware might allow.
    239			 */
    240			local64_t			period_left;
    241		};
    242		struct { /* Topdown events counting for context switch */
    243			u64				saved_metric;
    244			u64				saved_slots;
    245		};
    246	};
    247
    248	/*
    249	 * State for throttling the event, see __perf_event_overflow() and
    250	 * perf_adjust_freq_unthr_context().
    251	 */
    252	u64                             interrupts_seq;
    253	u64				interrupts;
    254
    255	/*
    256	 * State for freq target events, see __perf_event_overflow() and
    257	 * perf_adjust_freq_unthr_context().
    258	 */
    259	u64				freq_time_stamp;
    260	u64				freq_count_stamp;
    261#endif
    262};
    263
    264struct perf_event;
    265
    266/*
    267 * Common implementation detail of pmu::{start,commit,cancel}_txn
    268 */
    269#define PERF_PMU_TXN_ADD  0x1		/* txn to add/schedule event on PMU */
    270#define PERF_PMU_TXN_READ 0x2		/* txn to read event group from PMU */
    271
    272/**
    273 * pmu::capabilities flags
    274 */
    275#define PERF_PMU_CAP_NO_INTERRUPT		0x0001
    276#define PERF_PMU_CAP_NO_NMI			0x0002
    277#define PERF_PMU_CAP_AUX_NO_SG			0x0004
    278#define PERF_PMU_CAP_EXTENDED_REGS		0x0008
    279#define PERF_PMU_CAP_EXCLUSIVE			0x0010
    280#define PERF_PMU_CAP_ITRACE			0x0020
    281#define PERF_PMU_CAP_HETEROGENEOUS_CPUS		0x0040
    282#define PERF_PMU_CAP_NO_EXCLUDE			0x0080
    283#define PERF_PMU_CAP_AUX_OUTPUT			0x0100
    284#define PERF_PMU_CAP_EXTENDED_HW_TYPE		0x0200
    285
    286struct perf_output_handle;
    287
    288/**
    289 * struct pmu - generic performance monitoring unit
    290 */
    291struct pmu {
    292	struct list_head		entry;
    293
    294	struct module			*module;
    295	struct device			*dev;
    296	const struct attribute_group	**attr_groups;
    297	const struct attribute_group	**attr_update;
    298	const char			*name;
    299	int				type;
    300
    301	/*
    302	 * various common per-pmu feature flags
    303	 */
    304	int				capabilities;
    305
    306	int __percpu			*pmu_disable_count;
    307	struct perf_cpu_context __percpu *pmu_cpu_context;
    308	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
    309	int				task_ctx_nr;
    310	int				hrtimer_interval_ms;
    311
    312	/* number of address filters this PMU can do */
    313	unsigned int			nr_addr_filters;
    314
    315	/*
    316	 * Fully disable/enable this PMU, can be used to protect from the PMI
    317	 * as well as for lazy/batch writing of the MSRs.
    318	 */
    319	void (*pmu_enable)		(struct pmu *pmu); /* optional */
    320	void (*pmu_disable)		(struct pmu *pmu); /* optional */
    321
    322	/*
    323	 * Try and initialize the event for this PMU.
    324	 *
    325	 * Returns:
    326	 *  -ENOENT	-- @event is not for this PMU
    327	 *
    328	 *  -ENODEV	-- @event is for this PMU but PMU not present
    329	 *  -EBUSY	-- @event is for this PMU but PMU temporarily unavailable
    330	 *  -EINVAL	-- @event is for this PMU but @event is not valid
    331	 *  -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported
    332	 *  -EACCES	-- @event is for this PMU, @event is valid, but no privileges
    333	 *
    334	 *  0		-- @event is for this PMU and valid
    335	 *
    336	 * Other error return values are allowed.
    337	 */
    338	int (*event_init)		(struct perf_event *event);
    339
    340	/*
    341	 * Notification that the event was mapped or unmapped.  Called
    342	 * in the context of the mapping task.
    343	 */
    344	void (*event_mapped)		(struct perf_event *event, struct mm_struct *mm); /* optional */
    345	void (*event_unmapped)		(struct perf_event *event, struct mm_struct *mm); /* optional */
    346
    347	/*
    348	 * Flags for ->add()/->del()/->start()/->stop(). There are
    349	 * matching hw_perf_event::state flags.
    350	 */
    351#define PERF_EF_START	0x01		/* start the counter when adding    */
    352#define PERF_EF_RELOAD	0x02		/* reload the counter when starting */
    353#define PERF_EF_UPDATE	0x04		/* update the counter when stopping */
    354
    355	/*
    356	 * Adds/Removes a counter to/from the PMU, can be done inside a
    357	 * transaction, see the ->*_txn() methods.
    358	 *
    359	 * The add/del callbacks will reserve all hardware resources required
    360	 * to service the event, this includes any counter constraint
    361	 * scheduling etc.
    362	 *
    363	 * Called with IRQs disabled and the PMU disabled on the CPU the event
    364	 * is on.
    365	 *
    366	 * ->add() called without PERF_EF_START should result in the same state
    367	 *  as ->add() followed by ->stop().
    368	 *
    369	 * ->del() must always stop an event as if PERF_EF_UPDATE were set.
    370	 *  If it calls ->stop(), that ->stop() must deal with already being
    371	 *  stopped without PERF_EF_UPDATE.
    372	 */
    373	int  (*add)			(struct perf_event *event, int flags);
    374	void (*del)			(struct perf_event *event, int flags);
    375
    376	/*
    377	 * Starts/Stops a counter present on the PMU.
    378	 *
    379	 * The PMI handler should stop the counter when perf_event_overflow()
    380	 * returns !0. ->start() will be used to continue.
    381	 *
    382	 * Also used to change the sample period.
    383	 *
    384	 * Called with IRQs disabled and the PMU disabled on the CPU the event
    385	 * is on -- will be called from NMI context when the PMU generates
    386	 * NMIs.
    387	 *
    388	 * ->stop() with PERF_EF_UPDATE will read the counter and update
    389	 *  period/count values like ->read() would.
    390	 *
    391	 * ->start() with PERF_EF_RELOAD will reprogram the counter
    392	 *  value, must be preceded by a ->stop() with PERF_EF_UPDATE.
    393	 */
    394	void (*start)			(struct perf_event *event, int flags);
    395	void (*stop)			(struct perf_event *event, int flags);
    396
    397	/*
    398	 * Updates the counter value of the event.
    399	 *
    400	 * For sampling capable PMUs this will also update the software period
    401	 * hw_perf_event::period_left field.
    402	 */
    403	void (*read)			(struct perf_event *event);
    404
    405	/*
    406	 * Group events scheduling is treated as a transaction, add
    407	 * group events as a whole and perform one schedulability test.
    408	 * If the test fails, roll back the whole group.
    409	 *
    410	 * Start the transaction, after this ->add() doesn't need to
    411	 * do schedulability tests.
    412	 *
    413	 * Optional.
    414	 */
    415	void (*start_txn)		(struct pmu *pmu, unsigned int txn_flags);
    416	/*
    417	 * If ->start_txn() disabled the ->add() schedulability test
    418	 * then ->commit_txn() is required to perform one. On success
    419	 * the transaction is closed. On error the transaction is kept
    420	 * open until ->cancel_txn() is called.
    421	 *
    422	 * Optional.
    423	 */
    424	int  (*commit_txn)		(struct pmu *pmu);
    425	/*
    426	 * Will cancel the transaction, assumes ->del() is called
    427	 * for each successful ->add() during the transaction.
    428	 *
    429	 * Optional.
    430	 */
    431	void (*cancel_txn)		(struct pmu *pmu);
    432
    433	/*
    434	 * Will return the value for perf_event_mmap_page::index for this event;
    435	 * if no implementation is provided it will default to: event->hw.idx + 1.
    436	 */
    437	int (*event_idx)		(struct perf_event *event); /* optional */
    438
    439	/*
    440	 * context-switches callback
    441	 */
    442	void (*sched_task)		(struct perf_event_context *ctx,
    443					bool sched_in);
    444
    445	/*
    446	 * Kmem cache of PMU specific data
    447	 */
    448	struct kmem_cache		*task_ctx_cache;
    449
    450	/*
    451	 * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data)
    452	 * can be synchronized using this function. See Intel LBR callstack support
    453	 * implementation and Perf core context switch handling callbacks for usage
    454	 * examples.
    455	 */
    456	void (*swap_task_ctx)		(struct perf_event_context *prev,
    457					 struct perf_event_context *next);
    458					/* optional */
    459
    460	/*
    461	 * Set up pmu-private data structures for an AUX area
    462	 */
    463	void *(*setup_aux)		(struct perf_event *event, void **pages,
    464					 int nr_pages, bool overwrite);
    465					/* optional */
    466
    467	/*
    468	 * Free pmu-private AUX data structures
    469	 */
    470	void (*free_aux)		(void *aux); /* optional */
    471
    472	/*
    473	 * Take a snapshot of the AUX buffer without touching the event
    474	 * state, so that preempting ->start()/->stop() callbacks does
    475	 * not interfere with their logic. Called in PMI context.
    476	 *
    477	 * Returns the size of AUX data copied to the output handle.
    478	 *
    479	 * Optional.
    480	 */
    481	long (*snapshot_aux)		(struct perf_event *event,
    482					 struct perf_output_handle *handle,
    483					 unsigned long size);
    484
    485	/*
    486	 * Validate address range filters: make sure the HW supports the
    487	 * requested configuration and number of filters; return 0 if the
    488	 * supplied filters are valid, -errno otherwise.
    489	 *
    490	 * Runs in the context of the ioctl()ing process and is not serialized
    491	 * with the rest of the PMU callbacks.
    492	 */
    493	int (*addr_filters_validate)	(struct list_head *filters);
    494					/* optional */
    495
    496	/*
    497	 * Synchronize address range filter configuration:
    498	 * translate hw-agnostic filters into hardware configuration in
    499	 * event::hw::addr_filters.
    500	 *
    501	 * Runs as a part of filter sync sequence that is done in ->start()
    502	 * callback by calling perf_event_addr_filters_sync().
    503	 *
    504	 * May (and should) traverse event::addr_filters::list, for which its
    505	 * caller provides necessary serialization.
    506	 */
    507	void (*addr_filters_sync)	(struct perf_event *event);
    508					/* optional */
    509
    510	/*
    511	 * Check if event can be used for aux_output purposes for
    512	 * events of this PMU.
    513	 *
    514	 * Runs from perf_event_open(). Should return 0 for "no match"
    515	 * or non-zero for "match".
    516	 */
    517	int (*aux_output_match)		(struct perf_event *event);
    518					/* optional */
    519
    520	/*
    521	 * Filter events for PMU-specific reasons.
    522	 */
    523	int (*filter_match)		(struct perf_event *event); /* optional */
    524
    525	/*
    526	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
    527	 */
    528	int (*check_period)		(struct perf_event *event, u64 value); /* optional */
    529};
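
/*
 * Usage sketch (not part of the upstream header): a minimal PMU wired up
 * according to the callback contract documented above.  All my_* names are
 * hypothetical; passing type -1 to perf_pmu_register() asks the core to
 * allocate a dynamic PMU type id.
 *
 *	static struct pmu my_pmu = {
 *		.task_ctx_nr	= perf_sw_context,
 *		.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
 *		.event_init	= my_event_init,
 *		.add		= my_add,
 *		.del		= my_del,
 *		.start		= my_start,
 *		.stop		= my_stop,
 *		.read		= my_read,
 *	};
 *
 *	ret = perf_pmu_register(&my_pmu, "my_pmu", -1);
 *
 * ->event_init() should return -ENOENT for events that belong to a different
 * PMU, ->add() must honour PERF_EF_START, and ->del() must stop the event as
 * if PERF_EF_UPDATE were set.
 */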
    530
    531enum perf_addr_filter_action_t {
    532	PERF_ADDR_FILTER_ACTION_STOP = 0,
    533	PERF_ADDR_FILTER_ACTION_START,
    534	PERF_ADDR_FILTER_ACTION_FILTER,
    535};
    536
    537/**
    538 * struct perf_addr_filter - address range filter definition
    539 * @entry:	event's filter list linkage
    540 * @path:	object file's path for file-based filters
    541 * @offset:	filter range offset
    542 * @size:	filter range size (size==0 means single address trigger)
    543 * @action:	filter/start/stop
    544 *
    545 * This is a hardware-agnostic filter configuration as specified by the user.
    546 */
    547struct perf_addr_filter {
    548	struct list_head	entry;
    549	struct path		path;
    550	unsigned long		offset;
    551	unsigned long		size;
    552	enum perf_addr_filter_action_t	action;
    553};
    554
    555/**
    556 * struct perf_addr_filters_head - container for address range filters
    557 * @list:	list of filters for this event
    558 * @lock:	spinlock that serializes accesses to the @list and event's
    559 *		(and its children's) filter generations.
    560 * @nr_file_filters:	number of file-based filters
    561 *
    562 * A child event will use parent's @list (and therefore @lock), so they are
    563 * bundled together; see perf_event_addr_filters().
    564 */
    565struct perf_addr_filters_head {
    566	struct list_head	list;
    567	raw_spinlock_t		lock;
    568	unsigned int		nr_file_filters;
    569};
    570
    571struct perf_addr_filter_range {
    572	unsigned long		start;
    573	unsigned long		size;
    574};
    575
    576/**
    577 * enum perf_event_state - the states of an event:
    578 */
    579enum perf_event_state {
    580	PERF_EVENT_STATE_DEAD		= -4,
    581	PERF_EVENT_STATE_EXIT		= -3,
    582	PERF_EVENT_STATE_ERROR		= -2,
    583	PERF_EVENT_STATE_OFF		= -1,
    584	PERF_EVENT_STATE_INACTIVE	=  0,
    585	PERF_EVENT_STATE_ACTIVE		=  1,
    586};
    587
    588struct file;
    589struct perf_sample_data;
    590
    591typedef void (*perf_overflow_handler_t)(struct perf_event *,
    592					struct perf_sample_data *,
    593					struct pt_regs *regs);
    594
    595/*
    596 * Event capabilities. For event_caps and groups caps.
    597 *
    598 * PERF_EV_CAP_SOFTWARE: Is a software event.
    599 * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
    600 * from any CPU in the package where it is active.
    601 * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
    602 * cannot be a group leader. If an event with this flag is detached from the
    603 * group it is scheduled out and moved into an unrecoverable ERROR state.
    604 */
    605#define PERF_EV_CAP_SOFTWARE		BIT(0)
    606#define PERF_EV_CAP_READ_ACTIVE_PKG	BIT(1)
    607#define PERF_EV_CAP_SIBLING		BIT(2)
    608
    609#define SWEVENT_HLIST_BITS		8
    610#define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
    611
    612struct swevent_hlist {
    613	struct hlist_head		heads[SWEVENT_HLIST_SIZE];
    614	struct rcu_head			rcu_head;
    615};
    616
    617#define PERF_ATTACH_CONTEXT	0x01
    618#define PERF_ATTACH_GROUP	0x02
    619#define PERF_ATTACH_TASK	0x04
    620#define PERF_ATTACH_TASK_DATA	0x08
    621#define PERF_ATTACH_ITRACE	0x10
    622#define PERF_ATTACH_SCHED_CB	0x20
    623#define PERF_ATTACH_CHILD	0x40
    624
    625struct bpf_prog;
    626struct perf_cgroup;
    627struct perf_buffer;
    628
    629struct pmu_event_list {
    630	raw_spinlock_t		lock;
    631	struct list_head	list;
    632};
    633
    634#define for_each_sibling_event(sibling, event)			\
    635	if ((event)->group_leader == (event))			\
    636		list_for_each_entry((sibling), &(event)->sibling_list, sibling_list)
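
/*
 * Usage sketch (not part of the upstream header): iterating the siblings of
 * a group leader; the loop body only runs when @leader really is its own
 * group leader (@leader and do_something_with() are placeholders).
 *
 *	struct perf_event *sibling;
 *
 *	for_each_sibling_event(sibling, leader)
 *		do_something_with(sibling);
 */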
    637
    638/**
    639 * struct perf_event - performance event kernel representation:
    640 */
    641struct perf_event {
    642#ifdef CONFIG_PERF_EVENTS
    643	/*
    644	 * entry onto perf_event_context::event_list;
    645	 *   modifications require ctx->lock
    646	 *   RCU safe iterations.
    647	 */
    648	struct list_head		event_entry;
    649
    650	/*
    651	 * Locked for modification by both ctx->mutex and ctx->lock; holding
    652	 * either suffices for read.
    653	 */
    654	struct list_head		sibling_list;
    655	struct list_head		active_list;
    656	/*
    657	 * Node on the pinned or flexible tree located at the event context;
    658	 */
    659	struct rb_node			group_node;
    660	u64				group_index;
    661	/*
    662	 * We need storage to track the entries in perf_pmu_migrate_context; we
    663	 * cannot use the event_entry because of RCU and we want to keep the
    664	 * group intact, which avoids using the other two entries.
    665	 */
    666	struct list_head		migrate_entry;
    667
    668	struct hlist_node		hlist_entry;
    669	struct list_head		active_entry;
    670	int				nr_siblings;
    671
    672	/* Not serialized. Only written during event initialization. */
    673	int				event_caps;
    674	/* The cumulative AND of all event_caps for events in this group. */
    675	int				group_caps;
    676
    677	struct perf_event		*group_leader;
    678	struct pmu			*pmu;
    679	void				*pmu_private;
    680
    681	enum perf_event_state		state;
    682	unsigned int			attach_state;
    683	local64_t			count;
    684	atomic64_t			child_count;
    685
    686	/*
    687	 * These are the total time in nanoseconds that the event
    688	 * has been enabled (i.e. eligible to run, and the task has
    689	 * been scheduled in, if this is a per-task event)
    690	 * and running (scheduled onto the CPU), respectively.
    691	 */
    692	u64				total_time_enabled;
    693	u64				total_time_running;
    694	u64				tstamp;
    695
    696	struct perf_event_attr		attr;
    697	u16				header_size;
    698	u16				id_header_size;
    699	u16				read_size;
    700	struct hw_perf_event		hw;
    701
    702	struct perf_event_context	*ctx;
    703	atomic_long_t			refcount;
    704
    705	/*
    706	 * These accumulate total time (in nanoseconds) that children
    707	 * events have been enabled and running, respectively.
    708	 */
    709	atomic64_t			child_total_time_enabled;
    710	atomic64_t			child_total_time_running;
    711
    712	/*
    713	 * Protect attach/detach and child_list:
    714	 */
    715	struct mutex			child_mutex;
    716	struct list_head		child_list;
    717	struct perf_event		*parent;
    718
    719	int				oncpu;
    720	int				cpu;
    721
    722	struct list_head		owner_entry;
    723	struct task_struct		*owner;
    724
    725	/* mmap bits */
    726	struct mutex			mmap_mutex;
    727	atomic_t			mmap_count;
    728
    729	struct perf_buffer		*rb;
    730	struct list_head		rb_entry;
    731	unsigned long			rcu_batches;
    732	int				rcu_pending;
    733
    734	/* poll related */
    735	wait_queue_head_t		waitq;
    736	struct fasync_struct		*fasync;
    737
    738	/* delayed work for NMIs and such */
    739	int				pending_wakeup;
    740	int				pending_kill;
    741	int				pending_disable;
    742	unsigned long			pending_addr;	/* SIGTRAP */
    743	struct irq_work			pending;
    744
    745	atomic_t			event_limit;
    746
    747	/* address range filters */
    748	struct perf_addr_filters_head	addr_filters;
    749	/* vma address array for file-based filters */
    750	struct perf_addr_filter_range	*addr_filter_ranges;
    751	unsigned long			addr_filters_gen;
    752
    753	/* for aux_output events */
    754	struct perf_event		*aux_event;
    755
    756	void (*destroy)(struct perf_event *);
    757	struct rcu_head			rcu_head;
    758
    759	struct pid_namespace		*ns;
    760	u64				id;
    761
    762	u64				(*clock)(void);
    763	perf_overflow_handler_t		overflow_handler;
    764	void				*overflow_handler_context;
    765#ifdef CONFIG_BPF_SYSCALL
    766	perf_overflow_handler_t		orig_overflow_handler;
    767	struct bpf_prog			*prog;
    768	u64				bpf_cookie;
    769#endif
    770
    771#ifdef CONFIG_EVENT_TRACING
    772	struct trace_event_call		*tp_event;
    773	struct event_filter		*filter;
    774#ifdef CONFIG_FUNCTION_TRACER
    775	struct ftrace_ops               ftrace_ops;
    776#endif
    777#endif
    778
    779#ifdef CONFIG_CGROUP_PERF
    780	struct perf_cgroup		*cgrp; /* cgroup the event is attached to */
    781#endif
    782
    783#ifdef CONFIG_SECURITY
    784	void *security;
    785#endif
    786	struct list_head		sb_list;
    787#endif /* CONFIG_PERF_EVENTS */
    788};
    789
    790
    791struct perf_event_groups {
    792	struct rb_root	tree;
    793	u64		index;
    794};
    795
    796/**
    797 * struct perf_event_context - event context structure
    798 *
    799 * Used as a container for task events and CPU events as well:
    800 */
    801struct perf_event_context {
    802	struct pmu			*pmu;
    803	/*
    804	 * Protect the states of the events in the list,
    805	 * nr_active, and the list:
    806	 */
    807	raw_spinlock_t			lock;
    808	/*
    809	 * Protect the list of events.  Locking either mutex or lock
    810	 * is sufficient to ensure the list doesn't change; to change
    811	 * the list you need to lock both the mutex and the spinlock.
    812	 */
    813	struct mutex			mutex;
    814
    815	struct list_head		active_ctx_list;
    816	struct perf_event_groups	pinned_groups;
    817	struct perf_event_groups	flexible_groups;
    818	struct list_head		event_list;
    819
    820	struct list_head		pinned_active;
    821	struct list_head		flexible_active;
    822
    823	int				nr_events;
    824	int				nr_active;
    825	int				nr_user;
    826	int				is_active;
    827	int				nr_stat;
    828	int				nr_freq;
    829	int				rotate_disable;
    830	/*
    831	 * Set when nr_events != nr_active, but tolerant of events that do not
    832	 * need to be active due to scheduling constraints, such as cgroups.
    833	 */
    834	int				rotate_necessary;
    835	refcount_t			refcount;
    836	struct task_struct		*task;
    837
    838	/*
    839	 * Context clock, runs when context enabled.
    840	 */
    841	u64				time;
    842	u64				timestamp;
    843	u64				timeoffset;
    844
    845	/*
    846	 * These fields let us detect when two contexts have both
    847	 * been cloned (inherited) from a common ancestor.
    848	 */
    849	struct perf_event_context	*parent_ctx;
    850	u64				parent_gen;
    851	u64				generation;
    852	int				pin_count;
    853#ifdef CONFIG_CGROUP_PERF
    854	int				nr_cgroups;	 /* cgroup evts */
    855#endif
    856	void				*task_ctx_data; /* pmu specific data */
    857	struct rcu_head			rcu_head;
    858};
    859
    860/*
    861 * Number of contexts where an event can trigger:
    862 *	task, softirq, hardirq, nmi.
    863 */
    864#define PERF_NR_CONTEXTS	4
    865
    866/**
    867 * struct perf_cpu_context - per cpu event context structure
    868 */
    869struct perf_cpu_context {
    870	struct perf_event_context	ctx;
    871	struct perf_event_context	*task_ctx;
    872	int				active_oncpu;
    873	int				exclusive;
    874
    875	raw_spinlock_t			hrtimer_lock;
    876	struct hrtimer			hrtimer;
    877	ktime_t				hrtimer_interval;
    878	unsigned int			hrtimer_active;
    879
    880#ifdef CONFIG_CGROUP_PERF
    881	struct perf_cgroup		*cgrp;
    882	struct list_head		cgrp_cpuctx_entry;
    883#endif
    884
    885	struct list_head		sched_cb_entry;
    886	int				sched_cb_usage;
    887
    888	int				online;
    889	/*
    890	 * Per-CPU storage for iterators used in visit_groups_merge. The default
    891	 * storage is of size 2 to hold the CPU and any CPU event iterators.
    892	 */
    893	int				heap_size;
    894	struct perf_event		**heap;
    895	struct perf_event		*heap_default[2];
    896};
    897
    898struct perf_output_handle {
    899	struct perf_event		*event;
    900	struct perf_buffer		*rb;
    901	unsigned long			wakeup;
    902	unsigned long			size;
    903	u64				aux_flags;
    904	union {
    905		void			*addr;
    906		unsigned long		head;
    907	};
    908	int				page;
    909};
    910
    911struct bpf_perf_event_data_kern {
    912	bpf_user_pt_regs_t *regs;
    913	struct perf_sample_data *data;
    914	struct perf_event *event;
    915};
    916
    917#ifdef CONFIG_CGROUP_PERF
    918
    919/*
    920 * perf_cgroup_info keeps track of time_enabled for a cgroup.
    921 * This is a per-cpu dynamically allocated data structure.
    922 */
    923struct perf_cgroup_info {
    924	u64				time;
    925	u64				timestamp;
    926	u64				timeoffset;
    927	int				active;
    928};
    929
    930struct perf_cgroup {
    931	struct cgroup_subsys_state	css;
    932	struct perf_cgroup_info	__percpu *info;
    933};
    934
    935/*
    936 * Must ensure cgroup is pinned (css_get) before calling
    937 * this function. In other words, we cannot call this function
    938 * if there is no cgroup event for the current CPU context.
    939 */
    940static inline struct perf_cgroup *
    941perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
    942{
    943	return container_of(task_css_check(task, perf_event_cgrp_id,
    944					   ctx ? lockdep_is_held(&ctx->lock)
    945					       : true),
    946			    struct perf_cgroup, css);
    947}
    948#endif /* CONFIG_CGROUP_PERF */
    949
    950#ifdef CONFIG_PERF_EVENTS
    951
    952extern void *perf_aux_output_begin(struct perf_output_handle *handle,
    953				   struct perf_event *event);
    954extern void perf_aux_output_end(struct perf_output_handle *handle,
    955				unsigned long size);
    956extern int perf_aux_output_skip(struct perf_output_handle *handle,
    957				unsigned long size);
    958extern void *perf_get_aux(struct perf_output_handle *handle);
    959extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
    960extern void perf_event_itrace_started(struct perf_event *event);
    961
    962extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
    963extern void perf_pmu_unregister(struct pmu *pmu);
    964
    965extern void __perf_event_task_sched_in(struct task_struct *prev,
    966				       struct task_struct *task);
    967extern void __perf_event_task_sched_out(struct task_struct *prev,
    968					struct task_struct *next);
    969extern int perf_event_init_task(struct task_struct *child, u64 clone_flags);
    970extern void perf_event_exit_task(struct task_struct *child);
    971extern void perf_event_free_task(struct task_struct *task);
    972extern void perf_event_delayed_put(struct task_struct *task);
    973extern struct file *perf_event_get(unsigned int fd);
    974extern const struct perf_event *perf_get_event(struct file *file);
    975extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
    976extern void perf_event_print_debug(void);
    977extern void perf_pmu_disable(struct pmu *pmu);
    978extern void perf_pmu_enable(struct pmu *pmu);
    979extern void perf_sched_cb_dec(struct pmu *pmu);
    980extern void perf_sched_cb_inc(struct pmu *pmu);
    981extern int perf_event_task_disable(void);
    982extern int perf_event_task_enable(void);
    983
    984extern void perf_pmu_resched(struct pmu *pmu);
    985
    986extern int perf_event_refresh(struct perf_event *event, int refresh);
    987extern void perf_event_update_userpage(struct perf_event *event);
    988extern int perf_event_release_kernel(struct perf_event *event);
    989extern struct perf_event *
    990perf_event_create_kernel_counter(struct perf_event_attr *attr,
    991				int cpu,
    992				struct task_struct *task,
    993				perf_overflow_handler_t callback,
    994				void *context);
    995extern void perf_pmu_migrate_context(struct pmu *pmu,
    996				int src_cpu, int dst_cpu);
    997int perf_event_read_local(struct perf_event *event, u64 *value,
    998			  u64 *enabled, u64 *running);
    999extern u64 perf_event_read_value(struct perf_event *event,
   1000				 u64 *enabled, u64 *running);
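
/*
 * Usage sketch (not part of the upstream header): creating and reading an
 * in-kernel counter bound to a CPU.  The attr values are only an example and
 * error handling is abbreviated.
 *
 *	struct perf_event_attr attr = {
 *		.type		= PERF_TYPE_HARDWARE,
 *		.config		= PERF_COUNT_HW_CPU_CYCLES,
 *		.size		= sizeof(attr),
 *		.disabled	= 1,
 *	};
 *	struct perf_event *event;
 *	u64 count, enabled, running;
 *
 *	event = perf_event_create_kernel_counter(&attr, cpu, NULL, NULL, NULL);
 *	if (IS_ERR(event))
 *		return PTR_ERR(event);
 *
 *	perf_event_enable(event);
 *	...
 *	count = perf_event_read_value(event, &enabled, &running);
 *	perf_event_release_kernel(event);
 */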
   1001
   1002
   1003struct perf_sample_data {
   1004	/*
   1005	 * Fields set by perf_sample_data_init(), grouped so as to
   1006	 * minimize the cachelines touched.
   1007	 */
   1008	u64				addr;
   1009	struct perf_raw_record		*raw;
   1010	struct perf_branch_stack	*br_stack;
   1011	u64				period;
   1012	union perf_sample_weight	weight;
   1013	u64				txn;
   1014	union  perf_mem_data_src	data_src;
   1015
   1016	/*
   1017	 * The other fields, optionally {set,used} by
   1018	 * perf_{prepare,output}_sample().
   1019	 */
   1020	u64				type;
   1021	u64				ip;
   1022	struct {
   1023		u32	pid;
   1024		u32	tid;
   1025	}				tid_entry;
   1026	u64				time;
   1027	u64				id;
   1028	u64				stream_id;
   1029	struct {
   1030		u32	cpu;
   1031		u32	reserved;
   1032	}				cpu_entry;
   1033	struct perf_callchain_entry	*callchain;
   1034	u64				aux_size;
   1035
   1036	struct perf_regs		regs_user;
   1037	struct perf_regs		regs_intr;
   1038	u64				stack_user_size;
   1039
   1040	u64				phys_addr;
   1041	u64				cgroup;
   1042	u64				data_page_size;
   1043	u64				code_page_size;
   1044} ____cacheline_aligned;
   1045
   1046/* default value for data source */
   1047#define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
   1048		    PERF_MEM_S(LVL, NA)   |\
   1049		    PERF_MEM_S(SNOOP, NA) |\
   1050		    PERF_MEM_S(LOCK, NA)  |\
   1051		    PERF_MEM_S(TLB, NA))
   1052
   1053static inline void perf_sample_data_init(struct perf_sample_data *data,
   1054					 u64 addr, u64 period)
   1055{
   1056	/* remaining struct members initialized in perf_prepare_sample() */
   1057	data->addr = addr;
   1058	data->raw  = NULL;
   1059	data->br_stack = NULL;
   1060	data->period = period;
   1061	data->weight.full = 0;
   1062	data->data_src.val = PERF_MEM_NA;
   1063	data->txn = 0;
   1064}
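
/*
 * Usage sketch (not part of the upstream header): the common shape of a PMU
 * overflow/interrupt handler -- initialize the sample data with the period
 * that just expired and hand it to the generic overflow path; a non-zero
 * return asks the driver to stop the event (my_pmu_stop() is hypothetical).
 *
 *	struct perf_sample_data data;
 *	struct hw_perf_event *hwc = &event->hw;
 *
 *	perf_sample_data_init(&data, 0, hwc->last_period);
 *	if (perf_event_overflow(event, &data, regs))
 *		my_pmu_stop(event, 0);
 */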
   1065
   1066/*
   1067 * Clear all bitfields in the perf_branch_entry.
   1068 * The to and from fields are not cleared because they are
   1069 * systematically modified by the caller.
   1070 */
   1071static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br)
   1072{
   1073	br->mispred = 0;
   1074	br->predicted = 0;
   1075	br->in_tx = 0;
   1076	br->abort = 0;
   1077	br->cycles = 0;
   1078	br->type = 0;
   1079	br->reserved = 0;
   1080}
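
/*
 * Usage sketch (not part of the upstream header): how a driver fills one
 * branch entry -- clear the bitfields first, then set from/to and only the
 * flags it actually knows (@br, @from, @to, @mispredicted are placeholders).
 *
 *	perf_clear_branch_entry_bitfields(br);
 *	br->from	= from;
 *	br->to		= to;
 *	br->mispred	= mispredicted;
 *	br->predicted	= !mispredicted;
 */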
   1081
   1082extern void perf_output_sample(struct perf_output_handle *handle,
   1083			       struct perf_event_header *header,
   1084			       struct perf_sample_data *data,
   1085			       struct perf_event *event);
   1086extern void perf_prepare_sample(struct perf_event_header *header,
   1087				struct perf_sample_data *data,
   1088				struct perf_event *event,
   1089				struct pt_regs *regs);
   1090
   1091extern int perf_event_overflow(struct perf_event *event,
   1092				 struct perf_sample_data *data,
   1093				 struct pt_regs *regs);
   1094
   1095extern void perf_event_output_forward(struct perf_event *event,
   1096				     struct perf_sample_data *data,
   1097				     struct pt_regs *regs);
   1098extern void perf_event_output_backward(struct perf_event *event,
   1099				       struct perf_sample_data *data,
   1100				       struct pt_regs *regs);
   1101extern int perf_event_output(struct perf_event *event,
   1102			     struct perf_sample_data *data,
   1103			     struct pt_regs *regs);
   1104
   1105static inline bool
   1106is_default_overflow_handler(struct perf_event *event)
   1107{
   1108	if (likely(event->overflow_handler == perf_event_output_forward))
   1109		return true;
   1110	if (unlikely(event->overflow_handler == perf_event_output_backward))
   1111		return true;
   1112	return false;
   1113}
   1114
   1115extern void
   1116perf_event_header__init_id(struct perf_event_header *header,
   1117			   struct perf_sample_data *data,
   1118			   struct perf_event *event);
   1119extern void
   1120perf_event__output_id_sample(struct perf_event *event,
   1121			     struct perf_output_handle *handle,
   1122			     struct perf_sample_data *sample);
   1123
   1124extern void
   1125perf_log_lost_samples(struct perf_event *event, u64 lost);
   1126
   1127static inline bool event_has_any_exclude_flag(struct perf_event *event)
   1128{
   1129	struct perf_event_attr *attr = &event->attr;
   1130
   1131	return attr->exclude_idle || attr->exclude_user ||
   1132	       attr->exclude_kernel || attr->exclude_hv ||
   1133	       attr->exclude_guest || attr->exclude_host;
   1134}
   1135
   1136static inline bool is_sampling_event(struct perf_event *event)
   1137{
   1138	return event->attr.sample_period != 0;
   1139}
   1140
   1141/*
   1142 * Return 1 for a software event, 0 for a hardware event
   1143 */
   1144static inline int is_software_event(struct perf_event *event)
   1145{
   1146	return event->event_caps & PERF_EV_CAP_SOFTWARE;
   1147}
   1148
   1149/*
   1150 * Return 1 for event in sw context, 0 for event in hw context
   1151 */
   1152static inline int in_software_context(struct perf_event *event)
   1153{
   1154	return event->ctx->pmu->task_ctx_nr == perf_sw_context;
   1155}
   1156
   1157static inline int is_exclusive_pmu(struct pmu *pmu)
   1158{
   1159	return pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE;
   1160}
   1161
   1162extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
   1163
   1164extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
   1165extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
   1166
   1167#ifndef perf_arch_fetch_caller_regs
   1168static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
   1169#endif
   1170
   1171/*
   1172 * When generating a perf sample in-line, instead of from an interrupt /
   1173 * exception, we lack a pt_regs. This is typically used from software events
   1174 * like: SW_CONTEXT_SWITCHES, SW_MIGRATIONS and the tie-in with tracepoints.
   1175 *
   1176 * We typically don't need a full set, but (for x86) do require:
   1177 * - ip for PERF_SAMPLE_IP
   1178 * - cs for user_mode() tests
   1179 * - sp for PERF_SAMPLE_CALLCHAIN
   1180 * - eflags for MISC bits and CALLCHAIN (see: perf_hw_regs())
   1181 *
   1182 * NOTE: assumes @regs is otherwise already 0 filled; this is important for
   1183 * things like PERF_SAMPLE_REGS_INTR.
   1184 */
   1185static inline void perf_fetch_caller_regs(struct pt_regs *regs)
   1186{
   1187	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
   1188}
   1189
   1190static __always_inline void
   1191perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
   1192{
   1193	if (static_key_false(&perf_swevent_enabled[event_id]))
   1194		__perf_sw_event(event_id, nr, regs, addr);
   1195}
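
/*
 * Usage sketch (not part of the upstream header): emitting a software event
 * from a fault path, in the style of the page-fault handlers.
 *
 *	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 */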
   1196
   1197DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
   1198
   1199/*
   1200 * 'Special' version for the scheduler; it hard-assumes no recursion,
   1201 * which is guaranteed by us not actually scheduling inside other swevents
   1202 * because those disable preemption.
   1203 */
   1204static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
   1205{
   1206	struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
   1207
   1208	perf_fetch_caller_regs(regs);
   1209	___perf_sw_event(event_id, nr, regs, addr);
   1210}
   1211
   1212extern struct static_key_false perf_sched_events;
   1213
   1214static __always_inline bool __perf_sw_enabled(int swevt)
   1215{
   1216	return static_key_false(&perf_swevent_enabled[swevt]);
   1217}
   1218
   1219static inline void perf_event_task_migrate(struct task_struct *task)
   1220{
   1221	if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS))
   1222		task->sched_migrated = 1;
   1223}
   1224
   1225static inline void perf_event_task_sched_in(struct task_struct *prev,
   1226					    struct task_struct *task)
   1227{
   1228	if (static_branch_unlikely(&perf_sched_events))
   1229		__perf_event_task_sched_in(prev, task);
   1230
   1231	if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
   1232	    task->sched_migrated) {
   1233		__perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
   1234		task->sched_migrated = 0;
   1235	}
   1236}
   1237
   1238static inline void perf_event_task_sched_out(struct task_struct *prev,
   1239					     struct task_struct *next)
   1240{
   1241	if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
   1242		__perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
   1243
   1244#ifdef CONFIG_CGROUP_PERF
   1245	if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
   1246	    perf_cgroup_from_task(prev, NULL) !=
   1247	    perf_cgroup_from_task(next, NULL))
   1248		__perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
   1249#endif
   1250
   1251	if (static_branch_unlikely(&perf_sched_events))
   1252		__perf_event_task_sched_out(prev, next);
   1253}
   1254
   1255extern void perf_event_mmap(struct vm_area_struct *vma);
   1256
   1257extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
   1258			       bool unregister, const char *sym);
   1259extern void perf_event_bpf_event(struct bpf_prog *prog,
   1260				 enum perf_bpf_event_type type,
   1261				 u16 flags);
   1262
   1263#ifdef CONFIG_GUEST_PERF_EVENTS
   1264extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
   1265
   1266DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
   1267DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
   1268DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
   1269
   1270static inline unsigned int perf_guest_state(void)
   1271{
   1272	return static_call(__perf_guest_state)();
   1273}
   1274static inline unsigned long perf_guest_get_ip(void)
   1275{
   1276	return static_call(__perf_guest_get_ip)();
   1277}
   1278static inline unsigned int perf_guest_handle_intel_pt_intr(void)
   1279{
   1280	return static_call(__perf_guest_handle_intel_pt_intr)();
   1281}
   1282extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
   1283extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
   1284#else
   1285static inline unsigned int perf_guest_state(void)		 { return 0; }
   1286static inline unsigned long perf_guest_get_ip(void)		 { return 0; }
   1287static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; }
   1288#endif /* CONFIG_GUEST_PERF_EVENTS */
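
/*
 * Usage sketch (not part of the upstream header): a hypervisor registering
 * guest-state callbacks, roughly as KVM does when CONFIG_GUEST_PERF_EVENTS
 * is enabled.  The my_guest_* helpers are hypothetical.
 *
 *	static struct perf_guest_info_callbacks my_guest_cbs = {
 *		.state			= my_guest_state,
 *		.get_ip			= my_guest_get_ip,
 *		.handle_intel_pt_intr	= my_guest_handle_intel_pt_intr,
 *	};
 *
 *	perf_register_guest_info_callbacks(&my_guest_cbs);
 *	...
 *	perf_unregister_guest_info_callbacks(&my_guest_cbs);
 */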
   1289
   1290extern void perf_event_exec(void);
   1291extern void perf_event_comm(struct task_struct *tsk, bool exec);
   1292extern void perf_event_namespaces(struct task_struct *tsk);
   1293extern void perf_event_fork(struct task_struct *tsk);
   1294extern void perf_event_text_poke(const void *addr,
   1295				 const void *old_bytes, size_t old_len,
   1296				 const void *new_bytes, size_t new_len);
   1297
   1298/* Callchains */
   1299DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
   1300
   1301extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
   1302extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
   1303extern struct perf_callchain_entry *
   1304get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
   1305		   u32 max_stack, bool crosstask, bool add_mark);
   1306extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
   1307extern int get_callchain_buffers(int max_stack);
   1308extern void put_callchain_buffers(void);
   1309extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
   1310extern void put_callchain_entry(int rctx);
   1311
   1312extern int sysctl_perf_event_max_stack;
   1313extern int sysctl_perf_event_max_contexts_per_stack;
   1314
   1315static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip)
   1316{
   1317	if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) {
   1318		struct perf_callchain_entry *entry = ctx->entry;
   1319		entry->ip[entry->nr++] = ip;
   1320		++ctx->contexts;
   1321		return 0;
   1322	} else {
   1323		ctx->contexts_maxed = true;
   1324		return -1; /* no more room, stop walking the stack */
   1325	}
   1326}
   1327
   1328static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip)
   1329{
   1330	if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) {
   1331		struct perf_callchain_entry *entry = ctx->entry;
   1332		entry->ip[entry->nr++] = ip;
   1333		++ctx->nr;
   1334		return 0;
   1335	} else {
   1336		return -1; /* no more room, stop walking the stack */
   1337	}
   1338}
   1339
   1340extern int sysctl_perf_event_paranoid;
   1341extern int sysctl_perf_event_mlock;
   1342extern int sysctl_perf_event_sample_rate;
   1343extern int sysctl_perf_cpu_time_max_percent;
   1344
   1345extern void perf_sample_event_took(u64 sample_len_ns);
   1346
   1347int perf_proc_update_handler(struct ctl_table *table, int write,
   1348		void *buffer, size_t *lenp, loff_t *ppos);
   1349int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
   1350		void *buffer, size_t *lenp, loff_t *ppos);
   1351int perf_event_max_stack_handler(struct ctl_table *table, int write,
   1352		void *buffer, size_t *lenp, loff_t *ppos);
   1353
   1354/* Access to perf_event_open(2) syscall. */
   1355#define PERF_SECURITY_OPEN		0
   1356
   1357/* Finer grained perf_event_open(2) access control. */
   1358#define PERF_SECURITY_CPU		1
   1359#define PERF_SECURITY_KERNEL		2
   1360#define PERF_SECURITY_TRACEPOINT	3
   1361
   1362static inline int perf_is_paranoid(void)
   1363{
   1364	return sysctl_perf_event_paranoid > -1;
   1365}
   1366
   1367static inline int perf_allow_kernel(struct perf_event_attr *attr)
   1368{
   1369	if (sysctl_perf_event_paranoid > 1 && !perfmon_capable())
   1370		return -EACCES;
   1371
   1372	return security_perf_event_open(attr, PERF_SECURITY_KERNEL);
   1373}
   1374
   1375static inline int perf_allow_cpu(struct perf_event_attr *attr)
   1376{
   1377	if (sysctl_perf_event_paranoid > 0 && !perfmon_capable())
   1378		return -EACCES;
   1379
   1380	return security_perf_event_open(attr, PERF_SECURITY_CPU);
   1381}
   1382
   1383static inline int perf_allow_tracepoint(struct perf_event_attr *attr)
   1384{
   1385	if (sysctl_perf_event_paranoid > -1 && !perfmon_capable())
   1386		return -EPERM;
   1387
   1388	return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT);
   1389}
   1390
   1391extern void perf_event_init(void);
   1392extern void perf_tp_event(u16 event_type, u64 count, void *record,
   1393			  int entry_size, struct pt_regs *regs,
   1394			  struct hlist_head *head, int rctx,
   1395			  struct task_struct *task);
   1396extern void perf_bp_event(struct perf_event *event, void *data);
   1397
   1398#ifndef perf_misc_flags
   1399# define perf_misc_flags(regs) \
   1400		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
   1401# define perf_instruction_pointer(regs)	instruction_pointer(regs)
   1402#endif
   1403#ifndef perf_arch_bpf_user_pt_regs
   1404# define perf_arch_bpf_user_pt_regs(regs) regs
   1405#endif
   1406
   1407static inline bool has_branch_stack(struct perf_event *event)
   1408{
   1409	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
   1410}
   1411
   1412static inline bool needs_branch_stack(struct perf_event *event)
   1413{
   1414	return event->attr.branch_sample_type != 0;
   1415}
   1416
   1417static inline bool has_aux(struct perf_event *event)
   1418{
   1419	return event->pmu->setup_aux;
   1420}
   1421
   1422static inline bool is_write_backward(struct perf_event *event)
   1423{
   1424	return !!event->attr.write_backward;
   1425}
   1426
   1427static inline bool has_addr_filter(struct perf_event *event)
   1428{
   1429	return event->pmu->nr_addr_filters;
   1430}
   1431
   1432/*
   1433 * An inherited event uses parent's filters
   1434 */
   1435static inline struct perf_addr_filters_head *
   1436perf_event_addr_filters(struct perf_event *event)
   1437{
   1438	struct perf_addr_filters_head *ifh = &event->addr_filters;
   1439
   1440	if (event->parent)
   1441		ifh = &event->parent->addr_filters;
   1442
   1443	return ifh;
   1444}
   1445
   1446extern void perf_event_addr_filters_sync(struct perf_event *event);
   1447extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
   1448
   1449extern int perf_output_begin(struct perf_output_handle *handle,
   1450			     struct perf_sample_data *data,
   1451			     struct perf_event *event, unsigned int size);
   1452extern int perf_output_begin_forward(struct perf_output_handle *handle,
   1453				     struct perf_sample_data *data,
   1454				     struct perf_event *event,
   1455				     unsigned int size);
   1456extern int perf_output_begin_backward(struct perf_output_handle *handle,
   1457				      struct perf_sample_data *data,
   1458				      struct perf_event *event,
   1459				      unsigned int size);
   1460
   1461extern void perf_output_end(struct perf_output_handle *handle);
   1462extern unsigned int perf_output_copy(struct perf_output_handle *handle,
   1463			     const void *buf, unsigned int len);
   1464extern unsigned int perf_output_skip(struct perf_output_handle *handle,
   1465				     unsigned int len);
   1466extern long perf_output_copy_aux(struct perf_output_handle *aux_handle,
   1467				 struct perf_output_handle *handle,
   1468				 unsigned long from, unsigned long to);
   1469extern int perf_swevent_get_recursion_context(void);
   1470extern void perf_swevent_put_recursion_context(int rctx);
   1471extern u64 perf_swevent_set_period(struct perf_event *event);
   1472extern void perf_event_enable(struct perf_event *event);
   1473extern void perf_event_disable(struct perf_event *event);
   1474extern void perf_event_disable_local(struct perf_event *event);
   1475extern void perf_event_disable_inatomic(struct perf_event *event);
   1476extern void perf_event_task_tick(void);
   1477extern int perf_event_account_interrupt(struct perf_event *event);
   1478extern int perf_event_period(struct perf_event *event, u64 value);
   1479extern u64 perf_event_pause(struct perf_event *event, bool reset);
   1480#else /* !CONFIG_PERF_EVENTS: */
   1481static inline void *
   1482perf_aux_output_begin(struct perf_output_handle *handle,
   1483		      struct perf_event *event)				{ return NULL; }
   1484static inline void
   1485perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
   1486									{ }
   1487static inline int
   1488perf_aux_output_skip(struct perf_output_handle *handle,
   1489		     unsigned long size)				{ return -EINVAL; }
   1490static inline void *
   1491perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
   1492static inline void
   1493perf_event_task_migrate(struct task_struct *task)			{ }
   1494static inline void
   1495perf_event_task_sched_in(struct task_struct *prev,
   1496			 struct task_struct *task)			{ }
   1497static inline void
   1498perf_event_task_sched_out(struct task_struct *prev,
   1499			  struct task_struct *next)			{ }
   1500static inline int perf_event_init_task(struct task_struct *child,
   1501				       u64 clone_flags)			{ return 0; }
   1502static inline void perf_event_exit_task(struct task_struct *child)	{ }
   1503static inline void perf_event_free_task(struct task_struct *task)	{ }
   1504static inline void perf_event_delayed_put(struct task_struct *task)	{ }
   1505static inline struct file *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
   1506static inline const struct perf_event *perf_get_event(struct file *file)
   1507{
   1508	return ERR_PTR(-EINVAL);
   1509}
   1510static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
   1511{
   1512	return ERR_PTR(-EINVAL);
   1513}
   1514static inline int perf_event_read_local(struct perf_event *event, u64 *value,
   1515					u64 *enabled, u64 *running)
   1516{
   1517	return -EINVAL;
   1518}
   1519static inline void perf_event_print_debug(void)				{ }
   1520static inline int perf_event_task_disable(void)				{ return -EINVAL; }
   1521static inline int perf_event_task_enable(void)				{ return -EINVAL; }
   1522static inline int perf_event_refresh(struct perf_event *event, int refresh)
   1523{
   1524	return -EINVAL;
   1525}
   1526
   1527static inline void
   1528perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
   1529static inline void
   1530perf_bp_event(struct perf_event *event, void *data)			{ }
   1531
   1532static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
   1533
   1534typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
   1535static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
   1536				      bool unregister, const char *sym)	{ }
   1537static inline void perf_event_bpf_event(struct bpf_prog *prog,
   1538					enum perf_bpf_event_type type,
   1539					u16 flags)			{ }
   1540static inline void perf_event_exec(void)				{ }
   1541static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
   1542static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
   1543static inline void perf_event_fork(struct task_struct *tsk)		{ }
   1544static inline void perf_event_text_poke(const void *addr,
   1545					const void *old_bytes,
   1546					size_t old_len,
   1547					const void *new_bytes,
   1548					size_t new_len)			{ }
   1549static inline void perf_event_init(void)				{ }
   1550static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
   1551static inline void perf_swevent_put_recursion_context(int rctx)		{ }
   1552static inline u64 perf_swevent_set_period(struct perf_event *event)	{ return 0; }
   1553static inline void perf_event_enable(struct perf_event *event)		{ }
   1554static inline void perf_event_disable(struct perf_event *event)		{ }
   1555static inline int __perf_event_disable(void *info)			{ return -1; }
   1556static inline void perf_event_task_tick(void)				{ }
   1557static inline int perf_event_release_kernel(struct perf_event *event)	{ return 0; }
   1558static inline int perf_event_period(struct perf_event *event, u64 value)
   1559{
   1560	return -EINVAL;
   1561}
   1562static inline u64 perf_event_pause(struct perf_event *event, bool reset)
   1563{
   1564	return 0;
   1565}
   1566#endif
   1567
   1568#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
   1569extern void perf_restore_debug_store(void);
   1570#else
   1571static inline void perf_restore_debug_store(void)			{ }
   1572#endif
   1573
   1574static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
   1575{
   1576	return frag->pad < sizeof(u64);
   1577}
   1578
   1579#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
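
/*
 * Usage sketch (not part of the upstream header): the ring-buffer output
 * sequence used when emitting a record -- reserve space, copy the payload,
 * commit.  @data, @payload and @payload_size are placeholders.
 *
 *	struct perf_output_handle handle;
 *	struct perf_event_header header = {
 *		.type = PERF_RECORD_SAMPLE,
 *		.size = sizeof(header) + payload_size,
 *	};
 *
 *	if (perf_output_begin(&handle, &data, event, header.size))
 *		return;
 *
 *	perf_output_put(&handle, header);
 *	perf_output_copy(&handle, payload, payload_size);
 *	perf_output_end(&handle);
 */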
   1580
   1581struct perf_pmu_events_attr {
   1582	struct device_attribute attr;
   1583	u64 id;
   1584	const char *event_str;
   1585};
   1586
   1587struct perf_pmu_events_ht_attr {
   1588	struct device_attribute			attr;
   1589	u64					id;
   1590	const char				*event_str_ht;
   1591	const char				*event_str_noht;
   1592};
   1593
   1594struct perf_pmu_events_hybrid_attr {
   1595	struct device_attribute			attr;
   1596	u64					id;
   1597	const char				*event_str;
   1598	u64					pmu_type;
   1599};
   1600
   1601struct perf_pmu_format_hybrid_attr {
   1602	struct device_attribute			attr;
   1603	u64					pmu_type;
   1604};
   1605
   1606ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
   1607			      char *page);
   1608
   1609#define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
   1610static struct perf_pmu_events_attr _var = {				\
   1611	.attr = __ATTR(_name, 0444, _show, NULL),			\
   1612	.id   =  _id,							\
   1613};
   1614
   1615#define PMU_EVENT_ATTR_STRING(_name, _var, _str)			    \
   1616static struct perf_pmu_events_attr _var = {				    \
   1617	.attr		= __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
   1618	.id		= 0,						    \
   1619	.event_str	= _str,						    \
   1620};
   1621
   1622#define PMU_EVENT_ATTR_ID(_name, _show, _id)				\
   1623	(&((struct perf_pmu_events_attr[]) {				\
   1624		{ .attr = __ATTR(_name, 0444, _show, NULL),		\
   1625		  .id = _id, }						\
   1626	})[0].attr.attr)
   1627
   1628#define PMU_FORMAT_ATTR(_name, _format)					\
   1629static ssize_t								\
   1630_name##_show(struct device *dev,					\
   1631			       struct device_attribute *attr,		\
   1632			       char *page)				\
   1633{									\
   1634	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
   1635	return sprintf(page, _format "\n");				\
   1636}									\
   1637									\
   1638static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
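
/*
 * Usage sketch (not part of the upstream header): typical sysfs glue built
 * with the helpers above -- a format attribute describing how config bits
 * are laid out, plus a named event string.  The bit range and the event
 * encoding are only examples.
 *
 *	PMU_FORMAT_ATTR(event, "config:0-7");
 *	PMU_EVENT_ATTR_STRING(cycles, my_attr_cycles, "event=0x11");
 *
 *	static struct attribute *my_format_attrs[] = {
 *		&format_attr_event.attr,
 *		NULL,
 *	};
 */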
   1639
   1640/* Performance counter hotplug functions */
   1641#ifdef CONFIG_PERF_EVENTS
   1642int perf_event_init_cpu(unsigned int cpu);
   1643int perf_event_exit_cpu(unsigned int cpu);
   1644#else
   1645#define perf_event_init_cpu	NULL
   1646#define perf_event_exit_cpu	NULL
   1647#endif
   1648
   1649extern void __weak arch_perf_update_userpage(struct perf_event *event,
   1650					     struct perf_event_mmap_page *userpg,
   1651					     u64 now);
   1652
   1653#ifdef CONFIG_MMU
   1654extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr);
   1655#endif
   1656
   1657/*
   1658 * Snapshot branch stack on software events.
   1659 *
   1660 * Branch stack can be very useful in understanding software events. For
   1661 * example, when a long function, e.g. sys_perf_event_open, returns an
   1662 * errno, it is not obvious why the function failed. Branch stack could
   1663 * provide very helpful information in this type of scenario.
   1664 *
   1665 * For software events, it is necessary to stop the hardware branch recorder
   1666 * fast. Otherwise, the hardware register/buffer will be flushed with
   1667 * entries of the triggering event. Therefore, static call is used to
   1668 * stop the hardware recorder.
   1669 */
   1670
   1671/*
   1672 * cnt is the number of entries allocated for entries.
   1673 * Returns the number of entries copied into entries.
   1674 */
   1675typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries,
   1676					   unsigned int cnt);
   1677DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
   1678
   1679#ifndef PERF_NEEDS_LOPWR_CB
   1680static inline void perf_lopwr_cb(bool mode)
   1681{
   1682}
   1683#endif
   1684
   1685#endif /* _LINUX_PERF_EVENT_H */