cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

i915_pmu.c (30125B)
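
For orientation before the source: the counters implemented in this file are consumed from userspace through the perf API. The sketch below is not part of the driver; it assumes an integrated GPU, where the PMU is registered under the name "i915" (see is_igp() and i915_pmu_register() near the end of the file), so that the dynamic PMU type id is published in /sys/bus/event_source/devices/i915/type, and it assumes the uapi config encodings are installed as <drm/i915_drm.h>. It reads the RC6 residency counter over roughly one second; running "perf stat -e i915/rc6-residency/ -a sleep 1" should report the same kind of number without writing any code.

/*
 * Userspace sketch (error handling trimmed): open one i915 PMU counter via
 * perf_event_open(2) and print its delta. Needs CAP_PERFMON/root or a
 * permissive kernel.perf_event_paranoid setting.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <drm/i915_drm.h>	/* I915_PMU_* config encodings (assumed install path) */

static int i915_perf_open(uint64_t config)
{
	struct perf_event_attr attr;
	int type = -1;
	FILE *f;

	/* The PMU registers with a dynamic type id, published in sysfs. */
	f = fopen("/sys/bus/event_source/devices/i915/type", "r");
	if (!f)
		return -1;
	if (fscanf(f, "%d", &type) != 1)
		type = -1;
	fclose(f);
	if (type < 0)
		return -1;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = config;

	/*
	 * i915 events are system-wide: pid must be -1 and the cpu must be in
	 * the PMU's cpumask (the first online CPU, typically 0).
	 */
	return syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
}

int main(void)
{
	uint64_t before = 0, after = 0;
	int fd = i915_perf_open(I915_PMU_RC6_RESIDENCY);

	if (fd < 0)
		return 1;
	read(fd, &before, sizeof(before));
	sleep(1);
	read(fd, &after, sizeof(after));
	printf("rc6 residency: %llu ns over ~1s\n",
	       (unsigned long long)(after - before));
	close(fd);
	return 0;
}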


      1/*
      2 * SPDX-License-Identifier: MIT
      3 *
      4 * Copyright © 2017-2018 Intel Corporation
      5 */
      6
      7#include <linux/pm_runtime.h>
      8
      9#include "gt/intel_engine.h"
     10#include "gt/intel_engine_pm.h"
     11#include "gt/intel_engine_regs.h"
     12#include "gt/intel_engine_user.h"
     13#include "gt/intel_gt_pm.h"
     14#include "gt/intel_gt_regs.h"
     15#include "gt/intel_rc6.h"
     16#include "gt/intel_rps.h"
     17
     18#include "i915_drv.h"
     19#include "i915_pmu.h"
     20#include "intel_pm.h"
     21
     22/* Frequency for the sampling timer for events which need it. */
     23#define FREQUENCY 200
     24#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
     25
     26#define ENGINE_SAMPLE_MASK \
     27	(BIT(I915_SAMPLE_BUSY) | \
     28	 BIT(I915_SAMPLE_WAIT) | \
     29	 BIT(I915_SAMPLE_SEMA))
     30
     31static cpumask_t i915_pmu_cpumask;
     32static unsigned int i915_pmu_target_cpu = -1;
     33
     34static u8 engine_config_sample(u64 config)
     35{
     36	return config & I915_PMU_SAMPLE_MASK;
     37}
     38
     39static u8 engine_event_sample(struct perf_event *event)
     40{
     41	return engine_config_sample(event->attr.config);
     42}
     43
     44static u8 engine_event_class(struct perf_event *event)
     45{
     46	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
     47}
     48
     49static u8 engine_event_instance(struct perf_event *event)
     50{
     51	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
     52}
     53
     54static bool is_engine_config(u64 config)
     55{
     56	return config < __I915_PMU_OTHER(0);
     57}
     58
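       /*
        * For reference (encoding defined by the uapi i915_drm.h macros used
        * further below): an engine event config is built as
        *
        *   __I915_PMU_ENGINE(class, instance, sample) =
        *       (class << I915_PMU_CLASS_SHIFT) |
        *       (instance << I915_PMU_SAMPLE_BITS) |
        *       sample
        *
        * which is exactly what the helpers above decode. Non-engine events
        * (frequency, RC6, ...) start at __I915_PMU_OTHER(0) and therefore
        * compare greater than any engine config; is_engine_config() relies
        * on that ordering.
        */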
     59static unsigned int other_bit(const u64 config)
     60{
     61	unsigned int val;
     62
     63	switch (config) {
     64	case I915_PMU_ACTUAL_FREQUENCY:
     65		val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
     66		break;
     67	case I915_PMU_REQUESTED_FREQUENCY:
     68		val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED;
     69		break;
     70	case I915_PMU_RC6_RESIDENCY:
     71		val = __I915_PMU_RC6_RESIDENCY_ENABLED;
     72		break;
     73	default:
     74		/*
     75		 * Events that do not require sampling, or tracking state
     76		 * transitions between enabled and disabled, can be ignored.
     77		 */
     78		return -1;
     79	}
     80
     81	return I915_ENGINE_SAMPLE_COUNT + val;
     82}
     83
     84static unsigned int config_bit(const u64 config)
     85{
     86	if (is_engine_config(config))
     87		return engine_config_sample(config);
     88	else
     89		return other_bit(config);
     90}
     91
     92static u64 config_mask(u64 config)
     93{
     94	return BIT_ULL(config_bit(config));
     95}
     96
     97static bool is_engine_event(struct perf_event *event)
     98{
     99	return is_engine_config(event->attr.config);
    100}
    101
    102static unsigned int event_bit(struct perf_event *event)
    103{
    104	return config_bit(event->attr.config);
    105}
    106
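       /*
        * Layout of the pmu->enable bitmask consulted below: engine events
        * share the low bits, indexed by sample type (I915_SAMPLE_BUSY/WAIT/
        * SEMA) across all engines, while the tracked non-engine events
        * (actual/requested frequency, RC6) occupy the bits starting at
        * I915_ENGINE_SAMPLE_COUNT, as mapped by config_bit()/other_bit().
        * Per-engine enable bits and reference counts live separately in
        * engine->pmu.
        */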
    107static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
    108{
    109	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
    110	u32 enable;
    111
    112	/*
    113	 * Only some counters need the sampling timer.
    114	 *
    115	 * We start with a bitmask of all currently enabled events.
    116	 */
    117	enable = pmu->enable;
    118
    119	/*
    120	 * Mask out all the ones which do not need the timer, or in
    121	 * other words keep all the ones that could need the timer.
    122	 */
    123	enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) |
    124		  config_mask(I915_PMU_REQUESTED_FREQUENCY) |
    125		  ENGINE_SAMPLE_MASK;
    126
    127	/*
    128	 * When the GPU is idle per-engine counters do not need to be
    129	 * running so clear those bits out.
    130	 */
    131	if (!gpu_active)
    132		enable &= ~ENGINE_SAMPLE_MASK;
    133	/*
    134	 * Also, if software busyness tracking is available we do not
    135	 * need the timer for the I915_SAMPLE_BUSY counter.
    136	 */
    137	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
    138		enable &= ~BIT(I915_SAMPLE_BUSY);
    139
    140	/*
    141	 * If some bits remain it means we need the sampling timer running.
    142	 */
    143	return enable;
    144}
    145
    146static u64 __get_rc6(struct intel_gt *gt)
    147{
    148	struct drm_i915_private *i915 = gt->i915;
    149	u64 val;
    150
    151	val = intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6);
    152
    153	if (HAS_RC6p(i915))
    154		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);
    155
    156	if (HAS_RC6pp(i915))
    157		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);
    158
    159	return val;
    160}
    161
    162static inline s64 ktime_since_raw(const ktime_t kt)
    163{
    164	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
    165}
    166
    167static u64 get_rc6(struct intel_gt *gt)
    168{
    169	struct drm_i915_private *i915 = gt->i915;
    170	struct i915_pmu *pmu = &i915->pmu;
    171	unsigned long flags;
    172	bool awake = false;
    173	u64 val;
    174
    175	if (intel_gt_pm_get_if_awake(gt)) {
    176		val = __get_rc6(gt);
    177		intel_gt_pm_put_async(gt);
    178		awake = true;
    179	}
    180
    181	spin_lock_irqsave(&pmu->lock, flags);
    182
    183	if (awake) {
    184		pmu->sample[__I915_SAMPLE_RC6].cur = val;
    185	} else {
    186		/*
    187		 * We think we are runtime suspended.
    188		 *
    189		 * Report the delta from when the device was suspended to now,
    190		 * on top of the last known real value, as the approximated RC6
    191		 * counter value.
    192		 */
    193		val = ktime_since_raw(pmu->sleep_last);
    194		val += pmu->sample[__I915_SAMPLE_RC6].cur;
    195	}
    196
    197	if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
    198		val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
    199	else
    200		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
    201
    202	spin_unlock_irqrestore(&pmu->lock, flags);
    203
    204	return val;
    205}
    206
    207static void init_rc6(struct i915_pmu *pmu)
    208{
    209	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
    210	intel_wakeref_t wakeref;
    211
    212	with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) {
    213		pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
    214		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
    215					pmu->sample[__I915_SAMPLE_RC6].cur;
    216		pmu->sleep_last = ktime_get_raw();
    217	}
    218}
    219
    220static void park_rc6(struct drm_i915_private *i915)
    221{
    222	struct i915_pmu *pmu = &i915->pmu;
    223
    224	pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
    225	pmu->sleep_last = ktime_get_raw();
    226}
    227
    228static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
    229{
    230	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
    231		pmu->timer_enabled = true;
    232		pmu->timer_last = ktime_get();
    233		hrtimer_start_range_ns(&pmu->timer,
    234				       ns_to_ktime(PERIOD), 0,
    235				       HRTIMER_MODE_REL_PINNED);
    236	}
    237}
    238
    239void i915_pmu_gt_parked(struct drm_i915_private *i915)
    240{
    241	struct i915_pmu *pmu = &i915->pmu;
    242
    243	if (!pmu->base.event_init)
    244		return;
    245
    246	spin_lock_irq(&pmu->lock);
    247
    248	park_rc6(i915);
    249
    250	/*
    251	 * Signal sampling timer to stop if only engine events are enabled and
    252	 * GPU went idle.
    253	 */
    254	pmu->timer_enabled = pmu_needs_timer(pmu, false);
    255
    256	spin_unlock_irq(&pmu->lock);
    257}
    258
    259void i915_pmu_gt_unparked(struct drm_i915_private *i915)
    260{
    261	struct i915_pmu *pmu = &i915->pmu;
    262
    263	if (!pmu->base.event_init)
    264		return;
    265
    266	spin_lock_irq(&pmu->lock);
    267
    268	/*
    269	 * Re-enable sampling timer when GPU goes active.
    270	 */
    271	__i915_pmu_maybe_start_timer(pmu);
    272
    273	spin_unlock_irq(&pmu->lock);
    274}
    275
    276static void
    277add_sample(struct i915_pmu_sample *sample, u32 val)
    278{
    279	sample->cur += val;
    280}
    281
    282static bool exclusive_mmio_access(const struct drm_i915_private *i915)
    283{
    284	/*
    285	 * We have to avoid concurrent mmio cache line access on gen7 or
    286	 * risk a machine hang. For a fun history lesson dig out the old
    287	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
    288	 */
    289	return GRAPHICS_VER(i915) == 7;
    290}
    291
    292static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
    293{
    294	struct intel_engine_pmu *pmu = &engine->pmu;
    295	bool busy;
    296	u32 val;
    297
    298	val = ENGINE_READ_FW(engine, RING_CTL);
    299	if (val == 0) /* powerwell off => engine idle */
    300		return;
    301
    302	if (val & RING_WAIT)
    303		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
    304	if (val & RING_WAIT_SEMAPHORE)
    305		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
    306
    307	/* No need to sample when busy stats are supported. */
    308	if (intel_engine_supports_stats(engine))
    309		return;
    310
    311	/*
    312	 * While waiting on a semaphore or event, MI_MODE reports the
    313	 * ring as idle. However, previously using the seqno, and with
    314	 * execlists sampling, we account for the ring waiting as the
    315	 * engine being busy. Therefore, we record the sample as being
    316	 * busy if either waiting or !idle.
    317	 */
    318	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
    319	if (!busy) {
    320		val = ENGINE_READ_FW(engine, RING_MI_MODE);
    321		busy = !(val & MODE_IDLE);
    322	}
    323	if (busy)
    324		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
    325}
    326
    327static void
    328engines_sample(struct intel_gt *gt, unsigned int period_ns)
    329{
    330	struct drm_i915_private *i915 = gt->i915;
    331	struct intel_engine_cs *engine;
    332	enum intel_engine_id id;
    333	unsigned long flags;
    334
    335	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
    336		return;
    337
    338	if (!intel_gt_pm_is_awake(gt))
    339		return;
    340
    341	for_each_engine(engine, gt, id) {
    342		if (!intel_engine_pm_get_if_awake(engine))
    343			continue;
    344
    345		if (exclusive_mmio_access(i915)) {
    346			spin_lock_irqsave(&engine->uncore->lock, flags);
    347			engine_sample(engine, period_ns);
    348			spin_unlock_irqrestore(&engine->uncore->lock, flags);
    349		} else {
    350			engine_sample(engine, period_ns);
    351		}
    352
    353		intel_engine_pm_put_async(engine);
    354	}
    355}
    356
    357static void
    358add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
    359{
    360	sample->cur += mul_u32_u32(val, mul);
    361}
    362
    363static bool frequency_sampling_enabled(struct i915_pmu *pmu)
    364{
    365	return pmu->enable &
    366	       (config_mask(I915_PMU_ACTUAL_FREQUENCY) |
    367		config_mask(I915_PMU_REQUESTED_FREQUENCY));
    368}
    369
    370static void
    371frequency_sample(struct intel_gt *gt, unsigned int period_ns)
    372{
    373	struct drm_i915_private *i915 = gt->i915;
    374	struct intel_uncore *uncore = gt->uncore;
    375	struct i915_pmu *pmu = &i915->pmu;
    376	struct intel_rps *rps = &gt->rps;
    377
    378	if (!frequency_sampling_enabled(pmu))
    379		return;
    380
    381	/* Report 0/0 (actual/requested) frequency while parked. */
    382	if (!intel_gt_pm_get_if_awake(gt))
    383		return;
    384
    385	if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
    386		u32 val;
    387
    388		/*
    389		 * We take a quick peek here without using forcewake
    390		 * so that we don't perturb the system under observation
    391		 * (forcewake => !rc6 => increased power use). We expect
    392		 * that if the read fails because it is outside of the
    393		 * mmio power well, then it will return 0 -- in which
    394		 * case we assume the system is running at the intended
    395		 * frequency. Fortunately, the read should rarely fail!
    396		 */
    397		val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
    398		if (val)
    399			val = intel_rps_get_cagf(rps, val);
    400		else
    401			val = rps->cur_freq;
    402
    403		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
    404				intel_gpu_freq(rps, val), period_ns / 1000);
    405	}
    406
    407	if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
    408		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
    409				intel_rps_get_requested_frequency(rps),
    410				period_ns / 1000);
    411	}
    412
    413	intel_gt_pm_put_async(gt);
    414}
    415
    416static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
    417{
    418	struct drm_i915_private *i915 =
    419		container_of(hrtimer, struct drm_i915_private, pmu.timer);
    420	struct i915_pmu *pmu = &i915->pmu;
    421	struct intel_gt *gt = to_gt(i915);
    422	unsigned int period_ns;
    423	ktime_t now;
    424
    425	if (!READ_ONCE(pmu->timer_enabled))
    426		return HRTIMER_NORESTART;
    427
    428	now = ktime_get();
    429	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
    430	pmu->timer_last = now;
    431
    432	/*
    433	 * Strictly speaking the passed-in period may not be 100% accurate for
    434	 * all internal calculations, since some amount of time can be spent on
    435	 * grabbing the forcewake. However, the potential error from timer
    436	 * callback delay greatly dominates this, so we keep it simple.
    437	 */
    438	engines_sample(gt, period_ns);
    439	frequency_sample(gt, period_ns);
    440
    441	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
    442
    443	return HRTIMER_RESTART;
    444}
    445
    446static void i915_pmu_event_destroy(struct perf_event *event)
    447{
    448	struct drm_i915_private *i915 =
    449		container_of(event->pmu, typeof(*i915), pmu.base);
    450
    451	drm_WARN_ON(&i915->drm, event->parent);
    452
    453	drm_dev_put(&i915->drm);
    454}
    455
    456static int
    457engine_event_status(struct intel_engine_cs *engine,
    458		    enum drm_i915_pmu_engine_sample sample)
    459{
    460	switch (sample) {
    461	case I915_SAMPLE_BUSY:
    462	case I915_SAMPLE_WAIT:
    463		break;
    464	case I915_SAMPLE_SEMA:
    465		if (GRAPHICS_VER(engine->i915) < 6)
    466			return -ENODEV;
    467		break;
    468	default:
    469		return -ENOENT;
    470	}
    471
    472	return 0;
    473}
    474
    475static int
    476config_status(struct drm_i915_private *i915, u64 config)
    477{
    478	struct intel_gt *gt = to_gt(i915);
    479
    480	switch (config) {
    481	case I915_PMU_ACTUAL_FREQUENCY:
    482		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
    483			/* Requires a mutex for sampling! */
    484			return -ENODEV;
    485		fallthrough;
    486	case I915_PMU_REQUESTED_FREQUENCY:
    487		if (GRAPHICS_VER(i915) < 6)
    488			return -ENODEV;
    489		break;
    490	case I915_PMU_INTERRUPTS:
    491		break;
    492	case I915_PMU_RC6_RESIDENCY:
    493		if (!gt->rc6.supported)
    494			return -ENODEV;
    495		break;
    496	case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
    497		break;
    498	default:
    499		return -ENOENT;
    500	}
    501
    502	return 0;
    503}
    504
    505static int engine_event_init(struct perf_event *event)
    506{
    507	struct drm_i915_private *i915 =
    508		container_of(event->pmu, typeof(*i915), pmu.base);
    509	struct intel_engine_cs *engine;
    510
    511	engine = intel_engine_lookup_user(i915, engine_event_class(event),
    512					  engine_event_instance(event));
    513	if (!engine)
    514		return -ENODEV;
    515
    516	return engine_event_status(engine, engine_event_sample(event));
    517}
    518
    519static int i915_pmu_event_init(struct perf_event *event)
    520{
    521	struct drm_i915_private *i915 =
    522		container_of(event->pmu, typeof(*i915), pmu.base);
    523	struct i915_pmu *pmu = &i915->pmu;
    524	int ret;
    525
    526	if (pmu->closed)
    527		return -ENODEV;
    528
    529	if (event->attr.type != event->pmu->type)
    530		return -ENOENT;
    531
    532	/* unsupported modes and filters */
    533	if (event->attr.sample_period) /* no sampling */
    534		return -EINVAL;
    535
    536	if (has_branch_stack(event))
    537		return -EOPNOTSUPP;
    538
    539	if (event->cpu < 0)
    540		return -EINVAL;
    541
    542	/* only allow running on one cpu at a time */
    543	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
    544		return -EINVAL;
    545
    546	if (is_engine_event(event))
    547		ret = engine_event_init(event);
    548	else
    549		ret = config_status(i915, event->attr.config);
    550	if (ret)
    551		return ret;
    552
    553	if (!event->parent) {
    554		drm_dev_get(&i915->drm);
    555		event->destroy = i915_pmu_event_destroy;
    556	}
    557
    558	return 0;
    559}
    560
    561static u64 __i915_pmu_event_read(struct perf_event *event)
    562{
    563	struct drm_i915_private *i915 =
    564		container_of(event->pmu, typeof(*i915), pmu.base);
    565	struct i915_pmu *pmu = &i915->pmu;
    566	u64 val = 0;
    567
    568	if (is_engine_event(event)) {
    569		u8 sample = engine_event_sample(event);
    570		struct intel_engine_cs *engine;
    571
    572		engine = intel_engine_lookup_user(i915,
    573						  engine_event_class(event),
    574						  engine_event_instance(event));
    575
    576		if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
    577			/* Do nothing */
    578		} else if (sample == I915_SAMPLE_BUSY &&
    579			   intel_engine_supports_stats(engine)) {
    580			ktime_t unused;
    581
    582			val = ktime_to_ns(intel_engine_get_busy_time(engine,
    583								     &unused));
    584		} else {
    585			val = engine->pmu.sample[sample].cur;
    586		}
    587	} else {
    588		switch (event->attr.config) {
    589		case I915_PMU_ACTUAL_FREQUENCY:
    590			val =
    591			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
    592				   USEC_PER_SEC /* to MHz */);
    593			break;
    594		case I915_PMU_REQUESTED_FREQUENCY:
    595			val =
    596			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
    597				   USEC_PER_SEC /* to MHz */);
    598			break;
    599		case I915_PMU_INTERRUPTS:
    600			val = READ_ONCE(pmu->irq_count);
    601			break;
    602		case I915_PMU_RC6_RESIDENCY:
    603			val = get_rc6(to_gt(i915));
    604			break;
    605		case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
    606			val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
    607			break;
    608		}
    609	}
    610
    611	return val;
    612}
    613
    614static void i915_pmu_event_read(struct perf_event *event)
    615{
    616	struct drm_i915_private *i915 =
    617		container_of(event->pmu, typeof(*i915), pmu.base);
    618	struct hw_perf_event *hwc = &event->hw;
    619	struct i915_pmu *pmu = &i915->pmu;
    620	u64 prev, new;
    621
    622	if (pmu->closed) {
    623		event->hw.state = PERF_HES_STOPPED;
    624		return;
    625	}
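       	/*
       	 * Lock-free update: re-read the counter and retry until prev_count
       	 * can be exchanged without a concurrent reader racing us, then
       	 * account only the delta into the perf event count.
       	 */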
    626again:
    627	prev = local64_read(&hwc->prev_count);
    628	new = __i915_pmu_event_read(event);
    629
    630	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
    631		goto again;
    632
    633	local64_add(new - prev, &event->count);
    634}
    635
    636static void i915_pmu_enable(struct perf_event *event)
    637{
    638	struct drm_i915_private *i915 =
    639		container_of(event->pmu, typeof(*i915), pmu.base);
    640	struct i915_pmu *pmu = &i915->pmu;
    641	unsigned long flags;
    642	unsigned int bit;
    643
    644	bit = event_bit(event);
    645	if (bit == -1)
    646		goto update;
    647
    648	spin_lock_irqsave(&pmu->lock, flags);
    649
    650	/*
    651	 * Update the bitmask of enabled events and increment
    652	 * the event reference counter.
    653	 */
    654	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
    655	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
    656	GEM_BUG_ON(pmu->enable_count[bit] == ~0);
    657
    658	pmu->enable |= BIT_ULL(bit);
    659	pmu->enable_count[bit]++;
    660
    661	/*
    662	 * Start the sampling timer if needed and not already enabled.
    663	 */
    664	__i915_pmu_maybe_start_timer(pmu);
    665
    666	/*
    667	 * For per-engine events the bitmask and reference counting
    668	 * is stored per engine.
    669	 */
    670	if (is_engine_event(event)) {
    671		u8 sample = engine_event_sample(event);
    672		struct intel_engine_cs *engine;
    673
    674		engine = intel_engine_lookup_user(i915,
    675						  engine_event_class(event),
    676						  engine_event_instance(event));
    677
    678		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
    679			     I915_ENGINE_SAMPLE_COUNT);
    680		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
    681			     I915_ENGINE_SAMPLE_COUNT);
    682		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
    683		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
    684		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
    685
    686		engine->pmu.enable |= BIT(sample);
    687		engine->pmu.enable_count[sample]++;
    688	}
    689
    690	spin_unlock_irqrestore(&pmu->lock, flags);
    691
    692update:
    693	/*
    694	 * Store the current counter value so we can report the correct delta
    695	 * for all listeners. Even when the event was already enabled and has
    696	 * an existing non-zero value.
    697	 */
    698	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
    699}
    700
    701static void i915_pmu_disable(struct perf_event *event)
    702{
    703	struct drm_i915_private *i915 =
    704		container_of(event->pmu, typeof(*i915), pmu.base);
    705	unsigned int bit = event_bit(event);
    706	struct i915_pmu *pmu = &i915->pmu;
    707	unsigned long flags;
    708
    709	if (bit == -1)
    710		return;
    711
    712	spin_lock_irqsave(&pmu->lock, flags);
    713
    714	if (is_engine_event(event)) {
    715		u8 sample = engine_event_sample(event);
    716		struct intel_engine_cs *engine;
    717
    718		engine = intel_engine_lookup_user(i915,
    719						  engine_event_class(event),
    720						  engine_event_instance(event));
    721
    722		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
    723		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
    724		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
    725
    726		/*
    727		 * Decrement the reference count and clear the enabled
    728		 * bitmask when the last listener on an event goes away.
    729		 */
    730		if (--engine->pmu.enable_count[sample] == 0)
    731			engine->pmu.enable &= ~BIT(sample);
    732	}
    733
    734	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
    735	GEM_BUG_ON(pmu->enable_count[bit] == 0);
    736	/*
    737	 * Decrement the reference count and clear the enabled
    738	 * bitmask when the last listener on an event goes away.
    739	 */
    740	if (--pmu->enable_count[bit] == 0) {
    741		pmu->enable &= ~BIT_ULL(bit);
    742		pmu->timer_enabled &= pmu_needs_timer(pmu, true);
    743	}
    744
    745	spin_unlock_irqrestore(&pmu->lock, flags);
    746}
    747
    748static void i915_pmu_event_start(struct perf_event *event, int flags)
    749{
    750	struct drm_i915_private *i915 =
    751		container_of(event->pmu, typeof(*i915), pmu.base);
    752	struct i915_pmu *pmu = &i915->pmu;
    753
    754	if (pmu->closed)
    755		return;
    756
    757	i915_pmu_enable(event);
    758	event->hw.state = 0;
    759}
    760
    761static void i915_pmu_event_stop(struct perf_event *event, int flags)
    762{
    763	if (flags & PERF_EF_UPDATE)
    764		i915_pmu_event_read(event);
    765	i915_pmu_disable(event);
    766	event->hw.state = PERF_HES_STOPPED;
    767}
    768
    769static int i915_pmu_event_add(struct perf_event *event, int flags)
    770{
    771	struct drm_i915_private *i915 =
    772		container_of(event->pmu, typeof(*i915), pmu.base);
    773	struct i915_pmu *pmu = &i915->pmu;
    774
    775	if (pmu->closed)
    776		return -ENODEV;
    777
    778	if (flags & PERF_EF_START)
    779		i915_pmu_event_start(event, flags);
    780
    781	return 0;
    782}
    783
    784static void i915_pmu_event_del(struct perf_event *event, int flags)
    785{
    786	i915_pmu_event_stop(event, PERF_EF_UPDATE);
    787}
    788
    789static int i915_pmu_event_event_idx(struct perf_event *event)
    790{
    791	return 0;
    792}
    793
    794struct i915_str_attribute {
    795	struct device_attribute attr;
    796	const char *str;
    797};
    798
    799static ssize_t i915_pmu_format_show(struct device *dev,
    800				    struct device_attribute *attr, char *buf)
    801{
    802	struct i915_str_attribute *eattr;
    803
    804	eattr = container_of(attr, struct i915_str_attribute, attr);
    805	return sprintf(buf, "%s\n", eattr->str);
    806}
    807
    808#define I915_PMU_FORMAT_ATTR(_name, _config) \
    809	(&((struct i915_str_attribute[]) { \
    810		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
    811		  .str = _config, } \
    812	})[0].attr.attr)
    813
    814static struct attribute *i915_pmu_format_attrs[] = {
    815	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
    816	NULL,
    817};
    818
    819static const struct attribute_group i915_pmu_format_attr_group = {
    820	.name = "format",
    821	.attrs = i915_pmu_format_attrs,
    822};
    823
    824struct i915_ext_attribute {
    825	struct device_attribute attr;
    826	unsigned long val;
    827};
    828
    829static ssize_t i915_pmu_event_show(struct device *dev,
    830				   struct device_attribute *attr, char *buf)
    831{
    832	struct i915_ext_attribute *eattr;
    833
    834	eattr = container_of(attr, struct i915_ext_attribute, attr);
    835	return sprintf(buf, "config=0x%lx\n", eattr->val);
    836}
    837
    838static ssize_t cpumask_show(struct device *dev,
    839			    struct device_attribute *attr, char *buf)
    840{
    841	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
    842}
    843
    844static DEVICE_ATTR_RO(cpumask);
    845
    846static struct attribute *i915_cpumask_attrs[] = {
    847	&dev_attr_cpumask.attr,
    848	NULL,
    849};
    850
    851static const struct attribute_group i915_pmu_cpumask_attr_group = {
    852	.attrs = i915_cpumask_attrs,
    853};
    854
    855#define __event(__config, __name, __unit) \
    856{ \
    857	.config = (__config), \
    858	.name = (__name), \
    859	.unit = (__unit), \
    860}
    861
    862#define __engine_event(__sample, __name) \
    863{ \
    864	.sample = (__sample), \
    865	.name = (__name), \
    866}
    867
    868static struct i915_ext_attribute *
    869add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
    870{
    871	sysfs_attr_init(&attr->attr.attr);
    872	attr->attr.attr.name = name;
    873	attr->attr.attr.mode = 0444;
    874	attr->attr.show = i915_pmu_event_show;
    875	attr->val = config;
    876
    877	return ++attr;
    878}
    879
    880static struct perf_pmu_events_attr *
    881add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
    882	     const char *str)
    883{
    884	sysfs_attr_init(&attr->attr.attr);
    885	attr->attr.attr.name = name;
    886	attr->attr.attr.mode = 0444;
    887	attr->attr.show = perf_event_sysfs_show;
    888	attr->event_str = str;
    889
    890	return ++attr;
    891}
    892
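       /*
        * Build the attributes for the sysfs "events" group: one attribute per
        * supported counter, shown as "config=0x...", plus an optional
        * "<event>.unit" attribute carrying the unit string, so tools/perf can
        * list the events by name and show their units.
        */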
    893static struct attribute **
    894create_event_attributes(struct i915_pmu *pmu)
    895{
    896	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
    897	static const struct {
    898		u64 config;
    899		const char *name;
    900		const char *unit;
    901	} events[] = {
    902		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
    903		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
    904		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
    905		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
    906		__event(I915_PMU_SOFTWARE_GT_AWAKE_TIME, "software-gt-awake-time", "ns"),
    907	};
    908	static const struct {
    909		enum drm_i915_pmu_engine_sample sample;
    910		char *name;
    911	} engine_events[] = {
    912		__engine_event(I915_SAMPLE_BUSY, "busy"),
    913		__engine_event(I915_SAMPLE_SEMA, "sema"),
    914		__engine_event(I915_SAMPLE_WAIT, "wait"),
    915	};
    916	unsigned int count = 0;
    917	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
    918	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
    919	struct attribute **attr = NULL, **attr_iter;
    920	struct intel_engine_cs *engine;
    921	unsigned int i;
    922
    923	/* Count how many counters we will be exposing. */
    924	for (i = 0; i < ARRAY_SIZE(events); i++) {
    925		if (!config_status(i915, events[i].config))
    926			count++;
    927	}
    928
    929	for_each_uabi_engine(engine, i915) {
    930		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
    931			if (!engine_event_status(engine,
    932						 engine_events[i].sample))
    933				count++;
    934		}
    935	}
    936
    937	/* Allocate attribute objects and table. */
    938	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
    939	if (!i915_attr)
    940		goto err_alloc;
    941
    942	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
    943	if (!pmu_attr)
    944		goto err_alloc;
    945
    946	/* Max one pointer of each attribute type plus a termination entry. */
    947	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
    948	if (!attr)
    949		goto err_alloc;
    950
    951	i915_iter = i915_attr;
    952	pmu_iter = pmu_attr;
    953	attr_iter = attr;
    954
    955	/* Initialize supported non-engine counters. */
    956	for (i = 0; i < ARRAY_SIZE(events); i++) {
    957		char *str;
    958
    959		if (config_status(i915, events[i].config))
    960			continue;
    961
    962		str = kstrdup(events[i].name, GFP_KERNEL);
    963		if (!str)
    964			goto err;
    965
    966		*attr_iter++ = &i915_iter->attr.attr;
    967		i915_iter = add_i915_attr(i915_iter, str, events[i].config);
    968
    969		if (events[i].unit) {
    970			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
    971			if (!str)
    972				goto err;
    973
    974			*attr_iter++ = &pmu_iter->attr.attr;
    975			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
    976		}
    977	}
    978
    979	/* Initialize supported engine counters. */
    980	for_each_uabi_engine(engine, i915) {
    981		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
    982			char *str;
    983
    984			if (engine_event_status(engine,
    985						engine_events[i].sample))
    986				continue;
    987
    988			str = kasprintf(GFP_KERNEL, "%s-%s",
    989					engine->name, engine_events[i].name);
    990			if (!str)
    991				goto err;
    992
    993			*attr_iter++ = &i915_iter->attr.attr;
    994			i915_iter =
    995				add_i915_attr(i915_iter, str,
    996					      __I915_PMU_ENGINE(engine->uabi_class,
    997								engine->uabi_instance,
    998								engine_events[i].sample));
    999
   1000			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
   1001					engine->name, engine_events[i].name);
   1002			if (!str)
   1003				goto err;
   1004
   1005			*attr_iter++ = &pmu_iter->attr.attr;
   1006			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
   1007		}
   1008	}
   1009
   1010	pmu->i915_attr = i915_attr;
   1011	pmu->pmu_attr = pmu_attr;
   1012
   1013	return attr;
   1014
   1015err:;
   1016	for (attr_iter = attr; *attr_iter; attr_iter++)
   1017		kfree((*attr_iter)->name);
   1018
   1019err_alloc:
   1020	kfree(attr);
   1021	kfree(i915_attr);
   1022	kfree(pmu_attr);
   1023
   1024	return NULL;
   1025}
   1026
   1027static void free_event_attributes(struct i915_pmu *pmu)
   1028{
   1029	struct attribute **attr_iter = pmu->events_attr_group.attrs;
   1030
   1031	for (; *attr_iter; attr_iter++)
   1032		kfree((*attr_iter)->name);
   1033
   1034	kfree(pmu->events_attr_group.attrs);
   1035	kfree(pmu->i915_attr);
   1036	kfree(pmu->pmu_attr);
   1037
   1038	pmu->events_attr_group.attrs = NULL;
   1039	pmu->i915_attr = NULL;
   1040	pmu->pmu_attr = NULL;
   1041}
   1042
   1043static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
   1044{
   1045	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
   1046
   1047	GEM_BUG_ON(!pmu->base.event_init);
   1048
   1049	/* Select the first online CPU as a designated reader. */
   1050	if (cpumask_empty(&i915_pmu_cpumask))
   1051		cpumask_set_cpu(cpu, &i915_pmu_cpumask);
   1052
   1053	return 0;
   1054}
   1055
   1056static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
   1057{
   1058	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
   1059	unsigned int target = i915_pmu_target_cpu;
   1060
   1061	GEM_BUG_ON(!pmu->base.event_init);
   1062
   1063	/*
   1064	 * Unregistering an instance generates a CPU offline event which we must
   1065	 * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
   1066	 */
   1067	if (pmu->closed)
   1068		return 0;
   1069
   1070	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
   1071		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
   1072
   1073		/* Migrate events if there is a valid target */
   1074		if (target < nr_cpu_ids) {
   1075			cpumask_set_cpu(target, &i915_pmu_cpumask);
   1076			i915_pmu_target_cpu = target;
   1077		}
   1078	}
   1079
   1080	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
   1081		perf_pmu_migrate_context(&pmu->base, cpu, target);
   1082		pmu->cpuhp.cpu = target;
   1083	}
   1084
   1085	return 0;
   1086}
   1087
   1088static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
   1089
   1090int i915_pmu_init(void)
   1091{
   1092	int ret;
   1093
   1094	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
   1095				      "perf/x86/intel/i915:online",
   1096				      i915_pmu_cpu_online,
   1097				      i915_pmu_cpu_offline);
   1098	if (ret < 0)
   1099		pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
   1100			  ret);
   1101	else
   1102		cpuhp_slot = ret;
   1103
   1104	return 0;
   1105}
   1106
   1107void i915_pmu_exit(void)
   1108{
   1109	if (cpuhp_slot != CPUHP_INVALID)
   1110		cpuhp_remove_multi_state(cpuhp_slot);
   1111}
   1112
   1113static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
   1114{
   1115	if (cpuhp_slot == CPUHP_INVALID)
   1116		return -EINVAL;
   1117
   1118	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
   1119}
   1120
   1121static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
   1122{
   1123	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
   1124}
   1125
   1126static bool is_igp(struct drm_i915_private *i915)
   1127{
   1128	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
   1129
   1130	/* IGP is 0000:00:02.0 */
   1131	return pci_domain_nr(pdev->bus) == 0 &&
   1132	       pdev->bus->number == 0 &&
   1133	       PCI_SLOT(pdev->devfn) == 2 &&
   1134	       PCI_FUNC(pdev->devfn) == 0;
   1135}
   1136
   1137void i915_pmu_register(struct drm_i915_private *i915)
   1138{
   1139	struct i915_pmu *pmu = &i915->pmu;
   1140	const struct attribute_group *attr_groups[] = {
   1141		&i915_pmu_format_attr_group,
   1142		&pmu->events_attr_group,
   1143		&i915_pmu_cpumask_attr_group,
   1144		NULL
   1145	};
   1146
   1147	int ret = -ENOMEM;
   1148
   1149	if (GRAPHICS_VER(i915) <= 2) {
   1150		drm_info(&i915->drm, "PMU not supported for this GPU.");
   1151		return;
   1152	}
   1153
   1154	spin_lock_init(&pmu->lock);
   1155	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
   1156	pmu->timer.function = i915_sample;
   1157	pmu->cpuhp.cpu = -1;
   1158	init_rc6(pmu);
   1159
   1160	if (!is_igp(i915)) {
   1161		pmu->name = kasprintf(GFP_KERNEL,
   1162				      "i915_%s",
   1163				      dev_name(i915->drm.dev));
   1164		if (pmu->name) {
   1165			/* tools/perf reserves colons as special. */
   1166			strreplace((char *)pmu->name, ':', '_');
   1167		}
   1168	} else {
   1169		pmu->name = "i915";
   1170	}
   1171	if (!pmu->name)
   1172		goto err;
   1173
   1174	pmu->events_attr_group.name = "events";
   1175	pmu->events_attr_group.attrs = create_event_attributes(pmu);
   1176	if (!pmu->events_attr_group.attrs)
   1177		goto err_name;
   1178
   1179	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
   1180					GFP_KERNEL);
   1181	if (!pmu->base.attr_groups)
   1182		goto err_attr;
   1183
   1184	pmu->base.module	= THIS_MODULE;
   1185	pmu->base.task_ctx_nr	= perf_invalid_context;
   1186	pmu->base.event_init	= i915_pmu_event_init;
   1187	pmu->base.add		= i915_pmu_event_add;
   1188	pmu->base.del		= i915_pmu_event_del;
   1189	pmu->base.start		= i915_pmu_event_start;
   1190	pmu->base.stop		= i915_pmu_event_stop;
   1191	pmu->base.read		= i915_pmu_event_read;
   1192	pmu->base.event_idx	= i915_pmu_event_event_idx;
   1193
   1194	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
   1195	if (ret)
   1196		goto err_groups;
   1197
   1198	ret = i915_pmu_register_cpuhp_state(pmu);
   1199	if (ret)
   1200		goto err_unreg;
   1201
   1202	return;
   1203
   1204err_unreg:
   1205	perf_pmu_unregister(&pmu->base);
   1206err_groups:
   1207	kfree(pmu->base.attr_groups);
   1208err_attr:
   1209	pmu->base.event_init = NULL;
   1210	free_event_attributes(pmu);
   1211err_name:
   1212	if (!is_igp(i915))
   1213		kfree(pmu->name);
   1214err:
   1215	drm_notice(&i915->drm, "Failed to register PMU!\n");
   1216}
   1217
   1218void i915_pmu_unregister(struct drm_i915_private *i915)
   1219{
   1220	struct i915_pmu *pmu = &i915->pmu;
   1221
   1222	if (!pmu->base.event_init)
   1223		return;
   1224
   1225	/*
   1226	 * "Disconnect" the PMU callbacks - since all are atomic, synchronize_rcu
   1227	 * ensures all currently executing ones will have exited before we
   1228	 * proceed with unregistration.
   1229	 */
   1230	pmu->closed = true;
   1231	synchronize_rcu();
   1232
   1233	hrtimer_cancel(&pmu->timer);
   1234
   1235	i915_pmu_unregister_cpuhp_state(pmu);
   1236
   1237	perf_pmu_unregister(&pmu->base);
   1238	pmu->base.event_init = NULL;
   1239	kfree(pmu->base.attr_groups);
   1240	if (!is_igp(i915))
   1241		kfree(pmu->name);
   1242	free_event_attributes(pmu);
   1243}