cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

selftest_rps.c (31403B)


// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
static void dummy_rps_work(struct work_struct *wrk)
{
}

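/*
 * sort() comparators for the 5-sample arrays used by the median/triangle
 * filters in the measurement helpers below.
 */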
static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

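/*
 * Build a self-looping batch that continuously increments a CS GPR via
 * MI_MATH; with srm, each pass also stores the counter to the last dword
 * of the buffer so the CPU can sample it. *cancel points at the loop
 * entry, so the caller terminates the batch by overwriting it with
 * MI_BATCH_BUFFER_END.
 */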
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_put; /* not yet locked; skip the unlock on unwind */
	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
err_unlock:
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

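/*
 * Poll the actual frequency (CAGF) until it reaches the target, stops
 * moving (no change across the last 64 samples), or the timeout expires;
 * returns the last frequency observed.
 */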
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

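/*
 * Request a new frequency and wait (up to 50ms each) for the request to be
 * programmed and for the actual frequency to settle on it; returns the
 * settled frequency, or 0 if the request could not be programmed.
 */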
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

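/*
 * Check that the GT clock frequency we use to convert between PM evaluation
 * intervals and wallclock time matches reality: count C0 cycles across a
 * known udelay() and compare the conversion in both directions.
 */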
int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU, in which case the actual frequency used
	 * may be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (GRAPHICS_VER(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		snb_pcode_read(i915, GEN6_PCODE_READ_MIN_FREQ_TABLE,
			       &ia_freq, NULL);

		pr_info("%5d  %5d  %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

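/*
 * Sample the spin-batch counter across a sleep and convert the delta to a
 * rate in counts per millisecond (reported as KHz by the callers), either
 * via its CPU-visible memory backing or by reading the CS_GPR directly.
 */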
static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = READ_ONCE(*cntr);
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

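/* Check that x lies strictly within the ratio [f_n/f_d, f_d/f_n] of y. */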
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

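/*
 * Sleep out any in-flight evaluation interval, clear the interrupt state,
 * then sleep across a fresh interval so pm_iir reflects exactly one EI.
 */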
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

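/*
 * Pin the GPU at its minimum frequency, keep it busy with a spinner for one
 * evaluation interval, and check that an UP-threshold interrupt is raised
 * without the frequency changing behind our back.
 */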
static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

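/*
 * Sample the RAPL energy counter across a sleep: microjoules over
 * nanoseconds, scaled by 1e6, yields average power in milliwatts.
 */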
static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dt = ktime_get();
	dE = librapl_energy_uJ();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at a lower frequency
	 * actually saves power. Let's see if our RAPL measurement supports
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (!librapl_supported(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	if (intel_rps_has_interrupts(rps))
		pr_info("RPS has interrupt support\n");
	if (intel_rps_uses_timer(rps))
		pr_info("RPS has timer support\n");

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}