cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

i915_perf.c (9120B)


      1/*
      2 * SPDX-License-Identifier: MIT
      3 *
      4 * Copyright © 2019 Intel Corporation
      5 */
      6
      7#include <linux/kref.h>
      8
      9#include "gem/i915_gem_pm.h"
     10#include "gt/intel_gt.h"
     11
     12#include "i915_selftest.h"
     13
     14#include "igt_flush_test.h"
     15#include "lib_sw_fence.h"
     16
     17#define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"
     18
     19static int
     20alloc_empty_config(struct i915_perf *perf)
     21{
     22	struct i915_oa_config *oa_config;
     23
     24	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
     25	if (!oa_config)
     26		return -ENOMEM;
     27
     28	oa_config->perf = perf;
     29	kref_init(&oa_config->ref);
     30
     31	strlcpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));
     32
     33	mutex_lock(&perf->metrics_lock);
     34
     35	oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
     36	if (oa_config->id < 0)  {
     37		mutex_unlock(&perf->metrics_lock);
     38		i915_oa_config_put(oa_config);
     39		return -ENOMEM;
     40	}
     41
     42	mutex_unlock(&perf->metrics_lock);
     43
     44	return 0;
     45}
     46
     47static void
     48destroy_empty_config(struct i915_perf *perf)
     49{
     50	struct i915_oa_config *oa_config = NULL, *tmp;
     51	int id;
     52
     53	mutex_lock(&perf->metrics_lock);
     54
     55	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
     56		if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
     57			oa_config = tmp;
     58			break;
     59		}
     60	}
     61
     62	if (oa_config)
     63		idr_remove(&perf->metrics_idr, oa_config->id);
     64
     65	mutex_unlock(&perf->metrics_lock);
     66
     67	if (oa_config)
     68		i915_oa_config_put(oa_config);
     69}
     70
     71static struct i915_oa_config *
     72get_empty_config(struct i915_perf *perf)
     73{
     74	struct i915_oa_config *oa_config = NULL, *tmp;
     75	int id;
     76
     77	mutex_lock(&perf->metrics_lock);
     78
     79	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
     80		if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
     81			oa_config = i915_oa_config_get(tmp);
     82			break;
     83		}
     84	}
     85
     86	mutex_unlock(&perf->metrics_lock);
     87
     88	return oa_config;
     89}
     90
     91static struct i915_perf_stream *
     92test_stream(struct i915_perf *perf)
     93{
     94	struct drm_i915_perf_open_param param = {};
     95	struct i915_oa_config *oa_config = get_empty_config(perf);
     96	struct perf_open_properties props = {
     97		.engine = intel_engine_lookup_user(perf->i915,
     98						   I915_ENGINE_CLASS_RENDER,
     99						   0),
    100		.sample_flags = SAMPLE_OA_REPORT,
    101		.oa_format = GRAPHICS_VER(perf->i915) == 12 ?
    102		I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
    103	};
    104	struct i915_perf_stream *stream;
    105
    106	if (!oa_config)
    107		return NULL;
    108
    109	props.metrics_set = oa_config->id;
    110
    111	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
    112	if (!stream) {
    113		i915_oa_config_put(oa_config);
    114		return NULL;
    115	}
    116
    117	stream->perf = perf;
    118
    119	mutex_lock(&perf->lock);
    120	if (i915_oa_stream_init(stream, &param, &props)) {
    121		kfree(stream);
    122		stream =  NULL;
    123	}
    124	mutex_unlock(&perf->lock);
    125
    126	i915_oa_config_put(oa_config);
    127
    128	return stream;
    129}
    130
    131static void stream_destroy(struct i915_perf_stream *stream)
    132{
    133	struct i915_perf *perf = stream->perf;
    134
    135	mutex_lock(&perf->lock);
    136	i915_perf_destroy_locked(stream);
    137	mutex_unlock(&perf->lock);
    138}
    139
    140static int live_sanitycheck(void *arg)
    141{
    142	struct drm_i915_private *i915 = arg;
    143	struct i915_perf_stream *stream;
    144
    145	/* Quick check we can create a perf stream */
    146
    147	stream = test_stream(&i915->perf);
    148	if (!stream)
    149		return -EINVAL;
    150
    151	stream_destroy(stream);
    152	return 0;
    153}
    154
    155static int write_timestamp(struct i915_request *rq, int slot)
    156{
    157	u32 *cs;
    158	int len;
    159
    160	cs = intel_ring_begin(rq, 6);
    161	if (IS_ERR(cs))
    162		return PTR_ERR(cs);
    163
    164	len = 5;
    165	if (GRAPHICS_VER(rq->engine->i915) >= 8)
    166		len++;
    167
    168	*cs++ = GFX_OP_PIPE_CONTROL(len);
    169	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
    170		PIPE_CONTROL_STORE_DATA_INDEX |
    171		PIPE_CONTROL_WRITE_TIMESTAMP;
    172	*cs++ = slot * sizeof(u32);
    173	*cs++ = 0;
    174	*cs++ = 0;
    175	*cs++ = 0;
    176
    177	intel_ring_advance(rq, cs);
    178
    179	return 0;
    180}
    181
    182static ktime_t poll_status(struct i915_request *rq, int slot)
    183{
    184	while (!intel_read_status_page(rq->engine, slot) &&
    185	       !i915_request_completed(rq))
    186		cpu_relax();
    187
    188	return ktime_get();
    189}
    190
    191static int live_noa_delay(void *arg)
    192{
    193	struct drm_i915_private *i915 = arg;
    194	struct i915_perf_stream *stream;
    195	struct i915_request *rq;
    196	ktime_t t0, t1;
    197	u64 expected;
    198	u32 delay;
    199	int err;
    200	int i;
    201
    202	/* Check that the GPU delays matches expectations */
    203
    204	stream = test_stream(&i915->perf);
    205	if (!stream)
    206		return -ENOMEM;
    207
    208	expected = atomic64_read(&stream->perf->noa_programming_delay);
    209
    210	if (stream->engine->class != RENDER_CLASS) {
    211		err = -ENODEV;
    212		goto out;
    213	}
    214
    215	for (i = 0; i < 4; i++)
    216		intel_write_status_page(stream->engine, 0x100 + i, 0);
    217
    218	rq = intel_engine_create_kernel_request(stream->engine);
    219	if (IS_ERR(rq)) {
    220		err = PTR_ERR(rq);
    221		goto out;
    222	}
    223
    224	if (rq->engine->emit_init_breadcrumb) {
    225		err = rq->engine->emit_init_breadcrumb(rq);
    226		if (err) {
    227			i915_request_add(rq);
    228			goto out;
    229		}
    230	}
    231
    232	err = write_timestamp(rq, 0x100);
    233	if (err) {
    234		i915_request_add(rq);
    235		goto out;
    236	}
    237
    238	err = rq->engine->emit_bb_start(rq,
    239					i915_ggtt_offset(stream->noa_wait), 0,
    240					I915_DISPATCH_SECURE);
    241	if (err) {
    242		i915_request_add(rq);
    243		goto out;
    244	}
    245
    246	err = write_timestamp(rq, 0x102);
    247	if (err) {
    248		i915_request_add(rq);
    249		goto out;
    250	}
    251
    252	i915_request_get(rq);
    253	i915_request_add(rq);
    254
    255	preempt_disable();
    256	t0 = poll_status(rq, 0x100);
    257	t1 = poll_status(rq, 0x102);
    258	preempt_enable();
    259
    260	pr_info("CPU delay: %lluns, expected %lluns\n",
    261		ktime_sub(t1, t0), expected);
    262
    263	delay = intel_read_status_page(stream->engine, 0x102);
    264	delay -= intel_read_status_page(stream->engine, 0x100);
    265	delay = intel_gt_clock_interval_to_ns(stream->engine->gt, delay);
    266	pr_info("GPU delay: %uns, expected %lluns\n",
    267		delay, expected);
    268
    269	if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
    270		pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
    271		       delay / 1000,
    272		       div_u64(3 * expected, 4000),
    273		       div_u64(3 * expected, 2000));
    274		err = -EINVAL;
    275	}
    276
    277	i915_request_put(rq);
    278out:
    279	stream_destroy(stream);
    280	return err;
    281}
    282
    283static int live_noa_gpr(void *arg)
    284{
    285	struct drm_i915_private *i915 = arg;
    286	struct i915_perf_stream *stream;
    287	struct intel_context *ce;
    288	struct i915_request *rq;
    289	u32 *cs, *store;
    290	void *scratch;
    291	u32 gpr0;
    292	int err;
    293	int i;
    294
    295	/* Check that the delay does not clobber user context state (GPR) */
    296
    297	stream = test_stream(&i915->perf);
    298	if (!stream)
    299		return -ENOMEM;
    300
    301	gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0));
    302
    303	ce = intel_context_create(stream->engine);
    304	if (IS_ERR(ce)) {
    305		err = PTR_ERR(ce);
    306		goto out;
    307	}
    308
    309	/* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
    310	scratch = __px_vaddr(ce->vm->scratch[0]);
    311	memset(scratch, POISON_FREE, PAGE_SIZE);
    312
    313	rq = intel_context_create_request(ce);
    314	if (IS_ERR(rq)) {
    315		err = PTR_ERR(rq);
    316		goto out_ce;
    317	}
    318	i915_request_get(rq);
    319
    320	if (rq->engine->emit_init_breadcrumb) {
    321		err = rq->engine->emit_init_breadcrumb(rq);
    322		if (err) {
    323			i915_request_add(rq);
    324			goto out_rq;
    325		}
    326	}
    327
    328	/* Fill the 16 qword [32 dword] GPR with a known unlikely value */
    329	cs = intel_ring_begin(rq, 2 * 32 + 2);
    330	if (IS_ERR(cs)) {
    331		err = PTR_ERR(cs);
    332		i915_request_add(rq);
    333		goto out_rq;
    334	}
    335
    336	*cs++ = MI_LOAD_REGISTER_IMM(32);
    337	for (i = 0; i < 32; i++) {
    338		*cs++ = gpr0 + i * sizeof(u32);
    339		*cs++ = STACK_MAGIC;
    340	}
    341	*cs++ = MI_NOOP;
    342	intel_ring_advance(rq, cs);
    343
    344	/* Execute the GPU delay */
    345	err = rq->engine->emit_bb_start(rq,
    346					i915_ggtt_offset(stream->noa_wait), 0,
    347					I915_DISPATCH_SECURE);
    348	if (err) {
    349		i915_request_add(rq);
    350		goto out_rq;
    351	}
    352
    353	/* Read the GPR back, using the pinned global HWSP for convenience */
    354	store = memset32(rq->engine->status_page.addr + 512, 0, 32);
    355	for (i = 0; i < 32; i++) {
    356		u32 cmd;
    357
    358		cs = intel_ring_begin(rq, 4);
    359		if (IS_ERR(cs)) {
    360			err = PTR_ERR(cs);
    361			i915_request_add(rq);
    362			goto out_rq;
    363		}
    364
    365		cmd = MI_STORE_REGISTER_MEM;
    366		if (GRAPHICS_VER(i915) >= 8)
    367			cmd++;
    368		cmd |= MI_USE_GGTT;
    369
    370		*cs++ = cmd;
    371		*cs++ = gpr0 + i * sizeof(u32);
    372		*cs++ = i915_ggtt_offset(rq->engine->status_page.vma) +
    373			offset_in_page(store) +
    374			i * sizeof(u32);
    375		*cs++ = 0;
    376		intel_ring_advance(rq, cs);
    377	}
    378
    379	i915_request_add(rq);
    380
    381	if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) {
    382		pr_err("noa_wait timed out\n");
    383		intel_gt_set_wedged(stream->engine->gt);
    384		err = -EIO;
    385		goto out_rq;
    386	}
    387
    388	/* Verify that the GPR contain our expected values */
    389	for (i = 0; i < 32; i++) {
    390		if (store[i] == STACK_MAGIC)
    391			continue;
    392
    393		pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n",
    394		       i, store[i], STACK_MAGIC);
    395		err = -EINVAL;
    396	}
    397
    398	/* Verify that the user's scratch page was not used for GPR storage */
    399	if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) {
    400		pr_err("Scratch page overwritten!\n");
    401		igt_hexdump(scratch, 4096);
    402		err = -EINVAL;
    403	}
    404
    405out_rq:
    406	i915_request_put(rq);
    407out_ce:
    408	intel_context_put(ce);
    409out:
    410	stream_destroy(stream);
    411	return err;
    412}
    413
    414int i915_perf_live_selftests(struct drm_i915_private *i915)
    415{
    416	static const struct i915_subtest tests[] = {
    417		SUBTEST(live_sanitycheck),
    418		SUBTEST(live_noa_delay),
    419		SUBTEST(live_noa_gpr),
    420	};
    421	struct i915_perf *perf = &i915->perf;
    422	int err;
    423
    424	if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
    425		return 0;
    426
    427	if (intel_gt_is_wedged(to_gt(i915)))
    428		return 0;
    429
    430	err = alloc_empty_config(&i915->perf);
    431	if (err)
    432		return err;
    433
    434	err = i915_subtests(tests, i915);
    435
    436	destroy_empty_config(&i915->perf);
    437
    438	return err;
    439}