cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

i915_gem_context.c (41214B)


      1/*
      2 * SPDX-License-Identifier: MIT
      3 *
      4 * Copyright © 2017 Intel Corporation
      5 */
      6
      7#include <linux/prime_numbers.h>
      8#include <linux/string_helpers.h>
      9
     10#include "gem/i915_gem_internal.h"
     11#include "gem/i915_gem_pm.h"
     12#include "gt/intel_engine_pm.h"
     13#include "gt/intel_engine_regs.h"
     14#include "gt/intel_gt.h"
     15#include "gt/intel_gt_requests.h"
     16#include "gt/intel_reset.h"
     17#include "i915_selftest.h"
     18
     19#include "gem/selftests/igt_gem_utils.h"
     20#include "selftests/i915_random.h"
     21#include "selftests/igt_flush_test.h"
     22#include "selftests/igt_live_test.h"
     23#include "selftests/igt_reset.h"
     24#include "selftests/igt_spinner.h"
     25#include "selftests/mock_drm.h"
     26#include "selftests/mock_gem_device.h"
     27
     28#include "huge_gem_object.h"
     29#include "igt_gem_utils.h"
     30
     31#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
     32
     33static int live_nop_switch(void *arg)
     34{
     35	const unsigned int nctx = 1024;
     36	struct drm_i915_private *i915 = arg;
     37	struct intel_engine_cs *engine;
     38	struct i915_gem_context **ctx;
     39	struct igt_live_test t;
     40	struct file *file;
     41	unsigned long n;
     42	int err = -ENODEV;
     43
     44	/*
     45	 * Create as many contexts as we can feasibly get away with
     46	 * and check we can switch between them rapidly.
     47	 *
      48	 * Serves as a very simple stress test for submission and HW switching
     49	 * between contexts.
     50	 */
     51
     52	if (!DRIVER_CAPS(i915)->has_logical_contexts)
     53		return 0;
     54
     55	file = mock_file(i915);
     56	if (IS_ERR(file))
     57		return PTR_ERR(file);
     58
     59	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
     60	if (!ctx) {
     61		err = -ENOMEM;
     62		goto out_file;
     63	}
     64
     65	for (n = 0; n < nctx; n++) {
     66		ctx[n] = live_context(i915, file);
     67		if (IS_ERR(ctx[n])) {
     68			err = PTR_ERR(ctx[n]);
     69			goto out_file;
     70		}
     71	}
     72
     73	for_each_uabi_engine(engine, i915) {
     74		struct i915_request *rq = NULL;
     75		unsigned long end_time, prime;
     76		ktime_t times[2] = {};
     77
     78		times[0] = ktime_get_raw();
     79		for (n = 0; n < nctx; n++) {
     80			struct i915_request *this;
     81
     82			this = igt_request_alloc(ctx[n], engine);
     83			if (IS_ERR(this)) {
     84				err = PTR_ERR(this);
     85				goto out_file;
     86			}
     87			if (rq) {
     88				i915_request_await_dma_fence(this, &rq->fence);
     89				i915_request_put(rq);
     90			}
     91			rq = i915_request_get(this);
     92			i915_request_add(this);
     93		}
     94		if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
     95			pr_err("Failed to populated %d contexts\n", nctx);
     96			intel_gt_set_wedged(to_gt(i915));
     97			i915_request_put(rq);
     98			err = -EIO;
     99			goto out_file;
    100		}
    101		i915_request_put(rq);
    102
    103		times[1] = ktime_get_raw();
    104
    105		pr_info("Populated %d contexts on %s in %lluns\n",
    106			nctx, engine->name, ktime_to_ns(times[1] - times[0]));
    107
    108		err = igt_live_test_begin(&t, i915, __func__, engine->name);
    109		if (err)
    110			goto out_file;
    111
    112		end_time = jiffies + i915_selftest.timeout_jiffies;
    113		for_each_prime_number_from(prime, 2, 8192) {
    114			times[1] = ktime_get_raw();
    115
    116			rq = NULL;
    117			for (n = 0; n < prime; n++) {
    118				struct i915_request *this;
    119
    120				this = igt_request_alloc(ctx[n % nctx], engine);
    121				if (IS_ERR(this)) {
    122					err = PTR_ERR(this);
    123					goto out_file;
    124				}
    125
    126				if (rq) { /* Force submission order */
    127					i915_request_await_dma_fence(this, &rq->fence);
    128					i915_request_put(rq);
    129				}
    130
    131				/*
    132				 * This space is left intentionally blank.
    133				 *
    134				 * We do not actually want to perform any
    135				 * action with this request, we just want
    136				 * to measure the latency in allocation
    137				 * and submission of our breadcrumbs -
    138				 * ensuring that the bare request is sufficient
    139				 * for the system to work (i.e. proper HEAD
    140				 * tracking of the rings, interrupt handling,
    141				 * etc). It also gives us the lowest bounds
    142				 * for latency.
    143				 */
    144
    145				rq = i915_request_get(this);
    146				i915_request_add(this);
    147			}
    148			GEM_BUG_ON(!rq);
    149			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
    150				pr_err("Switching between %ld contexts timed out\n",
    151				       prime);
    152				intel_gt_set_wedged(to_gt(i915));
    153				i915_request_put(rq);
    154				break;
    155			}
    156			i915_request_put(rq);
    157
    158			times[1] = ktime_sub(ktime_get_raw(), times[1]);
    159			if (prime == 2)
    160				times[0] = times[1];
    161
    162			if (__igt_timeout(end_time, NULL))
    163				break;
    164		}
    165
    166		err = igt_live_test_end(&t);
    167		if (err)
    168			goto out_file;
    169
    170		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
    171			engine->name,
    172			ktime_to_ns(times[0]),
    173			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
    174	}
    175
    176out_file:
    177	fput(file);
    178	return err;
    179}
    180
    181struct parallel_switch {
    182	struct task_struct *tsk;
    183	struct intel_context *ce[2];
    184};
    185
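/*
 * Synchronous variant: submit one request on each of the two contexts,
 * chained with a fence so they execute back-to-back, then wait for the
 * pair to retire before counting a switch and starting the next one.
 */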
    186static int __live_parallel_switch1(void *data)
    187{
    188	struct parallel_switch *arg = data;
    189	IGT_TIMEOUT(end_time);
    190	unsigned long count;
    191
    192	count = 0;
    193	do {
    194		struct i915_request *rq = NULL;
    195		int err, n;
    196
    197		err = 0;
    198		for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
    199			struct i915_request *prev = rq;
    200
    201			rq = i915_request_create(arg->ce[n]);
    202			if (IS_ERR(rq)) {
    203				i915_request_put(prev);
    204				return PTR_ERR(rq);
    205			}
    206
    207			i915_request_get(rq);
    208			if (prev) {
    209				err = i915_request_await_dma_fence(rq, &prev->fence);
    210				i915_request_put(prev);
    211			}
    212
    213			i915_request_add(rq);
    214		}
    215		if (i915_request_wait(rq, 0, HZ / 5) < 0)
    216			err = -ETIME;
    217		i915_request_put(rq);
    218		if (err)
    219			return err;
    220
    221		count++;
    222	} while (!__igt_timeout(end_time, NULL));
    223
    224	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
    225	return 0;
    226}
    227
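/*
 * Unsynchronised variant: keep submitting chained requests across the two
 * contexts without ever waiting inside the loop, and only drop the final
 * request reference once the timeout expires.
 */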
    228static int __live_parallel_switchN(void *data)
    229{
    230	struct parallel_switch *arg = data;
    231	struct i915_request *rq = NULL;
    232	IGT_TIMEOUT(end_time);
    233	unsigned long count;
    234	int n;
    235
    236	count = 0;
    237	do {
    238		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
    239			struct i915_request *prev = rq;
    240			int err = 0;
    241
    242			rq = i915_request_create(arg->ce[n]);
    243			if (IS_ERR(rq)) {
    244				i915_request_put(prev);
    245				return PTR_ERR(rq);
    246			}
    247
    248			i915_request_get(rq);
    249			if (prev) {
    250				err = i915_request_await_dma_fence(rq, &prev->fence);
    251				i915_request_put(prev);
    252			}
    253
    254			i915_request_add(rq);
    255			if (err) {
    256				i915_request_put(rq);
    257				return err;
    258			}
    259		}
    260
    261		count++;
    262	} while (!__igt_timeout(end_time, NULL));
    263	i915_request_put(rq);
    264
    265	pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
    266	return 0;
    267}
    268
    269static int live_parallel_switch(void *arg)
    270{
    271	struct drm_i915_private *i915 = arg;
    272	static int (* const func[])(void *arg) = {
    273		__live_parallel_switch1,
    274		__live_parallel_switchN,
    275		NULL,
    276	};
    277	struct parallel_switch *data = NULL;
    278	struct i915_gem_engines *engines;
    279	struct i915_gem_engines_iter it;
    280	int (* const *fn)(void *arg);
    281	struct i915_gem_context *ctx;
    282	struct intel_context *ce;
    283	struct file *file;
    284	int n, m, count;
    285	int err = 0;
    286
    287	/*
    288	 * Check we can process switches on all engines simultaneously.
    289	 */
    290
    291	if (!DRIVER_CAPS(i915)->has_logical_contexts)
    292		return 0;
    293
    294	file = mock_file(i915);
    295	if (IS_ERR(file))
    296		return PTR_ERR(file);
    297
    298	ctx = live_context(i915, file);
    299	if (IS_ERR(ctx)) {
    300		err = PTR_ERR(ctx);
    301		goto out_file;
    302	}
    303
    304	engines = i915_gem_context_lock_engines(ctx);
    305	count = engines->num_engines;
    306
    307	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
    308	if (!data) {
    309		i915_gem_context_unlock_engines(ctx);
    310		err = -ENOMEM;
    311		goto out_file;
    312	}
    313
    314	m = 0; /* Use the first context as our template for the engines */
    315	for_each_gem_engine(ce, engines, it) {
    316		err = intel_context_pin(ce);
    317		if (err) {
    318			i915_gem_context_unlock_engines(ctx);
    319			goto out;
    320		}
    321		data[m++].ce[0] = intel_context_get(ce);
    322	}
    323	i915_gem_context_unlock_engines(ctx);
    324
    325	/* Clone the same set of engines into the other contexts */
    326	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
    327		ctx = live_context(i915, file);
    328		if (IS_ERR(ctx)) {
    329			err = PTR_ERR(ctx);
    330			goto out;
    331		}
    332
    333		for (m = 0; m < count; m++) {
    334			if (!data[m].ce[0])
    335				continue;
    336
    337			ce = intel_context_create(data[m].ce[0]->engine);
    338			if (IS_ERR(ce))
    339				goto out;
    340
    341			err = intel_context_pin(ce);
    342			if (err) {
    343				intel_context_put(ce);
    344				goto out;
    345			}
    346
    347			data[m].ce[n] = ce;
    348		}
    349	}
    350
    351	for (fn = func; !err && *fn; fn++) {
    352		struct igt_live_test t;
    353		int n;
    354
    355		err = igt_live_test_begin(&t, i915, __func__, "");
    356		if (err)
    357			break;
    358
    359		for (n = 0; n < count; n++) {
    360			if (!data[n].ce[0])
    361				continue;
    362
    363			data[n].tsk = kthread_run(*fn, &data[n],
    364						  "igt/parallel:%s",
    365						  data[n].ce[0]->engine->name);
    366			if (IS_ERR(data[n].tsk)) {
    367				err = PTR_ERR(data[n].tsk);
    368				break;
    369			}
    370			get_task_struct(data[n].tsk);
    371		}
    372
    373		yield(); /* start all threads before we kthread_stop() */
    374
    375		for (n = 0; n < count; n++) {
    376			int status;
    377
    378			if (IS_ERR_OR_NULL(data[n].tsk))
    379				continue;
    380
    381			status = kthread_stop(data[n].tsk);
    382			if (status && !err)
    383				err = status;
    384
    385			put_task_struct(data[n].tsk);
    386			data[n].tsk = NULL;
    387		}
    388
    389		if (igt_live_test_end(&t))
    390			err = -EIO;
    391	}
    392
    393out:
    394	for (n = 0; n < count; n++) {
    395		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
    396			if (!data[n].ce[m])
    397				continue;
    398
    399			intel_context_unpin(data[n].ce[m]);
    400			intel_context_put(data[n].ce[m]);
    401		}
    402	}
    403	kfree(data);
    404out_file:
    405	fput(file);
    406	return err;
    407}
    408
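/*
 * A huge_gem_object is backed by far fewer physical pages than its
 * reported size: real_page_count() is the number of actual backing pages,
 * fake_page_count() the number of GTT-visible pages that alias onto them.
 */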
    409static unsigned long real_page_count(struct drm_i915_gem_object *obj)
    410{
    411	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
    412}
    413
    414static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
    415{
    416	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
    417}
    418
    419static int gpu_fill(struct intel_context *ce,
    420		    struct drm_i915_gem_object *obj,
    421		    unsigned int dw)
    422{
    423	struct i915_vma *vma;
    424	int err;
    425
    426	GEM_BUG_ON(obj->base.size > ce->vm->total);
    427	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
    428
    429	vma = i915_vma_instance(obj, ce->vm, NULL);
    430	if (IS_ERR(vma))
    431		return PTR_ERR(vma);
    432
    433	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
    434	if (err)
    435		return err;
    436
    437	/*
     438	 * Within the GTT each huge object maps every page onto
    439	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
    440	 * We set the nth dword within the page using the nth
    441	 * mapping via the GTT - this should exercise the GTT mapping
    442	 * whilst checking that each context provides a unique view
    443	 * into the object.
    444	 */
    445	err = igt_gpu_fill_dw(ce, vma,
    446			      (dw * real_page_count(obj)) << PAGE_SHIFT |
    447			      (dw * sizeof(u32)),
    448			      real_page_count(obj),
    449			      dw);
    450	i915_vma_unpin(vma);
    451
    452	return err;
    453}
    454
    455static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
    456{
    457	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
    458	unsigned int n, m, need_flush;
    459	int err;
    460
    461	i915_gem_object_lock(obj, NULL);
    462	err = i915_gem_object_prepare_write(obj, &need_flush);
    463	if (err)
    464		goto out;
    465
    466	for (n = 0; n < real_page_count(obj); n++) {
    467		u32 *map;
    468
    469		map = kmap_atomic(i915_gem_object_get_page(obj, n));
    470		for (m = 0; m < DW_PER_PAGE; m++)
    471			map[m] = value;
    472		if (!has_llc)
    473			drm_clflush_virt_range(map, PAGE_SIZE);
    474		kunmap_atomic(map);
    475	}
    476
    477	i915_gem_object_finish_access(obj);
    478	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
    479	obj->write_domain = 0;
    480out:
    481	i915_gem_object_unlock(obj);
    482	return err;
    483}
    484
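/*
 * Verify an object seeded by cpu_fill() and then written by the GPU: the
 * first 'max' dwords of each backing page must hold their own index (one
 * write per gpu_fill() pass), while the remaining dwords must still hold
 * STACK_MAGIC, i.e. remain untouched.
 */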
    485static noinline int cpu_check(struct drm_i915_gem_object *obj,
    486			      unsigned int idx, unsigned int max)
    487{
    488	unsigned int n, m, needs_flush;
    489	int err;
    490
    491	i915_gem_object_lock(obj, NULL);
    492	err = i915_gem_object_prepare_read(obj, &needs_flush);
    493	if (err)
    494		goto out_unlock;
    495
    496	for (n = 0; n < real_page_count(obj); n++) {
    497		u32 *map;
    498
    499		map = kmap_atomic(i915_gem_object_get_page(obj, n));
    500		if (needs_flush & CLFLUSH_BEFORE)
    501			drm_clflush_virt_range(map, PAGE_SIZE);
    502
    503		for (m = 0; m < max; m++) {
    504			if (map[m] != m) {
    505				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
    506				       __builtin_return_address(0), idx,
    507				       n, real_page_count(obj), m, max,
    508				       map[m], m);
    509				err = -EINVAL;
    510				goto out_unmap;
    511			}
    512		}
    513
    514		for (; m < DW_PER_PAGE; m++) {
    515			if (map[m] != STACK_MAGIC) {
    516				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
    517				       __builtin_return_address(0), idx, n, m,
    518				       map[m], STACK_MAGIC);
    519				err = -EINVAL;
    520				goto out_unmap;
    521			}
    522		}
    523
    524out_unmap:
    525		kunmap_atomic(map);
    526		if (err)
    527			break;
    528	}
    529
    530	i915_gem_object_finish_access(obj);
    531out_unlock:
    532	i915_gem_object_unlock(obj);
    533	return err;
    534}
    535
    536static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
    537{
    538	int err;
    539
    540	GEM_BUG_ON(obj->base.handle_count);
    541
    542	/* tie the object to the drm_file for easy reaping */
    543	err = idr_alloc(&to_drm_file(file)->object_idr,
    544			&obj->base, 1, 0, GFP_KERNEL);
    545	if (err < 0)
    546		return err;
    547
    548	i915_gem_object_get(obj);
    549	obj->base.handle_count++;
    550	return 0;
    551}
    552
    553static struct drm_i915_gem_object *
    554create_test_object(struct i915_address_space *vm,
    555		   struct file *file,
    556		   struct list_head *objects)
    557{
    558	struct drm_i915_gem_object *obj;
    559	u64 size;
    560	int err;
    561
    562	/* Keep in GEM's good graces */
    563	intel_gt_retire_requests(vm->gt);
    564
    565	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
    566	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
    567
    568	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
    569	if (IS_ERR(obj))
    570		return obj;
    571
    572	err = file_add_object(file, obj);
    573	i915_gem_object_put(obj);
    574	if (err)
    575		return ERR_PTR(err);
    576
    577	err = cpu_fill(obj, STACK_MAGIC);
    578	if (err) {
    579		pr_err("Failed to fill object with cpu, err=%d\n",
    580		       err);
    581		return ERR_PTR(err);
    582	}
    583
    584	list_add_tail(&obj->st_link, objects);
    585	return obj;
    586}
    587
    588static unsigned long max_dwords(struct drm_i915_gem_object *obj)
    589{
    590	unsigned long npages = fake_page_count(obj);
    591
    592	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
    593	return npages / DW_PER_PAGE;
    594}
    595
    596static void throttle_release(struct i915_request **q, int count)
    597{
    598	int i;
    599
    600	for (i = 0; i < count; i++) {
    601		if (IS_ERR_OR_NULL(q[i]))
    602			continue;
    603
    604		i915_request_put(fetch_and_zero(&q[i]));
    605	}
    606}
    607
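/*
 * Keep a small FIFO of outstanding requests per context: wait for the
 * oldest to complete, shift the queue down and append a fresh empty
 * request, so the test never runs arbitrarily far ahead of the hardware.
 */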
    608static int throttle(struct intel_context *ce,
    609		    struct i915_request **q, int count)
    610{
    611	int i;
    612
    613	if (!IS_ERR_OR_NULL(q[0])) {
    614		if (i915_request_wait(q[0],
    615				      I915_WAIT_INTERRUPTIBLE,
    616				      MAX_SCHEDULE_TIMEOUT) < 0)
    617			return -EINTR;
    618
    619		i915_request_put(q[0]);
    620	}
    621
    622	for (i = 0; i < count - 1; i++)
    623		q[i] = q[i + 1];
    624
    625	q[i] = intel_context_create_request(ce);
    626	if (IS_ERR(q[i]))
    627		return PTR_ERR(q[i]);
    628
    629	i915_request_get(q[i]);
    630	i915_request_add(q[i]);
    631
    632	return 0;
    633}
    634
    635static int igt_ctx_exec(void *arg)
    636{
    637	struct drm_i915_private *i915 = arg;
    638	struct intel_engine_cs *engine;
    639	int err = -ENODEV;
    640
    641	/*
    642	 * Create a few different contexts (with different mm) and write
    643	 * through each ctx/mm using the GPU making sure those writes end
    644	 * up in the expected pages of our obj.
    645	 */
    646
    647	if (!DRIVER_CAPS(i915)->has_logical_contexts)
    648		return 0;
    649
    650	for_each_uabi_engine(engine, i915) {
    651		struct drm_i915_gem_object *obj = NULL;
    652		unsigned long ncontexts, ndwords, dw;
    653		struct i915_request *tq[5] = {};
    654		struct igt_live_test t;
    655		IGT_TIMEOUT(end_time);
    656		LIST_HEAD(objects);
    657		struct file *file;
    658
    659		if (!intel_engine_can_store_dword(engine))
    660			continue;
    661
    662		if (!engine->context_size)
    663			continue; /* No logical context support in HW */
    664
    665		file = mock_file(i915);
    666		if (IS_ERR(file))
    667			return PTR_ERR(file);
    668
    669		err = igt_live_test_begin(&t, i915, __func__, engine->name);
    670		if (err)
    671			goto out_file;
    672
    673		ncontexts = 0;
    674		ndwords = 0;
    675		dw = 0;
    676		while (!time_after(jiffies, end_time)) {
    677			struct i915_gem_context *ctx;
    678			struct intel_context *ce;
    679
    680			ctx = kernel_context(i915, NULL);
    681			if (IS_ERR(ctx)) {
    682				err = PTR_ERR(ctx);
    683				goto out_file;
    684			}
    685
    686			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
    687			GEM_BUG_ON(IS_ERR(ce));
    688
    689			if (!obj) {
    690				obj = create_test_object(ce->vm, file, &objects);
    691				if (IS_ERR(obj)) {
    692					err = PTR_ERR(obj);
    693					intel_context_put(ce);
    694					kernel_context_close(ctx);
    695					goto out_file;
    696				}
    697			}
    698
    699			err = gpu_fill(ce, obj, dw);
    700			if (err) {
    701				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
    702				       ndwords, dw, max_dwords(obj),
    703				       engine->name,
    704				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
    705				       err);
    706				intel_context_put(ce);
    707				kernel_context_close(ctx);
    708				goto out_file;
    709			}
    710
    711			err = throttle(ce, tq, ARRAY_SIZE(tq));
    712			if (err) {
    713				intel_context_put(ce);
    714				kernel_context_close(ctx);
    715				goto out_file;
    716			}
    717
    718			if (++dw == max_dwords(obj)) {
    719				obj = NULL;
    720				dw = 0;
    721			}
    722
    723			ndwords++;
    724			ncontexts++;
    725
    726			intel_context_put(ce);
    727			kernel_context_close(ctx);
    728		}
    729
    730		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
    731			ncontexts, engine->name, ndwords);
    732
    733		ncontexts = dw = 0;
    734		list_for_each_entry(obj, &objects, st_link) {
    735			unsigned int rem =
    736				min_t(unsigned int, ndwords - dw, max_dwords(obj));
    737
    738			err = cpu_check(obj, ncontexts++, rem);
    739			if (err)
    740				break;
    741
    742			dw += rem;
    743		}
    744
    745out_file:
    746		throttle_release(tq, ARRAY_SIZE(tq));
    747		if (igt_live_test_end(&t))
    748			err = -EIO;
    749
    750		fput(file);
    751		if (err)
    752			return err;
    753
    754		i915_gem_drain_freed_objects(i915);
    755	}
    756
    757	return 0;
    758}
    759
    760static int igt_shared_ctx_exec(void *arg)
    761{
    762	struct drm_i915_private *i915 = arg;
    763	struct i915_request *tq[5] = {};
    764	struct i915_gem_context *parent;
    765	struct intel_engine_cs *engine;
    766	struct igt_live_test t;
    767	struct file *file;
    768	int err = 0;
    769
    770	/*
    771	 * Create a few different contexts with the same mm and write
    772	 * through each ctx using the GPU making sure those writes end
    773	 * up in the expected pages of our obj.
    774	 */
    775	if (!DRIVER_CAPS(i915)->has_logical_contexts)
    776		return 0;
    777
    778	file = mock_file(i915);
    779	if (IS_ERR(file))
    780		return PTR_ERR(file);
    781
    782	parent = live_context(i915, file);
    783	if (IS_ERR(parent)) {
    784		err = PTR_ERR(parent);
    785		goto out_file;
    786	}
    787
    788	if (!parent->vm) { /* not full-ppgtt; nothing to share */
    789		err = 0;
    790		goto out_file;
    791	}
    792
    793	err = igt_live_test_begin(&t, i915, __func__, "");
    794	if (err)
    795		goto out_file;
    796
    797	for_each_uabi_engine(engine, i915) {
    798		unsigned long ncontexts, ndwords, dw;
    799		struct drm_i915_gem_object *obj = NULL;
    800		IGT_TIMEOUT(end_time);
    801		LIST_HEAD(objects);
    802
    803		if (!intel_engine_can_store_dword(engine))
    804			continue;
    805
    806		dw = 0;
    807		ndwords = 0;
    808		ncontexts = 0;
    809		while (!time_after(jiffies, end_time)) {
    810			struct i915_gem_context *ctx;
    811			struct intel_context *ce;
    812
    813			ctx = kernel_context(i915, parent->vm);
    814			if (IS_ERR(ctx)) {
    815				err = PTR_ERR(ctx);
    816				goto out_test;
    817			}
    818
    819			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
    820			GEM_BUG_ON(IS_ERR(ce));
    821
    822			if (!obj) {
    823				obj = create_test_object(parent->vm,
    824							 file, &objects);
    825				if (IS_ERR(obj)) {
    826					err = PTR_ERR(obj);
    827					intel_context_put(ce);
    828					kernel_context_close(ctx);
    829					goto out_test;
    830				}
    831			}
    832
    833			err = gpu_fill(ce, obj, dw);
    834			if (err) {
    835				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
    836				       ndwords, dw, max_dwords(obj),
    837				       engine->name,
    838				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
    839				       err);
    840				intel_context_put(ce);
    841				kernel_context_close(ctx);
    842				goto out_test;
    843			}
    844
    845			err = throttle(ce, tq, ARRAY_SIZE(tq));
    846			if (err) {
    847				intel_context_put(ce);
    848				kernel_context_close(ctx);
    849				goto out_test;
    850			}
    851
    852			if (++dw == max_dwords(obj)) {
    853				obj = NULL;
    854				dw = 0;
    855			}
    856
    857			ndwords++;
    858			ncontexts++;
    859
    860			intel_context_put(ce);
    861			kernel_context_close(ctx);
    862		}
    863		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
    864			ncontexts, engine->name, ndwords);
    865
    866		ncontexts = dw = 0;
    867		list_for_each_entry(obj, &objects, st_link) {
    868			unsigned int rem =
    869				min_t(unsigned int, ndwords - dw, max_dwords(obj));
    870
    871			err = cpu_check(obj, ncontexts++, rem);
    872			if (err)
    873				goto out_test;
    874
    875			dw += rem;
    876		}
    877
    878		i915_gem_drain_freed_objects(i915);
    879	}
    880out_test:
    881	throttle_release(tq, ARRAY_SIZE(tq));
    882	if (igt_live_test_end(&t))
    883		err = -EIO;
    884out_file:
    885	fput(file);
    886	return err;
    887}
    888
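/*
 * Build a tiny batch that stores the engine's R_PWR_CLK_STATE register
 * into the start of the target vma via MI_STORE_REGISTER_MEM, terminated
 * by MI_BATCH_BUFFER_END.
 */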
    889static int rpcs_query_batch(struct drm_i915_gem_object *rpcs,
    890			    struct i915_vma *vma,
    891			    struct intel_engine_cs *engine)
    892{
    893	u32 *cmd;
    894
    895	GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);
    896
    897	cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
    898	if (IS_ERR(cmd))
    899		return PTR_ERR(cmd);
    900
    901	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
    902	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base));
    903	*cmd++ = lower_32_bits(vma->node.start);
    904	*cmd++ = upper_32_bits(vma->node.start);
    905	*cmd = MI_BATCH_BUFFER_END;
    906
    907	__i915_gem_object_flush_map(rpcs, 0, 64);
    908	i915_gem_object_unpin_map(rpcs);
    909
    910	intel_gt_chipset_flush(vma->vm->gt);
    911
    912	return 0;
    913}
    914
    915static int
    916emit_rpcs_query(struct drm_i915_gem_object *obj,
    917		struct intel_context *ce,
    918		struct i915_request **rq_out)
    919{
    920	struct drm_i915_private *i915 = to_i915(obj->base.dev);
    921	struct i915_request *rq;
    922	struct i915_gem_ww_ctx ww;
    923	struct i915_vma *batch;
    924	struct i915_vma *vma;
    925	struct drm_i915_gem_object *rpcs;
    926	int err;
    927
    928	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
    929
    930	if (GRAPHICS_VER(i915) < 8)
    931		return -EINVAL;
    932
    933	vma = i915_vma_instance(obj, ce->vm, NULL);
    934	if (IS_ERR(vma))
    935		return PTR_ERR(vma);
    936
    937	rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
    938	if (IS_ERR(rpcs))
    939		return PTR_ERR(rpcs);
    940
    941	batch = i915_vma_instance(rpcs, ce->vm, NULL);
    942	if (IS_ERR(batch)) {
    943		err = PTR_ERR(batch);
    944		goto err_put;
    945	}
    946
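	/*
	 * Take both object locks inside a ww transaction; on -EDEADLK the
	 * err_put path backs off the contended lock and restarts from the
	 * retry label.
	 */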
    947	i915_gem_ww_ctx_init(&ww, false);
    948retry:
    949	err = i915_gem_object_lock(obj, &ww);
    950	if (!err)
    951		err = i915_gem_object_lock(rpcs, &ww);
    952	if (!err)
    953		err = i915_gem_object_set_to_gtt_domain(obj, false);
    954	if (!err)
    955		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
    956	if (err)
    957		goto err_put;
    958
    959	err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
    960	if (err)
    961		goto err_vma;
    962
    963	err = rpcs_query_batch(rpcs, vma, ce->engine);
    964	if (err)
    965		goto err_batch;
    966
    967	rq = i915_request_create(ce);
    968	if (IS_ERR(rq)) {
    969		err = PTR_ERR(rq);
    970		goto err_batch;
    971	}
    972
    973	err = i915_request_await_object(rq, batch->obj, false);
    974	if (err == 0)
    975		err = i915_vma_move_to_active(batch, rq, 0);
    976	if (err)
    977		goto skip_request;
    978
    979	err = i915_request_await_object(rq, vma->obj, true);
    980	if (err == 0)
    981		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
    982	if (err)
    983		goto skip_request;
    984
    985	if (rq->engine->emit_init_breadcrumb) {
    986		err = rq->engine->emit_init_breadcrumb(rq);
    987		if (err)
    988			goto skip_request;
    989	}
    990
    991	err = rq->engine->emit_bb_start(rq,
    992					batch->node.start, batch->node.size,
    993					0);
    994	if (err)
    995		goto skip_request;
    996
    997	*rq_out = i915_request_get(rq);
    998
    999skip_request:
   1000	if (err)
   1001		i915_request_set_error_once(rq, err);
   1002	i915_request_add(rq);
   1003err_batch:
   1004	i915_vma_unpin(batch);
   1005err_vma:
   1006	i915_vma_unpin(vma);
   1007err_put:
   1008	if (err == -EDEADLK) {
   1009		err = i915_gem_ww_ctx_backoff(&ww);
   1010		if (!err)
   1011			goto retry;
   1012	}
   1013	i915_gem_ww_ctx_fini(&ww);
   1014	i915_gem_object_put(rpcs);
   1015	return err;
   1016}
   1017
   1018#define TEST_IDLE	BIT(0)
   1019#define TEST_BUSY	BIT(1)
   1020#define TEST_RESET	BIT(2)
   1021
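/*
 * TEST_BUSY and TEST_RESET start a spinner on the context before the SSEU
 * configuration is changed; TEST_RESET additionally performs an engine
 * reset before reading back the RPCS value; TEST_IDLE re-checks the
 * configuration after flushing the GPU to idle.
 */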
   1022static int
   1023__sseu_prepare(const char *name,
   1024	       unsigned int flags,
   1025	       struct intel_context *ce,
   1026	       struct igt_spinner **spin)
   1027{
   1028	struct i915_request *rq;
   1029	int ret;
   1030
   1031	*spin = NULL;
   1032	if (!(flags & (TEST_BUSY | TEST_RESET)))
   1033		return 0;
   1034
   1035	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
   1036	if (!*spin)
   1037		return -ENOMEM;
   1038
   1039	ret = igt_spinner_init(*spin, ce->engine->gt);
   1040	if (ret)
   1041		goto err_free;
   1042
   1043	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
   1044	if (IS_ERR(rq)) {
   1045		ret = PTR_ERR(rq);
   1046		goto err_fini;
   1047	}
   1048
   1049	i915_request_add(rq);
   1050
   1051	if (!igt_wait_for_spinner(*spin, rq)) {
   1052		pr_err("%s: Spinner failed to start!\n", name);
   1053		ret = -ETIMEDOUT;
   1054		goto err_end;
   1055	}
   1056
   1057	return 0;
   1058
   1059err_end:
   1060	igt_spinner_end(*spin);
   1061err_fini:
   1062	igt_spinner_fini(*spin);
   1063err_free:
   1064	kfree(fetch_and_zero(spin));
   1065	return ret;
   1066}
   1067
   1068static int
   1069__read_slice_count(struct intel_context *ce,
   1070		   struct drm_i915_gem_object *obj,
   1071		   struct igt_spinner *spin,
   1072		   u32 *rpcs)
   1073{
   1074	struct i915_request *rq = NULL;
   1075	u32 s_mask, s_shift;
   1076	unsigned int cnt;
   1077	u32 *buf, val;
   1078	long ret;
   1079
   1080	ret = emit_rpcs_query(obj, ce, &rq);
   1081	if (ret)
   1082		return ret;
   1083
   1084	if (spin)
   1085		igt_spinner_end(spin);
   1086
   1087	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
   1088	i915_request_put(rq);
   1089	if (ret < 0)
   1090		return ret;
   1091
   1092	buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
   1093	if (IS_ERR(buf)) {
   1094		ret = PTR_ERR(buf);
   1095		return ret;
   1096	}
   1097
   1098	if (GRAPHICS_VER(ce->engine->i915) >= 11) {
   1099		s_mask = GEN11_RPCS_S_CNT_MASK;
   1100		s_shift = GEN11_RPCS_S_CNT_SHIFT;
   1101	} else {
   1102		s_mask = GEN8_RPCS_S_CNT_MASK;
   1103		s_shift = GEN8_RPCS_S_CNT_SHIFT;
   1104	}
   1105
   1106	val = *buf;
   1107	cnt = (val & s_mask) >> s_shift;
   1108	*rpcs = val;
   1109
   1110	i915_gem_object_unpin_map(obj);
   1111
   1112	return cnt;
   1113}
   1114
   1115static int
   1116__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
   1117	     const char *prefix, const char *suffix)
   1118{
   1119	if (slices == expected)
   1120		return 0;
   1121
   1122	if (slices < 0) {
   1123		pr_err("%s: %s read slice count failed with %d%s\n",
   1124		       name, prefix, slices, suffix);
   1125		return slices;
   1126	}
   1127
   1128	pr_err("%s: %s slice count %d is not %u%s\n",
   1129	       name, prefix, slices, expected, suffix);
   1130
   1131	pr_info("RPCS=0x%x; %u%sx%u%s\n",
   1132		rpcs, slices,
   1133		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
   1134		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
   1135		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
   1136
   1137	return -EINVAL;
   1138}
   1139
   1140static int
   1141__sseu_finish(const char *name,
   1142	      unsigned int flags,
   1143	      struct intel_context *ce,
   1144	      struct drm_i915_gem_object *obj,
   1145	      unsigned int expected,
   1146	      struct igt_spinner *spin)
   1147{
   1148	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
   1149	u32 rpcs = 0;
   1150	int ret = 0;
   1151
   1152	if (flags & TEST_RESET) {
   1153		ret = intel_engine_reset(ce->engine, "sseu");
   1154		if (ret)
   1155			goto out;
   1156	}
   1157
   1158	ret = __read_slice_count(ce, obj,
   1159				 flags & TEST_RESET ? NULL : spin, &rpcs);
   1160	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
   1161	if (ret)
   1162		goto out;
   1163
   1164	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
   1165	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
   1166
   1167out:
   1168	if (spin)
   1169		igt_spinner_end(spin);
   1170
   1171	if ((flags & TEST_IDLE) && ret == 0) {
   1172		ret = igt_flush_test(ce->engine->i915);
   1173		if (ret)
   1174			return ret;
   1175
   1176		ret = __read_slice_count(ce, obj, NULL, &rpcs);
   1177		ret = __check_rpcs(name, rpcs, ret, expected,
   1178				   "Context", " after idle!");
   1179	}
   1180
   1181	return ret;
   1182}
   1183
   1184static int
   1185__sseu_test(const char *name,
   1186	    unsigned int flags,
   1187	    struct intel_context *ce,
   1188	    struct drm_i915_gem_object *obj,
   1189	    struct intel_sseu sseu)
   1190{
   1191	struct igt_spinner *spin = NULL;
   1192	int ret;
   1193
   1194	intel_engine_pm_get(ce->engine);
   1195
   1196	ret = __sseu_prepare(name, flags, ce, &spin);
   1197	if (ret)
   1198		goto out_pm;
   1199
   1200	ret = intel_context_reconfigure_sseu(ce, sseu);
   1201	if (ret)
   1202		goto out_spin;
   1203
   1204	ret = __sseu_finish(name, flags, ce, obj,
   1205			    hweight32(sseu.slice_mask), spin);
   1206
   1207out_spin:
   1208	if (spin) {
   1209		igt_spinner_end(spin);
   1210		igt_spinner_fini(spin);
   1211		kfree(spin);
   1212	}
   1213out_pm:
   1214	intel_engine_pm_put(ce->engine);
   1215	return ret;
   1216}
   1217
   1218static int
   1219__igt_ctx_sseu(struct drm_i915_private *i915,
   1220	       const char *name,
   1221	       unsigned int flags)
   1222{
   1223	struct drm_i915_gem_object *obj;
   1224	int inst = 0;
   1225	int ret = 0;
   1226
   1227	if (GRAPHICS_VER(i915) < 9)
   1228		return 0;
   1229
   1230	if (flags & TEST_RESET)
   1231		igt_global_reset_lock(to_gt(i915));
   1232
   1233	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
   1234	if (IS_ERR(obj)) {
   1235		ret = PTR_ERR(obj);
   1236		goto out_unlock;
   1237	}
   1238
   1239	do {
   1240		struct intel_engine_cs *engine;
   1241		struct intel_context *ce;
   1242		struct intel_sseu pg_sseu;
   1243
   1244		engine = intel_engine_lookup_user(i915,
   1245						  I915_ENGINE_CLASS_RENDER,
   1246						  inst++);
   1247		if (!engine)
   1248			break;
   1249
   1250		if (hweight32(engine->sseu.slice_mask) < 2)
   1251			continue;
   1252
   1253		if (!engine->gt->info.sseu.has_slice_pg)
   1254			continue;
   1255
   1256		/*
   1257		 * Gen11 VME friendly power-gated configuration with
   1258		 * half enabled sub-slices.
   1259		 */
   1260		pg_sseu = engine->sseu;
   1261		pg_sseu.slice_mask = 1;
   1262		pg_sseu.subslice_mask =
   1263			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
   1264
   1265		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
   1266			engine->name, name, flags,
   1267			hweight32(engine->sseu.slice_mask),
   1268			hweight32(pg_sseu.slice_mask));
   1269
   1270		ce = intel_context_create(engine);
   1271		if (IS_ERR(ce)) {
   1272			ret = PTR_ERR(ce);
   1273			goto out_put;
   1274		}
   1275
   1276		ret = intel_context_pin(ce);
   1277		if (ret)
   1278			goto out_ce;
   1279
   1280		/* First set the default mask. */
   1281		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
   1282		if (ret)
   1283			goto out_unpin;
   1284
   1285		/* Then set a power-gated configuration. */
   1286		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
   1287		if (ret)
   1288			goto out_unpin;
   1289
   1290		/* Back to defaults. */
   1291		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
   1292		if (ret)
   1293			goto out_unpin;
   1294
   1295		/* One last power-gated configuration for the road. */
   1296		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
   1297		if (ret)
   1298			goto out_unpin;
   1299
   1300out_unpin:
   1301		intel_context_unpin(ce);
   1302out_ce:
   1303		intel_context_put(ce);
   1304	} while (!ret);
   1305
   1306	if (igt_flush_test(i915))
   1307		ret = -EIO;
   1308
   1309out_put:
   1310	i915_gem_object_put(obj);
   1311
   1312out_unlock:
   1313	if (flags & TEST_RESET)
   1314		igt_global_reset_unlock(to_gt(i915));
   1315
   1316	if (ret)
   1317		pr_err("%s: Failed with %d!\n", name, ret);
   1318
   1319	return ret;
   1320}
   1321
   1322static int igt_ctx_sseu(void *arg)
   1323{
   1324	struct {
   1325		const char *name;
   1326		unsigned int flags;
   1327	} *phase, phases[] = {
   1328		{ .name = "basic", .flags = 0 },
   1329		{ .name = "idle", .flags = TEST_IDLE },
   1330		{ .name = "busy", .flags = TEST_BUSY },
   1331		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
   1332		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
   1333		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
   1334	};
   1335	unsigned int i;
   1336	int ret = 0;
   1337
   1338	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
   1339	     i++, phase++)
   1340		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);
   1341
   1342	return ret;
   1343}
   1344
   1345static int igt_ctx_readonly(void *arg)
   1346{
   1347	struct drm_i915_private *i915 = arg;
   1348	unsigned long idx, ndwords, dw, num_engines;
   1349	struct drm_i915_gem_object *obj = NULL;
   1350	struct i915_request *tq[5] = {};
   1351	struct i915_gem_engines_iter it;
   1352	struct i915_address_space *vm;
   1353	struct i915_gem_context *ctx;
   1354	struct intel_context *ce;
   1355	struct igt_live_test t;
   1356	I915_RND_STATE(prng);
   1357	IGT_TIMEOUT(end_time);
   1358	LIST_HEAD(objects);
   1359	struct file *file;
   1360	int err = -ENODEV;
   1361
   1362	/*
   1363	 * Create a few read-only objects (with the occasional writable object)
    1364	 * and try to write into these objects, checking that the GPU discards
   1365	 * any write to a read-only object.
   1366	 */
   1367
   1368	file = mock_file(i915);
   1369	if (IS_ERR(file))
   1370		return PTR_ERR(file);
   1371
   1372	err = igt_live_test_begin(&t, i915, __func__, "");
   1373	if (err)
   1374		goto out_file;
   1375
   1376	ctx = live_context(i915, file);
   1377	if (IS_ERR(ctx)) {
   1378		err = PTR_ERR(ctx);
   1379		goto out_file;
   1380	}
   1381
   1382	vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
   1383	if (!vm || !vm->has_read_only) {
   1384		err = 0;
   1385		goto out_file;
   1386	}
   1387
   1388	num_engines = 0;
   1389	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
   1390		if (intel_engine_can_store_dword(ce->engine))
   1391			num_engines++;
   1392	i915_gem_context_unlock_engines(ctx);
   1393
   1394	ndwords = 0;
   1395	dw = 0;
   1396	while (!time_after(jiffies, end_time)) {
   1397		for_each_gem_engine(ce,
   1398				    i915_gem_context_lock_engines(ctx), it) {
   1399			if (!intel_engine_can_store_dword(ce->engine))
   1400				continue;
   1401
   1402			if (!obj) {
   1403				obj = create_test_object(ce->vm, file, &objects);
   1404				if (IS_ERR(obj)) {
   1405					err = PTR_ERR(obj);
   1406					i915_gem_context_unlock_engines(ctx);
   1407					goto out_file;
   1408				}
   1409
   1410				if (prandom_u32_state(&prng) & 1)
   1411					i915_gem_object_set_readonly(obj);
   1412			}
   1413
   1414			err = gpu_fill(ce, obj, dw);
   1415			if (err) {
   1416				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
   1417				       ndwords, dw, max_dwords(obj),
   1418				       ce->engine->name,
   1419				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
   1420				       err);
   1421				i915_gem_context_unlock_engines(ctx);
   1422				goto out_file;
   1423			}
   1424
   1425			err = throttle(ce, tq, ARRAY_SIZE(tq));
   1426			if (err) {
   1427				i915_gem_context_unlock_engines(ctx);
   1428				goto out_file;
   1429			}
   1430
   1431			if (++dw == max_dwords(obj)) {
   1432				obj = NULL;
   1433				dw = 0;
   1434			}
   1435			ndwords++;
   1436		}
   1437		i915_gem_context_unlock_engines(ctx);
   1438	}
   1439	pr_info("Submitted %lu dwords (across %lu engines)\n",
   1440		ndwords, num_engines);
   1441
   1442	dw = 0;
   1443	idx = 0;
   1444	list_for_each_entry(obj, &objects, st_link) {
   1445		unsigned int rem =
   1446			min_t(unsigned int, ndwords - dw, max_dwords(obj));
   1447		unsigned int num_writes;
   1448
   1449		num_writes = rem;
   1450		if (i915_gem_object_is_readonly(obj))
   1451			num_writes = 0;
   1452
   1453		err = cpu_check(obj, idx++, num_writes);
   1454		if (err)
   1455			break;
   1456
   1457		dw += rem;
   1458	}
   1459
   1460out_file:
   1461	throttle_release(tq, ARRAY_SIZE(tq));
   1462	if (igt_live_test_end(&t))
   1463		err = -EIO;
   1464
   1465	fput(file);
   1466	return err;
   1467}
   1468
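/*
 * Ensure the randomly chosen offset does not overlap any existing
 * allocation in the VM, so the scratch writes and reads land in otherwise
 * unused address space.
 */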
   1469static int check_scratch(struct i915_address_space *vm, u64 offset)
   1470{
   1471	struct drm_mm_node *node;
   1472
   1473	mutex_lock(&vm->mutex);
   1474	node = __drm_mm_interval_first(&vm->mm,
   1475				       offset, offset + sizeof(u32) - 1);
   1476	mutex_unlock(&vm->mutex);
   1477	if (!node || node->start > offset)
   1478		return 0;
   1479
   1480	GEM_BUG_ON(offset >= node->start + node->size);
   1481
   1482	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
   1483	       upper_32_bits(offset), lower_32_bits(offset));
   1484	return -EINVAL;
   1485}
   1486
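/*
 * Emit a batch from ctx that performs a single MI_STORE_DWORD_IMM of
 * 'value' at 'offset' within the context's VM.
 */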
   1487static int write_to_scratch(struct i915_gem_context *ctx,
   1488			    struct intel_engine_cs *engine,
   1489			    struct drm_i915_gem_object *obj,
   1490			    u64 offset, u32 value)
   1491{
   1492	struct drm_i915_private *i915 = ctx->i915;
   1493	struct i915_address_space *vm;
   1494	struct i915_request *rq;
   1495	struct i915_vma *vma;
   1496	u32 *cmd;
   1497	int err;
   1498
   1499	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
   1500
   1501	err = check_scratch(ctx->vm, offset);
   1502	if (err)
   1503		return err;
   1504
   1505	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
   1506	if (IS_ERR(cmd))
   1507		return PTR_ERR(cmd);
   1508
   1509	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
   1510	if (GRAPHICS_VER(i915) >= 8) {
   1511		*cmd++ = lower_32_bits(offset);
   1512		*cmd++ = upper_32_bits(offset);
   1513	} else {
   1514		*cmd++ = 0;
   1515		*cmd++ = offset;
   1516	}
   1517	*cmd++ = value;
   1518	*cmd = MI_BATCH_BUFFER_END;
   1519	__i915_gem_object_flush_map(obj, 0, 64);
   1520	i915_gem_object_unpin_map(obj);
   1521
   1522	intel_gt_chipset_flush(engine->gt);
   1523
   1524	vm = i915_gem_context_get_eb_vm(ctx);
   1525	vma = i915_vma_instance(obj, vm, NULL);
   1526	if (IS_ERR(vma)) {
   1527		err = PTR_ERR(vma);
   1528		goto out_vm;
   1529	}
   1530
   1531	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
   1532	if (err)
   1533		goto out_vm;
   1534
   1535	rq = igt_request_alloc(ctx, engine);
   1536	if (IS_ERR(rq)) {
   1537		err = PTR_ERR(rq);
   1538		goto err_unpin;
   1539	}
   1540
   1541	i915_vma_lock(vma);
   1542	err = i915_request_await_object(rq, vma->obj, false);
   1543	if (err == 0)
   1544		err = i915_vma_move_to_active(vma, rq, 0);
   1545	i915_vma_unlock(vma);
   1546	if (err)
   1547		goto skip_request;
   1548
   1549	if (rq->engine->emit_init_breadcrumb) {
   1550		err = rq->engine->emit_init_breadcrumb(rq);
   1551		if (err)
   1552			goto skip_request;
   1553	}
   1554
   1555	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
   1556	if (err)
   1557		goto skip_request;
   1558
   1559	i915_vma_unpin(vma);
   1560
   1561	i915_request_add(rq);
   1562
   1563	goto out_vm;
   1564skip_request:
   1565	i915_request_set_error_once(rq, err);
   1566	i915_request_add(rq);
   1567err_unpin:
   1568	i915_vma_unpin(vma);
   1569out_vm:
   1570	i915_vm_put(vm);
   1571
   1572	if (!err)
   1573		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
   1574
   1575	return err;
   1576}
   1577
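/*
 * Read back a single dword from 'offset' as seen by ctx: on gen8+ it is
 * loaded into a CS general-purpose register with MI_LOAD_REGISTER_MEM and
 * stored back into the batch object; on older hardware the same
 * load/store sequence needs a secure batch dispatched from the GGTT, as
 * non-privileged register access is protected.
 */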
   1578static int read_from_scratch(struct i915_gem_context *ctx,
   1579			     struct intel_engine_cs *engine,
   1580			     struct drm_i915_gem_object *obj,
   1581			     u64 offset, u32 *value)
   1582{
   1583	struct drm_i915_private *i915 = ctx->i915;
   1584	struct i915_address_space *vm;
   1585	const u32 result = 0x100;
   1586	struct i915_request *rq;
   1587	struct i915_vma *vma;
   1588	unsigned int flags;
   1589	u32 *cmd;
   1590	int err;
   1591
   1592	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
   1593
   1594	err = check_scratch(ctx->vm, offset);
   1595	if (err)
   1596		return err;
   1597
   1598	if (GRAPHICS_VER(i915) >= 8) {
   1599		const u32 GPR0 = engine->mmio_base + 0x600;
   1600
   1601		vm = i915_gem_context_get_eb_vm(ctx);
   1602		vma = i915_vma_instance(obj, vm, NULL);
   1603		if (IS_ERR(vma)) {
   1604			err = PTR_ERR(vma);
   1605			goto out_vm;
   1606		}
   1607
   1608		err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
   1609		if (err)
   1610			goto out_vm;
   1611
   1612		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
   1613		if (IS_ERR(cmd)) {
   1614			err = PTR_ERR(cmd);
   1615			goto err_unpin;
   1616		}
   1617
   1618		memset(cmd, POISON_INUSE, PAGE_SIZE);
   1619		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
   1620		*cmd++ = GPR0;
   1621		*cmd++ = lower_32_bits(offset);
   1622		*cmd++ = upper_32_bits(offset);
   1623		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
   1624		*cmd++ = GPR0;
   1625		*cmd++ = result;
   1626		*cmd++ = 0;
   1627		*cmd = MI_BATCH_BUFFER_END;
   1628
   1629		i915_gem_object_flush_map(obj);
   1630		i915_gem_object_unpin_map(obj);
   1631
   1632		flags = 0;
   1633	} else {
   1634		const u32 reg = engine->mmio_base + 0x420;
   1635
   1636		/* hsw: register access even to 3DPRIM! is protected */
   1637		vm = i915_vm_get(&engine->gt->ggtt->vm);
   1638		vma = i915_vma_instance(obj, vm, NULL);
   1639		if (IS_ERR(vma)) {
   1640			err = PTR_ERR(vma);
   1641			goto out_vm;
   1642		}
   1643
   1644		err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
   1645		if (err)
   1646			goto out_vm;
   1647
   1648		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
   1649		if (IS_ERR(cmd)) {
   1650			err = PTR_ERR(cmd);
   1651			goto err_unpin;
   1652		}
   1653
   1654		memset(cmd, POISON_INUSE, PAGE_SIZE);
   1655		*cmd++ = MI_LOAD_REGISTER_MEM;
   1656		*cmd++ = reg;
   1657		*cmd++ = offset;
   1658		*cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
   1659		*cmd++ = reg;
   1660		*cmd++ = vma->node.start + result;
   1661		*cmd = MI_BATCH_BUFFER_END;
   1662
   1663		i915_gem_object_flush_map(obj);
   1664		i915_gem_object_unpin_map(obj);
   1665
   1666		flags = I915_DISPATCH_SECURE;
   1667	}
   1668
   1669	intel_gt_chipset_flush(engine->gt);
   1670
   1671	rq = igt_request_alloc(ctx, engine);
   1672	if (IS_ERR(rq)) {
   1673		err = PTR_ERR(rq);
   1674		goto err_unpin;
   1675	}
   1676
   1677	i915_vma_lock(vma);
   1678	err = i915_request_await_object(rq, vma->obj, true);
   1679	if (err == 0)
   1680		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
   1681	i915_vma_unlock(vma);
   1682	if (err)
   1683		goto skip_request;
   1684
   1685	if (rq->engine->emit_init_breadcrumb) {
   1686		err = rq->engine->emit_init_breadcrumb(rq);
   1687		if (err)
   1688			goto skip_request;
   1689	}
   1690
   1691	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
   1692	if (err)
   1693		goto skip_request;
   1694
   1695	i915_vma_unpin(vma);
   1696
   1697	i915_request_add(rq);
   1698
   1699	i915_gem_object_lock(obj, NULL);
   1700	err = i915_gem_object_set_to_cpu_domain(obj, false);
   1701	i915_gem_object_unlock(obj);
   1702	if (err)
   1703		goto out_vm;
   1704
   1705	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
   1706	if (IS_ERR(cmd)) {
   1707		err = PTR_ERR(cmd);
   1708		goto out_vm;
   1709	}
   1710
   1711	*value = cmd[result / sizeof(*cmd)];
   1712	i915_gem_object_unpin_map(obj);
   1713
   1714	goto out_vm;
   1715skip_request:
   1716	i915_request_set_error_once(rq, err);
   1717	i915_request_add(rq);
   1718err_unpin:
   1719	i915_vma_unpin(vma);
   1720out_vm:
   1721	i915_vm_put(vm);
   1722
   1723	if (!err)
   1724		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
   1725
   1726	return err;
   1727}
   1728
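/*
 * Record the current contents of the context's scratch page and verify it
 * is uniform; this is the value every isolated read is expected to
 * return.
 */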
   1729static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
   1730{
   1731	struct i915_address_space *vm;
   1732	u32 *vaddr;
   1733	int err = 0;
   1734
   1735	vm = ctx->vm;
   1736	if (!vm)
   1737		return -ENODEV;
   1738
   1739	if (!vm->scratch[0]) {
   1740		pr_err("No scratch page!\n");
   1741		return -EINVAL;
   1742	}
   1743
   1744	vaddr = __px_vaddr(vm->scratch[0]);
   1745
   1746	memcpy(out, vaddr, sizeof(*out));
   1747	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
   1748		pr_err("Inconsistent initial state of scratch page!\n");
   1749		err = -EINVAL;
   1750	}
   1751
   1752	return err;
   1753}
   1754
   1755static int igt_vm_isolation(void *arg)
   1756{
   1757	struct drm_i915_private *i915 = arg;
   1758	struct i915_gem_context *ctx_a, *ctx_b;
   1759	struct drm_i915_gem_object *obj_a, *obj_b;
   1760	unsigned long num_engines, count;
   1761	struct intel_engine_cs *engine;
   1762	struct igt_live_test t;
   1763	I915_RND_STATE(prng);
   1764	struct file *file;
   1765	u64 vm_total;
   1766	u32 expected;
   1767	int err;
   1768
   1769	if (GRAPHICS_VER(i915) < 7)
   1770		return 0;
   1771
   1772	/*
   1773	 * The simple goal here is that a write into one context is not
   1774	 * observed in a second (separate page tables and scratch).
   1775	 */
   1776
   1777	file = mock_file(i915);
   1778	if (IS_ERR(file))
   1779		return PTR_ERR(file);
   1780
   1781	err = igt_live_test_begin(&t, i915, __func__, "");
   1782	if (err)
   1783		goto out_file;
   1784
   1785	ctx_a = live_context(i915, file);
   1786	if (IS_ERR(ctx_a)) {
   1787		err = PTR_ERR(ctx_a);
   1788		goto out_file;
   1789	}
   1790
   1791	ctx_b = live_context(i915, file);
   1792	if (IS_ERR(ctx_b)) {
   1793		err = PTR_ERR(ctx_b);
   1794		goto out_file;
   1795	}
   1796
    1797	/* We can only test vm isolation if the vms are distinct */
   1798	if (ctx_a->vm == ctx_b->vm)
   1799		goto out_file;
   1800
   1801	/* Read the initial state of the scratch page */
   1802	err = check_scratch_page(ctx_a, &expected);
   1803	if (err)
   1804		goto out_file;
   1805
   1806	err = check_scratch_page(ctx_b, &expected);
   1807	if (err)
   1808		goto out_file;
   1809
   1810	vm_total = ctx_a->vm->total;
   1811	GEM_BUG_ON(ctx_b->vm->total != vm_total);
   1812
   1813	obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
   1814	if (IS_ERR(obj_a)) {
   1815		err = PTR_ERR(obj_a);
   1816		goto out_file;
   1817	}
   1818
   1819	obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
   1820	if (IS_ERR(obj_b)) {
   1821		err = PTR_ERR(obj_b);
   1822		goto put_a;
   1823	}
   1824
   1825	count = 0;
   1826	num_engines = 0;
   1827	for_each_uabi_engine(engine, i915) {
   1828		IGT_TIMEOUT(end_time);
   1829		unsigned long this = 0;
   1830
   1831		if (!intel_engine_can_store_dword(engine))
   1832			continue;
   1833
   1834		/* Not all engines have their own GPR! */
   1835		if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
   1836			continue;
   1837
   1838		while (!__igt_timeout(end_time, NULL)) {
   1839			u32 value = 0xc5c5c5c5;
   1840			u64 offset;
   1841
   1842			/* Leave enough space at offset 0 for the batch */
   1843			offset = igt_random_offset(&prng,
   1844						   I915_GTT_PAGE_SIZE, vm_total,
   1845						   sizeof(u32), alignof_dword);
   1846
   1847			err = write_to_scratch(ctx_a, engine, obj_a,
   1848					       offset, 0xdeadbeef);
   1849			if (err == 0)
   1850				err = read_from_scratch(ctx_b, engine, obj_b,
   1851							offset, &value);
   1852			if (err)
   1853				goto put_b;
   1854
   1855			if (value != expected) {
   1856				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
   1857				       engine->name, value,
   1858				       upper_32_bits(offset),
   1859				       lower_32_bits(offset),
   1860				       this);
   1861				err = -EINVAL;
   1862				goto put_b;
   1863			}
   1864
   1865			this++;
   1866		}
   1867		count += this;
   1868		num_engines++;
   1869	}
   1870	pr_info("Checked %lu scratch offsets across %lu engines\n",
   1871		count, num_engines);
   1872
   1873put_b:
   1874	i915_gem_object_put(obj_b);
   1875put_a:
   1876	i915_gem_object_put(obj_a);
   1877out_file:
   1878	if (igt_live_test_end(&t))
   1879		err = -EIO;
   1880	fput(file);
   1881	return err;
   1882}
   1883
   1884int i915_gem_context_live_selftests(struct drm_i915_private *i915)
   1885{
   1886	static const struct i915_subtest tests[] = {
   1887		SUBTEST(live_nop_switch),
   1888		SUBTEST(live_parallel_switch),
   1889		SUBTEST(igt_ctx_exec),
   1890		SUBTEST(igt_ctx_readonly),
   1891		SUBTEST(igt_ctx_sseu),
   1892		SUBTEST(igt_shared_ctx_exec),
   1893		SUBTEST(igt_vm_isolation),
   1894	};
   1895
   1896	if (intel_gt_is_wedged(to_gt(i915)))
   1897		return 0;
   1898
   1899	return i915_live_subtests(tests, i915);
   1900}