cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

selftest_execlists.c (100119B)


      1// SPDX-License-Identifier: MIT
      2/*
      3 * Copyright © 2018 Intel Corporation
      4 */
      5
      6#include <linux/prime_numbers.h>
      7
      8#include "gem/i915_gem_internal.h"
      9#include "gem/i915_gem_pm.h"
     10#include "gt/intel_engine_heartbeat.h"
     11#include "gt/intel_reset.h"
     12#include "gt/selftest_engine_heartbeat.h"
     13
     14#include "i915_selftest.h"
     15#include "selftests/i915_random.h"
     16#include "selftests/igt_flush_test.h"
     17#include "selftests/igt_live_test.h"
     18#include "selftests/igt_spinner.h"
     19#include "selftests/lib_sw_fence.h"
     20
     21#include "gem/selftests/igt_gem_utils.h"
     22#include "gem/selftests/mock_context.h"
     23
     24#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
     25#define NUM_GPR 16
     26#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
     27
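       /*
        * Consider a request in flight if it is in the ELSP, has been put
        * on hold, or has already started executing on the HW.
        */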
     28static bool is_active(struct i915_request *rq)
     29{
     30	if (i915_request_is_active(rq))
     31		return true;
     32
     33	if (i915_request_on_hold(rq))
     34		return true;
     35
     36	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
     37		return true;
     38
     39	return false;
     40}
     41
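       /*
        * Poll until the HW has accepted @rq into the ELSP (or the request
        * has completed), giving up after @timeout jiffies.
        */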
     42static int wait_for_submit(struct intel_engine_cs *engine,
     43			   struct i915_request *rq,
     44			   unsigned long timeout)
     45{
     46	/* Ignore our own attempts to suppress excess tasklets */
     47	tasklet_hi_schedule(&engine->sched_engine->tasklet);
     48
     49	timeout += jiffies;
     50	do {
     51		bool done = time_after(jiffies, timeout);
     52
     53		if (i915_request_completed(rq)) /* that was quick! */
     54			return 0;
     55
     56		/* Wait until the HW has acknowledged the submission (or err) */
     57		intel_engine_flush_submission(engine);
     58		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
     59			return 0;
     60
     61		if (done)
     62			return -ETIME;
     63
     64		cond_resched();
     65	} while (1);
     66}
     67
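       /*
        * Wait for the engine reset to mark the hanging @rq with -EIO and
        * then for the request to complete.
        */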
     68static int wait_for_reset(struct intel_engine_cs *engine,
     69			  struct i915_request *rq,
     70			  unsigned long timeout)
     71{
     72	timeout += jiffies;
     73
     74	do {
     75		cond_resched();
     76		intel_engine_flush_submission(engine);
     77
     78		if (READ_ONCE(engine->execlists.pending[0]))
     79			continue;
     80
     81		if (i915_request_completed(rq))
     82			break;
     83
     84		if (READ_ONCE(rq->fence.error))
     85			break;
     86	} while (time_before(jiffies, timeout));
     87
     88	flush_scheduled_work();
     89
     90	if (rq->fence.error != -EIO) {
     91		pr_err("%s: hanging request %llx:%lld not reset\n",
     92		       engine->name,
     93		       rq->fence.context,
     94		       rq->fence.seqno);
     95		return -EINVAL;
     96	}
     97
     98	/* Give the request a jiffie to complete after flushing the worker */
     99	if (i915_request_wait(rq, 0,
    100			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
    101		pr_err("%s: hanging request %llx:%lld did not complete\n",
    102		       engine->name,
    103		       rq->fence.context,
    104		       rq->fence.seqno);
    105		return -ETIME;
    106	}
    107
    108	return 0;
    109}
    110
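        /* Smoke test: run a spinner to completion on each engine */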
    111static int live_sanitycheck(void *arg)
    112{
    113	struct intel_gt *gt = arg;
    114	struct intel_engine_cs *engine;
    115	enum intel_engine_id id;
    116	struct igt_spinner spin;
    117	int err = 0;
    118
    119	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
    120		return 0;
    121
    122	if (igt_spinner_init(&spin, gt))
    123		return -ENOMEM;
    124
    125	for_each_engine(engine, gt, id) {
    126		struct intel_context *ce;
    127		struct i915_request *rq;
    128
    129		ce = intel_context_create(engine);
    130		if (IS_ERR(ce)) {
    131			err = PTR_ERR(ce);
    132			break;
    133		}
    134
    135		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
    136		if (IS_ERR(rq)) {
    137			err = PTR_ERR(rq);
    138			goto out_ctx;
    139		}
    140
    141		i915_request_add(rq);
    142		if (!igt_wait_for_spinner(&spin, rq)) {
    143			GEM_TRACE("spinner failed to start\n");
    144			GEM_TRACE_DUMP();
    145			intel_gt_set_wedged(gt);
    146			err = -EIO;
    147			goto out_ctx;
    148		}
    149
    150		igt_spinner_end(&spin);
    151		if (igt_flush_test(gt->i915)) {
    152			err = -EIO;
    153			goto out_ctx;
    154		}
    155
    156out_ctx:
    157		intel_context_put(ce);
    158		if (err)
    159			break;
    160	}
    161
    162	igt_spinner_fini(&spin);
    163	return err;
    164}
    165
    166static int live_unlite_restore(struct intel_gt *gt, int prio)
    167{
    168	struct intel_engine_cs *engine;
    169	enum intel_engine_id id;
    170	struct igt_spinner spin;
    171	int err = -ENOMEM;
    172
    173	/*
    174	 * Check that we can correctly context switch between 2 instances
    175	 * on the same engine from the same parent context.
    176	 */
    177
    178	if (igt_spinner_init(&spin, gt))
    179		return err;
    180
    181	err = 0;
    182	for_each_engine(engine, gt, id) {
    183		struct intel_context *ce[2] = {};
    184		struct i915_request *rq[2];
    185		struct igt_live_test t;
    186		int n;
    187
    188		if (prio && !intel_engine_has_preemption(engine))
    189			continue;
    190
    191		if (!intel_engine_can_store_dword(engine))
    192			continue;
    193
    194		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
    195			err = -EIO;
    196			break;
    197		}
    198		st_engine_heartbeat_disable(engine);
    199
    200		for (n = 0; n < ARRAY_SIZE(ce); n++) {
    201			struct intel_context *tmp;
    202
    203			tmp = intel_context_create(engine);
    204			if (IS_ERR(tmp)) {
    205				err = PTR_ERR(tmp);
    206				goto err_ce;
    207			}
    208
    209			err = intel_context_pin(tmp);
    210			if (err) {
    211				intel_context_put(tmp);
    212				goto err_ce;
    213			}
    214
    215			/*
     216			 * Set up the pair of contexts such that if we
    217			 * lite-restore using the RING_TAIL from ce[1] it
    218			 * will execute garbage from ce[0]->ring.
    219			 */
    220			memset(tmp->ring->vaddr,
    221			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
    222			       tmp->ring->vma->size);
    223
    224			ce[n] = tmp;
    225		}
    226		GEM_BUG_ON(!ce[1]->ring->size);
    227		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
    228		lrc_update_regs(ce[1], engine, ce[1]->ring->head);
    229
    230		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
    231		if (IS_ERR(rq[0])) {
    232			err = PTR_ERR(rq[0]);
    233			goto err_ce;
    234		}
    235
    236		i915_request_get(rq[0]);
    237		i915_request_add(rq[0]);
    238		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
    239
    240		if (!igt_wait_for_spinner(&spin, rq[0])) {
    241			i915_request_put(rq[0]);
    242			goto err_ce;
    243		}
    244
    245		rq[1] = i915_request_create(ce[1]);
    246		if (IS_ERR(rq[1])) {
    247			err = PTR_ERR(rq[1]);
    248			i915_request_put(rq[0]);
    249			goto err_ce;
    250		}
    251
    252		if (!prio) {
    253			/*
    254			 * Ensure we do the switch to ce[1] on completion.
    255			 *
    256			 * rq[0] is already submitted, so this should reduce
    257			 * to a no-op (a wait on a request on the same engine
    258			 * uses the submit fence, not the completion fence),
    259			 * but it will install a dependency on rq[1] for rq[0]
    260			 * that will prevent the pair being reordered by
    261			 * timeslicing.
    262			 */
    263			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
    264		}
    265
    266		i915_request_get(rq[1]);
    267		i915_request_add(rq[1]);
    268		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
    269		i915_request_put(rq[0]);
    270
    271		if (prio) {
    272			struct i915_sched_attr attr = {
    273				.priority = prio,
    274			};
    275
    276			/* Alternatively preempt the spinner with ce[1] */
    277			engine->sched_engine->schedule(rq[1], &attr);
    278		}
    279
    280		/* And switch back to ce[0] for good measure */
    281		rq[0] = i915_request_create(ce[0]);
    282		if (IS_ERR(rq[0])) {
    283			err = PTR_ERR(rq[0]);
    284			i915_request_put(rq[1]);
    285			goto err_ce;
    286		}
    287
    288		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
    289		i915_request_get(rq[0]);
    290		i915_request_add(rq[0]);
    291		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
    292		i915_request_put(rq[1]);
    293		i915_request_put(rq[0]);
    294
    295err_ce:
    296		intel_engine_flush_submission(engine);
    297		igt_spinner_end(&spin);
    298		for (n = 0; n < ARRAY_SIZE(ce); n++) {
    299			if (IS_ERR_OR_NULL(ce[n]))
    300				break;
    301
    302			intel_context_unpin(ce[n]);
    303			intel_context_put(ce[n]);
    304		}
    305
    306		st_engine_heartbeat_enable(engine);
    307		if (igt_live_test_end(&t))
    308			err = -EIO;
    309		if (err)
    310			break;
    311	}
    312
    313	igt_spinner_fini(&spin);
    314	return err;
    315}
    316
    317static int live_unlite_switch(void *arg)
    318{
    319	return live_unlite_restore(arg, 0);
    320}
    321
    322static int live_unlite_preempt(void *arg)
    323{
    324	return live_unlite_restore(arg, I915_PRIORITY_MAX);
    325}
    326
    327static int live_unlite_ring(void *arg)
    328{
    329	struct intel_gt *gt = arg;
    330	struct intel_engine_cs *engine;
    331	struct igt_spinner spin;
    332	enum intel_engine_id id;
    333	int err = 0;
    334
    335	/*
     336	 * Set up a preemption event that will cause almost the entire ring
    337	 * to be unwound, potentially fooling our intel_ring_direction()
    338	 * into emitting a forward lite-restore instead of the rollback.
    339	 */
    340
    341	if (igt_spinner_init(&spin, gt))
    342		return -ENOMEM;
    343
    344	for_each_engine(engine, gt, id) {
    345		struct intel_context *ce[2] = {};
    346		struct i915_request *rq;
    347		struct igt_live_test t;
    348		int n;
    349
    350		if (!intel_engine_has_preemption(engine))
    351			continue;
    352
    353		if (!intel_engine_can_store_dword(engine))
    354			continue;
    355
    356		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
    357			err = -EIO;
    358			break;
    359		}
    360		st_engine_heartbeat_disable(engine);
    361
    362		for (n = 0; n < ARRAY_SIZE(ce); n++) {
    363			struct intel_context *tmp;
    364
    365			tmp = intel_context_create(engine);
    366			if (IS_ERR(tmp)) {
    367				err = PTR_ERR(tmp);
    368				goto err_ce;
    369			}
    370
    371			err = intel_context_pin(tmp);
    372			if (err) {
    373				intel_context_put(tmp);
    374				goto err_ce;
    375			}
    376
    377			memset32(tmp->ring->vaddr,
    378				 0xdeadbeef, /* trigger a hang if executed */
    379				 tmp->ring->vma->size / sizeof(u32));
    380
    381			ce[n] = tmp;
    382		}
    383
    384		/* Create max prio spinner, followed by N low prio nops */
    385		rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
    386		if (IS_ERR(rq)) {
    387			err = PTR_ERR(rq);
    388			goto err_ce;
    389		}
    390
    391		i915_request_get(rq);
    392		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
    393		i915_request_add(rq);
    394
    395		if (!igt_wait_for_spinner(&spin, rq)) {
    396			intel_gt_set_wedged(gt);
    397			i915_request_put(rq);
    398			err = -ETIME;
    399			goto err_ce;
    400		}
    401
     402		/* Fill the ring until we cause a wrap */
    403		n = 0;
    404		while (intel_ring_direction(ce[0]->ring,
    405					    rq->wa_tail,
    406					    ce[0]->ring->tail) <= 0) {
    407			struct i915_request *tmp;
    408
    409			tmp = intel_context_create_request(ce[0]);
    410			if (IS_ERR(tmp)) {
    411				err = PTR_ERR(tmp);
    412				i915_request_put(rq);
    413				goto err_ce;
    414			}
    415
    416			i915_request_add(tmp);
    417			intel_engine_flush_submission(engine);
    418			n++;
    419		}
    420		intel_engine_flush_submission(engine);
    421		pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
    422			 engine->name, n,
    423			 ce[0]->ring->size,
    424			 ce[0]->ring->tail,
    425			 ce[0]->ring->emit,
    426			 rq->tail);
    427		GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
    428						rq->tail,
    429						ce[0]->ring->tail) <= 0);
    430		i915_request_put(rq);
    431
    432		/* Create a second ring to preempt the first ring after rq[0] */
    433		rq = intel_context_create_request(ce[1]);
    434		if (IS_ERR(rq)) {
    435			err = PTR_ERR(rq);
    436			goto err_ce;
    437		}
    438
    439		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
    440		i915_request_get(rq);
    441		i915_request_add(rq);
    442
    443		err = wait_for_submit(engine, rq, HZ / 2);
    444		i915_request_put(rq);
    445		if (err) {
    446			pr_err("%s: preemption request was not submitted\n",
    447			       engine->name);
    448			err = -ETIME;
    449		}
    450
    451		pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
    452			 engine->name,
    453			 ce[0]->ring->tail, ce[0]->ring->emit,
    454			 ce[1]->ring->tail, ce[1]->ring->emit);
    455
    456err_ce:
    457		intel_engine_flush_submission(engine);
    458		igt_spinner_end(&spin);
    459		for (n = 0; n < ARRAY_SIZE(ce); n++) {
    460			if (IS_ERR_OR_NULL(ce[n]))
    461				break;
    462
    463			intel_context_unpin(ce[n]);
    464			intel_context_put(ce[n]);
    465		}
    466		st_engine_heartbeat_enable(engine);
    467		if (igt_live_test_end(&t))
    468			err = -EIO;
    469		if (err)
    470			break;
    471	}
    472
    473	igt_spinner_fini(&spin);
    474	return err;
    475}
    476
    477static int live_pin_rewind(void *arg)
    478{
    479	struct intel_gt *gt = arg;
    480	struct intel_engine_cs *engine;
    481	enum intel_engine_id id;
    482	int err = 0;
    483
    484	/*
     485	 * We have to be careful not to trust intel_ring too much; for example,
     486	 * ring->head is updated upon retire, which is out of sync with pinning
     487	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
     488	 * or else we risk writing an older, stale value.
     489	 *
     490	 * To simulate this, let's apply a bit of deliberate sabotage.
    491	 */
    492
    493	for_each_engine(engine, gt, id) {
    494		struct intel_context *ce;
    495		struct i915_request *rq;
    496		struct intel_ring *ring;
    497		struct igt_live_test t;
    498
    499		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
    500			err = -EIO;
    501			break;
    502		}
    503
    504		ce = intel_context_create(engine);
    505		if (IS_ERR(ce)) {
    506			err = PTR_ERR(ce);
    507			break;
    508		}
    509
    510		err = intel_context_pin(ce);
    511		if (err) {
    512			intel_context_put(ce);
    513			break;
    514		}
    515
    516		/* Keep the context awake while we play games */
    517		err = i915_active_acquire(&ce->active);
    518		if (err) {
    519			intel_context_unpin(ce);
    520			intel_context_put(ce);
    521			break;
    522		}
    523		ring = ce->ring;
    524
    525		/* Poison the ring, and offset the next request from HEAD */
    526		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
    527		ring->emit = ring->size / 2;
    528		ring->tail = ring->emit;
    529		GEM_BUG_ON(ring->head);
    530
    531		intel_context_unpin(ce);
    532
    533		/* Submit a simple nop request */
    534		GEM_BUG_ON(intel_context_is_pinned(ce));
    535		rq = intel_context_create_request(ce);
    536		i915_active_release(&ce->active); /* e.g. async retire */
    537		intel_context_put(ce);
    538		if (IS_ERR(rq)) {
    539			err = PTR_ERR(rq);
    540			break;
    541		}
    542		GEM_BUG_ON(!rq->head);
    543		i915_request_add(rq);
    544
    545		/* Expect not to hang! */
    546		if (igt_live_test_end(&t)) {
    547			err = -EIO;
    548			break;
    549		}
    550	}
    551
    552	return err;
    553}
    554
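        /*
         * Disable the submission tasklet and claim the per-engine reset bit
         * so the test can drive the tasklet and reset by hand; if the bit
         * is already held, wedge the GT and bail.
         */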
    555static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
    556{
    557	tasklet_disable(&engine->sched_engine->tasklet);
    558	local_bh_disable();
    559
    560	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
    561			     &engine->gt->reset.flags)) {
    562		local_bh_enable();
    563		tasklet_enable(&engine->sched_engine->tasklet);
    564
    565		intel_gt_set_wedged(engine->gt);
    566		return -EBUSY;
    567	}
    568
    569	return 0;
    570}
    571
    572static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
    573{
    574	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
    575			      &engine->gt->reset.flags);
    576
    577	local_bh_enable();
    578	tasklet_enable(&engine->sched_engine->tasklet);
    579}
    580
    581static int live_hold_reset(void *arg)
    582{
    583	struct intel_gt *gt = arg;
    584	struct intel_engine_cs *engine;
    585	enum intel_engine_id id;
    586	struct igt_spinner spin;
    587	int err = 0;
    588
    589	/*
    590	 * In order to support offline error capture for fast preempt reset,
    591	 * we need to decouple the guilty request and ensure that it and its
     592	 * descendants are not executed while the capture is in progress.
    593	 */
    594
    595	if (!intel_has_reset_engine(gt))
    596		return 0;
    597
    598	if (igt_spinner_init(&spin, gt))
    599		return -ENOMEM;
    600
    601	for_each_engine(engine, gt, id) {
    602		struct intel_context *ce;
    603		struct i915_request *rq;
    604
    605		ce = intel_context_create(engine);
    606		if (IS_ERR(ce)) {
    607			err = PTR_ERR(ce);
    608			break;
    609		}
    610
    611		st_engine_heartbeat_disable(engine);
    612
    613		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
    614		if (IS_ERR(rq)) {
    615			err = PTR_ERR(rq);
    616			goto out;
    617		}
    618		i915_request_add(rq);
    619
    620		if (!igt_wait_for_spinner(&spin, rq)) {
    621			intel_gt_set_wedged(gt);
    622			err = -ETIME;
    623			goto out;
    624		}
    625
    626		/* We have our request executing, now remove it and reset */
    627
    628		err = engine_lock_reset_tasklet(engine);
    629		if (err)
    630			goto out;
    631
    632		engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
    633		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
    634
    635		i915_request_get(rq);
    636		execlists_hold(engine, rq);
    637		GEM_BUG_ON(!i915_request_on_hold(rq));
    638
    639		__intel_engine_reset_bh(engine, NULL);
    640		GEM_BUG_ON(rq->fence.error != -EIO);
    641
    642		engine_unlock_reset_tasklet(engine);
    643
    644		/* Check that we do not resubmit the held request */
    645		if (!i915_request_wait(rq, 0, HZ / 5)) {
    646			pr_err("%s: on hold request completed!\n",
    647			       engine->name);
    648			i915_request_put(rq);
    649			err = -EIO;
    650			goto out;
    651		}
    652		GEM_BUG_ON(!i915_request_on_hold(rq));
    653
    654		/* But is resubmitted on release */
    655		execlists_unhold(engine, rq);
    656		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
    657			pr_err("%s: held request did not complete!\n",
    658			       engine->name);
    659			intel_gt_set_wedged(gt);
    660			err = -ETIME;
    661		}
    662		i915_request_put(rq);
    663
    664out:
    665		st_engine_heartbeat_enable(engine);
    666		intel_context_put(ce);
    667		if (err)
    668			break;
    669	}
    670
    671	igt_spinner_fini(&spin);
    672	return err;
    673}
    674
    675static const char *error_repr(int err)
    676{
    677	return err ? "bad" : "good";
    678}
    679
    680static int live_error_interrupt(void *arg)
    681{
    682	static const struct error_phase {
    683		enum { GOOD = 0, BAD = -EIO } error[2];
    684	} phases[] = {
    685		{ { BAD,  GOOD } },
    686		{ { BAD,  BAD  } },
    687		{ { BAD,  GOOD } },
    688		{ { GOOD, GOOD } }, /* sentinel */
    689	};
    690	struct intel_gt *gt = arg;
    691	struct intel_engine_cs *engine;
    692	enum intel_engine_id id;
    693
    694	/*
    695	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
    696	 * of invalid commands in user batches that will cause a GPU hang.
    697	 * This is a faster mechanism than using hangcheck/heartbeats, but
    698	 * only detects problems the HW knows about -- it will not warn when
    699	 * we kill the HW!
    700	 *
    701	 * To verify our detection and reset, we throw some invalid commands
    702	 * at the HW and wait for the interrupt.
    703	 */
    704
    705	if (!intel_has_reset_engine(gt))
    706		return 0;
    707
    708	for_each_engine(engine, gt, id) {
    709		const struct error_phase *p;
    710		int err = 0;
    711
    712		st_engine_heartbeat_disable(engine);
    713
    714		for (p = phases; p->error[0] != GOOD; p++) {
    715			struct i915_request *client[ARRAY_SIZE(phases->error)];
    716			u32 *cs;
    717			int i;
    718
    719			memset(client, 0, sizeof(*client));
    720			for (i = 0; i < ARRAY_SIZE(client); i++) {
    721				struct intel_context *ce;
    722				struct i915_request *rq;
    723
    724				ce = intel_context_create(engine);
    725				if (IS_ERR(ce)) {
    726					err = PTR_ERR(ce);
    727					goto out;
    728				}
    729
    730				rq = intel_context_create_request(ce);
    731				intel_context_put(ce);
    732				if (IS_ERR(rq)) {
    733					err = PTR_ERR(rq);
    734					goto out;
    735				}
    736
    737				if (rq->engine->emit_init_breadcrumb) {
    738					err = rq->engine->emit_init_breadcrumb(rq);
    739					if (err) {
    740						i915_request_add(rq);
    741						goto out;
    742					}
    743				}
    744
    745				cs = intel_ring_begin(rq, 2);
    746				if (IS_ERR(cs)) {
    747					i915_request_add(rq);
    748					err = PTR_ERR(cs);
    749					goto out;
    750				}
    751
    752				if (p->error[i]) {
    753					*cs++ = 0xdeadbeef;
    754					*cs++ = 0xdeadbeef;
    755				} else {
    756					*cs++ = MI_NOOP;
    757					*cs++ = MI_NOOP;
    758				}
    759
    760				client[i] = i915_request_get(rq);
    761				i915_request_add(rq);
    762			}
    763
    764			err = wait_for_submit(engine, client[0], HZ / 2);
    765			if (err) {
    766				pr_err("%s: first request did not start within time!\n",
    767				       engine->name);
    768				err = -ETIME;
    769				goto out;
    770			}
    771
    772			for (i = 0; i < ARRAY_SIZE(client); i++) {
    773				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
    774					pr_debug("%s: %s request incomplete!\n",
    775						 engine->name,
    776						 error_repr(p->error[i]));
    777
    778				if (!i915_request_started(client[i])) {
    779					pr_err("%s: %s request not started!\n",
    780					       engine->name,
    781					       error_repr(p->error[i]));
    782					err = -ETIME;
    783					goto out;
    784				}
    785
    786				/* Kick the tasklet to process the error */
    787				intel_engine_flush_submission(engine);
    788				if (client[i]->fence.error != p->error[i]) {
    789					pr_err("%s: %s request (%s) with wrong error code: %d\n",
    790					       engine->name,
    791					       error_repr(p->error[i]),
    792					       i915_request_completed(client[i]) ? "completed" : "running",
    793					       client[i]->fence.error);
    794					err = -EINVAL;
    795					goto out;
    796				}
    797			}
    798
    799out:
    800			for (i = 0; i < ARRAY_SIZE(client); i++)
    801				if (client[i])
    802					i915_request_put(client[i]);
    803			if (err) {
    804				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
    805				       engine->name, p - phases,
    806				       p->error[0], p->error[1]);
    807				break;
    808			}
    809		}
    810
    811		st_engine_heartbeat_enable(engine);
    812		if (err) {
    813			intel_gt_set_wedged(gt);
    814			return err;
    815		}
    816	}
    817
    818	return 0;
    819}
    820
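        /*
         * Busy-wait on semaphore slot @idx until it becomes non-zero, then
         * (for @idx > 0) write 1 into slot @idx - 1 to release the previous
         * waiter in the chain.
         */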
    821static int
    822emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
    823{
    824	u32 *cs;
    825
    826	cs = intel_ring_begin(rq, 10);
    827	if (IS_ERR(cs))
    828		return PTR_ERR(cs);
    829
    830	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
    831
    832	*cs++ = MI_SEMAPHORE_WAIT |
    833		MI_SEMAPHORE_GLOBAL_GTT |
    834		MI_SEMAPHORE_POLL |
    835		MI_SEMAPHORE_SAD_NEQ_SDD;
    836	*cs++ = 0;
    837	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
    838	*cs++ = 0;
    839
    840	if (idx > 0) {
    841		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
    842		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
    843		*cs++ = 0;
    844		*cs++ = 1;
    845	} else {
    846		*cs++ = MI_NOOP;
    847		*cs++ = MI_NOOP;
    848		*cs++ = MI_NOOP;
    849		*cs++ = MI_NOOP;
    850	}
    851
    852	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
    853
    854	intel_ring_advance(rq, cs);
    855	return 0;
    856}
    857
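        /*
         * Create a fresh context on @engine and submit a request running
         * emit_semaphore_chain() for slot @idx.
         */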
    858static struct i915_request *
    859semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
    860{
    861	struct intel_context *ce;
    862	struct i915_request *rq;
    863	int err;
    864
    865	ce = intel_context_create(engine);
    866	if (IS_ERR(ce))
    867		return ERR_CAST(ce);
    868
    869	rq = intel_context_create_request(ce);
    870	if (IS_ERR(rq))
    871		goto out_ce;
    872
    873	err = 0;
    874	if (rq->engine->emit_init_breadcrumb)
    875		err = rq->engine->emit_init_breadcrumb(rq);
    876	if (err == 0)
    877		err = emit_semaphore_chain(rq, vma, idx);
    878	if (err == 0)
    879		i915_request_get(rq);
    880	i915_request_add(rq);
    881	if (err)
    882		rq = ERR_PTR(err);
    883
    884out_ce:
    885	intel_context_put(ce);
    886	return rq;
    887}
    888
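        /*
         * Submit a kernel request at @prio that writes 1 into slot @idx - 1,
         * releasing the tail of the semaphore chain, and kick the scheduler.
         */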
    889static int
    890release_queue(struct intel_engine_cs *engine,
    891	      struct i915_vma *vma,
    892	      int idx, int prio)
    893{
    894	struct i915_sched_attr attr = {
    895		.priority = prio,
    896	};
    897	struct i915_request *rq;
    898	u32 *cs;
    899
    900	rq = intel_engine_create_kernel_request(engine);
    901	if (IS_ERR(rq))
    902		return PTR_ERR(rq);
    903
    904	cs = intel_ring_begin(rq, 4);
    905	if (IS_ERR(cs)) {
    906		i915_request_add(rq);
    907		return PTR_ERR(cs);
    908	}
    909
    910	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
    911	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
    912	*cs++ = 0;
    913	*cs++ = 1;
    914
    915	intel_ring_advance(rq, cs);
    916
    917	i915_request_get(rq);
    918	i915_request_add(rq);
    919
    920	local_bh_disable();
    921	engine->sched_engine->schedule(rq, &attr);
    922	local_bh_enable(); /* kick tasklet */
    923
    924	i915_request_put(rq);
    925
    926	return 0;
    927}
    928
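        /*
         * Queue a chain of semaphore waiters across every engine and release
         * the final link; the head request can only complete if timeslicing
         * rotates through all of the waiters.
         */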
    929static int
    930slice_semaphore_queue(struct intel_engine_cs *outer,
    931		      struct i915_vma *vma,
    932		      int count)
    933{
    934	struct intel_engine_cs *engine;
    935	struct i915_request *head;
    936	enum intel_engine_id id;
    937	int err, i, n = 0;
    938
    939	head = semaphore_queue(outer, vma, n++);
    940	if (IS_ERR(head))
    941		return PTR_ERR(head);
    942
    943	for_each_engine(engine, outer->gt, id) {
    944		if (!intel_engine_has_preemption(engine))
    945			continue;
    946
    947		for (i = 0; i < count; i++) {
    948			struct i915_request *rq;
    949
    950			rq = semaphore_queue(engine, vma, n++);
    951			if (IS_ERR(rq)) {
    952				err = PTR_ERR(rq);
    953				goto out;
    954			}
    955
    956			i915_request_put(rq);
    957		}
    958	}
    959
    960	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
    961	if (err)
    962		goto out;
    963
    964	if (i915_request_wait(head, 0,
    965			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
    966		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
    967		       outer->name, count, n);
    968		GEM_TRACE_DUMP();
    969		intel_gt_set_wedged(outer->gt);
    970		err = -EIO;
    971	}
    972
    973out:
    974	i915_request_put(head);
    975	return err;
    976}
    977
    978static int live_timeslice_preempt(void *arg)
    979{
    980	struct intel_gt *gt = arg;
    981	struct drm_i915_gem_object *obj;
    982	struct intel_engine_cs *engine;
    983	enum intel_engine_id id;
    984	struct i915_vma *vma;
    985	void *vaddr;
    986	int err = 0;
    987
    988	/*
    989	 * If a request takes too long, we would like to give other users
    990	 * a fair go on the GPU. In particular, users may create batches
    991	 * that wait upon external input, where that input may even be
    992	 * supplied by another GPU job. To avoid blocking forever, we
    993	 * need to preempt the current task and replace it with another
    994	 * ready task.
    995	 */
    996	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
    997		return 0;
    998
    999	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
   1000	if (IS_ERR(obj))
   1001		return PTR_ERR(obj);
   1002
   1003	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
   1004	if (IS_ERR(vma)) {
   1005		err = PTR_ERR(vma);
   1006		goto err_obj;
   1007	}
   1008
   1009	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
   1010	if (IS_ERR(vaddr)) {
   1011		err = PTR_ERR(vaddr);
   1012		goto err_obj;
   1013	}
   1014
   1015	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
   1016	if (err)
   1017		goto err_map;
   1018
   1019	err = i915_vma_sync(vma);
   1020	if (err)
   1021		goto err_pin;
   1022
   1023	for_each_engine(engine, gt, id) {
   1024		if (!intel_engine_has_preemption(engine))
   1025			continue;
   1026
   1027		memset(vaddr, 0, PAGE_SIZE);
   1028
   1029		st_engine_heartbeat_disable(engine);
   1030		err = slice_semaphore_queue(engine, vma, 5);
   1031		st_engine_heartbeat_enable(engine);
   1032		if (err)
   1033			goto err_pin;
   1034
   1035		if (igt_flush_test(gt->i915)) {
   1036			err = -EIO;
   1037			goto err_pin;
   1038		}
   1039	}
   1040
   1041err_pin:
   1042	i915_vma_unpin(vma);
   1043err_map:
   1044	i915_gem_object_unpin_map(obj);
   1045err_obj:
   1046	i915_gem_object_put(obj);
   1047	return err;
   1048}
   1049
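        /*
         * Emit a request that waits for slot[0] >= @idx, stores the engine
         * RING_TIMESTAMP into slot[@idx] and then bumps slot[0] to @idx + 1.
         */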
   1050static struct i915_request *
   1051create_rewinder(struct intel_context *ce,
   1052		struct i915_request *wait,
   1053		void *slot, int idx)
   1054{
   1055	const u32 offset =
   1056		i915_ggtt_offset(ce->engine->status_page.vma) +
   1057		offset_in_page(slot);
   1058	struct i915_request *rq;
   1059	u32 *cs;
   1060	int err;
   1061
   1062	rq = intel_context_create_request(ce);
   1063	if (IS_ERR(rq))
   1064		return rq;
   1065
   1066	if (wait) {
   1067		err = i915_request_await_dma_fence(rq, &wait->fence);
   1068		if (err)
   1069			goto err;
   1070	}
   1071
   1072	cs = intel_ring_begin(rq, 14);
   1073	if (IS_ERR(cs)) {
   1074		err = PTR_ERR(cs);
   1075		goto err;
   1076	}
   1077
   1078	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   1079	*cs++ = MI_NOOP;
   1080
   1081	*cs++ = MI_SEMAPHORE_WAIT |
   1082		MI_SEMAPHORE_GLOBAL_GTT |
   1083		MI_SEMAPHORE_POLL |
   1084		MI_SEMAPHORE_SAD_GTE_SDD;
   1085	*cs++ = idx;
   1086	*cs++ = offset;
   1087	*cs++ = 0;
   1088
   1089	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
   1090	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
   1091	*cs++ = offset + idx * sizeof(u32);
   1092	*cs++ = 0;
   1093
   1094	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
   1095	*cs++ = offset;
   1096	*cs++ = 0;
   1097	*cs++ = idx + 1;
   1098
   1099	intel_ring_advance(rq, cs);
   1100
   1101	err = 0;
   1102err:
   1103	i915_request_get(rq);
   1104	i915_request_add(rq);
   1105	if (err) {
   1106		i915_request_put(rq);
   1107		return ERR_PTR(err);
   1108	}
   1109
   1110	return rq;
   1111}
   1112
   1113static int live_timeslice_rewind(void *arg)
   1114{
   1115	struct intel_gt *gt = arg;
   1116	struct intel_engine_cs *engine;
   1117	enum intel_engine_id id;
   1118
   1119	/*
   1120	 * The usual presumption on timeslice expiration is that we replace
   1121	 * the active context with another. However, given a chain of
    1122	 * dependencies we may end up replacing the context with itself,
    1123	 * but resubmitting only a few of its requests, forcing us to rewind
    1124	 * the RING_TAIL of the original request.
   1125	 */
   1126	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
   1127		return 0;
   1128
   1129	for_each_engine(engine, gt, id) {
   1130		enum { A1, A2, B1 };
   1131		enum { X = 1, Z, Y };
   1132		struct i915_request *rq[3] = {};
   1133		struct intel_context *ce;
   1134		unsigned long timeslice;
   1135		int i, err = 0;
   1136		u32 *slot;
   1137
   1138		if (!intel_engine_has_timeslices(engine))
   1139			continue;
   1140
   1141		/*
   1142		 * A:rq1 -- semaphore wait, timestamp X
   1143		 * A:rq2 -- write timestamp Y
   1144		 *
   1145		 * B:rq1 [await A:rq1] -- write timestamp Z
   1146		 *
   1147		 * Force timeslice, release semaphore.
   1148		 *
   1149		 * Expect execution/evaluation order XZY
   1150		 */
   1151
   1152		st_engine_heartbeat_disable(engine);
   1153		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
   1154
   1155		slot = memset32(engine->status_page.addr + 1000, 0, 4);
   1156
   1157		ce = intel_context_create(engine);
   1158		if (IS_ERR(ce)) {
   1159			err = PTR_ERR(ce);
   1160			goto err;
   1161		}
   1162
   1163		rq[A1] = create_rewinder(ce, NULL, slot, X);
   1164		if (IS_ERR(rq[A1])) {
   1165			intel_context_put(ce);
   1166			goto err;
   1167		}
   1168
   1169		rq[A2] = create_rewinder(ce, NULL, slot, Y);
   1170		intel_context_put(ce);
   1171		if (IS_ERR(rq[A2]))
   1172			goto err;
   1173
   1174		err = wait_for_submit(engine, rq[A2], HZ / 2);
   1175		if (err) {
   1176			pr_err("%s: failed to submit first context\n",
   1177			       engine->name);
   1178			goto err;
   1179		}
   1180
   1181		ce = intel_context_create(engine);
   1182		if (IS_ERR(ce)) {
   1183			err = PTR_ERR(ce);
   1184			goto err;
   1185		}
   1186
   1187		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
   1188		intel_context_put(ce);
    1189		if (IS_ERR(rq[B1]))
   1190			goto err;
   1191
   1192		err = wait_for_submit(engine, rq[B1], HZ / 2);
   1193		if (err) {
   1194			pr_err("%s: failed to submit second context\n",
   1195			       engine->name);
   1196			goto err;
   1197		}
   1198
   1199		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
   1200		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
   1201		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
   1202			/* Wait for the timeslice to kick in */
   1203			del_timer(&engine->execlists.timer);
   1204			tasklet_hi_schedule(&engine->sched_engine->tasklet);
   1205			intel_engine_flush_submission(engine);
   1206		}
   1207		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
   1208		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
   1209		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
   1210		GEM_BUG_ON(i915_request_is_active(rq[A2]));
   1211
   1212		/* Release the hounds! */
   1213		slot[0] = 1;
   1214		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
   1215
   1216		for (i = 1; i <= 3; i++) {
   1217			unsigned long timeout = jiffies + HZ / 2;
   1218
   1219			while (!READ_ONCE(slot[i]) &&
   1220			       time_before(jiffies, timeout))
   1221				;
   1222
   1223			if (!time_before(jiffies, timeout)) {
   1224				pr_err("%s: rq[%d] timed out\n",
   1225				       engine->name, i - 1);
   1226				err = -ETIME;
   1227				goto err;
   1228			}
   1229
   1230			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
   1231		}
   1232
   1233		/* XZY: XZ < XY */
   1234		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
   1235			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
   1236			       engine->name,
   1237			       slot[Z] - slot[X],
   1238			       slot[Y] - slot[X]);
   1239			err = -EINVAL;
   1240		}
   1241
   1242err:
   1243		memset32(&slot[0], -1, 4);
   1244		wmb();
   1245
   1246		engine->props.timeslice_duration_ms = timeslice;
   1247		st_engine_heartbeat_enable(engine);
   1248		for (i = 0; i < 3; i++)
   1249			i915_request_put(rq[i]);
   1250		if (igt_flush_test(gt->i915))
   1251			err = -EIO;
   1252		if (err)
   1253			return err;
   1254	}
   1255
   1256	return 0;
   1257}
   1258
   1259static struct i915_request *nop_request(struct intel_engine_cs *engine)
   1260{
   1261	struct i915_request *rq;
   1262
   1263	rq = intel_engine_create_kernel_request(engine);
   1264	if (IS_ERR(rq))
   1265		return rq;
   1266
   1267	i915_request_get(rq);
   1268	i915_request_add(rq);
   1269
   1270	return rq;
   1271}
   1272
   1273static long slice_timeout(struct intel_engine_cs *engine)
   1274{
   1275	long timeout;
   1276
   1277	/* Enough time for a timeslice to kick in, and kick out */
   1278	timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
   1279
   1280	/* Enough time for the nop request to complete */
   1281	timeout += HZ / 5;
   1282
   1283	return timeout + 1;
   1284}
   1285
   1286static int live_timeslice_queue(void *arg)
   1287{
   1288	struct intel_gt *gt = arg;
   1289	struct drm_i915_gem_object *obj;
   1290	struct intel_engine_cs *engine;
   1291	enum intel_engine_id id;
   1292	struct i915_vma *vma;
   1293	void *vaddr;
   1294	int err = 0;
   1295
   1296	/*
    1297	 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
    1298	 * timeslicing between them disabled, we *do* enable timeslicing
   1299	 * if the queue demands it. (Normally, we do not submit if
   1300	 * ELSP[1] is already occupied, so must rely on timeslicing to
   1301	 * eject ELSP[0] in favour of the queue.)
   1302	 */
   1303	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
   1304		return 0;
   1305
   1306	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
   1307	if (IS_ERR(obj))
   1308		return PTR_ERR(obj);
   1309
   1310	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
   1311	if (IS_ERR(vma)) {
   1312		err = PTR_ERR(vma);
   1313		goto err_obj;
   1314	}
   1315
   1316	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
   1317	if (IS_ERR(vaddr)) {
   1318		err = PTR_ERR(vaddr);
   1319		goto err_obj;
   1320	}
   1321
   1322	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
   1323	if (err)
   1324		goto err_map;
   1325
   1326	err = i915_vma_sync(vma);
   1327	if (err)
   1328		goto err_pin;
   1329
   1330	for_each_engine(engine, gt, id) {
   1331		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
   1332		struct i915_request *rq, *nop;
   1333
   1334		if (!intel_engine_has_preemption(engine))
   1335			continue;
   1336
   1337		st_engine_heartbeat_disable(engine);
   1338		memset(vaddr, 0, PAGE_SIZE);
   1339
   1340		/* ELSP[0]: semaphore wait */
   1341		rq = semaphore_queue(engine, vma, 0);
   1342		if (IS_ERR(rq)) {
   1343			err = PTR_ERR(rq);
   1344			goto err_heartbeat;
   1345		}
   1346		engine->sched_engine->schedule(rq, &attr);
   1347		err = wait_for_submit(engine, rq, HZ / 2);
   1348		if (err) {
   1349			pr_err("%s: Timed out trying to submit semaphores\n",
   1350			       engine->name);
   1351			goto err_rq;
   1352		}
   1353
   1354		/* ELSP[1]: nop request */
   1355		nop = nop_request(engine);
   1356		if (IS_ERR(nop)) {
   1357			err = PTR_ERR(nop);
   1358			goto err_rq;
   1359		}
   1360		err = wait_for_submit(engine, nop, HZ / 2);
   1361		i915_request_put(nop);
   1362		if (err) {
   1363			pr_err("%s: Timed out trying to submit nop\n",
   1364			       engine->name);
   1365			goto err_rq;
   1366		}
   1367
   1368		GEM_BUG_ON(i915_request_completed(rq));
   1369		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
   1370
    1371		/* Queue: semaphore signal, with the same priority as the semaphore */
   1372		err = release_queue(engine, vma, 1, effective_prio(rq));
   1373		if (err)
   1374			goto err_rq;
   1375
   1376		/* Wait until we ack the release_queue and start timeslicing */
   1377		do {
   1378			cond_resched();
   1379			intel_engine_flush_submission(engine);
   1380		} while (READ_ONCE(engine->execlists.pending[0]));
   1381
   1382		/* Timeslice every jiffy, so within 2 we should signal */
   1383		if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
   1384			struct drm_printer p =
   1385				drm_info_printer(gt->i915->drm.dev);
   1386
   1387			pr_err("%s: Failed to timeslice into queue\n",
   1388			       engine->name);
   1389			intel_engine_dump(engine, &p,
   1390					  "%s\n", engine->name);
   1391
   1392			memset(vaddr, 0xff, PAGE_SIZE);
   1393			err = -EIO;
   1394		}
   1395err_rq:
   1396		i915_request_put(rq);
   1397err_heartbeat:
   1398		st_engine_heartbeat_enable(engine);
   1399		if (err)
   1400			break;
   1401	}
   1402
   1403err_pin:
   1404	i915_vma_unpin(vma);
   1405err_map:
   1406	i915_gem_object_unpin_map(obj);
   1407err_obj:
   1408	i915_gem_object_put(obj);
   1409	return err;
   1410}
   1411
   1412static int live_timeslice_nopreempt(void *arg)
   1413{
   1414	struct intel_gt *gt = arg;
   1415	struct intel_engine_cs *engine;
   1416	enum intel_engine_id id;
   1417	struct igt_spinner spin;
   1418	int err = 0;
   1419
   1420	/*
   1421	 * We should not timeslice into a request that is marked with
   1422	 * I915_REQUEST_NOPREEMPT.
   1423	 */
   1424	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
   1425		return 0;
   1426
   1427	if (igt_spinner_init(&spin, gt))
   1428		return -ENOMEM;
   1429
   1430	for_each_engine(engine, gt, id) {
   1431		struct intel_context *ce;
   1432		struct i915_request *rq;
   1433		unsigned long timeslice;
   1434
   1435		if (!intel_engine_has_preemption(engine))
   1436			continue;
   1437
   1438		ce = intel_context_create(engine);
   1439		if (IS_ERR(ce)) {
   1440			err = PTR_ERR(ce);
   1441			break;
   1442		}
   1443
   1444		st_engine_heartbeat_disable(engine);
   1445		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
   1446
   1447		/* Create an unpreemptible spinner */
   1448
   1449		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
   1450		intel_context_put(ce);
   1451		if (IS_ERR(rq)) {
   1452			err = PTR_ERR(rq);
   1453			goto out_heartbeat;
   1454		}
   1455
   1456		i915_request_get(rq);
   1457		i915_request_add(rq);
   1458
   1459		if (!igt_wait_for_spinner(&spin, rq)) {
   1460			i915_request_put(rq);
   1461			err = -ETIME;
   1462			goto out_spin;
   1463		}
   1464
   1465		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
   1466		i915_request_put(rq);
   1467
   1468		/* Followed by a maximum priority barrier (heartbeat) */
   1469
   1470		ce = intel_context_create(engine);
   1471		if (IS_ERR(ce)) {
   1472			err = PTR_ERR(ce);
   1473			goto out_spin;
   1474		}
   1475
   1476		rq = intel_context_create_request(ce);
   1477		intel_context_put(ce);
   1478		if (IS_ERR(rq)) {
   1479			err = PTR_ERR(rq);
   1480			goto out_spin;
   1481		}
   1482
   1483		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
   1484		i915_request_get(rq);
   1485		i915_request_add(rq);
   1486
   1487		/*
   1488		 * Wait until the barrier is in ELSP, and we know timeslicing
   1489		 * will have been activated.
   1490		 */
   1491		if (wait_for_submit(engine, rq, HZ / 2)) {
   1492			i915_request_put(rq);
   1493			err = -ETIME;
   1494			goto out_spin;
   1495		}
   1496
   1497		/*
   1498		 * Since the ELSP[0] request is unpreemptible, it should not
   1499		 * allow the maximum priority barrier through. Wait long
   1500		 * enough to see if it is timesliced in by mistake.
   1501		 */
   1502		if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
   1503			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
   1504			       engine->name);
   1505			err = -EINVAL;
   1506		}
   1507		i915_request_put(rq);
   1508
   1509out_spin:
   1510		igt_spinner_end(&spin);
   1511out_heartbeat:
   1512		xchg(&engine->props.timeslice_duration_ms, timeslice);
   1513		st_engine_heartbeat_enable(engine);
   1514		if (err)
   1515			break;
   1516
   1517		if (igt_flush_test(gt->i915)) {
   1518			err = -EIO;
   1519			break;
   1520		}
   1521	}
   1522
   1523	igt_spinner_fini(&spin);
   1524	return err;
   1525}
   1526
   1527static int live_busywait_preempt(void *arg)
   1528{
   1529	struct intel_gt *gt = arg;
   1530	struct i915_gem_context *ctx_hi, *ctx_lo;
   1531	struct intel_engine_cs *engine;
   1532	struct drm_i915_gem_object *obj;
   1533	struct i915_vma *vma;
   1534	enum intel_engine_id id;
   1535	int err = -ENOMEM;
   1536	u32 *map;
   1537
   1538	/*
   1539	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
   1540	 * preempt the busywaits used to synchronise between rings.
   1541	 */
   1542
   1543	ctx_hi = kernel_context(gt->i915, NULL);
   1544	if (!ctx_hi)
   1545		return -ENOMEM;
   1546	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
   1547
   1548	ctx_lo = kernel_context(gt->i915, NULL);
   1549	if (!ctx_lo)
   1550		goto err_ctx_hi;
   1551	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
   1552
   1553	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
   1554	if (IS_ERR(obj)) {
   1555		err = PTR_ERR(obj);
   1556		goto err_ctx_lo;
   1557	}
   1558
   1559	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
   1560	if (IS_ERR(map)) {
   1561		err = PTR_ERR(map);
   1562		goto err_obj;
   1563	}
   1564
   1565	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
   1566	if (IS_ERR(vma)) {
   1567		err = PTR_ERR(vma);
   1568		goto err_map;
   1569	}
   1570
   1571	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
   1572	if (err)
   1573		goto err_map;
   1574
   1575	err = i915_vma_sync(vma);
   1576	if (err)
   1577		goto err_vma;
   1578
   1579	for_each_engine(engine, gt, id) {
   1580		struct i915_request *lo, *hi;
   1581		struct igt_live_test t;
   1582		u32 *cs;
   1583
   1584		if (!intel_engine_has_preemption(engine))
   1585			continue;
   1586
   1587		if (!intel_engine_can_store_dword(engine))
   1588			continue;
   1589
   1590		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
   1591			err = -EIO;
   1592			goto err_vma;
   1593		}
   1594
   1595		/*
   1596		 * We create two requests. The low priority request
   1597		 * busywaits on a semaphore (inside the ringbuffer where
    1598		 * it should be preemptible) and the high priority request
    1599		 * uses a MI_STORE_DWORD_IMM to update the semaphore value,
   1600		 * allowing the first request to complete. If preemption
   1601		 * fails, we hang instead.
   1602		 */
   1603
   1604		lo = igt_request_alloc(ctx_lo, engine);
   1605		if (IS_ERR(lo)) {
   1606			err = PTR_ERR(lo);
   1607			goto err_vma;
   1608		}
   1609
   1610		cs = intel_ring_begin(lo, 8);
   1611		if (IS_ERR(cs)) {
   1612			err = PTR_ERR(cs);
   1613			i915_request_add(lo);
   1614			goto err_vma;
   1615		}
   1616
   1617		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
   1618		*cs++ = i915_ggtt_offset(vma);
   1619		*cs++ = 0;
   1620		*cs++ = 1;
   1621
   1622		/* XXX Do we need a flush + invalidate here? */
   1623
   1624		*cs++ = MI_SEMAPHORE_WAIT |
   1625			MI_SEMAPHORE_GLOBAL_GTT |
   1626			MI_SEMAPHORE_POLL |
   1627			MI_SEMAPHORE_SAD_EQ_SDD;
   1628		*cs++ = 0;
   1629		*cs++ = i915_ggtt_offset(vma);
   1630		*cs++ = 0;
   1631
   1632		intel_ring_advance(lo, cs);
   1633
   1634		i915_request_get(lo);
   1635		i915_request_add(lo);
   1636
   1637		if (wait_for(READ_ONCE(*map), 10)) {
   1638			i915_request_put(lo);
   1639			err = -ETIMEDOUT;
   1640			goto err_vma;
   1641		}
   1642
   1643		/* Low priority request should be busywaiting now */
   1644		if (i915_request_wait(lo, 0, 1) != -ETIME) {
   1645			i915_request_put(lo);
    1646			pr_err("%s: Busywaiting request did not busywait!\n",
   1647			       engine->name);
   1648			err = -EIO;
   1649			goto err_vma;
   1650		}
   1651
   1652		hi = igt_request_alloc(ctx_hi, engine);
   1653		if (IS_ERR(hi)) {
   1654			err = PTR_ERR(hi);
   1655			i915_request_put(lo);
   1656			goto err_vma;
   1657		}
   1658
   1659		cs = intel_ring_begin(hi, 4);
   1660		if (IS_ERR(cs)) {
   1661			err = PTR_ERR(cs);
   1662			i915_request_add(hi);
   1663			i915_request_put(lo);
   1664			goto err_vma;
   1665		}
   1666
   1667		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
   1668		*cs++ = i915_ggtt_offset(vma);
   1669		*cs++ = 0;
   1670		*cs++ = 0;
   1671
   1672		intel_ring_advance(hi, cs);
   1673		i915_request_add(hi);
   1674
   1675		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
   1676			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
   1677
   1678			pr_err("%s: Failed to preempt semaphore busywait!\n",
   1679			       engine->name);
   1680
   1681			intel_engine_dump(engine, &p, "%s\n", engine->name);
   1682			GEM_TRACE_DUMP();
   1683
   1684			i915_request_put(lo);
   1685			intel_gt_set_wedged(gt);
   1686			err = -EIO;
   1687			goto err_vma;
   1688		}
   1689		GEM_BUG_ON(READ_ONCE(*map));
   1690		i915_request_put(lo);
   1691
   1692		if (igt_live_test_end(&t)) {
   1693			err = -EIO;
   1694			goto err_vma;
   1695		}
   1696	}
   1697
   1698	err = 0;
   1699err_vma:
   1700	i915_vma_unpin(vma);
   1701err_map:
   1702	i915_gem_object_unpin_map(obj);
   1703err_obj:
   1704	i915_gem_object_put(obj);
   1705err_ctx_lo:
   1706	kernel_context_close(ctx_lo);
   1707err_ctx_hi:
   1708	kernel_context_close(ctx_hi);
   1709	return err;
   1710}
   1711
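        /*
         * Convenience wrapper: build a spinner request from a GEM context
         * rather than from an explicit intel_context.
         */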
   1712static struct i915_request *
   1713spinner_create_request(struct igt_spinner *spin,
   1714		       struct i915_gem_context *ctx,
   1715		       struct intel_engine_cs *engine,
   1716		       u32 arb)
   1717{
   1718	struct intel_context *ce;
   1719	struct i915_request *rq;
   1720
   1721	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
   1722	if (IS_ERR(ce))
   1723		return ERR_CAST(ce);
   1724
   1725	rq = igt_spinner_create_request(spin, ce, arb);
   1726	intel_context_put(ce);
   1727	return rq;
   1728}
   1729
   1730static int live_preempt(void *arg)
   1731{
   1732	struct intel_gt *gt = arg;
   1733	struct i915_gem_context *ctx_hi, *ctx_lo;
   1734	struct igt_spinner spin_hi, spin_lo;
   1735	struct intel_engine_cs *engine;
   1736	enum intel_engine_id id;
   1737	int err = -ENOMEM;
   1738
   1739	ctx_hi = kernel_context(gt->i915, NULL);
   1740	if (!ctx_hi)
   1741		return -ENOMEM;
   1742	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
   1743
   1744	ctx_lo = kernel_context(gt->i915, NULL);
   1745	if (!ctx_lo)
   1746		goto err_ctx_hi;
   1747	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
   1748
   1749	if (igt_spinner_init(&spin_hi, gt))
   1750		goto err_ctx_lo;
   1751
   1752	if (igt_spinner_init(&spin_lo, gt))
   1753		goto err_spin_hi;
   1754
   1755	for_each_engine(engine, gt, id) {
   1756		struct igt_live_test t;
   1757		struct i915_request *rq;
   1758
   1759		if (!intel_engine_has_preemption(engine))
   1760			continue;
   1761
   1762		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
   1763			err = -EIO;
   1764			goto err_spin_lo;
   1765		}
   1766
   1767		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
   1768					    MI_ARB_CHECK);
   1769		if (IS_ERR(rq)) {
   1770			err = PTR_ERR(rq);
   1771			goto err_spin_lo;
   1772		}
   1773
   1774		i915_request_add(rq);
   1775		if (!igt_wait_for_spinner(&spin_lo, rq)) {
   1776			GEM_TRACE("lo spinner failed to start\n");
   1777			GEM_TRACE_DUMP();
   1778			intel_gt_set_wedged(gt);
   1779			err = -EIO;
   1780			goto err_spin_lo;
   1781		}
   1782
   1783		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
   1784					    MI_ARB_CHECK);
   1785		if (IS_ERR(rq)) {
   1786			igt_spinner_end(&spin_lo);
   1787			err = PTR_ERR(rq);
   1788			goto err_spin_lo;
   1789		}
   1790
   1791		i915_request_add(rq);
   1792		if (!igt_wait_for_spinner(&spin_hi, rq)) {
   1793			GEM_TRACE("hi spinner failed to start\n");
   1794			GEM_TRACE_DUMP();
   1795			intel_gt_set_wedged(gt);
   1796			err = -EIO;
   1797			goto err_spin_lo;
   1798		}
   1799
   1800		igt_spinner_end(&spin_hi);
   1801		igt_spinner_end(&spin_lo);
   1802
   1803		if (igt_live_test_end(&t)) {
   1804			err = -EIO;
   1805			goto err_spin_lo;
   1806		}
   1807	}
   1808
   1809	err = 0;
   1810err_spin_lo:
   1811	igt_spinner_fini(&spin_lo);
   1812err_spin_hi:
   1813	igt_spinner_fini(&spin_hi);
   1814err_ctx_lo:
   1815	kernel_context_close(ctx_lo);
   1816err_ctx_hi:
   1817	kernel_context_close(ctx_hi);
   1818	return err;
   1819}
   1820
   1821static int live_late_preempt(void *arg)
   1822{
   1823	struct intel_gt *gt = arg;
   1824	struct i915_gem_context *ctx_hi, *ctx_lo;
   1825	struct igt_spinner spin_hi, spin_lo;
   1826	struct intel_engine_cs *engine;
   1827	struct i915_sched_attr attr = {};
   1828	enum intel_engine_id id;
   1829	int err = -ENOMEM;
   1830
   1831	ctx_hi = kernel_context(gt->i915, NULL);
   1832	if (!ctx_hi)
   1833		return -ENOMEM;
   1834
   1835	ctx_lo = kernel_context(gt->i915, NULL);
   1836	if (!ctx_lo)
   1837		goto err_ctx_hi;
   1838
   1839	if (igt_spinner_init(&spin_hi, gt))
   1840		goto err_ctx_lo;
   1841
   1842	if (igt_spinner_init(&spin_lo, gt))
   1843		goto err_spin_hi;
   1844
   1845	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
   1846	ctx_lo->sched.priority = 1;
   1847
   1848	for_each_engine(engine, gt, id) {
   1849		struct igt_live_test t;
   1850		struct i915_request *rq;
   1851
   1852		if (!intel_engine_has_preemption(engine))
   1853			continue;
   1854
   1855		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
   1856			err = -EIO;
   1857			goto err_spin_lo;
   1858		}
   1859
   1860		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
   1861					    MI_ARB_CHECK);
   1862		if (IS_ERR(rq)) {
   1863			err = PTR_ERR(rq);
   1864			goto err_spin_lo;
   1865		}
   1866
   1867		i915_request_add(rq);
   1868		if (!igt_wait_for_spinner(&spin_lo, rq)) {
   1869			pr_err("First context failed to start\n");
   1870			goto err_wedged;
   1871		}
   1872
   1873		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
   1874					    MI_NOOP);
   1875		if (IS_ERR(rq)) {
   1876			igt_spinner_end(&spin_lo);
   1877			err = PTR_ERR(rq);
   1878			goto err_spin_lo;
   1879		}
   1880
   1881		i915_request_add(rq);
   1882		if (igt_wait_for_spinner(&spin_hi, rq)) {
   1883			pr_err("Second context overtook first?\n");
   1884			goto err_wedged;
   1885		}
   1886
   1887		attr.priority = I915_PRIORITY_MAX;
   1888		engine->sched_engine->schedule(rq, &attr);
   1889
   1890		if (!igt_wait_for_spinner(&spin_hi, rq)) {
   1891			pr_err("High priority context failed to preempt the low priority context\n");
   1892			GEM_TRACE_DUMP();
   1893			goto err_wedged;
   1894		}
   1895
   1896		igt_spinner_end(&spin_hi);
   1897		igt_spinner_end(&spin_lo);
   1898
   1899		if (igt_live_test_end(&t)) {
   1900			err = -EIO;
   1901			goto err_spin_lo;
   1902		}
   1903	}
   1904
   1905	err = 0;
   1906err_spin_lo:
   1907	igt_spinner_fini(&spin_lo);
   1908err_spin_hi:
   1909	igt_spinner_fini(&spin_hi);
   1910err_ctx_lo:
   1911	kernel_context_close(ctx_lo);
   1912err_ctx_hi:
   1913	kernel_context_close(ctx_hi);
   1914	return err;
   1915
   1916err_wedged:
   1917	igt_spinner_end(&spin_hi);
   1918	igt_spinner_end(&spin_lo);
   1919	intel_gt_set_wedged(gt);
   1920	err = -EIO;
   1921	goto err_spin_lo;
   1922}
   1923
   1924struct preempt_client {
   1925	struct igt_spinner spin;
   1926	struct i915_gem_context *ctx;
   1927};
   1928
   1929static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
   1930{
   1931	c->ctx = kernel_context(gt->i915, NULL);
   1932	if (!c->ctx)
   1933		return -ENOMEM;
   1934
   1935	if (igt_spinner_init(&c->spin, gt))
   1936		goto err_ctx;
   1937
   1938	return 0;
   1939
   1940err_ctx:
   1941	kernel_context_close(c->ctx);
   1942	return -ENOMEM;
   1943}
   1944
   1945static void preempt_client_fini(struct preempt_client *c)
   1946{
   1947	igt_spinner_fini(&c->spin);
   1948	kernel_context_close(c->ctx);
   1949}
   1950
   1951static int live_nopreempt(void *arg)
   1952{
   1953	struct intel_gt *gt = arg;
   1954	struct intel_engine_cs *engine;
   1955	struct preempt_client a, b;
   1956	enum intel_engine_id id;
   1957	int err = -ENOMEM;
   1958
   1959	/*
   1960	 * Verify that we can disable preemption for an individual request
    1961	 * that may be being observed and does not want to be interrupted.
   1962	 */
   1963
   1964	if (preempt_client_init(gt, &a))
   1965		return -ENOMEM;
   1966	if (preempt_client_init(gt, &b))
   1967		goto err_client_a;
   1968	b.ctx->sched.priority = I915_PRIORITY_MAX;
   1969
   1970	for_each_engine(engine, gt, id) {
   1971		struct i915_request *rq_a, *rq_b;
   1972
   1973		if (!intel_engine_has_preemption(engine))
   1974			continue;
   1975
   1976		engine->execlists.preempt_hang.count = 0;
   1977
   1978		rq_a = spinner_create_request(&a.spin,
   1979					      a.ctx, engine,
   1980					      MI_ARB_CHECK);
   1981		if (IS_ERR(rq_a)) {
   1982			err = PTR_ERR(rq_a);
   1983			goto err_client_b;
   1984		}
   1985
   1986		/* Low priority client, but unpreemptable! */
   1987		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
   1988
   1989		i915_request_add(rq_a);
   1990		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
   1991			pr_err("First client failed to start\n");
   1992			goto err_wedged;
   1993		}
   1994
   1995		rq_b = spinner_create_request(&b.spin,
   1996					      b.ctx, engine,
   1997					      MI_ARB_CHECK);
   1998		if (IS_ERR(rq_b)) {
   1999			err = PTR_ERR(rq_b);
   2000			goto err_client_b;
   2001		}
   2002
   2003		i915_request_add(rq_b);
   2004
   2005		/* B is much more important than A! (But A is unpreemptable.) */
   2006		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
   2007
   2008		/* Wait long enough for preemption and timeslicing */
   2009		if (igt_wait_for_spinner(&b.spin, rq_b)) {
   2010			pr_err("Second client started too early!\n");
   2011			goto err_wedged;
   2012		}
   2013
   2014		igt_spinner_end(&a.spin);
   2015
   2016		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
   2017			pr_err("Second client failed to start\n");
   2018			goto err_wedged;
   2019		}
   2020
   2021		igt_spinner_end(&b.spin);
   2022
   2023		if (engine->execlists.preempt_hang.count) {
   2024			pr_err("Preemption recorded x%d; should have been suppressed!\n",
   2025			       engine->execlists.preempt_hang.count);
   2026			err = -EINVAL;
   2027			goto err_wedged;
   2028		}
   2029
   2030		if (igt_flush_test(gt->i915))
   2031			goto err_wedged;
   2032	}
   2033
   2034	err = 0;
   2035err_client_b:
   2036	preempt_client_fini(&b);
   2037err_client_a:
   2038	preempt_client_fini(&a);
   2039	return err;
   2040
   2041err_wedged:
   2042	igt_spinner_end(&b.spin);
   2043	igt_spinner_end(&a.spin);
   2044	intel_gt_set_wedged(gt);
   2045	err = -EIO;
   2046	goto err_client_b;
   2047}
   2048
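/*
 * Shared state for the preempt-cancel subtests below: the engine under
 * test plus two preempt_clients whose contexts are banned mid-flight to
 * check that cancellation behaves like (and relies upon) preemption.
 */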
   2049struct live_preempt_cancel {
   2050	struct intel_engine_cs *engine;
   2051	struct preempt_client a, b;
   2052};
   2053
   2054static int __cancel_active0(struct live_preempt_cancel *arg)
   2055{
   2056	struct i915_request *rq;
   2057	struct igt_live_test t;
   2058	int err;
   2059
   2060	/* Preempt cancel of ELSP0 */
   2061	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
   2062	if (igt_live_test_begin(&t, arg->engine->i915,
   2063				__func__, arg->engine->name))
   2064		return -EIO;
   2065
   2066	rq = spinner_create_request(&arg->a.spin,
   2067				    arg->a.ctx, arg->engine,
   2068				    MI_ARB_CHECK);
   2069	if (IS_ERR(rq))
   2070		return PTR_ERR(rq);
   2071
   2072	clear_bit(CONTEXT_BANNED, &rq->context->flags);
   2073	i915_request_get(rq);
   2074	i915_request_add(rq);
   2075	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
   2076		err = -EIO;
   2077		goto out;
   2078	}
   2079
   2080	intel_context_set_banned(rq->context);
   2081	err = intel_engine_pulse(arg->engine);
   2082	if (err)
   2083		goto out;
   2084
   2085	err = wait_for_reset(arg->engine, rq, HZ / 2);
   2086	if (err) {
   2087		pr_err("Cancelled inflight0 request did not reset\n");
   2088		goto out;
   2089	}
   2090
   2091out:
   2092	i915_request_put(rq);
   2093	if (igt_live_test_end(&t))
   2094		err = -EIO;
   2095	return err;
   2096}
   2097
   2098static int __cancel_active1(struct live_preempt_cancel *arg)
   2099{
   2100	struct i915_request *rq[2] = {};
   2101	struct igt_live_test t;
   2102	int err;
   2103
   2104	/* Preempt cancel of ELSP1 */
   2105	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
   2106	if (igt_live_test_begin(&t, arg->engine->i915,
   2107				__func__, arg->engine->name))
   2108		return -EIO;
   2109
   2110	rq[0] = spinner_create_request(&arg->a.spin,
   2111				       arg->a.ctx, arg->engine,
   2112				       MI_NOOP); /* no preemption */
   2113	if (IS_ERR(rq[0]))
   2114		return PTR_ERR(rq[0]);
   2115
   2116	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
   2117	i915_request_get(rq[0]);
   2118	i915_request_add(rq[0]);
   2119	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
   2120		err = -EIO;
   2121		goto out;
   2122	}
   2123
   2124	rq[1] = spinner_create_request(&arg->b.spin,
   2125				       arg->b.ctx, arg->engine,
   2126				       MI_ARB_CHECK);
   2127	if (IS_ERR(rq[1])) {
   2128		err = PTR_ERR(rq[1]);
   2129		goto out;
   2130	}
   2131
   2132	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
   2133	i915_request_get(rq[1]);
   2134	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
   2135	i915_request_add(rq[1]);
   2136	if (err)
   2137		goto out;
   2138
   2139	intel_context_set_banned(rq[1]->context);
   2140	err = intel_engine_pulse(arg->engine);
   2141	if (err)
   2142		goto out;
   2143
   2144	igt_spinner_end(&arg->a.spin);
   2145	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
   2146	if (err)
   2147		goto out;
   2148
   2149	if (rq[0]->fence.error != 0) {
   2150		pr_err("Normal inflight0 request did not complete\n");
   2151		err = -EINVAL;
   2152		goto out;
   2153	}
   2154
   2155	if (rq[1]->fence.error != -EIO) {
   2156		pr_err("Cancelled inflight1 request did not report -EIO\n");
   2157		err = -EINVAL;
   2158		goto out;
   2159	}
   2160
   2161out:
   2162	i915_request_put(rq[1]);
   2163	i915_request_put(rq[0]);
   2164	if (igt_live_test_end(&t))
   2165		err = -EIO;
   2166	return err;
   2167}
   2168
   2169static int __cancel_queued(struct live_preempt_cancel *arg)
   2170{
   2171	struct i915_request *rq[3] = {};
   2172	struct igt_live_test t;
   2173	int err;
   2174
   2175	/* Full ELSP and one in the wings */
   2176	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
   2177	if (igt_live_test_begin(&t, arg->engine->i915,
   2178				__func__, arg->engine->name))
   2179		return -EIO;
   2180
   2181	rq[0] = spinner_create_request(&arg->a.spin,
   2182				       arg->a.ctx, arg->engine,
   2183				       MI_ARB_CHECK);
   2184	if (IS_ERR(rq[0]))
   2185		return PTR_ERR(rq[0]);
   2186
   2187	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
   2188	i915_request_get(rq[0]);
   2189	i915_request_add(rq[0]);
   2190	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
   2191		err = -EIO;
   2192		goto out;
   2193	}
   2194
   2195	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
   2196	if (IS_ERR(rq[1])) {
   2197		err = PTR_ERR(rq[1]);
   2198		goto out;
   2199	}
   2200
   2201	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
   2202	i915_request_get(rq[1]);
   2203	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
   2204	i915_request_add(rq[1]);
   2205	if (err)
   2206		goto out;
   2207
   2208	rq[2] = spinner_create_request(&arg->b.spin,
   2209				       arg->a.ctx, arg->engine,
   2210				       MI_ARB_CHECK);
   2211	if (IS_ERR(rq[2])) {
   2212		err = PTR_ERR(rq[2]);
   2213		goto out;
   2214	}
   2215
   2216	i915_request_get(rq[2]);
   2217	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
   2218	i915_request_add(rq[2]);
   2219	if (err)
   2220		goto out;
   2221
   2222	intel_context_set_banned(rq[2]->context);
   2223	err = intel_engine_pulse(arg->engine);
   2224	if (err)
   2225		goto out;
   2226
   2227	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
   2228	if (err)
   2229		goto out;
   2230
   2231	if (rq[0]->fence.error != -EIO) {
   2232		pr_err("Cancelled inflight0 request did not report -EIO\n");
   2233		err = -EINVAL;
   2234		goto out;
   2235	}
   2236
   2237	if (rq[1]->fence.error != 0) {
   2238		pr_err("Normal inflight1 request did not complete\n");
   2239		err = -EINVAL;
   2240		goto out;
   2241	}
   2242
   2243	if (rq[2]->fence.error != -EIO) {
   2244		pr_err("Cancelled queued request did not report -EIO\n");
   2245		err = -EINVAL;
   2246		goto out;
   2247	}
   2248
   2249out:
   2250	i915_request_put(rq[2]);
   2251	i915_request_put(rq[1]);
   2252	i915_request_put(rq[0]);
   2253	if (igt_live_test_end(&t))
   2254		err = -EIO;
   2255	return err;
   2256}
   2257
   2258static int __cancel_hostile(struct live_preempt_cancel *arg)
   2259{
   2260	struct i915_request *rq;
   2261	int err;
   2262
   2263	/* Preempt cancel non-preemptible spinner in ELSP0 */
   2264	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
   2265		return 0;
   2266
   2267	if (!intel_has_reset_engine(arg->engine->gt))
   2268		return 0;
   2269
   2270	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
   2271	rq = spinner_create_request(&arg->a.spin,
   2272				    arg->a.ctx, arg->engine,
   2273				    MI_NOOP); /* preemption disabled */
   2274	if (IS_ERR(rq))
   2275		return PTR_ERR(rq);
   2276
   2277	clear_bit(CONTEXT_BANNED, &rq->context->flags);
   2278	i915_request_get(rq);
   2279	i915_request_add(rq);
   2280	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
   2281		err = -EIO;
   2282		goto out;
   2283	}
   2284
   2285	intel_context_set_banned(rq->context);
   2286	err = intel_engine_pulse(arg->engine); /* force reset */
   2287	if (err)
   2288		goto out;
   2289
   2290	err = wait_for_reset(arg->engine, rq, HZ / 2);
   2291	if (err) {
   2292		pr_err("Cancelled inflight0 request did not reset\n");
   2293		goto out;
   2294	}
   2295
   2296out:
   2297	i915_request_put(rq);
   2298	if (igt_flush_test(arg->engine->i915))
   2299		err = -EIO;
   2300	return err;
   2301}
   2302
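/*
 * Selftest fault injection on the engine reset path: force_reset_timeout()
 * arranges (with near-certain probability and unlimited repeats) for the
 * next forced preemption reset to be treated as a failure, and
 * cancel_reset_timeout() clears the injection again. See __cancel_fail()
 * below, which then relies on the heartbeat to recover the device.
 */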
   2303static void force_reset_timeout(struct intel_engine_cs *engine)
   2304{
   2305	engine->reset_timeout.probability = 999;
   2306	atomic_set(&engine->reset_timeout.times, -1);
   2307}
   2308
   2309static void cancel_reset_timeout(struct intel_engine_cs *engine)
   2310{
   2311	memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
   2312}
   2313
   2314static int __cancel_fail(struct live_preempt_cancel *arg)
   2315{
   2316	struct intel_engine_cs *engine = arg->engine;
   2317	struct i915_request *rq;
   2318	int err;
   2319
   2320	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
   2321		return 0;
   2322
   2323	if (!intel_has_reset_engine(engine->gt))
   2324		return 0;
   2325
   2326	GEM_TRACE("%s(%s)\n", __func__, engine->name);
   2327	rq = spinner_create_request(&arg->a.spin,
   2328				    arg->a.ctx, engine,
   2329				    MI_NOOP); /* preemption disabled */
   2330	if (IS_ERR(rq))
   2331		return PTR_ERR(rq);
   2332
   2333	clear_bit(CONTEXT_BANNED, &rq->context->flags);
   2334	i915_request_get(rq);
   2335	i915_request_add(rq);
   2336	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
   2337		err = -EIO;
   2338		goto out;
   2339	}
   2340
   2341	intel_context_set_banned(rq->context);
   2342
   2343	err = intel_engine_pulse(engine);
   2344	if (err)
   2345		goto out;
   2346
   2347	force_reset_timeout(engine);
   2348
   2349	/* force preempt reset [failure] */
   2350	while (!engine->execlists.pending[0])
   2351		intel_engine_flush_submission(engine);
   2352	del_timer_sync(&engine->execlists.preempt);
   2353	intel_engine_flush_submission(engine);
   2354
   2355	cancel_reset_timeout(engine);
   2356
   2357	/* after failure, require heartbeats to reset device */
   2358	intel_engine_set_heartbeat(engine, 1);
   2359	err = wait_for_reset(engine, rq, HZ / 2);
   2360	intel_engine_set_heartbeat(engine,
   2361				   engine->defaults.heartbeat_interval_ms);
   2362	if (err) {
   2363		pr_err("Cancelled inflight0 request did not reset\n");
   2364		goto out;
   2365	}
   2366
   2367out:
   2368	i915_request_put(rq);
   2369	if (igt_flush_test(engine->i915))
   2370		err = -EIO;
   2371	return err;
   2372}
   2373
   2374static int live_preempt_cancel(void *arg)
   2375{
   2376	struct intel_gt *gt = arg;
   2377	struct live_preempt_cancel data;
   2378	enum intel_engine_id id;
   2379	int err = -ENOMEM;
   2380
   2381	/*
   2382	 * To cancel an inflight context, we need to first remove it from the
   2383	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
   2384	 */
   2385
   2386	if (preempt_client_init(gt, &data.a))
   2387		return -ENOMEM;
   2388	if (preempt_client_init(gt, &data.b))
   2389		goto err_client_a;
   2390
   2391	for_each_engine(data.engine, gt, id) {
   2392		if (!intel_engine_has_preemption(data.engine))
   2393			continue;
   2394
   2395		err = __cancel_active0(&data);
   2396		if (err)
   2397			goto err_wedged;
   2398
   2399		err = __cancel_active1(&data);
   2400		if (err)
   2401			goto err_wedged;
   2402
   2403		err = __cancel_queued(&data);
   2404		if (err)
   2405			goto err_wedged;
   2406
   2407		err = __cancel_hostile(&data);
   2408		if (err)
   2409			goto err_wedged;
   2410
   2411		err = __cancel_fail(&data);
   2412		if (err)
   2413			goto err_wedged;
   2414	}
   2415
   2416	err = 0;
   2417err_client_b:
   2418	preempt_client_fini(&data.b);
   2419err_client_a:
   2420	preempt_client_fini(&data.a);
   2421	return err;
   2422
   2423err_wedged:
   2424	GEM_TRACE_DUMP();
   2425	igt_spinner_end(&data.b.spin);
   2426	igt_spinner_end(&data.a.spin);
   2427	intel_gt_set_wedged(gt);
   2428	goto err_client_b;
   2429}
   2430
   2431static int live_suppress_self_preempt(void *arg)
   2432{
   2433	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
   2434	struct intel_gt *gt = arg;
   2435	struct intel_engine_cs *engine;
   2436	struct preempt_client a, b;
   2437	enum intel_engine_id id;
   2438	int err = -ENOMEM;
   2439
   2440	/*
   2441	 * Verify that if a preemption request does not cause a change in
   2442	 * the current execution order, the preempt-to-idle injection is
   2443	 * skipped and that we do not accidentally apply it after the CS
   2444	 * completion event.
   2445	 */
   2446
   2447	if (intel_uc_uses_guc_submission(&gt->uc))
    2448		return 0; /* presume black box */
   2449
   2450	if (intel_vgpu_active(gt->i915))
   2451		return 0; /* GVT forces single port & request submission */
   2452
   2453	if (preempt_client_init(gt, &a))
   2454		return -ENOMEM;
   2455	if (preempt_client_init(gt, &b))
   2456		goto err_client_a;
   2457
   2458	for_each_engine(engine, gt, id) {
   2459		struct i915_request *rq_a, *rq_b;
   2460		int depth;
   2461
   2462		if (!intel_engine_has_preemption(engine))
   2463			continue;
   2464
   2465		if (igt_flush_test(gt->i915))
   2466			goto err_wedged;
   2467
   2468		st_engine_heartbeat_disable(engine);
   2469		engine->execlists.preempt_hang.count = 0;
   2470
   2471		rq_a = spinner_create_request(&a.spin,
   2472					      a.ctx, engine,
   2473					      MI_NOOP);
   2474		if (IS_ERR(rq_a)) {
   2475			err = PTR_ERR(rq_a);
   2476			st_engine_heartbeat_enable(engine);
   2477			goto err_client_b;
   2478		}
   2479
   2480		i915_request_add(rq_a);
   2481		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
   2482			pr_err("First client failed to start\n");
   2483			st_engine_heartbeat_enable(engine);
   2484			goto err_wedged;
   2485		}
   2486
   2487		/* Keep postponing the timer to avoid premature slicing */
   2488		mod_timer(&engine->execlists.timer, jiffies + HZ);
   2489		for (depth = 0; depth < 8; depth++) {
   2490			rq_b = spinner_create_request(&b.spin,
   2491						      b.ctx, engine,
   2492						      MI_NOOP);
   2493			if (IS_ERR(rq_b)) {
   2494				err = PTR_ERR(rq_b);
   2495				st_engine_heartbeat_enable(engine);
   2496				goto err_client_b;
   2497			}
   2498			i915_request_add(rq_b);
   2499
   2500			GEM_BUG_ON(i915_request_completed(rq_a));
   2501			engine->sched_engine->schedule(rq_a, &attr);
   2502			igt_spinner_end(&a.spin);
   2503
   2504			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
   2505				pr_err("Second client failed to start\n");
   2506				st_engine_heartbeat_enable(engine);
   2507				goto err_wedged;
   2508			}
   2509
   2510			swap(a, b);
   2511			rq_a = rq_b;
   2512		}
   2513		igt_spinner_end(&a.spin);
   2514
   2515		if (engine->execlists.preempt_hang.count) {
   2516			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
   2517			       engine->name,
   2518			       engine->execlists.preempt_hang.count,
   2519			       depth);
   2520			st_engine_heartbeat_enable(engine);
   2521			err = -EINVAL;
   2522			goto err_client_b;
   2523		}
   2524
   2525		st_engine_heartbeat_enable(engine);
   2526		if (igt_flush_test(gt->i915))
   2527			goto err_wedged;
   2528	}
   2529
   2530	err = 0;
   2531err_client_b:
   2532	preempt_client_fini(&b);
   2533err_client_a:
   2534	preempt_client_fini(&a);
   2535	return err;
   2536
   2537err_wedged:
   2538	igt_spinner_end(&b.spin);
   2539	igt_spinner_end(&a.spin);
   2540	intel_gt_set_wedged(gt);
   2541	err = -EIO;
   2542	goto err_client_b;
   2543}
   2544
   2545static int live_chain_preempt(void *arg)
   2546{
   2547	struct intel_gt *gt = arg;
   2548	struct intel_engine_cs *engine;
   2549	struct preempt_client hi, lo;
   2550	enum intel_engine_id id;
   2551	int err = -ENOMEM;
   2552
   2553	/*
   2554	 * Build a chain AB...BA between two contexts (A, B) and request
   2555	 * preemption of the last request. It should then complete before
   2556	 * the previously submitted spinner in B.
   2557	 */
   2558
   2559	if (preempt_client_init(gt, &hi))
   2560		return -ENOMEM;
   2561
   2562	if (preempt_client_init(gt, &lo))
   2563		goto err_client_hi;
   2564
   2565	for_each_engine(engine, gt, id) {
   2566		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
   2567		struct igt_live_test t;
   2568		struct i915_request *rq;
   2569		int ring_size, count, i;
   2570
   2571		if (!intel_engine_has_preemption(engine))
   2572			continue;
   2573
   2574		rq = spinner_create_request(&lo.spin,
   2575					    lo.ctx, engine,
   2576					    MI_ARB_CHECK);
   2577		if (IS_ERR(rq))
   2578			goto err_wedged;
   2579
   2580		i915_request_get(rq);
   2581		i915_request_add(rq);
   2582
   2583		ring_size = rq->wa_tail - rq->head;
   2584		if (ring_size < 0)
   2585			ring_size += rq->ring->size;
   2586		ring_size = rq->ring->size / ring_size;
   2587		pr_debug("%s(%s): Using maximum of %d requests\n",
   2588			 __func__, engine->name, ring_size);
   2589
   2590		igt_spinner_end(&lo.spin);
   2591		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
   2592			pr_err("Timed out waiting to flush %s\n", engine->name);
   2593			i915_request_put(rq);
   2594			goto err_wedged;
   2595		}
   2596		i915_request_put(rq);
   2597
   2598		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
   2599			err = -EIO;
   2600			goto err_wedged;
   2601		}
   2602
   2603		for_each_prime_number_from(count, 1, ring_size) {
   2604			rq = spinner_create_request(&hi.spin,
   2605						    hi.ctx, engine,
   2606						    MI_ARB_CHECK);
   2607			if (IS_ERR(rq))
   2608				goto err_wedged;
   2609			i915_request_add(rq);
   2610			if (!igt_wait_for_spinner(&hi.spin, rq))
   2611				goto err_wedged;
   2612
   2613			rq = spinner_create_request(&lo.spin,
   2614						    lo.ctx, engine,
   2615						    MI_ARB_CHECK);
   2616			if (IS_ERR(rq))
   2617				goto err_wedged;
   2618			i915_request_add(rq);
   2619
   2620			for (i = 0; i < count; i++) {
   2621				rq = igt_request_alloc(lo.ctx, engine);
   2622				if (IS_ERR(rq))
   2623					goto err_wedged;
   2624				i915_request_add(rq);
   2625			}
   2626
   2627			rq = igt_request_alloc(hi.ctx, engine);
   2628			if (IS_ERR(rq))
   2629				goto err_wedged;
   2630
   2631			i915_request_get(rq);
   2632			i915_request_add(rq);
   2633			engine->sched_engine->schedule(rq, &attr);
   2634
   2635			igt_spinner_end(&hi.spin);
   2636			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
   2637				struct drm_printer p =
   2638					drm_info_printer(gt->i915->drm.dev);
   2639
   2640				pr_err("Failed to preempt over chain of %d\n",
   2641				       count);
   2642				intel_engine_dump(engine, &p,
   2643						  "%s\n", engine->name);
   2644				i915_request_put(rq);
   2645				goto err_wedged;
   2646			}
   2647			igt_spinner_end(&lo.spin);
   2648			i915_request_put(rq);
   2649
   2650			rq = igt_request_alloc(lo.ctx, engine);
   2651			if (IS_ERR(rq))
   2652				goto err_wedged;
   2653
   2654			i915_request_get(rq);
   2655			i915_request_add(rq);
   2656
   2657			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
   2658				struct drm_printer p =
   2659					drm_info_printer(gt->i915->drm.dev);
   2660
   2661				pr_err("Failed to flush low priority chain of %d requests\n",
   2662				       count);
   2663				intel_engine_dump(engine, &p,
   2664						  "%s\n", engine->name);
   2665
   2666				i915_request_put(rq);
   2667				goto err_wedged;
   2668			}
   2669			i915_request_put(rq);
   2670		}
   2671
   2672		if (igt_live_test_end(&t)) {
   2673			err = -EIO;
   2674			goto err_wedged;
   2675		}
   2676	}
   2677
   2678	err = 0;
   2679err_client_lo:
   2680	preempt_client_fini(&lo);
   2681err_client_hi:
   2682	preempt_client_fini(&hi);
   2683	return err;
   2684
   2685err_wedged:
   2686	igt_spinner_end(&hi.spin);
   2687	igt_spinner_end(&lo.spin);
   2688	intel_gt_set_wedged(gt);
   2689	err = -EIO;
   2690	goto err_client_lo;
   2691}
   2692
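/*
 * Append one more spinning batch to the gang. Each batch busy-waits on a
 * semaphore in its own batch buffer and, once that semaphore is cleared,
 * writes zero into the previous batch's semaphore to release it in turn.
 * Requests are linked via rq->mock.link so the caller can walk the chain
 * from the newest request back to the oldest.
 */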
   2693static int create_gang(struct intel_engine_cs *engine,
   2694		       struct i915_request **prev)
   2695{
   2696	struct drm_i915_gem_object *obj;
   2697	struct intel_context *ce;
   2698	struct i915_request *rq;
   2699	struct i915_vma *vma;
   2700	u32 *cs;
   2701	int err;
   2702
   2703	ce = intel_context_create(engine);
   2704	if (IS_ERR(ce))
   2705		return PTR_ERR(ce);
   2706
   2707	obj = i915_gem_object_create_internal(engine->i915, 4096);
   2708	if (IS_ERR(obj)) {
   2709		err = PTR_ERR(obj);
   2710		goto err_ce;
   2711	}
   2712
   2713	vma = i915_vma_instance(obj, ce->vm, NULL);
   2714	if (IS_ERR(vma)) {
   2715		err = PTR_ERR(vma);
   2716		goto err_obj;
   2717	}
   2718
   2719	err = i915_vma_pin(vma, 0, 0, PIN_USER);
   2720	if (err)
   2721		goto err_obj;
   2722
   2723	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
   2724	if (IS_ERR(cs)) {
   2725		err = PTR_ERR(cs);
   2726		goto err_obj;
   2727	}
   2728
   2729	/* Semaphore target: spin until zero */
   2730	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   2731
   2732	*cs++ = MI_SEMAPHORE_WAIT |
   2733		MI_SEMAPHORE_POLL |
   2734		MI_SEMAPHORE_SAD_EQ_SDD;
   2735	*cs++ = 0;
   2736	*cs++ = lower_32_bits(vma->node.start);
   2737	*cs++ = upper_32_bits(vma->node.start);
   2738
   2739	if (*prev) {
   2740		u64 offset = (*prev)->batch->node.start;
   2741
   2742		/* Terminate the spinner in the next lower priority batch. */
   2743		*cs++ = MI_STORE_DWORD_IMM_GEN4;
   2744		*cs++ = lower_32_bits(offset);
   2745		*cs++ = upper_32_bits(offset);
   2746		*cs++ = 0;
   2747	}
   2748
   2749	*cs++ = MI_BATCH_BUFFER_END;
   2750	i915_gem_object_flush_map(obj);
   2751	i915_gem_object_unpin_map(obj);
   2752
   2753	rq = intel_context_create_request(ce);
   2754	if (IS_ERR(rq)) {
   2755		err = PTR_ERR(rq);
   2756		goto err_obj;
   2757	}
   2758
   2759	rq->batch = i915_vma_get(vma);
   2760	i915_request_get(rq);
   2761
   2762	i915_vma_lock(vma);
   2763	err = i915_request_await_object(rq, vma->obj, false);
   2764	if (!err)
   2765		err = i915_vma_move_to_active(vma, rq, 0);
   2766	if (!err)
   2767		err = rq->engine->emit_bb_start(rq,
   2768						vma->node.start,
   2769						PAGE_SIZE, 0);
   2770	i915_vma_unlock(vma);
   2771	i915_request_add(rq);
   2772	if (err)
   2773		goto err_rq;
   2774
   2775	i915_gem_object_put(obj);
   2776	intel_context_put(ce);
   2777
   2778	rq->mock.link.next = &(*prev)->mock.link;
   2779	*prev = rq;
   2780	return 0;
   2781
   2782err_rq:
   2783	i915_vma_put(rq->batch);
   2784	i915_request_put(rq);
   2785err_obj:
   2786	i915_gem_object_put(obj);
   2787err_ce:
   2788	intel_context_put(ce);
   2789	return err;
   2790}
   2791
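/*
 * Helper for live_preempt_ring(): start a spinner on ce[0], stuff roughly
 * queue_sz bytes of empty requests into its ring_sz-byte ring (poisoned
 * with 0xdeadbeef so any bad rollback hangs), then submit a high-priority
 * request on ce[1] and check that it still reaches the hardware despite
 * the pending ring wrap.
 */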
   2792static int __live_preempt_ring(struct intel_engine_cs *engine,
   2793			       struct igt_spinner *spin,
   2794			       int queue_sz, int ring_sz)
   2795{
   2796	struct intel_context *ce[2] = {};
   2797	struct i915_request *rq;
   2798	struct igt_live_test t;
   2799	int err = 0;
   2800	int n;
   2801
   2802	if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
   2803		return -EIO;
   2804
   2805	for (n = 0; n < ARRAY_SIZE(ce); n++) {
   2806		struct intel_context *tmp;
   2807
   2808		tmp = intel_context_create(engine);
   2809		if (IS_ERR(tmp)) {
   2810			err = PTR_ERR(tmp);
   2811			goto err_ce;
   2812		}
   2813
   2814		tmp->ring_size = ring_sz;
   2815
   2816		err = intel_context_pin(tmp);
   2817		if (err) {
   2818			intel_context_put(tmp);
   2819			goto err_ce;
   2820		}
   2821
   2822		memset32(tmp->ring->vaddr,
   2823			 0xdeadbeef, /* trigger a hang if executed */
   2824			 tmp->ring->vma->size / sizeof(u32));
   2825
   2826		ce[n] = tmp;
   2827	}
   2828
   2829	rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
   2830	if (IS_ERR(rq)) {
   2831		err = PTR_ERR(rq);
   2832		goto err_ce;
   2833	}
   2834
   2835	i915_request_get(rq);
   2836	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
   2837	i915_request_add(rq);
   2838
   2839	if (!igt_wait_for_spinner(spin, rq)) {
   2840		intel_gt_set_wedged(engine->gt);
   2841		i915_request_put(rq);
   2842		err = -ETIME;
   2843		goto err_ce;
   2844	}
   2845
    2846	/* Fill the ring until we cause a wrap */
   2847	n = 0;
   2848	while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
   2849		struct i915_request *tmp;
   2850
   2851		tmp = intel_context_create_request(ce[0]);
   2852		if (IS_ERR(tmp)) {
   2853			err = PTR_ERR(tmp);
   2854			i915_request_put(rq);
   2855			goto err_ce;
   2856		}
   2857
   2858		i915_request_add(tmp);
   2859		intel_engine_flush_submission(engine);
   2860		n++;
   2861	}
   2862	intel_engine_flush_submission(engine);
   2863	pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
   2864		 engine->name, queue_sz, n,
   2865		 ce[0]->ring->size,
   2866		 ce[0]->ring->tail,
   2867		 ce[0]->ring->emit,
   2868		 rq->tail);
   2869	i915_request_put(rq);
   2870
   2871	/* Create a second request to preempt the first ring */
   2872	rq = intel_context_create_request(ce[1]);
   2873	if (IS_ERR(rq)) {
   2874		err = PTR_ERR(rq);
   2875		goto err_ce;
   2876	}
   2877
   2878	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
   2879	i915_request_get(rq);
   2880	i915_request_add(rq);
   2881
   2882	err = wait_for_submit(engine, rq, HZ / 2);
   2883	i915_request_put(rq);
   2884	if (err) {
   2885		pr_err("%s: preemption request was not submitted\n",
   2886		       engine->name);
   2887		err = -ETIME;
   2888	}
   2889
   2890	pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
   2891		 engine->name,
   2892		 ce[0]->ring->tail, ce[0]->ring->emit,
   2893		 ce[1]->ring->tail, ce[1]->ring->emit);
   2894
   2895err_ce:
   2896	intel_engine_flush_submission(engine);
   2897	igt_spinner_end(spin);
   2898	for (n = 0; n < ARRAY_SIZE(ce); n++) {
   2899		if (IS_ERR_OR_NULL(ce[n]))
   2900			break;
   2901
   2902		intel_context_unpin(ce[n]);
   2903		intel_context_put(ce[n]);
   2904	}
   2905	if (igt_live_test_end(&t))
   2906		err = -EIO;
   2907	return err;
   2908}
   2909
   2910static int live_preempt_ring(void *arg)
   2911{
   2912	struct intel_gt *gt = arg;
   2913	struct intel_engine_cs *engine;
   2914	struct igt_spinner spin;
   2915	enum intel_engine_id id;
   2916	int err = 0;
   2917
   2918	/*
   2919	 * Check that we rollback large chunks of a ring in order to do a
   2920	 * preemption event. Similar to live_unlite_ring, but looking at
   2921	 * ring size rather than the impact of intel_ring_direction().
   2922	 */
   2923
   2924	if (igt_spinner_init(&spin, gt))
   2925		return -ENOMEM;
   2926
   2927	for_each_engine(engine, gt, id) {
   2928		int n;
   2929
   2930		if (!intel_engine_has_preemption(engine))
   2931			continue;
   2932
   2933		if (!intel_engine_can_store_dword(engine))
   2934			continue;
   2935
   2936		st_engine_heartbeat_disable(engine);
   2937
   2938		for (n = 0; n <= 3; n++) {
   2939			err = __live_preempt_ring(engine, &spin,
   2940						  n * SZ_4K / 4, SZ_4K);
   2941			if (err)
   2942				break;
   2943		}
   2944
   2945		st_engine_heartbeat_enable(engine);
   2946		if (err)
   2947			break;
   2948	}
   2949
   2950	igt_spinner_fini(&spin);
   2951	return err;
   2952}
   2953
   2954static int live_preempt_gang(void *arg)
   2955{
   2956	struct intel_gt *gt = arg;
   2957	struct intel_engine_cs *engine;
   2958	enum intel_engine_id id;
   2959
   2960	/*
   2961	 * Build as long a chain of preempters as we can, with each
   2962	 * request higher priority than the last. Once we are ready, we release
    2963	 * the last batch which then percolates down the chain, each releasing
   2964	 * the next oldest in turn. The intent is to simply push as hard as we
   2965	 * can with the number of preemptions, trying to exceed narrow HW
   2966	 * limits. At a minimum, we insist that we can sort all the user
   2967	 * high priority levels into execution order.
   2968	 */
   2969
   2970	for_each_engine(engine, gt, id) {
   2971		struct i915_request *rq = NULL;
   2972		struct igt_live_test t;
   2973		IGT_TIMEOUT(end_time);
   2974		int prio = 0;
   2975		int err = 0;
   2976		u32 *cs;
   2977
   2978		if (!intel_engine_has_preemption(engine))
   2979			continue;
   2980
   2981		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
   2982			return -EIO;
   2983
   2984		do {
   2985			struct i915_sched_attr attr = { .priority = prio++ };
   2986
   2987			err = create_gang(engine, &rq);
   2988			if (err)
   2989				break;
   2990
   2991			/* Submit each spinner at increasing priority */
   2992			engine->sched_engine->schedule(rq, &attr);
   2993		} while (prio <= I915_PRIORITY_MAX &&
   2994			 !__igt_timeout(end_time, NULL));
   2995		pr_debug("%s: Preempt chain of %d requests\n",
   2996			 engine->name, prio);
   2997
   2998		/*
    2999		 * The last spinner submitted is thus the highest priority and
    3000		 * should execute first. When that spinner completes, it
    3001		 * terminates the next lowest spinner, and so on, until there
   3002		 * are no more spinners and the gang is complete.
   3003		 */
   3004		cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
   3005		if (!IS_ERR(cs)) {
   3006			*cs = 0;
   3007			i915_gem_object_unpin_map(rq->batch->obj);
   3008		} else {
   3009			err = PTR_ERR(cs);
   3010			intel_gt_set_wedged(gt);
   3011		}
   3012
   3013		while (rq) { /* wait for each rq from highest to lowest prio */
   3014			struct i915_request *n = list_next_entry(rq, mock.link);
   3015
   3016			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
   3017				struct drm_printer p =
   3018					drm_info_printer(engine->i915->drm.dev);
   3019
   3020				pr_err("Failed to flush chain of %d requests, at %d\n",
   3021				       prio, rq_prio(rq));
   3022				intel_engine_dump(engine, &p,
   3023						  "%s\n", engine->name);
   3024
   3025				err = -ETIME;
   3026			}
   3027
   3028			i915_vma_put(rq->batch);
   3029			i915_request_put(rq);
   3030			rq = n;
   3031		}
   3032
   3033		if (igt_live_test_end(&t))
   3034			err = -EIO;
   3035		if (err)
   3036			return err;
   3037	}
   3038
   3039	return 0;
   3040}
   3041
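/*
 * Build a user batch that, for each GPR 1..15, increments the context-saved
 * register with MI_MATH, stores it into the result buffer at 'offset' with
 * MI_SRM, and then waits for the semaphore at the start of 'result' to
 * reach at least that GPR's index. If the batch were ever rewound over one
 * of those arbitration points, the repeated increment would leave a stored
 * value greater than 1.
 */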
   3042static struct i915_vma *
   3043create_gpr_user(struct intel_engine_cs *engine,
   3044		struct i915_vma *result,
   3045		unsigned int offset)
   3046{
   3047	struct drm_i915_gem_object *obj;
   3048	struct i915_vma *vma;
   3049	u32 *cs;
   3050	int err;
   3051	int i;
   3052
   3053	obj = i915_gem_object_create_internal(engine->i915, 4096);
   3054	if (IS_ERR(obj))
   3055		return ERR_CAST(obj);
   3056
   3057	vma = i915_vma_instance(obj, result->vm, NULL);
   3058	if (IS_ERR(vma)) {
   3059		i915_gem_object_put(obj);
   3060		return vma;
   3061	}
   3062
   3063	err = i915_vma_pin(vma, 0, 0, PIN_USER);
   3064	if (err) {
   3065		i915_vma_put(vma);
   3066		return ERR_PTR(err);
   3067	}
   3068
   3069	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
   3070	if (IS_ERR(cs)) {
   3071		i915_vma_put(vma);
   3072		return ERR_CAST(cs);
   3073	}
   3074
    3075	/* All GPRs are clear for new contexts. We use GPR(0) as a constant */
   3076	*cs++ = MI_LOAD_REGISTER_IMM(1);
   3077	*cs++ = CS_GPR(engine, 0);
   3078	*cs++ = 1;
   3079
   3080	for (i = 1; i < NUM_GPR; i++) {
   3081		u64 addr;
   3082
   3083		/*
   3084		 * Perform: GPR[i]++
   3085		 *
   3086		 * As we read and write into the context saved GPR[i], if
   3087		 * we restart this batch buffer from an earlier point, we
   3088		 * will repeat the increment and store a value > 1.
   3089		 */
   3090		*cs++ = MI_MATH(4);
   3091		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
   3092		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
   3093		*cs++ = MI_MATH_ADD;
   3094		*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
   3095
   3096		addr = result->node.start + offset + i * sizeof(*cs);
   3097		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
   3098		*cs++ = CS_GPR(engine, 2 * i);
   3099		*cs++ = lower_32_bits(addr);
   3100		*cs++ = upper_32_bits(addr);
   3101
   3102		*cs++ = MI_SEMAPHORE_WAIT |
   3103			MI_SEMAPHORE_POLL |
   3104			MI_SEMAPHORE_SAD_GTE_SDD;
   3105		*cs++ = i;
   3106		*cs++ = lower_32_bits(result->node.start);
   3107		*cs++ = upper_32_bits(result->node.start);
   3108	}
   3109
   3110	*cs++ = MI_BATCH_BUFFER_END;
   3111	i915_gem_object_flush_map(obj);
   3112	i915_gem_object_unpin_map(obj);
   3113
   3114	return vma;
   3115}
   3116
   3117static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
   3118{
   3119	struct drm_i915_gem_object *obj;
   3120	struct i915_vma *vma;
   3121	int err;
   3122
   3123	obj = i915_gem_object_create_internal(gt->i915, sz);
   3124	if (IS_ERR(obj))
   3125		return ERR_CAST(obj);
   3126
   3127	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
   3128	if (IS_ERR(vma)) {
   3129		i915_gem_object_put(obj);
   3130		return vma;
   3131	}
   3132
   3133	err = i915_ggtt_pin(vma, NULL, 0, 0);
   3134	if (err) {
   3135		i915_vma_put(vma);
   3136		return ERR_PTR(err);
   3137	}
   3138
   3139	return vma;
   3140}
   3141
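/*
 * Create a fresh context and submit a create_gpr_user() batch for it,
 * writing its results into this client's slice of the shared 'global'
 * buffer at 'offset'. Returns the request with a reference held so the
 * caller can wait on it and inspect the results.
 */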
   3142static struct i915_request *
   3143create_gpr_client(struct intel_engine_cs *engine,
   3144		  struct i915_vma *global,
   3145		  unsigned int offset)
   3146{
   3147	struct i915_vma *batch, *vma;
   3148	struct intel_context *ce;
   3149	struct i915_request *rq;
   3150	int err;
   3151
   3152	ce = intel_context_create(engine);
   3153	if (IS_ERR(ce))
   3154		return ERR_CAST(ce);
   3155
   3156	vma = i915_vma_instance(global->obj, ce->vm, NULL);
   3157	if (IS_ERR(vma)) {
   3158		err = PTR_ERR(vma);
   3159		goto out_ce;
   3160	}
   3161
   3162	err = i915_vma_pin(vma, 0, 0, PIN_USER);
   3163	if (err)
   3164		goto out_ce;
   3165
   3166	batch = create_gpr_user(engine, vma, offset);
   3167	if (IS_ERR(batch)) {
   3168		err = PTR_ERR(batch);
   3169		goto out_vma;
   3170	}
   3171
   3172	rq = intel_context_create_request(ce);
   3173	if (IS_ERR(rq)) {
   3174		err = PTR_ERR(rq);
   3175		goto out_batch;
   3176	}
   3177
   3178	i915_vma_lock(vma);
   3179	err = i915_request_await_object(rq, vma->obj, false);
   3180	if (!err)
   3181		err = i915_vma_move_to_active(vma, rq, 0);
   3182	i915_vma_unlock(vma);
   3183
   3184	i915_vma_lock(batch);
   3185	if (!err)
   3186		err = i915_request_await_object(rq, batch->obj, false);
   3187	if (!err)
   3188		err = i915_vma_move_to_active(batch, rq, 0);
   3189	if (!err)
   3190		err = rq->engine->emit_bb_start(rq,
   3191						batch->node.start,
   3192						PAGE_SIZE, 0);
   3193	i915_vma_unlock(batch);
   3194	i915_vma_unpin(batch);
   3195
   3196	if (!err)
   3197		i915_request_get(rq);
   3198	i915_request_add(rq);
   3199
   3200out_batch:
   3201	i915_vma_put(batch);
   3202out_vma:
   3203	i915_vma_unpin(vma);
   3204out_ce:
   3205	intel_context_put(ce);
   3206	return err ? ERR_PTR(err) : rq;
   3207}
   3208
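/*
 * Submit a max-priority kernel request that writes 'id' into the global
 * semaphore, preempting whatever user batches are currently running and
 * releasing every GPR client waiting for a value <= id.
 */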
   3209static int preempt_user(struct intel_engine_cs *engine,
   3210			struct i915_vma *global,
   3211			int id)
   3212{
   3213	struct i915_sched_attr attr = {
   3214		.priority = I915_PRIORITY_MAX
   3215	};
   3216	struct i915_request *rq;
   3217	int err = 0;
   3218	u32 *cs;
   3219
   3220	rq = intel_engine_create_kernel_request(engine);
   3221	if (IS_ERR(rq))
   3222		return PTR_ERR(rq);
   3223
   3224	cs = intel_ring_begin(rq, 4);
   3225	if (IS_ERR(cs)) {
   3226		i915_request_add(rq);
   3227		return PTR_ERR(cs);
   3228	}
   3229
   3230	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
   3231	*cs++ = i915_ggtt_offset(global);
   3232	*cs++ = 0;
   3233	*cs++ = id;
   3234
   3235	intel_ring_advance(rq, cs);
   3236
   3237	i915_request_get(rq);
   3238	i915_request_add(rq);
   3239
   3240	engine->sched_engine->schedule(rq, &attr);
   3241
   3242	if (i915_request_wait(rq, 0, HZ / 2) < 0)
   3243		err = -ETIME;
   3244	i915_request_put(rq);
   3245
   3246	return err;
   3247}
   3248
   3249static int live_preempt_user(void *arg)
   3250{
   3251	struct intel_gt *gt = arg;
   3252	struct intel_engine_cs *engine;
   3253	struct i915_vma *global;
   3254	enum intel_engine_id id;
   3255	u32 *result;
   3256	int err = 0;
   3257
   3258	/*
   3259	 * In our other tests, we look at preemption in carefully
   3260	 * controlled conditions in the ringbuffer. Since most of the
   3261	 * time is spent in user batches, most of our preemptions naturally
   3262	 * occur there. We want to verify that when we preempt inside a batch
   3263	 * we continue on from the current instruction and do not roll back
   3264	 * to the start, or another earlier arbitration point.
   3265	 *
   3266	 * To verify this, we create a batch which is a mixture of
   3267	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
   3268	 * a few preempting contexts thrown into the mix, we look for any
   3269	 * repeated instructions (which show up as incorrect values).
   3270	 */
   3271
   3272	global = create_global(gt, 4096);
   3273	if (IS_ERR(global))
   3274		return PTR_ERR(global);
   3275
   3276	result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
   3277	if (IS_ERR(result)) {
   3278		i915_vma_unpin_and_release(&global, 0);
   3279		return PTR_ERR(result);
   3280	}
   3281
   3282	for_each_engine(engine, gt, id) {
   3283		struct i915_request *client[3] = {};
   3284		struct igt_live_test t;
   3285		int i;
   3286
   3287		if (!intel_engine_has_preemption(engine))
   3288			continue;
   3289
   3290		if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS)
   3291			continue; /* we need per-context GPR */
   3292
   3293		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
   3294			err = -EIO;
   3295			break;
   3296		}
   3297
   3298		memset(result, 0, 4096);
   3299
   3300		for (i = 0; i < ARRAY_SIZE(client); i++) {
   3301			struct i915_request *rq;
   3302
   3303			rq = create_gpr_client(engine, global,
   3304					       NUM_GPR * i * sizeof(u32));
   3305			if (IS_ERR(rq)) {
   3306				err = PTR_ERR(rq);
   3307				goto end_test;
   3308			}
   3309
   3310			client[i] = rq;
   3311		}
   3312
   3313		/* Continuously preempt the set of 3 running contexts */
   3314		for (i = 1; i <= NUM_GPR; i++) {
   3315			err = preempt_user(engine, global, i);
   3316			if (err)
   3317				goto end_test;
   3318		}
   3319
   3320		if (READ_ONCE(result[0]) != NUM_GPR) {
   3321			pr_err("%s: Failed to release semaphore\n",
   3322			       engine->name);
   3323			err = -EIO;
   3324			goto end_test;
   3325		}
   3326
   3327		for (i = 0; i < ARRAY_SIZE(client); i++) {
   3328			int gpr;
   3329
   3330			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
   3331				err = -ETIME;
   3332				goto end_test;
   3333			}
   3334
   3335			for (gpr = 1; gpr < NUM_GPR; gpr++) {
   3336				if (result[NUM_GPR * i + gpr] != 1) {
   3337					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
   3338					       engine->name,
   3339					       i, gpr, result[NUM_GPR * i + gpr]);
   3340					err = -EINVAL;
   3341					goto end_test;
   3342				}
   3343			}
   3344		}
   3345
   3346end_test:
   3347		for (i = 0; i < ARRAY_SIZE(client); i++) {
   3348			if (!client[i])
   3349				break;
   3350
   3351			i915_request_put(client[i]);
   3352		}
   3353
   3354		/* Flush the semaphores on error */
   3355		smp_store_mb(result[0], -1);
   3356		if (igt_live_test_end(&t))
   3357			err = -EIO;
   3358		if (err)
   3359			break;
   3360	}
   3361
   3362	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
   3363	return err;
   3364}
   3365
   3366static int live_preempt_timeout(void *arg)
   3367{
   3368	struct intel_gt *gt = arg;
   3369	struct i915_gem_context *ctx_hi, *ctx_lo;
   3370	struct igt_spinner spin_lo;
   3371	struct intel_engine_cs *engine;
   3372	enum intel_engine_id id;
   3373	int err = -ENOMEM;
   3374
   3375	/*
   3376	 * Check that we force preemption to occur by cancelling the previous
   3377	 * context if it refuses to yield the GPU.
   3378	 */
   3379	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
   3380		return 0;
   3381
   3382	if (!intel_has_reset_engine(gt))
   3383		return 0;
   3384
   3385	ctx_hi = kernel_context(gt->i915, NULL);
   3386	if (!ctx_hi)
   3387		return -ENOMEM;
   3388	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
   3389
   3390	ctx_lo = kernel_context(gt->i915, NULL);
   3391	if (!ctx_lo)
   3392		goto err_ctx_hi;
   3393	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
   3394
   3395	if (igt_spinner_init(&spin_lo, gt))
   3396		goto err_ctx_lo;
   3397
   3398	for_each_engine(engine, gt, id) {
   3399		unsigned long saved_timeout;
   3400		struct i915_request *rq;
   3401
   3402		if (!intel_engine_has_preemption(engine))
   3403			continue;
   3404
   3405		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
   3406					    MI_NOOP); /* preemption disabled */
   3407		if (IS_ERR(rq)) {
   3408			err = PTR_ERR(rq);
   3409			goto err_spin_lo;
   3410		}
   3411
   3412		i915_request_add(rq);
   3413		if (!igt_wait_for_spinner(&spin_lo, rq)) {
   3414			intel_gt_set_wedged(gt);
   3415			err = -EIO;
   3416			goto err_spin_lo;
   3417		}
   3418
   3419		rq = igt_request_alloc(ctx_hi, engine);
   3420		if (IS_ERR(rq)) {
   3421			igt_spinner_end(&spin_lo);
   3422			err = PTR_ERR(rq);
   3423			goto err_spin_lo;
   3424		}
   3425
   3426		/* Flush the previous CS ack before changing timeouts */
   3427		while (READ_ONCE(engine->execlists.pending[0]))
   3428			cpu_relax();
   3429
   3430		saved_timeout = engine->props.preempt_timeout_ms;
   3431		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
   3432
   3433		i915_request_get(rq);
   3434		i915_request_add(rq);
   3435
   3436		intel_engine_flush_submission(engine);
   3437		engine->props.preempt_timeout_ms = saved_timeout;
   3438
   3439		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
   3440			intel_gt_set_wedged(gt);
   3441			i915_request_put(rq);
   3442			err = -ETIME;
   3443			goto err_spin_lo;
   3444		}
   3445
   3446		igt_spinner_end(&spin_lo);
   3447		i915_request_put(rq);
   3448	}
   3449
   3450	err = 0;
   3451err_spin_lo:
   3452	igt_spinner_fini(&spin_lo);
   3453err_ctx_lo:
   3454	kernel_context_close(ctx_lo);
   3455err_ctx_hi:
   3456	kernel_context_close(ctx_hi);
   3457	return err;
   3458}
   3459
   3460static int random_range(struct rnd_state *rnd, int min, int max)
   3461{
   3462	return i915_prandom_u32_max_state(max - min, rnd) + min;
   3463}
   3464
   3465static int random_priority(struct rnd_state *rnd)
   3466{
   3467	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
   3468}
   3469
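/*
 * State shared by the preemption smoke tests: a pool of ncontext kernel
 * contexts, an optional batch of MI_ARB_CHECKs to run, the engine
 * currently being targeted, a PRNG for picking contexts and priorities,
 * and a running count of submissions.
 */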
   3470struct preempt_smoke {
   3471	struct intel_gt *gt;
   3472	struct i915_gem_context **contexts;
   3473	struct intel_engine_cs *engine;
   3474	struct drm_i915_gem_object *batch;
   3475	unsigned int ncontext;
   3476	struct rnd_state prng;
   3477	unsigned long count;
   3478};
   3479
   3480static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
   3481{
   3482	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
   3483							  &smoke->prng)];
   3484}
   3485
   3486static int smoke_submit(struct preempt_smoke *smoke,
   3487			struct i915_gem_context *ctx, int prio,
   3488			struct drm_i915_gem_object *batch)
   3489{
   3490	struct i915_request *rq;
   3491	struct i915_vma *vma = NULL;
   3492	int err = 0;
   3493
   3494	if (batch) {
   3495		struct i915_address_space *vm;
   3496
   3497		vm = i915_gem_context_get_eb_vm(ctx);
   3498		vma = i915_vma_instance(batch, vm, NULL);
   3499		i915_vm_put(vm);
   3500		if (IS_ERR(vma))
   3501			return PTR_ERR(vma);
   3502
   3503		err = i915_vma_pin(vma, 0, 0, PIN_USER);
   3504		if (err)
   3505			return err;
   3506	}
   3507
   3508	ctx->sched.priority = prio;
   3509
   3510	rq = igt_request_alloc(ctx, smoke->engine);
   3511	if (IS_ERR(rq)) {
   3512		err = PTR_ERR(rq);
   3513		goto unpin;
   3514	}
   3515
   3516	if (vma) {
   3517		i915_vma_lock(vma);
   3518		err = i915_request_await_object(rq, vma->obj, false);
   3519		if (!err)
   3520			err = i915_vma_move_to_active(vma, rq, 0);
   3521		if (!err)
   3522			err = rq->engine->emit_bb_start(rq,
   3523							vma->node.start,
   3524							PAGE_SIZE, 0);
   3525		i915_vma_unlock(vma);
   3526	}
   3527
   3528	i915_request_add(rq);
   3529
   3530unpin:
   3531	if (vma)
   3532		i915_vma_unpin(vma);
   3533
   3534	return err;
   3535}
   3536
   3537static int smoke_crescendo_thread(void *arg)
   3538{
   3539	struct preempt_smoke *smoke = arg;
   3540	IGT_TIMEOUT(end_time);
   3541	unsigned long count;
   3542
   3543	count = 0;
   3544	do {
   3545		struct i915_gem_context *ctx = smoke_context(smoke);
   3546		int err;
   3547
   3548		err = smoke_submit(smoke,
   3549				   ctx, count % I915_PRIORITY_MAX,
   3550				   smoke->batch);
   3551		if (err)
   3552			return err;
   3553
   3554		count++;
   3555	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
   3556
   3557	smoke->count = count;
   3558	return 0;
   3559}
   3560
   3561static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
   3562#define BATCH BIT(0)
   3563{
   3564	struct task_struct *tsk[I915_NUM_ENGINES] = {};
   3565	struct preempt_smoke *arg;
   3566	struct intel_engine_cs *engine;
   3567	enum intel_engine_id id;
   3568	unsigned long count;
   3569	int err = 0;
   3570
   3571	arg = kmalloc_array(I915_NUM_ENGINES, sizeof(*arg), GFP_KERNEL);
   3572	if (!arg)
   3573		return -ENOMEM;
   3574
   3575	for_each_engine(engine, smoke->gt, id) {
   3576		arg[id] = *smoke;
   3577		arg[id].engine = engine;
   3578		if (!(flags & BATCH))
   3579			arg[id].batch = NULL;
   3580		arg[id].count = 0;
   3581
   3582		tsk[id] = kthread_run(smoke_crescendo_thread, arg,
   3583				      "igt/smoke:%d", id);
   3584		if (IS_ERR(tsk[id])) {
   3585			err = PTR_ERR(tsk[id]);
   3586			break;
   3587		}
   3588		get_task_struct(tsk[id]);
   3589	}
   3590
   3591	yield(); /* start all threads before we kthread_stop() */
   3592
   3593	count = 0;
   3594	for_each_engine(engine, smoke->gt, id) {
   3595		int status;
   3596
   3597		if (IS_ERR_OR_NULL(tsk[id]))
   3598			continue;
   3599
   3600		status = kthread_stop(tsk[id]);
   3601		if (status && !err)
   3602			err = status;
   3603
   3604		count += arg[id].count;
   3605
   3606		put_task_struct(tsk[id]);
   3607	}
   3608
   3609	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
   3610		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
   3611
   3612	kfree(arg);
    3613	return err;
   3614}
   3615
   3616static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
   3617{
   3618	enum intel_engine_id id;
   3619	IGT_TIMEOUT(end_time);
   3620	unsigned long count;
   3621
   3622	count = 0;
   3623	do {
   3624		for_each_engine(smoke->engine, smoke->gt, id) {
   3625			struct i915_gem_context *ctx = smoke_context(smoke);
   3626			int err;
   3627
   3628			err = smoke_submit(smoke,
   3629					   ctx, random_priority(&smoke->prng),
   3630					   flags & BATCH ? smoke->batch : NULL);
   3631			if (err)
   3632				return err;
   3633
   3634			count++;
   3635		}
   3636	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
   3637
   3638	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
   3639		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
   3640	return 0;
   3641}
   3642
   3643static int live_preempt_smoke(void *arg)
   3644{
   3645	struct preempt_smoke smoke = {
   3646		.gt = arg,
   3647		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
   3648		.ncontext = 256,
   3649	};
   3650	const unsigned int phase[] = { 0, BATCH };
   3651	struct igt_live_test t;
   3652	int err = -ENOMEM;
   3653	u32 *cs;
   3654	int n;
   3655
   3656	smoke.contexts = kmalloc_array(smoke.ncontext,
   3657				       sizeof(*smoke.contexts),
   3658				       GFP_KERNEL);
   3659	if (!smoke.contexts)
   3660		return -ENOMEM;
   3661
   3662	smoke.batch =
   3663		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
   3664	if (IS_ERR(smoke.batch)) {
   3665		err = PTR_ERR(smoke.batch);
   3666		goto err_free;
   3667	}
   3668
   3669	cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
   3670	if (IS_ERR(cs)) {
   3671		err = PTR_ERR(cs);
   3672		goto err_batch;
   3673	}
   3674	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
   3675		cs[n] = MI_ARB_CHECK;
   3676	cs[n] = MI_BATCH_BUFFER_END;
   3677	i915_gem_object_flush_map(smoke.batch);
   3678	i915_gem_object_unpin_map(smoke.batch);
   3679
   3680	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
   3681		err = -EIO;
   3682		goto err_batch;
   3683	}
   3684
   3685	for (n = 0; n < smoke.ncontext; n++) {
   3686		smoke.contexts[n] = kernel_context(smoke.gt->i915, NULL);
   3687		if (!smoke.contexts[n])
   3688			goto err_ctx;
   3689	}
   3690
   3691	for (n = 0; n < ARRAY_SIZE(phase); n++) {
   3692		err = smoke_crescendo(&smoke, phase[n]);
   3693		if (err)
   3694			goto err_ctx;
   3695
   3696		err = smoke_random(&smoke, phase[n]);
   3697		if (err)
   3698			goto err_ctx;
   3699	}
   3700
   3701err_ctx:
   3702	if (igt_live_test_end(&t))
   3703		err = -EIO;
   3704
   3705	for (n = 0; n < smoke.ncontext; n++) {
   3706		if (!smoke.contexts[n])
   3707			break;
   3708		kernel_context_close(smoke.contexts[n]);
   3709	}
   3710
   3711err_batch:
   3712	i915_gem_object_put(smoke.batch);
   3713err_free:
   3714	kfree(smoke.contexts);
   3715
   3716	return err;
   3717}
   3718
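/*
 * Submit batches of empty requests to nctx virtual engines built from the
 * same set of siblings and report the wait latency per batch. With CHAIN
 * set, each context's requests are queued back-to-back before moving on to
 * the next context; otherwise submission round-robins across the contexts.
 */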
   3719static int nop_virtual_engine(struct intel_gt *gt,
   3720			      struct intel_engine_cs **siblings,
   3721			      unsigned int nsibling,
   3722			      unsigned int nctx,
   3723			      unsigned int flags)
   3724#define CHAIN BIT(0)
   3725{
   3726	IGT_TIMEOUT(end_time);
   3727	struct i915_request *request[16] = {};
   3728	struct intel_context *ve[16];
   3729	unsigned long n, prime, nc;
   3730	struct igt_live_test t;
   3731	ktime_t times[2] = {};
   3732	int err;
   3733
   3734	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
   3735
   3736	for (n = 0; n < nctx; n++) {
   3737		ve[n] = intel_engine_create_virtual(siblings, nsibling, 0);
   3738		if (IS_ERR(ve[n])) {
   3739			err = PTR_ERR(ve[n]);
   3740			nctx = n;
   3741			goto out;
   3742		}
   3743
   3744		err = intel_context_pin(ve[n]);
   3745		if (err) {
   3746			intel_context_put(ve[n]);
   3747			nctx = n;
   3748			goto out;
   3749		}
   3750	}
   3751
   3752	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
   3753	if (err)
   3754		goto out;
   3755
   3756	for_each_prime_number_from(prime, 1, 8192) {
   3757		times[1] = ktime_get_raw();
   3758
   3759		if (flags & CHAIN) {
   3760			for (nc = 0; nc < nctx; nc++) {
   3761				for (n = 0; n < prime; n++) {
   3762					struct i915_request *rq;
   3763
   3764					rq = i915_request_create(ve[nc]);
   3765					if (IS_ERR(rq)) {
   3766						err = PTR_ERR(rq);
   3767						goto out;
   3768					}
   3769
   3770					if (request[nc])
   3771						i915_request_put(request[nc]);
   3772					request[nc] = i915_request_get(rq);
   3773					i915_request_add(rq);
   3774				}
   3775			}
   3776		} else {
   3777			for (n = 0; n < prime; n++) {
   3778				for (nc = 0; nc < nctx; nc++) {
   3779					struct i915_request *rq;
   3780
   3781					rq = i915_request_create(ve[nc]);
   3782					if (IS_ERR(rq)) {
   3783						err = PTR_ERR(rq);
   3784						goto out;
   3785					}
   3786
   3787					if (request[nc])
   3788						i915_request_put(request[nc]);
   3789					request[nc] = i915_request_get(rq);
   3790					i915_request_add(rq);
   3791				}
   3792			}
   3793		}
   3794
   3795		for (nc = 0; nc < nctx; nc++) {
   3796			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
   3797				pr_err("%s(%s): wait for %llx:%lld timed out\n",
   3798				       __func__, ve[0]->engine->name,
   3799				       request[nc]->fence.context,
   3800				       request[nc]->fence.seqno);
   3801
   3802				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
   3803					  __func__, ve[0]->engine->name,
   3804					  request[nc]->fence.context,
   3805					  request[nc]->fence.seqno);
   3806				GEM_TRACE_DUMP();
   3807				intel_gt_set_wedged(gt);
   3808				break;
   3809			}
   3810		}
   3811
   3812		times[1] = ktime_sub(ktime_get_raw(), times[1]);
   3813		if (prime == 1)
   3814			times[0] = times[1];
   3815
   3816		for (nc = 0; nc < nctx; nc++) {
   3817			i915_request_put(request[nc]);
   3818			request[nc] = NULL;
   3819		}
   3820
   3821		if (__igt_timeout(end_time, NULL))
   3822			break;
   3823	}
   3824
   3825	err = igt_live_test_end(&t);
   3826	if (err)
   3827		goto out;
   3828
   3829	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
   3830		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
   3831		prime, div64_u64(ktime_to_ns(times[1]), prime));
   3832
   3833out:
   3834	if (igt_flush_test(gt->i915))
   3835		err = -EIO;
   3836
   3837	for (nc = 0; nc < nctx; nc++) {
   3838		i915_request_put(request[nc]);
   3839		intel_context_unpin(ve[nc]);
   3840		intel_context_put(ve[nc]);
   3841	}
   3842	return err;
   3843}
   3844
   3845static unsigned int
   3846__select_siblings(struct intel_gt *gt,
   3847		  unsigned int class,
   3848		  struct intel_engine_cs **siblings,
   3849		  bool (*filter)(const struct intel_engine_cs *))
   3850{
   3851	unsigned int n = 0;
   3852	unsigned int inst;
   3853
   3854	for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
   3855		if (!gt->engine_class[class][inst])
   3856			continue;
   3857
   3858		if (filter && !filter(gt->engine_class[class][inst]))
   3859			continue;
   3860
   3861		siblings[n++] = gt->engine_class[class][inst];
   3862	}
   3863
   3864	return n;
   3865}
   3866
   3867static unsigned int
   3868select_siblings(struct intel_gt *gt,
   3869		unsigned int class,
   3870		struct intel_engine_cs **siblings)
   3871{
   3872	return __select_siblings(gt, class, siblings, NULL);
   3873}
   3874
   3875static int live_virtual_engine(void *arg)
   3876{
   3877	struct intel_gt *gt = arg;
   3878	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
   3879	struct intel_engine_cs *engine;
   3880	enum intel_engine_id id;
   3881	unsigned int class;
   3882	int err;
   3883
   3884	if (intel_uc_uses_guc_submission(&gt->uc))
   3885		return 0;
   3886
   3887	for_each_engine(engine, gt, id) {
   3888		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
   3889		if (err) {
   3890			pr_err("Failed to wrap engine %s: err=%d\n",
   3891			       engine->name, err);
   3892			return err;
   3893		}
   3894	}
   3895
   3896	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
   3897		int nsibling, n;
   3898
   3899		nsibling = select_siblings(gt, class, siblings);
   3900		if (nsibling < 2)
   3901			continue;
   3902
   3903		for (n = 1; n <= nsibling + 1; n++) {
   3904			err = nop_virtual_engine(gt, siblings, nsibling,
   3905						 n, 0);
   3906			if (err)
   3907				return err;
   3908		}
   3909
   3910		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
   3911		if (err)
   3912			return err;
   3913	}
   3914
   3915	return 0;
   3916}
   3917
   3918static int mask_virtual_engine(struct intel_gt *gt,
   3919			       struct intel_engine_cs **siblings,
   3920			       unsigned int nsibling)
   3921{
   3922	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
   3923	struct intel_context *ve;
   3924	struct igt_live_test t;
   3925	unsigned int n;
   3926	int err;
   3927
   3928	/*
   3929	 * Check that by setting the execution mask on a request, we can
   3930	 * restrict it to our desired engine within the virtual engine.
   3931	 */
   3932
   3933	ve = intel_engine_create_virtual(siblings, nsibling, 0);
   3934	if (IS_ERR(ve)) {
   3935		err = PTR_ERR(ve);
   3936		goto out_close;
   3937	}
   3938
   3939	err = intel_context_pin(ve);
   3940	if (err)
   3941		goto out_put;
   3942
   3943	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
   3944	if (err)
   3945		goto out_unpin;
   3946
   3947	for (n = 0; n < nsibling; n++) {
   3948		request[n] = i915_request_create(ve);
   3949		if (IS_ERR(request[n])) {
   3950			err = PTR_ERR(request[n]);
   3951			nsibling = n;
   3952			goto out;
   3953		}
   3954
   3955		/* Reverse order as it's more likely to be unnatural */
   3956		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
   3957
   3958		i915_request_get(request[n]);
   3959		i915_request_add(request[n]);
   3960	}
   3961
   3962	for (n = 0; n < nsibling; n++) {
   3963		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
   3964			pr_err("%s(%s): wait for %llx:%lld timed out\n",
   3965			       __func__, ve->engine->name,
   3966			       request[n]->fence.context,
   3967			       request[n]->fence.seqno);
   3968
   3969			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
   3970				  __func__, ve->engine->name,
   3971				  request[n]->fence.context,
   3972				  request[n]->fence.seqno);
   3973			GEM_TRACE_DUMP();
   3974			intel_gt_set_wedged(gt);
   3975			err = -EIO;
   3976			goto out;
   3977		}
   3978
   3979		if (request[n]->engine != siblings[nsibling - n - 1]) {
   3980			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
   3981			       request[n]->engine->name,
   3982			       siblings[nsibling - n - 1]->name);
   3983			err = -EINVAL;
   3984			goto out;
   3985		}
   3986	}
   3987
   3988	err = igt_live_test_end(&t);
   3989out:
   3990	if (igt_flush_test(gt->i915))
   3991		err = -EIO;
   3992
   3993	for (n = 0; n < nsibling; n++)
   3994		i915_request_put(request[n]);
   3995
   3996out_unpin:
   3997	intel_context_unpin(ve);
   3998out_put:
   3999	intel_context_put(ve);
   4000out_close:
   4001	return err;
   4002}
   4003
   4004static int live_virtual_mask(void *arg)
   4005{
   4006	struct intel_gt *gt = arg;
   4007	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
   4008	unsigned int class;
   4009	int err;
   4010
   4011	if (intel_uc_uses_guc_submission(&gt->uc))
   4012		return 0;
   4013
   4014	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
   4015		unsigned int nsibling;
   4016
   4017		nsibling = select_siblings(gt, class, siblings);
   4018		if (nsibling < 2)
   4019			continue;
   4020
   4021		err = mask_virtual_engine(gt, siblings, nsibling);
   4022		if (err)
   4023			return err;
   4024	}
   4025
   4026	return 0;
   4027}
   4028
   4029static int slicein_virtual_engine(struct intel_gt *gt,
   4030				  struct intel_engine_cs **siblings,
   4031				  unsigned int nsibling)
   4032{
   4033	const long timeout = slice_timeout(siblings[0]);
   4034	struct intel_context *ce;
   4035	struct i915_request *rq;
   4036	struct igt_spinner spin;
   4037	unsigned int n;
   4038	int err = 0;
   4039
   4040	/*
   4041	 * Virtual requests must take part in timeslicing on the target engines.
   4042	 */
   4043
   4044	if (igt_spinner_init(&spin, gt))
   4045		return -ENOMEM;
   4046
   4047	for (n = 0; n < nsibling; n++) {
   4048		ce = intel_context_create(siblings[n]);
   4049		if (IS_ERR(ce)) {
   4050			err = PTR_ERR(ce);
   4051			goto out;
   4052		}
   4053
   4054		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
   4055		intel_context_put(ce);
   4056		if (IS_ERR(rq)) {
   4057			err = PTR_ERR(rq);
   4058			goto out;
   4059		}
   4060
   4061		i915_request_add(rq);
   4062	}
   4063
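        	/*
        	 * With a spinner hogging every sibling, a nop request on the
        	 * virtual engine should still be timesliced in and complete
        	 * within the slice timeout.
        	 */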
   4064	ce = intel_engine_create_virtual(siblings, nsibling, 0);
   4065	if (IS_ERR(ce)) {
   4066		err = PTR_ERR(ce);
   4067		goto out;
   4068	}
   4069
   4070	rq = intel_context_create_request(ce);
   4071	intel_context_put(ce);
   4072	if (IS_ERR(rq)) {
   4073		err = PTR_ERR(rq);
   4074		goto out;
   4075	}
   4076
   4077	i915_request_get(rq);
   4078	i915_request_add(rq);
   4079	if (i915_request_wait(rq, 0, timeout) < 0) {
   4080		GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
   4081			      __func__, rq->engine->name);
   4082		GEM_TRACE_DUMP();
   4083		intel_gt_set_wedged(gt);
   4084		err = -EIO;
   4085	}
   4086	i915_request_put(rq);
   4087
   4088out:
   4089	igt_spinner_end(&spin);
   4090	if (igt_flush_test(gt->i915))
   4091		err = -EIO;
   4092	igt_spinner_fini(&spin);
   4093	return err;
   4094}
   4095
   4096static int sliceout_virtual_engine(struct intel_gt *gt,
   4097				   struct intel_engine_cs **siblings,
   4098				   unsigned int nsibling)
   4099{
   4100	const long timeout = slice_timeout(siblings[0]);
   4101	struct intel_context *ce;
   4102	struct i915_request *rq;
   4103	struct igt_spinner spin;
   4104	unsigned int n;
   4105	int err = 0;
   4106
   4107	/*
   4108	 * Virtual requests must allow others a fair timeslice.
   4109	 */
   4110
   4111	if (igt_spinner_init(&spin, gt))
   4112		return -ENOMEM;
   4113
   4114	/* XXX We do not handle oversubscription and fairness with normal rq */
   4115	for (n = 0; n < nsibling; n++) {
   4116		ce = intel_engine_create_virtual(siblings, nsibling, 0);
   4117		if (IS_ERR(ce)) {
   4118			err = PTR_ERR(ce);
   4119			goto out;
   4120		}
   4121
   4122		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
   4123		intel_context_put(ce);
   4124		if (IS_ERR(rq)) {
   4125			err = PTR_ERR(rq);
   4126			goto out;
   4127		}
   4128
   4129		i915_request_add(rq);
   4130	}
   4131
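        	/*
        	 * Conversely, with virtual spinners occupying all the siblings,
        	 * a plain request on each physical engine should still receive a
        	 * timeslice and complete within the slice timeout.
        	 */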
   4132	for (n = 0; !err && n < nsibling; n++) {
   4133		ce = intel_context_create(siblings[n]);
   4134		if (IS_ERR(ce)) {
   4135			err = PTR_ERR(ce);
   4136			goto out;
   4137		}
   4138
   4139		rq = intel_context_create_request(ce);
   4140		intel_context_put(ce);
   4141		if (IS_ERR(rq)) {
   4142			err = PTR_ERR(rq);
   4143			goto out;
   4144		}
   4145
   4146		i915_request_get(rq);
   4147		i915_request_add(rq);
   4148		if (i915_request_wait(rq, 0, timeout) < 0) {
   4149			GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
   4150				      __func__, siblings[n]->name);
   4151			GEM_TRACE_DUMP();
   4152			intel_gt_set_wedged(gt);
   4153			err = -EIO;
   4154		}
   4155		i915_request_put(rq);
   4156	}
   4157
   4158out:
   4159	igt_spinner_end(&spin);
   4160	if (igt_flush_test(gt->i915))
   4161		err = -EIO;
   4162	igt_spinner_fini(&spin);
   4163	return err;
   4164}
   4165
   4166static int live_virtual_slice(void *arg)
   4167{
   4168	struct intel_gt *gt = arg;
   4169	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
   4170	unsigned int class;
   4171	int err;
   4172
   4173	if (intel_uc_uses_guc_submission(&gt->uc))
   4174		return 0;
   4175
   4176	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
   4177		unsigned int nsibling;
   4178
   4179		nsibling = __select_siblings(gt, class, siblings,
   4180					     intel_engine_has_timeslices);
   4181		if (nsibling < 2)
   4182			continue;
   4183
   4184		err = slicein_virtual_engine(gt, siblings, nsibling);
   4185		if (err)
   4186			return err;
   4187
   4188		err = sliceout_virtual_engine(gt, siblings, nsibling);
   4189		if (err)
   4190			return err;
   4191	}
   4192
   4193	return 0;
   4194}
   4195
   4196static int preserved_virtual_engine(struct intel_gt *gt,
   4197				    struct intel_engine_cs **siblings,
   4198				    unsigned int nsibling)
   4199{
   4200	struct i915_request *last = NULL;
   4201	struct intel_context *ve;
   4202	struct i915_vma *scratch;
   4203	struct igt_live_test t;
   4204	unsigned int n;
   4205	int err = 0;
   4206	u32 *cs;
   4207
   4208	scratch =
   4209		__vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
   4210						    PAGE_SIZE);
   4211	if (IS_ERR(scratch))
   4212		return PTR_ERR(scratch);
   4213
   4214	err = i915_vma_sync(scratch);
   4215	if (err)
   4216		goto out_scratch;
   4217
   4218	ve = intel_engine_create_virtual(siblings, nsibling, 0);
   4219	if (IS_ERR(ve)) {
   4220		err = PTR_ERR(ve);
   4221		goto out_scratch;
   4222	}
   4223
   4224	err = intel_context_pin(ve);
   4225	if (err)
   4226		goto out_put;
   4227
   4228	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
   4229	if (err)
   4230		goto out_unpin;
   4231
   4232	for (n = 0; n < NUM_GPR_DW; n++) {
   4233		struct intel_engine_cs *engine = siblings[n % nsibling];
   4234		struct i915_request *rq;
   4235
   4236		rq = i915_request_create(ve);
   4237		if (IS_ERR(rq)) {
   4238			err = PTR_ERR(rq);
   4239			goto out_end;
   4240		}
   4241
   4242		i915_request_put(last);
   4243		last = i915_request_get(rq);
   4244
   4245		cs = intel_ring_begin(rq, 8);
   4246		if (IS_ERR(cs)) {
   4247			i915_request_add(rq);
   4248			err = PTR_ERR(cs);
   4249			goto out_end;
   4250		}
   4251
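        		/*
        		 * Dump GPR[n] to scratch (seeded by the previous request,
        		 * initially zero), then write n + 1 into the next GPR so
        		 * the readback is only correct if the context image follows
        		 * the requests from engine to engine.
        		 */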
   4252		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
   4253		*cs++ = CS_GPR(engine, n);
   4254		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
   4255		*cs++ = 0;
   4256
   4257		*cs++ = MI_LOAD_REGISTER_IMM(1);
   4258		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
   4259		*cs++ = n + 1;
   4260
   4261		*cs++ = MI_NOOP;
   4262		intel_ring_advance(rq, cs);
   4263
   4264		/* Restrict this request to run on a particular engine */
   4265		rq->execution_mask = engine->mask;
   4266		i915_request_add(rq);
   4267	}
   4268
   4269	if (i915_request_wait(last, 0, HZ / 5) < 0) {
   4270		err = -ETIME;
   4271		goto out_end;
   4272	}
   4273
   4274	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
   4275	if (IS_ERR(cs)) {
   4276		err = PTR_ERR(cs);
   4277		goto out_end;
   4278	}
   4279
   4280	for (n = 0; n < NUM_GPR_DW; n++) {
   4281		if (cs[n] != n) {
   4282			pr_err("Incorrect value[%d] found for GPR[%d]\n",
   4283			       cs[n], n);
   4284			err = -EINVAL;
   4285			break;
   4286		}
   4287	}
   4288
   4289	i915_gem_object_unpin_map(scratch->obj);
   4290
   4291out_end:
   4292	if (igt_live_test_end(&t))
   4293		err = -EIO;
   4294	i915_request_put(last);
   4295out_unpin:
   4296	intel_context_unpin(ve);
   4297out_put:
   4298	intel_context_put(ve);
   4299out_scratch:
   4300	i915_vma_unpin_and_release(&scratch, 0);
   4301	return err;
   4302}
   4303
   4304static int live_virtual_preserved(void *arg)
   4305{
   4306	struct intel_gt *gt = arg;
   4307	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
   4308	unsigned int class;
   4309
   4310	/*
   4311	 * Check that the context image retains non-privileged (user) registers
   4312	 * from one engine to the next. For this we check that the CS_GPR
    4313	 * registers are preserved.
   4314	 */
   4315
   4316	if (intel_uc_uses_guc_submission(&gt->uc))
   4317		return 0;
   4318
    4319	/* CS_GPR did not exist on all engines before Gen9, so skip older platforms. */
   4320	if (GRAPHICS_VER(gt->i915) < 9)
   4321		return 0;
   4322
   4323	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
   4324		int nsibling, err;
   4325
   4326		nsibling = select_siblings(gt, class, siblings);
   4327		if (nsibling < 2)
   4328			continue;
   4329
   4330		err = preserved_virtual_engine(gt, siblings, nsibling);
   4331		if (err)
   4332			return err;
   4333	}
   4334
   4335	return 0;
   4336}
   4337
   4338static int reset_virtual_engine(struct intel_gt *gt,
   4339				struct intel_engine_cs **siblings,
   4340				unsigned int nsibling)
   4341{
   4342	struct intel_engine_cs *engine;
   4343	struct intel_context *ve;
   4344	struct igt_spinner spin;
   4345	struct i915_request *rq;
   4346	unsigned int n;
   4347	int err = 0;
   4348
   4349	/*
   4350	 * In order to support offline error capture for fast preempt reset,
   4351	 * we need to decouple the guilty request and ensure that it and its
    4352	 * descendants are not executed while the capture is in progress.
   4353	 */
   4354
   4355	if (igt_spinner_init(&spin, gt))
   4356		return -ENOMEM;
   4357
   4358	ve = intel_engine_create_virtual(siblings, nsibling, 0);
   4359	if (IS_ERR(ve)) {
   4360		err = PTR_ERR(ve);
   4361		goto out_spin;
   4362	}
   4363
   4364	for (n = 0; n < nsibling; n++)
   4365		st_engine_heartbeat_disable(siblings[n]);
   4366
   4367	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
   4368	if (IS_ERR(rq)) {
   4369		err = PTR_ERR(rq);
   4370		goto out_heartbeat;
   4371	}
   4372	i915_request_add(rq);
   4373
   4374	if (!igt_wait_for_spinner(&spin, rq)) {
   4375		intel_gt_set_wedged(gt);
   4376		err = -ETIME;
   4377		goto out_heartbeat;
   4378	}
   4379
   4380	engine = rq->engine;
   4381	GEM_BUG_ON(engine == ve->engine);
   4382
   4383	/* Take ownership of the reset and tasklet */
   4384	err = engine_lock_reset_tasklet(engine);
   4385	if (err)
   4386		goto out_heartbeat;
   4387
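        	/*
        	 * Kick the submission tasklet by hand so that the spinner is
        	 * promoted to the active request before we fake the preemption.
        	 */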
   4388	engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
   4389	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
   4390
   4391	/* Fake a preemption event; failed of course */
   4392	spin_lock_irq(&engine->sched_engine->lock);
   4393	__unwind_incomplete_requests(engine);
   4394	spin_unlock_irq(&engine->sched_engine->lock);
   4395	GEM_BUG_ON(rq->engine != engine);
   4396
   4397	/* Reset the engine while keeping our active request on hold */
   4398	execlists_hold(engine, rq);
   4399	GEM_BUG_ON(!i915_request_on_hold(rq));
   4400
   4401	__intel_engine_reset_bh(engine, NULL);
   4402	GEM_BUG_ON(rq->fence.error != -EIO);
   4403
   4404	/* Release our grasp on the engine, letting CS flow again */
   4405	engine_unlock_reset_tasklet(engine);
   4406
   4407	/* Check that we do not resubmit the held request */
   4408	i915_request_get(rq);
   4409	if (!i915_request_wait(rq, 0, HZ / 5)) {
   4410		pr_err("%s: on hold request completed!\n",
   4411		       engine->name);
   4412		intel_gt_set_wedged(gt);
   4413		err = -EIO;
   4414		goto out_rq;
   4415	}
   4416	GEM_BUG_ON(!i915_request_on_hold(rq));
   4417
   4418	/* But is resubmitted on release */
   4419	execlists_unhold(engine, rq);
   4420	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
   4421		pr_err("%s: held request did not complete!\n",
   4422		       engine->name);
   4423		intel_gt_set_wedged(gt);
   4424		err = -ETIME;
   4425	}
   4426
   4427out_rq:
   4428	i915_request_put(rq);
   4429out_heartbeat:
   4430	for (n = 0; n < nsibling; n++)
   4431		st_engine_heartbeat_enable(siblings[n]);
   4432
   4433	intel_context_put(ve);
   4434out_spin:
   4435	igt_spinner_fini(&spin);
   4436	return err;
   4437}
   4438
   4439static int live_virtual_reset(void *arg)
   4440{
   4441	struct intel_gt *gt = arg;
   4442	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
   4443	unsigned int class;
   4444
   4445	/*
   4446	 * Check that we handle a reset event within a virtual engine.
   4447	 * Only the physical engine is reset, but we have to check the flow
    4448	 * of the virtual requests around the reset, and make sure no request
    4449	 * is forgotten.
   4450	 */
   4451
   4452	if (intel_uc_uses_guc_submission(&gt->uc))
   4453		return 0;
   4454
   4455	if (!intel_has_reset_engine(gt))
   4456		return 0;
   4457
   4458	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
   4459		int nsibling, err;
   4460
   4461		nsibling = select_siblings(gt, class, siblings);
   4462		if (nsibling < 2)
   4463			continue;
   4464
   4465		err = reset_virtual_engine(gt, siblings, nsibling);
   4466		if (err)
   4467			return err;
   4468	}
   4469
   4470	return 0;
   4471}
   4472
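        /*
         * Entry point: the execlists live selftests only apply when using
         * legacy ELSP submission (not GuC) and the GT is not already wedged.
         */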
   4473int intel_execlists_live_selftests(struct drm_i915_private *i915)
   4474{
   4475	static const struct i915_subtest tests[] = {
   4476		SUBTEST(live_sanitycheck),
   4477		SUBTEST(live_unlite_switch),
   4478		SUBTEST(live_unlite_preempt),
   4479		SUBTEST(live_unlite_ring),
   4480		SUBTEST(live_pin_rewind),
   4481		SUBTEST(live_hold_reset),
   4482		SUBTEST(live_error_interrupt),
   4483		SUBTEST(live_timeslice_preempt),
   4484		SUBTEST(live_timeslice_rewind),
   4485		SUBTEST(live_timeslice_queue),
   4486		SUBTEST(live_timeslice_nopreempt),
   4487		SUBTEST(live_busywait_preempt),
   4488		SUBTEST(live_preempt),
   4489		SUBTEST(live_late_preempt),
   4490		SUBTEST(live_nopreempt),
   4491		SUBTEST(live_preempt_cancel),
   4492		SUBTEST(live_suppress_self_preempt),
   4493		SUBTEST(live_chain_preempt),
   4494		SUBTEST(live_preempt_ring),
   4495		SUBTEST(live_preempt_gang),
   4496		SUBTEST(live_preempt_timeout),
   4497		SUBTEST(live_preempt_user),
   4498		SUBTEST(live_preempt_smoke),
   4499		SUBTEST(live_virtual_engine),
   4500		SUBTEST(live_virtual_mask),
   4501		SUBTEST(live_virtual_preserved),
   4502		SUBTEST(live_virtual_slice),
   4503		SUBTEST(live_virtual_reset),
   4504	};
   4505
   4506	if (to_gt(i915)->submission_method != INTEL_SUBMISSION_ELSP)
   4507		return 0;
   4508
   4509	if (intel_gt_is_wedged(to_gt(i915)))
   4510		return 0;
   4511
   4512	return intel_gt_live_subtests(tests, to_gt(i915));
   4513}