cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

intel_ring_submission.c (37257B)


      1// SPDX-License-Identifier: MIT
      2/*
      3 * Copyright © 2008-2021 Intel Corporation
      4 */
      5
      6#include <drm/drm_cache.h>
      7
      8#include "gem/i915_gem_internal.h"
      9
     10#include "gen2_engine_cs.h"
     11#include "gen6_engine_cs.h"
     12#include "gen6_ppgtt.h"
     13#include "gen7_renderclear.h"
     14#include "i915_drv.h"
     15#include "i915_mitigations.h"
     16#include "intel_breadcrumbs.h"
     17#include "intel_context.h"
     18#include "intel_engine_regs.h"
     19#include "intel_gt.h"
     20#include "intel_gt_irq.h"
     21#include "intel_gt_regs.h"
     22#include "intel_reset.h"
     23#include "intel_ring.h"
     24#include "shmem_utils.h"
     25#include "intel_engine_heartbeat.h"
     26#include "intel_engine_pm.h"
     27
     28/* Rough estimate of the typical request size, performing a flush,
     29 * set-context and then emitting the batch.
     30 */
     31#define LEGACY_REQUEST_SIZE 200
     32
     33static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
     34{
     35	/*
     36	 * Keep the render interrupt unmasked as this papers over
     37	 * lost interrupts following a reset.
     38	 */
     39	if (engine->class == RENDER_CLASS) {
     40		if (GRAPHICS_VER(engine->i915) >= 6)
     41			mask &= ~BIT(0);
     42		else
     43			mask &= ~I915_USER_INTERRUPT;
     44	}
     45
     46	intel_engine_set_hwsp_writemask(engine, mask);
     47}
     48
     49static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
     50{
     51	u32 addr;
     52
     53	addr = lower_32_bits(phys);
     54	if (GRAPHICS_VER(engine->i915) >= 4)
     55		addr |= (phys >> 28) & 0xf0;
     56
     57	intel_uncore_write(engine->uncore, HWS_PGA, addr);
     58}
     59
     60static struct page *status_page(struct intel_engine_cs *engine)
     61{
     62	struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
     63
     64	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
     65	return sg_page(obj->mm.pages->sgl);
     66}
     67
     68static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
     69{
     70	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
     71	set_hwstam(engine, ~0u);
     72}
     73
     74static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
     75{
     76	i915_reg_t hwsp;
     77
     78	/*
     79	 * The ring status page addresses are no longer next to the rest of
     80	 * the ring registers as of gen7.
     81	 */
     82	if (GRAPHICS_VER(engine->i915) == 7) {
     83		switch (engine->id) {
     84		/*
     85		 * No other rings exist on gen7. The default case exists only to
     86		 * silence the gcc switch check warning.
     87		 */
     88		default:
     89			GEM_BUG_ON(engine->id);
     90			fallthrough;
     91		case RCS0:
     92			hwsp = RENDER_HWS_PGA_GEN7;
     93			break;
     94		case BCS0:
     95			hwsp = BLT_HWS_PGA_GEN7;
     96			break;
     97		case VCS0:
     98			hwsp = BSD_HWS_PGA_GEN7;
     99			break;
    100		case VECS0:
    101			hwsp = VEBOX_HWS_PGA_GEN7;
    102			break;
    103		}
    104	} else if (GRAPHICS_VER(engine->i915) == 6) {
    105		hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
    106	} else {
    107		hwsp = RING_HWS_PGA(engine->mmio_base);
    108	}
    109
    110	intel_uncore_write_fw(engine->uncore, hwsp, offset);
    111	intel_uncore_posting_read_fw(engine->uncore, hwsp);
    112}
    113
    114static void flush_cs_tlb(struct intel_engine_cs *engine)
    115{
    116	if (!IS_GRAPHICS_VER(engine->i915, 6, 7))
    117		return;
    118
    119	/* ring should be idle before issuing a sync flush */
    120	GEM_DEBUG_WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
    121
    122	ENGINE_WRITE_FW(engine, RING_INSTPM,
    123			_MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
    124					   INSTPM_SYNC_FLUSH));
    125	if (__intel_wait_for_register_fw(engine->uncore,
    126					 RING_INSTPM(engine->mmio_base),
    127					 INSTPM_SYNC_FLUSH, 0,
    128					 2000, 0, NULL))
    129		ENGINE_TRACE(engine,
    130			     "wait for SyncFlush to complete for TLB invalidation timed out\n");
    131}
    132
    133static void ring_setup_status_page(struct intel_engine_cs *engine)
    134{
    135	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
    136	set_hwstam(engine, ~0u);
    137
    138	flush_cs_tlb(engine);
    139}
    140
    141static struct i915_address_space *vm_alias(struct i915_address_space *vm)
    142{
    143	if (i915_is_ggtt(vm))
    144		vm = &i915_vm_to_ggtt(vm)->alias->vm;
    145
    146	return vm;
    147}
    148
    149static u32 pp_dir(struct i915_address_space *vm)
    150{
    151	return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir;
    152}
    153
    154static void set_pp_dir(struct intel_engine_cs *engine)
    155{
    156	struct i915_address_space *vm = vm_alias(engine->gt->vm);
    157
    158	if (!vm)
    159		return;
    160
    161	ENGINE_WRITE_FW(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
    162	ENGINE_WRITE_FW(engine, RING_PP_DIR_BASE, pp_dir(vm));
    163
    164	if (GRAPHICS_VER(engine->i915) >= 7) {
    165		ENGINE_WRITE_FW(engine,
    166				RING_MODE_GEN7,
    167				_MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
    168	}
    169}
    170
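       /*
        * Drain and disable the ring: let HEAD catch up with TAIL so the
        * ring reads as empty, clear RING_CTL to disable it, then zero HEAD
        * and TAIL. Returns true if the hardware accepted HEAD being reset
        * to zero.
        */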
    171static bool stop_ring(struct intel_engine_cs *engine)
    172{
    173	/* Empty the ring by skipping to the end */
    174	ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL));
    175	ENGINE_POSTING_READ(engine, RING_HEAD);
    176
    177	/* The ring must be empty before it is disabled */
    178	ENGINE_WRITE_FW(engine, RING_CTL, 0);
    179	ENGINE_POSTING_READ(engine, RING_CTL);
    180
    181	/* Then reset the disabled ring */
    182	ENGINE_WRITE_FW(engine, RING_HEAD, 0);
    183	ENGINE_WRITE_FW(engine, RING_TAIL, 0);
    184
    185	return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0;
    186}
    187
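       /*
        * Bring the legacy ringbuffer back up after reset/resume: stop the
        * ring, restore the status page, reprogram RING_START/HEAD/TAIL for
        * the saved ring contents, then enable the ring via RING_CTL and
        * wait for the hardware to report RING_VALID before letting
        * submission restart.
        */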
    188static int xcs_resume(struct intel_engine_cs *engine)
    189{
    190	struct intel_ring *ring = engine->legacy.ring;
    191
    192	ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
    193		     ring->head, ring->tail);
    194
    195	/*
    196	 * Double check the ring is empty & disabled before we resume. Called
    197	 * from atomic context during PCI probe, so _hardirq().
    198	 */
    199	intel_synchronize_hardirq(engine->i915);
    200	if (!stop_ring(engine))
    201		goto err;
    202
    203	if (HWS_NEEDS_PHYSICAL(engine->i915))
    204		ring_setup_phys_status_page(engine);
    205	else
    206		ring_setup_status_page(engine);
    207
    208	intel_breadcrumbs_reset(engine->breadcrumbs);
    209
    210	/* Enforce ordering by reading HEAD register back */
    211	ENGINE_POSTING_READ(engine, RING_HEAD);
    212
    213	/*
    214	 * Initialize the ring. This must happen _after_ we've cleared the ring
    215	 * registers with the above sequence (the readback of the HEAD registers
    216	 * also enforces ordering), otherwise the hw might lose the new ring
    217	 * register values.
    218	 */
    219	ENGINE_WRITE_FW(engine, RING_START, i915_ggtt_offset(ring->vma));
    220
    221	/* Check that the ring offsets point within the ring! */
    222	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
    223	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
    224	intel_ring_update_space(ring);
    225
    226	set_pp_dir(engine);
    227
    228	/* First wake the ring up to an empty/idle ring */
    229	ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
    230	ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
    231	ENGINE_POSTING_READ(engine, RING_TAIL);
    232
    233	ENGINE_WRITE_FW(engine, RING_CTL,
    234			RING_CTL_SIZE(ring->size) | RING_VALID);
    235
    236	/* If the ring does not report as valid (RING_VALID), it is dead */
    237	if (__intel_wait_for_register_fw(engine->uncore,
    238					 RING_CTL(engine->mmio_base),
    239					 RING_VALID, RING_VALID,
    240					 5000, 0, NULL))
    241		goto err;
    242
    243	if (GRAPHICS_VER(engine->i915) > 2)
    244		ENGINE_WRITE_FW(engine,
    245				RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
    246
    247	/* Now awake, let it get started */
    248	if (ring->tail != ring->head) {
    249		ENGINE_WRITE_FW(engine, RING_TAIL, ring->tail);
    250		ENGINE_POSTING_READ(engine, RING_TAIL);
    251	}
    252
    253	/* Papering over lost _interrupts_ immediately following the restart */
    254	intel_engine_signal_breadcrumbs(engine);
    255	return 0;
    256
    257err:
    258	drm_err(&engine->i915->drm,
    259		"%s initialization failed; "
    260		"ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
    261		engine->name,
    262		ENGINE_READ(engine, RING_CTL),
    263		ENGINE_READ(engine, RING_CTL) & RING_VALID,
    264		ENGINE_READ(engine, RING_HEAD), ring->head,
    265		ENGINE_READ(engine, RING_TAIL), ring->tail,
    266		ENGINE_READ(engine, RING_START),
    267		i915_ggtt_offset(ring->vma));
    268	return -EIO;
    269}
    270
    271static void sanitize_hwsp(struct intel_engine_cs *engine)
    272{
    273	struct intel_timeline *tl;
    274
    275	list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
    276		intel_timeline_reset_seqno(tl);
    277}
    278
    279static void xcs_sanitize(struct intel_engine_cs *engine)
    280{
    281	/*
    282	 * Poison residual state on resume, in case the suspend didn't!
    283	 *
    284	 * We have to assume that across suspend/resume (or other loss
    285	 * of control) that the contents of our pinned buffers have been
    286	 * lost, replaced by garbage. Since this doesn't always happen,
    287	 * let's poison such state so that we more quickly spot when
    288	 * we falsely assume it has been preserved.
    289	 */
    290	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
    291		memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
    292
    293	/*
    294	 * The kernel_context HWSP is stored in the status_page. As above,
    295	 * that may be lost on resume/initialisation, and so we need to
    296	 * reset the value in the HWSP.
    297	 */
    298	sanitize_hwsp(engine);
    299
    300	/* And scrub the dirty cachelines for the HWSP */
    301	drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
    302
    303	intel_engine_reset_pinned_contexts(engine);
    304}
    305
    306static void reset_prepare(struct intel_engine_cs *engine)
    307{
    308	/*
    309	 * We stop the engines, otherwise we might get a failed reset and a
    310	 * dead gpu (on elk). Also, modern gpus such as kbl can suffer
    311	 * from a system hang if a batchbuffer is progressing when
    312	 * the reset is issued, regardless of the READY_TO_RESET ack.
    313	 * Thus we assume it is best to stop the engines on all gens
    314	 * where we have a gpu reset.
    315	 *
    316	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
    317	 *
    318	 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
    319	 * WaClearRingBufHeadRegAtInit:ctg,elk
    320	 *
    321	 * FIXME: Wa for more modern gens needs to be validated
    322	 */
    323	ENGINE_TRACE(engine, "\n");
    324	intel_engine_stop_cs(engine);
    325
    326	if (!stop_ring(engine)) {
    327		/* G45 ring initialization often fails to reset head to zero */
    328		ENGINE_TRACE(engine,
    329			     "HEAD not reset to zero, "
    330			     "{ CTL:%08x, HEAD:%08x, TAIL:%08x, START:%08x }\n",
    331			     ENGINE_READ_FW(engine, RING_CTL),
    332			     ENGINE_READ_FW(engine, RING_HEAD),
    333			     ENGINE_READ_FW(engine, RING_TAIL),
    334			     ENGINE_READ_FW(engine, RING_START));
    335		if (!stop_ring(engine)) {
    336			drm_err(&engine->i915->drm,
    337				"failed to set %s head to zero "
    338				"ctl %08x head %08x tail %08x start %08x\n",
    339				engine->name,
    340				ENGINE_READ_FW(engine, RING_CTL),
    341				ENGINE_READ_FW(engine, RING_HEAD),
    342				ENGINE_READ_FW(engine, RING_TAIL),
    343				ENGINE_READ_FW(engine, RING_START));
    344		}
    345	}
    346}
    347
    348static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
    349{
    350	struct i915_request *pos, *rq;
    351	unsigned long flags;
    352	u32 head;
    353
    354	rq = NULL;
    355	spin_lock_irqsave(&engine->sched_engine->lock, flags);
    356	rcu_read_lock();
    357	list_for_each_entry(pos, &engine->sched_engine->requests, sched.link) {
    358		if (!__i915_request_is_complete(pos)) {
    359			rq = pos;
    360			break;
    361		}
    362	}
    363	rcu_read_unlock();
    364
    365	/*
    366	 * The guilty request will get skipped on a hung engine.
    367	 *
    368	 * Users of client default contexts do not rely on logical
    369	 * state preserved between batches so it is safe to execute
    370	 * queued requests following the hang. Non default contexts
    371	 * rely on preserved state, so skipping a batch loses the
    372	 * evolution of the state and it needs to be considered corrupted.
    373	 * Executing more queued batches on top of corrupted state is
    374	 * risky. But we take the risk by trying to advance through
    375	 * the queued requests in order to make the client behaviour
    376	 * more predictable around resets, by not throwing away a random
    377	 * amount of batches it has prepared for execution. Sophisticated
    378	 * clients can use gem_reset_stats_ioctl and dma fence status
    379	 * (exported via sync_file info ioctl on explicit fences) to observe
    380	 * when they lose the context state and need to rebuild accordingly.
    381	 *
    382	 * The context ban, and ultimately the client ban, mechanisms are safety
    383	 * valves if client submission ends up resulting in nothing more than
    384	 * subsequent hangs.
    385	 */
    386
    387	if (rq) {
    388		/*
    389		 * Try to restore the logical GPU state to match the
    390		 * continuation of the request queue. If we skip the
    391		 * context/PD restore, then the next request may try to execute
    392		 * assuming that its context is valid and loaded on the GPU and
    393		 * so may try to access invalid memory, prompting repeated GPU
    394		 * hangs.
    395		 *
    396		 * If the request was guilty, we still restore the logical
    397		 * state in case the next request requires it (e.g. the
    398		 * aliasing ppgtt), but skip over the hung batch.
    399		 *
    400		 * If the request was innocent, we try to replay the request
    401		 * with the restored context.
    402		 */
    403		__i915_request_reset(rq, stalled);
    404
    405		GEM_BUG_ON(rq->ring != engine->legacy.ring);
    406		head = rq->head;
    407	} else {
    408		head = engine->legacy.ring->tail;
    409	}
    410	engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);
    411
    412	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
    413}
    414
    415static void reset_finish(struct intel_engine_cs *engine)
    416{
    417}
    418
    419static void reset_cancel(struct intel_engine_cs *engine)
    420{
    421	struct i915_request *request;
    422	unsigned long flags;
    423
    424	spin_lock_irqsave(&engine->sched_engine->lock, flags);
    425
    426	/* Mark all submitted requests as skipped. */
    427	list_for_each_entry(request, &engine->sched_engine->requests, sched.link)
    428		i915_request_put(i915_request_mark_eio(request));
    429	intel_engine_signal_breadcrumbs(engine);
    430
    431	/* Remaining _unready_ requests will be nop'ed when submitted */
    432
    433	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
    434}
    435
    436static void i9xx_submit_request(struct i915_request *request)
    437{
    438	i915_request_submit(request);
    439	wmb(); /* paranoid flush writes out of the WCB before mmio */
    440
    441	ENGINE_WRITE(request->engine, RING_TAIL,
    442		     intel_ring_set_tail(request->ring, request->tail));
    443}
    444
    445static void __ring_context_fini(struct intel_context *ce)
    446{
    447	i915_vma_put(ce->state);
    448}
    449
    450static void ring_context_destroy(struct kref *ref)
    451{
    452	struct intel_context *ce = container_of(ref, typeof(*ce), ref);
    453
    454	GEM_BUG_ON(intel_context_is_pinned(ce));
    455
    456	if (ce->state)
    457		__ring_context_fini(ce);
    458
    459	intel_context_fini(ce);
    460	intel_context_free(ce);
    461}
    462
    463static int ring_context_init_default_state(struct intel_context *ce,
    464					   struct i915_gem_ww_ctx *ww)
    465{
    466	struct drm_i915_gem_object *obj = ce->state->obj;
    467	void *vaddr;
    468
    469	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
    470	if (IS_ERR(vaddr))
    471		return PTR_ERR(vaddr);
    472
    473	shmem_read(ce->engine->default_state, 0,
    474		   vaddr, ce->engine->context_size);
    475
    476	i915_gem_object_flush_map(obj);
    477	__i915_gem_object_release_map(obj);
    478
    479	__set_bit(CONTEXT_VALID_BIT, &ce->flags);
    480	return 0;
    481}
    482
    483static int ring_context_pre_pin(struct intel_context *ce,
    484				struct i915_gem_ww_ctx *ww,
    485				void **unused)
    486{
    487	struct i915_address_space *vm;
    488	int err = 0;
    489
    490	if (ce->engine->default_state &&
    491	    !test_bit(CONTEXT_VALID_BIT, &ce->flags)) {
    492		err = ring_context_init_default_state(ce, ww);
    493		if (err)
    494			return err;
    495	}
    496
    497	vm = vm_alias(ce->vm);
    498	if (vm)
    499		err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww);
    500
    501	return err;
    502}
    503
    504static void __context_unpin_ppgtt(struct intel_context *ce)
    505{
    506	struct i915_address_space *vm;
    507
    508	vm = vm_alias(ce->vm);
    509	if (vm)
    510		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
    511}
    512
    513static void ring_context_unpin(struct intel_context *ce)
    514{
    515}
    516
    517static void ring_context_post_unpin(struct intel_context *ce)
    518{
    519	__context_unpin_ppgtt(ce);
    520}
    521
    522static struct i915_vma *
    523alloc_context_vma(struct intel_engine_cs *engine)
    524{
    525	struct drm_i915_private *i915 = engine->i915;
    526	struct drm_i915_gem_object *obj;
    527	struct i915_vma *vma;
    528	int err;
    529
    530	obj = i915_gem_object_create_shmem(i915, engine->context_size);
    531	if (IS_ERR(obj))
    532		return ERR_CAST(obj);
    533
    534	/*
    535	 * Try to make the context utilize L3 as well as LLC.
    536	 *
    537	 * On VLV we don't have L3 controls in the PTEs so we
    538	 * shouldn't touch the cache level, especially as that
    539	 * would make the object snooped which might have a
    540	 * negative performance impact.
    541	 *
    542	 * Snooping is required on non-llc platforms in execlist
    543	 * mode, but since all GGTT accesses use PAT entry 0 we
    544	 * get snooping anyway regardless of cache_level.
    545	 *
    546	 * This is only applicable for Ivy Bridge devices since
    547	 * later platforms don't have L3 control bits in the PTE.
    548	 */
    549	if (IS_IVYBRIDGE(i915))
    550		i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
    551
    552	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
    553	if (IS_ERR(vma)) {
    554		err = PTR_ERR(vma);
    555		goto err_obj;
    556	}
    557
    558	return vma;
    559
    560err_obj:
    561	i915_gem_object_put(obj);
    562	return ERR_PTR(err);
    563}
    564
    565static int ring_context_alloc(struct intel_context *ce)
    566{
    567	struct intel_engine_cs *engine = ce->engine;
    568
    569	/* One ringbuffer to rule them all */
    570	GEM_BUG_ON(!engine->legacy.ring);
    571	ce->ring = engine->legacy.ring;
    572	ce->timeline = intel_timeline_get(engine->legacy.timeline);
    573
    574	GEM_BUG_ON(ce->state);
    575	if (engine->context_size) {
    576		struct i915_vma *vma;
    577
    578		vma = alloc_context_vma(engine);
    579		if (IS_ERR(vma))
    580			return PTR_ERR(vma);
    581
    582		ce->state = vma;
    583	}
    584
    585	return 0;
    586}
    587
    588static int ring_context_pin(struct intel_context *ce, void *unused)
    589{
    590	return 0;
    591}
    592
    593static void ring_context_reset(struct intel_context *ce)
    594{
    595	intel_ring_reset(ce->ring, ce->ring->emit);
    596	clear_bit(CONTEXT_VALID_BIT, &ce->flags);
    597}
    598
    599static void ring_context_ban(struct intel_context *ce,
    600			     struct i915_request *rq)
    601{
    602	struct intel_engine_cs *engine;
    603
    604	if (!rq || !i915_request_is_active(rq))
    605		return;
    606
    607	engine = rq->engine;
    608	lockdep_assert_held(&engine->sched_engine->lock);
    609	list_for_each_entry_continue(rq, &engine->sched_engine->requests,
    610				     sched.link)
    611		if (rq->context == ce) {
    612			i915_request_set_error_once(rq, -EIO);
    613			__i915_request_skip(rq);
    614		}
    615}
    616
    617static void ring_context_cancel_request(struct intel_context *ce,
    618					struct i915_request *rq)
    619{
    620	struct intel_engine_cs *engine = NULL;
    621
    622	i915_request_active_engine(rq, &engine);
    623
    624	if (engine && intel_engine_pulse(engine))
    625		intel_gt_handle_error(engine->gt, engine->mask, 0,
    626				      "request cancellation by %s",
    627				      current->comm);
    628}
    629
    630static const struct intel_context_ops ring_context_ops = {
    631	.alloc = ring_context_alloc,
    632
    633	.cancel_request = ring_context_cancel_request,
    634
    635	.ban = ring_context_ban,
    636
    637	.pre_pin = ring_context_pre_pin,
    638	.pin = ring_context_pin,
    639	.unpin = ring_context_unpin,
    640	.post_unpin = ring_context_post_unpin,
    641
    642	.enter = intel_context_enter_engine,
    643	.exit = intel_context_exit_engine,
    644
    645	.reset = ring_context_reset,
    646	.destroy = ring_context_destroy,
    647};
    648
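       /*
        * Emit MI_LOAD_REGISTER_IMM commands to point the ring's
        * PP_DIR_DCLV and PP_DIR_BASE registers at the ppgtt, read
        * PP_DIR_BASE back into scratch to serialise the load, then force a
        * TLB invalidation via INSTPM.
        */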
    649static int load_pd_dir(struct i915_request *rq,
    650		       struct i915_address_space *vm,
    651		       u32 valid)
    652{
    653	const struct intel_engine_cs * const engine = rq->engine;
    654	u32 *cs;
    655
    656	cs = intel_ring_begin(rq, 12);
    657	if (IS_ERR(cs))
    658		return PTR_ERR(cs);
    659
    660	*cs++ = MI_LOAD_REGISTER_IMM(1);
    661	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
    662	*cs++ = valid;
    663
    664	*cs++ = MI_LOAD_REGISTER_IMM(1);
    665	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
    666	*cs++ = pp_dir(vm);
    667
    668	/* Stall until the page table load is complete? */
    669	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
    670	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
    671	*cs++ = intel_gt_scratch_offset(engine->gt,
    672					INTEL_GT_SCRATCH_FIELD_DEFAULT);
    673
    674	*cs++ = MI_LOAD_REGISTER_IMM(1);
    675	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
    676	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
    677
    678	intel_ring_advance(rq, cs);
    679
    680	return rq->engine->emit_flush(rq, EMIT_FLUSH);
    681}
    682
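       /*
        * Emit an MI_SET_CONTEXT for @ce along with its surrounding
        * workarounds: on gen7, arbitration is disabled and the other
        * engines' PSMI sleep messages are masked around the switch; on
        * gen5, the switch is wrapped in MI_SUSPEND_FLUSH; and a dummy
        * switch to the kernel context is inserted first when a forced
        * restore is requested.
        */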
    683static int mi_set_context(struct i915_request *rq,
    684			  struct intel_context *ce,
    685			  u32 flags)
    686{
    687	struct intel_engine_cs *engine = rq->engine;
    688	struct drm_i915_private *i915 = engine->i915;
    689	enum intel_engine_id id;
    690	const int num_engines =
    691		IS_HASWELL(i915) ? engine->gt->info.num_engines - 1 : 0;
    692	bool force_restore = false;
    693	int len;
    694	u32 *cs;
    695
    696	len = 4;
    697	if (GRAPHICS_VER(i915) == 7)
    698		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
    699	else if (GRAPHICS_VER(i915) == 5)
    700		len += 2;
    701	if (flags & MI_FORCE_RESTORE) {
    702		GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
    703		flags &= ~MI_FORCE_RESTORE;
    704		force_restore = true;
    705		len += 2;
    706	}
    707
    708	cs = intel_ring_begin(rq, len);
    709	if (IS_ERR(cs))
    710		return PTR_ERR(cs);
    711
    712	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
    713	if (GRAPHICS_VER(i915) == 7) {
    714		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
    715		if (num_engines) {
    716			struct intel_engine_cs *signaller;
    717
    718			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
    719			for_each_engine(signaller, engine->gt, id) {
    720				if (signaller == engine)
    721					continue;
    722
    723				*cs++ = i915_mmio_reg_offset(
    724					   RING_PSMI_CTL(signaller->mmio_base));
    725				*cs++ = _MASKED_BIT_ENABLE(
    726						GEN6_PSMI_SLEEP_MSG_DISABLE);
    727			}
    728		}
    729	} else if (GRAPHICS_VER(i915) == 5) {
    730		/*
    731		 * This w/a is only listed for pre-production ilk a/b steppings,
    732		 * but is also mentioned for programming the powerctx. To be
    733		 * safe, just apply the workaround; we do not use SyncFlush so
    734		 * this should never take effect and so be a no-op!
    735		 */
    736		*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
    737	}
    738
    739	if (force_restore) {
    740		/*
    741		 * The HW doesn't handle being told to restore the current
    742		 * context very well. Quite often it likes to go off and
    743		 * sulk, especially when it is meant to be reloading PP_DIR.
    744		 * A very simple fix to force the reload is to simply switch
    745		 * away from the current context and back again.
    746		 *
    747		 * Note that the kernel_context will contain random state
    748		 * following the INHIBIT_RESTORE. We accept this since we
    749		 * never use the kernel_context state; it is merely a
    750		 * placeholder we use to flush other contexts.
    751		 */
    752		*cs++ = MI_SET_CONTEXT;
    753		*cs++ = i915_ggtt_offset(engine->kernel_context->state) |
    754			MI_MM_SPACE_GTT |
    755			MI_RESTORE_INHIBIT;
    756	}
    757
    758	*cs++ = MI_NOOP;
    759	*cs++ = MI_SET_CONTEXT;
    760	*cs++ = i915_ggtt_offset(ce->state) | flags;
    761	/*
    762	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
    763	 * WaMiSetContext_Hang:snb,ivb,vlv
    764	 */
    765	*cs++ = MI_NOOP;
    766
    767	if (GRAPHICS_VER(i915) == 7) {
    768		if (num_engines) {
    769			struct intel_engine_cs *signaller;
    770			i915_reg_t last_reg = INVALID_MMIO_REG; /* keep gcc quiet */
    771
    772			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
    773			for_each_engine(signaller, engine->gt, id) {
    774				if (signaller == engine)
    775					continue;
    776
    777				last_reg = RING_PSMI_CTL(signaller->mmio_base);
    778				*cs++ = i915_mmio_reg_offset(last_reg);
    779				*cs++ = _MASKED_BIT_DISABLE(
    780						GEN6_PSMI_SLEEP_MSG_DISABLE);
    781			}
    782
    783			/* Insert a delay before the next switch! */
    784			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
    785			*cs++ = i915_mmio_reg_offset(last_reg);
    786			*cs++ = intel_gt_scratch_offset(engine->gt,
    787							INTEL_GT_SCRATCH_FIELD_DEFAULT);
    788			*cs++ = MI_NOOP;
    789		}
    790		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
    791	} else if (GRAPHICS_VER(i915) == 5) {
    792		*cs++ = MI_SUSPEND_FLUSH;
    793	}
    794
    795	intel_ring_advance(rq, cs);
    796
    797	return 0;
    798}
    799
    800static int remap_l3_slice(struct i915_request *rq, int slice)
    801{
    802#define L3LOG_DW (GEN7_L3LOG_SIZE / sizeof(u32))
    803	u32 *cs, *remap_info = rq->engine->i915->l3_parity.remap_info[slice];
    804	int i;
    805
    806	if (!remap_info)
    807		return 0;
    808
    809	cs = intel_ring_begin(rq, L3LOG_DW * 2 + 2);
    810	if (IS_ERR(cs))
    811		return PTR_ERR(cs);
    812
    813	/*
    814	 * Note: We do not worry about the concurrent register cacheline hang
    815	 * here because no other code should access these registers other than
    816	 * at initialization time.
    817	 */
    818	*cs++ = MI_LOAD_REGISTER_IMM(L3LOG_DW);
    819	for (i = 0; i < L3LOG_DW; i++) {
    820		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
    821		*cs++ = remap_info[i];
    822	}
    823	*cs++ = MI_NOOP;
    824	intel_ring_advance(rq, cs);
    825
    826	return 0;
    827#undef L3LOG_DW
    828}
    829
    830static int remap_l3(struct i915_request *rq)
    831{
    832	struct i915_gem_context *ctx = i915_request_gem_context(rq);
    833	int i, err;
    834
    835	if (!ctx || !ctx->remap_slice)
    836		return 0;
    837
    838	for (i = 0; i < MAX_L3_SLICES; i++) {
    839		if (!(ctx->remap_slice & BIT(i)))
    840			continue;
    841
    842		err = remap_l3_slice(rq, i);
    843		if (err)
    844			return err;
    845	}
    846
    847	ctx->remap_slice = 0;
    848	return 0;
    849}
    850
    851static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
    852{
    853	int ret;
    854
    855	if (!vm)
    856		return 0;
    857
    858	ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
    859	if (ret)
    860		return ret;
    861
    862	/*
    863	 * Not only do we need a full barrier (post-sync write) after
    864	 * invalidating the TLBs, but we need to wait a little bit
    865	 * longer. Whether this is merely delaying us, or the
    866	 * subsequent flush is a key part of serialising with the
    867	 * post-sync op, this extra pass appears vital before a
    868	 * mm switch!
    869	 */
    870	ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G);
    871	if (ret)
    872		return ret;
    873
    874	return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
    875}
    876
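       /*
        * Switch to the kernel context and run the engine's workaround
        * batch (wa_ctx) to scrub residual GPU state before another user
        * context takes over the engine.
        */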
    877static int clear_residuals(struct i915_request *rq)
    878{
    879	struct intel_engine_cs *engine = rq->engine;
    880	int ret;
    881
    882	ret = switch_mm(rq, vm_alias(engine->kernel_context->vm));
    883	if (ret)
    884		return ret;
    885
    886	if (engine->kernel_context->state) {
    887		ret = mi_set_context(rq,
    888				     engine->kernel_context,
    889				     MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT);
    890		if (ret)
    891			return ret;
    892	}
    893
    894	ret = engine->emit_bb_start(rq,
    895				    engine->wa_ctx.vma->node.start, 0,
    896				    0);
    897	if (ret)
    898		return ret;
    899
    900	ret = engine->emit_flush(rq, EMIT_FLUSH);
    901	if (ret)
    902		return ret;
    903
    904	/* Always invalidate before the next switch_mm() */
    905	return engine->emit_flush(rq, EMIT_INVALIDATE);
    906}
    907
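       /*
        * Emit everything needed to run @rq under its own context:
        * optionally clear residuals left behind by the previous context,
        * switch the ppgtt, issue MI_SET_CONTEXT for engines with
        * per-context state, and replay any pending L3 remapping.
        */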
    908static int switch_context(struct i915_request *rq)
    909{
    910	struct intel_engine_cs *engine = rq->engine;
    911	struct intel_context *ce = rq->context;
    912	void **residuals = NULL;
    913	int ret;
    914
    915	GEM_BUG_ON(HAS_EXECLISTS(engine->i915));
    916
    917	if (engine->wa_ctx.vma && ce != engine->kernel_context) {
    918		if (engine->wa_ctx.vma->private != ce &&
    919		    i915_mitigate_clear_residuals()) {
    920			ret = clear_residuals(rq);
    921			if (ret)
    922				return ret;
    923
    924			residuals = &engine->wa_ctx.vma->private;
    925		}
    926	}
    927
    928	ret = switch_mm(rq, vm_alias(ce->vm));
    929	if (ret)
    930		return ret;
    931
    932	if (ce->state) {
    933		u32 flags;
    934
    935		GEM_BUG_ON(engine->id != RCS0);
    936
    937		/* For resource streamer on HSW+ and power context elsewhere */
    938		BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
    939		BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);
    940
    941		flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
    942		if (test_bit(CONTEXT_VALID_BIT, &ce->flags))
    943			flags |= MI_RESTORE_EXT_STATE_EN;
    944		else
    945			flags |= MI_RESTORE_INHIBIT;
    946
    947		ret = mi_set_context(rq, ce, flags);
    948		if (ret)
    949			return ret;
    950	}
    951
    952	ret = remap_l3(rq);
    953	if (ret)
    954		return ret;
    955
    956	/*
    957	 * Now past the point of no return, this request _will_ be emitted.
    958	 *
    959	 * Or at least this preamble will be emitted; the request may be
    960	 * interrupted prior to submitting the user payload. If so, we
    961	 * still submit the "empty" request in order to preserve global
    962	 * state tracking such as this, our tracking of the current
    963	 * dirty context.
    964	 */
    965	if (residuals) {
    966		intel_context_put(*residuals);
    967		*residuals = intel_context_get(ce);
    968	}
    969
    970	return 0;
    971}
    972
    973static int ring_request_alloc(struct i915_request *request)
    974{
    975	int ret;
    976
    977	GEM_BUG_ON(!intel_context_is_pinned(request->context));
    978	GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);
    979
    980	/*
    981	 * Flush enough space to reduce the likelihood of waiting after
    982	 * we start building the request - in which case we will just
    983	 * have to repeat work.
    984	 */
    985	request->reserved_space += LEGACY_REQUEST_SIZE;
    986
    987	/* Unconditionally invalidate GPU caches and TLBs. */
    988	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
    989	if (ret)
    990		return ret;
    991
    992	ret = switch_context(request);
    993	if (ret)
    994		return ret;
    995
    996	request->reserved_space -= LEGACY_REQUEST_SIZE;
    997	return 0;
    998}
    999
   1000static void gen6_bsd_submit_request(struct i915_request *request)
   1001{
   1002	struct intel_uncore *uncore = request->engine->uncore;
   1003
   1004	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
   1005
   1006	/* Every tail move must follow the sequence below */
   1007
   1008	/* Disable notification that the ring is IDLE. The GT
   1009	 * will then assume that it is busy and bring it out of rc6.
   1010	 */
   1011	intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
   1012			      _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
   1013
   1014	/* Clear the context id. Here be magic! */
   1015	intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);
   1016
   1017	/* Wait for the ring not to be idle, i.e. for it to wake up. */
   1018	if (__intel_wait_for_register_fw(uncore,
   1019					 RING_PSMI_CTL(GEN6_BSD_RING_BASE),
   1020					 GEN6_BSD_SLEEP_INDICATOR,
   1021					 0,
   1022					 1000, 0, NULL))
   1023		drm_err(&uncore->i915->drm,
   1024			"timed out waiting for the BSD ring to wake up\n");
   1025
   1026	/* Now that the ring is fully powered up, update the tail */
   1027	i9xx_submit_request(request);
   1028
   1029	/* Let the ring send IDLE messages to the GT again,
   1030	 * and so let it sleep to conserve power when idle.
   1031	 */
   1032	intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
   1033			      _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
   1034
   1035	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
   1036}
   1037
   1038static void i9xx_set_default_submission(struct intel_engine_cs *engine)
   1039{
   1040	engine->submit_request = i9xx_submit_request;
   1041}
   1042
   1043static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
   1044{
   1045	engine->submit_request = gen6_bsd_submit_request;
   1046}
   1047
   1048static void ring_release(struct intel_engine_cs *engine)
   1049{
   1050	struct drm_i915_private *dev_priv = engine->i915;
   1051
   1052	drm_WARN_ON(&dev_priv->drm, GRAPHICS_VER(dev_priv) > 2 &&
   1053		    (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
   1054
   1055	intel_engine_cleanup_common(engine);
   1056
   1057	if (engine->wa_ctx.vma) {
   1058		intel_context_put(engine->wa_ctx.vma->private);
   1059		i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
   1060	}
   1061
   1062	intel_ring_unpin(engine->legacy.ring);
   1063	intel_ring_put(engine->legacy.ring);
   1064
   1065	intel_timeline_unpin(engine->legacy.timeline);
   1066	intel_timeline_put(engine->legacy.timeline);
   1067}
   1068
   1069static void irq_handler(struct intel_engine_cs *engine, u16 iir)
   1070{
   1071	intel_engine_signal_breadcrumbs(engine);
   1072}
   1073
   1074static void setup_irq(struct intel_engine_cs *engine)
   1075{
   1076	struct drm_i915_private *i915 = engine->i915;
   1077
   1078	intel_engine_set_irq_handler(engine, irq_handler);
   1079
   1080	if (GRAPHICS_VER(i915) >= 6) {
   1081		engine->irq_enable = gen6_irq_enable;
   1082		engine->irq_disable = gen6_irq_disable;
   1083	} else if (GRAPHICS_VER(i915) >= 5) {
   1084		engine->irq_enable = gen5_irq_enable;
   1085		engine->irq_disable = gen5_irq_disable;
   1086	} else if (GRAPHICS_VER(i915) >= 3) {
   1087		engine->irq_enable = gen3_irq_enable;
   1088		engine->irq_disable = gen3_irq_disable;
   1089	} else {
   1090		engine->irq_enable = gen2_irq_enable;
   1091		engine->irq_disable = gen2_irq_disable;
   1092	}
   1093}
   1094
   1095static void add_to_engine(struct i915_request *rq)
   1096{
   1097	lockdep_assert_held(&rq->engine->sched_engine->lock);
   1098	list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests);
   1099}
   1100
   1101static void remove_from_engine(struct i915_request *rq)
   1102{
   1103	spin_lock_irq(&rq->engine->sched_engine->lock);
   1104	list_del_init(&rq->sched.link);
   1105
   1106	/* Prevent further __await_execution() registering a cb, then flush */
   1107	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
   1108
   1109	spin_unlock_irq(&rq->engine->sched_engine->lock);
   1110
   1111	i915_request_notify_execute_cb_imm(rq);
   1112}
   1113
   1114static void setup_common(struct intel_engine_cs *engine)
   1115{
   1116	struct drm_i915_private *i915 = engine->i915;
   1117
   1118	/* gen8+ are only supported with execlists */
   1119	GEM_BUG_ON(GRAPHICS_VER(i915) >= 8);
   1120
   1121	setup_irq(engine);
   1122
   1123	engine->resume = xcs_resume;
   1124	engine->sanitize = xcs_sanitize;
   1125
   1126	engine->reset.prepare = reset_prepare;
   1127	engine->reset.rewind = reset_rewind;
   1128	engine->reset.cancel = reset_cancel;
   1129	engine->reset.finish = reset_finish;
   1130
   1131	engine->add_active_request = add_to_engine;
   1132	engine->remove_active_request = remove_from_engine;
   1133
   1134	engine->cops = &ring_context_ops;
   1135	engine->request_alloc = ring_request_alloc;
   1136
   1137	/*
   1138	 * Using a global execution timeline; the previous final breadcrumb is
   1139	 * equivalent to our next initial breadcrumb so we can elide
   1140	 * engine->emit_init_breadcrumb().
   1141	 */
   1142	engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
   1143	if (GRAPHICS_VER(i915) == 5)
   1144		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
   1145
   1146	engine->set_default_submission = i9xx_set_default_submission;
   1147
   1148	if (GRAPHICS_VER(i915) >= 6)
   1149		engine->emit_bb_start = gen6_emit_bb_start;
   1150	else if (GRAPHICS_VER(i915) >= 4)
   1151		engine->emit_bb_start = gen4_emit_bb_start;
   1152	else if (IS_I830(i915) || IS_I845G(i915))
   1153		engine->emit_bb_start = i830_emit_bb_start;
   1154	else
   1155		engine->emit_bb_start = gen3_emit_bb_start;
   1156}
   1157
   1158static void setup_rcs(struct intel_engine_cs *engine)
   1159{
   1160	struct drm_i915_private *i915 = engine->i915;
   1161
   1162	if (HAS_L3_DPF(i915))
   1163		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
   1164
   1165	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
   1166
   1167	if (GRAPHICS_VER(i915) >= 7) {
   1168		engine->emit_flush = gen7_emit_flush_rcs;
   1169		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs;
   1170	} else if (GRAPHICS_VER(i915) == 6) {
   1171		engine->emit_flush = gen6_emit_flush_rcs;
   1172		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs;
   1173	} else if (GRAPHICS_VER(i915) == 5) {
   1174		engine->emit_flush = gen4_emit_flush_rcs;
   1175	} else {
   1176		if (GRAPHICS_VER(i915) < 4)
   1177			engine->emit_flush = gen2_emit_flush;
   1178		else
   1179			engine->emit_flush = gen4_emit_flush_rcs;
   1180		engine->irq_enable_mask = I915_USER_INTERRUPT;
   1181	}
   1182
   1183	if (IS_HASWELL(i915))
   1184		engine->emit_bb_start = hsw_emit_bb_start;
   1185}
   1186
   1187static void setup_vcs(struct intel_engine_cs *engine)
   1188{
   1189	struct drm_i915_private *i915 = engine->i915;
   1190
   1191	if (GRAPHICS_VER(i915) >= 6) {
   1192		/* gen6 bsd needs a special wa for tail updates */
   1193		if (GRAPHICS_VER(i915) == 6)
   1194			engine->set_default_submission = gen6_bsd_set_default_submission;
   1195		engine->emit_flush = gen6_emit_flush_vcs;
   1196		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
   1197
   1198		if (GRAPHICS_VER(i915) == 6)
   1199			engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
   1200		else
   1201			engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
   1202	} else {
   1203		engine->emit_flush = gen4_emit_flush_vcs;
   1204		if (GRAPHICS_VER(i915) == 5)
   1205			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
   1206		else
   1207			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
   1208	}
   1209}
   1210
   1211static void setup_bcs(struct intel_engine_cs *engine)
   1212{
   1213	struct drm_i915_private *i915 = engine->i915;
   1214
   1215	engine->emit_flush = gen6_emit_flush_xcs;
   1216	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
   1217
   1218	if (GRAPHICS_VER(i915) == 6)
   1219		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
   1220	else
   1221		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
   1222}
   1223
   1224static void setup_vecs(struct intel_engine_cs *engine)
   1225{
   1226	struct drm_i915_private *i915 = engine->i915;
   1227
   1228	GEM_BUG_ON(GRAPHICS_VER(i915) < 7);
   1229
   1230	engine->emit_flush = gen6_emit_flush_xcs;
   1231	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
   1232	engine->irq_enable = hsw_irq_enable_vecs;
   1233	engine->irq_disable = hsw_irq_disable_vecs;
   1234
   1235	engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
   1236}
   1237
   1238static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine,
   1239				    struct i915_vma * const vma)
   1240{
   1241	return gen7_setup_clear_gpr_bb(engine, vma);
   1242}
   1243
   1244static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine,
   1245				   struct i915_gem_ww_ctx *ww,
   1246				   struct i915_vma *vma)
   1247{
   1248	int err;
   1249
   1250	err = i915_vma_pin_ww(vma, ww, 0, 0, PIN_USER | PIN_HIGH);
   1251	if (err)
   1252		return err;
   1253
   1254	err = i915_vma_sync(vma);
   1255	if (err)
   1256		goto err_unpin;
   1257
   1258	err = gen7_ctx_switch_bb_setup(engine, vma);
   1259	if (err)
   1260		goto err_unpin;
   1261
   1262	engine->wa_ctx.vma = vma;
   1263	return 0;
   1264
   1265err_unpin:
   1266	i915_vma_unpin(vma);
   1267	return err;
   1268}
   1269
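       /*
        * Allocate the buffer for the gen7 context-switch workaround batch.
        * The required size is probed by calling the setup function with a
        * NULL vma, and a dummy context is attached as vma->private to
        * record which context last owned the engine so that back-to-back
        * requests from the same context can skip the clear.
        */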
   1270static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine)
   1271{
   1272	struct drm_i915_gem_object *obj;
   1273	struct i915_vma *vma;
   1274	int size, err;
   1275
   1276	if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS)
   1277		return NULL;
   1278
   1279	err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */);
   1280	if (err < 0)
   1281		return ERR_PTR(err);
   1282	if (!err)
   1283		return NULL;
   1284
   1285	size = ALIGN(err, PAGE_SIZE);
   1286
   1287	obj = i915_gem_object_create_internal(engine->i915, size);
   1288	if (IS_ERR(obj))
   1289		return ERR_CAST(obj);
   1290
   1291	vma = i915_vma_instance(obj, engine->gt->vm, NULL);
   1292	if (IS_ERR(vma)) {
   1293		i915_gem_object_put(obj);
   1294		return ERR_CAST(vma);
   1295	}
   1296
   1297	vma->private = intel_context_create(engine); /* dummy residuals */
   1298	if (IS_ERR(vma->private)) {
   1299		err = PTR_ERR(vma->private);
   1300		vma->private = NULL;
   1301		i915_gem_object_put(obj);
   1302		return ERR_PTR(err);
   1303	}
   1304
   1305	return vma;
   1306}
   1307
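       /*
        * Set up an engine for legacy ringbuffer submission: install the
        * class-specific vfuncs, create the single ring and global timeline
        * shared by every context on this engine, allocate the optional
        * gen7 workaround batch, and pin everything under a ww transaction.
        */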
   1308int intel_ring_submission_setup(struct intel_engine_cs *engine)
   1309{
   1310	struct i915_gem_ww_ctx ww;
   1311	struct intel_timeline *timeline;
   1312	struct intel_ring *ring;
   1313	struct i915_vma *gen7_wa_vma;
   1314	int err;
   1315
   1316	setup_common(engine);
   1317
   1318	switch (engine->class) {
   1319	case RENDER_CLASS:
   1320		setup_rcs(engine);
   1321		break;
   1322	case VIDEO_DECODE_CLASS:
   1323		setup_vcs(engine);
   1324		break;
   1325	case COPY_ENGINE_CLASS:
   1326		setup_bcs(engine);
   1327		break;
   1328	case VIDEO_ENHANCEMENT_CLASS:
   1329		setup_vecs(engine);
   1330		break;
   1331	default:
   1332		MISSING_CASE(engine->class);
   1333		return -ENODEV;
   1334	}
   1335
   1336	timeline = intel_timeline_create_from_engine(engine,
   1337						     I915_GEM_HWS_SEQNO_ADDR);
   1338	if (IS_ERR(timeline)) {
   1339		err = PTR_ERR(timeline);
   1340		goto err;
   1341	}
   1342	GEM_BUG_ON(timeline->has_initial_breadcrumb);
   1343
   1344	ring = intel_engine_create_ring(engine, SZ_16K);
   1345	if (IS_ERR(ring)) {
   1346		err = PTR_ERR(ring);
   1347		goto err_timeline;
   1348	}
   1349
   1350	GEM_BUG_ON(engine->legacy.ring);
   1351	engine->legacy.ring = ring;
   1352	engine->legacy.timeline = timeline;
   1353
   1354	gen7_wa_vma = gen7_ctx_vma(engine);
   1355	if (IS_ERR(gen7_wa_vma)) {
   1356		err = PTR_ERR(gen7_wa_vma);
   1357		goto err_ring;
   1358	}
   1359
   1360	i915_gem_ww_ctx_init(&ww, false);
   1361
   1362retry:
   1363	err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww);
   1364	if (!err && gen7_wa_vma)
   1365		err = i915_gem_object_lock(gen7_wa_vma->obj, &ww);
   1366	if (!err)
   1367		err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww);
   1368	if (!err)
   1369		err = intel_timeline_pin(timeline, &ww);
   1370	if (!err) {
   1371		err = intel_ring_pin(ring, &ww);
   1372		if (err)
   1373			intel_timeline_unpin(timeline);
   1374	}
   1375	if (err)
   1376		goto out;
   1377
   1378	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
   1379
   1380	if (gen7_wa_vma) {
   1381		err = gen7_ctx_switch_bb_init(engine, &ww, gen7_wa_vma);
   1382		if (err) {
   1383			intel_ring_unpin(ring);
   1384			intel_timeline_unpin(timeline);
   1385		}
   1386	}
   1387
   1388out:
   1389	if (err == -EDEADLK) {
   1390		err = i915_gem_ww_ctx_backoff(&ww);
   1391		if (!err)
   1392			goto retry;
   1393	}
   1394	i915_gem_ww_ctx_fini(&ww);
   1395	if (err)
   1396		goto err_gen7_put;
   1397
   1398	/* Finally, take ownership and responsibility for cleanup! */
   1399	engine->release = ring_release;
   1400
   1401	return 0;
   1402
   1403err_gen7_put:
   1404	if (gen7_wa_vma) {
   1405		intel_context_put(gen7_wa_vma->private);
   1406		i915_gem_object_put(gen7_wa_vma->obj);
   1407	}
   1408err_ring:
   1409	intel_ring_put(ring);
   1410err_timeline:
   1411	intel_timeline_put(timeline);
   1412err:
   1413	intel_engine_cleanup_common(engine);
   1414	return err;
   1415}
   1416
   1417#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
   1418#include "selftest_ring_submission.c"
   1419#endif