cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

intel_gt.c (30580B)


      1// SPDX-License-Identifier: MIT
      2/*
      3 * Copyright © 2019 Intel Corporation
      4 */
      5
      6#include <drm/drm_managed.h>
      7
      8#include "gem/i915_gem_internal.h"
      9#include "gem/i915_gem_lmem.h"
     10#include "pxp/intel_pxp.h"
     11
     12#include "i915_drv.h"
     13#include "intel_context.h"
     14#include "intel_engine_regs.h"
     15#include "intel_gt.h"
     16#include "intel_gt_buffer_pool.h"
     17#include "intel_gt_clock_utils.h"
     18#include "intel_gt_debugfs.h"
     19#include "intel_gt_gmch.h"
     20#include "intel_gt_pm.h"
     21#include "intel_gt_regs.h"
     22#include "intel_gt_requests.h"
     23#include "intel_migrate.h"
     24#include "intel_mocs.h"
     25#include "intel_pm.h"
     26#include "intel_rc6.h"
     27#include "intel_renderstate.h"
     28#include "intel_rps.h"
     29#include "intel_gt_sysfs.h"
     30#include "intel_uncore.h"
     31#include "shmem_utils.h"
     32
     33static void __intel_gt_init_early(struct intel_gt *gt)
     34{
     35	spin_lock_init(&gt->irq_lock);
     36
     37	mutex_init(&gt->tlb_invalidate_lock);
     38
     39	INIT_LIST_HEAD(&gt->closed_vma);
     40	spin_lock_init(&gt->closed_lock);
     41
     42	init_llist_head(&gt->watchdog.list);
     43	INIT_WORK(&gt->watchdog.work, intel_gt_watchdog_work);
     44
     45	intel_gt_init_buffer_pool(gt);
     46	intel_gt_init_reset(gt);
     47	intel_gt_init_requests(gt);
     48	intel_gt_init_timelines(gt);
     49	intel_gt_pm_init_early(gt);
     50
     51	intel_uc_init_early(&gt->uc);
     52	intel_rps_init_early(&gt->rps);
     53}
     54
     55/* Preliminary initialization of Tile 0 */
     56void intel_root_gt_init_early(struct drm_i915_private *i915)
     57{
     58	struct intel_gt *gt = to_gt(i915);
     59
     60	gt->i915 = i915;
     61	gt->uncore = &i915->uncore;
     62
     63	__intel_gt_init_early(gt);
     64}
     65
     66static int intel_gt_probe_lmem(struct intel_gt *gt)
     67{
     68	struct drm_i915_private *i915 = gt->i915;
     69	unsigned int instance = gt->info.id;
     70	int id = INTEL_REGION_LMEM_0 + instance;
     71	struct intel_memory_region *mem;
     72	int err;
     73
     74	mem = intel_gt_setup_lmem(gt);
     75	if (IS_ERR(mem)) {
     76		err = PTR_ERR(mem);
     77		if (err == -ENODEV)
     78			return 0;
     79
     80		drm_err(&i915->drm,
     81			"Failed to setup region(%d) type=%d\n",
     82			err, INTEL_MEMORY_LOCAL);
     83		return err;
     84	}
     85
     86	mem->id = id;
     87	mem->instance = instance;
     88
     89	intel_memory_region_set_name(mem, "local%u", mem->instance);
     90
     91	GEM_BUG_ON(!HAS_REGION(i915, id));
     92	GEM_BUG_ON(i915->mm.regions[id]);
     93	i915->mm.regions[id] = mem;
     94
     95	return 0;
     96}
     97
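        /*
         * The GGTT descriptor for this GT is a DRM-managed allocation, so
         * it is released automatically together with the drm_device.
         */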
     98int intel_gt_assign_ggtt(struct intel_gt *gt)
     99{
    100	gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL);
    101
    102	return gt->ggtt ? 0 : -ENOMEM;
    103}
    104
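        /*
         * Ranges of multicast (MCR) registers that need explicit read
         * steering on the platforms that use them; each table is
         * terminated by an empty entry.
         */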
    105static const char * const intel_steering_types[] = {
    106	"L3BANK",
    107	"MSLICE",
    108	"LNCF",
    109};
    110
    111static const struct intel_mmio_range icl_l3bank_steering_table[] = {
    112	{ 0x00B100, 0x00B3FF },
    113	{},
    114};
    115
    116static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
    117	{ 0x004000, 0x004AFF },
    118	{ 0x00C800, 0x00CFFF },
    119	{ 0x00DD00, 0x00DDFF },
     120	{ 0x00E900, 0x00FFFF }, /* 0xEA00 - 0xEFFF is unused */
    121	{},
    122};
    123
    124static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
    125	{ 0x00B000, 0x00B0FF },
    126	{ 0x00D800, 0x00D8FF },
    127	{},
    128};
    129
    130static const struct intel_mmio_range dg2_lncf_steering_table[] = {
    131	{ 0x00B000, 0x00B0FF },
    132	{ 0x00D880, 0x00D8FF },
    133	{},
    134};
    135
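        /*
         * Derive a mask of present slices from the DSS fuse mask,
         * grouping the subslices into slices of 'count' DSS each.
         */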
    136static u16 slicemask(struct intel_gt *gt, int count)
    137{
    138	u64 dss_mask = intel_sseu_get_subslices(&gt->info.sseu, 0);
    139
    140	return intel_slicemask_from_dssmask(dss_mask, count);
    141}
    142
    143int intel_gt_init_mmio(struct intel_gt *gt)
    144{
    145	struct drm_i915_private *i915 = gt->i915;
    146
    147	intel_gt_init_clock_frequency(gt);
    148
    149	intel_uc_init_mmio(&gt->uc);
    150	intel_sseu_info_init(gt);
    151
    152	/*
    153	 * An mslice is unavailable only if both the meml3 for the slice is
    154	 * disabled *and* all of the DSS in the slice (quadrant) are disabled.
    155	 */
    156	if (HAS_MSLICES(i915))
    157		gt->info.mslice_mask =
    158			slicemask(gt, GEN_DSS_PER_MSLICE) |
    159			(intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
    160			 GEN12_MEML3_EN_MASK);
    161
    162	if (IS_DG2(i915)) {
    163		gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
    164		gt->steering_table[LNCF] = dg2_lncf_steering_table;
    165	} else if (IS_XEHPSDV(i915)) {
    166		gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
    167		gt->steering_table[LNCF] = xehpsdv_lncf_steering_table;
    168	} else if (GRAPHICS_VER(i915) >= 11 &&
    169		   GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
    170		gt->steering_table[L3BANK] = icl_l3bank_steering_table;
    171		gt->info.l3bank_mask =
    172			~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
    173			GEN10_L3BANK_MASK;
    174	} else if (HAS_MSLICES(i915)) {
    175		MISSING_CASE(INTEL_INFO(i915)->platform);
    176	}
    177
    178	return intel_engines_init_mmio(gt);
    179}
    180
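        /*
         * Zero an unused ring's registers so that it reads back as idle
         * (head == tail).
         */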
    181static void init_unused_ring(struct intel_gt *gt, u32 base)
    182{
    183	struct intel_uncore *uncore = gt->uncore;
    184
    185	intel_uncore_write(uncore, RING_CTL(base), 0);
    186	intel_uncore_write(uncore, RING_HEAD(base), 0);
    187	intel_uncore_write(uncore, RING_TAIL(base), 0);
    188	intel_uncore_write(uncore, RING_START(base), 0);
    189}
    190
    191static void init_unused_rings(struct intel_gt *gt)
    192{
    193	struct drm_i915_private *i915 = gt->i915;
    194
    195	if (IS_I830(i915)) {
    196		init_unused_ring(gt, PRB1_BASE);
    197		init_unused_ring(gt, SRB0_BASE);
    198		init_unused_ring(gt, SRB1_BASE);
    199		init_unused_ring(gt, SRB2_BASE);
    200		init_unused_ring(gt, SRB3_BASE);
    201	} else if (GRAPHICS_VER(i915) == 2) {
    202		init_unused_ring(gt, SRB0_BASE);
    203		init_unused_ring(gt, SRB1_BASE);
    204	} else if (GRAPHICS_VER(i915) == 3) {
    205		init_unused_ring(gt, PRB1_BASE);
    206		init_unused_ring(gt, PRB2_BASE);
    207	}
    208}
    209
    210int intel_gt_init_hw(struct intel_gt *gt)
    211{
    212	struct drm_i915_private *i915 = gt->i915;
    213	struct intel_uncore *uncore = gt->uncore;
    214	int ret;
    215
    216	gt->last_init_time = ktime_get();
    217
    218	/* Double layer security blanket, see i915_gem_init() */
    219	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
    220
    221	if (HAS_EDRAM(i915) && GRAPHICS_VER(i915) < 9)
    222		intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));
    223
    224	if (IS_HASWELL(i915))
    225		intel_uncore_write(uncore,
    226				   HSW_MI_PREDICATE_RESULT_2,
    227				   IS_HSW_GT3(i915) ?
    228				   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
    229
    230	/* Apply the GT workarounds... */
    231	intel_gt_apply_workarounds(gt);
    232	/* ...and determine whether they are sticking. */
    233	intel_gt_verify_workarounds(gt, "init");
    234
    235	intel_gt_init_swizzling(gt);
    236
    237	/*
    238	 * At least 830 can leave some of the unused rings
     239	 * "active" (i.e. head != tail) after resume, which
     240	 * will prevent C3 entry. Make sure all unused rings
    241	 * are totally idle.
    242	 */
    243	init_unused_rings(gt);
    244
    245	ret = i915_ppgtt_init_hw(gt);
    246	if (ret) {
    247		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
    248		goto out;
    249	}
    250
    251	/* We can't enable contexts until all firmware is loaded */
    252	ret = intel_uc_init_hw(&gt->uc);
    253	if (ret) {
    254		i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
    255		goto out;
    256	}
    257
    258	intel_mocs_init(gt);
    259
    260out:
    261	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
    262	return ret;
    263}
    264
    265static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
    266{
    267	intel_uncore_rmw(uncore, reg, 0, set);
    268}
    269
    270static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
    271{
    272	intel_uncore_rmw(uncore, reg, clr, 0);
    273}
    274
    275static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
    276{
    277	intel_uncore_rmw(uncore, reg, 0, 0);
    278}
    279
    280static void gen6_clear_engine_error_register(struct intel_engine_cs *engine)
    281{
    282	GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
    283	GEN6_RING_FAULT_REG_POSTING_READ(engine);
    284}
    285
    286void
    287intel_gt_clear_error_registers(struct intel_gt *gt,
    288			       intel_engine_mask_t engine_mask)
    289{
    290	struct drm_i915_private *i915 = gt->i915;
    291	struct intel_uncore *uncore = gt->uncore;
    292	u32 eir;
    293
    294	if (GRAPHICS_VER(i915) != 2)
    295		clear_register(uncore, PGTBL_ER);
    296
    297	if (GRAPHICS_VER(i915) < 4)
    298		clear_register(uncore, IPEIR(RENDER_RING_BASE));
    299	else
    300		clear_register(uncore, IPEIR_I965);
    301
    302	clear_register(uncore, EIR);
    303	eir = intel_uncore_read(uncore, EIR);
    304	if (eir) {
    305		/*
     306		 * Some errors might have become stuck;
     307		 * mask them.
    308		 */
    309		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
    310		rmw_set(uncore, EMR, eir);
    311		intel_uncore_write(uncore, GEN2_IIR,
    312				   I915_MASTER_ERROR_INTERRUPT);
    313	}
    314
    315	if (GRAPHICS_VER(i915) >= 12) {
    316		rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
    317		intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
    318	} else if (GRAPHICS_VER(i915) >= 8) {
    319		rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID);
    320		intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG);
    321	} else if (GRAPHICS_VER(i915) >= 6) {
    322		struct intel_engine_cs *engine;
    323		enum intel_engine_id id;
    324
    325		for_each_engine_masked(engine, gt, engine_mask, id)
    326			gen6_clear_engine_error_register(engine);
    327	}
    328}
    329
    330static void gen6_check_faults(struct intel_gt *gt)
    331{
    332	struct intel_engine_cs *engine;
    333	enum intel_engine_id id;
    334	u32 fault;
    335
    336	for_each_engine(engine, gt, id) {
    337		fault = GEN6_RING_FAULT_REG_READ(engine);
    338		if (fault & RING_FAULT_VALID) {
    339			drm_dbg(&engine->i915->drm, "Unexpected fault\n"
    340				"\tAddr: 0x%08lx\n"
    341				"\tAddress space: %s\n"
    342				"\tSource ID: %d\n"
    343				"\tType: %d\n",
    344				fault & PAGE_MASK,
    345				fault & RING_FAULT_GTTSEL_MASK ?
    346				"GGTT" : "PPGTT",
    347				RING_FAULT_SRCID(fault),
    348				RING_FAULT_FAULT_TYPE(fault));
    349		}
    350	}
    351}
    352
    353static void gen8_check_faults(struct intel_gt *gt)
    354{
    355	struct intel_uncore *uncore = gt->uncore;
    356	i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg;
    357	u32 fault;
    358
    359	if (GRAPHICS_VER(gt->i915) >= 12) {
    360		fault_reg = GEN12_RING_FAULT_REG;
    361		fault_data0_reg = GEN12_FAULT_TLB_DATA0;
    362		fault_data1_reg = GEN12_FAULT_TLB_DATA1;
    363	} else {
    364		fault_reg = GEN8_RING_FAULT_REG;
    365		fault_data0_reg = GEN8_FAULT_TLB_DATA0;
    366		fault_data1_reg = GEN8_FAULT_TLB_DATA1;
    367	}
    368
    369	fault = intel_uncore_read(uncore, fault_reg);
    370	if (fault & RING_FAULT_VALID) {
    371		u32 fault_data0, fault_data1;
    372		u64 fault_addr;
    373
    374		fault_data0 = intel_uncore_read(uncore, fault_data0_reg);
    375		fault_data1 = intel_uncore_read(uncore, fault_data1_reg);
    376
    377		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
    378			     ((u64)fault_data0 << 12);
    379
    380		drm_dbg(&uncore->i915->drm, "Unexpected fault\n"
    381			"\tAddr: 0x%08x_%08x\n"
    382			"\tAddress space: %s\n"
    383			"\tEngine ID: %d\n"
    384			"\tSource ID: %d\n"
    385			"\tType: %d\n",
    386			upper_32_bits(fault_addr), lower_32_bits(fault_addr),
    387			fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
    388			GEN8_RING_FAULT_ENGINE_ID(fault),
    389			RING_FAULT_SRCID(fault),
    390			RING_FAULT_FAULT_TYPE(fault));
    391	}
    392}
    393
    394void intel_gt_check_and_clear_faults(struct intel_gt *gt)
    395{
    396	struct drm_i915_private *i915 = gt->i915;
    397
    398	/* From GEN8 onwards we only have one 'All Engine Fault Register' */
    399	if (GRAPHICS_VER(i915) >= 8)
    400		gen8_check_faults(gt);
    401	else if (GRAPHICS_VER(i915) >= 6)
    402		gen6_check_faults(gt);
    403	else
    404		return;
    405
    406	intel_gt_clear_error_registers(gt, ALL_ENGINES);
    407}
    408
    409void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
    410{
    411	struct intel_uncore *uncore = gt->uncore;
    412	intel_wakeref_t wakeref;
    413
    414	/*
    415	 * No actual flushing is required for the GTT write domain for reads
    416	 * from the GTT domain. Writes to it "immediately" go to main memory
    417	 * as far as we know, so there's no chipset flush. It also doesn't
    418	 * land in the GPU render cache.
    419	 *
    420	 * However, we do have to enforce the order so that all writes through
    421	 * the GTT land before any writes to the device, such as updates to
    422	 * the GATT itself.
    423	 *
    424	 * We also have to wait a bit for the writes to land from the GTT.
    425	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
    426	 * timing. This issue has only been observed when switching quickly
    427	 * between GTT writes and CPU reads from inside the kernel on recent hw,
    428	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
    429	 * system agents we cannot reproduce this behaviour, until Cannonlake
    430	 * that was!).
    431	 */
    432
    433	wmb();
    434
    435	if (INTEL_INFO(gt->i915)->has_coherent_ggtt)
    436		return;
    437
    438	intel_gt_chipset_flush(gt);
    439
    440	with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) {
    441		unsigned long flags;
    442
    443		spin_lock_irqsave(&uncore->lock, flags);
    444		intel_uncore_posting_read_fw(uncore,
    445					     RING_HEAD(RENDER_RING_BASE));
    446		spin_unlock_irqrestore(&uncore->lock, flags);
    447	}
    448}
    449
    450void intel_gt_chipset_flush(struct intel_gt *gt)
    451{
    452	wmb();
    453	if (GRAPHICS_VER(gt->i915) < 6)
    454		intel_gt_gmch_gen5_chipset_flush(gt);
    455}
    456
    457void intel_gt_driver_register(struct intel_gt *gt)
    458{
    459	intel_gsc_init(&gt->gsc, gt->i915);
    460
    461	intel_rps_driver_register(&gt->rps);
    462
    463	intel_gt_debugfs_register(gt);
    464	intel_gt_sysfs_register(gt);
    465}
    466
    467static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
    468{
    469	struct drm_i915_private *i915 = gt->i915;
    470	struct drm_i915_gem_object *obj;
    471	struct i915_vma *vma;
    472	int ret;
    473
    474	obj = i915_gem_object_create_lmem(i915, size,
    475					  I915_BO_ALLOC_VOLATILE |
    476					  I915_BO_ALLOC_GPU_ONLY);
    477	if (IS_ERR(obj))
    478		obj = i915_gem_object_create_stolen(i915, size);
    479	if (IS_ERR(obj))
    480		obj = i915_gem_object_create_internal(i915, size);
    481	if (IS_ERR(obj)) {
    482		drm_err(&i915->drm, "Failed to allocate scratch page\n");
    483		return PTR_ERR(obj);
    484	}
    485
    486	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
    487	if (IS_ERR(vma)) {
    488		ret = PTR_ERR(vma);
    489		goto err_unref;
    490	}
    491
    492	ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
    493	if (ret)
    494		goto err_unref;
    495
    496	gt->scratch = i915_vma_make_unshrinkable(vma);
    497
    498	return 0;
    499
    500err_unref:
    501	i915_gem_object_put(obj);
    502	return ret;
    503}
    504
    505static void intel_gt_fini_scratch(struct intel_gt *gt)
    506{
    507	i915_vma_unpin_and_release(&gt->scratch, 0);
    508}
    509
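        /*
         * Select the address space installed as gt->vm: a full PPGTT where
         * the platform supports more than aliasing PPGTT, otherwise a
         * reference on the global GTT.
         */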
    510static struct i915_address_space *kernel_vm(struct intel_gt *gt)
    511{
    512	if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
    513		return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm;
    514	else
    515		return i915_vm_get(&gt->ggtt->vm);
    516}
    517
    518static int __engines_record_defaults(struct intel_gt *gt)
    519{
    520	struct i915_request *requests[I915_NUM_ENGINES] = {};
    521	struct intel_engine_cs *engine;
    522	enum intel_engine_id id;
    523	int err = 0;
    524
    525	/*
    526	 * As we reset the gpu during very early sanitisation, the current
     527	 * register state on the GPU should reflect its default values.
    528	 * We load a context onto the hw (with restore-inhibit), then switch
    529	 * over to a second context to save that default register state. We
    530	 * can then prime every new context with that state so they all start
    531	 * from the same default HW values.
    532	 */
    533
    534	for_each_engine(engine, gt, id) {
    535		struct intel_renderstate so;
    536		struct intel_context *ce;
    537		struct i915_request *rq;
    538
    539		/* We must be able to switch to something! */
    540		GEM_BUG_ON(!engine->kernel_context);
    541
    542		ce = intel_context_create(engine);
    543		if (IS_ERR(ce)) {
    544			err = PTR_ERR(ce);
    545			goto out;
    546		}
    547
    548		err = intel_renderstate_init(&so, ce);
    549		if (err)
    550			goto err;
    551
    552		rq = i915_request_create(ce);
    553		if (IS_ERR(rq)) {
    554			err = PTR_ERR(rq);
    555			goto err_fini;
    556		}
    557
    558		err = intel_engine_emit_ctx_wa(rq);
    559		if (err)
    560			goto err_rq;
    561
    562		err = intel_renderstate_emit(&so, rq);
    563		if (err)
    564			goto err_rq;
    565
    566err_rq:
    567		requests[id] = i915_request_get(rq);
    568		i915_request_add(rq);
    569err_fini:
    570		intel_renderstate_fini(&so, ce);
    571err:
    572		if (err) {
    573			intel_context_put(ce);
    574			goto out;
    575		}
    576	}
    577
    578	/* Flush the default context image to memory, and enable powersaving. */
    579	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
    580		err = -EIO;
    581		goto out;
    582	}
    583
    584	for (id = 0; id < ARRAY_SIZE(requests); id++) {
    585		struct i915_request *rq;
    586		struct file *state;
    587
    588		rq = requests[id];
    589		if (!rq)
    590			continue;
    591
    592		if (rq->fence.error) {
    593			err = -EIO;
    594			goto out;
    595		}
    596
    597		GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
    598		if (!rq->context->state)
    599			continue;
    600
    601		/* Keep a copy of the state's backing pages; free the obj */
    602		state = shmem_create_from_object(rq->context->state->obj);
    603		if (IS_ERR(state)) {
    604			err = PTR_ERR(state);
    605			goto out;
    606		}
    607		rq->engine->default_state = state;
    608	}
    609
    610out:
    611	/*
    612	 * If we have to abandon now, we expect the engines to be idle
     613	 * and ready to be torn down. The quickest way we can accomplish
    614	 * this is by declaring ourselves wedged.
    615	 */
    616	if (err)
    617		intel_gt_set_wedged(gt);
    618
    619	for (id = 0; id < ARRAY_SIZE(requests); id++) {
    620		struct intel_context *ce;
    621		struct i915_request *rq;
    622
    623		rq = requests[id];
    624		if (!rq)
    625			continue;
    626
    627		ce = rq->context;
    628		i915_request_put(rq);
    629		intel_context_put(ce);
    630	}
    631	return err;
    632}
    633
    634static int __engines_verify_workarounds(struct intel_gt *gt)
    635{
    636	struct intel_engine_cs *engine;
    637	enum intel_engine_id id;
    638	int err = 0;
    639
    640	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
    641		return 0;
    642
    643	for_each_engine(engine, gt, id) {
    644		if (intel_engine_verify_workarounds(engine, "load"))
    645			err = -EIO;
    646	}
    647
    648	/* Flush and restore the kernel context for safety */
    649	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME)
    650		err = -EIO;
    651
    652	return err;
    653}
    654
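        /*
         * Wedge the GT and run the suspend sequence so that it is idle and
         * holds no power-management wakeref before teardown.
         */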
    655static void __intel_gt_disable(struct intel_gt *gt)
    656{
    657	intel_gt_set_wedged_on_fini(gt);
    658
    659	intel_gt_suspend_prepare(gt);
    660	intel_gt_suspend_late(gt);
    661
    662	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
    663}
    664
    665int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
    666{
    667	long remaining_timeout;
    668
    669	/* If the device is asleep, we have no requests outstanding */
    670	if (!intel_gt_pm_is_awake(gt))
    671		return 0;
    672
    673	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout,
    674							   &remaining_timeout)) > 0) {
    675		cond_resched();
    676		if (signal_pending(current))
    677			return -EINTR;
    678	}
    679
    680	return timeout ? timeout : intel_uc_wait_for_idle(&gt->uc,
    681							  remaining_timeout);
    682}
    683
    684int intel_gt_init(struct intel_gt *gt)
    685{
    686	int err;
    687
    688	err = i915_inject_probe_error(gt->i915, -ENODEV);
    689	if (err)
    690		return err;
    691
    692	intel_gt_init_workarounds(gt);
    693
    694	/*
    695	 * This is just a security blanket to placate dragons.
    696	 * On some systems, we very sporadically observe that the first TLBs
    697	 * used by the CS may be stale, despite us poking the TLB reset. If
    698	 * we hold the forcewake during initialisation these problems
    699	 * just magically go away.
    700	 */
    701	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
    702
    703	err = intel_gt_init_scratch(gt,
    704				    GRAPHICS_VER(gt->i915) == 2 ? SZ_256K : SZ_4K);
    705	if (err)
    706		goto out_fw;
    707
    708	intel_gt_pm_init(gt);
    709
    710	gt->vm = kernel_vm(gt);
    711	if (!gt->vm) {
    712		err = -ENOMEM;
    713		goto err_pm;
    714	}
    715
    716	intel_set_mocs_index(gt);
    717
    718	err = intel_engines_init(gt);
    719	if (err)
    720		goto err_engines;
    721
    722	err = intel_uc_init(&gt->uc);
    723	if (err)
    724		goto err_engines;
    725
    726	err = intel_gt_resume(gt);
    727	if (err)
    728		goto err_uc_init;
    729
    730	err = intel_gt_init_hwconfig(gt);
    731	if (err)
    732		drm_err(&gt->i915->drm, "Failed to retrieve hwconfig table: %pe\n",
    733			ERR_PTR(err));
    734
    735	err = __engines_record_defaults(gt);
    736	if (err)
    737		goto err_gt;
    738
    739	err = __engines_verify_workarounds(gt);
    740	if (err)
    741		goto err_gt;
    742
    743	intel_uc_init_late(&gt->uc);
    744
    745	err = i915_inject_probe_error(gt->i915, -EIO);
    746	if (err)
    747		goto err_gt;
    748
    749	intel_migrate_init(&gt->migrate, gt);
    750
    751	intel_pxp_init(&gt->pxp);
    752
    753	goto out_fw;
    754err_gt:
    755	__intel_gt_disable(gt);
    756	intel_uc_fini_hw(&gt->uc);
    757err_uc_init:
    758	intel_uc_fini(&gt->uc);
    759err_engines:
    760	intel_engines_release(gt);
    761	i915_vm_put(fetch_and_zero(&gt->vm));
    762err_pm:
    763	intel_gt_pm_fini(gt);
    764	intel_gt_fini_scratch(gt);
    765out_fw:
    766	if (err)
    767		intel_gt_set_wedged_on_init(gt);
    768	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
    769	return err;
    770}
    771
    772void intel_gt_driver_remove(struct intel_gt *gt)
    773{
    774	__intel_gt_disable(gt);
    775
    776	intel_migrate_fini(&gt->migrate);
    777	intel_uc_driver_remove(&gt->uc);
    778
    779	intel_engines_release(gt);
    780
    781	intel_gt_flush_buffer_pool(gt);
    782}
    783
    784void intel_gt_driver_unregister(struct intel_gt *gt)
    785{
    786	intel_wakeref_t wakeref;
    787
    788	intel_gt_sysfs_unregister(gt);
    789	intel_rps_driver_unregister(&gt->rps);
    790	intel_gsc_fini(&gt->gsc);
    791
    792	intel_pxp_fini(&gt->pxp);
    793
    794	/*
    795	 * Upon unregistering the device to prevent any new users, cancel
    796	 * all in-flight requests so that we can quickly unbind the active
    797	 * resources.
    798	 */
    799	intel_gt_set_wedged_on_fini(gt);
    800
    801	/* Scrub all HW state upon release */
    802	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
    803		__intel_gt_reset(gt, ALL_ENGINES);
    804}
    805
    806void intel_gt_driver_release(struct intel_gt *gt)
    807{
    808	struct i915_address_space *vm;
    809
    810	vm = fetch_and_zero(&gt->vm);
    811	if (vm) /* FIXME being called twice on error paths :( */
    812		i915_vm_put(vm);
    813
    814	intel_wa_list_free(&gt->wa_list);
    815	intel_gt_pm_fini(gt);
    816	intel_gt_fini_scratch(gt);
    817	intel_gt_fini_buffer_pool(gt);
    818	intel_gt_fini_hwconfig(gt);
    819}
    820
    821void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
    822{
    823	struct intel_gt *gt;
    824	unsigned int id;
    825
    826	/* We need to wait for inflight RCU frees to release their grip */
    827	rcu_barrier();
    828
    829	for_each_gt(gt, i915, id) {
    830		intel_uc_driver_late_release(&gt->uc);
    831		intel_gt_fini_requests(gt);
    832		intel_gt_fini_reset(gt);
    833		intel_gt_fini_timelines(gt);
    834		intel_engines_free(gt);
    835	}
    836}
    837
    838/**
    839 * intel_gt_reg_needs_read_steering - determine whether a register read
    840 *     requires explicit steering
    841 * @gt: GT structure
    842 * @reg: the register to check steering requirements for
    843 * @type: type of multicast steering to check
    844 *
    845 * Determines whether @reg needs explicit steering of a specific type for
    846 * reads.
    847 *
    848 * Returns false if @reg does not belong to a register range of the given
    849 * steering type, or if the default (subslice-based) steering IDs are suitable
    850 * for @type steering too.
    851 */
    852static bool intel_gt_reg_needs_read_steering(struct intel_gt *gt,
    853					     i915_reg_t reg,
    854					     enum intel_steering_type type)
    855{
    856	const u32 offset = i915_mmio_reg_offset(reg);
    857	const struct intel_mmio_range *entry;
    858
    859	if (likely(!intel_gt_needs_read_steering(gt, type)))
    860		return false;
    861
    862	for (entry = gt->steering_table[type]; entry->end; entry++) {
    863		if (offset >= entry->start && offset <= entry->end)
    864			return true;
    865	}
    866
    867	return false;
    868}
    869
    870/**
    871 * intel_gt_get_valid_steering - determines valid IDs for a class of MCR steering
    872 * @gt: GT structure
    873 * @type: multicast register type
    874 * @sliceid: Slice ID returned
    875 * @subsliceid: Subslice ID returned
    876 *
    877 * Determines sliceid and subsliceid values that will steer reads
     878	 * of a specific multicast register class to a valid (not fused off) instance.
    879 */
    880static void intel_gt_get_valid_steering(struct intel_gt *gt,
    881					enum intel_steering_type type,
    882					u8 *sliceid, u8 *subsliceid)
    883{
    884	switch (type) {
    885	case L3BANK:
    886		GEM_DEBUG_WARN_ON(!gt->info.l3bank_mask); /* should be impossible! */
    887
    888		*sliceid = 0;		/* unused */
    889		*subsliceid = __ffs(gt->info.l3bank_mask);
    890		break;
    891	case MSLICE:
    892		GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */
    893
    894		*sliceid = __ffs(gt->info.mslice_mask);
    895		*subsliceid = 0;	/* unused */
    896		break;
    897	case LNCF:
    898		GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */
    899
    900		/*
    901		 * An LNCF is always present if its mslice is present, so we
    902		 * can safely just steer to LNCF 0 in all cases.
    903		 */
    904		*sliceid = __ffs(gt->info.mslice_mask) << 1;
    905		*subsliceid = 0;	/* unused */
    906		break;
    907	default:
    908		MISSING_CASE(type);
    909		*sliceid = 0;
    910		*subsliceid = 0;
    911	}
    912}
    913
    914/**
    915 * intel_gt_read_register_fw - reads a GT register with support for multicast
    916 * @gt: GT structure
    917 * @reg: register to read
    918 *
    919 * This function will read a GT register.  If the register is a multicast
    920 * register, the read will be steered to a valid instance (i.e., one that
    921 * isn't fused off or powered down by power gating).
    922 *
    923 * Returns the value from a valid instance of @reg.
    924 */
    925u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg)
    926{
    927	int type;
    928	u8 sliceid, subsliceid;
    929
    930	for (type = 0; type < NUM_STEERING_TYPES; type++) {
    931		if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
    932			intel_gt_get_valid_steering(gt, type, &sliceid,
    933						    &subsliceid);
    934			return intel_uncore_read_with_mcr_steering_fw(gt->uncore,
    935								      reg,
    936								      sliceid,
    937								      subsliceid);
    938		}
    939	}
    940
    941	return intel_uncore_read_fw(gt->uncore, reg);
    942}
    943
    944/**
    945 * intel_gt_get_valid_steering_for_reg - get a valid steering for a register
    946 * @gt: GT structure
    947 * @reg: register for which the steering is required
    948 * @sliceid: return variable for slice steering
    949 * @subsliceid: return variable for subslice steering
    950 *
    951 * This function returns a slice/subslice pair that is guaranteed to work for
    952 * read steering of the given register. Note that a value will be returned even
    953 * if the register is not replicated and therefore does not actually require
    954 * steering.
    955 */
    956void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg,
    957					 u8 *sliceid, u8 *subsliceid)
    958{
    959	int type;
    960
    961	for (type = 0; type < NUM_STEERING_TYPES; type++) {
    962		if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
    963			intel_gt_get_valid_steering(gt, type, sliceid,
    964						    subsliceid);
    965			return;
    966		}
    967	}
    968
    969	*sliceid = gt->default_steering.groupid;
    970	*subsliceid = gt->default_steering.instanceid;
    971}
    972
    973u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg)
    974{
    975	int type;
    976	u8 sliceid, subsliceid;
    977
    978	for (type = 0; type < NUM_STEERING_TYPES; type++) {
    979		if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
    980			intel_gt_get_valid_steering(gt, type, &sliceid,
    981						    &subsliceid);
    982			return intel_uncore_read_with_mcr_steering(gt->uncore,
    983								   reg,
    984								   sliceid,
    985								   subsliceid);
    986		}
    987	}
    988
    989	return intel_uncore_read(gt->uncore, reg);
    990}
    991
    992static void report_steering_type(struct drm_printer *p,
    993				 struct intel_gt *gt,
    994				 enum intel_steering_type type,
    995				 bool dump_table)
    996{
    997	const struct intel_mmio_range *entry;
    998	u8 slice, subslice;
    999
   1000	BUILD_BUG_ON(ARRAY_SIZE(intel_steering_types) != NUM_STEERING_TYPES);
   1001
   1002	if (!gt->steering_table[type]) {
   1003		drm_printf(p, "%s steering: uses default steering\n",
   1004			   intel_steering_types[type]);
   1005		return;
   1006	}
   1007
   1008	intel_gt_get_valid_steering(gt, type, &slice, &subslice);
   1009	drm_printf(p, "%s steering: sliceid=0x%x, subsliceid=0x%x\n",
   1010		   intel_steering_types[type], slice, subslice);
   1011
   1012	if (!dump_table)
   1013		return;
   1014
   1015	for (entry = gt->steering_table[type]; entry->end; entry++)
   1016		drm_printf(p, "\t0x%06x - 0x%06x\n", entry->start, entry->end);
   1017}
   1018
   1019void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt,
   1020			      bool dump_table)
   1021{
   1022	drm_printf(p, "Default steering: sliceid=0x%x, subsliceid=0x%x\n",
   1023		   gt->default_steering.groupid,
   1024		   gt->default_steering.instanceid);
   1025
   1026	if (HAS_MSLICES(gt->i915)) {
   1027		report_steering_type(p, gt, MSLICE, dump_table);
   1028		report_steering_type(p, gt, LNCF, dump_table);
   1029	}
   1030}
   1031
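        /*
         * The root tile reuses the uncore embedded in struct
         * drm_i915_private; secondary tiles allocate their own uncore and
         * MMIO debug state here.
         */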
   1032static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
   1033{
   1034	int ret;
   1035
   1036	if (!gt_is_root(gt)) {
   1037		struct intel_uncore_mmio_debug *mmio_debug;
   1038		struct intel_uncore *uncore;
   1039
   1040		uncore = kzalloc(sizeof(*uncore), GFP_KERNEL);
   1041		if (!uncore)
   1042			return -ENOMEM;
   1043
   1044		mmio_debug = kzalloc(sizeof(*mmio_debug), GFP_KERNEL);
   1045		if (!mmio_debug) {
   1046			kfree(uncore);
   1047			return -ENOMEM;
   1048		}
   1049
   1050		gt->uncore = uncore;
   1051		gt->uncore->debug = mmio_debug;
   1052
   1053		__intel_gt_init_early(gt);
   1054	}
   1055
   1056	intel_uncore_init_early(gt->uncore, gt);
   1057
   1058	ret = intel_uncore_setup_mmio(gt->uncore, phys_addr);
   1059	if (ret)
   1060		return ret;
   1061
   1062	gt->phys_addr = phys_addr;
   1063
   1064	return 0;
   1065}
   1066
   1067static void
   1068intel_gt_tile_cleanup(struct intel_gt *gt)
   1069{
   1070	intel_uncore_cleanup_mmio(gt->uncore);
   1071
   1072	if (!gt_is_root(gt)) {
   1073		kfree(gt->uncore->debug);
   1074		kfree(gt->uncore);
   1075		kfree(gt);
   1076	}
   1077}
   1078
   1079int intel_gt_probe_all(struct drm_i915_private *i915)
   1080{
   1081	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
   1082	struct intel_gt *gt = &i915->gt0;
   1083	phys_addr_t phys_addr;
   1084	unsigned int mmio_bar;
   1085	int ret;
   1086
   1087	mmio_bar = GRAPHICS_VER(i915) == 2 ? 1 : 0;
   1088	phys_addr = pci_resource_start(pdev, mmio_bar);
   1089
   1090	/*
   1091	 * We always have at least one primary GT on any device
    1092	 * and it has already been initialized early during probe
   1093	 * in i915_driver_probe()
   1094	 */
   1095	ret = intel_gt_tile_setup(gt, phys_addr);
   1096	if (ret)
   1097		return ret;
   1098
   1099	i915->gt[0] = gt;
   1100
   1101	/* TODO: add more tiles */
   1102	return 0;
   1103}
   1104
   1105int intel_gt_tiles_init(struct drm_i915_private *i915)
   1106{
   1107	struct intel_gt *gt;
   1108	unsigned int id;
   1109	int ret;
   1110
   1111	for_each_gt(gt, i915, id) {
   1112		ret = intel_gt_probe_lmem(gt);
   1113		if (ret)
   1114			return ret;
   1115	}
   1116
   1117	return 0;
   1118}
   1119
   1120void intel_gt_release_all(struct drm_i915_private *i915)
   1121{
   1122	struct intel_gt *gt;
   1123	unsigned int id;
   1124
   1125	for_each_gt(gt, i915, id) {
   1126		intel_gt_tile_cleanup(gt);
   1127		i915->gt[id] = NULL;
   1128	}
   1129}
   1130
   1131void intel_gt_info_print(const struct intel_gt_info *info,
   1132			 struct drm_printer *p)
   1133{
   1134	drm_printf(p, "available engines: %x\n", info->engine_mask);
   1135
   1136	intel_sseu_dump(&info->sseu, p);
   1137}
   1138
   1139struct reg_and_bit {
   1140	i915_reg_t reg;
   1141	u32 bit;
   1142};
   1143
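        /*
         * Look up the TLB invalidation register and request bit for an
         * engine; an empty reg_and_bit is returned if the engine class has
         * no entry in the platform's table.
         */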
   1144static struct reg_and_bit
   1145get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
   1146		const i915_reg_t *regs, const unsigned int num)
   1147{
   1148	const unsigned int class = engine->class;
   1149	struct reg_and_bit rb = { };
   1150
   1151	if (drm_WARN_ON_ONCE(&engine->i915->drm,
   1152			     class >= num || !regs[class].reg))
   1153		return rb;
   1154
   1155	rb.reg = regs[class];
   1156	if (gen8 && class == VIDEO_DECODE_CLASS)
   1157		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
   1158	else
   1159		rb.bit = engine->instance;
   1160
   1161	rb.bit = BIT(rb.bit);
   1162
   1163	return rb;
   1164}
   1165
   1166void intel_gt_invalidate_tlbs(struct intel_gt *gt)
   1167{
   1168	static const i915_reg_t gen8_regs[] = {
   1169		[RENDER_CLASS]			= GEN8_RTCR,
   1170		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR, /* , GEN8_M2TCR */
   1171		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
   1172		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
   1173	};
   1174	static const i915_reg_t gen12_regs[] = {
   1175		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
   1176		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
   1177		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
   1178		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
   1179		[COMPUTE_CLASS]			= GEN12_COMPCTX_TLB_INV_CR,
   1180	};
   1181	struct drm_i915_private *i915 = gt->i915;
   1182	struct intel_uncore *uncore = gt->uncore;
   1183	struct intel_engine_cs *engine;
   1184	enum intel_engine_id id;
   1185	const i915_reg_t *regs;
   1186	unsigned int num = 0;
   1187
   1188	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
   1189		return;
   1190
   1191	if (GRAPHICS_VER(i915) == 12) {
   1192		regs = gen12_regs;
   1193		num = ARRAY_SIZE(gen12_regs);
   1194	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
   1195		regs = gen8_regs;
   1196		num = ARRAY_SIZE(gen8_regs);
   1197	} else if (GRAPHICS_VER(i915) < 8) {
   1198		return;
   1199	}
   1200
   1201	if (drm_WARN_ONCE(&i915->drm, !num,
   1202			  "Platform does not implement TLB invalidation!"))
   1203		return;
   1204
   1205	GEM_TRACE("\n");
   1206
   1207	assert_rpm_wakelock_held(&i915->runtime_pm);
   1208
   1209	mutex_lock(&gt->tlb_invalidate_lock);
   1210	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
   1211
   1212	for_each_engine(engine, gt, id) {
   1213		/*
    1214		 * HW architecture suggests a typical invalidation time of 40us,
   1215		 * with pessimistic cases up to 100us and a recommendation to
   1216		 * cap at 1ms. We go a bit higher just in case.
   1217		 */
   1218		const unsigned int timeout_us = 100;
   1219		const unsigned int timeout_ms = 4;
   1220		struct reg_and_bit rb;
   1221
   1222		rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
   1223		if (!i915_mmio_reg_offset(rb.reg))
   1224			continue;
   1225
   1226		intel_uncore_write_fw(uncore, rb.reg, rb.bit);
   1227		if (__intel_wait_for_register_fw(uncore,
   1228						 rb.reg, rb.bit, 0,
   1229						 timeout_us, timeout_ms,
   1230						 NULL))
   1231			drm_err_ratelimited(&gt->i915->drm,
   1232					    "%s TLB invalidation did not complete in %ums!\n",
   1233					    engine->name, timeout_ms);
   1234	}
   1235
   1236	/*
   1237	 * Use delayed put since a) we mostly expect a flurry of TLB
   1238	 * invalidations so it is good to avoid paying the forcewake cost and
   1239	 * b) it works around a bug in Icelake which cannot cope with too rapid
   1240	 * transitions.
   1241	 */
   1242	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
   1243	mutex_unlock(&gt->tlb_invalidate_lock);
   1244}