cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

gen2_engine_cs.c (7935B)


// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen2_engine_cs.h"
#include "i915_drv.h"
#include "i915_reg.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_ring.h"

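/*
 * Emit a flush for gen2: MI_FLUSH (plus MI_READ_FLUSH when invalidation
 * is requested), followed by a series of dummy MI_STORE_DWORD_INDEX
 * writes into the HWSP scratch slot, each chased by another MI_FLUSH,
 * and a final flush. The dummy stores presumably act as a delay to let
 * the flush complete.
 */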
int gen2_emit_flush(struct i915_request *rq, u32 mode)
{
	unsigned int num_store_dw = 12;
	u32 cmd, *cs;

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_READ_FLUSH;

	cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	while (num_store_dw--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = 0;
		*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
	}
	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}

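/*
 * Render-ring flush for gen4/gen5. On top of MI_FLUSH, an invalidating
 * flush sets MI_EXE_FLUSH (and MI_INVALIDATE_ISP on G4X/gen5) and is
 * bracketed by PIPE_CONTROL scratch writes plus a burst of MI_FLUSHes
 * that serve as a delay for the invalidation to take effect; see the
 * comments in the body below.
 */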
int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;
	int i;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_EXE_FLUSH;
		if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5)
			cmd |= MI_INVALIDATE_ISP;
	}

	i = 2;
	if (mode & EMIT_INVALIDATE)
		i += 20;

	cs = intel_ring_begin(rq, i);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;

	/*
	 * A random delay to let the CS invalidate take effect? Without this
	 * delay, the GPU relocation path fails as the CS does not see
	 * the updated contents. Just as important, if we apply the flushes
	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
	 * write and before the invalidate on the next batch), the relocations
	 * still fail. This implies that it is a delay following invalidation
	 * that is required to reset the caches as opposed to a delay to
	 * ensure the memory is written.
	 */
	if (mode & EMIT_INVALIDATE) {
		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;

		for (i = 0; i < 12; i++)
			*cs++ = MI_FLUSH;

		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;
	}

	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}

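/*
 * Flush for the gen4/gen5 video (BSD) ring: a bare MI_FLUSH, padded
 * with MI_NOOP so the emission stays qword aligned.
 */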
int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_FLUSH;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

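/*
 * Common gen2/gen3 breadcrumb: a flush, then 'flush' dummy seqno writes
 * into the HWSP scratch slot (which appear to act as a delay, like the
 * one in gen2_emit_flush()), then 'post' writes of the seqno into the
 * HWSP seqno slot, and finally MI_USER_INTERRUPT to signal completion.
 */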
static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
				   int flush, int post)
{
	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH;

	while (flush--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = rq->fence.seqno;
	}

	while (post--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
		*cs++ = rq->fence.seqno;
	}

	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

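/* The per-gen breadcrumbs differ only in how many dummy and seqno writes they emit. */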
u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}

u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
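/*
 * Batch submission on i830 has to work around a CS TLB invalidation
 * bug: stale PTE TLBs are first evicted with a dummy blit to the
 * scratch area, and, unless the batch was submitted with
 * I915_DISPATCH_PINNED, the batch is then copied into that stable
 * scratch area and executed from there (see the comments below).
 */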
int i830_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs, cs_offset =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);

	GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Evict the invalid PTE TLBs */
	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
	*cs++ = cs_offset;
	*cs++ = 0xdeadbeef;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		cs = intel_ring_begin(rq, 6 + 2);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/*
		 * Blit the batch (which now has all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
		*cs++ = cs_offset;
		*cs++ = 4096;
		*cs++ = offset;

		*cs++ = MI_FLUSH;
		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* ... and execute it. */
		offset = cs_offset;
	}

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

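/*
 * Plain gen3 batch start: MI_BATCH_BUFFER_START referencing the GTT,
 * with the non-secure flag ORed into the batch address for unprivileged
 * batches.
 */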
int gen3_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs;

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

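/*
 * On gen4/gen5 the non-secure flag moves into the command dword
 * (MI_BATCH_NON_SECURE_I965) instead of being encoded in the batch
 * address.
 */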
int gen4_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 length,
		       unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

	security = MI_BATCH_NON_SECURE_I965;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

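/*
 * gen2 has a 16-bit interrupt mask register; engine interrupts are
 * enabled by clearing the engine's bits in the cached irq_mask and
 * writing it back to GEN2_IMR.
 */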
void gen2_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
	ENGINE_POSTING_READ16(engine, RING_IMR);
}

void gen2_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
}

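/* gen3 uses the same scheme with a full 32-bit IMR write. */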
void gen3_irq_enable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}

void gen3_irq_disable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
}

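/* On gen5 (Ironlake), engine interrupts are routed through the GT IMR helpers. */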
void gen5_irq_enable(struct intel_engine_cs *engine)
{
	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen5_irq_disable(struct intel_engine_cs *engine)
{
	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}