amdgpu_gfx.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
amdgpu_gfx.c (22035B)
      1/*
      2 * Copyright 2014 Advanced Micro Devices, Inc.
      3 * Copyright 2008 Red Hat Inc.
      4 * Copyright 2009 Jerome Glisse.
      5 *
      6 * Permission is hereby granted, free of charge, to any person obtaining a
      7 * copy of this software and associated documentation files (the "Software"),
      8 * to deal in the Software without restriction, including without limitation
      9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10 * and/or sell copies of the Software, and to permit persons to whom the
     11 * Software is furnished to do so, subject to the following conditions:
     12 *
     13 * The above copyright notice and this permission notice shall be included in
     14 * all copies or substantial portions of the Software.
     15 *
     16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     22 * OTHER DEALINGS IN THE SOFTWARE.
     23 *
     24 */
     25
     26#include "amdgpu.h"
     27#include "amdgpu_gfx.h"
     28#include "amdgpu_rlc.h"
     29#include "amdgpu_ras.h"
     30
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)

/* no delay: used by amdgpu_gfx_off_ctrl() when adev->in_s0ix is set */
#define GFX_OFF_NO_DELAY 0
     35
/*
 * GPU GFX IP block helper functions.
 */
     39
     40int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
     41				int pipe, int queue)
     42{
     43	int bit = 0;
     44
     45	bit += mec * adev->gfx.mec.num_pipe_per_mec
     46		* adev->gfx.mec.num_queue_per_pipe;
     47	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
     48	bit += queue;
     49
     50	return bit;
     51}
     52
     53void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
     54				 int *mec, int *pipe, int *queue)
     55{
     56	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
     57	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
     58		% adev->gfx.mec.num_pipe_per_mec;
     59	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
     60	       / adev->gfx.mec.num_pipe_per_mec;
     61
     62}
     63
     64bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
     65				     int mec, int pipe, int queue)
     66{
     67	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
     68			adev->gfx.mec.queue_bitmap);
     69}
     70
     71int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
     72			       int me, int pipe, int queue)
     73{
     74	int bit = 0;
     75
     76	bit += me * adev->gfx.me.num_pipe_per_me
     77		* adev->gfx.me.num_queue_per_pipe;
     78	bit += pipe * adev->gfx.me.num_queue_per_pipe;
     79	bit += queue;
     80
     81	return bit;
     82}
     83
     84void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
     85				int *me, int *pipe, int *queue)
     86{
     87	*queue = bit % adev->gfx.me.num_queue_per_pipe;
     88	*pipe = (bit / adev->gfx.me.num_queue_per_pipe)
     89		% adev->gfx.me.num_pipe_per_me;
     90	*me = (bit / adev->gfx.me.num_queue_per_pipe)
     91		/ adev->gfx.me.num_pipe_per_me;
     92}
     93
     94bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
     95				    int me, int pipe, int queue)
     96{
     97	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
     98			adev->gfx.me.queue_bitmap);
     99}
    100
    101/**
    102 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
    103 *
    104 * @mask: array in which the per-shader array disable masks will be stored
    105 * @max_se: number of SEs
    106 * @max_sh: number of SHs
    107 *
    108 * The bitmask of CUs to be disabled in the shader array determined by se and
    109 * sh is stored in mask[se * max_sh + sh].
    110 */
    111void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
    112{
    113	unsigned se, sh, cu;
    114	const char *p;
    115
    116	memset(mask, 0, sizeof(*mask) * max_se * max_sh);
    117
    118	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
    119		return;
    120
    121	p = amdgpu_disable_cu;
    122	for (;;) {
    123		char *next;
    124		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
    125		if (ret < 3) {
    126			DRM_ERROR("amdgpu: could not parse disable_cu\n");
    127			return;
    128		}
    129
    130		if (se < max_se && sh < max_sh && cu < 16) {
    131			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
    132			mask[se * max_sh + sh] |= 1u << cu;
    133		} else {
    134			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
    135				  se, sh, cu);
    136		}
    137
    138		next = strchr(p, ',');
    139		if (!next)
    140			break;
    141		p = next + 1;
    142	}
    143}
    144
    145static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
    146{
    147	if (amdgpu_compute_multipipe != -1) {
    148		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
    149			 amdgpu_compute_multipipe);
    150		return amdgpu_compute_multipipe == 1;
    151	}
    152
    153	/* FIXME: spreading the queues across pipes causes perf regressions
    154	 * on POLARIS11 compute workloads */
    155	if (adev->asic_type == CHIP_POLARIS11)
    156		return false;
    157
    158	return adev->gfx.mec.num_mec > 1;
    159}
    160
    161bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
    162					       struct amdgpu_ring *ring)
    163{
    164	/* Policy: use 1st queue as high priority compute queue if we
    165	 * have more than one compute queue.
    166	 */
    167	if (adev->gfx.num_compute_rings > 1 &&
    168	    ring == &adev->gfx.compute_ring[0])
    169		return true;
    170
    171	return false;
    172}
    173
    174void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
    175{
    176	int i, queue, pipe;
    177	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
    178	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
    179				     adev->gfx.mec.num_queue_per_pipe,
    180				     adev->gfx.num_compute_rings);
    181
    182	if (multipipe_policy) {
    183		/* policy: make queues evenly cross all pipes on MEC1 only */
    184		for (i = 0; i < max_queues_per_mec; i++) {
    185			pipe = i % adev->gfx.mec.num_pipe_per_mec;
    186			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
    187				adev->gfx.mec.num_queue_per_pipe;
    188
    189			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
    190					adev->gfx.mec.queue_bitmap);
    191		}
    192	} else {
    193		/* policy: amdgpu owns all queues in the given pipe */
    194		for (i = 0; i < max_queues_per_mec; ++i)
    195			set_bit(i, adev->gfx.mec.queue_bitmap);
    196	}
    197
    198	dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
    199}
    200
    201void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
    202{
    203	int i, queue, me;
    204
    205	for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
    206		queue = i % adev->gfx.me.num_queue_per_pipe;
    207		me = (i / adev->gfx.me.num_queue_per_pipe)
    208		      / adev->gfx.me.num_pipe_per_me;
    209
    210		if (me >= adev->gfx.me.num_me)
    211			break;
    212		/* policy: amdgpu owns the first queue per pipe at this stage
    213		 * will extend to mulitple queues per pipe later */
    214		if (me == 0 && queue < 1)
    215			set_bit(i, adev->gfx.me.queue_bitmap);
    216	}
    217
    218	/* update the number of active graphics rings */
    219	adev->gfx.num_gfx_rings =
    220		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
    221}
    222
    223static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
    224				  struct amdgpu_ring *ring)
    225{
    226	int queue_bit;
    227	int mec, pipe, queue;
    228
    229	queue_bit = adev->gfx.mec.num_mec
    230		    * adev->gfx.mec.num_pipe_per_mec
    231		    * adev->gfx.mec.num_queue_per_pipe;
    232
    233	while (--queue_bit >= 0) {
    234		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
    235			continue;
    236
    237		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
    238
    239		/*
    240		 * 1. Using pipes 2/3 from MEC 2 seems cause problems.
    241		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
    242		 * only can be issued on queue 0.
    243		 */
    244		if ((mec == 1 && pipe > 1) || queue != 0)
    245			continue;
    246
    247		ring->me = mec + 1;
    248		ring->pipe = pipe;
    249		ring->queue = queue;
    250
    251		return 0;
    252	}
    253
    254	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
    255	return -EINVAL;
    256}
    257
    258int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
    259			     struct amdgpu_ring *ring,
    260			     struct amdgpu_irq_src *irq)
    261{
    262	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    263	int r = 0;
    264
    265	spin_lock_init(&kiq->ring_lock);
    266
    267	ring->adev = NULL;
    268	ring->ring_obj = NULL;
    269	ring->use_doorbell = true;
    270	ring->doorbell_index = adev->doorbell_index.kiq;
    271
    272	r = amdgpu_gfx_kiq_acquire(adev, ring);
    273	if (r)
    274		return r;
    275
    276	ring->eop_gpu_addr = kiq->eop_gpu_addr;
    277	ring->no_scheduler = true;
    278	sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
    279	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
    280			     AMDGPU_RING_PRIO_DEFAULT, NULL);
    281	if (r)
    282		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
    283
    284	return r;
    285}
    286
    287void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
    288{
    289	amdgpu_ring_fini(ring);
    290}
    291
    292void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
    293{
    294	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    295
    296	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
    297}
    298
    299int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
    300			unsigned hpd_size)
    301{
    302	int r;
    303	u32 *hpd;
    304	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    305
    306	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
    307				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
    308				    &kiq->eop_gpu_addr, (void **)&hpd);
    309	if (r) {
    310		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
    311		return r;
    312	}
    313
    314	memset(hpd, 0, hpd_size);
    315
    316	r = amdgpu_bo_reserve(kiq->eop_obj, true);
    317	if (unlikely(r != 0))
    318		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
    319	amdgpu_bo_kunmap(kiq->eop_obj);
    320	amdgpu_bo_unreserve(kiq->eop_obj);
    321
    322	return 0;
    323}
    324
    325/* create MQD for each compute/gfx queue */
    326int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
    327			   unsigned mqd_size)
    328{
    329	struct amdgpu_ring *ring = NULL;
    330	int r, i;
    331
    332	/* create MQD for KIQ */
    333	ring = &adev->gfx.kiq.ring;
    334	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
    335		/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
    336		 * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
    337		 * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
    338		 * KIQ MQD no matter SRIOV or Bare-metal
    339		 */
    340		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
    341					    AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
    342					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
    343		if (r) {
    344			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
    345			return r;
    346		}
    347
    348		/* prepare MQD backup */
    349		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
    350		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
    351				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
    352	}
    353
    354	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
    355		/* create MQD for each KGQ */
    356		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
    357			ring = &adev->gfx.gfx_ring[i];
    358			if (!ring->mqd_obj) {
    359				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
    360							    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
    361							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
    362				if (r) {
    363					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
    364					return r;
    365				}
    366
    367				/* prepare MQD backup */
    368				adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
    369				if (!adev->gfx.me.mqd_backup[i])
    370					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
    371			}
    372		}
    373	}
    374
    375	/* create MQD for each KCQ */
    376	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
    377		ring = &adev->gfx.compute_ring[i];
    378		if (!ring->mqd_obj) {
    379			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
    380						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
    381						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
    382			if (r) {
    383				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
    384				return r;
    385			}
    386
    387			/* prepare MQD backup */
    388			adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
    389			if (!adev->gfx.mec.mqd_backup[i])
    390				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
    391		}
    392	}
    393
    394	return 0;
    395}
    396
    397void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
    398{
    399	struct amdgpu_ring *ring = NULL;
    400	int i;
    401
    402	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
    403		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
    404			ring = &adev->gfx.gfx_ring[i];
    405			kfree(adev->gfx.me.mqd_backup[i]);
    406			amdgpu_bo_free_kernel(&ring->mqd_obj,
    407					      &ring->mqd_gpu_addr,
    408					      &ring->mqd_ptr);
    409		}
    410	}
    411
    412	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
    413		ring = &adev->gfx.compute_ring[i];
    414		kfree(adev->gfx.mec.mqd_backup[i]);
    415		amdgpu_bo_free_kernel(&ring->mqd_obj,
    416				      &ring->mqd_gpu_addr,
    417				      &ring->mqd_ptr);
    418	}
    419
    420	ring = &adev->gfx.kiq.ring;
    421	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
    422	amdgpu_bo_free_kernel(&ring->mqd_obj,
    423			      &ring->mqd_gpu_addr,
    424			      &ring->mqd_ptr);
    425}
    426
    427int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
    428{
    429	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    430	struct amdgpu_ring *kiq_ring = &kiq->ring;
    431	int i, r = 0;
    432
    433	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
    434		return -EINVAL;
    435
    436	spin_lock(&adev->gfx.kiq.ring_lock);
    437	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
    438					adev->gfx.num_compute_rings)) {
    439		spin_unlock(&adev->gfx.kiq.ring_lock);
    440		return -ENOMEM;
    441	}
    442
    443	for (i = 0; i < adev->gfx.num_compute_rings; i++)
    444		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
    445					   RESET_QUEUES, 0, 0);
    446
    447	if (adev->gfx.kiq.ring.sched.ready)
    448		r = amdgpu_ring_test_helper(kiq_ring);
    449	spin_unlock(&adev->gfx.kiq.ring_lock);
    450
    451	return r;
    452}
    453
    454int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
    455					int queue_bit)
    456{
    457	int mec, pipe, queue;
    458	int set_resource_bit = 0;
    459
    460	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
    461
    462	set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;
    463
    464	return set_resource_bit;
    465}
    466
    467int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
    468{
    469	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    470	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
    471	uint64_t queue_mask = 0;
    472	int r, i;
    473
    474	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
    475		return -EINVAL;
    476
    477	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
    478		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
    479			continue;
    480
    481		/* This situation may be hit in the future if a new HW
    482		 * generation exposes more than 64 queues. If so, the
    483		 * definition of queue_mask needs updating */
    484		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
    485			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
    486			break;
    487		}
    488
    489		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
    490	}
    491
    492	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
    493							kiq_ring->queue);
    494	spin_lock(&adev->gfx.kiq.ring_lock);
    495	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
    496					adev->gfx.num_compute_rings +
    497					kiq->pmf->set_resources_size);
    498	if (r) {
    499		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
    500		spin_unlock(&adev->gfx.kiq.ring_lock);
    501		return r;
    502	}
    503
    504	if (adev->enable_mes)
    505		queue_mask = ~0ULL;
    506
    507	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
    508	for (i = 0; i < adev->gfx.num_compute_rings; i++)
    509		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);
    510
    511	r = amdgpu_ring_test_helper(kiq_ring);
    512	spin_unlock(&adev->gfx.kiq.ring_lock);
    513	if (r)
    514		DRM_ERROR("KCQ enable failed\n");
    515
    516	return r;
    517}
    518
    519/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
    520 *
    521 * @adev: amdgpu_device pointer
    522 * @bool enable true: enable gfx off feature, false: disable gfx off feature
    523 *
    524 * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
    525 * 2. other client can send request to disable gfx off feature, the request should be honored.
    526 * 3. other client can cancel their request of disable gfx off feature
    527 * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
    528 */
    529
    530void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
    531{
    532	unsigned long delay = GFX_OFF_DELAY_ENABLE;
    533
    534	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
    535		return;
    536
    537	mutex_lock(&adev->gfx.gfx_off_mutex);
    538
    539	if (enable) {
    540		/* If the count is already 0, it means there's an imbalance bug somewhere.
    541		 * Note that the bug may be in a different caller than the one which triggers the
    542		 * WARN_ON_ONCE.
    543		 */
    544		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
    545			goto unlock;
    546
    547		adev->gfx.gfx_off_req_count--;
    548
    549		if (adev->gfx.gfx_off_req_count == 0 &&
    550		    !adev->gfx.gfx_off_state) {
    551			/* If going to s2idle, no need to wait */
    552			if (adev->in_s0ix)
    553				delay = GFX_OFF_NO_DELAY;
    554			schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
    555					      delay);
    556		}
    557	} else {
    558		if (adev->gfx.gfx_off_req_count == 0) {
    559			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
    560
    561			if (adev->gfx.gfx_off_state &&
    562			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
    563				adev->gfx.gfx_off_state = false;
    564
    565				if (adev->gfx.funcs->init_spm_golden) {
    566					dev_dbg(adev->dev,
    567						"GFXOFF is disabled, re-init SPM golden settings\n");
    568					amdgpu_gfx_init_spm_golden(adev);
    569				}
    570			}
    571		}
    572
    573		adev->gfx.gfx_off_req_count++;
    574	}
    575
    576unlock:
    577	mutex_unlock(&adev->gfx.gfx_off_mutex);
    578}
    579
    580int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
    581{
    582
    583	int r = 0;
    584
    585	mutex_lock(&adev->gfx.gfx_off_mutex);
    586
    587	r = amdgpu_dpm_get_status_gfxoff(adev, value);
    588
    589	mutex_unlock(&adev->gfx.gfx_off_mutex);
    590
    591	return r;
    592}
    593
    594int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
    595{
    596	int r;
    597
    598	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
    599		if (!amdgpu_persistent_edc_harvesting_supported(adev))
    600			amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
    601
    602		r = amdgpu_ras_block_late_init(adev, ras_block);
    603		if (r)
    604			return r;
    605
    606		r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
    607		if (r)
    608			goto late_fini;
    609	} else {
    610		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
    611	}
    612
    613	return 0;
    614late_fini:
    615	amdgpu_ras_block_late_fini(adev, ras_block);
    616	return r;
    617}
    618
    619int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
    620		void *err_data,
    621		struct amdgpu_iv_entry *entry)
    622{
    623	/* TODO ue will trigger an interrupt.
    624	 *
    625	 * When “Full RAS” is enabled, the per-IP interrupt sources should
    626	 * be disabled and the driver should only look for the aggregated
    627	 * interrupt via sync flood
    628	 */
    629	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
    630		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
    631		if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
    632		    adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
    633			adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
    634		amdgpu_ras_reset_gpu(adev);
    635	}
    636	return AMDGPU_RAS_SUCCESS;
    637}
    638
    639int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
    640				  struct amdgpu_irq_src *source,
    641				  struct amdgpu_iv_entry *entry)
    642{
    643	struct ras_common_if *ras_if = adev->gfx.ras_if;
    644	struct ras_dispatch_if ih_data = {
    645		.entry = entry,
    646	};
    647
    648	if (!ras_if)
    649		return 0;
    650
    651	ih_data.head = *ras_if;
    652
    653	DRM_ERROR("CP ECC ERROR IRQ\n");
    654	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
    655	return 0;
    656}
    657
    658uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
    659{
    660	signed long r, cnt = 0;
    661	unsigned long flags;
    662	uint32_t seq, reg_val_offs = 0, value = 0;
    663	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    664	struct amdgpu_ring *ring = &kiq->ring;
    665
    666	if (amdgpu_device_skip_hw_access(adev))
    667		return 0;
    668
    669	BUG_ON(!ring->funcs->emit_rreg);
    670
    671	spin_lock_irqsave(&kiq->ring_lock, flags);
    672	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
    673		pr_err("critical bug! too many kiq readers\n");
    674		goto failed_unlock;
    675	}
    676	amdgpu_ring_alloc(ring, 32);
    677	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
    678	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
    679	if (r)
    680		goto failed_undo;
    681
    682	amdgpu_ring_commit(ring);
    683	spin_unlock_irqrestore(&kiq->ring_lock, flags);
    684
    685	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
    686
    687	/* don't wait anymore for gpu reset case because this way may
    688	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
    689	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
    690	 * never return if we keep waiting in virt_kiq_rreg, which cause
    691	 * gpu_recover() hang there.
    692	 *
    693	 * also don't wait anymore for IRQ context
    694	 * */
    695	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
    696		goto failed_kiq_read;
    697
    698	might_sleep();
    699	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
    700		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
    701		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
    702	}
    703
    704	if (cnt > MAX_KIQ_REG_TRY)
    705		goto failed_kiq_read;
    706
    707	mb();
    708	value = adev->wb.wb[reg_val_offs];
    709	amdgpu_device_wb_free(adev, reg_val_offs);
    710	return value;
    711
    712failed_undo:
    713	amdgpu_ring_undo(ring);
    714failed_unlock:
    715	spin_unlock_irqrestore(&kiq->ring_lock, flags);
    716failed_kiq_read:
    717	if (reg_val_offs)
    718		amdgpu_device_wb_free(adev, reg_val_offs);
    719	dev_err(adev->dev, "failed to read reg:%x\n", reg);
    720	return ~0;
    721}
    722
    723void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
    724{
    725	signed long r, cnt = 0;
    726	unsigned long flags;
    727	uint32_t seq;
    728	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    729	struct amdgpu_ring *ring = &kiq->ring;
    730
    731	BUG_ON(!ring->funcs->emit_wreg);
    732
    733	if (amdgpu_device_skip_hw_access(adev))
    734		return;
    735
    736	spin_lock_irqsave(&kiq->ring_lock, flags);
    737	amdgpu_ring_alloc(ring, 32);
    738	amdgpu_ring_emit_wreg(ring, reg, v);
    739	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
    740	if (r)
    741		goto failed_undo;
    742
    743	amdgpu_ring_commit(ring);
    744	spin_unlock_irqrestore(&kiq->ring_lock, flags);
    745
    746	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
    747
    748	/* don't wait anymore for gpu reset case because this way may
    749	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
    750	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
    751	 * never return if we keep waiting in virt_kiq_rreg, which cause
    752	 * gpu_recover() hang there.
    753	 *
    754	 * also don't wait anymore for IRQ context
    755	 * */
    756	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
    757		goto failed_kiq_write;
    758
    759	might_sleep();
    760	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
    761
    762		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
    763		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
    764	}
    765
    766	if (cnt > MAX_KIQ_REG_TRY)
    767		goto failed_kiq_write;
    768
    769	return;
    770
    771failed_undo:
    772	amdgpu_ring_undo(ring);
    773	spin_unlock_irqrestore(&kiq->ring_lock, flags);
    774failed_kiq_write:
    775	dev_err(adev->dev, "failed to write reg:%x\n", reg);
    776}
    777
    778int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
    779{
    780	if (amdgpu_num_kcq == -1) {
    781		return 8;
    782	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
    783		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
    784		return 8;
    785	}
    786	return amdgpu_num_kcq;
    787}