cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

radeon_ring.c (15764B)


/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 *          Christian König
 */

#include <drm/drm_device.h>
#include <drm/drm_file.h>

#include "radeon.h"

/*
 * Rings
 * Most engines on the GPU are fed via ring buffers.  Ring
 * buffers are areas of GPU accessible memory that the host
 * writes commands into and the GPU reads commands out of.
 * There is a rptr (read pointer) that determines where the
 * GPU is currently reading, and a wptr (write pointer)
 * which determines where the host has written.  When the
 * pointers are equal, the ring is idle.  When the host
 * writes commands to the ring buffer, it increments the
 * wptr.  The GPU then starts fetching commands and executes
 * them until the pointers are equal again.
 */
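
/*
 * Illustrative sketch (not part of the driver): a minimal model of the
 * producer/consumer protocol described above. The `ring_model` type and
 * helper names are hypothetical; the only assumption is a ring of
 * power-of-two size indexed through ptr_mask = size_dw - 1, as the
 * driver does below.
 */
#if 0
struct ring_model {
	unsigned int rptr;     /* consumer: next dword the GPU will fetch */
	unsigned int wptr;     /* producer: next free dword for the host */
	unsigned int ptr_mask; /* size_dw - 1, with size_dw a power of two */
};

/* The ring is idle exactly when the GPU has caught up with the host. */
static bool ring_model_idle(const struct ring_model *r)
{
	return r->rptr == r->wptr;
}

/* Dwords written but not yet fetched; the mask makes wraparound free. */
static unsigned int ring_model_pending(const struct ring_model *r)
{
	return (r->wptr - r->rptr) & r->ptr_mask;
}
#endif
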
static void radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring);

/**
 * radeon_ring_supports_scratch_reg - check if the ring supports
 * writing to scratch registers
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if a specific ring supports writing to scratch registers (all asics).
 * Returns true if the ring supports writing to scratch regs, false if not.
 */
bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev,
				      struct radeon_ring *ring)
{
	switch (ring->idx) {
	case RADEON_RING_TYPE_GFX_INDEX:
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
		return true;
	default:
		return false;
	}
}

/**
 * radeon_ring_free_size - update the free size
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Update the free dw slots in the ring buffer (all asics).
 */
void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t rptr = radeon_ring_get_rptr(rdev, ring);

	/* This works because ring_size is a power of 2 */
	ring->ring_free_dw = rptr + (ring->ring_size / 4);
	ring->ring_free_dw -= ring->wptr;
	ring->ring_free_dw &= ring->ptr_mask;
	if (!ring->ring_free_dw) {
		/* this is an empty ring */
		ring->ring_free_dw = ring->ring_size / 4;
		/* update lockup info to avoid false positive */
		radeon_ring_lockup_update(rdev, ring);
	}
}
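
/*
 * Worked example for the masking above (values hypothetical): with a
 * 64 KiB ring, ring_size / 4 = 16384 dwords and ptr_mask = 16383.
 *
 *   rptr = 16380, wptr = 100 (no wrap):
 *     (16380 + 16384 - 100) & 16383 = 32664 & 16383 = 16280,
 *     which matches 16380 - 100 computed directly.
 *
 *   rptr = 100, wptr = 16380 (writer wrapped past the reader):
 *     (100 + 16384 - 16380) & 16383 = 104
 *
 * A result of 0 is ambiguous between full and empty; the branch above
 * resolves it as empty, which holds because allocation never hands out
 * more than ring_free_dw - 1 dwords (see radeon_ring_alloc() below).
 */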

/**
 * radeon_ring_alloc - allocate space on the ring buffer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @ndw: number of dwords to allocate in the ring buffer
 *
 * Allocate @ndw dwords in the ring buffer (all asics).
 * Returns 0 on success, error on failure.
 */
int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw)
{
	int r;

	/* make sure we aren't trying to allocate more space than there is on the ring */
	if (ndw > (ring->ring_size / 4))
		return -ENOMEM;
	/* Align requested size with padding so unlock_commit can
	 * pad safely */
	radeon_ring_free_size(rdev, ring);
	ndw = (ndw + ring->align_mask) & ~ring->align_mask;
	while (ndw > (ring->ring_free_dw - 1)) {
		radeon_ring_free_size(rdev, ring);
		if (ndw < ring->ring_free_dw) {
			break;
		}
		r = radeon_fence_wait_next(rdev, ring->idx);
		if (r)
			return r;
	}
	ring->count_dw = ndw;
	ring->wptr_old = ring->wptr;
	return 0;
}
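
/*
 * Worked example for the round-up above (the align_mask value is
 * hypothetical): with an 8-dword fetch granularity, align_mask = 7, so
 * a request of ndw = 13 becomes (13 + 7) & ~7 = 16. Reserving the
 * already-rounded size up front is what lets radeon_ring_commit() pad
 * with nops later without overrunning the space checked for here.
 */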

/**
 * radeon_ring_lock - lock the ring and allocate space on it
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @ndw: number of dwords to allocate in the ring buffer
 *
 * Lock the ring and allocate @ndw dwords in the ring buffer
 * (all asics).
 * Returns 0 on success, error on failure.
 */
int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw)
{
	int r;

	mutex_lock(&rdev->ring_lock);
	r = radeon_ring_alloc(rdev, ring, ndw);
	if (r) {
		mutex_unlock(&rdev->ring_lock);
		return r;
	}
	return 0;
}

/**
 * radeon_ring_commit - tell the GPU to execute the new
 * commands on the ring buffer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @hdp_flush: Whether or not to perform an HDP cache flush
 *
 * Update the wptr (write pointer) to tell the GPU to
 * execute new commands on the ring buffer (all asics).
 */
void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring,
			bool hdp_flush)
{
	/* If we are emitting the HDP flush via the ring buffer, we need to
	 * do it before padding.
	 */
	if (hdp_flush && rdev->asic->ring[ring->idx]->hdp_flush)
		rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring);
	/* We pad to match fetch size */
	while (ring->wptr & ring->align_mask) {
		radeon_ring_write(ring, ring->nop);
	}
	mb();
	/* If we are emitting the HDP flush via MMIO, we need to do it after
	 * all CPU writes to VRAM finished.
	 */
	if (hdp_flush && rdev->asic->mmio_hdp_flush)
		rdev->asic->mmio_hdp_flush(rdev);
	radeon_ring_set_wptr(rdev, ring);
}
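
/*
 * Worked example for the padding loop above (align_mask hypothetical):
 * with align_mask = 7, a wptr of 21 gets three nop dwords appended
 * (21 -> 22 -> 23 -> 24) so the GPU never fetches past the last real
 * packet into stale ring contents. The mb() barrier then orders the
 * command writes before the wptr update that makes them visible.
 */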

/**
 * radeon_ring_unlock_commit - tell the GPU to execute the new
 * commands on the ring buffer and unlock it
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @hdp_flush: Whether or not to perform an HDP cache flush
 *
 * Call radeon_ring_commit() then unlock the ring (all asics).
 */
void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring,
			       bool hdp_flush)
{
	radeon_ring_commit(rdev, ring, hdp_flush);
	mutex_unlock(&rdev->ring_lock);
}

/**
 * radeon_ring_undo - reset the wptr
 *
 * @ring: radeon_ring structure holding ring information
 *
 * Reset the driver's copy of the wptr (all asics).
 */
void radeon_ring_undo(struct radeon_ring *ring)
{
	ring->wptr = ring->wptr_old;
}

/**
 * radeon_ring_unlock_undo - reset the wptr and unlock the ring
 *
 * @rdev:       radeon device structure
 * @ring: radeon_ring structure holding ring information
 *
 * Call radeon_ring_undo() then unlock the ring (all asics).
 */
void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *ring)
{
	radeon_ring_undo(ring);
	mutex_unlock(&rdev->ring_lock);
}

/**
 * radeon_ring_lockup_update - update lockup variables
 *
 * @rdev:       radeon device structure
 * @ring: radeon_ring structure holding ring information
 *
 * Update the last rptr value and timestamp (all asics).
 */
void radeon_ring_lockup_update(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	atomic_set(&ring->last_rptr, radeon_ring_get_rptr(rdev, ring));
	atomic64_set(&ring->last_activity, jiffies_64);
}

/**
 * radeon_ring_test_lockup() - check if the ring is locked up
 * @rdev:       radeon device structure
 * @ring:       radeon_ring structure holding ring information
 *
 * Returns true if the ring appears to be locked up, false if not.
 */
bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t rptr = radeon_ring_get_rptr(rdev, ring);
	uint64_t last = atomic64_read(&ring->last_activity);
	uint64_t elapsed;

	if (rptr != atomic_read(&ring->last_rptr)) {
		/* ring is still working, no lockup */
		radeon_ring_lockup_update(rdev, ring);
		return false;
	}

	elapsed = jiffies_to_msecs(jiffies_64 - last);
	if (radeon_lockup_timeout && elapsed >= radeon_lockup_timeout) {
		dev_err(rdev->dev, "ring %d stalled for more than %llumsec\n",
			ring->idx, elapsed);
		return true;
	}
	/* give a chance to the GPU ... */
	return false;
}
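
/*
 * Illustrative sketch (not part of the driver): the watchdog logic above
 * reduced to its two cases. The helper name and parameters are
 * hypothetical; any rptr progress re-arms the timer, and only a stall
 * longer than the (optional) timeout is reported as a lockup.
 */
#if 0
static bool watchdog_stalled(u32 cur_pos, u32 *last_pos,
			     u64 *last_jiffies, u32 timeout_ms)
{
	if (cur_pos != *last_pos) {
		/* forward progress: remember it and re-arm the timer */
		*last_pos = cur_pos;
		*last_jiffies = get_jiffies_64();
		return false;
	}
	return timeout_ms &&
	       jiffies_to_msecs(get_jiffies_64() - *last_jiffies) >= timeout_ms;
}
#endif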

/**
 * radeon_ring_backup - Back up the content of a ring
 *
 * @rdev: radeon_device pointer
 * @ring: the ring we want to back up
 * @data: placeholder for returned commit data
 *
 * Saves all unprocessed commits from a ring, returns the number of dwords saved.
 */
unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring,
			    uint32_t **data)
{
	unsigned size, ptr, i;

	/* just in case lock the ring */
	mutex_lock(&rdev->ring_lock);
	*data = NULL;

	if (ring->ring_obj == NULL) {
		mutex_unlock(&rdev->ring_lock);
		return 0;
	}

	/* it doesn't make sense to save anything if all fences are signaled */
	if (!radeon_fence_count_emitted(rdev, ring->idx)) {
		mutex_unlock(&rdev->ring_lock);
		return 0;
	}

	/* calculate the number of dw on the ring */
	if (ring->rptr_save_reg)
		ptr = RREG32(ring->rptr_save_reg);
	else if (rdev->wb.enabled)
		ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);
	else {
		/* no way to read back the next rptr */
		mutex_unlock(&rdev->ring_lock);
		return 0;
	}

	size = ring->wptr + (ring->ring_size / 4);
	size -= ptr;
	size &= ring->ptr_mask;
	if (size == 0) {
		mutex_unlock(&rdev->ring_lock);
		return 0;
	}

	/* and then save the content of the ring */
	*data = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
	if (!*data) {
		mutex_unlock(&rdev->ring_lock);
		return 0;
	}
	for (i = 0; i < size; ++i) {
		(*data)[i] = ring->ring[ptr++];
		ptr &= ring->ptr_mask;
	}

	mutex_unlock(&rdev->ring_lock);
	return size;
}

/**
 * radeon_ring_restore - append saved commands to the ring again
 *
 * @rdev: radeon_device pointer
 * @ring: ring to append commands to
 * @size: number of dwords we want to write
 * @data: saved commands
 *
 * Allocates space on the ring and restores the previously saved commands.
 */
int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring,
			unsigned size, uint32_t *data)
{
	int i, r;

	if (!size || !data)
		return 0;

	/* restore the saved ring content */
	r = radeon_ring_lock(rdev, ring, size);
	if (r)
		return r;

	for (i = 0; i < size; ++i) {
		radeon_ring_write(ring, data[i]);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	kvfree(data);
	return 0;
}

/**
 * radeon_ring_init - init driver ring struct.
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @ring_size: size of the ring
 * @rptr_offs: offset of the rptr writeback location in the WB buffer
 * @nop: nop packet for this ring
 *
 * Initialize the driver information for the selected ring (all asics).
 * Returns 0 on success, error on failure.
 */
int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size,
		     unsigned rptr_offs, u32 nop)
{
	int r;

	ring->ring_size = ring_size;
	ring->rptr_offs = rptr_offs;
	ring->nop = nop;
	ring->rdev = rdev;
	/* Allocate ring buffer */
	if (ring->ring_obj == NULL) {
		r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL,
				     NULL, &ring->ring_obj);
		if (r) {
			dev_err(rdev->dev, "(%d) ring create failed\n", r);
			return r;
		}
		r = radeon_bo_reserve(ring->ring_obj, false);
		if (unlikely(r != 0))
			return r;
		r = radeon_bo_pin(ring->ring_obj, RADEON_GEM_DOMAIN_GTT,
					&ring->gpu_addr);
		if (r) {
			radeon_bo_unreserve(ring->ring_obj);
			dev_err(rdev->dev, "(%d) ring pin failed\n", r);
			return r;
		}
		r = radeon_bo_kmap(ring->ring_obj,
				       (void **)&ring->ring);
		radeon_bo_unreserve(ring->ring_obj);
		if (r) {
			dev_err(rdev->dev, "(%d) ring map failed\n", r);
			return r;
		}
	}
	ring->ptr_mask = (ring->ring_size / 4) - 1;
	ring->ring_free_dw = ring->ring_size / 4;
	if (rdev->wb.enabled) {
		u32 index = RADEON_WB_RING0_NEXT_RPTR + (ring->idx * 4);
		ring->next_rptr_gpu_addr = rdev->wb.gpu_addr + index;
		ring->next_rptr_cpu_addr = &rdev->wb.wb[index/4];
	}
	radeon_debugfs_ring_init(rdev, ring);
	radeon_ring_lockup_update(rdev, ring);
	return 0;
}

/**
 * radeon_ring_fini - tear down the driver ring struct.
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Tear down the driver information for the selected ring (all asics).
 */
void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *ring)
{
	int r;
	struct radeon_bo *ring_obj;

	mutex_lock(&rdev->ring_lock);
	ring_obj = ring->ring_obj;
	ring->ready = false;
	ring->ring = NULL;
	ring->ring_obj = NULL;
	mutex_unlock(&rdev->ring_lock);

	if (ring_obj) {
		r = radeon_bo_reserve(ring_obj, false);
		if (likely(r == 0)) {
			radeon_bo_kunmap(ring_obj);
			radeon_bo_unpin(ring_obj);
			radeon_bo_unreserve(ring_obj);
		}
		radeon_bo_unref(&ring_obj);
	}
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)

static int radeon_debugfs_ring_info_show(struct seq_file *m, void *unused)
{
	struct radeon_ring *ring = (struct radeon_ring *) m->private;
	struct radeon_device *rdev = ring->rdev;

	uint32_t rptr, wptr, rptr_next;
	unsigned count, i, j;

	radeon_ring_free_size(rdev, ring);
	count = (ring->ring_size / 4) - ring->ring_free_dw;

	wptr = radeon_ring_get_wptr(rdev, ring);
	seq_printf(m, "wptr: 0x%08x [%5d]\n",
		   wptr, wptr);

	rptr = radeon_ring_get_rptr(rdev, ring);
	seq_printf(m, "rptr: 0x%08x [%5d]\n",
		   rptr, rptr);

	if (ring->rptr_save_reg) {
		rptr_next = RREG32(ring->rptr_save_reg);
		seq_printf(m, "rptr next(0x%04x): 0x%08x [%5d]\n",
			   ring->rptr_save_reg, rptr_next, rptr_next);
	} else
		rptr_next = ~0;

	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
		   ring->wptr, ring->wptr);
	seq_printf(m, "last semaphore signal addr : 0x%016llx\n",
		   ring->last_semaphore_signal_addr);
	seq_printf(m, "last semaphore wait addr   : 0x%016llx\n",
		   ring->last_semaphore_wait_addr);
	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
	seq_printf(m, "%u dwords in ring\n", count);

	if (!ring->ring)
		return 0;
	/* print 32 dw before current rptr as often it's the last executed
	 * packet that is the root issue
	 */
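	/* (rptr + ptr_mask + 1 - 32) & ptr_mask steps rptr back 32 dwords
	 * modulo the ring size; adding ptr_mask + 1 (the ring size) keeps
	 * the intermediate value non-negative before masking.
	 */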
	i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
	for (j = 0; j <= (count + 32); j++) {
		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
		if (rptr == i)
			seq_puts(m, " *");
		if (rptr_next == i)
			seq_puts(m, " #");
		seq_puts(m, "\n");
		i = (i + 1) & ring->ptr_mask;
	}
	return 0;
}

DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_ring_info);

static const char *radeon_debugfs_ring_idx_to_name(uint32_t ridx)
{
	switch (ridx) {
	case RADEON_RING_TYPE_GFX_INDEX:
		return "radeon_ring_gfx";
	case CAYMAN_RING_TYPE_CP1_INDEX:
		return "radeon_ring_cp1";
	case CAYMAN_RING_TYPE_CP2_INDEX:
		return "radeon_ring_cp2";
	case R600_RING_TYPE_DMA_INDEX:
		return "radeon_ring_dma1";
	case CAYMAN_RING_TYPE_DMA1_INDEX:
		return "radeon_ring_dma2";
	case R600_RING_TYPE_UVD_INDEX:
		return "radeon_ring_uvd";
	case TN_RING_TYPE_VCE1_INDEX:
		return "radeon_ring_vce1";
	case TN_RING_TYPE_VCE2_INDEX:
		return "radeon_ring_vce2";
	default:
		return NULL;
	}
}
#endif

static void radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring)
{
#if defined(CONFIG_DEBUG_FS)
	const char *ring_name = radeon_debugfs_ring_idx_to_name(ring->idx);
	struct dentry *root = rdev->ddev->primary->debugfs_root;

	if (ring_name)
		debugfs_create_file(ring_name, 0444, root, ring,
				    &radeon_debugfs_ring_info_fops);
#endif
}