cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

si_dma.c (8658B)


/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "si.h"
#include "sid.h"

/**
 * si_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = si_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(rdev, ring);
		return false;
	}
	return radeon_ring_test_lockup(rdev, ring);
}
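
/*
 * Illustrative sketch (not part of this file): si_dma_is_lockup() is used
 * as the lockup-check callback for the SI DMA rings.  A hypothetical
 * caller deciding whether the async DMA engine needs a GPU reset might
 * use it roughly like this:
 *
 *	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
 *
 *	if (si_dma_is_lockup(rdev, ring))
 *		radeon_gpu_reset(rdev);
 */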

/**
 * si_dma_vm_copy_pages - update PTEs by copying them from the GART
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr where to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using the DMA (SI).
 */
void si_dma_vm_copy_pages(struct radeon_device *rdev,
			  struct radeon_ib *ib,
			  uint64_t pe, uint64_t src,
			  unsigned count)
{
	while (count) {
		unsigned bytes = count * 8;
		if (bytes > 0xFFFF8)
			bytes = 0xFFFF8;

		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
						      1, 0, 0, bytes);
		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
		ib->ptr[ib->length_dw++] = lower_32_bits(src);
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;

		pe += bytes;
		src += bytes;
		count -= bytes / 8;
	}
}
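
/*
 * Illustrative sketch (not part of this file): each loop iteration above
 * emits one 5-dword COPY packet that copies at most 0xFFFF8 bytes, i.e.
 * 0xFFFF8 / 8 = 131071 page-table entries.  A hypothetical helper for
 * sizing the indirect buffer needed to update 'count' PTEs this way
 * could therefore look like:
 *
 *	static unsigned si_dma_copy_pages_ndw(unsigned count)
 *	{
 *		return 5 * DIV_ROUND_UP(count, 0xFFFF8 / 8);
 *	}
 */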

/**
 * si_dma_vm_write_pages - update PTEs by writing them manually
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update PTEs by writing them manually using the DMA (SI).
 */
void si_dma_vm_write_pages(struct radeon_device *rdev,
			   struct radeon_ib *ib,
			   uint64_t pe,
			   uint64_t addr, unsigned count,
			   uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		/* for non-physically contiguous pages (system) */
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
		ib->ptr[ib->length_dw++] = pe;
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
			if (flags & R600_PTE_SYSTEM) {
				value = radeon_vm_map_gart(rdev, addr);
			} else if (flags & R600_PTE_VALID) {
				value = addr;
			} else {
				value = 0;
			}
			addr += incr;
			value |= flags;
			ib->ptr[ib->length_dw++] = value;
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
		}
	}
}
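
/*
 * Illustrative sketch (not part of this file): each chunk above consists
 * of a 3-dword WRITE packet header (packet, pe low, pe high) followed by
 * two data dwords per PTE, with at most 0xFFFFE data dwords per chunk
 * (0xFFFFE / 2 = 524287 entries).  A hypothetical helper sizing the
 * indirect buffer for 'count' PTEs could look like:
 *
 *	static unsigned si_dma_write_pages_ndw(unsigned count)
 *	{
 *		unsigned ndw = 0;
 *
 *		while (count) {
 *			unsigned chunk = min(count, 0xFFFFEu / 2);
 *
 *			ndw += 3 + chunk * 2;
 *			count -= chunk;
 *		}
 *		return ndw;
 *	}
 */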

/**
 * si_dma_vm_set_pages - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using the DMA (SI).
 */
void si_dma_vm_set_pages(struct radeon_device *rdev,
			 struct radeon_ib *ib,
			 uint64_t pe,
			 uint64_t addr, unsigned count,
			 uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		if (flags & R600_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;
		pe += ndw * 4;
		addr += (ndw / 2) * incr;
		count -= ndw / 2;
	}
}
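
/*
 * Illustrative sketch (not part of this file): unlike the WRITE path,
 * each chunk above is a fixed 9-dword PTE_PDE packet covering up to
 * 0xFFFFE / 2 = 524287 entries, so a hypothetical indirect-buffer sizing
 * helper for 'count' PTEs could simply be:
 *
 *	static unsigned si_dma_set_pages_ndw(unsigned count)
 *	{
 *		return 9 * DIV_ROUND_UP(count, 0xFFFFE / 2);
 *	}
 */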

/**
 * si_dma_vm_flush - flush the VM TLB using the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VM context to flush
 * @pd_addr: address of the page directory
 *
 * Update the page table base address for the requested VM context and
 * flush the VM TLB using the DMA (SI).
 */
void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		     unsigned vm_id, uint64_t pd_addr)
{
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	if (vm_id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for invalidate to complete */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST);
	radeon_ring_write(ring, 0xff << 16); /* retry */
	radeon_ring_write(ring, 1 << vm_id); /* mask */
	radeon_ring_write(ring, 0); /* value */
	radeon_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
}
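
/*
 * Illustrative sketch (not part of this file): the flush sequence above
 * emits 15 dwords on the DMA ring, so a hypothetical caller driving it
 * directly would need to reserve at least that much ring space first,
 * e.g.:
 *
 *	r = radeon_ring_lock(rdev, ring, 15);
 *	if (r)
 *		return r;
 *	si_dma_vm_flush(rdev, ring, vm_id, pd_addr);
 *	radeon_ring_unlock_commit(rdev, ring, false);
 */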

/**
 * si_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @resv: reservation object to sync to
 *
 * Copy GPU pages using the DMA engine (SI).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
				 uint64_t src_offset, uint64_t dst_offset,
				 unsigned num_gpu_pages,
				 struct dma_resv *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0xFFFFF)
			cur_size_in_bytes = 0xFFFFF;
		size_in_bytes -= cur_size_in_bytes;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
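
/*
 * Illustrative sketch (not part of this file): a hypothetical caller
 * moving pages with this routine and waiting for the copy to finish
 * might look roughly like the following.  src_gpu_addr, dst_gpu_addr,
 * num_gpu_pages and resv are placeholders; radeon_fence_wait() and
 * radeon_fence_unref() are helpers from the radeon driver assumed to be
 * available here:
 *
 *	struct radeon_fence *fence;
 *	int r;
 *
 *	fence = si_copy_dma(rdev, src_gpu_addr, dst_gpu_addr,
 *			    num_gpu_pages, resv);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	r = radeon_fence_wait(fence, false);
 *	radeon_fence_unref(&fence);
 *	return r;
 */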