ni_dma.c (13489B)
/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "ni.h"
#include "nid.h"

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine. The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things. It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */

/**
 * cayman_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 rptr, reg;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		if (ring->idx == R600_RING_TYPE_DMA_INDEX)
			reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
		else
			reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;

		rptr = RREG32(reg);
	}

	return (rptr & 0x3fffc) >> 2;
}

/**
 * cayman_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
	else
		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

	return (RREG32(reg) & 0x3fffc) >> 2;
}

/**
 * cayman_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (cayman+).
 */
void cayman_dma_set_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
	else
		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
}
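
/*
 * Illustrative sketch (not part of the original driver): the helpers above
 * expose rptr/wptr as dword indices; the & 0x3fffc mask keeps the
 * dword-aligned byte offset reported by the hardware and the >> 2 converts
 * it to a dword index. A minimal example of turning those indices into free
 * ring space, assuming a power-of-two ring size in dwords. The function name
 * is made up for this example, and the real accounting lives in the generic
 * ring code (radeon_ring.c).
 */
static inline u32 cayman_dma_example_free_dw(u32 rptr, u32 wptr,
					     u32 ring_size_dw)
{
	/* dwords currently queued between the read and write pointer */
	u32 used = (wptr - rptr) & (ring_size_dw - 1);

	/* keep one dword unused so "empty" and "full" stay distinguishable */
	return ring_size_dw - used - 1;
}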

/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;

	if (rdev->wb.enabled) {
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));

}

/**
 * cayman_dma_stop - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines (cayman-SI).
 */
void cayman_dma_stop(struct radeon_device *rdev)
{
	u32 rb_cntl;

	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	/* dma0 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);

	/* dma1 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);

	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
}
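
/*
 * Illustrative sketch (not part of the original driver): the NOP loop in
 * cayman_dma_ring_ib_execute() pads the ring until the write pointer sits at
 * offset 5 within an 8-dword group, so that the 3-dword INDIRECT_BUFFER
 * packet that follows ends exactly on an 8-dword boundary (5 + 3 == 8).
 * The helper below computes the same NOP count arithmetically; the name is
 * made up for the example.
 */
static inline unsigned cayman_dma_example_ib_pad_dw(u32 wptr)
{
	/* NOP dwords needed so that ((wptr + pad) & 7) == 5 */
	return (5 - (wptr & 7)) & 7;
}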

/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = DMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = DMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + reg_offset, 0);
		WREG32(DMA_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

		dma_cntl = RREG32(DMA_CNTL + reg_offset);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + reg_offset, dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * cayman_dma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (cayman-SI).
 */
void cayman_dma_fini(struct radeon_device *rdev)
{
	cayman_dma_stop(rdev);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}
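
/*
 * Illustrative sketch (not part of the original driver): how the ring size is
 * encoded into DMA_RB_CNTL in cayman_dma_resume(). For a 64 KB ring,
 * ring_size / 4 is 16384 dwords, order_base_2(16384) is 14, and the value is
 * shifted into the size field starting at bit 1, giving rb_cntl = 14 << 1.
 * The helper name is made up for the example.
 */
static inline u32 cayman_dma_example_rb_cntl(u32 ring_size_bytes)
{
	u32 rb_bufsz = order_base_2(ring_size_bytes / 4);	/* log2 of size in dwords */

	return rb_bufsz << 1;	/* size field of DMA_RB_CNTL, as programmed above */
}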

/**
 * cayman_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(rdev, ring);
		return false;
	}
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr where to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using the DMA (cayman/TN).
 */
void cayman_dma_vm_copy_pages(struct radeon_device *rdev,
			      struct radeon_ib *ib,
			      uint64_t pe, uint64_t src,
			      unsigned count)
{
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
						      0, 0, ndw);
		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
		ib->ptr[ib->length_dw++] = lower_32_bits(src);
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;

		pe += ndw * 4;
		src += ndw * 4;
		count -= ndw / 2;
	}
}

/**
 * cayman_dma_vm_write_pages - update PTEs by writing them manually
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update PTEs by writing them manually using the DMA (cayman/TN).
 */
void cayman_dma_vm_write_pages(struct radeon_device *rdev,
			       struct radeon_ib *ib,
			       uint64_t pe,
			       uint64_t addr, unsigned count,
			       uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		/* for non-physically contiguous pages (system) */
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
						      0, 0, ndw);
		ib->ptr[ib->length_dw++] = pe;
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
			if (flags & R600_PTE_SYSTEM) {
				value = radeon_vm_map_gart(rdev, addr);
			} else if (flags & R600_PTE_VALID) {
				value = addr;
			} else {
				value = 0;
			}
			addr += incr;
			value |= flags;
			ib->ptr[ib->length_dw++] = value;
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
		}
	}
}
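
/*
 * Illustrative sketch (not part of the original driver): the PTE helpers
 * above emit two dwords of payload per 64-bit page table entry
 * (ndw = count * 2) and clamp each packet to 0xFFFFE dwords, i.e. at most
 * 0x7FFFF entries per DMA packet, which is why they loop on count. The
 * helper below computes how many packets a given update needs under that
 * limit; the name is made up for the example.
 */
static inline unsigned cayman_dma_example_pte_packets(unsigned count)
{
	const unsigned max_ptes = 0xFFFFE / 2;	/* entries per packet at the dword cap */

	return (count + max_ptes - 1) / max_ptes;	/* round up */
}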

/**
 * cayman_dma_vm_set_pages - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update the page tables using the DMA (cayman/TN).
 */
void cayman_dma_vm_set_pages(struct radeon_device *rdev,
			     struct radeon_ib *ib,
			     uint64_t pe,
			     uint64_t addr, unsigned count,
			     uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		if (flags & R600_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;

		pe += ndw * 4;
		addr += (ndw / 2) * incr;
		count -= ndw / 2;
	}
}

/**
 * cayman_dma_vm_pad_ib - pad the IB to the required number of dw
 *
 * @ib: indirect buffer to fill with padding
 *
 */
void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
{
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}

void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
			 unsigned vm_id, uint64_t pd_addr)
{
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for invalidate to complete */
	radeon_ring_write(ring, DMA_SRBM_READ_PACKET);
	radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0); /* value */
}
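
/*
 * Illustrative sketch (not part of the original driver): cayman_dma_vm_flush()
 * programs per-context registers through SRBM_WRITE packets, which carry a
 * dword register index rather than a byte address; the per-context page table
 * base registers are 4 bytes apart, hence the (vm_id << 2) byte offset and
 * the final >> 2 conversion seen above. The helper name is made up for the
 * example.
 */
static inline u32 cayman_dma_example_vm_pt_base_index(unsigned vm_id)
{
	/* byte address of the context's page table base register, as a dword index */
	return (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2;
}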