savage_state.c (30968B)
/* savage_state.c -- State and drawing support for Savage
 *
 * Copyright 2004  Felix Kuehling
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/slab.h>
#include <linux/uaccess.h>

#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/drm_print.h>
#include <drm/savage_drm.h>

#include "savage_drv.h"

void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv,
			       const struct drm_clip_rect * pbox)
{
	uint32_t scstart = dev_priv->state.s3d.new_scstart;
	uint32_t scend = dev_priv->state.s3d.new_scend;
	scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
		((uint32_t) pbox->x1 & 0x000007ff) |
		(((uint32_t) pbox->y1 << 16) & 0x07ff0000);
	scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |
		(((uint32_t) pbox->x2 - 1) & 0x000007ff) |
		((((uint32_t) pbox->y2 - 1) << 16) & 0x07ff0000);
	if (scstart != dev_priv->state.s3d.scstart ||
	    scend != dev_priv->state.s3d.scend) {
		DMA_LOCALS;
		BEGIN_DMA(4);
		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
		DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
		DMA_WRITE(scstart);
		DMA_WRITE(scend);
		dev_priv->state.s3d.scstart = scstart;
		dev_priv->state.s3d.scend = scend;
		dev_priv->waiting = 1;
		DMA_COMMIT();
	}
}

void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv,
			      const struct drm_clip_rect * pbox)
{
	uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
	uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
	drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
		((uint32_t) pbox->x1 & 0x000007ff) |
		(((uint32_t) pbox->y1 << 12) & 0x00fff000);
	drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
		(((uint32_t) pbox->x2 - 1) & 0x000007ff) |
		((((uint32_t) pbox->y2 - 1) << 12) & 0x00fff000);
	if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
	    drawctrl1 != dev_priv->state.s4.drawctrl1) {
		DMA_LOCALS;
		BEGIN_DMA(4);
		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
		DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
		DMA_WRITE(drawctrl0);
		DMA_WRITE(drawctrl1);
		dev_priv->state.s4.drawctrl0 = drawctrl0;
		dev_priv->state.s4.drawctrl1 = drawctrl1;
		dev_priv->waiting = 1;
		DMA_COMMIT();
	}
}

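/*
 * Texture addresses come from user space, so verify them before letting
 * them into the hardware state: bit 0 selects the AGP texture heap,
 * bits 1-2 are reserved, and the remaining address must fall inside the
 * local or AGP texture region set up for this device.
 */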
static int savage_verify_texaddr(drm_savage_private_t * dev_priv, int unit,
				 uint32_t addr)
{
	if ((addr & 6) != 2) {	/* reserved bits */
		DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
		return -EINVAL;
	}
	if (!(addr & 1)) {	/* local */
		addr &= ~7;
		if (addr < dev_priv->texture_offset ||
		    addr >= dev_priv->texture_offset + dev_priv->texture_size) {
			DRM_ERROR
			    ("bad texAddr%d %08x (local addr out of range)\n",
			     unit, addr);
			return -EINVAL;
		}
	} else {		/* AGP */
		if (!dev_priv->agp_textures) {
			DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
				  unit, addr);
			return -EINVAL;
		}
		addr &= ~7;
		if (addr < dev_priv->agp_textures->offset ||
		    addr >= (dev_priv->agp_textures->offset +
			     dev_priv->agp_textures->size)) {
			DRM_ERROR
			    ("bad texAddr%d %08x (AGP addr out of range)\n",
			     unit, addr);
			return -EINVAL;
		}
	}
	return 0;
}

#define SAVE_STATE(reg,where)			\
	if(start <= reg && start+count > reg)	\
		dev_priv->state.where = regs[reg - start]
#define SAVE_STATE_MASK(reg,where,mask) do {			\
	if(start <= reg && start+count > reg) {			\
		uint32_t tmp;					\
		tmp = regs[reg - start];			\
		dev_priv->state.where = (tmp & (mask)) |	\
			(dev_priv->state.where & ~(mask));	\
	}							\
} while (0)

static int savage_verify_state_s3d(drm_savage_private_t * dev_priv,
				   unsigned int start, unsigned int count,
				   const uint32_t *regs)
{
	if (start < SAVAGE_TEXPALADDR_S3D ||
	    start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
			  start, start + count - 1);
		return -EINVAL;
	}

	SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
			~SAVAGE_SCISSOR_MASK_S3D);
	SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
			~SAVAGE_SCISSOR_MASK_S3D);

	/* if any texture regs were changed ... */
	if (start <= SAVAGE_TEXCTRL_S3D &&
	    start + count > SAVAGE_TEXPALADDR_S3D) {
		/* ... check texture state */
		SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
		SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
		if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
			return savage_verify_texaddr(dev_priv, 0,
						dev_priv->state.s3d.texaddr);
	}

	return 0;
}

static int savage_verify_state_s4(drm_savage_private_t * dev_priv,
				  unsigned int start, unsigned int count,
				  const uint32_t *regs)
{
	int ret = 0;

	if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
	    start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
			  start, start + count - 1);
		return -EINVAL;
	}

	SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
			~SAVAGE_SCISSOR_MASK_S4);
	SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
			~SAVAGE_SCISSOR_MASK_S4);

	/* if any texture regs were changed ... */
	if (start <= SAVAGE_TEXDESCR_S4 &&
	    start + count > SAVAGE_TEXPALADDR_S4) {
		/* ... check texture state */
		SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
		SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
		SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
			ret |= savage_verify_texaddr(dev_priv, 0,
						dev_priv->state.s4.texaddr0);
		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
			ret |= savage_verify_texaddr(dev_priv, 1,
						dev_priv->state.s4.texaddr1);
	}

	return ret;
}

#undef SAVE_STATE
#undef SAVE_STATE_MASK

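/*
 * Emit a SAVAGE_CMD_STATE register update through command DMA. The
 * scissor registers are excluded here (they are re-emitted per clip
 * rectangle in savage_dispatch_draw), which may split the range into
 * two runs; each DMA_SET_REGISTERS command carries at most 255
 * registers, so long runs are emitted in 255-register batches.
 */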
static int savage_dispatch_state(drm_savage_private_t * dev_priv,
				 const drm_savage_cmd_header_t * cmd_header,
				 const uint32_t *regs)
{
	unsigned int count = cmd_header->state.count;
	unsigned int start = cmd_header->state.start;
	unsigned int count2 = 0;
	unsigned int bci_size;
	int ret;
	DMA_LOCALS;

	if (!count)
		return 0;

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		ret = savage_verify_state_s3d(dev_priv, start, count, regs);
		if (ret != 0)
			return ret;
		/* scissor regs are emitted in savage_dispatch_draw */
		if (start < SAVAGE_SCSTART_S3D) {
			if (start + count > SAVAGE_SCEND_S3D + 1)
				count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
			if (start + count > SAVAGE_SCSTART_S3D)
				count = SAVAGE_SCSTART_S3D - start;
		} else if (start <= SAVAGE_SCEND_S3D) {
			if (start + count > SAVAGE_SCEND_S3D + 1) {
				count -= SAVAGE_SCEND_S3D + 1 - start;
				start = SAVAGE_SCEND_S3D + 1;
			} else
				return 0;
		}
	} else {
		ret = savage_verify_state_s4(dev_priv, start, count, regs);
		if (ret != 0)
			return ret;
		/* scissor regs are emitted in savage_dispatch_draw */
		if (start < SAVAGE_DRAWCTRL0_S4) {
			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
				count2 = count -
					(SAVAGE_DRAWCTRL1_S4 + 1 - start);
			if (start + count > SAVAGE_DRAWCTRL0_S4)
				count = SAVAGE_DRAWCTRL0_S4 - start;
		} else if (start <= SAVAGE_DRAWCTRL1_S4) {
			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
				count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
				start = SAVAGE_DRAWCTRL1_S4 + 1;
			} else
				return 0;
		}
	}

	bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;

	if (cmd_header->state.global) {
		BEGIN_DMA(bci_size + 1);
		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
		dev_priv->waiting = 1;
	} else {
		BEGIN_DMA(bci_size);
	}

	do {
		while (count > 0) {
			unsigned int n = count < 255 ? count : 255;
			DMA_SET_REGISTERS(start, n);
			DMA_COPY(regs, n);
			count -= n;
			start += n;
			regs += n;
		}
		start += 2;
		regs += 2;
		count = count2;
		count2 = 0;
	} while (count);

	DMA_COMMIT();

	return 0;
}

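/*
 * Drawing commands come in four flavours: *_prim draws a run of
 * consecutive vertices, *_idx draws from an explicit list of 16-bit
 * indices; dma_* sources vertex data from a DMA vertex buffer and is
 * submitted through the BCI, vb_* copies vertex data from a
 * user-supplied vertex buffer inline into the command DMA stream.
 */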
static int savage_dispatch_dma_prim(drm_savage_private_t * dev_priv,
				    const drm_savage_cmd_header_t * cmd_header,
				    const struct drm_buf * dmabuf)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->prim.prim;
	unsigned int skip = cmd_header->prim.skip;
	unsigned int n = cmd_header->prim.count;
	unsigned int start = cmd_header->prim.start;
	unsigned int i;
	BCI_LOCALS;

	if (!dmabuf) {
		DRM_ERROR("called without dma buffers!\n");
		return -EINVAL;
	}

	if (!n)
		return 0;

	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		fallthrough;
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
				  n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
			     n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip != 0) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
	} else {
		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
			(skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
			(skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
		if (reorder) {
			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
			return -EINVAL;
		}
	}

	if (start + n > dmabuf->total / 32) {
		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
			  start, start + n - 1, dmabuf->total / 32);
		return -EINVAL;
	}

	/* Vertex DMA doesn't work with command DMA at the same time,
	 * so we use BCI_... to submit commands here. Flush buffered
	 * faked DMA first. */
	DMA_FLUSH();

	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
		BEGIN_BCI(2);
		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
		dev_priv->state.common.vbaddr = dmabuf->bus_address;
	}
	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
		/* Workaround for what looks like a hardware bug. If a
		 * WAIT_3D_IDLE was emitted some time before the
		 * indexed drawing command then the engine will lock
		 * up. There are two known workarounds:
		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
		BEGIN_BCI(63);
		for (i = 0; i < 63; ++i)
			BCI_WRITE(BCI_CMD_WAIT);
		dev_priv->waiting = 0;
	}

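	/* The primitive type occupies the high bits of the BCI draw
	 * command; BCI_CMD_DRAW_CONT continues the same strip/fan
	 * across the 255-vertex batches emitted below. */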
	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 indices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;
		if (reorder) {
			/* Need to reorder indices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { -1, -1, -1 };
			reorder[start % 3] = 2;

			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, start + 2);

			for (i = start + 1; i + 1 < start + count; i += 2)
				BCI_WRITE((i + reorder[i % 3]) |
					  ((i + 1 +
					    reorder[(i + 1) % 3]) << 16));
			if (i < start + count)
				BCI_WRITE(i + reorder[i % 3]);
		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, start);

			for (i = start + 1; i + 1 < start + count; i += 2)
				BCI_WRITE(i | ((i + 1) << 16));
			if (i < start + count)
				BCI_WRITE(i);
		} else {
			BEGIN_BCI((count + 2 + 1) / 2);
			BCI_DRAW_INDICES_S4(count, prim, skip);

			for (i = start; i + 1 < start + count; i += 2)
				BCI_WRITE(i | ((i + 1) << 16));
			if (i < start + count)
				BCI_WRITE(i);
		}

		start += count;
		n -= count;

		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}

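/*
 * Same as above, but the vertex data itself is copied from the
 * user-supplied vertex buffer into the command DMA stream. vtx_size is
 * the per-vertex word count left after removing skipped components and
 * must not exceed the buffer stride.
 */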
static int savage_dispatch_vb_prim(drm_savage_private_t * dev_priv,
				   const drm_savage_cmd_header_t * cmd_header,
				   const uint32_t *vtxbuf, unsigned int vb_size,
				   unsigned int vb_stride)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->prim.prim;
	unsigned int skip = cmd_header->prim.skip;
	unsigned int n = cmd_header->prim.count;
	unsigned int start = cmd_header->prim.start;
	unsigned int vtx_size;
	unsigned int i;
	DMA_LOCALS;

	if (!n)
		return 0;

	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		fallthrough;
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
				  n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
			     n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip > SAVAGE_SKIP_ALL_S3D) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 8;	/* full vertex */
	} else {
		if (skip > SAVAGE_SKIP_ALL_S4) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 10;	/* full vertex */
	}

	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
		(skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
		(skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);

	if (vtx_size > vb_stride) {
		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
			  vtx_size, vb_stride);
		return -EINVAL;
	}

	if (start + n > vb_size / (vb_stride * 4)) {
		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
			  start, start + n - 1, vb_size / (vb_stride * 4));
		return -EINVAL;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 vertices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;
		if (reorder) {
			/* Need to reorder vertices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { -1, -1, -1 };
			reorder[start % 3] = 2;

			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			for (i = start; i < start + count; ++i) {
				unsigned int j = i + reorder[i % 3];
				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
			}

			DMA_COMMIT();
		} else {
			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			if (vb_stride == vtx_size) {
				DMA_COPY(&vtxbuf[vb_stride * start],
					 vtx_size * count);
			} else {
				for (i = start; i < start + count; ++i) {
					DMA_COPY(&vtxbuf[vb_stride * i],
						 vtx_size);
				}
			}

			DMA_COMMIT();
		}

		start += count;
		n -= count;

		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}

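/*
 * Indexed drawing from the DMA vertex buffer. The 16-bit indices are
 * supplied in the command stream and are range-checked against the
 * vertex buffer before each batch is emitted through the BCI.
 */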
static int savage_dispatch_dma_idx(drm_savage_private_t * dev_priv,
				   const drm_savage_cmd_header_t * cmd_header,
				   const uint16_t *idx,
				   const struct drm_buf * dmabuf)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->idx.prim;
	unsigned int skip = cmd_header->idx.skip;
	unsigned int n = cmd_header->idx.count;
	unsigned int i;
	BCI_LOCALS;

	if (!dmabuf) {
		DRM_ERROR("called without dma buffers!\n");
		return -EINVAL;
	}

	if (!n)
		return 0;

	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		fallthrough;
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip != 0) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
	} else {
		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
			(skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
			(skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
		if (reorder) {
			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
			return -EINVAL;
		}
	}

	/* Vertex DMA doesn't work with command DMA at the same time,
	 * so we use BCI_... to submit commands here. Flush buffered
	 * faked DMA first. */
	DMA_FLUSH();

	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
		BEGIN_BCI(2);
		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
		dev_priv->state.common.vbaddr = dmabuf->bus_address;
	}
	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
		/* Workaround for what looks like a hardware bug. If a
		 * WAIT_3D_IDLE was emitted some time before the
		 * indexed drawing command then the engine will lock
		 * up. There are two known workarounds:
		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
		BEGIN_BCI(63);
		for (i = 0; i < 63; ++i)
			BCI_WRITE(BCI_CMD_WAIT);
		dev_priv->waiting = 0;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 indices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;

		/* check indices */
		for (i = 0; i < count; ++i) {
			if (idx[i] > dmabuf->total / 32) {
				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
					  i, idx[i], dmabuf->total / 32);
				return -EINVAL;
			}
		}

		if (reorder) {
			/* Need to reorder indices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { 2, -1, -1 };

			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, idx[2]);

			for (i = 1; i + 1 < count; i += 2)
				BCI_WRITE(idx[i + reorder[i % 3]] |
					  (idx[i + 1 +
					       reorder[(i + 1) % 3]] << 16));
			if (i < count)
				BCI_WRITE(idx[i + reorder[i % 3]]);
		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, idx[0]);

			for (i = 1; i + 1 < count; i += 2)
				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
			if (i < count)
				BCI_WRITE(idx[i]);
		} else {
			BEGIN_BCI((count + 2 + 1) / 2);
			BCI_DRAW_INDICES_S4(count, prim, skip);

			for (i = 0; i + 1 < count; i += 2)
				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
			if (i < count)
				BCI_WRITE(idx[i]);
		}

		idx += count;
		n -= count;

		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}

static int savage_dispatch_vb_idx(drm_savage_private_t * dev_priv,
				  const drm_savage_cmd_header_t * cmd_header,
				  const uint16_t *idx,
				  const uint32_t *vtxbuf,
				  unsigned int vb_size, unsigned int vb_stride)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->idx.prim;
	unsigned int skip = cmd_header->idx.skip;
	unsigned int n = cmd_header->idx.count;
	unsigned int vtx_size;
	unsigned int i;
	DMA_LOCALS;

	if (!n)
		return 0;

	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		fallthrough;
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip > SAVAGE_SKIP_ALL_S3D) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 8;	/* full vertex */
	} else {
		if (skip > SAVAGE_SKIP_ALL_S4) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 10;	/* full vertex */
	}

	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
		(skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
		(skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);

	if (vtx_size > vb_stride) {
		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
			  vtx_size, vb_stride);
		return -EINVAL;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 vertices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;

		/* Check indices */
		for (i = 0; i < count; ++i) {
			if (idx[i] > vb_size / (vb_stride * 4)) {
				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
					  i, idx[i], vb_size / (vb_stride * 4));
				return -EINVAL;
			}
		}

		if (reorder) {
			/* Need to reorder vertices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { 2, -1, -1 };

			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			for (i = 0; i < count; ++i) {
				unsigned int j = idx[i + reorder[i % 3]];
				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
			}

			DMA_COMMIT();
		} else {
			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			for (i = 0; i < count; ++i) {
				unsigned int j = idx[i];
				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
			}

			DMA_COMMIT();
		}

		idx += count;
		n -= count;

		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}

static int savage_dispatch_clear(drm_savage_private_t * dev_priv,
				 const drm_savage_cmd_header_t * cmd_header,
				 const drm_savage_cmd_header_t *data,
				 unsigned int nbox,
				 const struct drm_clip_rect *boxes)
{
	unsigned int flags = cmd_header->clear0.flags;
	unsigned int clear_cmd;
	unsigned int i, nbufs;
	DMA_LOCALS;

	if (nbox == 0)
		return 0;

	clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
	    BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
	BCI_CMD_SET_ROP(clear_cmd, 0xCC);

	nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
	    ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);
	if (nbufs == 0)
		return 0;

	if (data->clear1.mask != 0xffffffff) {
		/* set mask */
		BEGIN_DMA(2);
		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
		DMA_WRITE(data->clear1.mask);
		DMA_COMMIT();
	}
	for (i = 0; i < nbox; ++i) {
		unsigned int x, y, w, h;
		unsigned int buf;
		x = boxes[i].x1, y = boxes[i].y1;
		w = boxes[i].x2 - boxes[i].x1;
		h = boxes[i].y2 - boxes[i].y1;
		BEGIN_DMA(nbufs * 6);
		for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
			if (!(flags & buf))
				continue;
			DMA_WRITE(clear_cmd);
			switch (buf) {
			case SAVAGE_FRONT:
				DMA_WRITE(dev_priv->front_offset);
				DMA_WRITE(dev_priv->front_bd);
				break;
			case SAVAGE_BACK:
				DMA_WRITE(dev_priv->back_offset);
				DMA_WRITE(dev_priv->back_bd);
				break;
			case SAVAGE_DEPTH:
				DMA_WRITE(dev_priv->depth_offset);
				DMA_WRITE(dev_priv->depth_bd);
				break;
			}
			DMA_WRITE(data->clear1.value);
			DMA_WRITE(BCI_X_Y(x, y));
			DMA_WRITE(BCI_W_H(w, h));
		}
		DMA_COMMIT();
	}
	if (data->clear1.mask != 0xffffffff) {
		/* reset mask */
		BEGIN_DMA(2);
		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
		DMA_WRITE(0xffffffff);
		DMA_COMMIT();
	}

	return 0;
}

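/*
 * Copy the back buffer to the visible framebuffer for each clip
 * rectangle (ROP 0xCC, straight copy). The rectangle origin is written
 * twice, presumably as destination and source coordinates of the blit,
 * which coincide for a swap.
 */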
static int savage_dispatch_swap(drm_savage_private_t * dev_priv,
				unsigned int nbox, const struct drm_clip_rect *boxes)
{
	unsigned int swap_cmd;
	unsigned int i;
	DMA_LOCALS;

	if (nbox == 0)
		return 0;

	swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
	    BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
	BCI_CMD_SET_ROP(swap_cmd, 0xCC);

	for (i = 0; i < nbox; ++i) {
		BEGIN_DMA(6);
		DMA_WRITE(swap_cmd);
		DMA_WRITE(dev_priv->back_offset);
		DMA_WRITE(dev_priv->back_bd);
		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
		DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,
				  boxes[i].y2 - boxes[i].y1));
		DMA_COMMIT();
	}

	return 0;
}

static int savage_dispatch_draw(drm_savage_private_t * dev_priv,
				const drm_savage_cmd_header_t *start,
				const drm_savage_cmd_header_t *end,
				const struct drm_buf * dmabuf,
				const unsigned int *vtxbuf,
				unsigned int vb_size, unsigned int vb_stride,
				unsigned int nbox,
				const struct drm_clip_rect *boxes)
{
	unsigned int i, j;
	int ret;

	for (i = 0; i < nbox; ++i) {
		const drm_savage_cmd_header_t *cmdbuf;
		dev_priv->emit_clip_rect(dev_priv, &boxes[i]);

		cmdbuf = start;
		while (cmdbuf < end) {
			drm_savage_cmd_header_t cmd_header;
			cmd_header = *cmdbuf;
			cmdbuf++;
			switch (cmd_header.cmd.cmd) {
			case SAVAGE_CMD_DMA_PRIM:
				ret = savage_dispatch_dma_prim(
					dev_priv, &cmd_header, dmabuf);
				break;
			case SAVAGE_CMD_VB_PRIM:
				ret = savage_dispatch_vb_prim(
					dev_priv, &cmd_header,
					vtxbuf, vb_size, vb_stride);
				break;
			case SAVAGE_CMD_DMA_IDX:
				j = (cmd_header.idx.count + 3) / 4;
				/* j was checked in savage_bci_cmdbuf */
				ret = savage_dispatch_dma_idx(dev_priv,
					&cmd_header, (const uint16_t *)cmdbuf,
					dmabuf);
				cmdbuf += j;
				break;
			case SAVAGE_CMD_VB_IDX:
				j = (cmd_header.idx.count + 3) / 4;
				/* j was checked in savage_bci_cmdbuf */
				ret = savage_dispatch_vb_idx(dev_priv,
					&cmd_header, (const uint16_t *)cmdbuf,
					(const uint32_t *)vtxbuf, vb_size,
					vb_stride);
				cmdbuf += j;
				break;
			default:
				/* What's the best return code? EFAULT? */
				DRM_ERROR("IMPLEMENTATION ERROR: "
					  "non-drawing-command %d\n",
					  cmd_header.cmd.cmd);
				return -EINVAL;
			}

			if (ret != 0)
				return ret;
		}
	}

	return 0;
}

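/*
 * CMDBUF ioctl handler: copies the command stream, vertex data and clip
 * rectangles into kernel memory, then walks the command stream. Runs of
 * consecutive drawing commands are batched and replayed once per clip
 * rectangle by savage_dispatch_draw; cmdbuf->size counts 64-bit command
 * words.
 */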
int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_savage_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *dmabuf;
	drm_savage_cmdbuf_t *cmdbuf = data;
	drm_savage_cmd_header_t *kcmd_addr = NULL;
	drm_savage_cmd_header_t *first_draw_cmd;
	unsigned int *kvb_addr = NULL;
	struct drm_clip_rect *kbox_addr = NULL;
	unsigned int i, j;
	int ret = 0;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (dma && dma->buflist) {
		if (cmdbuf->dma_idx >= dma->buf_count) {
			DRM_ERROR
			    ("vertex buffer index %u out of range (0-%u)\n",
			     cmdbuf->dma_idx, dma->buf_count - 1);
			return -EINVAL;
		}
		dmabuf = dma->buflist[cmdbuf->dma_idx];
	} else {
		dmabuf = NULL;
	}

	/* Copy the user buffers into kernel temporary areas. This hasn't been
	 * a performance loss compared to VERIFYAREA_READ/
	 * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
	 * for locking on FreeBSD.
	 */
	if (cmdbuf->size) {
		kcmd_addr = kmalloc_array(cmdbuf->size, 8, GFP_KERNEL);
		if (kcmd_addr == NULL)
			return -ENOMEM;

		if (copy_from_user(kcmd_addr, cmdbuf->cmd_addr,
				   cmdbuf->size * 8))
		{
			kfree(kcmd_addr);
			return -EFAULT;
		}
		cmdbuf->cmd_addr = kcmd_addr;
	}
	if (cmdbuf->vb_size) {
		kvb_addr = memdup_user(cmdbuf->vb_addr, cmdbuf->vb_size);
		if (IS_ERR(kvb_addr)) {
			ret = PTR_ERR(kvb_addr);
			kvb_addr = NULL;
			goto done;
		}
		cmdbuf->vb_addr = kvb_addr;
	}
	if (cmdbuf->nbox) {
		kbox_addr = kmalloc_array(cmdbuf->nbox, sizeof(struct drm_clip_rect),
					  GFP_KERNEL);
		if (kbox_addr == NULL) {
			ret = -ENOMEM;
			goto done;
		}

		if (copy_from_user(kbox_addr, cmdbuf->box_addr,
				   cmdbuf->nbox * sizeof(struct drm_clip_rect))) {
			ret = -EFAULT;
			goto done;
		}
		cmdbuf->box_addr = kbox_addr;
	}

	/* Make sure writes to DMA buffers are finished before sending
	 * DMA commands to the graphics hardware. */
	mb();

	/* Coming from user space. Don't know if the Xserver has
	 * emitted wait commands. Assuming the worst. */
	dev_priv->waiting = 1;

	i = 0;
	first_draw_cmd = NULL;
	while (i < cmdbuf->size) {
		drm_savage_cmd_header_t cmd_header;
		cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
		cmdbuf->cmd_addr++;
		i++;

		/* Group drawing commands with same state to minimize
		 * iterations over clip rects. */
		j = 0;
		switch (cmd_header.cmd.cmd) {
		case SAVAGE_CMD_DMA_IDX:
		case SAVAGE_CMD_VB_IDX:
			j = (cmd_header.idx.count + 3) / 4;
			if (i + j > cmdbuf->size) {
				DRM_ERROR("indexed drawing command extends "
					  "beyond end of command buffer\n");
				DMA_FLUSH();
				ret = -EINVAL;
				goto done;
			}
			fallthrough;
		case SAVAGE_CMD_DMA_PRIM:
		case SAVAGE_CMD_VB_PRIM:
			if (!first_draw_cmd)
				first_draw_cmd = cmdbuf->cmd_addr - 1;
			cmdbuf->cmd_addr += j;
			i += j;
			break;
		default:
			if (first_draw_cmd) {
				ret = savage_dispatch_draw(
					dev_priv, first_draw_cmd,
					cmdbuf->cmd_addr - 1,
					dmabuf, cmdbuf->vb_addr, cmdbuf->vb_size,
					cmdbuf->vb_stride,
					cmdbuf->nbox, cmdbuf->box_addr);
				if (ret != 0)
					goto done;
				first_draw_cmd = NULL;
			}
		}
		if (first_draw_cmd)
			continue;

		switch (cmd_header.cmd.cmd) {
		case SAVAGE_CMD_STATE:
			j = (cmd_header.state.count + 1) / 2;
			if (i + j > cmdbuf->size) {
				DRM_ERROR("command SAVAGE_CMD_STATE extends "
					  "beyond end of command buffer\n");
				DMA_FLUSH();
				ret = -EINVAL;
				goto done;
			}
			ret = savage_dispatch_state(dev_priv, &cmd_header,
				(const uint32_t *)cmdbuf->cmd_addr);
			cmdbuf->cmd_addr += j;
			i += j;
			break;
		case SAVAGE_CMD_CLEAR:
			if (i + 1 > cmdbuf->size) {
				DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
					  "beyond end of command buffer\n");
				DMA_FLUSH();
				ret = -EINVAL;
				goto done;
			}
			ret = savage_dispatch_clear(dev_priv, &cmd_header,
						    cmdbuf->cmd_addr,
						    cmdbuf->nbox,
						    cmdbuf->box_addr);
			cmdbuf->cmd_addr++;
			i++;
			break;
		case SAVAGE_CMD_SWAP:
			ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
						   cmdbuf->box_addr);
			break;
		default:
			DRM_ERROR("invalid command 0x%x\n",
				  cmd_header.cmd.cmd);
			DMA_FLUSH();
			ret = -EINVAL;
			goto done;
		}

		if (ret != 0) {
			DMA_FLUSH();
			goto done;
		}
	}

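	/* Dispatch a trailing run of drawing commands that was still
	 * being batched when the end of the command buffer was reached. */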
	if (first_draw_cmd) {
		ret = savage_dispatch_draw(
			dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
			cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
			cmdbuf->nbox, cmdbuf->box_addr);
		if (ret != 0) {
			DMA_FLUSH();
			goto done;
		}
	}

	DMA_FLUSH();

	if (dmabuf && cmdbuf->discard) {
		drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
		uint16_t event;
		event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
		SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
		savage_freelist_put(dev, dmabuf);
	}

done:
	/* If we didn't need to allocate them, these'll be NULL */
	kfree(kcmd_addr);
	kfree(kvb_addr);
	kfree(kbox_addr);

	return ret;
}