i915_gem_client_blt.c (12859B)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_selftest.h"

#include "gt/intel_context.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_regs.h"
#include "gem/i915_gem_lmem.h"

#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"
#include "selftests/i915_random.h"
#include "huge_gem_object.h"
#include "mock_context.h"

enum client_tiling {
	CLIENT_TILING_LINEAR,
	CLIENT_TILING_X,
	CLIENT_TILING_Y,
	CLIENT_NUM_TILING_TYPES
};

#define WIDTH 512
#define HEIGHT 32

struct blit_buffer {
	struct i915_vma *vma;
	u32 start_val;
	enum client_tiling tiling;
};

struct tiled_blits {
	struct intel_context *ce;
	struct blit_buffer buffers[3];
	struct blit_buffer scratch;
	struct i915_vma *batch;
	u64 hole;
	u64 align;
	u32 width;
	u32 height;
};

static int prepare_blit(const struct tiled_blits *t,
			struct blit_buffer *dst,
			struct blit_buffer *src,
			struct drm_i915_gem_object *batch)
{
	const int ver = GRAPHICS_VER(to_i915(batch->base.dev));
	bool use_64b_reloc = ver >= 8;
	u32 src_pitch, dst_pitch;
	u32 cmd, *cs;

	cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
	cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
	if (src->tiling == CLIENT_TILING_Y)
		cmd |= BCS_SRC_Y;
	if (dst->tiling == CLIENT_TILING_Y)
		cmd |= BCS_DST_Y;
	*cs++ = cmd;

	cmd = MI_FLUSH_DW;
	if (ver >= 8)
		cmd++;
	*cs++ = cmd;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
	if (ver >= 8)
		cmd += 2;

	src_pitch = t->width * 4;
	if (src->tiling) {
		cmd |= XY_SRC_COPY_BLT_SRC_TILED;
		src_pitch /= 4;
	}

	dst_pitch = t->width * 4;
	if (dst->tiling) {
		cmd |= XY_SRC_COPY_BLT_DST_TILED;
		dst_pitch /= 4;
	}

	*cs++ = cmd;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch;
	*cs++ = 0;
	*cs++ = t->height << 16 | t->width;
	*cs++ = lower_32_bits(dst->vma->node.start);
	if (use_64b_reloc)
		*cs++ = upper_32_bits(dst->vma->node.start);
	*cs++ = 0;
	*cs++ = src_pitch;
	*cs++ = lower_32_bits(src->vma->node.start);
	if (use_64b_reloc)
		*cs++ = upper_32_bits(src->vma->node.start);

	*cs++ = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(batch);
	i915_gem_object_unpin_map(batch);

	return 0;
}

static void tiled_blits_destroy_buffers(struct tiled_blits *t)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(t->buffers); i++)
		i915_vma_put(t->buffers[i].vma);

	i915_vma_put(t->scratch.vma);
	i915_vma_put(t->batch);
}

static struct i915_vma *
__create_vma(struct tiled_blits *t, size_t size, bool lmem)
{
	struct drm_i915_private *i915 = t->ce->vm->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	if (lmem)
		obj = i915_gem_object_create_lmem(i915, size, 0);
	else
		obj = i915_gem_object_create_shmem(i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, t->ce->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

static struct i915_vma *create_vma(struct tiled_blits *t, bool lmem)
{
	return __create_vma(t, PAGE_ALIGN(t->width * t->height * 4), lmem);
}

static int
tiled_blits_create_buffers(struct tiled_blits *t,
			   int width, int height,
			   struct rnd_state *prng)
{
	struct drm_i915_private *i915 = t->ce->engine->i915;
	int i;

	t->width = width;
	t->height = height;

	t->batch = __create_vma(t, PAGE_SIZE, false);
	if (IS_ERR(t->batch))
		return PTR_ERR(t->batch);

	t->scratch.vma = create_vma(t, false);
	if (IS_ERR(t->scratch.vma)) {
		i915_vma_put(t->batch);
		return PTR_ERR(t->scratch.vma);
	}

	for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
		struct i915_vma *vma;

		vma = create_vma(t, HAS_LMEM(i915) && i % 2);
		if (IS_ERR(vma)) {
			tiled_blits_destroy_buffers(t);
			return PTR_ERR(vma);
		}

		t->buffers[i].vma = vma;
		t->buffers[i].tiling =
			i915_prandom_u32_max_state(CLIENT_TILING_Y + 1, prng);
	}

	return 0;
}

static void fill_scratch(struct tiled_blits *t, u32 *vaddr, u32 val)
{
	int i;

	t->scratch.start_val = val;
	for (i = 0; i < t->width * t->height; i++)
		vaddr[i] = val++;

	i915_gem_object_flush_map(t->scratch.vma->obj);
}

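/*
 * CPU-side model of the tiling layouts, used by verify_buffer() to locate a
 * pixel inside a tiled buffer without a GPU detiling pass.
 *
 * swizzle_bit() pulls the requested address bit down to bit 6 so it can be
 * XORed into the computed offset, matching the GGTT's reported bit-6
 * swizzle modes.
 *
 * tiled_offset() maps a linear byte offset to its location in an X- or
 * Y-tiled surface: X tiles are walked as 512-byte rows, 8 rows per 4K tile;
 * Y tiles as 16-byte spans, 32 rows per 512-byte column.
 */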
static u64 swizzle_bit(unsigned int bit, u64 offset)
{
	return (offset & BIT_ULL(bit)) >> (bit - 6);
}

static u64 tiled_offset(const struct intel_gt *gt,
			u64 v,
			unsigned int stride,
			enum client_tiling tiling)
{
	unsigned int swizzle;
	u64 x, y;

	if (tiling == CLIENT_TILING_LINEAR)
		return v;

	y = div64_u64_rem(v, stride, &x);

	if (tiling == CLIENT_TILING_X) {
		v = div64_u64_rem(y, 8, &y) * stride * 8;
		v += y * 512;
		v += div64_u64_rem(x, 512, &x) << 12;
		v += x;

		swizzle = gt->ggtt->bit_6_swizzle_x;
	} else {
		const unsigned int ytile_span = 16;
		const unsigned int ytile_height = 512;

		v = div64_u64_rem(y, 32, &y) * stride * 32;
		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;

		swizzle = gt->ggtt->bit_6_swizzle_y;
	}

	switch (swizzle) {
	case I915_BIT_6_SWIZZLE_9:
		v ^= swizzle_bit(9, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
		break;
	case I915_BIT_6_SWIZZLE_9_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
		break;
	}

	return v;
}

static const char *repr_tiling(enum client_tiling tiling)
{
	switch (tiling) {
	case CLIENT_TILING_LINEAR: return "linear";
	case CLIENT_TILING_X: return "X";
	case CLIENT_TILING_Y: return "Y";
	default: return "unknown";
	}
}

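/*
 * Check a blitted buffer: the first dword must still hold start_val, and a
 * randomly chosen (x, y) pixel, located via tiled_offset(), must hold the
 * expected incrementing value. On mismatch, report the tiling mode and dump
 * the first page.
 */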
static int verify_buffer(const struct tiled_blits *t,
			 struct blit_buffer *buf,
			 struct rnd_state *prng)
{
	const u32 *vaddr;
	int ret = 0;
	int x, y, p;

	x = i915_prandom_u32_max_state(t->width, prng);
	y = i915_prandom_u32_max_state(t->height, prng);
	p = y * t->width + x;

	vaddr = i915_gem_object_pin_map_unlocked(buf->vma->obj, I915_MAP_WC);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	if (vaddr[0] != buf->start_val) {
		ret = -EINVAL;
	} else {
		u64 v = tiled_offset(buf->vma->vm->gt,
				     p * 4, t->width * 4,
				     buf->tiling);

		if (vaddr[v / sizeof(*vaddr)] != buf->start_val + p)
			ret = -EINVAL;
	}
	if (ret) {
		pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n",
		       repr_tiling(buf->tiling),
		       x, y, buf->start_val);
		igt_hexdump(vaddr, 4096);
	}

	i915_gem_object_unpin_map(buf->vma->obj);
	return ret;
}

static int move_to_active(struct i915_vma *vma,
			  struct i915_request *rq,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}

static int pin_buffer(struct i915_vma *vma, u64 addr)
{
	int err;

	if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) {
		err = i915_vma_unbind_unlocked(vma);
		if (err)
			return err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED | addr);
	if (err)
		return err;

	return 0;
}

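/*
 * Blit src to dst at explicit GTT addresses: pin both buffers at the
 * requested offsets (rebinding if already bound elsewhere), rebuild the
 * batch for this tiling combination, submit it on the test context and
 * wait up to half a second for completion. On success dst inherits src's
 * expected start_val.
 */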
static int
tiled_blit(struct tiled_blits *t,
	   struct blit_buffer *dst, u64 dst_addr,
	   struct blit_buffer *src, u64 src_addr)
{
	struct i915_request *rq;
	int err;

	err = pin_buffer(src->vma, src_addr);
	if (err) {
		pr_err("Cannot pin src @ %llx\n", src_addr);
		return err;
	}

	err = pin_buffer(dst->vma, dst_addr);
	if (err) {
		pr_err("Cannot pin dst @ %llx\n", dst_addr);
		goto err_src;
	}

	err = i915_vma_pin(t->batch, 0, 0, PIN_USER | PIN_HIGH);
	if (err) {
		pr_err("cannot pin batch\n");
		goto err_dst;
	}

	err = prepare_blit(t, dst, src, t->batch->obj);
	if (err)
		goto err_bb;

	rq = intel_context_create_request(t->ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_bb;
	}

	err = move_to_active(t->batch, rq, 0);
	if (!err)
		err = move_to_active(src->vma, rq, 0);
	if (!err)
		err = move_to_active(dst->vma, rq, 0);
	if (!err)
		err = rq->engine->emit_bb_start(rq,
						t->batch->node.start,
						t->batch->node.size,
						0);
	i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, HZ / 2) < 0)
		err = -ETIME;
	i915_request_put(rq);

	dst->start_val = src->start_val;
err_bb:
	i915_vma_unpin(t->batch);
err_dst:
	i915_vma_unpin(dst->vma);
err_src:
	i915_vma_unpin(src->vma);
	return err;
}

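/*
 * Create the test state: a private context plus a probed hole in its VM
 * sized for two buffers, doubled for room to maneuver, with alignment
 * padding on either side. The reserved node is dropped again immediately,
 * so the hole is only a hint of free address space; all later placements
 * are offsets relative to t->hole.
 */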
static struct tiled_blits *
tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct drm_mm_node hole;
	struct tiled_blits *t;
	u64 hole_size;
	int err;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return ERR_PTR(-ENOMEM);

	t->ce = intel_context_create(engine);
	if (IS_ERR(t->ce)) {
		err = PTR_ERR(t->ce);
		goto err_free;
	}

	t->align = i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL);
	t->align = max(t->align,
		       i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM));

	hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align);
	hole_size *= 2; /* room to maneuver */
	hole_size += 2 * t->align; /* padding on either side */

	mutex_lock(&t->ce->vm->mutex);
	memset(&hole, 0, sizeof(hole));
	err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole,
					  hole_size, t->align,
					  I915_COLOR_UNEVICTABLE,
					  0, U64_MAX,
					  DRM_MM_INSERT_BEST);
	if (!err)
		drm_mm_remove_node(&hole);
	mutex_unlock(&t->ce->vm->mutex);
	if (err) {
		err = -ENODEV;
		goto err_put;
	}

	t->hole = hole.start + t->align;
	pr_info("Using hole at %llx\n", t->hole);

	err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
	if (err)
		goto err_put;

	return t;

err_put:
	intel_context_put(t->ce);
err_free:
	kfree(t);
	return ERR_PTR(err);
}

static void tiled_blits_destroy(struct tiled_blits *t)
{
	tiled_blits_destroy_buffers(t);

	intel_context_put(t->ce);
	kfree(t);
}

static int tiled_blits_prepare(struct tiled_blits *t,
			       struct rnd_state *prng)
{
	u64 offset = round_up(t->width * t->height * 4, t->align);
	u32 *map;
	int err;
	int i;

	map = i915_gem_object_pin_map_unlocked(t->scratch.vma->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	/* Use scratch to fill objects */
	for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
		fill_scratch(t, map, prandom_u32_state(prng));
		GEM_BUG_ON(verify_buffer(t, &t->scratch, prng));

		err = tiled_blit(t,
				 &t->buffers[i], t->hole + offset,
				 &t->scratch, t->hole);
		if (err == 0)
			err = verify_buffer(t, &t->buffers[i], prng);
		if (err) {
			pr_err("Failed to create buffer %d\n", i);
			break;
		}
	}

	i915_gem_object_unpin_map(t->scratch.vma->obj);
	return err;
}

static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
{
	u64 offset = round_up(t->width * t->height * 4, 2 * t->align);
	int err;

	/* We want to check position invariant tiling across GTT eviction */

	err = tiled_blit(t,
			 &t->buffers[1], t->hole + offset / 2,
			 &t->buffers[0], t->hole + 2 * offset);
	if (err)
		return err;

	/* Reposition so that we overlap the old addresses, and slightly off */
	err = tiled_blit(t,
			 &t->buffers[2], t->hole + t->align,
			 &t->buffers[1], t->hole + 3 * offset / 2);
	if (err)
		return err;

	err = verify_buffer(t, &t->buffers[2], prng);
	if (err)
		return err;

	return 0;
}

static int __igt_client_tiled_blits(struct intel_engine_cs *engine,
				    struct rnd_state *prng)
{
	struct tiled_blits *t;
	int err;

	t = tiled_blits_create(engine, prng);
	if (IS_ERR(t))
		return PTR_ERR(t);

	err = tiled_blits_prepare(t, prng);
	if (err)
		goto out;

	err = tiled_blits_bounce(t, prng);
	if (err)
		goto out;

out:
	tiled_blits_destroy(t);
	return err;
}

static bool has_bit17_swizzle(int sw)
{
	return (sw == I915_BIT_6_SWIZZLE_9_10_17 ||
		sw == I915_BIT_6_SWIZZLE_9_17);
}

static bool bad_swizzling(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;

	if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
		return true;

	if (has_bit17_swizzle(ggtt->bit_6_swizzle_x) ||
	    has_bit17_swizzle(ggtt->bit_6_swizzle_y))
		return true;

	return false;
}

static int igt_client_tiled_blits(void *arg)
{
	struct drm_i915_private *i915 = arg;
	I915_RND_STATE(prng);
	int inst = 0;

	/* Test requires explicit BLT tiling controls */
	if (GRAPHICS_VER(i915) < 4)
		return 0;

	if (bad_swizzling(i915)) /* Requires sane (sub-page) swizzling */
		return 0;

	do {
		struct intel_engine_cs *engine;
		int err;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		err = __igt_client_tiled_blits(engine, &prng);
		if (err == -ENODEV)
			err = 0;
		if (err)
			return err;
	} while (1);
}

int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_client_tiled_blits),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return i915_live_subtests(tests, i915);
}