i915_gem_context.c (41214B)
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>
#include <linux/string_helpers.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"

#include "gem/selftests/igt_gem_utils.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_spinner.h"
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"

#include "huge_gem_object.h"
#include "igt_gem_utils.h"

#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))

static int live_nop_switch(void *arg)
{
	const unsigned int nctx = 1024;
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context **ctx;
	struct igt_live_test t;
	struct file *file;
	unsigned long n;
	int err = -ENODEV;

	/*
	 * Create as many contexts as we can feasibly get away with
	 * and check we can switch between them rapidly.
	 *
	 * Serves as very simple stress test for submission and HW switching
	 * between contexts.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out_file;
	}

	for (n = 0; n < nctx; n++) {
		ctx[n] = live_context(i915, file);
		if (IS_ERR(ctx[n])) {
			err = PTR_ERR(ctx[n]);
			goto out_file;
		}
	}

	for_each_uabi_engine(engine, i915) {
		struct i915_request *rq = NULL;
		unsigned long end_time, prime;
		ktime_t times[2] = {};

		times[0] = ktime_get_raw();
		for (n = 0; n < nctx; n++) {
			struct i915_request *this;

			this = igt_request_alloc(ctx[n], engine);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out_file;
			}
			if (rq) {
				i915_request_await_dma_fence(this, &rq->fence);
				i915_request_put(rq);
			}
			rq = i915_request_get(this);
			i915_request_add(this);
		}
		if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
			pr_err("Failed to populate %d contexts\n", nctx);
			intel_gt_set_wedged(to_gt(i915));
			i915_request_put(rq);
			err = -EIO;
			goto out_file;
		}
		i915_request_put(rq);

		times[1] = ktime_get_raw();

		pr_info("Populated %d contexts on %s in %lluns\n",
			nctx, engine->name, ktime_to_ns(times[1] - times[0]));

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		end_time = jiffies + i915_selftest.timeout_jiffies;
		for_each_prime_number_from(prime, 2, 8192) {
			times[1] = ktime_get_raw();

			rq = NULL;
			for (n = 0; n < prime; n++) {
				struct i915_request *this;

				this = igt_request_alloc(ctx[n % nctx], engine);
				if (IS_ERR(this)) {
					err = PTR_ERR(this);
					goto out_file;
				}

				if (rq) { /* Force submission order */
					i915_request_await_dma_fence(this, &rq->fence);
					i915_request_put(rq);
				}

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				rq = i915_request_get(this);
				i915_request_add(this);
			}
			GEM_BUG_ON(!rq);
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Switching between %ld contexts timed out\n",
				       prime);
				intel_gt_set_wedged(to_gt(i915));
				i915_request_put(rq);
				break;
			}
			i915_request_put(rq);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 2)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_file;

		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
	}

out_file:
	fput(file);
	return err;
}

struct parallel_switch {
	struct task_struct *tsk;
	struct intel_context *ce[2];
};

static int __live_parallel_switch1(void *data)
{
	struct parallel_switch *arg = data;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_request *rq = NULL;
		int err, n;

		err = 0;
		for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
		}
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			return err;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int __live_parallel_switchN(void *data)
{
	struct parallel_switch *arg = data;
	struct i915_request *rq = NULL;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int n;

	count = 0;
	do {
		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;
			int err = 0;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
			if (err) {
				i915_request_put(rq);
				return err;
			}
		}

		count++;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(rq);

	pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int live_parallel_switch(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		__live_parallel_switch1,
		__live_parallel_switchN,
		NULL,
	};
	struct parallel_switch *data = NULL;
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	int (* const *fn)(void *arg);
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct file *file;
	int n, m, count;
	int err = 0;

	/*
	 * Check we can process switches on all engines simultaneously.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	engines = i915_gem_context_lock_engines(ctx);
	count = engines->num_engines;

	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
	if (!data) {
		i915_gem_context_unlock_engines(ctx);
		err = -ENOMEM;
		goto out_file;
	}

	m = 0; /* Use the first context as our template for the engines */
	for_each_gem_engine(ce, engines, it) {
		err = intel_context_pin(ce);
		if (err) {
			i915_gem_context_unlock_engines(ctx);
			goto out;
		}
		data[m++].ce[0] = intel_context_get(ce);
	}
	i915_gem_context_unlock_engines(ctx);

	/* Clone the same set of engines into the other contexts */
	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
		ctx = live_context(i915, file);
		if (IS_ERR(ctx)) {
			err = PTR_ERR(ctx);
			goto out;
		}

		for (m = 0; m < count; m++) {
			if (!data[m].ce[0])
				continue;

			ce = intel_context_create(data[m].ce[0]->engine);
			if (IS_ERR(ce))
				goto out;

			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			data[m].ce[n] = ce;
		}
	}

	for (fn = func; !err && *fn; fn++) {
		struct igt_live_test t;
		int n;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		for (n = 0; n < count; n++) {
			if (!data[n].ce[0])
				continue;

			data[n].tsk = kthread_run(*fn, &data[n],
						  "igt/parallel:%s",
						  data[n].ce[0]->engine->name);
			if (IS_ERR(data[n].tsk)) {
				err = PTR_ERR(data[n].tsk);
				break;
			}
			get_task_struct(data[n].tsk);
		}

		yield(); /* start all threads before we kthread_stop() */

		for (n = 0; n < count; n++) {
			int status;

			if (IS_ERR_OR_NULL(data[n].tsk))
				continue;

			status = kthread_stop(data[n].tsk);
			if (status && !err)
				err = status;

			put_task_struct(data[n].tsk);
			data[n].tsk = NULL;
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

out:
	for (n = 0; n < count; n++) {
		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
			if (!data[n].ce[m])
				continue;

			intel_context_unpin(data[n].ce[m]);
			intel_context_put(data[n].ce[m]);
		}
	}
	kfree(data);
out_file:
	fput(file);
	return err;
}

static unsigned long real_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
}

static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
}

static int gpu_fill(struct intel_context *ce,
		    struct drm_i915_gem_object *obj,
		    unsigned int dw)
{
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(obj->base.size > ce->vm->total);
	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
	if (err)
		return err;

	/*
	 * Within the GTT the huge object maps every page onto
	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
	 * We set the nth dword within the page using the nth
	 * mapping via the GTT - this should exercise the GTT mapping
	 * whilst checking that each context provides a unique view
	 * into the object.
	 */
	err = igt_gpu_fill_dw(ce, vma,
			      (dw * real_page_count(obj)) << PAGE_SHIFT |
			      (dw * sizeof(u32)),
			      real_page_count(obj),
			      dw);
	i915_vma_unpin(vma);

	return err;
}

static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
{
	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
	unsigned int n, m, need_flush;
	int err;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_prepare_write(obj, &need_flush);
	if (err)
		goto out;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		for (m = 0; m < DW_PER_PAGE; m++)
			map[m] = value;
		if (!has_llc)
			drm_clflush_virt_range(map, PAGE_SIZE);
		kunmap_atomic(map);
	}

	i915_gem_object_finish_access(obj);
	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
	obj->write_domain = 0;
out:
	i915_gem_object_unlock(obj);
	return err;
}

static noinline int cpu_check(struct drm_i915_gem_object *obj,
			      unsigned int idx, unsigned int max)
{
	unsigned int n, m, needs_flush;
	int err;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_prepare_read(obj, &needs_flush);
	if (err)
		goto out_unlock;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(map, PAGE_SIZE);

		for (m = 0; m < max; m++) {
			if (map[m] != m) {
				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
				       __builtin_return_address(0), idx,
				       n, real_page_count(obj), m, max,
				       map[m], m);
				err = -EINVAL;
				goto out_unmap;
			}
		}

		for (; m < DW_PER_PAGE; m++) {
			if (map[m] != STACK_MAGIC) {
				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
				       __builtin_return_address(0), idx, n, m,
				       map[m], STACK_MAGIC);
				err = -EINVAL;
				goto out_unmap;
			}
		}

out_unmap:
		kunmap_atomic(map);
		if (err)
			break;
	}

	i915_gem_object_finish_access(obj);
out_unlock:
	i915_gem_object_unlock(obj);
	return err;
}

static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
{
	int err;

	GEM_BUG_ON(obj->base.handle_count);

	/* tie the object to the drm_file for easy reaping */
	err = idr_alloc(&to_drm_file(file)->object_idr,
			&obj->base, 1, 0, GFP_KERNEL);
	if (err < 0)
		return err;

	i915_gem_object_get(obj);
	obj->base.handle_count++;
	return 0;
}

static struct drm_i915_gem_object *
create_test_object(struct i915_address_space *vm,
		   struct file *file,
		   struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	u64 size;
	int err;

	/* Keep in GEM's good graces */
	intel_gt_retire_requests(vm->gt);

	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);

	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
	if (IS_ERR(obj))
		return obj;

	err = file_add_object(file, obj);
	i915_gem_object_put(obj);
	if (err)
		return ERR_PTR(err);

	err = cpu_fill(obj, STACK_MAGIC);
	if (err) {
		pr_err("Failed to fill object with cpu, err=%d\n",
		       err);
		return ERR_PTR(err);
	}

	list_add_tail(&obj->st_link, objects);
	return obj;
}

static unsigned long max_dwords(struct drm_i915_gem_object *obj)
{
	unsigned long npages = fake_page_count(obj);

	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
	return npages / DW_PER_PAGE;
}

static void throttle_release(struct i915_request **q, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (IS_ERR_OR_NULL(q[i]))
			continue;

		i915_request_put(fetch_and_zero(&q[i]));
	}
}

static int throttle(struct intel_context *ce,
		    struct i915_request **q, int count)
{
	int i;

	if (!IS_ERR_OR_NULL(q[0])) {
		if (i915_request_wait(q[0],
				      I915_WAIT_INTERRUPTIBLE,
				      MAX_SCHEDULE_TIMEOUT) < 0)
			return -EINTR;

		i915_request_put(q[0]);
	}

	for (i = 0; i < count - 1; i++)
		q[i] = q[i + 1];

	q[i] = intel_context_create_request(ce);
	if (IS_ERR(q[i]))
		return PTR_ERR(q[i]);

	i915_request_get(q[i]);
	i915_request_add(q[i]);

	return 0;
}

static int igt_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	int err = -ENODEV;

	/*
	 * Create a few different contexts (with different mm) and write
	 * through each ctx/mm using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	for_each_uabi_engine(engine, i915) {
		struct drm_i915_gem_object *obj = NULL;
		unsigned long ncontexts, ndwords, dw;
		struct i915_request *tq[5] = {};
		struct igt_live_test t;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);
		struct file *file;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (!engine->context_size)
			continue; /* No logical context support in HW */

		file = mock_file(i915);
		if (IS_ERR(file))
			return PTR_ERR(file);

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		ncontexts = 0;
		ndwords = 0;
		dw = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915, NULL);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_file;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_file;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}

		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				break;

			dw += rem;
		}

out_file:
		throttle_release(tq, ARRAY_SIZE(tq));
		if (igt_live_test_end(&t))
			err = -EIO;

		fput(file);
		if (err)
			return err;

		i915_gem_drain_freed_objects(i915);
	}

	return 0;
}

static int igt_shared_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *tq[5] = {};
	struct i915_gem_context *parent;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct file *file;
	int err = 0;

	/*
	 * Create a few different contexts with the same mm and write
	 * through each ctx using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */
	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	parent = live_context(i915, file);
	if (IS_ERR(parent)) {
		err = PTR_ERR(parent);
		goto out_file;
	}

	if (!parent->vm) { /* not full-ppgtt; nothing to share */
		err = 0;
		goto out_file;
	}

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	for_each_uabi_engine(engine, i915) {
		unsigned long ncontexts, ndwords, dw;
		struct drm_i915_gem_object *obj = NULL;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		dw = 0;
		ndwords = 0;
		ncontexts = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915, parent->vm);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_test;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(parent->vm,
							 file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_test;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}
		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				goto out_test;

			dw += rem;
		}

		i915_gem_drain_freed_objects(i915);
	}
out_test:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;
out_file:
	fput(file);
	return err;
}

static int rpcs_query_batch(struct drm_i915_gem_object *rpcs,
			    struct i915_vma *vma,
			    struct intel_engine_cs *engine)
{
	u32 *cmd;

	GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);

	cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base));
	*cmd++ = lower_32_bits(vma->node.start);
	*cmd++ = upper_32_bits(vma->node.start);
	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(rpcs, 0, 64);
	i915_gem_object_unpin_map(rpcs);

	intel_gt_chipset_flush(vma->vm->gt);

	return 0;
}

static int
emit_rpcs_query(struct drm_i915_gem_object *obj,
		struct intel_context *ce,
		struct i915_request **rq_out)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	struct i915_vma *batch;
	struct i915_vma *vma;
	struct drm_i915_gem_object *rpcs;
	int err;

	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	if (GRAPHICS_VER(i915) < 8)
		return -EINVAL;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(rpcs))
		return PTR_ERR(rpcs);

	batch = i915_vma_instance(rpcs, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_put;
	}

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = i915_gem_object_lock(rpcs, &ww);
	if (!err)
		err = i915_gem_object_set_to_gtt_domain(obj, false);
	if (!err)
		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
	if (err)
		goto err_put;

	err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
	if (err)
		goto err_vma;

	err = rpcs_query_batch(rpcs, vma, ce->engine);
	if (err)
		goto err_batch;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = i915_request_await_object(rq, batch->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(batch, rq, 0);
	if (err)
		goto skip_request;

	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
	if (err)
		goto skip_request;

	*rq_out = i915_request_get(rq);

skip_request:
	if (err)
		i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_batch:
	i915_vma_unpin(batch);
err_vma:
	i915_vma_unpin(vma);
err_put:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	i915_gem_object_put(rpcs);
	return err;
}

#define TEST_IDLE	BIT(0)
#define TEST_BUSY	BIT(1)
#define TEST_RESET	BIT(2)

static int
__sseu_prepare(const char *name,
	       unsigned int flags,
	       struct intel_context *ce,
	       struct igt_spinner **spin)
{
	struct i915_request *rq;
	int ret;

	*spin = NULL;
	if (!(flags & (TEST_BUSY | TEST_RESET)))
		return 0;

	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
	if (!*spin)
		return -ENOMEM;

	ret = igt_spinner_init(*spin, ce->engine->gt);
	if (ret)
		goto err_free;

	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_add(rq);

	if (!igt_wait_for_spinner(*spin, rq)) {
		pr_err("%s: Spinner failed to start!\n", name);
		ret = -ETIMEDOUT;
		goto err_end;
	}

	return 0;

err_end:
	igt_spinner_end(*spin);
err_fini:
	igt_spinner_fini(*spin);
err_free:
	kfree(fetch_and_zero(spin));
	return ret;
}

static int
__read_slice_count(struct intel_context *ce,
		   struct drm_i915_gem_object *obj,
		   struct igt_spinner *spin,
		   u32 *rpcs)
{
	struct i915_request *rq = NULL;
	u32 s_mask, s_shift;
	unsigned int cnt;
	u32 *buf, val;
	long ret;

	ret = emit_rpcs_query(obj, ce, &rq);
	if (ret)
		return ret;

	if (spin)
		igt_spinner_end(spin);

	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
	i915_request_put(rq);
	if (ret < 0)
		return ret;

	buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		return ret;
	}

	if (GRAPHICS_VER(ce->engine->i915) >= 11) {
		s_mask = GEN11_RPCS_S_CNT_MASK;
		s_shift = GEN11_RPCS_S_CNT_SHIFT;
	} else {
		s_mask = GEN8_RPCS_S_CNT_MASK;
		s_shift = GEN8_RPCS_S_CNT_SHIFT;
	}

	val = *buf;
	cnt = (val & s_mask) >> s_shift;
	*rpcs = val;

	i915_gem_object_unpin_map(obj);

	return cnt;
}

static int
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
	     const char *prefix, const char *suffix)
{
	if (slices == expected)
		return 0;

	if (slices < 0) {
		pr_err("%s: %s read slice count failed with %d%s\n",
		       name, prefix, slices, suffix);
		return slices;
	}

	pr_err("%s: %s slice count %d is not %u%s\n",
	       name, prefix, slices, expected, suffix);

	pr_info("RPCS=0x%x; %u%sx%u%s\n",
		rpcs, slices,
		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");

	return -EINVAL;
}

static int
__sseu_finish(const char *name,
	      unsigned int flags,
	      struct intel_context *ce,
	      struct drm_i915_gem_object *obj,
	      unsigned int expected,
	      struct igt_spinner *spin)
{
	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
	u32 rpcs = 0;
	int ret = 0;

	if (flags & TEST_RESET) {
		ret = intel_engine_reset(ce->engine, "sseu");
		if (ret)
			goto out;
	}

	ret = __read_slice_count(ce, obj,
				 flags & TEST_RESET ? NULL : spin, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
	if (ret)
		goto out;

	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");

out:
	if (spin)
		igt_spinner_end(spin);

	if ((flags & TEST_IDLE) && ret == 0) {
		ret = igt_flush_test(ce->engine->i915);
		if (ret)
			return ret;

		ret = __read_slice_count(ce, obj, NULL, &rpcs);
		ret = __check_rpcs(name, rpcs, ret, expected,
				   "Context", " after idle!");
	}

	return ret;
}

static int
__sseu_test(const char *name,
	    unsigned int flags,
	    struct intel_context *ce,
	    struct drm_i915_gem_object *obj,
	    struct intel_sseu sseu)
{
	struct igt_spinner *spin = NULL;
	int ret;

	intel_engine_pm_get(ce->engine);

	ret = __sseu_prepare(name, flags, ce, &spin);
	if (ret)
		goto out_pm;

	ret = intel_context_reconfigure_sseu(ce, sseu);
	if (ret)
		goto out_spin;

	ret = __sseu_finish(name, flags, ce, obj,
			    hweight32(sseu.slice_mask), spin);

out_spin:
	if (spin) {
		igt_spinner_end(spin);
		igt_spinner_fini(spin);
		kfree(spin);
	}
out_pm:
	intel_engine_pm_put(ce->engine);
	return ret;
}

static int
__igt_ctx_sseu(struct drm_i915_private *i915,
	       const char *name,
	       unsigned int flags)
{
	struct drm_i915_gem_object *obj;
	int inst = 0;
	int ret = 0;

	if (GRAPHICS_VER(i915) < 9)
		return 0;

	if (flags & TEST_RESET)
		igt_global_reset_lock(to_gt(i915));

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		ret = PTR_ERR(obj);
		goto out_unlock;
	}

	do {
		struct intel_engine_cs *engine;
		struct intel_context *ce;
		struct intel_sseu pg_sseu;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_RENDER,
						  inst++);
		if (!engine)
			break;

		if (hweight32(engine->sseu.slice_mask) < 2)
			continue;

		if (!engine->gt->info.sseu.has_slice_pg)
			continue;

		/*
		 * Gen11 VME friendly power-gated configuration with
		 * half enabled sub-slices.
		 */
		pg_sseu = engine->sseu;
		pg_sseu.slice_mask = 1;
		pg_sseu.subslice_mask =
			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));

		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
			engine->name, name, flags,
			hweight32(engine->sseu.slice_mask),
			hweight32(pg_sseu.slice_mask));

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			ret = PTR_ERR(ce);
			goto out_put;
		}

		ret = intel_context_pin(ce);
		if (ret)
			goto out_ce;

		/* First set the default mask. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* Then set a power-gated configuration. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

		/* Back to defaults. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* One last power-gated configuration for the road. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

out_unpin:
		intel_context_unpin(ce);
out_ce:
		intel_context_put(ce);
	} while (!ret);

	if (igt_flush_test(i915))
		ret = -EIO;

out_put:
	i915_gem_object_put(obj);

out_unlock:
	if (flags & TEST_RESET)
		igt_global_reset_unlock(to_gt(i915));

	if (ret)
		pr_err("%s: Failed with %d!\n", name, ret);

	return ret;
}

static int igt_ctx_sseu(void *arg)
{
	struct {
		const char *name;
		unsigned int flags;
	} *phase, phases[] = {
		{ .name = "basic", .flags = 0 },
		{ .name = "idle", .flags = TEST_IDLE },
		{ .name = "busy", .flags = TEST_BUSY },
		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
	};
	unsigned int i;
	int ret = 0;

	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
	     i++, phase++)
		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);

	return ret;
}

static int igt_ctx_readonly(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned long idx, ndwords, dw, num_engines;
	struct drm_i915_gem_object *obj = NULL;
	struct i915_request *tq[5] = {};
	struct i915_gem_engines_iter it;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	LIST_HEAD(objects);
	struct file *file;
	int err = -ENODEV;

	/*
	 * Create a few read-only objects (with the occasional writable object)
	 * and try to write into these objects checking that the GPU discards
	 * any write to a read-only object.
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
	if (!vm || !vm->has_read_only) {
		err = 0;
		goto out_file;
	}

	num_engines = 0;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
		if (intel_engine_can_store_dword(ce->engine))
			num_engines++;
	i915_gem_context_unlock_engines(ctx);

	ndwords = 0;
	dw = 0;
	while (!time_after(jiffies, end_time)) {
		for_each_gem_engine(ce,
				    i915_gem_context_lock_engines(ctx), it) {
			if (!intel_engine_can_store_dword(ce->engine))
				continue;

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					i915_gem_context_unlock_engines(ctx);
					goto out_file;
				}

				if (prandom_u32_state(&prng) & 1)
					i915_gem_object_set_readonly(obj);
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       ce->engine->name,
				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}
			ndwords++;
		}
		i915_gem_context_unlock_engines(ctx);
	}
	pr_info("Submitted %lu dwords (across %lu engines)\n",
		ndwords, num_engines);

	dw = 0;
	idx = 0;
	list_for_each_entry(obj, &objects, st_link) {
		unsigned int rem =
			min_t(unsigned int, ndwords - dw, max_dwords(obj));
		unsigned int num_writes;

		num_writes = rem;
		if (i915_gem_object_is_readonly(obj))
			num_writes = 0;

		err = cpu_check(obj, idx++, num_writes);
		if (err)
			break;

		dw += rem;
	}

out_file:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;

	fput(file);
	return err;
}

static int check_scratch(struct i915_address_space *vm, u64 offset)
{
	struct drm_mm_node *node;

	mutex_lock(&vm->mutex);
	node = __drm_mm_interval_first(&vm->mm,
				       offset, offset + sizeof(u32) - 1);
	mutex_unlock(&vm->mutex);
	if (!node || node->start > offset)
		return 0;

	GEM_BUG_ON(offset >= node->start + node->size);

	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
	       upper_32_bits(offset), lower_32_bits(offset));
	return -EINVAL;
}

static int write_to_scratch(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    struct drm_i915_gem_object *obj,
			    u64 offset, u32 value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	err = check_scratch(ctx->vm, offset);
	if (err)
		return err;

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);
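
	/*
	 * Assemble a minimal batch: MI_STORE_DWORD_IMM followed by the
	 * target address (64-bit on gen8+, a zero dword plus a 32-bit
	 * address otherwise) and the immediate value, terminated with
	 * MI_BATCH_BUFFER_END.
	 */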
	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
	if (GRAPHICS_VER(i915) >= 8) {
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
	} else {
		*cmd++ = 0;
		*cmd++ = offset;
	}
	*cmd++ = value;
	*cmd = MI_BATCH_BUFFER_END;
	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_eb_vm(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto out_vm;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	goto out_vm;
skip_request:
	i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);

	if (!err)
		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);

	return err;
}

static int read_from_scratch(struct i915_gem_context *ctx,
			     struct intel_engine_cs *engine,
			     struct drm_i915_gem_object *obj,
			     u64 offset, u32 *value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm;
	const u32 result = 0x100;
	struct i915_request *rq;
	struct i915_vma *vma;
	unsigned int flags;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	err = check_scratch(ctx->vm, offset);
	if (err)
		return err;

	if (GRAPHICS_VER(i915) >= 8) {
		const u32 GPR0 = engine->mmio_base + 0x600;

		vm = i915_gem_context_get_eb_vm(ctx);
		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_vm;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
		if (err)
			goto out_vm;

		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
		if (IS_ERR(cmd)) {
			err = PTR_ERR(cmd);
			goto err_unpin;
		}

		memset(cmd, POISON_INUSE, PAGE_SIZE);
		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
		*cmd++ = GPR0;
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
		*cmd++ = GPR0;
		*cmd++ = result;
		*cmd++ = 0;
		*cmd = MI_BATCH_BUFFER_END;

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);

		flags = 0;
	} else {
		const u32 reg = engine->mmio_base + 0x420;

		/* hsw: register access even to 3DPRIM! is protected */
		vm = i915_vm_get(&engine->gt->ggtt->vm);
		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_vm;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
		if (err)
			goto out_vm;

		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
		if (IS_ERR(cmd)) {
			err = PTR_ERR(cmd);
			goto err_unpin;
		}

		memset(cmd, POISON_INUSE, PAGE_SIZE);
		*cmd++ = MI_LOAD_REGISTER_MEM;
		*cmd++ = reg;
		*cmd++ = offset;
		*cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
		*cmd++ = reg;
		*cmd++ = vma->node.start + result;
		*cmd = MI_BATCH_BUFFER_END;

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);

		flags = I915_DISPATCH_SECURE;
	}

	intel_gt_chipset_flush(engine->gt);

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_set_to_cpu_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		goto out_vm;

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_vm;
	}

	*value = cmd[result / sizeof(*cmd)];
	i915_gem_object_unpin_map(obj);

	goto out_vm;
skip_request:
	i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);

	if (!err)
		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);

	return err;
}

static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
{
	struct i915_address_space *vm;
	u32 *vaddr;
	int err = 0;

	vm = ctx->vm;
	if (!vm)
		return -ENODEV;

	if (!vm->scratch[0]) {
		pr_err("No scratch page!\n");
		return -EINVAL;
	}

	vaddr = __px_vaddr(vm->scratch[0]);

	memcpy(out, vaddr, sizeof(*out));
	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
		pr_err("Inconsistent initial state of scratch page!\n");
		err = -EINVAL;
	}

	return err;
}

static int igt_vm_isolation(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx_a, *ctx_b;
	struct drm_i915_gem_object *obj_a, *obj_b;
	unsigned long num_engines, count;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	struct file *file;
	u64 vm_total;
	u32 expected;
	int err;

	if (GRAPHICS_VER(i915) < 7)
		return 0;

	/*
	 * The simple goal here is that a write into one context is not
	 * observed in a second (separate page tables and scratch).
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx_a = live_context(i915, file);
	if (IS_ERR(ctx_a)) {
		err = PTR_ERR(ctx_a);
		goto out_file;
	}

	ctx_b = live_context(i915, file);
	if (IS_ERR(ctx_b)) {
		err = PTR_ERR(ctx_b);
		goto out_file;
	}

	/* We can only test vm isolation, if the vm are distinct */
	if (ctx_a->vm == ctx_b->vm)
		goto out_file;

	/* Read the initial state of the scratch page */
	err = check_scratch_page(ctx_a, &expected);
	if (err)
		goto out_file;

	err = check_scratch_page(ctx_b, &expected);
	if (err)
		goto out_file;

	vm_total = ctx_a->vm->total;
	GEM_BUG_ON(ctx_b->vm->total != vm_total);

	obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj_a)) {
		err = PTR_ERR(obj_a);
		goto out_file;
	}

	obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj_b)) {
		err = PTR_ERR(obj_b);
		goto put_a;
	}

	count = 0;
	num_engines = 0;
	for_each_uabi_engine(engine, i915) {
		IGT_TIMEOUT(end_time);
		unsigned long this = 0;

		if (!intel_engine_can_store_dword(engine))
			continue;

		/* Not all engines have their own GPR! */
		if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
			continue;

		while (!__igt_timeout(end_time, NULL)) {
			u32 value = 0xc5c5c5c5;
			u64 offset;

			/* Leave enough space at offset 0 for the batch */
			offset = igt_random_offset(&prng,
						   I915_GTT_PAGE_SIZE, vm_total,
						   sizeof(u32), alignof_dword);

			err = write_to_scratch(ctx_a, engine, obj_a,
					       offset, 0xdeadbeef);
			if (err == 0)
				err = read_from_scratch(ctx_b, engine, obj_b,
							offset, &value);
			if (err)
				goto put_b;

			if (value != expected) {
				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
				       engine->name, value,
				       upper_32_bits(offset),
				       lower_32_bits(offset),
				       this);
				err = -EINVAL;
				goto put_b;
			}

			this++;
		}
		count += this;
		num_engines++;
	}
	pr_info("Checked %lu scratch offsets across %lu engines\n",
		count, num_engines);

put_b:
	i915_gem_object_put(obj_b);
put_a:
	i915_gem_object_put(obj_a);
out_file:
	if (igt_live_test_end(&t))
		err = -EIO;
	fput(file);
	return err;
}

int i915_gem_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_switch),
		SUBTEST(live_parallel_switch),
		SUBTEST(igt_ctx_exec),
		SUBTEST(igt_ctx_readonly),
		SUBTEST(igt_ctx_sseu),
		SUBTEST(igt_shared_ctx_exec),
		SUBTEST(igt_vm_isolation),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return i915_live_subtests(tests, i915);
}