intel_context.c (13891B)
1// SPDX-License-Identifier: MIT 2/* 3 * Copyright © 2019 Intel Corporation 4 */ 5 6#include "gem/i915_gem_context.h" 7#include "gem/i915_gem_pm.h" 8 9#include "i915_drv.h" 10#include "i915_trace.h" 11 12#include "intel_context.h" 13#include "intel_engine.h" 14#include "intel_engine_pm.h" 15#include "intel_ring.h" 16 17static struct kmem_cache *slab_ce; 18 19static struct intel_context *intel_context_alloc(void) 20{ 21 return kmem_cache_zalloc(slab_ce, GFP_KERNEL); 22} 23 24static void rcu_context_free(struct rcu_head *rcu) 25{ 26 struct intel_context *ce = container_of(rcu, typeof(*ce), rcu); 27 28 trace_intel_context_free(ce); 29 kmem_cache_free(slab_ce, ce); 30} 31 32void intel_context_free(struct intel_context *ce) 33{ 34 call_rcu(&ce->rcu, rcu_context_free); 35} 36 37struct intel_context * 38intel_context_create(struct intel_engine_cs *engine) 39{ 40 struct intel_context *ce; 41 42 ce = intel_context_alloc(); 43 if (!ce) 44 return ERR_PTR(-ENOMEM); 45 46 intel_context_init(ce, engine); 47 trace_intel_context_create(ce); 48 return ce; 49} 50 51int intel_context_alloc_state(struct intel_context *ce) 52{ 53 int err = 0; 54 55 if (mutex_lock_interruptible(&ce->pin_mutex)) 56 return -EINTR; 57 58 if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { 59 if (intel_context_is_banned(ce)) { 60 err = -EIO; 61 goto unlock; 62 } 63 64 err = ce->ops->alloc(ce); 65 if (unlikely(err)) 66 goto unlock; 67 68 set_bit(CONTEXT_ALLOC_BIT, &ce->flags); 69 } 70 71unlock: 72 mutex_unlock(&ce->pin_mutex); 73 return err; 74} 75 76static int intel_context_active_acquire(struct intel_context *ce) 77{ 78 int err; 79 80 __i915_active_acquire(&ce->active); 81 82 if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || 83 intel_context_is_parallel(ce)) 84 return 0; 85 86 /* Preallocate tracking nodes */ 87 err = i915_active_acquire_preallocate_barrier(&ce->active, 88 ce->engine); 89 if (err) 90 i915_active_release(&ce->active); 91 92 return err; 93} 94 95static void intel_context_active_release(struct intel_context *ce) 96{ 97 /* Nodes preallocated in intel_context_active() */ 98 i915_active_acquire_barrier(&ce->active); 99 i915_active_release(&ce->active); 100} 101 102static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww) 103{ 104 unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS; 105 int err; 106 107 err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH); 108 if (err) 109 return err; 110 111 err = i915_active_acquire(&vma->active); 112 if (err) 113 goto err_unpin; 114 115 /* 116 * And mark it as a globally pinned object to let the shrinker know 117 * it cannot reclaim the object until we release it. 118 */ 119 i915_vma_make_unshrinkable(vma); 120 vma->obj->mm.dirty = true; 121 122 return 0; 123 124err_unpin: 125 i915_vma_unpin(vma); 126 return err; 127} 128 129static void __context_unpin_state(struct i915_vma *vma) 130{ 131 i915_vma_make_shrinkable(vma); 132 i915_active_release(&vma->active); 133 __i915_vma_unpin(vma); 134} 135 136static int __ring_active(struct intel_ring *ring, 137 struct i915_gem_ww_ctx *ww) 138{ 139 int err; 140 141 err = intel_ring_pin(ring, ww); 142 if (err) 143 return err; 144 145 err = i915_active_acquire(&ring->vma->active); 146 if (err) 147 goto err_pin; 148 149 return 0; 150 151err_pin: 152 intel_ring_unpin(ring); 153 return err; 154} 155 156static void __ring_retire(struct intel_ring *ring) 157{ 158 i915_active_release(&ring->vma->active); 159 intel_ring_unpin(ring); 160} 161 162static int intel_context_pre_pin(struct intel_context *ce, 163 struct i915_gem_ww_ctx *ww) 164{ 165 int err; 166 167 CE_TRACE(ce, "active\n"); 168 169 err = __ring_active(ce->ring, ww); 170 if (err) 171 return err; 172 173 err = intel_timeline_pin(ce->timeline, ww); 174 if (err) 175 goto err_ring; 176 177 if (!ce->state) 178 return 0; 179 180 err = __context_pin_state(ce->state, ww); 181 if (err) 182 goto err_timeline; 183 184 185 return 0; 186 187err_timeline: 188 intel_timeline_unpin(ce->timeline); 189err_ring: 190 __ring_retire(ce->ring); 191 return err; 192} 193 194static void intel_context_post_unpin(struct intel_context *ce) 195{ 196 if (ce->state) 197 __context_unpin_state(ce->state); 198 199 intel_timeline_unpin(ce->timeline); 200 __ring_retire(ce->ring); 201} 202 203int __intel_context_do_pin_ww(struct intel_context *ce, 204 struct i915_gem_ww_ctx *ww) 205{ 206 bool handoff = false; 207 void *vaddr; 208 int err = 0; 209 210 if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { 211 err = intel_context_alloc_state(ce); 212 if (err) 213 return err; 214 } 215 216 /* 217 * We always pin the context/ring/timeline here, to ensure a pin 218 * refcount for __intel_context_active(), which prevent a lock 219 * inversion of ce->pin_mutex vs dma_resv_lock(). 220 */ 221 222 err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww); 223 if (!err) 224 err = i915_gem_object_lock(ce->ring->vma->obj, ww); 225 if (!err && ce->state) 226 err = i915_gem_object_lock(ce->state->obj, ww); 227 if (!err) 228 err = intel_context_pre_pin(ce, ww); 229 if (err) 230 return err; 231 232 err = ce->ops->pre_pin(ce, ww, &vaddr); 233 if (err) 234 goto err_ctx_unpin; 235 236 err = i915_active_acquire(&ce->active); 237 if (err) 238 goto err_post_unpin; 239 240 err = mutex_lock_interruptible(&ce->pin_mutex); 241 if (err) 242 goto err_release; 243 244 intel_engine_pm_might_get(ce->engine); 245 246 if (unlikely(intel_context_is_closed(ce))) { 247 err = -ENOENT; 248 goto err_unlock; 249 } 250 251 if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) { 252 err = intel_context_active_acquire(ce); 253 if (unlikely(err)) 254 goto err_unlock; 255 256 err = ce->ops->pin(ce, vaddr); 257 if (err) { 258 intel_context_active_release(ce); 259 goto err_unlock; 260 } 261 262 CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n", 263 i915_ggtt_offset(ce->ring->vma), 264 ce->ring->head, ce->ring->tail); 265 266 handoff = true; 267 smp_mb__before_atomic(); /* flush pin before it is visible */ 268 atomic_inc(&ce->pin_count); 269 } 270 271 GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */ 272 273 trace_intel_context_do_pin(ce); 274 275err_unlock: 276 mutex_unlock(&ce->pin_mutex); 277err_release: 278 i915_active_release(&ce->active); 279err_post_unpin: 280 if (!handoff) 281 ce->ops->post_unpin(ce); 282err_ctx_unpin: 283 intel_context_post_unpin(ce); 284 285 /* 286 * Unlock the hwsp_ggtt object since it's shared. 287 * In principle we can unlock all the global state locked above 288 * since it's pinned and doesn't need fencing, and will 289 * thus remain resident until it is explicitly unpinned. 290 */ 291 i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj); 292 293 return err; 294} 295 296int __intel_context_do_pin(struct intel_context *ce) 297{ 298 struct i915_gem_ww_ctx ww; 299 int err; 300 301 i915_gem_ww_ctx_init(&ww, true); 302retry: 303 err = __intel_context_do_pin_ww(ce, &ww); 304 if (err == -EDEADLK) { 305 err = i915_gem_ww_ctx_backoff(&ww); 306 if (!err) 307 goto retry; 308 } 309 i915_gem_ww_ctx_fini(&ww); 310 return err; 311} 312 313void __intel_context_do_unpin(struct intel_context *ce, int sub) 314{ 315 if (!atomic_sub_and_test(sub, &ce->pin_count)) 316 return; 317 318 CE_TRACE(ce, "unpin\n"); 319 ce->ops->unpin(ce); 320 ce->ops->post_unpin(ce); 321 322 /* 323 * Once released, we may asynchronously drop the active reference. 324 * As that may be the only reference keeping the context alive, 325 * take an extra now so that it is not freed before we finish 326 * dereferencing it. 327 */ 328 intel_context_get(ce); 329 intel_context_active_release(ce); 330 trace_intel_context_do_unpin(ce); 331 intel_context_put(ce); 332} 333 334static void __intel_context_retire(struct i915_active *active) 335{ 336 struct intel_context *ce = container_of(active, typeof(*ce), active); 337 338 CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n", 339 intel_context_get_total_runtime_ns(ce), 340 intel_context_get_avg_runtime_ns(ce)); 341 342 set_bit(CONTEXT_VALID_BIT, &ce->flags); 343 intel_context_post_unpin(ce); 344 intel_context_put(ce); 345} 346 347static int __intel_context_active(struct i915_active *active) 348{ 349 struct intel_context *ce = container_of(active, typeof(*ce), active); 350 351 intel_context_get(ce); 352 353 /* everything should already be activated by intel_context_pre_pin() */ 354 GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active)); 355 __intel_ring_pin(ce->ring); 356 357 __intel_timeline_pin(ce->timeline); 358 359 if (ce->state) { 360 GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active)); 361 __i915_vma_pin(ce->state); 362 i915_vma_make_unshrinkable(ce->state); 363 } 364 365 return 0; 366} 367 368static int 369sw_fence_dummy_notify(struct i915_sw_fence *sf, 370 enum i915_sw_fence_notify state) 371{ 372 return NOTIFY_DONE; 373} 374 375void 376intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) 377{ 378 GEM_BUG_ON(!engine->cops); 379 GEM_BUG_ON(!engine->gt->vm); 380 381 kref_init(&ce->ref); 382 383 ce->engine = engine; 384 ce->ops = engine->cops; 385 ce->sseu = engine->sseu; 386 ce->ring = NULL; 387 ce->ring_size = SZ_4K; 388 389 ewma_runtime_init(&ce->stats.runtime.avg); 390 391 ce->vm = i915_vm_get(engine->gt->vm); 392 393 /* NB ce->signal_link/lock is used under RCU */ 394 spin_lock_init(&ce->signal_lock); 395 INIT_LIST_HEAD(&ce->signals); 396 397 mutex_init(&ce->pin_mutex); 398 399 spin_lock_init(&ce->guc_state.lock); 400 INIT_LIST_HEAD(&ce->guc_state.fences); 401 INIT_LIST_HEAD(&ce->guc_state.requests); 402 403 ce->guc_id.id = GUC_INVALID_CONTEXT_ID; 404 INIT_LIST_HEAD(&ce->guc_id.link); 405 406 INIT_LIST_HEAD(&ce->destroyed_link); 407 408 INIT_LIST_HEAD(&ce->parallel.child_list); 409 410 /* 411 * Initialize fence to be complete as this is expected to be complete 412 * unless there is a pending schedule disable outstanding. 413 */ 414 i915_sw_fence_init(&ce->guc_state.blocked, 415 sw_fence_dummy_notify); 416 i915_sw_fence_commit(&ce->guc_state.blocked); 417 418 i915_active_init(&ce->active, 419 __intel_context_active, __intel_context_retire, 0); 420} 421 422void intel_context_fini(struct intel_context *ce) 423{ 424 struct intel_context *child, *next; 425 426 if (ce->timeline) 427 intel_timeline_put(ce->timeline); 428 i915_vm_put(ce->vm); 429 430 /* Need to put the creation ref for the children */ 431 if (intel_context_is_parent(ce)) 432 for_each_child_safe(ce, child, next) 433 intel_context_put(child); 434 435 mutex_destroy(&ce->pin_mutex); 436 i915_active_fini(&ce->active); 437 i915_sw_fence_fini(&ce->guc_state.blocked); 438} 439 440void i915_context_module_exit(void) 441{ 442 kmem_cache_destroy(slab_ce); 443} 444 445int __init i915_context_module_init(void) 446{ 447 slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN); 448 if (!slab_ce) 449 return -ENOMEM; 450 451 return 0; 452} 453 454void intel_context_enter_engine(struct intel_context *ce) 455{ 456 intel_engine_pm_get(ce->engine); 457 intel_timeline_enter(ce->timeline); 458} 459 460void intel_context_exit_engine(struct intel_context *ce) 461{ 462 intel_timeline_exit(ce->timeline); 463 intel_engine_pm_put(ce->engine); 464} 465 466int intel_context_prepare_remote_request(struct intel_context *ce, 467 struct i915_request *rq) 468{ 469 struct intel_timeline *tl = ce->timeline; 470 int err; 471 472 /* Only suitable for use in remotely modifying this context */ 473 GEM_BUG_ON(rq->context == ce); 474 475 if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ 476 /* Queue this switch after current activity by this context. */ 477 err = i915_active_fence_set(&tl->last_request, rq); 478 if (err) 479 return err; 480 } 481 482 /* 483 * Guarantee context image and the timeline remains pinned until the 484 * modifying request is retired by setting the ce activity tracker. 485 * 486 * But we only need to take one pin on the account of it. Or in other 487 * words transfer the pinned ce object to tracked active request. 488 */ 489 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 490 return i915_active_add_request(&ce->active, rq); 491} 492 493struct i915_request *intel_context_create_request(struct intel_context *ce) 494{ 495 struct i915_gem_ww_ctx ww; 496 struct i915_request *rq; 497 int err; 498 499 i915_gem_ww_ctx_init(&ww, true); 500retry: 501 err = intel_context_pin_ww(ce, &ww); 502 if (!err) { 503 rq = i915_request_create(ce); 504 intel_context_unpin(ce); 505 } else if (err == -EDEADLK) { 506 err = i915_gem_ww_ctx_backoff(&ww); 507 if (!err) 508 goto retry; 509 rq = ERR_PTR(err); 510 } else { 511 rq = ERR_PTR(err); 512 } 513 514 i915_gem_ww_ctx_fini(&ww); 515 516 if (IS_ERR(rq)) 517 return rq; 518 519 /* 520 * timeline->mutex should be the inner lock, but is used as outer lock. 521 * Hack around this to shut up lockdep in selftests.. 522 */ 523 lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie); 524 mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_); 525 mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_); 526 rq->cookie = lockdep_pin_lock(&ce->timeline->mutex); 527 528 return rq; 529} 530 531struct i915_request *intel_context_find_active_request(struct intel_context *ce) 532{ 533 struct intel_context *parent = intel_context_to_parent(ce); 534 struct i915_request *rq, *active = NULL; 535 unsigned long flags; 536 537 GEM_BUG_ON(!intel_engine_uses_guc(ce->engine)); 538 539 /* 540 * We search the parent list to find an active request on the submitted 541 * context. The parent list contains the requests for all the contexts 542 * in the relationship so we have to do a compare of each request's 543 * context. 544 */ 545 spin_lock_irqsave(&parent->guc_state.lock, flags); 546 list_for_each_entry_reverse(rq, &parent->guc_state.requests, 547 sched.link) { 548 if (rq->context != ce) 549 continue; 550 if (i915_request_completed(rq)) 551 break; 552 553 active = rq; 554 } 555 spin_unlock_irqrestore(&parent->guc_state.lock, flags); 556 557 return active; 558} 559 560void intel_context_bind_parent_child(struct intel_context *parent, 561 struct intel_context *child) 562{ 563 /* 564 * Callers responsibility to validate that this function is used 565 * correctly but we use GEM_BUG_ON here ensure that they do. 566 */ 567 GEM_BUG_ON(intel_context_is_pinned(parent)); 568 GEM_BUG_ON(intel_context_is_child(parent)); 569 GEM_BUG_ON(intel_context_is_pinned(child)); 570 GEM_BUG_ON(intel_context_is_child(child)); 571 GEM_BUG_ON(intel_context_is_parent(child)); 572 573 parent->parallel.child_index = parent->parallel.number_children++; 574 list_add_tail(&child->parallel.child_link, 575 &parent->parallel.child_list); 576 child->parallel.parent = parent; 577} 578 579u64 intel_context_get_total_runtime_ns(const struct intel_context *ce) 580{ 581 u64 total, active; 582 583 total = ce->stats.runtime.total; 584 if (ce->ops->flags & COPS_RUNTIME_CYCLES) 585 total *= ce->engine->gt->clock_period_ns; 586 587 active = READ_ONCE(ce->stats.active); 588 if (active) 589 active = intel_context_clock() - active; 590 591 return total + active; 592} 593 594u64 intel_context_get_avg_runtime_ns(struct intel_context *ce) 595{ 596 u64 avg = ewma_runtime_read(&ce->stats.runtime.avg); 597 598 if (ce->ops->flags & COPS_RUNTIME_CYCLES) 599 avg *= ce->engine->gt->clock_period_ns; 600 601 return avg; 602} 603 604#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 605#include "selftest_context.c" 606#endif