intel_execlists_submission.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

/**
 * DOC: Logical Rings, Logical Ring Contexts and Execlists
 *
 * Motivation:
 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
 * These expanded contexts enable a number of new abilities, especially
 * "Execlists" (also implemented in this file).
 *
 * One of the main differences with the legacy HW contexts is that logical
 * ring contexts incorporate many more things into the context's state, like
 * PDPs or ringbuffer control registers:
 *
 * The reason why PDPs are included in the context is straightforward: as
 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
 * contained there means you don't need to do a ppgtt->switch_mm yourself,
 * instead, the GPU will do it for you on the context switch.
 *
 * But, what about the ringbuffer control registers (head, tail, etc..)?
 * Shouldn't we just need a set of those per engine command streamer? This is
 * where the name "Logical Rings" starts to make sense: by virtualizing the
 * rings, the engine cs shifts to a new "ring buffer" with every context
 * switch. When you want to submit a workload to the GPU you: A) choose your
 * context, B) find its appropriate virtualized ring, C) write commands to it
 * and then, finally, D) tell the GPU to switch to that context.
 *
 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
 * to a context is via a context execution list, ergo "Execlists".
 *
 * LRC implementation:
 * Regarding the creation of contexts, we have:
 *
 * - One global default context.
 * - One local default context for each opened fd.
 * - One local extra context for each context create ioctl call.
 *
 * Now that ringbuffers belong per-context (and not per-engine, like before)
 * and that contexts are uniquely tied to a given engine (and not reusable,
 * like before) we need:
 *
 * - One ringbuffer per-engine inside each context.
 * - One backing object per-engine inside each context.
 *
 * The global default context starts its life with these new objects fully
 * allocated and populated. The local default context for each opened fd is
 * more complex, because we don't know at creation time which engine is going
 * to use them. To handle this, we have implemented a deferred creation of LR
 * contexts:
 *
 * The local context starts its life as a hollow or blank holder, that only
 * gets populated for a given engine once we receive an execbuffer. If later
 * on we receive another execbuffer ioctl for the same context but a different
 * engine, we allocate/populate a new ringbuffer and context backing object and
 * so on.
 *
 * Finally, regarding local contexts created using the ioctl call: as they are
 * only allowed with the render ring, we can allocate & populate them right
 * away (no need to defer anything, at least for now).
 *
 * Execlists implementation:
 * Execlists are the new method by which, on gen8+ hardware, workloads are
 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
 * This method works as follows:
 *
 * When a request is committed, its commands (the BB start and any leading or
 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
 * for the appropriate context. The tail pointer in the hardware context is not
 * updated at this time, but instead, kept by the driver in the ringbuffer
 * structure. A structure representing this request is added to a request queue
 * for the appropriate engine: this structure contains a copy of the context's
 * tail after the request was written to the ring buffer and a pointer to the
 * context itself.
 *
 * If the engine's request queue was empty before the request was added, the
 * queue is processed immediately. Otherwise the queue will be processed during
 * a context switch interrupt. In any case, elements on the queue will get sent
 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
 * globally unique 20-bit submission ID.
 *
 * When execution of a request completes, the GPU updates the context status
 * buffer with a context complete event and generates a context switch interrupt.
 * During the interrupt handling, the driver examines the events in the buffer:
 * for each context complete event, if the announced ID matches that on the head
 * of the request queue, then that request is retired and removed from the queue.
 *
 * After processing, if any requests were retired and the queue is not empty
 * then a new execution list can be submitted. The two requests at the front of
 * the queue are next to be submitted but since a context may not occur twice in
 * an execution list, if subsequent requests have the same ID as the first then
 * the two requests must be combined. This is done simply by discarding requests
 * at the head of the queue until either only one request is left (in which case
 * we use a NULL second context) or the first two requests have unique IDs.
 *
 * By always executing the first two requests in the queue the driver ensures
 * that the GPU is kept as busy as possible. In the case where a single context
 * completes but a second context is still executing, the request for this second
 * context will be at the head of the queue when we remove the first one. This
 * request will then be resubmitted along with a new request for a different context,
 * which will cause the hardware to continue executing the second request and queue
 * the new request (the GPU detects the condition of a context getting preempted
 * with the same context and optimizes the context switch flow by not doing
 * preemption, but just sampling the new tail pointer).
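 *
 * As an illustration only (not the driver's actual code; the real dequeue
 * logic lives in execlists_dequeue() below), the ELSP pairing rule described
 * above amounts to roughly the following pseudo-code, where pick_next(),
 * same_context() and submit_elsp() are hypothetical helpers:
 *
 *	rq0 = pick_next(queue);
 *	rq1 = pick_next(queue);
 *	while (rq1 && same_context(rq0, rq1)) {
 *		rq0 = rq1;		(combine: run up to the later tail)
 *		rq1 = pick_next(queue);
 *	}
 *	submit_elsp(rq0, rq1);		(rq1 may be NULL)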
 *
 */
#include <linux/interrupt.h>
#include <linux/string_helpers.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "gen8_engine_cs.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_engine_stats.h"
#include "intel_execlists_submission.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm.h"
#include "intel_gt_regs.h"
#include "intel_gt_requests.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
#include "shmem_utils.h"

#define RING_EXECLIST_QFULL		(1 << 0x2)
#define RING_EXECLIST1_VALID		(1 << 0x3)
#define RING_EXECLIST0_VALID		(1 << 0x4)
#define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
#define RING_EXECLIST1_ACTIVE		(1 << 0x11)
#define RING_EXECLIST0_ACTIVE		(1 << 0x12)

#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)

#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
#define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
#define GEN12_CSB_SW_CTX_ID_MASK		GENMASK(25, 15)
#define GEN12_IDLE_CTX_ID		0x7FF
#define GEN12_CSB_CTX_VALID(csb_dw) \
	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)

#define XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	BIT(1) /* upper csb dword */
#define XEHP_CSB_SW_CTX_ID_MASK			GENMASK(31, 10)
#define XEHP_IDLE_CTX_ID			0xFFFF
#define XEHP_CSB_CTX_VALID(csb_dw) \
	(FIELD_GET(XEHP_CSB_SW_CTX_ID_MASK, csb_dw) != XEHP_IDLE_CTX_ID)

/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */

struct virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
	struct rcu_work rcu;

	/*
	 * We allow only a single request through the virtual engine at a time
	 * (each request in the timeline waits for the completion fence of
	 * the previous before being submitted). By restricting ourselves to
	 * only submitting a single request, each request is placed on to a
	 * physical engine to maximise load spreading (by virtue of the late
	 * greedy scheduling -- each real engine takes the next available
	 * request upon idling).
	 */
	struct i915_request *request;

	/*
	 * We keep an rbtree of available virtual engines inside each physical
	 * engine, sorted by priority. Here we preallocate the nodes we need
	 * for the virtual engine, indexed by physical_engine->id.
	 */
	struct ve_node {
		struct rb_node rb;
		int prio;
	} nodes[I915_NUM_ENGINES];

	/* And finally, which physical engines this virtual engine maps onto. */
	unsigned int num_siblings;
	struct intel_engine_cs *siblings[];
};

static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!intel_engine_is_virtual(engine));
	return container_of(engine, struct virtual_engine, base);
}

static struct intel_context *
execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
			 unsigned long flags);

static struct i915_request *
__active_request(const struct intel_timeline * const tl,
		 struct i915_request *rq,
		 int error)
{
	struct i915_request *active = rq;

	list_for_each_entry_from_reverse(rq, &tl->requests, link) {
		if (__i915_request_is_complete(rq))
			break;

		if (error) {
			i915_request_set_error_once(rq, error);
			__i915_request_skip(rq);
		}
		active = rq;
	}

	return active;
}

static struct i915_request *
active_request(const struct intel_timeline * const tl, struct i915_request *rq)
{
	return __active_request(tl, rq, 0);
}

static void ring_set_paused(const struct intel_engine_cs *engine, int state)
{
	/*
	 * We inspect HWS_PREEMPT with a semaphore inside
	 * engine->emit_fini_breadcrumb. If the dword is true,
	 * the ring is paused as the semaphore will busywait
	 * until the dword is false.
	 */
	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
	if (state)
		wmb();
}

static struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

static int rq_prio(const struct i915_request *rq)
{
	return READ_ONCE(rq->sched.attr.priority);
}

static int effective_prio(const struct i915_request *rq)
{
	int prio = rq_prio(rq);

	/*
	 * If this request is special and must not be interrupted at any
	 * cost, so be it. Note we are only checking the most recent request
	 * in the context and so may be masking an earlier vip request. It
	 * is hoped that under the conditions where nopreempt is used, this
	 * will not matter (i.e. all requests to that context will be
	 * nopreempt for as long as desired).
	 */
	if (i915_request_has_nopreempt(rq))
		prio = I915_PRIORITY_UNPREEMPTABLE;

	return prio;
}

static int queue_prio(const struct i915_sched_engine *sched_engine)
{
	struct rb_node *rb;

	rb = rb_first_cached(&sched_engine->queue);
	if (!rb)
		return INT_MIN;

	return to_priolist(rb)->priority;
}

static int virtual_prio(const struct intel_engine_execlists *el)
{
	struct rb_node *rb = rb_first_cached(&el->virtual);

	return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
}

static bool need_preempt(const struct intel_engine_cs *engine,
			 const struct i915_request *rq)
{
	int last_prio;

	if (!intel_engine_has_semaphores(engine))
		return false;

	/*
	 * Check if the current priority hint merits a preemption attempt.
	 *
	 * We record the highest value priority we saw during rescheduling
	 * prior to this dequeue, therefore we know that if it is strictly
	 * less than the current tail of ELSP[0], we do not need to force
	 * a preempt-to-idle cycle.
	 *
	 * However, the priority hint is a mere hint that we may need to
	 * preempt. If that hint is stale or we may be trying to preempt
	 * ourselves, ignore the request.
315 * 316 * More naturally we would write 317 * prio >= max(0, last); 318 * except that we wish to prevent triggering preemption at the same 319 * priority level: the task that is running should remain running 320 * to preserve FIFO ordering of dependencies. 321 */ 322 last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); 323 if (engine->sched_engine->queue_priority_hint <= last_prio) 324 return false; 325 326 /* 327 * Check against the first request in ELSP[1], it will, thanks to the 328 * power of PI, be the highest priority of that context. 329 */ 330 if (!list_is_last(&rq->sched.link, &engine->sched_engine->requests) && 331 rq_prio(list_next_entry(rq, sched.link)) > last_prio) 332 return true; 333 334 /* 335 * If the inflight context did not trigger the preemption, then maybe 336 * it was the set of queued requests? Pick the highest priority in 337 * the queue (the first active priolist) and see if it deserves to be 338 * running instead of ELSP[0]. 339 * 340 * The highest priority request in the queue can not be either 341 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same 342 * context, it's priority would not exceed ELSP[0] aka last_prio. 343 */ 344 return max(virtual_prio(&engine->execlists), 345 queue_prio(engine->sched_engine)) > last_prio; 346} 347 348__maybe_unused static bool 349assert_priority_queue(const struct i915_request *prev, 350 const struct i915_request *next) 351{ 352 /* 353 * Without preemption, the prev may refer to the still active element 354 * which we refuse to let go. 355 * 356 * Even with preemption, there are times when we think it is better not 357 * to preempt and leave an ostensibly lower priority request in flight. 358 */ 359 if (i915_request_is_active(prev)) 360 return true; 361 362 return rq_prio(prev) >= rq_prio(next); 363} 364 365static struct i915_request * 366__unwind_incomplete_requests(struct intel_engine_cs *engine) 367{ 368 struct i915_request *rq, *rn, *active = NULL; 369 struct list_head *pl; 370 int prio = I915_PRIORITY_INVALID; 371 372 lockdep_assert_held(&engine->sched_engine->lock); 373 374 list_for_each_entry_safe_reverse(rq, rn, 375 &engine->sched_engine->requests, 376 sched.link) { 377 if (__i915_request_is_complete(rq)) { 378 list_del_init(&rq->sched.link); 379 continue; 380 } 381 382 __i915_request_unsubmit(rq); 383 384 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 385 if (rq_prio(rq) != prio) { 386 prio = rq_prio(rq); 387 pl = i915_sched_lookup_priolist(engine->sched_engine, 388 prio); 389 } 390 GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); 391 392 list_move(&rq->sched.link, pl); 393 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 394 395 /* Check in case we rollback so far we wrap [size/2] */ 396 if (intel_ring_direction(rq->ring, 397 rq->tail, 398 rq->ring->tail + 8) > 0) 399 rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; 400 401 active = rq; 402 } 403 404 return active; 405} 406 407struct i915_request * 408execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) 409{ 410 struct intel_engine_cs *engine = 411 container_of(execlists, typeof(*engine), execlists); 412 413 return __unwind_incomplete_requests(engine); 414} 415 416static void 417execlists_context_status_change(struct i915_request *rq, unsigned long status) 418{ 419 /* 420 * Only used when GVT-g is enabled now. When GVT-g is disabled, 421 * The compiler should eliminate this function as dead-code. 
422 */ 423 if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) 424 return; 425 426 atomic_notifier_call_chain(&rq->engine->context_status_notifier, 427 status, rq); 428} 429 430static void reset_active(struct i915_request *rq, 431 struct intel_engine_cs *engine) 432{ 433 struct intel_context * const ce = rq->context; 434 u32 head; 435 436 /* 437 * The executing context has been cancelled. We want to prevent 438 * further execution along this context and propagate the error on 439 * to anything depending on its results. 440 * 441 * In __i915_request_submit(), we apply the -EIO and remove the 442 * requests' payloads for any banned requests. But first, we must 443 * rewind the context back to the start of the incomplete request so 444 * that we do not jump back into the middle of the batch. 445 * 446 * We preserve the breadcrumbs and semaphores of the incomplete 447 * requests so that inter-timeline dependencies (i.e other timelines) 448 * remain correctly ordered. And we defer to __i915_request_submit() 449 * so that all asynchronous waits are correctly handled. 450 */ 451 ENGINE_TRACE(engine, "{ reset rq=%llx:%lld }\n", 452 rq->fence.context, rq->fence.seqno); 453 454 /* On resubmission of the active request, payload will be scrubbed */ 455 if (__i915_request_is_complete(rq)) 456 head = rq->tail; 457 else 458 head = __active_request(ce->timeline, rq, -EIO)->head; 459 head = intel_ring_wrap(ce->ring, head); 460 461 /* Scrub the context image to prevent replaying the previous batch */ 462 lrc_init_regs(ce, engine, true); 463 464 /* We've switched away, so this should be a no-op, but intent matters */ 465 ce->lrc.lrca = lrc_update_regs(ce, engine, head); 466} 467 468static bool bad_request(const struct i915_request *rq) 469{ 470 return rq->fence.error && i915_request_started(rq); 471} 472 473static struct intel_engine_cs * 474__execlists_schedule_in(struct i915_request *rq) 475{ 476 struct intel_engine_cs * const engine = rq->engine; 477 struct intel_context * const ce = rq->context; 478 479 intel_context_get(ce); 480 481 if (unlikely(intel_context_is_closed(ce) && 482 !intel_engine_has_heartbeat(engine))) 483 intel_context_set_banned(ce); 484 485 if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) 486 reset_active(rq, engine); 487 488 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 489 lrc_check_regs(ce, engine, "before"); 490 491 if (ce->tag) { 492 /* Use a fixed tag for OA and friends */ 493 GEM_BUG_ON(ce->tag <= BITS_PER_LONG); 494 ce->lrc.ccid = ce->tag; 495 } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { 496 /* We don't need a strict matching tag, just different values */ 497 unsigned int tag = ffs(READ_ONCE(engine->context_tag)); 498 499 GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); 500 clear_bit(tag - 1, &engine->context_tag); 501 ce->lrc.ccid = tag << (XEHP_SW_CTX_ID_SHIFT - 32); 502 503 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); 504 505 } else { 506 /* We don't need a strict matching tag, just different values */ 507 unsigned int tag = __ffs(engine->context_tag); 508 509 GEM_BUG_ON(tag >= BITS_PER_LONG); 510 __clear_bit(tag, &engine->context_tag); 511 ce->lrc.ccid = (1 + tag) << (GEN11_SW_CTX_ID_SHIFT - 32); 512 513 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); 514 } 515 516 ce->lrc.ccid |= engine->execlists.ccid; 517 518 __intel_gt_pm_get(engine->gt); 519 if (engine->fw_domain && !engine->fw_active++) 520 intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); 521 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); 522 
intel_engine_context_in(engine); 523 524 CE_TRACE(ce, "schedule-in, ccid:%x\n", ce->lrc.ccid); 525 526 return engine; 527} 528 529static void execlists_schedule_in(struct i915_request *rq, int idx) 530{ 531 struct intel_context * const ce = rq->context; 532 struct intel_engine_cs *old; 533 534 GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); 535 trace_i915_request_in(rq, idx); 536 537 old = ce->inflight; 538 if (!old) 539 old = __execlists_schedule_in(rq); 540 WRITE_ONCE(ce->inflight, ptr_inc(old)); 541 542 GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); 543} 544 545static void 546resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve) 547{ 548 struct intel_engine_cs *engine = rq->engine; 549 550 spin_lock_irq(&engine->sched_engine->lock); 551 552 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 553 WRITE_ONCE(rq->engine, &ve->base); 554 ve->base.submit_request(rq); 555 556 spin_unlock_irq(&engine->sched_engine->lock); 557} 558 559static void kick_siblings(struct i915_request *rq, struct intel_context *ce) 560{ 561 struct virtual_engine *ve = container_of(ce, typeof(*ve), context); 562 struct intel_engine_cs *engine = rq->engine; 563 564 /* 565 * After this point, the rq may be transferred to a new sibling, so 566 * before we clear ce->inflight make sure that the context has been 567 * removed from the b->signalers and furthermore we need to make sure 568 * that the concurrent iterator in signal_irq_work is no longer 569 * following ce->signal_link. 570 */ 571 if (!list_empty(&ce->signals)) 572 intel_context_remove_breadcrumbs(ce, engine->breadcrumbs); 573 574 /* 575 * This engine is now too busy to run this virtual request, so 576 * see if we can find an alternative engine for it to execute on. 577 * Once a request has become bonded to this engine, we treat it the 578 * same as other native request. 579 */ 580 if (i915_request_in_priority_queue(rq) && 581 rq->execution_mask != engine->mask) 582 resubmit_virtual_request(rq, ve); 583 584 if (READ_ONCE(ve->request)) 585 tasklet_hi_schedule(&ve->base.sched_engine->tasklet); 586} 587 588static void __execlists_schedule_out(struct i915_request * const rq, 589 struct intel_context * const ce) 590{ 591 struct intel_engine_cs * const engine = rq->engine; 592 unsigned int ccid; 593 594 /* 595 * NB process_csb() is not under the engine->sched_engine->lock and hence 596 * schedule_out can race with schedule_in meaning that we should 597 * refrain from doing non-trivial work here. 598 */ 599 600 CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid); 601 GEM_BUG_ON(ce->inflight != engine); 602 603 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 604 lrc_check_regs(ce, engine, "after"); 605 606 /* 607 * If we have just completed this context, the engine may now be 608 * idle and we want to re-enter powersaving. 
609 */ 610 if (intel_timeline_is_last(ce->timeline, rq) && 611 __i915_request_is_complete(rq)) 612 intel_engine_add_retire(engine, ce->timeline); 613 614 ccid = ce->lrc.ccid; 615 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { 616 ccid >>= XEHP_SW_CTX_ID_SHIFT - 32; 617 ccid &= XEHP_MAX_CONTEXT_HW_ID; 618 } else { 619 ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; 620 ccid &= GEN12_MAX_CONTEXT_HW_ID; 621 } 622 623 if (ccid < BITS_PER_LONG) { 624 GEM_BUG_ON(ccid == 0); 625 GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); 626 __set_bit(ccid - 1, &engine->context_tag); 627 } 628 intel_engine_context_out(engine); 629 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); 630 if (engine->fw_domain && !--engine->fw_active) 631 intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); 632 intel_gt_pm_put_async(engine->gt); 633 634 /* 635 * If this is part of a virtual engine, its next request may 636 * have been blocked waiting for access to the active context. 637 * We have to kick all the siblings again in case we need to 638 * switch (e.g. the next request is not runnable on this 639 * engine). Hopefully, we will already have submitted the next 640 * request before the tasklet runs and do not need to rebuild 641 * each virtual tree and kick everyone again. 642 */ 643 if (ce->engine != engine) 644 kick_siblings(rq, ce); 645 646 WRITE_ONCE(ce->inflight, NULL); 647 intel_context_put(ce); 648} 649 650static inline void execlists_schedule_out(struct i915_request *rq) 651{ 652 struct intel_context * const ce = rq->context; 653 654 trace_i915_request_out(rq); 655 656 GEM_BUG_ON(!ce->inflight); 657 ce->inflight = ptr_dec(ce->inflight); 658 if (!__intel_context_inflight_count(ce->inflight)) 659 __execlists_schedule_out(rq, ce); 660 661 i915_request_put(rq); 662} 663 664static u64 execlists_update_context(struct i915_request *rq) 665{ 666 struct intel_context *ce = rq->context; 667 u64 desc; 668 u32 tail, prev; 669 670 desc = ce->lrc.desc; 671 if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY) 672 desc |= lrc_desc_priority(rq_prio(rq)); 673 674 /* 675 * WaIdleLiteRestore:bdw,skl 676 * 677 * We should never submit the context with the same RING_TAIL twice 678 * just in case we submit an empty ring, which confuses the HW. 679 * 680 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of 681 * the normal request to be able to always advance the RING_TAIL on 682 * subsequent resubmissions (for lite restore). Should that fail us, 683 * and we try and submit the same tail again, force the context 684 * reload. 685 * 686 * If we need to return to a preempted context, we need to skip the 687 * lite-restore and force it to reload the RING_TAIL. Otherwise, the 688 * HW has a tendency to ignore us rewinding the TAIL to the end of 689 * an earlier request. 690 */ 691 GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail); 692 prev = rq->ring->tail; 693 tail = intel_ring_set_tail(rq->ring, rq->tail); 694 if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) 695 desc |= CTX_DESC_FORCE_RESTORE; 696 ce->lrc_reg_state[CTX_RING_TAIL] = tail; 697 rq->tail = rq->wa_tail; 698 699 /* 700 * Make sure the context image is complete before we submit it to HW. 701 * 702 * Ostensibly, writes (including the WCB) should be flushed prior to 703 * an uncached write such as our mmio register access, the empirical 704 * evidence (esp. 
on Braswell) suggests that the WC write into memory 705 * may not be visible to the HW prior to the completion of the UC 706 * register write and that we may begin execution from the context 707 * before its image is complete leading to invalid PD chasing. 708 */ 709 wmb(); 710 711 ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE; 712 return desc; 713} 714 715static void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) 716{ 717 if (execlists->ctrl_reg) { 718 writel(lower_32_bits(desc), execlists->submit_reg + port * 2); 719 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); 720 } else { 721 writel(upper_32_bits(desc), execlists->submit_reg); 722 writel(lower_32_bits(desc), execlists->submit_reg); 723 } 724} 725 726static __maybe_unused char * 727dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq) 728{ 729 if (!rq) 730 return ""; 731 732 snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d", 733 prefix, 734 rq->context->lrc.ccid, 735 rq->fence.context, rq->fence.seqno, 736 __i915_request_is_complete(rq) ? "!" : 737 __i915_request_has_started(rq) ? "*" : 738 "", 739 rq_prio(rq)); 740 741 return buf; 742} 743 744static __maybe_unused noinline void 745trace_ports(const struct intel_engine_execlists *execlists, 746 const char *msg, 747 struct i915_request * const *ports) 748{ 749 const struct intel_engine_cs *engine = 750 container_of(execlists, typeof(*engine), execlists); 751 char __maybe_unused p0[40], p1[40]; 752 753 if (!ports[0]) 754 return; 755 756 ENGINE_TRACE(engine, "%s { %s%s }\n", msg, 757 dump_port(p0, sizeof(p0), "", ports[0]), 758 dump_port(p1, sizeof(p1), ", ", ports[1])); 759} 760 761static bool 762reset_in_progress(const struct intel_engine_cs *engine) 763{ 764 return unlikely(!__tasklet_is_enabled(&engine->sched_engine->tasklet)); 765} 766 767static __maybe_unused noinline bool 768assert_pending_valid(const struct intel_engine_execlists *execlists, 769 const char *msg) 770{ 771 struct intel_engine_cs *engine = 772 container_of(execlists, typeof(*engine), execlists); 773 struct i915_request * const *port, *rq, *prev = NULL; 774 struct intel_context *ce = NULL; 775 u32 ccid = -1; 776 777 trace_ports(execlists, msg, execlists->pending); 778 779 /* We may be messing around with the lists during reset, lalala */ 780 if (reset_in_progress(engine)) 781 return true; 782 783 if (!execlists->pending[0]) { 784 GEM_TRACE_ERR("%s: Nothing pending for promotion!\n", 785 engine->name); 786 return false; 787 } 788 789 if (execlists->pending[execlists_num_ports(execlists)]) { 790 GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n", 791 engine->name, execlists_num_ports(execlists)); 792 return false; 793 } 794 795 for (port = execlists->pending; (rq = *port); port++) { 796 unsigned long flags; 797 bool ok = true; 798 799 GEM_BUG_ON(!kref_read(&rq->fence.refcount)); 800 GEM_BUG_ON(!i915_request_is_active(rq)); 801 802 if (ce == rq->context) { 803 GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n", 804 engine->name, 805 ce->timeline->fence_context, 806 port - execlists->pending); 807 return false; 808 } 809 ce = rq->context; 810 811 if (ccid == ce->lrc.ccid) { 812 GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n", 813 engine->name, 814 ccid, ce->timeline->fence_context, 815 port - execlists->pending); 816 return false; 817 } 818 ccid = ce->lrc.ccid; 819 820 /* 821 * Sentinels are supposed to be the last request so they flush 822 * the current execution off the HW. 
Check that they are the only 823 * request in the pending submission. 824 * 825 * NB: Due to the async nature of preempt-to-busy and request 826 * cancellation we need to handle the case where request 827 * becomes a sentinel in parallel to CSB processing. 828 */ 829 if (prev && i915_request_has_sentinel(prev) && 830 !READ_ONCE(prev->fence.error)) { 831 GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n", 832 engine->name, 833 ce->timeline->fence_context, 834 port - execlists->pending); 835 return false; 836 } 837 prev = rq; 838 839 /* 840 * We want virtual requests to only be in the first slot so 841 * that they are never stuck behind a hog and can be immediately 842 * transferred onto the next idle engine. 843 */ 844 if (rq->execution_mask != engine->mask && 845 port != execlists->pending) { 846 GEM_TRACE_ERR("%s: virtual engine:%llx not in prime position[%zd]\n", 847 engine->name, 848 ce->timeline->fence_context, 849 port - execlists->pending); 850 return false; 851 } 852 853 /* Hold tightly onto the lock to prevent concurrent retires! */ 854 if (!spin_trylock_irqsave(&rq->lock, flags)) 855 continue; 856 857 if (__i915_request_is_complete(rq)) 858 goto unlock; 859 860 if (i915_active_is_idle(&ce->active) && 861 !intel_context_is_barrier(ce)) { 862 GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n", 863 engine->name, 864 ce->timeline->fence_context, 865 port - execlists->pending); 866 ok = false; 867 goto unlock; 868 } 869 870 if (!i915_vma_is_pinned(ce->state)) { 871 GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n", 872 engine->name, 873 ce->timeline->fence_context, 874 port - execlists->pending); 875 ok = false; 876 goto unlock; 877 } 878 879 if (!i915_vma_is_pinned(ce->ring->vma)) { 880 GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n", 881 engine->name, 882 ce->timeline->fence_context, 883 port - execlists->pending); 884 ok = false; 885 goto unlock; 886 } 887 888unlock: 889 spin_unlock_irqrestore(&rq->lock, flags); 890 if (!ok) 891 return false; 892 } 893 894 return ce; 895} 896 897static void execlists_submit_ports(struct intel_engine_cs *engine) 898{ 899 struct intel_engine_execlists *execlists = &engine->execlists; 900 unsigned int n; 901 902 GEM_BUG_ON(!assert_pending_valid(execlists, "submit")); 903 904 /* 905 * We can skip acquiring intel_runtime_pm_get() here as it was taken 906 * on our behalf by the request (see i915_gem_mark_busy()) and it will 907 * not be relinquished until the device is idle (see 908 * i915_gem_idle_work_handler()). As a precaution, we make sure 909 * that all ELSP are drained i.e. we have processed the CSB, 910 * before allowing ourselves to idle and calling intel_runtime_pm_put(). 911 */ 912 GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 913 914 /* 915 * ELSQ note: the submit queue is not cleared after being submitted 916 * to the HW so we need to make sure we always clean it up. This is 917 * currently ensured by the fact that we always write the same number 918 * of elsq entries, keep this in mind before changing the loop below. 919 */ 920 for (n = execlists_num_ports(execlists); n--; ) { 921 struct i915_request *rq = execlists->pending[n]; 922 923 write_desc(execlists, 924 rq ? 
execlists_update_context(rq) : 0, 925 n); 926 } 927 928 /* we need to manually load the submit queue */ 929 if (execlists->ctrl_reg) 930 writel(EL_CTRL_LOAD, execlists->ctrl_reg); 931} 932 933static bool ctx_single_port_submission(const struct intel_context *ce) 934{ 935 return (IS_ENABLED(CONFIG_DRM_I915_GVT) && 936 intel_context_force_single_submission(ce)); 937} 938 939static bool can_merge_ctx(const struct intel_context *prev, 940 const struct intel_context *next) 941{ 942 if (prev != next) 943 return false; 944 945 if (ctx_single_port_submission(prev)) 946 return false; 947 948 return true; 949} 950 951static unsigned long i915_request_flags(const struct i915_request *rq) 952{ 953 return READ_ONCE(rq->fence.flags); 954} 955 956static bool can_merge_rq(const struct i915_request *prev, 957 const struct i915_request *next) 958{ 959 GEM_BUG_ON(prev == next); 960 GEM_BUG_ON(!assert_priority_queue(prev, next)); 961 962 /* 963 * We do not submit known completed requests. Therefore if the next 964 * request is already completed, we can pretend to merge it in 965 * with the previous context (and we will skip updating the ELSP 966 * and tracking). Thus hopefully keeping the ELSP full with active 967 * contexts, despite the best efforts of preempt-to-busy to confuse 968 * us. 969 */ 970 if (__i915_request_is_complete(next)) 971 return true; 972 973 if (unlikely((i915_request_flags(prev) | i915_request_flags(next)) & 974 (BIT(I915_FENCE_FLAG_NOPREEMPT) | 975 BIT(I915_FENCE_FLAG_SENTINEL)))) 976 return false; 977 978 if (!can_merge_ctx(prev->context, next->context)) 979 return false; 980 981 GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno)); 982 return true; 983} 984 985static bool virtual_matches(const struct virtual_engine *ve, 986 const struct i915_request *rq, 987 const struct intel_engine_cs *engine) 988{ 989 const struct intel_engine_cs *inflight; 990 991 if (!rq) 992 return false; 993 994 if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */ 995 return false; 996 997 /* 998 * We track when the HW has completed saving the context image 999 * (i.e. when we have seen the final CS event switching out of 1000 * the context) and must not overwrite the context image before 1001 * then. This restricts us to only using the active engine 1002 * while the previous virtualized request is inflight (so 1003 * we reuse the register offsets). This is a very small 1004 * hystersis on the greedy seelction algorithm. 
1005 */ 1006 inflight = intel_context_inflight(&ve->context); 1007 if (inflight && inflight != engine) 1008 return false; 1009 1010 return true; 1011} 1012 1013static struct virtual_engine * 1014first_virtual_engine(struct intel_engine_cs *engine) 1015{ 1016 struct intel_engine_execlists *el = &engine->execlists; 1017 struct rb_node *rb = rb_first_cached(&el->virtual); 1018 1019 while (rb) { 1020 struct virtual_engine *ve = 1021 rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 1022 struct i915_request *rq = READ_ONCE(ve->request); 1023 1024 /* lazily cleanup after another engine handled rq */ 1025 if (!rq || !virtual_matches(ve, rq, engine)) { 1026 rb_erase_cached(rb, &el->virtual); 1027 RB_CLEAR_NODE(rb); 1028 rb = rb_first_cached(&el->virtual); 1029 continue; 1030 } 1031 1032 return ve; 1033 } 1034 1035 return NULL; 1036} 1037 1038static void virtual_xfer_context(struct virtual_engine *ve, 1039 struct intel_engine_cs *engine) 1040{ 1041 unsigned int n; 1042 1043 if (likely(engine == ve->siblings[0])) 1044 return; 1045 1046 GEM_BUG_ON(READ_ONCE(ve->context.inflight)); 1047 if (!intel_engine_has_relative_mmio(engine)) 1048 lrc_update_offsets(&ve->context, engine); 1049 1050 /* 1051 * Move the bound engine to the top of the list for 1052 * future execution. We then kick this tasklet first 1053 * before checking others, so that we preferentially 1054 * reuse this set of bound registers. 1055 */ 1056 for (n = 1; n < ve->num_siblings; n++) { 1057 if (ve->siblings[n] == engine) { 1058 swap(ve->siblings[n], ve->siblings[0]); 1059 break; 1060 } 1061 } 1062} 1063 1064static void defer_request(struct i915_request *rq, struct list_head * const pl) 1065{ 1066 LIST_HEAD(list); 1067 1068 /* 1069 * We want to move the interrupted request to the back of 1070 * the round-robin list (i.e. its priority level), but 1071 * in doing so, we must then move all requests that were in 1072 * flight and were waiting for the interrupted request to 1073 * be run after it again. 1074 */ 1075 do { 1076 struct i915_dependency *p; 1077 1078 GEM_BUG_ON(i915_request_is_active(rq)); 1079 list_move_tail(&rq->sched.link, pl); 1080 1081 for_each_waiter(p, rq) { 1082 struct i915_request *w = 1083 container_of(p->waiter, typeof(*w), sched); 1084 1085 if (p->flags & I915_DEPENDENCY_WEAK) 1086 continue; 1087 1088 /* Leave semaphores spinning on the other engines */ 1089 if (w->engine != rq->engine) 1090 continue; 1091 1092 /* No waiter should start before its signaler */ 1093 GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) && 1094 __i915_request_has_started(w) && 1095 !__i915_request_is_complete(rq)); 1096 1097 if (!i915_request_is_ready(w)) 1098 continue; 1099 1100 if (rq_prio(w) < rq_prio(rq)) 1101 continue; 1102 1103 GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); 1104 GEM_BUG_ON(i915_request_is_active(w)); 1105 list_move_tail(&w->sched.link, &list); 1106 } 1107 1108 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); 1109 } while (rq); 1110} 1111 1112static void defer_active(struct intel_engine_cs *engine) 1113{ 1114 struct i915_request *rq; 1115 1116 rq = __unwind_incomplete_requests(engine); 1117 if (!rq) 1118 return; 1119 1120 defer_request(rq, i915_sched_lookup_priolist(engine->sched_engine, 1121 rq_prio(rq))); 1122} 1123 1124static bool 1125timeslice_yield(const struct intel_engine_execlists *el, 1126 const struct i915_request *rq) 1127{ 1128 /* 1129 * Once bitten, forever smitten! 1130 * 1131 * If the active context ever busy-waited on a semaphore, 1132 * it will be treated as a hog until the end of its timeslice (i.e. 
1133 * until it is scheduled out and replaced by a new submission, 1134 * possibly even its own lite-restore). The HW only sends an interrupt 1135 * on the first miss, and we do know if that semaphore has been 1136 * signaled, or even if it is now stuck on another semaphore. Play 1137 * safe, yield if it might be stuck -- it will be given a fresh 1138 * timeslice in the near future. 1139 */ 1140 return rq->context->lrc.ccid == READ_ONCE(el->yield); 1141} 1142 1143static bool needs_timeslice(const struct intel_engine_cs *engine, 1144 const struct i915_request *rq) 1145{ 1146 if (!intel_engine_has_timeslices(engine)) 1147 return false; 1148 1149 /* If not currently active, or about to switch, wait for next event */ 1150 if (!rq || __i915_request_is_complete(rq)) 1151 return false; 1152 1153 /* We do not need to start the timeslice until after the ACK */ 1154 if (READ_ONCE(engine->execlists.pending[0])) 1155 return false; 1156 1157 /* If ELSP[1] is occupied, always check to see if worth slicing */ 1158 if (!list_is_last_rcu(&rq->sched.link, 1159 &engine->sched_engine->requests)) { 1160 ENGINE_TRACE(engine, "timeslice required for second inflight context\n"); 1161 return true; 1162 } 1163 1164 /* Otherwise, ELSP[0] is by itself, but may be waiting in the queue */ 1165 if (!i915_sched_engine_is_empty(engine->sched_engine)) { 1166 ENGINE_TRACE(engine, "timeslice required for queue\n"); 1167 return true; 1168 } 1169 1170 if (!RB_EMPTY_ROOT(&engine->execlists.virtual.rb_root)) { 1171 ENGINE_TRACE(engine, "timeslice required for virtual\n"); 1172 return true; 1173 } 1174 1175 return false; 1176} 1177 1178static bool 1179timeslice_expired(struct intel_engine_cs *engine, const struct i915_request *rq) 1180{ 1181 const struct intel_engine_execlists *el = &engine->execlists; 1182 1183 if (i915_request_has_nopreempt(rq) && __i915_request_has_started(rq)) 1184 return false; 1185 1186 if (!needs_timeslice(engine, rq)) 1187 return false; 1188 1189 return timer_expired(&el->timer) || timeslice_yield(el, rq); 1190} 1191 1192static unsigned long timeslice(const struct intel_engine_cs *engine) 1193{ 1194 return READ_ONCE(engine->props.timeslice_duration_ms); 1195} 1196 1197static void start_timeslice(struct intel_engine_cs *engine) 1198{ 1199 struct intel_engine_execlists *el = &engine->execlists; 1200 unsigned long duration; 1201 1202 /* Disable the timer if there is nothing to switch to */ 1203 duration = 0; 1204 if (needs_timeslice(engine, *el->active)) { 1205 /* Avoid continually prolonging an active timeslice */ 1206 if (timer_active(&el->timer)) { 1207 /* 1208 * If we just submitted a new ELSP after an old 1209 * context, that context may have already consumed 1210 * its timeslice, so recheck. 1211 */ 1212 if (!timer_pending(&el->timer)) 1213 tasklet_hi_schedule(&engine->sched_engine->tasklet); 1214 return; 1215 } 1216 1217 duration = timeslice(engine); 1218 } 1219 1220 set_timer_ms(&el->timer, duration); 1221} 1222 1223static void record_preemption(struct intel_engine_execlists *execlists) 1224{ 1225 (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); 1226} 1227 1228static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, 1229 const struct i915_request *rq) 1230{ 1231 if (!rq) 1232 return 0; 1233 1234 /* Force a fast reset for terminated contexts (ignoring sysfs!) 
*/ 1235 if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq))) 1236 return 1; 1237 1238 return READ_ONCE(engine->props.preempt_timeout_ms); 1239} 1240 1241static void set_preempt_timeout(struct intel_engine_cs *engine, 1242 const struct i915_request *rq) 1243{ 1244 if (!intel_engine_has_preempt_reset(engine)) 1245 return; 1246 1247 set_timer_ms(&engine->execlists.preempt, 1248 active_preempt_timeout(engine, rq)); 1249} 1250 1251static bool completed(const struct i915_request *rq) 1252{ 1253 if (i915_request_has_sentinel(rq)) 1254 return false; 1255 1256 return __i915_request_is_complete(rq); 1257} 1258 1259static void execlists_dequeue(struct intel_engine_cs *engine) 1260{ 1261 struct intel_engine_execlists * const execlists = &engine->execlists; 1262 struct i915_sched_engine * const sched_engine = engine->sched_engine; 1263 struct i915_request **port = execlists->pending; 1264 struct i915_request ** const last_port = port + execlists->port_mask; 1265 struct i915_request *last, * const *active; 1266 struct virtual_engine *ve; 1267 struct rb_node *rb; 1268 bool submit = false; 1269 1270 /* 1271 * Hardware submission is through 2 ports. Conceptually each port 1272 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is 1273 * static for a context, and unique to each, so we only execute 1274 * requests belonging to a single context from each ring. RING_HEAD 1275 * is maintained by the CS in the context image, it marks the place 1276 * where it got up to last time, and through RING_TAIL we tell the CS 1277 * where we want to execute up to this time. 1278 * 1279 * In this list the requests are in order of execution. Consecutive 1280 * requests from the same context are adjacent in the ringbuffer. We 1281 * can combine these requests into a single RING_TAIL update: 1282 * 1283 * RING_HEAD...req1...req2 1284 * ^- RING_TAIL 1285 * since to execute req2 the CS must first execute req1. 1286 * 1287 * Our goal then is to point each port to the end of a consecutive 1288 * sequence of requests as being the most optimal (fewest wake ups 1289 * and context switches) submission. 1290 */ 1291 1292 spin_lock(&sched_engine->lock); 1293 1294 /* 1295 * If the queue is higher priority than the last 1296 * request in the currently active context, submit afresh. 1297 * We will resubmit again afterwards in case we need to split 1298 * the active context to interject the preemption request, 1299 * i.e. we will retrigger preemption following the ack in case 1300 * of trouble. 1301 * 1302 */ 1303 active = execlists->active; 1304 while ((last = *active) && completed(last)) 1305 active++; 1306 1307 if (last) { 1308 if (need_preempt(engine, last)) { 1309 ENGINE_TRACE(engine, 1310 "preempting last=%llx:%lld, prio=%d, hint=%d\n", 1311 last->fence.context, 1312 last->fence.seqno, 1313 last->sched.attr.priority, 1314 sched_engine->queue_priority_hint); 1315 record_preemption(execlists); 1316 1317 /* 1318 * Don't let the RING_HEAD advance past the breadcrumb 1319 * as we unwind (and until we resubmit) so that we do 1320 * not accidentally tell it to go backwards. 1321 */ 1322 ring_set_paused(engine, 1); 1323 1324 /* 1325 * Note that we have not stopped the GPU at this point, 1326 * so we are unwinding the incomplete requests as they 1327 * remain inflight and so by the time we do complete 1328 * the preemption, some of the unwound requests may 1329 * complete! 
1330 */ 1331 __unwind_incomplete_requests(engine); 1332 1333 last = NULL; 1334 } else if (timeslice_expired(engine, last)) { 1335 ENGINE_TRACE(engine, 1336 "expired:%s last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", 1337 str_yes_no(timer_expired(&execlists->timer)), 1338 last->fence.context, last->fence.seqno, 1339 rq_prio(last), 1340 sched_engine->queue_priority_hint, 1341 str_yes_no(timeslice_yield(execlists, last))); 1342 1343 /* 1344 * Consume this timeslice; ensure we start a new one. 1345 * 1346 * The timeslice expired, and we will unwind the 1347 * running contexts and recompute the next ELSP. 1348 * If that submit will be the same pair of contexts 1349 * (due to dependency ordering), we will skip the 1350 * submission. If we don't cancel the timer now, 1351 * we will see that the timer has expired and 1352 * reschedule the tasklet; continually until the 1353 * next context switch or other preeemption event. 1354 * 1355 * Since we have decided to reschedule based on 1356 * consumption of this timeslice, if we submit the 1357 * same context again, grant it a full timeslice. 1358 */ 1359 cancel_timer(&execlists->timer); 1360 ring_set_paused(engine, 1); 1361 defer_active(engine); 1362 1363 /* 1364 * Unlike for preemption, if we rewind and continue 1365 * executing the same context as previously active, 1366 * the order of execution will remain the same and 1367 * the tail will only advance. We do not need to 1368 * force a full context restore, as a lite-restore 1369 * is sufficient to resample the monotonic TAIL. 1370 * 1371 * If we switch to any other context, similarly we 1372 * will not rewind TAIL of current context, and 1373 * normal save/restore will preserve state and allow 1374 * us to later continue executing the same request. 1375 */ 1376 last = NULL; 1377 } else { 1378 /* 1379 * Otherwise if we already have a request pending 1380 * for execution after the current one, we can 1381 * just wait until the next CS event before 1382 * queuing more. In either case we will force a 1383 * lite-restore preemption event, but if we wait 1384 * we hopefully coalesce several updates into a single 1385 * submission. 1386 */ 1387 if (active[1]) { 1388 /* 1389 * Even if ELSP[1] is occupied and not worthy 1390 * of timeslices, our queue might be. 1391 */ 1392 spin_unlock(&sched_engine->lock); 1393 return; 1394 } 1395 } 1396 } 1397 1398 /* XXX virtual is always taking precedence */ 1399 while ((ve = first_virtual_engine(engine))) { 1400 struct i915_request *rq; 1401 1402 spin_lock(&ve->base.sched_engine->lock); 1403 1404 rq = ve->request; 1405 if (unlikely(!virtual_matches(ve, rq, engine))) 1406 goto unlock; /* lost the race to a sibling */ 1407 1408 GEM_BUG_ON(rq->engine != &ve->base); 1409 GEM_BUG_ON(rq->context != &ve->context); 1410 1411 if (unlikely(rq_prio(rq) < queue_prio(sched_engine))) { 1412 spin_unlock(&ve->base.sched_engine->lock); 1413 break; 1414 } 1415 1416 if (last && !can_merge_rq(last, rq)) { 1417 spin_unlock(&ve->base.sched_engine->lock); 1418 spin_unlock(&engine->sched_engine->lock); 1419 return; /* leave this for another sibling */ 1420 } 1421 1422 ENGINE_TRACE(engine, 1423 "virtual rq=%llx:%lld%s, new engine? %s\n", 1424 rq->fence.context, 1425 rq->fence.seqno, 1426 __i915_request_is_complete(rq) ? "!" : 1427 __i915_request_has_started(rq) ? 
"*" : 1428 "", 1429 str_yes_no(engine != ve->siblings[0])); 1430 1431 WRITE_ONCE(ve->request, NULL); 1432 WRITE_ONCE(ve->base.sched_engine->queue_priority_hint, INT_MIN); 1433 1434 rb = &ve->nodes[engine->id].rb; 1435 rb_erase_cached(rb, &execlists->virtual); 1436 RB_CLEAR_NODE(rb); 1437 1438 GEM_BUG_ON(!(rq->execution_mask & engine->mask)); 1439 WRITE_ONCE(rq->engine, engine); 1440 1441 if (__i915_request_submit(rq)) { 1442 /* 1443 * Only after we confirm that we will submit 1444 * this request (i.e. it has not already 1445 * completed), do we want to update the context. 1446 * 1447 * This serves two purposes. It avoids 1448 * unnecessary work if we are resubmitting an 1449 * already completed request after timeslicing. 1450 * But more importantly, it prevents us altering 1451 * ve->siblings[] on an idle context, where 1452 * we may be using ve->siblings[] in 1453 * virtual_context_enter / virtual_context_exit. 1454 */ 1455 virtual_xfer_context(ve, engine); 1456 GEM_BUG_ON(ve->siblings[0] != engine); 1457 1458 submit = true; 1459 last = rq; 1460 } 1461 1462 i915_request_put(rq); 1463unlock: 1464 spin_unlock(&ve->base.sched_engine->lock); 1465 1466 /* 1467 * Hmm, we have a bunch of virtual engine requests, 1468 * but the first one was already completed (thanks 1469 * preempt-to-busy!). Keep looking at the veng queue 1470 * until we have no more relevant requests (i.e. 1471 * the normal submit queue has higher priority). 1472 */ 1473 if (submit) 1474 break; 1475 } 1476 1477 while ((rb = rb_first_cached(&sched_engine->queue))) { 1478 struct i915_priolist *p = to_priolist(rb); 1479 struct i915_request *rq, *rn; 1480 1481 priolist_for_each_request_consume(rq, rn, p) { 1482 bool merge = true; 1483 1484 /* 1485 * Can we combine this request with the current port? 1486 * It has to be the same context/ringbuffer and not 1487 * have any exceptions (e.g. GVT saying never to 1488 * combine contexts). 1489 * 1490 * If we can combine the requests, we can execute both 1491 * by updating the RING_TAIL to point to the end of the 1492 * second request, and so we never need to tell the 1493 * hardware about the first. 1494 */ 1495 if (last && !can_merge_rq(last, rq)) { 1496 /* 1497 * If we are on the second port and cannot 1498 * combine this request with the last, then we 1499 * are done. 1500 */ 1501 if (port == last_port) 1502 goto done; 1503 1504 /* 1505 * We must not populate both ELSP[] with the 1506 * same LRCA, i.e. we must submit 2 different 1507 * contexts if we submit 2 ELSP. 1508 */ 1509 if (last->context == rq->context) 1510 goto done; 1511 1512 if (i915_request_has_sentinel(last)) 1513 goto done; 1514 1515 /* 1516 * We avoid submitting virtual requests into 1517 * the secondary ports so that we can migrate 1518 * the request immediately to another engine 1519 * rather than wait for the primary request. 1520 */ 1521 if (rq->execution_mask != engine->mask) 1522 goto done; 1523 1524 /* 1525 * If GVT overrides us we only ever submit 1526 * port[0], leaving port[1] empty. Note that we 1527 * also have to be careful that we don't queue 1528 * the same context (even though a different 1529 * request) to the second port. 
1530 */ 1531 if (ctx_single_port_submission(last->context) || 1532 ctx_single_port_submission(rq->context)) 1533 goto done; 1534 1535 merge = false; 1536 } 1537 1538 if (__i915_request_submit(rq)) { 1539 if (!merge) { 1540 *port++ = i915_request_get(last); 1541 last = NULL; 1542 } 1543 1544 GEM_BUG_ON(last && 1545 !can_merge_ctx(last->context, 1546 rq->context)); 1547 GEM_BUG_ON(last && 1548 i915_seqno_passed(last->fence.seqno, 1549 rq->fence.seqno)); 1550 1551 submit = true; 1552 last = rq; 1553 } 1554 } 1555 1556 rb_erase_cached(&p->node, &sched_engine->queue); 1557 i915_priolist_free(p); 1558 } 1559done: 1560 *port++ = i915_request_get(last); 1561 1562 /* 1563 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. 1564 * 1565 * We choose the priority hint such that if we add a request of greater 1566 * priority than this, we kick the submission tasklet to decide on 1567 * the right order of submitting the requests to hardware. We must 1568 * also be prepared to reorder requests as they are in-flight on the 1569 * HW. We derive the priority hint then as the first "hole" in 1570 * the HW submission ports and if there are no available slots, 1571 * the priority of the lowest executing request, i.e. last. 1572 * 1573 * When we do receive a higher priority request ready to run from the 1574 * user, see queue_request(), the priority hint is bumped to that 1575 * request triggering preemption on the next dequeue (or subsequent 1576 * interrupt for secondary ports). 1577 */ 1578 sched_engine->queue_priority_hint = queue_prio(sched_engine); 1579 i915_sched_engine_reset_on_empty(sched_engine); 1580 spin_unlock(&sched_engine->lock); 1581 1582 /* 1583 * We can skip poking the HW if we ended up with exactly the same set 1584 * of requests as currently running, e.g. trying to timeslice a pair 1585 * of ordered contexts. 1586 */ 1587 if (submit && 1588 memcmp(active, 1589 execlists->pending, 1590 (port - execlists->pending) * sizeof(*port))) { 1591 *port = NULL; 1592 while (port-- != execlists->pending) 1593 execlists_schedule_in(*port, port - execlists->pending); 1594 1595 WRITE_ONCE(execlists->yield, -1); 1596 set_preempt_timeout(engine, *active); 1597 execlists_submit_ports(engine); 1598 } else { 1599 ring_set_paused(engine, 0); 1600 while (port-- != execlists->pending) 1601 i915_request_put(*port); 1602 *execlists->pending = NULL; 1603 } 1604} 1605 1606static void execlists_dequeue_irq(struct intel_engine_cs *engine) 1607{ 1608 local_irq_disable(); /* Suspend interrupts across request submission */ 1609 execlists_dequeue(engine); 1610 local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ 1611} 1612 1613static void clear_ports(struct i915_request **ports, int count) 1614{ 1615 memset_p((void **)ports, NULL, count); 1616} 1617 1618static void 1619copy_ports(struct i915_request **dst, struct i915_request **src, int count) 1620{ 1621 /* A memcpy_p() would be very useful here! 
*/ 1622 while (count--) 1623 WRITE_ONCE(*dst++, *src++); /* avoid write tearing */ 1624} 1625 1626static struct i915_request ** 1627cancel_port_requests(struct intel_engine_execlists * const execlists, 1628 struct i915_request **inactive) 1629{ 1630 struct i915_request * const *port; 1631 1632 for (port = execlists->pending; *port; port++) 1633 *inactive++ = *port; 1634 clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending)); 1635 1636 /* Mark the end of active before we overwrite *active */ 1637 for (port = xchg(&execlists->active, execlists->pending); *port; port++) 1638 *inactive++ = *port; 1639 clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); 1640 1641 smp_wmb(); /* complete the seqlock for execlists_active() */ 1642 WRITE_ONCE(execlists->active, execlists->inflight); 1643 1644 /* Having cancelled all outstanding process_csb(), stop their timers */ 1645 GEM_BUG_ON(execlists->pending[0]); 1646 cancel_timer(&execlists->timer); 1647 cancel_timer(&execlists->preempt); 1648 1649 return inactive; 1650} 1651 1652/* 1653 * Starting with Gen12, the status has a new format: 1654 * 1655 * bit 0: switched to new queue 1656 * bit 1: reserved 1657 * bit 2: semaphore wait mode (poll or signal), only valid when 1658 * switch detail is set to "wait on semaphore" 1659 * bits 3-5: engine class 1660 * bits 6-11: engine instance 1661 * bits 12-14: reserved 1662 * bits 15-25: sw context id of the lrc the GT switched to 1663 * bits 26-31: sw counter of the lrc the GT switched to 1664 * bits 32-35: context switch detail 1665 * - 0: ctx complete 1666 * - 1: wait on sync flip 1667 * - 2: wait on vblank 1668 * - 3: wait on scanline 1669 * - 4: wait on semaphore 1670 * - 5: context preempted (not on SEMAPHORE_WAIT or 1671 * WAIT_FOR_EVENT) 1672 * bit 36: reserved 1673 * bits 37-43: wait detail (for switch detail 1 to 4) 1674 * bits 44-46: reserved 1675 * bits 47-57: sw context id of the lrc the GT switched away from 1676 * bits 58-63: sw counter of the lrc the GT switched away from 1677 * 1678 * Xe_HP csb shuffles things around compared to TGL: 1679 * 1680 * bits 0-3: context switch detail (same possible values as TGL) 1681 * bits 4-9: engine instance 1682 * bits 10-25: sw context id of the lrc the GT switched to 1683 * bits 26-31: sw counter of the lrc the GT switched to 1684 * bit 32: semaphore wait mode (poll or signal), Only valid when 1685 * switch detail is set to "wait on semaphore" 1686 * bit 33: switched to new queue 1687 * bits 34-41: wait detail (for switch detail 1 to 4) 1688 * bits 42-57: sw context id of the lrc the GT switched away from 1689 * bits 58-63: sw counter of the lrc the GT switched away from 1690 */ 1691static inline bool 1692__gen12_csb_parse(bool ctx_to_valid, bool ctx_away_valid, bool new_queue, 1693 u8 switch_detail) 1694{ 1695 /* 1696 * The context switch detail is not guaranteed to be 5 when a preemption 1697 * occurs, so we can't just check for that. The check below works for 1698 * all the cases we care about, including preemptions of WAIT 1699 * instructions and lite-restore. Preempt-to-idle via the CTRL register 1700 * would require some extra handling, but we don't support that. 1701 */ 1702 if (!ctx_away_valid || new_queue) { 1703 GEM_BUG_ON(!ctx_to_valid); 1704 return true; 1705 } 1706 1707 /* 1708 * switch detail = 5 is covered by the case above and we do not expect a 1709 * context switch on an unsuccessful wait instruction since we always 1710 * use polling mode. 
1711 */ 1712 GEM_BUG_ON(switch_detail); 1713 return false; 1714} 1715 1716static bool xehp_csb_parse(const u64 csb) 1717{ 1718 return __gen12_csb_parse(XEHP_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */ 1719 XEHP_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */ 1720 upper_32_bits(csb) & XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE, 1721 GEN12_CTX_SWITCH_DETAIL(lower_32_bits(csb))); 1722} 1723 1724static bool gen12_csb_parse(const u64 csb) 1725{ 1726 return __gen12_csb_parse(GEN12_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */ 1727 GEN12_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */ 1728 lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE, 1729 GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb))); 1730} 1731 1732static bool gen8_csb_parse(const u64 csb) 1733{ 1734 return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); 1735} 1736 1737static noinline u64 1738wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb) 1739{ 1740 u64 entry; 1741 1742 /* 1743 * Reading from the HWSP has one particular advantage: we can detect 1744 * a stale entry. Since the write into HWSP is broken, we have no reason 1745 * to trust the HW at all, the mmio entry may equally be unordered, so 1746 * we prefer the path that is self-checking and as a last resort, 1747 * return the mmio value. 1748 * 1749 * tgl,dg1:HSDES#22011327657 1750 */ 1751 preempt_disable(); 1752 if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)) { 1753 int idx = csb - engine->execlists.csb_status; 1754 int status; 1755 1756 status = GEN8_EXECLISTS_STATUS_BUF; 1757 if (idx >= 6) { 1758 status = GEN11_EXECLISTS_STATUS_BUF2; 1759 idx -= 6; 1760 } 1761 status += sizeof(u64) * idx; 1762 1763 entry = intel_uncore_read64(engine->uncore, 1764 _MMIO(engine->mmio_base + status)); 1765 } 1766 preempt_enable(); 1767 1768 return entry; 1769} 1770 1771static u64 csb_read(const struct intel_engine_cs *engine, u64 * const csb) 1772{ 1773 u64 entry = READ_ONCE(*csb); 1774 1775 /* 1776 * Unfortunately, the GPU does not always serialise its write 1777 * of the CSB entries before its write of the CSB pointer, at least 1778 * from the perspective of the CPU, using what is known as a Global 1779 * Observation Point. We may read a new CSB tail pointer, but then 1780 * read the stale CSB entries, causing us to misinterpret the 1781 * context-switch events, and eventually declare the GPU hung. 1782 * 1783 * icl:HSDES#1806554093 1784 * tgl:HSDES#22011248461 1785 */ 1786 if (unlikely(entry == -1)) 1787 entry = wa_csb_read(engine, csb); 1788 1789 /* Consume this entry so that we can spot its future reuse. */ 1790 WRITE_ONCE(*csb, -1); 1791 1792 /* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */ 1793 return entry; 1794} 1795 1796static void new_timeslice(struct intel_engine_execlists *el) 1797{ 1798 /* By cancelling, we will start afresh in start_timeslice() */ 1799 cancel_timer(&el->timer); 1800} 1801 1802static struct i915_request ** 1803process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) 1804{ 1805 struct intel_engine_execlists * const execlists = &engine->execlists; 1806 u64 * const buf = execlists->csb_status; 1807 const u8 num_entries = execlists->csb_size; 1808 struct i915_request **prev; 1809 u8 head, tail; 1810 1811 /* 1812 * As we modify our execlists state tracking we require exclusive 1813 * access. Either we are inside the tasklet, or the tasklet is disabled 1814 * and we assume that is only inside the reset paths and so serialised. 
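	 *
	 * The assertion below encodes exactly that: either the submission
	 * tasklet is locked by us, or an engine reset is in progress.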
	 */
	GEM_BUG_ON(!tasklet_is_locked(&engine->sched_engine->tasklet) &&
		   !reset_in_progress(engine));

	/*
	 * Note that csb_write, csb_status may be either in HWSP or mmio.
	 * When reading from the csb_write mmio register, we have to be
	 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
	 * the low 4bits. As it happens we know the next 4bits are always
	 * zero and so we can simply mask off the low u8 of the register
	 * and treat it identically to reading from the HWSP (without having
	 * to use explicit shifting and masking, and probably bifurcating
	 * the code to handle the legacy mmio read).
	 */
	head = execlists->csb_head;
	tail = READ_ONCE(*execlists->csb_write);
	if (unlikely(head == tail))
		return inactive;

	/*
	 * We will consume all events from HW, or at least pretend to.
	 *
	 * The sequence of events from the HW is deterministic, and derived
	 * from our writes to the ELSP, with a smidgen of variability for
	 * the arrival of the asynchronous requests wrt the inflight
	 * execution. If the HW sends an event that does not correspond with
	 * the one we are expecting, we have to abandon all hope as we lose
	 * all tracking of what the engine is actually executing. We will
	 * only detect we are out of sequence with the HW when we get an
	 * 'impossible' event because we have already drained our own
	 * preemption/promotion queue. If this occurs, we know that we have
	 * likely lost track of execution earlier and must unwind and
	 * restart; the simplest way is to stop processing the event queue
	 * and force an engine reset.
	 */
	execlists->csb_head = tail;
	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);

	/*
	 * Hopefully paired with a wmb() in HW!
	 *
	 * We must complete the read of the write pointer before any reads
	 * from the CSB, so that we do not see stale values. Without an rmb
	 * (lfence) the HW may speculatively perform the CSB[] reads *before*
	 * we perform the READ_ONCE(*csb_write).
	 */
	rmb();

	/* Remember who was last running under the timer */
	prev = inactive;
	*prev = NULL;

	do {
		bool promote;
		u64 csb;

		if (++head == num_entries)
			head = 0;

		/*
		 * We are flying near dragons again.
		 *
		 * We hold a reference to the request in execlist_port[]
		 * but no more than that. We are operating in softirq
		 * context and so cannot hold any mutex or sleep. That
		 * prevents us from stopping the requests we are processing
		 * in port[] from being retired simultaneously (the
		 * breadcrumb will be complete before we see the
		 * context-switch). As we only hold the reference to the
		 * request, any pointer chasing underneath the request
		 * is subject to a potential use-after-free. Thus we
		 * store all of the bookkeeping within port[] as
		 * required, and avoid using unguarded pointers beneath
		 * request itself. The same applies to the atomic
		 * status notifier.
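	 *
	 * Note that csb_read() below also poisons each entry with -1 once
	 * it has been consumed, so that a stale (unwritten) entry can be
	 * detected and, if need be, re-read from the mmio status buffer
	 * (see wa_csb_read()).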
1890 */ 1891 1892 csb = csb_read(engine, buf + head); 1893 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", 1894 head, upper_32_bits(csb), lower_32_bits(csb)); 1895 1896 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 1897 promote = xehp_csb_parse(csb); 1898 else if (GRAPHICS_VER(engine->i915) >= 12) 1899 promote = gen12_csb_parse(csb); 1900 else 1901 promote = gen8_csb_parse(csb); 1902 if (promote) { 1903 struct i915_request * const *old = execlists->active; 1904 1905 if (GEM_WARN_ON(!*execlists->pending)) { 1906 execlists->error_interrupt |= ERROR_CSB; 1907 break; 1908 } 1909 1910 ring_set_paused(engine, 0); 1911 1912 /* Point active to the new ELSP; prevent overwriting */ 1913 WRITE_ONCE(execlists->active, execlists->pending); 1914 smp_wmb(); /* notify execlists_active() */ 1915 1916 /* cancel old inflight, prepare for switch */ 1917 trace_ports(execlists, "preempted", old); 1918 while (*old) 1919 *inactive++ = *old++; 1920 1921 /* switch pending to inflight */ 1922 GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); 1923 copy_ports(execlists->inflight, 1924 execlists->pending, 1925 execlists_num_ports(execlists)); 1926 smp_wmb(); /* complete the seqlock */ 1927 WRITE_ONCE(execlists->active, execlists->inflight); 1928 1929 /* XXX Magic delay for tgl */ 1930 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); 1931 1932 WRITE_ONCE(execlists->pending[0], NULL); 1933 } else { 1934 if (GEM_WARN_ON(!*execlists->active)) { 1935 execlists->error_interrupt |= ERROR_CSB; 1936 break; 1937 } 1938 1939 /* port0 completed, advanced to port1 */ 1940 trace_ports(execlists, "completed", execlists->active); 1941 1942 /* 1943 * We rely on the hardware being strongly 1944 * ordered, that the breadcrumb write is 1945 * coherent (visible from the CPU) before the 1946 * user interrupt is processed. One might assume 1947 * that the breadcrumb write being before the 1948 * user interrupt and the CS event for the context 1949 * switch would therefore be before the CS event 1950 * itself... 1951 */ 1952 if (GEM_SHOW_DEBUG() && 1953 !__i915_request_is_complete(*execlists->active)) { 1954 struct i915_request *rq = *execlists->active; 1955 const u32 *regs __maybe_unused = 1956 rq->context->lrc_reg_state; 1957 1958 ENGINE_TRACE(engine, 1959 "context completed before request!\n"); 1960 ENGINE_TRACE(engine, 1961 "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n", 1962 ENGINE_READ(engine, RING_START), 1963 ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR, 1964 ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR, 1965 ENGINE_READ(engine, RING_CTL), 1966 ENGINE_READ(engine, RING_MI_MODE)); 1967 ENGINE_TRACE(engine, 1968 "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ", 1969 i915_ggtt_offset(rq->ring->vma), 1970 rq->head, rq->tail, 1971 rq->fence.context, 1972 lower_32_bits(rq->fence.seqno), 1973 hwsp_seqno(rq)); 1974 ENGINE_TRACE(engine, 1975 "ctx:{start:%08x, head:%04x, tail:%04x}, ", 1976 regs[CTX_RING_START], 1977 regs[CTX_RING_HEAD], 1978 regs[CTX_RING_TAIL]); 1979 } 1980 1981 *inactive++ = *execlists->active++; 1982 1983 GEM_BUG_ON(execlists->active - execlists->inflight > 1984 execlists_num_ports(execlists)); 1985 } 1986 } while (head != tail); 1987 1988 /* 1989 * Gen11 has proven to fail wrt global observation point between 1990 * entry and tail update, failing on the ordering and thus 1991 * we see an old entry in the context status buffer. 
	 *
	 * Forcibly evict the entries ahead of the next GPU CSB update, to
	 * increase the odds that we read fresh entries even on non-working
	 * hardware. The cost of doing so mostly comes out in the wash, as
	 * the hardware, working or not, will need to do the invalidation
	 * beforehand.
	 */
	drm_clflush_virt_range(&buf[0], num_entries * sizeof(buf[0]));

	/*
	 * We assume that any event reflects a change in context flow
	 * and merits a fresh timeslice. We reinstall the timer after
	 * inspecting the queue to see if we need to resubmit.
	 */
	if (*prev != *execlists->active) { /* elide lite-restores */
		/*
		 * Note the inherent discrepancy between the HW runtime,
		 * recorded as part of the context switch, and the CPU
		 * adjustment for active contexts. We have to hope that
		 * the delay in processing the CS event is very small
		 * and consistent. It works to our advantage to have
		 * the CPU adjustment _undershoot_ (i.e. start later than)
		 * the CS timestamp so we never overreport the runtime
		 * and correct ourselves later when updating from HW.
		 */
		if (*prev)
			lrc_runtime_stop((*prev)->context);
		if (*execlists->active)
			lrc_runtime_start((*execlists->active)->context);
		new_timeslice(execlists);
	}

	return inactive;
}

static void post_process_csb(struct i915_request **port,
			     struct i915_request **last)
{
	while (port != last)
		execlists_schedule_out(*port++);
}

static void __execlists_hold(struct i915_request *rq)
{
	LIST_HEAD(list);

	do {
		struct i915_dependency *p;

		if (i915_request_is_active(rq))
			__i915_request_unsubmit(rq);

		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
		list_move_tail(&rq->sched.link,
			       &rq->engine->sched_engine->hold);
		i915_request_set_hold(rq);
		RQ_TRACE(rq, "on hold\n");

		for_each_waiter(p, rq) {
			struct i915_request *w =
				container_of(p->waiter, typeof(*w), sched);

			if (p->flags & I915_DEPENDENCY_WEAK)
				continue;

			/* Leave semaphores spinning on the other engines */
			if (w->engine != rq->engine)
				continue;

			if (!i915_request_is_ready(w))
				continue;

			if (__i915_request_is_complete(w))
				continue;

			if (i915_request_on_hold(w))
				continue;

			list_move_tail(&w->sched.link, &list);
		}

		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
	} while (rq);
}

static bool execlists_hold(struct intel_engine_cs *engine,
			   struct i915_request *rq)
{
	if (i915_request_on_hold(rq))
		return false;

	spin_lock_irq(&engine->sched_engine->lock);

	if (__i915_request_is_complete(rq)) { /* too late! */
		rq = NULL;
		goto unlock;
	}

	/*
	 * Transfer this request onto the hold queue to prevent it
	 * being resubmitted to HW (and potentially completed) before we have
	 * released it. Since we may have already submitted following
	 * requests, we need to remove those as well.
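	 *
	 * __execlists_hold() below therefore also walks the waiters on this
	 * engine and pulls any ready, incomplete dependents onto the hold
	 * list, unsubmitting them first if they were already in flight.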
2095 */ 2096 GEM_BUG_ON(i915_request_on_hold(rq)); 2097 GEM_BUG_ON(rq->engine != engine); 2098 __execlists_hold(rq); 2099 GEM_BUG_ON(list_empty(&engine->sched_engine->hold)); 2100 2101unlock: 2102 spin_unlock_irq(&engine->sched_engine->lock); 2103 return rq; 2104} 2105 2106static bool hold_request(const struct i915_request *rq) 2107{ 2108 struct i915_dependency *p; 2109 bool result = false; 2110 2111 /* 2112 * If one of our ancestors is on hold, we must also be on hold, 2113 * otherwise we will bypass it and execute before it. 2114 */ 2115 rcu_read_lock(); 2116 for_each_signaler(p, rq) { 2117 const struct i915_request *s = 2118 container_of(p->signaler, typeof(*s), sched); 2119 2120 if (s->engine != rq->engine) 2121 continue; 2122 2123 result = i915_request_on_hold(s); 2124 if (result) 2125 break; 2126 } 2127 rcu_read_unlock(); 2128 2129 return result; 2130} 2131 2132static void __execlists_unhold(struct i915_request *rq) 2133{ 2134 LIST_HEAD(list); 2135 2136 do { 2137 struct i915_dependency *p; 2138 2139 RQ_TRACE(rq, "hold release\n"); 2140 2141 GEM_BUG_ON(!i915_request_on_hold(rq)); 2142 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); 2143 2144 i915_request_clear_hold(rq); 2145 list_move_tail(&rq->sched.link, 2146 i915_sched_lookup_priolist(rq->engine->sched_engine, 2147 rq_prio(rq))); 2148 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2149 2150 /* Also release any children on this engine that are ready */ 2151 for_each_waiter(p, rq) { 2152 struct i915_request *w = 2153 container_of(p->waiter, typeof(*w), sched); 2154 2155 if (p->flags & I915_DEPENDENCY_WEAK) 2156 continue; 2157 2158 if (w->engine != rq->engine) 2159 continue; 2160 2161 if (!i915_request_on_hold(w)) 2162 continue; 2163 2164 /* Check that no other parents are also on hold */ 2165 if (hold_request(w)) 2166 continue; 2167 2168 list_move_tail(&w->sched.link, &list); 2169 } 2170 2171 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); 2172 } while (rq); 2173} 2174 2175static void execlists_unhold(struct intel_engine_cs *engine, 2176 struct i915_request *rq) 2177{ 2178 spin_lock_irq(&engine->sched_engine->lock); 2179 2180 /* 2181 * Move this request back to the priority queue, and all of its 2182 * children and grandchildren that were suspended along with it. 2183 */ 2184 __execlists_unhold(rq); 2185 2186 if (rq_prio(rq) > engine->sched_engine->queue_priority_hint) { 2187 engine->sched_engine->queue_priority_hint = rq_prio(rq); 2188 tasklet_hi_schedule(&engine->sched_engine->tasklet); 2189 } 2190 2191 spin_unlock_irq(&engine->sched_engine->lock); 2192} 2193 2194struct execlists_capture { 2195 struct work_struct work; 2196 struct i915_request *rq; 2197 struct i915_gpu_coredump *error; 2198}; 2199 2200static void execlists_capture_work(struct work_struct *work) 2201{ 2202 struct execlists_capture *cap = container_of(work, typeof(*cap), work); 2203 const gfp_t gfp = __GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | 2204 __GFP_NOWARN; 2205 struct intel_engine_cs *engine = cap->rq->engine; 2206 struct intel_gt_coredump *gt = cap->error->gt; 2207 struct intel_engine_capture_vma *vma; 2208 2209 /* Compress all the objects attached to the request, slow! 
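	 *
	 * We are in a worker (execlists_capture_work) by now, so unlike the
	 * initial capture_regs() we are allowed to use a reclaiming gfp mask
	 * for these allocations.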
*/ 2210 vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); 2211 if (vma) { 2212 struct i915_vma_compress *compress = 2213 i915_vma_capture_prepare(gt); 2214 2215 intel_engine_coredump_add_vma(gt->engine, vma, compress); 2216 i915_vma_capture_finish(gt, compress); 2217 } 2218 2219 gt->simulated = gt->engine->simulated; 2220 cap->error->simulated = gt->simulated; 2221 2222 /* Publish the error state, and announce it to the world */ 2223 i915_error_state_store(cap->error); 2224 i915_gpu_coredump_put(cap->error); 2225 2226 /* Return this request and all that depend upon it for signaling */ 2227 execlists_unhold(engine, cap->rq); 2228 i915_request_put(cap->rq); 2229 2230 kfree(cap); 2231} 2232 2233static struct execlists_capture *capture_regs(struct intel_engine_cs *engine) 2234{ 2235 const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; 2236 struct execlists_capture *cap; 2237 2238 cap = kmalloc(sizeof(*cap), gfp); 2239 if (!cap) 2240 return NULL; 2241 2242 cap->error = i915_gpu_coredump_alloc(engine->i915, gfp); 2243 if (!cap->error) 2244 goto err_cap; 2245 2246 cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp, CORE_DUMP_FLAG_NONE); 2247 if (!cap->error->gt) 2248 goto err_gpu; 2249 2250 cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp, CORE_DUMP_FLAG_NONE); 2251 if (!cap->error->gt->engine) 2252 goto err_gt; 2253 2254 cap->error->gt->engine->hung = true; 2255 2256 return cap; 2257 2258err_gt: 2259 kfree(cap->error->gt); 2260err_gpu: 2261 kfree(cap->error); 2262err_cap: 2263 kfree(cap); 2264 return NULL; 2265} 2266 2267static struct i915_request * 2268active_context(struct intel_engine_cs *engine, u32 ccid) 2269{ 2270 const struct intel_engine_execlists * const el = &engine->execlists; 2271 struct i915_request * const *port, *rq; 2272 2273 /* 2274 * Use the most recent result from process_csb(), but just in case 2275 * we trigger an error (via interrupt) before the first CS event has 2276 * been written, peek at the next submission. 2277 */ 2278 2279 for (port = el->active; (rq = *port); port++) { 2280 if (rq->context->lrc.ccid == ccid) { 2281 ENGINE_TRACE(engine, 2282 "ccid:%x found at active:%zd\n", 2283 ccid, port - el->active); 2284 return rq; 2285 } 2286 } 2287 2288 for (port = el->pending; (rq = *port); port++) { 2289 if (rq->context->lrc.ccid == ccid) { 2290 ENGINE_TRACE(engine, 2291 "ccid:%x found at pending:%zd\n", 2292 ccid, port - el->pending); 2293 return rq; 2294 } 2295 } 2296 2297 ENGINE_TRACE(engine, "ccid:%x not found\n", ccid); 2298 return NULL; 2299} 2300 2301static u32 active_ccid(struct intel_engine_cs *engine) 2302{ 2303 return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI); 2304} 2305 2306static void execlists_capture(struct intel_engine_cs *engine) 2307{ 2308 struct execlists_capture *cap; 2309 2310 if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) 2311 return; 2312 2313 /* 2314 * We need to _quickly_ capture the engine state before we reset. 2315 * We are inside an atomic section (softirq) here and we are delaying 2316 * the forced preemption event. 
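	 *
	 * Hence capture_regs() sticks to GFP_ATOMIC allocations; the slow
	 * compression of the request's objects is deferred to
	 * execlists_capture_work().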
2317 */ 2318 cap = capture_regs(engine); 2319 if (!cap) 2320 return; 2321 2322 spin_lock_irq(&engine->sched_engine->lock); 2323 cap->rq = active_context(engine, active_ccid(engine)); 2324 if (cap->rq) { 2325 cap->rq = active_request(cap->rq->context->timeline, cap->rq); 2326 cap->rq = i915_request_get_rcu(cap->rq); 2327 } 2328 spin_unlock_irq(&engine->sched_engine->lock); 2329 if (!cap->rq) 2330 goto err_free; 2331 2332 /* 2333 * Remove the request from the execlists queue, and take ownership 2334 * of the request. We pass it to our worker who will _slowly_ compress 2335 * all the pages the _user_ requested for debugging their batch, after 2336 * which we return it to the queue for signaling. 2337 * 2338 * By removing them from the execlists queue, we also remove the 2339 * requests from being processed by __unwind_incomplete_requests() 2340 * during the intel_engine_reset(), and so they will *not* be replayed 2341 * afterwards. 2342 * 2343 * Note that because we have not yet reset the engine at this point, 2344 * it is possible for the request that we have identified as being 2345 * guilty, did in fact complete and we will then hit an arbitration 2346 * point allowing the outstanding preemption to succeed. The likelihood 2347 * of that is very low (as capturing of the engine registers should be 2348 * fast enough to run inside an irq-off atomic section!), so we will 2349 * simply hold that request accountable for being non-preemptible 2350 * long enough to force the reset. 2351 */ 2352 if (!execlists_hold(engine, cap->rq)) 2353 goto err_rq; 2354 2355 INIT_WORK(&cap->work, execlists_capture_work); 2356 schedule_work(&cap->work); 2357 return; 2358 2359err_rq: 2360 i915_request_put(cap->rq); 2361err_free: 2362 i915_gpu_coredump_put(cap->error); 2363 kfree(cap); 2364} 2365 2366static void execlists_reset(struct intel_engine_cs *engine, const char *msg) 2367{ 2368 const unsigned int bit = I915_RESET_ENGINE + engine->id; 2369 unsigned long *lock = &engine->gt->reset.flags; 2370 2371 if (!intel_has_reset_engine(engine->gt)) 2372 return; 2373 2374 if (test_and_set_bit(bit, lock)) 2375 return; 2376 2377 ENGINE_TRACE(engine, "reset for %s\n", msg); 2378 2379 /* Mark this tasklet as disabled to avoid waiting for it to complete */ 2380 tasklet_disable_nosync(&engine->sched_engine->tasklet); 2381 2382 ring_set_paused(engine, 1); /* Freeze the current request in place */ 2383 execlists_capture(engine); 2384 intel_engine_reset(engine, msg); 2385 2386 tasklet_enable(&engine->sched_engine->tasklet); 2387 clear_and_wake_up_bit(bit, lock); 2388} 2389 2390static bool preempt_timeout(const struct intel_engine_cs *const engine) 2391{ 2392 const struct timer_list *t = &engine->execlists.preempt; 2393 2394 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) 2395 return false; 2396 2397 if (!timer_expired(t)) 2398 return false; 2399 2400 return engine->execlists.pending[0]; 2401} 2402 2403/* 2404 * Check the unread Context Status Buffers and manage the submission of new 2405 * contexts to the ELSP accordingly. 
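 *
 * In outline: drain the CSB events just written by the HW, escalate any
 * error or preempt-timeout into an engine reset, and then, if the ELSP has
 * fallen idle, dequeue the next pair of contexts and restart the timeslice
 * timer.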
2406 */ 2407static void execlists_submission_tasklet(struct tasklet_struct *t) 2408{ 2409 struct i915_sched_engine *sched_engine = 2410 from_tasklet(sched_engine, t, tasklet); 2411 struct intel_engine_cs * const engine = sched_engine->private_data; 2412 struct i915_request *post[2 * EXECLIST_MAX_PORTS]; 2413 struct i915_request **inactive; 2414 2415 rcu_read_lock(); 2416 inactive = process_csb(engine, post); 2417 GEM_BUG_ON(inactive - post > ARRAY_SIZE(post)); 2418 2419 if (unlikely(preempt_timeout(engine))) { 2420 cancel_timer(&engine->execlists.preempt); 2421 engine->execlists.error_interrupt |= ERROR_PREEMPT; 2422 } 2423 2424 if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) { 2425 const char *msg; 2426 2427 /* Generate the error message in priority wrt to the user! */ 2428 if (engine->execlists.error_interrupt & GENMASK(15, 0)) 2429 msg = "CS error"; /* thrown by a user payload */ 2430 else if (engine->execlists.error_interrupt & ERROR_CSB) 2431 msg = "invalid CSB event"; 2432 else if (engine->execlists.error_interrupt & ERROR_PREEMPT) 2433 msg = "preemption time out"; 2434 else 2435 msg = "internal error"; 2436 2437 engine->execlists.error_interrupt = 0; 2438 execlists_reset(engine, msg); 2439 } 2440 2441 if (!engine->execlists.pending[0]) { 2442 execlists_dequeue_irq(engine); 2443 start_timeslice(engine); 2444 } 2445 2446 post_process_csb(post, inactive); 2447 rcu_read_unlock(); 2448} 2449 2450static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir) 2451{ 2452 bool tasklet = false; 2453 2454 if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) { 2455 u32 eir; 2456 2457 /* Upper 16b are the enabling mask, rsvd for internal errors */ 2458 eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0); 2459 ENGINE_TRACE(engine, "CS error: %x\n", eir); 2460 2461 /* Disable the error interrupt until after the reset */ 2462 if (likely(eir)) { 2463 ENGINE_WRITE(engine, RING_EMR, ~0u); 2464 ENGINE_WRITE(engine, RING_EIR, eir); 2465 WRITE_ONCE(engine->execlists.error_interrupt, eir); 2466 tasklet = true; 2467 } 2468 } 2469 2470 if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) { 2471 WRITE_ONCE(engine->execlists.yield, 2472 ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)); 2473 ENGINE_TRACE(engine, "semaphore yield: %08x\n", 2474 engine->execlists.yield); 2475 if (del_timer(&engine->execlists.timer)) 2476 tasklet = true; 2477 } 2478 2479 if (iir & GT_CONTEXT_SWITCH_INTERRUPT) 2480 tasklet = true; 2481 2482 if (iir & GT_RENDER_USER_INTERRUPT) 2483 intel_engine_signal_breadcrumbs(engine); 2484 2485 if (tasklet) 2486 tasklet_hi_schedule(&engine->sched_engine->tasklet); 2487} 2488 2489static void __execlists_kick(struct intel_engine_execlists *execlists) 2490{ 2491 struct intel_engine_cs *engine = 2492 container_of(execlists, typeof(*engine), execlists); 2493 2494 /* Kick the tasklet for some interrupt coalescing and reset handling */ 2495 tasklet_hi_schedule(&engine->sched_engine->tasklet); 2496} 2497 2498#define execlists_kick(t, member) \ 2499 __execlists_kick(container_of(t, struct intel_engine_execlists, member)) 2500 2501static void execlists_timeslice(struct timer_list *timer) 2502{ 2503 execlists_kick(timer, timer); 2504} 2505 2506static void execlists_preempt(struct timer_list *timer) 2507{ 2508 execlists_kick(timer, preempt); 2509} 2510 2511static void queue_request(struct intel_engine_cs *engine, 2512 struct i915_request *rq) 2513{ 2514 GEM_BUG_ON(!list_empty(&rq->sched.link)); 2515 list_add_tail(&rq->sched.link, 2516 i915_sched_lookup_priolist(engine->sched_engine, 2517 
rq_prio(rq))); 2518 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2519} 2520 2521static bool submit_queue(struct intel_engine_cs *engine, 2522 const struct i915_request *rq) 2523{ 2524 struct i915_sched_engine *sched_engine = engine->sched_engine; 2525 2526 if (rq_prio(rq) <= sched_engine->queue_priority_hint) 2527 return false; 2528 2529 sched_engine->queue_priority_hint = rq_prio(rq); 2530 return true; 2531} 2532 2533static bool ancestor_on_hold(const struct intel_engine_cs *engine, 2534 const struct i915_request *rq) 2535{ 2536 GEM_BUG_ON(i915_request_on_hold(rq)); 2537 return !list_empty(&engine->sched_engine->hold) && hold_request(rq); 2538} 2539 2540static void execlists_submit_request(struct i915_request *request) 2541{ 2542 struct intel_engine_cs *engine = request->engine; 2543 unsigned long flags; 2544 2545 /* Will be called from irq-context when using foreign fences. */ 2546 spin_lock_irqsave(&engine->sched_engine->lock, flags); 2547 2548 if (unlikely(ancestor_on_hold(engine, request))) { 2549 RQ_TRACE(request, "ancestor on hold\n"); 2550 list_add_tail(&request->sched.link, 2551 &engine->sched_engine->hold); 2552 i915_request_set_hold(request); 2553 } else { 2554 queue_request(engine, request); 2555 2556 GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); 2557 GEM_BUG_ON(list_empty(&request->sched.link)); 2558 2559 if (submit_queue(engine, request)) 2560 __execlists_kick(&engine->execlists); 2561 } 2562 2563 spin_unlock_irqrestore(&engine->sched_engine->lock, flags); 2564} 2565 2566static int 2567__execlists_context_pre_pin(struct intel_context *ce, 2568 struct intel_engine_cs *engine, 2569 struct i915_gem_ww_ctx *ww, void **vaddr) 2570{ 2571 int err; 2572 2573 err = lrc_pre_pin(ce, engine, ww, vaddr); 2574 if (err) 2575 return err; 2576 2577 if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags)) { 2578 lrc_init_state(ce, engine, *vaddr); 2579 2580 __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size); 2581 } 2582 2583 return 0; 2584} 2585 2586static int execlists_context_pre_pin(struct intel_context *ce, 2587 struct i915_gem_ww_ctx *ww, 2588 void **vaddr) 2589{ 2590 return __execlists_context_pre_pin(ce, ce->engine, ww, vaddr); 2591} 2592 2593static int execlists_context_pin(struct intel_context *ce, void *vaddr) 2594{ 2595 return lrc_pin(ce, ce->engine, vaddr); 2596} 2597 2598static int execlists_context_alloc(struct intel_context *ce) 2599{ 2600 return lrc_alloc(ce, ce->engine); 2601} 2602 2603static void execlists_context_cancel_request(struct intel_context *ce, 2604 struct i915_request *rq) 2605{ 2606 struct intel_engine_cs *engine = NULL; 2607 2608 i915_request_active_engine(rq, &engine); 2609 2610 if (engine && intel_engine_pulse(engine)) 2611 intel_gt_handle_error(engine->gt, engine->mask, 0, 2612 "request cancellation by %s", 2613 current->comm); 2614} 2615 2616static struct intel_context * 2617execlists_create_parallel(struct intel_engine_cs **engines, 2618 unsigned int num_siblings, 2619 unsigned int width) 2620{ 2621 struct intel_context *parent = NULL, *ce, *err; 2622 int i; 2623 2624 GEM_BUG_ON(num_siblings != 1); 2625 2626 for (i = 0; i < width; ++i) { 2627 ce = intel_context_create(engines[i]); 2628 if (IS_ERR(ce)) { 2629 err = ce; 2630 goto unwind; 2631 } 2632 2633 if (i == 0) 2634 parent = ce; 2635 else 2636 intel_context_bind_parent_child(parent, ce); 2637 } 2638 2639 parent->parallel.fence_context = dma_fence_context_alloc(1); 2640 2641 intel_context_set_nopreempt(parent); 2642 for_each_child(parent, ce) 2643 
intel_context_set_nopreempt(ce); 2644 2645 return parent; 2646 2647unwind: 2648 if (parent) 2649 intel_context_put(parent); 2650 return err; 2651} 2652 2653static const struct intel_context_ops execlists_context_ops = { 2654 .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES, 2655 2656 .alloc = execlists_context_alloc, 2657 2658 .cancel_request = execlists_context_cancel_request, 2659 2660 .pre_pin = execlists_context_pre_pin, 2661 .pin = execlists_context_pin, 2662 .unpin = lrc_unpin, 2663 .post_unpin = lrc_post_unpin, 2664 2665 .enter = intel_context_enter_engine, 2666 .exit = intel_context_exit_engine, 2667 2668 .reset = lrc_reset, 2669 .destroy = lrc_destroy, 2670 2671 .create_parallel = execlists_create_parallel, 2672 .create_virtual = execlists_create_virtual, 2673}; 2674 2675static int emit_pdps(struct i915_request *rq) 2676{ 2677 const struct intel_engine_cs * const engine = rq->engine; 2678 struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm); 2679 int err, i; 2680 u32 *cs; 2681 2682 GEM_BUG_ON(intel_vgpu_active(rq->engine->i915)); 2683 2684 /* 2685 * Beware ye of the dragons, this sequence is magic! 2686 * 2687 * Small changes to this sequence can cause anything from 2688 * GPU hangs to forcewake errors and machine lockups! 2689 */ 2690 2691 cs = intel_ring_begin(rq, 2); 2692 if (IS_ERR(cs)) 2693 return PTR_ERR(cs); 2694 2695 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 2696 *cs++ = MI_NOOP; 2697 intel_ring_advance(rq, cs); 2698 2699 /* Flush any residual operations from the context load */ 2700 err = engine->emit_flush(rq, EMIT_FLUSH); 2701 if (err) 2702 return err; 2703 2704 /* Magic required to prevent forcewake errors! */ 2705 err = engine->emit_flush(rq, EMIT_INVALIDATE); 2706 if (err) 2707 return err; 2708 2709 cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); 2710 if (IS_ERR(cs)) 2711 return PTR_ERR(cs); 2712 2713 /* Ensure the LRI have landed before we invalidate & continue */ 2714 *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; 2715 for (i = GEN8_3LVL_PDPES; i--; ) { 2716 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 2717 u32 base = engine->mmio_base; 2718 2719 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); 2720 *cs++ = upper_32_bits(pd_daddr); 2721 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); 2722 *cs++ = lower_32_bits(pd_daddr); 2723 } 2724 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2725 intel_ring_advance(rq, cs); 2726 2727 intel_ring_advance(rq, cs); 2728 2729 return 0; 2730} 2731 2732static int execlists_request_alloc(struct i915_request *request) 2733{ 2734 int ret; 2735 2736 GEM_BUG_ON(!intel_context_is_pinned(request->context)); 2737 2738 /* 2739 * Flush enough space to reduce the likelihood of waiting after 2740 * we start building the request - in which case we will just 2741 * have to repeat work. 2742 */ 2743 request->reserved_space += EXECLISTS_REQUEST_SIZE; 2744 2745 /* 2746 * Note that after this point, we have committed to using 2747 * this request as it is being used to both track the 2748 * state of engine initialisation and liveness of the 2749 * golden renderstate above. Think twice before you try 2750 * to cancel/unwind this request now. 2751 */ 2752 2753 if (!i915_vm_is_4lvl(request->context->vm)) { 2754 ret = emit_pdps(request); 2755 if (ret) 2756 return ret; 2757 } 2758 2759 /* Unconditionally invalidate GPU caches and TLBs. 
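	 *
	 * Every request starts with an EMIT_INVALIDATE flush so that its
	 * payload does not sample stale cache or TLB entries left behind by
	 * whatever ran before it.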
*/ 2760 ret = request->engine->emit_flush(request, EMIT_INVALIDATE); 2761 if (ret) 2762 return ret; 2763 2764 request->reserved_space -= EXECLISTS_REQUEST_SIZE; 2765 return 0; 2766} 2767 2768static void reset_csb_pointers(struct intel_engine_cs *engine) 2769{ 2770 struct intel_engine_execlists * const execlists = &engine->execlists; 2771 const unsigned int reset_value = execlists->csb_size - 1; 2772 2773 ring_set_paused(engine, 0); 2774 2775 /* 2776 * Sometimes Icelake forgets to reset its pointers on a GPU reset. 2777 * Bludgeon them with a mmio update to be sure. 2778 */ 2779 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, 2780 0xffff << 16 | reset_value << 8 | reset_value); 2781 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); 2782 2783 /* 2784 * After a reset, the HW starts writing into CSB entry [0]. We 2785 * therefore have to set our HEAD pointer back one entry so that 2786 * the *first* entry we check is entry 0. To complicate this further, 2787 * as we don't wait for the first interrupt after reset, we have to 2788 * fake the HW write to point back to the last entry so that our 2789 * inline comparison of our cached head position against the last HW 2790 * write works even before the first interrupt. 2791 */ 2792 execlists->csb_head = reset_value; 2793 WRITE_ONCE(*execlists->csb_write, reset_value); 2794 wmb(); /* Make sure this is visible to HW (paranoia?) */ 2795 2796 /* Check that the GPU does indeed update the CSB entries! */ 2797 memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64)); 2798 drm_clflush_virt_range(execlists->csb_status, 2799 execlists->csb_size * 2800 sizeof(execlists->csb_status)); 2801 2802 /* Once more for luck and our trusty paranoia */ 2803 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, 2804 0xffff << 16 | reset_value << 8 | reset_value); 2805 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); 2806 2807 GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value); 2808} 2809 2810static void sanitize_hwsp(struct intel_engine_cs *engine) 2811{ 2812 struct intel_timeline *tl; 2813 2814 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 2815 intel_timeline_reset_seqno(tl); 2816} 2817 2818static void execlists_sanitize(struct intel_engine_cs *engine) 2819{ 2820 GEM_BUG_ON(execlists_active(&engine->execlists)); 2821 2822 /* 2823 * Poison residual state on resume, in case the suspend didn't! 2824 * 2825 * We have to assume that across suspend/resume (or other loss 2826 * of control) that the contents of our pinned buffers has been 2827 * lost, replaced by garbage. Since this doesn't always happen, 2828 * let's poison such state so that we more quickly spot when 2829 * we falsely assume it has been preserved. 2830 */ 2831 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 2832 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 2833 2834 reset_csb_pointers(engine); 2835 2836 /* 2837 * The kernel_context HWSP is stored in the status_page. As above, 2838 * that may be lost on resume/initialisation, and so we need to 2839 * reset the value in the HWSP. 
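	 *
	 * sanitize_hwsp() walks every timeline attached to the status page
	 * and resets its cached seqno via intel_timeline_reset_seqno().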
2840 */ 2841 sanitize_hwsp(engine); 2842 2843 /* And scrub the dirty cachelines for the HWSP */ 2844 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 2845 2846 intel_engine_reset_pinned_contexts(engine); 2847} 2848 2849static void enable_error_interrupt(struct intel_engine_cs *engine) 2850{ 2851 u32 status; 2852 2853 engine->execlists.error_interrupt = 0; 2854 ENGINE_WRITE(engine, RING_EMR, ~0u); 2855 ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */ 2856 2857 status = ENGINE_READ(engine, RING_ESR); 2858 if (unlikely(status)) { 2859 drm_err(&engine->i915->drm, 2860 "engine '%s' resumed still in error: %08x\n", 2861 engine->name, status); 2862 __intel_gt_reset(engine->gt, engine->mask); 2863 } 2864 2865 /* 2866 * On current gen8+, we have 2 signals to play with 2867 * 2868 * - I915_ERROR_INSTUCTION (bit 0) 2869 * 2870 * Generate an error if the command parser encounters an invalid 2871 * instruction 2872 * 2873 * This is a fatal error. 2874 * 2875 * - CP_PRIV (bit 2) 2876 * 2877 * Generate an error on privilege violation (where the CP replaces 2878 * the instruction with a no-op). This also fires for writes into 2879 * read-only scratch pages. 2880 * 2881 * This is a non-fatal error, parsing continues. 2882 * 2883 * * there are a few others defined for odd HW that we do not use 2884 * 2885 * Since CP_PRIV fires for cases where we have chosen to ignore the 2886 * error (as the HW is validating and suppressing the mistakes), we 2887 * only unmask the instruction error bit. 2888 */ 2889 ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION); 2890} 2891 2892static void enable_execlists(struct intel_engine_cs *engine) 2893{ 2894 u32 mode; 2895 2896 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 2897 2898 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 2899 2900 if (GRAPHICS_VER(engine->i915) >= 11) 2901 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE); 2902 else 2903 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE); 2904 ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode); 2905 2906 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 2907 2908 ENGINE_WRITE_FW(engine, 2909 RING_HWS_PGA, 2910 i915_ggtt_offset(engine->status_page.vma)); 2911 ENGINE_POSTING_READ(engine, RING_HWS_PGA); 2912 2913 enable_error_interrupt(engine); 2914} 2915 2916static int execlists_resume(struct intel_engine_cs *engine) 2917{ 2918 intel_mocs_init_engine(engine); 2919 intel_breadcrumbs_reset(engine->breadcrumbs); 2920 2921 enable_execlists(engine); 2922 2923 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 2924 xehp_enable_ccs_engines(engine); 2925 2926 return 0; 2927} 2928 2929static void execlists_reset_prepare(struct intel_engine_cs *engine) 2930{ 2931 ENGINE_TRACE(engine, "depth<-%d\n", 2932 atomic_read(&engine->sched_engine->tasklet.count)); 2933 2934 /* 2935 * Prevent request submission to the hardware until we have 2936 * completed the reset in i915_gem_reset_finish(). If a request 2937 * is completed by one engine, it may then queue a request 2938 * to a second via its execlists->tasklet *just* as we are 2939 * calling engine->resume() and also writing the ELSP. 2940 * Turning off the execlists->tasklet until the reset is over 2941 * prevents the race. 2942 */ 2943 __tasklet_disable_sync_once(&engine->sched_engine->tasklet); 2944 GEM_BUG_ON(!reset_in_progress(engine)); 2945 2946 /* 2947 * We stop engines, otherwise we might get failed reset and a 2948 * dead gpu (on elk). 
Also as modern gpu as kbl can suffer 2949 * from system hang if batchbuffer is progressing when 2950 * the reset is issued, regardless of READY_TO_RESET ack. 2951 * Thus assume it is best to stop engines on all gens 2952 * where we have a gpu reset. 2953 * 2954 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) 2955 * 2956 * FIXME: Wa for more modern gens needs to be validated 2957 */ 2958 ring_set_paused(engine, 1); 2959 intel_engine_stop_cs(engine); 2960 2961 engine->execlists.reset_ccid = active_ccid(engine); 2962} 2963 2964static struct i915_request ** 2965reset_csb(struct intel_engine_cs *engine, struct i915_request **inactive) 2966{ 2967 struct intel_engine_execlists * const execlists = &engine->execlists; 2968 2969 drm_clflush_virt_range(execlists->csb_write, 2970 sizeof(execlists->csb_write[0])); 2971 2972 inactive = process_csb(engine, inactive); /* drain preemption events */ 2973 2974 /* Following the reset, we need to reload the CSB read/write pointers */ 2975 reset_csb_pointers(engine); 2976 2977 return inactive; 2978} 2979 2980static void 2981execlists_reset_active(struct intel_engine_cs *engine, bool stalled) 2982{ 2983 struct intel_context *ce; 2984 struct i915_request *rq; 2985 u32 head; 2986 2987 /* 2988 * Save the currently executing context, even if we completed 2989 * its request, it was still running at the time of the 2990 * reset and will have been clobbered. 2991 */ 2992 rq = active_context(engine, engine->execlists.reset_ccid); 2993 if (!rq) 2994 return; 2995 2996 ce = rq->context; 2997 GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); 2998 2999 if (__i915_request_is_complete(rq)) { 3000 /* Idle context; tidy up the ring so we can restart afresh */ 3001 head = intel_ring_wrap(ce->ring, rq->tail); 3002 goto out_replay; 3003 } 3004 3005 /* We still have requests in-flight; the engine should be active */ 3006 GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 3007 3008 /* Context has requests still in-flight; it should not be idle! */ 3009 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 3010 3011 rq = active_request(ce->timeline, rq); 3012 head = intel_ring_wrap(ce->ring, rq->head); 3013 GEM_BUG_ON(head == ce->ring->tail); 3014 3015 /* 3016 * If this request hasn't started yet, e.g. it is waiting on a 3017 * semaphore, we need to avoid skipping the request or else we 3018 * break the signaling chain. However, if the context is corrupt 3019 * the request will not restart and we will be stuck with a wedged 3020 * device. It is quite often the case that if we issue a reset 3021 * while the GPU is loading the context image, that the context 3022 * image becomes corrupt. 3023 * 3024 * Otherwise, if we have not started yet, the request should replay 3025 * perfectly and we do not need to flag the result as being erroneous. 3026 */ 3027 if (!__i915_request_has_started(rq)) 3028 goto out_replay; 3029 3030 /* 3031 * If the request was innocent, we leave the request in the ELSP 3032 * and will try to replay it on restarting. The context image may 3033 * have been corrupted by the reset, in which case we may have 3034 * to service a new GPU hang, but more likely we can continue on 3035 * without impact. 3036 * 3037 * If the request was guilty, we presume the context is corrupt 3038 * and have to at least restore the RING register in the context 3039 * image back to the expected values to skip over the guilty request. 3040 */ 3041 __i915_request_reset(rq, stalled); 3042 3043 /* 3044 * We want a simple context + ring to execute the breadcrumb update. 
3045 * We cannot rely on the context being intact across the GPU hang, 3046 * so clear it and rebuild just what we need for the breadcrumb. 3047 * All pending requests for this context will be zapped, and any 3048 * future request will be after userspace has had the opportunity 3049 * to recreate its own state. 3050 */ 3051out_replay: 3052 ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", 3053 head, ce->ring->tail); 3054 lrc_reset_regs(ce, engine); 3055 ce->lrc.lrca = lrc_update_regs(ce, engine, head); 3056} 3057 3058static void execlists_reset_csb(struct intel_engine_cs *engine, bool stalled) 3059{ 3060 struct intel_engine_execlists * const execlists = &engine->execlists; 3061 struct i915_request *post[2 * EXECLIST_MAX_PORTS]; 3062 struct i915_request **inactive; 3063 3064 rcu_read_lock(); 3065 inactive = reset_csb(engine, post); 3066 3067 execlists_reset_active(engine, true); 3068 3069 inactive = cancel_port_requests(execlists, inactive); 3070 post_process_csb(post, inactive); 3071 rcu_read_unlock(); 3072} 3073 3074static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) 3075{ 3076 unsigned long flags; 3077 3078 ENGINE_TRACE(engine, "\n"); 3079 3080 /* Process the csb, find the guilty context and throw away */ 3081 execlists_reset_csb(engine, stalled); 3082 3083 /* Push back any incomplete requests for replay after the reset. */ 3084 rcu_read_lock(); 3085 spin_lock_irqsave(&engine->sched_engine->lock, flags); 3086 __unwind_incomplete_requests(engine); 3087 spin_unlock_irqrestore(&engine->sched_engine->lock, flags); 3088 rcu_read_unlock(); 3089} 3090 3091static void nop_submission_tasklet(struct tasklet_struct *t) 3092{ 3093 struct i915_sched_engine *sched_engine = 3094 from_tasklet(sched_engine, t, tasklet); 3095 struct intel_engine_cs * const engine = sched_engine->private_data; 3096 3097 /* The driver is wedged; don't process any more events. */ 3098 WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); 3099} 3100 3101static void execlists_reset_cancel(struct intel_engine_cs *engine) 3102{ 3103 struct intel_engine_execlists * const execlists = &engine->execlists; 3104 struct i915_sched_engine * const sched_engine = engine->sched_engine; 3105 struct i915_request *rq, *rn; 3106 struct rb_node *rb; 3107 unsigned long flags; 3108 3109 ENGINE_TRACE(engine, "\n"); 3110 3111 /* 3112 * Before we call engine->cancel_requests(), we should have exclusive 3113 * access to the submission state. This is arranged for us by the 3114 * caller disabling the interrupt generation, the tasklet and other 3115 * threads that may then access the same state, giving us a free hand 3116 * to reset state. However, we still need to let lockdep be aware that 3117 * we know this state may be accessed in hardirq context, so we 3118 * disable the irq around this manipulation and we want to keep 3119 * the spinlock focused on its duties and not accidentally conflate 3120 * coverage to the submission's irq state. (Similarly, although we 3121 * shouldn't need to disable irq around the manipulation of the 3122 * submission's irq state, we also wish to remind ourselves that 3123 * it is irq state.) 3124 */ 3125 execlists_reset_csb(engine, true); 3126 3127 rcu_read_lock(); 3128 spin_lock_irqsave(&engine->sched_engine->lock, flags); 3129 3130 /* Mark all executing requests as skipped. 
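	 *
	 * Each in-flight request has its fence error set to -EIO so that any
	 * waiters are woken and observe the failure rather than hang once
	 * the engine is cancelled.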
*/ 3131 list_for_each_entry(rq, &engine->sched_engine->requests, sched.link) 3132 i915_request_put(i915_request_mark_eio(rq)); 3133 intel_engine_signal_breadcrumbs(engine); 3134 3135 /* Flush the queued requests to the timeline list (for retiring). */ 3136 while ((rb = rb_first_cached(&sched_engine->queue))) { 3137 struct i915_priolist *p = to_priolist(rb); 3138 3139 priolist_for_each_request_consume(rq, rn, p) { 3140 if (i915_request_mark_eio(rq)) { 3141 __i915_request_submit(rq); 3142 i915_request_put(rq); 3143 } 3144 } 3145 3146 rb_erase_cached(&p->node, &sched_engine->queue); 3147 i915_priolist_free(p); 3148 } 3149 3150 /* On-hold requests will be flushed to timeline upon their release */ 3151 list_for_each_entry(rq, &sched_engine->hold, sched.link) 3152 i915_request_put(i915_request_mark_eio(rq)); 3153 3154 /* Cancel all attached virtual engines */ 3155 while ((rb = rb_first_cached(&execlists->virtual))) { 3156 struct virtual_engine *ve = 3157 rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 3158 3159 rb_erase_cached(rb, &execlists->virtual); 3160 RB_CLEAR_NODE(rb); 3161 3162 spin_lock(&ve->base.sched_engine->lock); 3163 rq = fetch_and_zero(&ve->request); 3164 if (rq) { 3165 if (i915_request_mark_eio(rq)) { 3166 rq->engine = engine; 3167 __i915_request_submit(rq); 3168 i915_request_put(rq); 3169 } 3170 i915_request_put(rq); 3171 3172 ve->base.sched_engine->queue_priority_hint = INT_MIN; 3173 } 3174 spin_unlock(&ve->base.sched_engine->lock); 3175 } 3176 3177 /* Remaining _unready_ requests will be nop'ed when submitted */ 3178 3179 sched_engine->queue_priority_hint = INT_MIN; 3180 sched_engine->queue = RB_ROOT_CACHED; 3181 3182 GEM_BUG_ON(__tasklet_is_enabled(&engine->sched_engine->tasklet)); 3183 engine->sched_engine->tasklet.callback = nop_submission_tasklet; 3184 3185 spin_unlock_irqrestore(&engine->sched_engine->lock, flags); 3186 rcu_read_unlock(); 3187} 3188 3189static void execlists_reset_finish(struct intel_engine_cs *engine) 3190{ 3191 struct intel_engine_execlists * const execlists = &engine->execlists; 3192 3193 /* 3194 * After a GPU reset, we may have requests to replay. Do so now while 3195 * we still have the forcewake to be sure that the GPU is not allowed 3196 * to sleep before we restart and reload a context. 3197 * 3198 * If the GPU reset fails, the engine may still be alive with requests 3199 * inflight. We expect those to complete, or for the device to be 3200 * reset as the next level of recovery, and as a final resort we 3201 * will declare the device wedged. 3202 */ 3203 GEM_BUG_ON(!reset_in_progress(engine)); 3204 3205 /* And kick in case we missed a new request submission. 
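	 *
	 * The kick is only issued once __tasklet_enable() reports the
	 * tasklet as runnable again, i.e. when the disable taken in
	 * execlists_reset_prepare() has been dropped.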
*/ 3206 if (__tasklet_enable(&engine->sched_engine->tasklet)) 3207 __execlists_kick(execlists); 3208 3209 ENGINE_TRACE(engine, "depth->%d\n", 3210 atomic_read(&engine->sched_engine->tasklet.count)); 3211} 3212 3213static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) 3214{ 3215 ENGINE_WRITE(engine, RING_IMR, 3216 ~(engine->irq_enable_mask | engine->irq_keep_mask)); 3217 ENGINE_POSTING_READ(engine, RING_IMR); 3218} 3219 3220static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) 3221{ 3222 ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); 3223} 3224 3225static void execlists_park(struct intel_engine_cs *engine) 3226{ 3227 cancel_timer(&engine->execlists.timer); 3228 cancel_timer(&engine->execlists.preempt); 3229} 3230 3231static void add_to_engine(struct i915_request *rq) 3232{ 3233 lockdep_assert_held(&rq->engine->sched_engine->lock); 3234 list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests); 3235} 3236 3237static void remove_from_engine(struct i915_request *rq) 3238{ 3239 struct intel_engine_cs *engine, *locked; 3240 3241 /* 3242 * Virtual engines complicate acquiring the engine timeline lock, 3243 * as their rq->engine pointer is not stable until under that 3244 * engine lock. The simple ploy we use is to take the lock then 3245 * check that the rq still belongs to the newly locked engine. 3246 */ 3247 locked = READ_ONCE(rq->engine); 3248 spin_lock_irq(&locked->sched_engine->lock); 3249 while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { 3250 spin_unlock(&locked->sched_engine->lock); 3251 spin_lock(&engine->sched_engine->lock); 3252 locked = engine; 3253 } 3254 list_del_init(&rq->sched.link); 3255 3256 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3257 clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); 3258 3259 /* Prevent further __await_execution() registering a cb, then flush */ 3260 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3261 3262 spin_unlock_irq(&locked->sched_engine->lock); 3263 3264 i915_request_notify_execute_cb_imm(rq); 3265} 3266 3267static bool can_preempt(struct intel_engine_cs *engine) 3268{ 3269 if (GRAPHICS_VER(engine->i915) > 8) 3270 return true; 3271 3272 /* GPGPU on bdw requires extra w/a; not implemented */ 3273 return engine->class != RENDER_CLASS; 3274} 3275 3276static void kick_execlists(const struct i915_request *rq, int prio) 3277{ 3278 struct intel_engine_cs *engine = rq->engine; 3279 struct i915_sched_engine *sched_engine = engine->sched_engine; 3280 const struct i915_request *inflight; 3281 3282 /* 3283 * We only need to kick the tasklet once for the high priority 3284 * new context we add into the queue. 3285 */ 3286 if (prio <= sched_engine->queue_priority_hint) 3287 return; 3288 3289 rcu_read_lock(); 3290 3291 /* Nothing currently active? We're overdue for a submission! */ 3292 inflight = execlists_active(&engine->execlists); 3293 if (!inflight) 3294 goto unlock; 3295 3296 /* 3297 * If we are already the currently executing context, don't 3298 * bother evaluating if we should preempt ourselves. 
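	 *
	 * A new request for the already-running context only needs its
	 * updated tail to be sampled (a lite-restore), not a full
	 * preemption.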
3299 */ 3300 if (inflight->context == rq->context) 3301 goto unlock; 3302 3303 ENGINE_TRACE(engine, 3304 "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", 3305 prio, 3306 rq->fence.context, rq->fence.seqno, 3307 inflight->fence.context, inflight->fence.seqno, 3308 inflight->sched.attr.priority); 3309 3310 sched_engine->queue_priority_hint = prio; 3311 3312 /* 3313 * Allow preemption of low -> normal -> high, but we do 3314 * not allow low priority tasks to preempt other low priority 3315 * tasks under the impression that latency for low priority 3316 * tasks does not matter (as much as background throughput), 3317 * so kiss. 3318 */ 3319 if (prio >= max(I915_PRIORITY_NORMAL, rq_prio(inflight))) 3320 tasklet_hi_schedule(&sched_engine->tasklet); 3321 3322unlock: 3323 rcu_read_unlock(); 3324} 3325 3326static void execlists_set_default_submission(struct intel_engine_cs *engine) 3327{ 3328 engine->submit_request = execlists_submit_request; 3329 engine->sched_engine->schedule = i915_schedule; 3330 engine->sched_engine->kick_backend = kick_execlists; 3331 engine->sched_engine->tasklet.callback = execlists_submission_tasklet; 3332} 3333 3334static void execlists_shutdown(struct intel_engine_cs *engine) 3335{ 3336 /* Synchronise with residual timers and any softirq they raise */ 3337 del_timer_sync(&engine->execlists.timer); 3338 del_timer_sync(&engine->execlists.preempt); 3339 tasklet_kill(&engine->sched_engine->tasklet); 3340} 3341 3342static void execlists_release(struct intel_engine_cs *engine) 3343{ 3344 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 3345 3346 execlists_shutdown(engine); 3347 3348 intel_engine_cleanup_common(engine); 3349 lrc_fini_wa_ctx(engine); 3350} 3351 3352static ktime_t __execlists_engine_busyness(struct intel_engine_cs *engine, 3353 ktime_t *now) 3354{ 3355 struct intel_engine_execlists_stats *stats = &engine->stats.execlists; 3356 ktime_t total = stats->total; 3357 3358 /* 3359 * If the engine is executing something at the moment 3360 * add it to the total. 3361 */ 3362 *now = ktime_get(); 3363 if (READ_ONCE(stats->active)) 3364 total = ktime_add(total, ktime_sub(*now, stats->start)); 3365 3366 return total; 3367} 3368 3369static ktime_t execlists_engine_busyness(struct intel_engine_cs *engine, 3370 ktime_t *now) 3371{ 3372 struct intel_engine_execlists_stats *stats = &engine->stats.execlists; 3373 unsigned int seq; 3374 ktime_t total; 3375 3376 do { 3377 seq = read_seqcount_begin(&stats->lock); 3378 total = __execlists_engine_busyness(engine, now); 3379 } while (read_seqcount_retry(&stats->lock, seq)); 3380 3381 return total; 3382} 3383 3384static void 3385logical_ring_default_vfuncs(struct intel_engine_cs *engine) 3386{ 3387 /* Default vfuncs which can be overridden by each engine. 
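	 *
	 * Engines flagged with I915_ENGINE_HAS_RCS_REG_STATE additionally
	 * override the flush and breadcrumb emitters via
	 * rcs_submission_override().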
*/ 3388 3389 engine->resume = execlists_resume; 3390 3391 engine->cops = &execlists_context_ops; 3392 engine->request_alloc = execlists_request_alloc; 3393 engine->add_active_request = add_to_engine; 3394 engine->remove_active_request = remove_from_engine; 3395 3396 engine->reset.prepare = execlists_reset_prepare; 3397 engine->reset.rewind = execlists_reset_rewind; 3398 engine->reset.cancel = execlists_reset_cancel; 3399 engine->reset.finish = execlists_reset_finish; 3400 3401 engine->park = execlists_park; 3402 engine->unpark = NULL; 3403 3404 engine->emit_flush = gen8_emit_flush_xcs; 3405 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 3406 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 3407 if (GRAPHICS_VER(engine->i915) >= 12) { 3408 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 3409 engine->emit_flush = gen12_emit_flush_xcs; 3410 } 3411 engine->set_default_submission = execlists_set_default_submission; 3412 3413 if (GRAPHICS_VER(engine->i915) < 11) { 3414 engine->irq_enable = gen8_logical_ring_enable_irq; 3415 engine->irq_disable = gen8_logical_ring_disable_irq; 3416 } else { 3417 /* 3418 * TODO: On Gen11 interrupt masks need to be clear 3419 * to allow C6 entry. Keep interrupts enabled at 3420 * and take the hit of generating extra interrupts 3421 * until a more refined solution exists. 3422 */ 3423 } 3424 intel_engine_set_irq_handler(engine, execlists_irq_handler); 3425 3426 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 3427 if (!intel_vgpu_active(engine->i915)) { 3428 engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 3429 if (can_preempt(engine)) { 3430 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 3431 if (CONFIG_DRM_I915_TIMESLICE_DURATION) 3432 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 3433 } 3434 } 3435 3436 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { 3437 if (intel_engine_has_preemption(engine)) 3438 engine->emit_bb_start = gen125_emit_bb_start; 3439 else 3440 engine->emit_bb_start = gen125_emit_bb_start_noarb; 3441 } else { 3442 if (intel_engine_has_preemption(engine)) 3443 engine->emit_bb_start = gen8_emit_bb_start; 3444 else 3445 engine->emit_bb_start = gen8_emit_bb_start_noarb; 3446 } 3447 3448 engine->busyness = execlists_engine_busyness; 3449} 3450 3451static void logical_ring_default_irqs(struct intel_engine_cs *engine) 3452{ 3453 unsigned int shift = 0; 3454 3455 if (GRAPHICS_VER(engine->i915) < 11) { 3456 const u8 irq_shifts[] = { 3457 [RCS0] = GEN8_RCS_IRQ_SHIFT, 3458 [BCS0] = GEN8_BCS_IRQ_SHIFT, 3459 [VCS0] = GEN8_VCS0_IRQ_SHIFT, 3460 [VCS1] = GEN8_VCS1_IRQ_SHIFT, 3461 [VECS0] = GEN8_VECS_IRQ_SHIFT, 3462 }; 3463 3464 shift = irq_shifts[engine->id]; 3465 } 3466 3467 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; 3468 engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; 3469 engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; 3470 engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; 3471} 3472 3473static void rcs_submission_override(struct intel_engine_cs *engine) 3474{ 3475 switch (GRAPHICS_VER(engine->i915)) { 3476 case 12: 3477 engine->emit_flush = gen12_emit_flush_rcs; 3478 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 3479 break; 3480 case 11: 3481 engine->emit_flush = gen11_emit_flush_rcs; 3482 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 3483 break; 3484 default: 3485 engine->emit_flush = gen8_emit_flush_rcs; 3486 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 3487 break; 3488 } 3489} 3490 3491int 
{
        struct intel_engine_execlists * const execlists = &engine->execlists;
        struct drm_i915_private *i915 = engine->i915;
        struct intel_uncore *uncore = engine->uncore;
        u32 base = engine->mmio_base;

        tasklet_setup(&engine->sched_engine->tasklet, execlists_submission_tasklet);
        timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
        timer_setup(&engine->execlists.preempt, execlists_preempt, 0);

        logical_ring_default_vfuncs(engine);
        logical_ring_default_irqs(engine);

        if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
                rcs_submission_override(engine);

        lrc_init_wa_ctx(engine);

        if (HAS_LOGICAL_RING_ELSQ(i915)) {
                execlists->submit_reg = uncore->regs +
                        i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
                execlists->ctrl_reg = uncore->regs +
                        i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));

                engine->fw_domain = intel_uncore_forcewake_for_reg(engine->uncore,
                                        RING_EXECLIST_CONTROL(engine->mmio_base),
                                        FW_REG_WRITE);
        } else {
                execlists->submit_reg = uncore->regs +
                        i915_mmio_reg_offset(RING_ELSP(base));
        }

        execlists->csb_status =
                (u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];

        execlists->csb_write =
                &engine->status_page.addr[INTEL_HWS_CSB_WRITE_INDEX(i915)];

        if (GRAPHICS_VER(i915) < 11)
                execlists->csb_size = GEN8_CSB_ENTRIES;
        else
                execlists->csb_size = GEN11_CSB_ENTRIES;

        engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
        if (GRAPHICS_VER(engine->i915) >= 11 &&
            GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 50)) {
                execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
                execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
        }

        /* Finally, take ownership and responsibility for cleanup! */
        engine->sanitize = execlists_sanitize;
        engine->release = execlists_release;

        return 0;
}

static struct list_head *virtual_queue(struct virtual_engine *ve)
{
        return &ve->base.sched_engine->default_priolist.requests;
}

static void rcu_virtual_context_destroy(struct work_struct *wrk)
{
        struct virtual_engine *ve =
                container_of(wrk, typeof(*ve), rcu.work);
        unsigned int n;

        GEM_BUG_ON(ve->context.inflight);

        /* Preempt-to-busy may leave a stale request behind. */
        if (unlikely(ve->request)) {
                struct i915_request *old;

                spin_lock_irq(&ve->base.sched_engine->lock);

                old = fetch_and_zero(&ve->request);
                if (old) {
                        GEM_BUG_ON(!__i915_request_is_complete(old));
                        __i915_request_submit(old);
                        i915_request_put(old);
                }

                spin_unlock_irq(&ve->base.sched_engine->lock);
        }

        /*
         * Flush the tasklet in case it is still running on another core.
         *
         * This needs to be done before we remove ourselves from the siblings'
         * rbtrees; if the tasklet is still running in parallel, it may
         * reinsert the rb_node into a sibling.
         */
        tasklet_kill(&ve->base.sched_engine->tasklet);

        /* Decouple ourselves from the siblings, no more access allowed. */
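        /*
         * Each sibling may hold an rb_node for this virtual engine in its
         * execlists.virtual tree; erase them under the sibling's
         * sched_engine lock so the sibling's tasklet never observes a
         * dangling node.
         */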
        for (n = 0; n < ve->num_siblings; n++) {
                struct intel_engine_cs *sibling = ve->siblings[n];
                struct rb_node *node = &ve->nodes[sibling->id].rb;

                if (RB_EMPTY_NODE(node))
                        continue;

                spin_lock_irq(&sibling->sched_engine->lock);

                /* Detachment is lazily performed in the sched_engine->tasklet */
                if (!RB_EMPTY_NODE(node))
                        rb_erase_cached(node, &sibling->execlists.virtual);

                spin_unlock_irq(&sibling->sched_engine->lock);
        }
        GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.sched_engine->tasklet));
        GEM_BUG_ON(!list_empty(virtual_queue(ve)));

        lrc_fini(&ve->context);
        intel_context_fini(&ve->context);

        if (ve->base.breadcrumbs)
                intel_breadcrumbs_put(ve->base.breadcrumbs);
        if (ve->base.sched_engine)
                i915_sched_engine_put(ve->base.sched_engine);
        intel_engine_free_request_pool(&ve->base);

        kfree(ve);
}

static void virtual_context_destroy(struct kref *kref)
{
        struct virtual_engine *ve =
                container_of(kref, typeof(*ve), context.ref);

        GEM_BUG_ON(!list_empty(&ve->context.signals));

        /*
         * When destroying the virtual engine, we have to be aware that
         * it may still be in use from a hardirq/softirq context causing
         * the resubmission of a completed request (background completion
         * due to preempt-to-busy). Before we can free the engine, we need
         * to flush the submission code and tasklets that are still potentially
         * accessing the engine. Flushing the tasklets requires process context,
         * and since we can guard the resubmit onto the engine with an RCU read
         * lock, we can delegate the free of the engine to an RCU worker.
         */
        INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
        queue_rcu_work(system_wq, &ve->rcu);
}

static void virtual_engine_initial_hint(struct virtual_engine *ve)
{
        int swp;

        /*
         * Pick a random sibling on starting to help spread the load around.
         *
         * New contexts are typically created with exactly the same order
         * of siblings, and often started in batches. Due to the way we iterate
         * the array of siblings when submitting requests, sibling[0] is
         * prioritised for dequeuing. If we make sure that sibling[0] is fairly
         * randomised across the system, we also help spread the load by the
         * first engine we inspect being different each time.
         *
         * NB This does not force us to execute on this engine, it will just
         * typically be the first we inspect for submission.
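         *
         * For example, with two siblings prandom_u32_max(2) picks 0 or 1
         * with equal probability, so roughly half of the virtual engines
         * created on the system start their submission scan from the
         * second physical engine instead of the first.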
         */
        swp = prandom_u32_max(ve->num_siblings);
        if (swp)
                swap(ve->siblings[swp], ve->siblings[0]);
}

static int virtual_context_alloc(struct intel_context *ce)
{
        struct virtual_engine *ve = container_of(ce, typeof(*ve), context);

        return lrc_alloc(ce, ve->siblings[0]);
}

static int virtual_context_pre_pin(struct intel_context *ce,
                                   struct i915_gem_ww_ctx *ww,
                                   void **vaddr)
{
        struct virtual_engine *ve = container_of(ce, typeof(*ve), context);

        /* Note: we must use a real engine class for setting up reg state */
        return __execlists_context_pre_pin(ce, ve->siblings[0], ww, vaddr);
}

static int virtual_context_pin(struct intel_context *ce, void *vaddr)
{
        struct virtual_engine *ve = container_of(ce, typeof(*ve), context);

        return lrc_pin(ce, ve->siblings[0], vaddr);
}

static void virtual_context_enter(struct intel_context *ce)
{
        struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
        unsigned int n;

        for (n = 0; n < ve->num_siblings; n++)
                intel_engine_pm_get(ve->siblings[n]);

        intel_timeline_enter(ce->timeline);
}

static void virtual_context_exit(struct intel_context *ce)
{
        struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
        unsigned int n;

        intel_timeline_exit(ce->timeline);

        for (n = 0; n < ve->num_siblings; n++)
                intel_engine_pm_put(ve->siblings[n]);
}

static struct intel_engine_cs *
virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
{
        struct virtual_engine *ve = to_virtual_engine(engine);

        if (sibling >= ve->num_siblings)
                return NULL;

        return ve->siblings[sibling];
}

static const struct intel_context_ops virtual_context_ops = {
        .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES,

        .alloc = virtual_context_alloc,

        .cancel_request = execlists_context_cancel_request,

        .pre_pin = virtual_context_pre_pin,
        .pin = virtual_context_pin,
        .unpin = lrc_unpin,
        .post_unpin = lrc_post_unpin,

        .enter = virtual_context_enter,
        .exit = virtual_context_exit,

        .destroy = virtual_context_destroy,

        .get_sibling = virtual_get_sibling,
};

static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
{
        struct i915_request *rq;
        intel_engine_mask_t mask;

        rq = READ_ONCE(ve->request);
        if (!rq)
                return 0;

        /* The rq is ready for submission; rq->execution_mask is now stable. */
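        /*
         * execution_mask lives in the same bit-space as engine->mask: each
         * set bit names a physical sibling the request may run on, and the
         * submission tasklet below tests it against sibling->mask.
         */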
        mask = rq->execution_mask;
        if (unlikely(!mask)) {
                /* Invalid selection, submit to a random engine in error */
                i915_request_set_error_once(rq, -ENODEV);
                mask = ve->siblings[0]->mask;
        }

        ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
                     rq->fence.context, rq->fence.seqno,
                     mask, ve->base.sched_engine->queue_priority_hint);

        return mask;
}

static void virtual_submission_tasklet(struct tasklet_struct *t)
{
        struct i915_sched_engine *sched_engine =
                from_tasklet(sched_engine, t, tasklet);
        struct virtual_engine * const ve =
                (struct virtual_engine *)sched_engine->private_data;
        const int prio = READ_ONCE(sched_engine->queue_priority_hint);
        intel_engine_mask_t mask;
        unsigned int n;

        rcu_read_lock();
        mask = virtual_submission_mask(ve);
        rcu_read_unlock();
        if (unlikely(!mask))
                return;

        for (n = 0; n < ve->num_siblings; n++) {
                struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
                struct ve_node * const node = &ve->nodes[sibling->id];
                struct rb_node **parent, *rb;
                bool first;

                if (!READ_ONCE(ve->request))
                        break; /* already handled by a sibling's tasklet */

                spin_lock_irq(&sibling->sched_engine->lock);

                if (unlikely(!(mask & sibling->mask))) {
                        if (!RB_EMPTY_NODE(&node->rb)) {
                                rb_erase_cached(&node->rb,
                                                &sibling->execlists.virtual);
                                RB_CLEAR_NODE(&node->rb);
                        }

                        goto unlock_engine;
                }

                if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
                        /*
                         * Cheat and avoid rebalancing the tree if we can
                         * reuse this node in situ.
                         */
                        first = rb_first_cached(&sibling->execlists.virtual) ==
                                &node->rb;
                        if (prio == node->prio || (prio > node->prio && first))
                                goto submit_engine;

                        rb_erase_cached(&node->rb, &sibling->execlists.virtual);
                }

                rb = NULL;
                first = true;
                parent = &sibling->execlists.virtual.rb_root.rb_node;
                while (*parent) {
                        struct ve_node *other;

                        rb = *parent;
                        other = rb_entry(rb, typeof(*other), rb);
                        if (prio > other->prio) {
                                parent = &rb->rb_left;
                        } else {
                                parent = &rb->rb_right;
                                first = false;
                        }
                }

                rb_link_node(&node->rb, rb, parent);
                rb_insert_color_cached(&node->rb,
                                       &sibling->execlists.virtual,
                                       first);

submit_engine:
                GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
                node->prio = prio;
                if (first && prio > sibling->sched_engine->queue_priority_hint)
                        tasklet_hi_schedule(&sibling->sched_engine->tasklet);

unlock_engine:
                spin_unlock_irq(&sibling->sched_engine->lock);

                if (intel_context_inflight(&ve->context))
                        break;
        }
}

static void virtual_submit_request(struct i915_request *rq)
{
        struct virtual_engine *ve = to_virtual_engine(rq->engine);
        unsigned long flags;

        ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
                     rq->fence.context,
                     rq->fence.seqno);

        GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);

        spin_lock_irqsave(&ve->base.sched_engine->lock, flags);

        /* By the time we resubmit a request, it may be completed */
        if (__i915_request_is_complete(rq)) {
                __i915_request_submit(rq);
                goto unlock;
        }

        if (ve->request) { /* background completion from preempt-to-busy */
                GEM_BUG_ON(!__i915_request_is_complete(ve->request));
                __i915_request_submit(ve->request);
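                /* Drop the reference taken when the stale request was queued */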
                i915_request_put(ve->request);
        }

        ve->base.sched_engine->queue_priority_hint = rq_prio(rq);
        ve->request = i915_request_get(rq);

        GEM_BUG_ON(!list_empty(virtual_queue(ve)));
        list_move_tail(&rq->sched.link, virtual_queue(ve));

        tasklet_hi_schedule(&ve->base.sched_engine->tasklet);

unlock:
        spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags);
}

static struct intel_context *
execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
                         unsigned long flags)
{
        struct virtual_engine *ve;
        unsigned int n;
        int err;

        ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
        if (!ve)
                return ERR_PTR(-ENOMEM);

        ve->base.i915 = siblings[0]->i915;
        ve->base.gt = siblings[0]->gt;
        ve->base.uncore = siblings[0]->uncore;
        ve->base.id = -1;

        ve->base.class = OTHER_CLASS;
        ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
        ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
        ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;

        /*
         * The decision on whether to submit a request using semaphores
         * depends on the saturated state of the engine. We only compute
         * this during HW submission of the request, and we need this
         * state to be globally applied to all requests being submitted
         * to this engine. Virtual engines encompass more than one physical
         * engine and so we cannot accurately tell in advance if one of those
         * engines is already saturated and so cannot afford to use a semaphore
         * and be pessimized in priority for doing so -- if we are the only
         * context using semaphores after all other clients have stopped, we
         * will be starved on the saturated system. Such a global switch for
         * semaphores is less than ideal, but alas is the current compromise.
         */
        ve->base.saturated = ALL_ENGINES;

        snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

        intel_engine_init_execlists(&ve->base);

        ve->base.sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
        if (!ve->base.sched_engine) {
                err = -ENOMEM;
                goto err_put;
        }
        ve->base.sched_engine->private_data = &ve->base;

        ve->base.cops = &virtual_context_ops;
        ve->base.request_alloc = execlists_request_alloc;

        ve->base.sched_engine->schedule = i915_schedule;
        ve->base.sched_engine->kick_backend = kick_execlists;
        ve->base.submit_request = virtual_submit_request;

        INIT_LIST_HEAD(virtual_queue(ve));
        tasklet_setup(&ve->base.sched_engine->tasklet, virtual_submission_tasklet);

        intel_context_init(&ve->context, &ve->base);

        ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
        if (!ve->base.breadcrumbs) {
                err = -ENOMEM;
                goto err_put;
        }

        for (n = 0; n < count; n++) {
                struct intel_engine_cs *sibling = siblings[n];

                GEM_BUG_ON(!is_power_of_2(sibling->mask));
                if (sibling->mask & ve->base.mask) {
                        DRM_DEBUG("duplicate %s entry in load balancer\n",
                                  sibling->name);
                        err = -EINVAL;
                        goto err_put;
                }

                /*
                 * The virtual engine implementation is tightly coupled to
                 * the execlists backend -- we push requests directly
                 * into a tree inside each physical engine. We could support
                 * layering if we handle cloning of the requests and
                 * submitting a copy into each backend.
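                 *
                 * The check below therefore rejects any sibling whose
                 * sched_engine tasklet is not the execlists submission
                 * tasklet, i.e. an engine driven by a different submission
                 * backend.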
                 */
                if (sibling->sched_engine->tasklet.callback !=
                    execlists_submission_tasklet) {
                        err = -ENODEV;
                        goto err_put;
                }

                GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
                RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);

                ve->siblings[ve->num_siblings++] = sibling;
                ve->base.mask |= sibling->mask;
                ve->base.logical_mask |= sibling->logical_mask;

                /*
                 * All physical engines must be compatible for their emission
                 * functions (as we build the instructions during request
                 * construction and do not alter them before submission
                 * on the physical engine). We use the engine class as a guide
                 * here, although that could be refined.
                 */
                if (ve->base.class != OTHER_CLASS) {
                        if (ve->base.class != sibling->class) {
                                DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
                                          sibling->class, ve->base.class);
                                err = -EINVAL;
                                goto err_put;
                        }
                        continue;
                }

                ve->base.class = sibling->class;
                ve->base.uabi_class = sibling->uabi_class;
                snprintf(ve->base.name, sizeof(ve->base.name),
                         "v%dx%d", ve->base.class, count);
                ve->base.context_size = sibling->context_size;

                ve->base.add_active_request = sibling->add_active_request;
                ve->base.remove_active_request = sibling->remove_active_request;
                ve->base.emit_bb_start = sibling->emit_bb_start;
                ve->base.emit_flush = sibling->emit_flush;
                ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
                ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
                ve->base.emit_fini_breadcrumb_dw =
                        sibling->emit_fini_breadcrumb_dw;

                ve->base.flags = sibling->flags;
        }

        ve->base.flags |= I915_ENGINE_IS_VIRTUAL;

        virtual_engine_initial_hint(ve);
        return &ve->context;

err_put:
        intel_context_put(&ve->context);
        return ERR_PTR(err);
}

void intel_execlists_show_requests(struct intel_engine_cs *engine,
                                   struct drm_printer *m,
                                   void (*show_request)(struct drm_printer *m,
                                                        const struct i915_request *rq,
                                                        const char *prefix,
                                                        int indent),
                                   unsigned int max)
{
        const struct intel_engine_execlists *execlists = &engine->execlists;
        struct i915_sched_engine *sched_engine = engine->sched_engine;
        struct i915_request *rq, *last;
        unsigned long flags;
        unsigned int count;
        struct rb_node *rb;

        spin_lock_irqsave(&sched_engine->lock, flags);

        last = NULL;
        count = 0;
        list_for_each_entry(rq, &sched_engine->requests, sched.link) {
                if (count++ < max - 1)
                        show_request(m, rq, "\t\t", 0);
                else
                        last = rq;
        }
        if (last) {
                if (count > max) {
                        drm_printf(m,
                                   "\t\t...skipping %d executing requests...\n",
                                   count - max);
                }
                show_request(m, last, "\t\t", 0);
        }

        if (sched_engine->queue_priority_hint != INT_MIN)
                drm_printf(m, "\t\tQueue priority hint: %d\n",
                           READ_ONCE(sched_engine->queue_priority_hint));

        last = NULL;
        count = 0;
        for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
                struct i915_priolist *p = rb_entry(rb, typeof(*p), node);

                priolist_for_each_request(rq, p) {
                        if (count++ < max - 1)
                                show_request(m, rq, "\t\t", 0);
                        else
                                last = rq;
                }
        }
        if (last) {
                if (count > max) {
                        drm_printf(m,
                                   "\t\t...skipping %d queued requests...\n",
                                   count - max);
                }
                show_request(m, last, "\t\t", 0);
        }

        last = NULL;
        count = 0;
        for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
                struct virtual_engine *ve =
                        rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
                struct i915_request *rq = READ_ONCE(ve->request);

                if (rq) {
                        if (count++ < max - 1)
                                show_request(m, rq, "\t\t", 0);
                        else
                                last = rq;
                }
        }
        if (last) {
                if (count > max) {
                        drm_printf(m,
                                   "\t\t...skipping %d virtual requests...\n",
                                   count - max);
                }
                show_request(m, last, "\t\t", 0);
        }

        spin_unlock_irqrestore(&sched_engine->lock, flags);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_execlists.c"
#endif