cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

intel_guc_submission.c (139299B)


      1// SPDX-License-Identifier: MIT
      2/*
      3 * Copyright © 2014 Intel Corporation
      4 */
      5
      6#include <linux/circ_buf.h>
      7
      8#include "gem/i915_gem_context.h"
      9#include "gt/gen8_engine_cs.h"
     10#include "gt/intel_breadcrumbs.h"
     11#include "gt/intel_context.h"
     12#include "gt/intel_engine_heartbeat.h"
     13#include "gt/intel_engine_pm.h"
     14#include "gt/intel_engine_regs.h"
     15#include "gt/intel_gpu_commands.h"
     16#include "gt/intel_gt.h"
     17#include "gt/intel_gt_clock_utils.h"
     18#include "gt/intel_gt_irq.h"
     19#include "gt/intel_gt_pm.h"
     20#include "gt/intel_gt_regs.h"
     21#include "gt/intel_gt_requests.h"
     22#include "gt/intel_lrc.h"
     23#include "gt/intel_lrc_reg.h"
     24#include "gt/intel_mocs.h"
     25#include "gt/intel_ring.h"
     26
     27#include "intel_guc_ads.h"
     28#include "intel_guc_capture.h"
     29#include "intel_guc_submission.h"
     30
     31#include "i915_drv.h"
     32#include "i915_trace.h"
     33
     34/**
     35 * DOC: GuC-based command submission
     36 *
     37 * The Scratch registers:
     38 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver writes
     39 * a value to the action register (SOFT_SCRATCH_0) along with any data. It then
     40 * triggers an interrupt on the GuC via another register write (0xC4C8).
     41 * Firmware writes a success/fail code back to the action register after
     42 * it processes the request. The kernel driver polls waiting for this update and
     43 * then proceeds.
     44 *
     45 * Command Transport buffers (CTBs):
     46 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
     47 * - G2H) are a message interface between the i915 and GuC.
     48 *
     49 * Context registration:
     50 * Before a context can be submitted it must be registered with the GuC via a
     51 * H2G. A unique guc_id is associated with each context. The context is either
     52 * registered at request creation time (normal operation) or at submission time
     53 * (abnormal operation, e.g. after a reset).
     54 *
     55 * Context submission:
     56 * The i915 updates the LRC tail value in memory. The i915 must enable the
     57 * scheduling of the context within the GuC for the GuC to actually consider it.
     58 * Therefore, the first time a disabled context is submitted we use a schedule
     59 * enable H2G, while follow up submissions are done via the context submit H2G,
     60 * which informs the GuC that a previously enabled context has new work
     61 * available.
     62 *
     63 * Context unpin:
     64 * To unpin a context a H2G is used to disable scheduling. When the
     65 * corresponding G2H returns indicating the scheduling disable operation has
     66 * completed it is safe to unpin the context. While a disable is in flight it
     67 * isn't safe to resubmit the context so a fence is used to stall all future
     68 * requests of that context until the G2H is returned.
     69 *
     70 * Context deregistration:
     71 * Before a context can be destroyed or if we steal its guc_id we must
     72 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
     73 * safe to submit anything to this guc_id until the deregister completes so a
     74 * fence is used to stall all requests associated with this guc_id until the
     75 * corresponding G2H returns indicating the guc_id has been deregistered.
     76 *
     77 * submission_state.guc_ids:
     78 * Unique number associated with private GuC context data passed in during
     79 * context registration / submission / deregistration. 64k available. Simple ida
     80 * is used for allocation.
     81 *
     82 * Stealing guc_ids:
     83 * If no guc_ids are available they can be stolen from another context at
     84 * request creation time if that context is unpinned. If a guc_id can't be found
     85 * we punt this problem to the user as we believe this is nearly impossible to hit
     86 * during normal use cases.
     87 *
     88 * Locking:
     89 * In the GuC submission code we have 3 basic spin locks which protect
     90 * everything. Details about each below.
     91 *
     92 * sched_engine->lock
     93 * This is the submission lock for all contexts that share an i915 schedule
     94 * engine (sched_engine), thus only one of the contexts which share a
     95 * sched_engine can be submitting at a time. Currently only one sched_engine is
     96 * used for all of GuC submission but that could change in the future.
     97 *
     98 * guc->submission_state.lock
     99 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
    100 * list.
    101 *
    102 * ce->guc_state.lock
    103 * Protects everything under ce->guc_state. Ensures that a context is in the
    104 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
    105 * on a disabled context (bad idea), we don't issue a schedule enable when a
    106 * schedule disable is in flight, etc... Also protects list of inflight requests
    107 * on the context and the priority management state. Lock is individual to each
    108 * context.
    109 *
    110 * Lock ordering rules:
    111 * sched_engine->lock -> ce->guc_state.lock
    112 * guc->submission_state.lock -> ce->guc_state.lock
    113 *
    114 * Reset races:
    115 * When a full GT reset is triggered it is assumed that some G2H responses to
    116 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
    117 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
    118 * contexts, release guc_ids, etc...). When this occurs we can scrub the
    119 * context state and clean up appropriately; however, this is quite racy.
    120 * To avoid races, the reset code must disable submission before scrubbing for
    121 * the missing G2H, while the submission code must check for submission being
    122 * disabled and skip sending H2Gs and updating context states when it is. Both
    123 * sides must also make sure to hold the relevant locks.
    124 */
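
/*
 * Illustrative sketch, not part of the original driver: the lock ordering
 * rules above mean that code which walks the sched_engine and then touches a
 * context's GuC state must take the locks in this order, e.g.:
 *
 *	spin_lock_irqsave(&sched_engine->lock, flags);
 *	spin_lock(&ce->guc_state.lock);
 *	... update ce->guc_state and its request list ...
 *	spin_unlock(&ce->guc_state.lock);
 *	spin_unlock_irqrestore(&sched_engine->lock, flags);
 *
 * __unwind_incomplete_requests() later in this file follows exactly this
 * pattern; taking the locks in the opposite order anywhere would risk an
 * ABBA deadlock.
 */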
    125
    126/* GuC Virtual Engine */
    127struct guc_virtual_engine {
    128	struct intel_engine_cs base;
    129	struct intel_context context;
    130};
    131
    132static struct intel_context *
    133guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
    134		   unsigned long flags);
    135
    136static struct intel_context *
    137guc_create_parallel(struct intel_engine_cs **engines,
    138		    unsigned int num_siblings,
    139		    unsigned int width);
    140
    141#define GUC_REQUEST_SIZE 64 /* bytes */
    142
    143/*
    144 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
    145 * per the GuC submission interface. A different allocation algorithm is used
    146 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
    147 * partition the guc_id space. We believe the number of multi-lrc contexts in
    148 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
    149 * multi-lrc.
    150 */
    151#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
    152	((guc)->submission_state.num_guc_ids / 16)
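
/*
 * Illustrative example, not part of the original driver: assuming the full
 * 64k (65536) guc_id space mentioned in the DOC comment above,
 * NUMBER_MULTI_LRC_GUC_ID() evaluates to 65536 / 16 = 4096 ids reserved for
 * multi-lrc, leaving the remaining ~60k ids for single-lrc contexts and
 * comfortably exceeding the documented minimum of 32 multi-lrc guc_ids.
 */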
    153
    154/*
    155 * Below is a set of functions which control the GuC scheduling state which
    156 * require a lock.
    157 */
    158#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
    159#define SCHED_STATE_DESTROYED				BIT(1)
    160#define SCHED_STATE_PENDING_DISABLE			BIT(2)
    161#define SCHED_STATE_BANNED				BIT(3)
    162#define SCHED_STATE_ENABLED				BIT(4)
    163#define SCHED_STATE_PENDING_ENABLE			BIT(5)
    164#define SCHED_STATE_REGISTERED				BIT(6)
    165#define SCHED_STATE_POLICY_REQUIRED			BIT(7)
    166#define SCHED_STATE_BLOCKED_SHIFT			8
    167#define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
    168#define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)
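
/*
 * Illustrative example, not part of the original driver: bits 7:0 above are
 * individual flags, while bits 19:8 form a 12-bit "blocked" counter.
 * incr_context_blocked() below adds SCHED_STATE_BLOCKED (BIT(8)) to
 * sched_state, so a context that is registered, enabled and blocked twice
 * has sched_state == SCHED_STATE_REGISTERED | SCHED_STATE_ENABLED |
 * (2 << SCHED_STATE_BLOCKED_SHIFT), and context_blocked() returns 2.
 */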
    169
    170static inline void init_sched_state(struct intel_context *ce)
    171{
    172	lockdep_assert_held(&ce->guc_state.lock);
    173	ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
    174}
    175
    176__maybe_unused
    177static bool sched_state_is_init(struct intel_context *ce)
    178{
    179	/* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
    180	return !(ce->guc_state.sched_state &
    181		 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
    182}
    183
    184static inline bool
    185context_wait_for_deregister_to_register(struct intel_context *ce)
    186{
    187	return ce->guc_state.sched_state &
    188		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
    189}
    190
    191static inline void
    192set_context_wait_for_deregister_to_register(struct intel_context *ce)
    193{
    194	lockdep_assert_held(&ce->guc_state.lock);
    195	ce->guc_state.sched_state |=
    196		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
    197}
    198
    199static inline void
    200clr_context_wait_for_deregister_to_register(struct intel_context *ce)
    201{
    202	lockdep_assert_held(&ce->guc_state.lock);
    203	ce->guc_state.sched_state &=
    204		~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
    205}
    206
    207static inline bool
    208context_destroyed(struct intel_context *ce)
    209{
    210	return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
    211}
    212
    213static inline void
    214set_context_destroyed(struct intel_context *ce)
    215{
    216	lockdep_assert_held(&ce->guc_state.lock);
    217	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
    218}
    219
    220static inline bool context_pending_disable(struct intel_context *ce)
    221{
    222	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
    223}
    224
    225static inline void set_context_pending_disable(struct intel_context *ce)
    226{
    227	lockdep_assert_held(&ce->guc_state.lock);
    228	ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
    229}
    230
    231static inline void clr_context_pending_disable(struct intel_context *ce)
    232{
    233	lockdep_assert_held(&ce->guc_state.lock);
    234	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
    235}
    236
    237static inline bool context_banned(struct intel_context *ce)
    238{
    239	return ce->guc_state.sched_state & SCHED_STATE_BANNED;
    240}
    241
    242static inline void set_context_banned(struct intel_context *ce)
    243{
    244	lockdep_assert_held(&ce->guc_state.lock);
    245	ce->guc_state.sched_state |= SCHED_STATE_BANNED;
    246}
    247
    248static inline void clr_context_banned(struct intel_context *ce)
    249{
    250	lockdep_assert_held(&ce->guc_state.lock);
    251	ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
    252}
    253
    254static inline bool context_enabled(struct intel_context *ce)
    255{
    256	return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
    257}
    258
    259static inline void set_context_enabled(struct intel_context *ce)
    260{
    261	lockdep_assert_held(&ce->guc_state.lock);
    262	ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
    263}
    264
    265static inline void clr_context_enabled(struct intel_context *ce)
    266{
    267	lockdep_assert_held(&ce->guc_state.lock);
    268	ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
    269}
    270
    271static inline bool context_pending_enable(struct intel_context *ce)
    272{
    273	return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
    274}
    275
    276static inline void set_context_pending_enable(struct intel_context *ce)
    277{
    278	lockdep_assert_held(&ce->guc_state.lock);
    279	ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
    280}
    281
    282static inline void clr_context_pending_enable(struct intel_context *ce)
    283{
    284	lockdep_assert_held(&ce->guc_state.lock);
    285	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
    286}
    287
    288static inline bool context_registered(struct intel_context *ce)
    289{
    290	return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
    291}
    292
    293static inline void set_context_registered(struct intel_context *ce)
    294{
    295	lockdep_assert_held(&ce->guc_state.lock);
    296	ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
    297}
    298
    299static inline void clr_context_registered(struct intel_context *ce)
    300{
    301	lockdep_assert_held(&ce->guc_state.lock);
    302	ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
    303}
    304
    305static inline bool context_policy_required(struct intel_context *ce)
    306{
    307	return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
    308}
    309
    310static inline void set_context_policy_required(struct intel_context *ce)
    311{
    312	lockdep_assert_held(&ce->guc_state.lock);
    313	ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
    314}
    315
    316static inline void clr_context_policy_required(struct intel_context *ce)
    317{
    318	lockdep_assert_held(&ce->guc_state.lock);
    319	ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
    320}
    321
    322static inline u32 context_blocked(struct intel_context *ce)
    323{
    324	return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
    325		SCHED_STATE_BLOCKED_SHIFT;
    326}
    327
    328static inline void incr_context_blocked(struct intel_context *ce)
    329{
    330	lockdep_assert_held(&ce->guc_state.lock);
    331
    332	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
    333
    334	GEM_BUG_ON(!context_blocked(ce));	/* Overflow check */
    335}
    336
    337static inline void decr_context_blocked(struct intel_context *ce)
    338{
    339	lockdep_assert_held(&ce->guc_state.lock);
    340
    341	GEM_BUG_ON(!context_blocked(ce));	/* Underflow check */
    342
    343	ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
    344}
    345
    346static inline bool context_has_committed_requests(struct intel_context *ce)
    347{
    348	return !!ce->guc_state.number_committed_requests;
    349}
    350
    351static inline void incr_context_committed_requests(struct intel_context *ce)
    352{
    353	lockdep_assert_held(&ce->guc_state.lock);
    354	++ce->guc_state.number_committed_requests;
    355	GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
    356}
    357
    358static inline void decr_context_committed_requests(struct intel_context *ce)
    359{
    360	lockdep_assert_held(&ce->guc_state.lock);
    361	--ce->guc_state.number_committed_requests;
    362	GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
    363}
    364
    365static struct intel_context *
    366request_to_scheduling_context(struct i915_request *rq)
    367{
    368	return intel_context_to_parent(rq->context);
    369}
    370
    371static inline bool context_guc_id_invalid(struct intel_context *ce)
    372{
    373	return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
    374}
    375
    376static inline void set_context_guc_id_invalid(struct intel_context *ce)
    377{
    378	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
    379}
    380
    381static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
    382{
    383	return &ce->engine->gt->uc.guc;
    384}
    385
    386static inline struct i915_priolist *to_priolist(struct rb_node *rb)
    387{
    388	return rb_entry(rb, struct i915_priolist, node);
    389}
    390
    391/*
    392 * When using multi-lrc submission a scratch memory area is reserved in the
    393 * parent's context state for the process descriptor, work queue, and handshake
    394 * between the parent + children contexts to insert safe preemption points
    395 * between each of the BBs. Currently the scratch area is sized to a page.
    396 *
    397 * The layout of this scratch area is below:
    398 * 0						guc_process_desc
    399 * + sizeof(struct guc_process_desc)		child go
    400 * + CACHELINE_BYTES				child join[0]
    401 * ...
    402 * + CACHELINE_BYTES				child join[n - 1]
    403 * ...						unused
    404 * PARENT_SCRATCH_SIZE / 2			work queue start
    405 * ...						work queue
    406 * PARENT_SCRATCH_SIZE - 1			work queue end
    407 */
    408#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
    409#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)
    410
    411struct sync_semaphore {
    412	u32 semaphore;
    413	u8 unused[CACHELINE_BYTES - sizeof(u32)];
    414};
    415
    416struct parent_scratch {
    417	struct guc_sched_wq_desc wq_desc;
    418
    419	struct sync_semaphore go;
    420	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
    421
    422	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
    423		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
    424
    425	u32 wq[WQ_SIZE / sizeof(u32)];
    426};
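
/*
 * Illustrative example, not part of the original driver, assuming the
 * scratch area is a single 4KiB page as described above: WQ_SIZE and
 * WQ_OFFSET are then both 2048, i.e. wq_desc, the go semaphore, the join[]
 * semaphores (one cacheline each) and the 'unused' padding fill the first
 * half of the page, and the work queue occupies the second half. The
 * BUILD_BUG_ON()s in __get_wq_offset() and __get_parent_scratch() check this
 * layout at compile time.
 */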
    427
    428static u32 __get_parent_scratch_offset(struct intel_context *ce)
    429{
    430	GEM_BUG_ON(!ce->parallel.guc.parent_page);
    431
    432	return ce->parallel.guc.parent_page * PAGE_SIZE;
    433}
    434
    435static u32 __get_wq_offset(struct intel_context *ce)
    436{
    437	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
    438
    439	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
    440}
    441
    442static struct parent_scratch *
    443__get_parent_scratch(struct intel_context *ce)
    444{
    445	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
    446	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
    447
    448	/*
    449	 * Need to subtract LRC_STATE_OFFSET here as the
    450	 * parallel.guc.parent_page is the offset into ce->state while
    451	 * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET.
    452	 */
    453	return (struct parent_scratch *)
    454		(ce->lrc_reg_state +
    455		 ((__get_parent_scratch_offset(ce) -
    456		   LRC_STATE_OFFSET) / sizeof(u32)));
    457}
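
/*
 * Illustrative example, not part of the original driver, assuming
 * LRC_STATE_OFFSET is one 4KiB page and parallel.guc.parent_page is 2: the
 * scratch area then starts 8192 bytes into ce->state, while ce->lrc_reg_state
 * already points 4096 bytes in, so the u32 index computed above is
 * (8192 - 4096) / sizeof(u32) = 1024.
 */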
    458
    459static struct guc_sched_wq_desc *
    460__get_wq_desc(struct intel_context *ce)
    461{
    462	struct parent_scratch *ps = __get_parent_scratch(ce);
    463
    464	return &ps->wq_desc;
    465}
    466
    467static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
    468			   struct intel_context *ce,
    469			   u32 wqi_size)
    470{
    471	/*
    472	 * Check for space in the work queue. We cache a copy of the head pointer
    473	 * in the intel_context structure in order to reduce the number of accesses
    474	 * to shared GPU memory, which may be across a PCIe bus.
    475	 */
    476#define AVAILABLE_SPACE	\
    477	CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
    478	if (wqi_size > AVAILABLE_SPACE) {
    479		ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head);
    480
    481		if (wqi_size > AVAILABLE_SPACE)
    482			return NULL;
    483	}
    484#undef AVAILABLE_SPACE
    485
    486	return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
    487}
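
/*
 * Illustrative example, not part of the original driver: CIRC_SPACE() from
 * <linux/circ_buf.h> always keeps one byte free to distinguish a full queue
 * from an empty one. With a 2048 byte work queue, wqi_tail == 2000 and
 * wqi_head == 100, the available space is (100 - 2000 - 1) & 2047 = 147
 * bytes: 48 bytes up to the wrap point plus 100 bytes at the start of the
 * buffer, minus the one reserved byte.
 */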
    488
    489static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
    490{
    491	struct intel_context *ce = xa_load(&guc->context_lookup, id);
    492
    493	GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);
    494
    495	return ce;
    496}
    497
    498static inline bool guc_submission_initialized(struct intel_guc *guc)
    499{
    500	return guc->submission_initialized;
    501}
    502
    503static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
    504{
    505	return __get_context(guc, id);
    506}
    507
    508static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
    509				      struct intel_context *ce)
    510{
    511	unsigned long flags;
    512
    513	/*
    514	 * The xarray API doesn't have an xa_store_irqsave() wrapper, so call the
    515	 * lower-level functions directly.
    516	 */
    517	xa_lock_irqsave(&guc->context_lookup, flags);
    518	__xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
    519	xa_unlock_irqrestore(&guc->context_lookup, flags);
    520}
    521
    522static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
    523{
    524	unsigned long flags;
    525
    526	if (unlikely(!guc_submission_initialized(guc)))
    527		return;
    528
    529	/*
    530	 * The xarray API doesn't have an xa_erase_irqsave() wrapper, so call
    531	 * the lower-level functions directly.
    532	 */
    533	xa_lock_irqsave(&guc->context_lookup, flags);
    534	__xa_erase(&guc->context_lookup, id);
    535	xa_unlock_irqrestore(&guc->context_lookup, flags);
    536}
    537
    538static void decr_outstanding_submission_g2h(struct intel_guc *guc)
    539{
    540	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
    541		wake_up_all(&guc->ct.wq);
    542}
    543
    544static int guc_submission_send_busy_loop(struct intel_guc *guc,
    545					 const u32 *action,
    546					 u32 len,
    547					 u32 g2h_len_dw,
    548					 bool loop)
    549{
    550	/*
    551	 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
    552	 * so we don't handle the case where we don't get a reply because we
    553	 * aborted the send due to the channel being busy.
    554	 */
    555	GEM_BUG_ON(g2h_len_dw && !loop);
    556
    557	if (g2h_len_dw)
    558		atomic_inc(&guc->outstanding_submission_g2h);
    559
    560	return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
    561}
    562
    563int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
    564				   atomic_t *wait_var,
    565				   bool interruptible,
    566				   long timeout)
    567{
    568	const int state = interruptible ?
    569		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
    570	DEFINE_WAIT(wait);
    571
    572	might_sleep();
    573	GEM_BUG_ON(timeout < 0);
    574
    575	if (!atomic_read(wait_var))
    576		return 0;
    577
    578	if (!timeout)
    579		return -ETIME;
    580
    581	for (;;) {
    582		prepare_to_wait(&guc->ct.wq, &wait, state);
    583
    584		if (!atomic_read(wait_var))
    585			break;
    586
    587		if (signal_pending_state(state, current)) {
    588			timeout = -EINTR;
    589			break;
    590		}
    591
    592		if (!timeout) {
    593			timeout = -ETIME;
    594			break;
    595		}
    596
    597		timeout = io_schedule_timeout(timeout);
    598	}
    599	finish_wait(&guc->ct.wq, &wait);
    600
    601	return (timeout < 0) ? timeout : 0;
    602}
    603
    604int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
    605{
    606	if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
    607		return 0;
    608
    609	return intel_guc_wait_for_pending_msg(guc,
    610					      &guc->outstanding_submission_g2h,
    611					      true, timeout);
    612}
    613
    614static int guc_context_policy_init(struct intel_context *ce, bool loop);
    615static int try_context_registration(struct intel_context *ce, bool loop);
    616
    617static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
    618{
    619	int err = 0;
    620	struct intel_context *ce = request_to_scheduling_context(rq);
    621	u32 action[3];
    622	int len = 0;
    623	u32 g2h_len_dw = 0;
    624	bool enabled;
    625
    626	lockdep_assert_held(&rq->engine->sched_engine->lock);
    627
    628	/*
    629	 * Corner case where requests were sitting in the priority list or a
    630	 * request was resubmitted after the context was banned.
    631	 */
    632	if (unlikely(intel_context_is_banned(ce))) {
    633		i915_request_put(i915_request_mark_eio(rq));
    634		intel_engine_signal_breadcrumbs(ce->engine);
    635		return 0;
    636	}
    637
    638	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
    639	GEM_BUG_ON(context_guc_id_invalid(ce));
    640
    641	if (context_policy_required(ce)) {
    642		err = guc_context_policy_init(ce, false);
    643		if (err)
    644			return err;
    645	}
    646
    647	spin_lock(&ce->guc_state.lock);
    648
    649	/*
    650	 * The request / context will be run on the hardware when scheduling
    651	 * gets enabled in the unblock. For multi-lrc we still submit the
    652	 * context to move the LRC tails.
    653	 */
    654	if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
    655		goto out;
    656
    657	enabled = context_enabled(ce) || context_blocked(ce);
    658
    659	if (!enabled) {
    660		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
    661		action[len++] = ce->guc_id.id;
    662		action[len++] = GUC_CONTEXT_ENABLE;
    663		set_context_pending_enable(ce);
    664		intel_context_get(ce);
    665		g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
    666	} else {
    667		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
    668		action[len++] = ce->guc_id.id;
    669	}
    670
    671	err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
    672	if (!enabled && !err) {
    673		trace_intel_context_sched_enable(ce);
    674		atomic_inc(&guc->outstanding_submission_g2h);
    675		set_context_enabled(ce);
    676
    677		/*
    678		 * Without multi-lrc, the KMD does the submission step (moving the
    679		 * LRC tail), so enabling scheduling is sufficient to submit the
    680		 * context. This isn't the case with multi-lrc submission, as the
    681		 * GuC needs to move the tails, hence the need for another H2G
    682		 * to submit a multi-lrc context after enabling scheduling.
    683		 */
    684		if (intel_context_is_parent(ce)) {
    685			action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
    686			err = intel_guc_send_nb(guc, action, len - 1, 0);
    687		}
    688	} else if (!enabled) {
    689		clr_context_pending_enable(ce);
    690		intel_context_put(ce);
    691	}
    692	if (likely(!err))
    693		trace_i915_request_guc_submit(rq);
    694
    695out:
    696	spin_unlock(&ce->guc_state.lock);
    697	return err;
    698}
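
/*
 * Illustrative note, not part of the original driver: the two H2G shapes
 * built above are a 3-dword {INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
 * guc_id, GUC_CONTEXT_ENABLE} when scheduling still has to be enabled
 * (which expects a G2H reply), and a 2-dword {INTEL_GUC_ACTION_SCHED_CONTEXT,
 * guc_id} "new work available" nudge otherwise, matching the submission flow
 * described in the DOC comment at the top of this file.
 */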
    699
    700static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
    701{
    702	int ret = __guc_add_request(guc, rq);
    703
    704	if (unlikely(ret == -EBUSY)) {
    705		guc->stalled_request = rq;
    706		guc->submission_stall_reason = STALL_ADD_REQUEST;
    707	}
    708
    709	return ret;
    710}
    711
    712static inline void guc_set_lrc_tail(struct i915_request *rq)
    713{
    714	rq->context->lrc_reg_state[CTX_RING_TAIL] =
    715		intel_ring_set_tail(rq->ring, rq->tail);
    716}
    717
    718static inline int rq_prio(const struct i915_request *rq)
    719{
    720	return rq->sched.attr.priority;
    721}
    722
    723static bool is_multi_lrc_rq(struct i915_request *rq)
    724{
    725	return intel_context_is_parallel(rq->context);
    726}
    727
    728static bool can_merge_rq(struct i915_request *rq,
    729			 struct i915_request *last)
    730{
    731	return request_to_scheduling_context(rq) ==
    732		request_to_scheduling_context(last);
    733}
    734
    735static u32 wq_space_until_wrap(struct intel_context *ce)
    736{
    737	return (WQ_SIZE - ce->parallel.guc.wqi_tail);
    738}
    739
    740static void write_wqi(struct guc_sched_wq_desc *wq_desc,
    741		      struct intel_context *ce,
    742		      u32 wqi_size)
    743{
    744	BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
    745
    746	/*
    747	 * Ensure WQI are visible before updating tail
    748	 */
    749	intel_guc_write_barrier(ce_to_guc(ce));
    750
    751	ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
    752		(WQ_SIZE - 1);
    753	WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail);
    754}
    755
    756static int guc_wq_noop_append(struct intel_context *ce)
    757{
    758	struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
    759	u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce));
    760	u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
    761
    762	if (!wqi)
    763		return -EBUSY;
    764
    765	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
    766
    767	*wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
    768		FIELD_PREP(WQ_LEN_MASK, len_dw);
    769	ce->parallel.guc.wqi_tail = 0;
    770
    771	return 0;
    772}
    773
    774static int __guc_wq_item_append(struct i915_request *rq)
    775{
    776	struct intel_context *ce = request_to_scheduling_context(rq);
    777	struct intel_context *child;
    778	struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
    779	unsigned int wqi_size = (ce->parallel.number_children + 4) *
    780		sizeof(u32);
    781	u32 *wqi;
    782	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
    783	int ret;
    784
    785	/* Ensure the context is in the correct state before updating the work queue */
    786	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
    787	GEM_BUG_ON(context_guc_id_invalid(ce));
    788	GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
    789	GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));
    790
    791	/* Insert NOOP if this work queue item will wrap the tail pointer. */
    792	if (wqi_size > wq_space_until_wrap(ce)) {
    793		ret = guc_wq_noop_append(ce);
    794		if (ret)
    795			return ret;
    796	}
    797
    798	wqi = get_wq_pointer(wq_desc, ce, wqi_size);
    799	if (!wqi)
    800		return -EBUSY;
    801
    802	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
    803
    804	*wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
    805		FIELD_PREP(WQ_LEN_MASK, len_dw);
    806	*wqi++ = ce->lrc.lrca;
    807	*wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
    808	       FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
    809	*wqi++ = 0;	/* fence_id */
    810	for_each_child(ce, child)
    811		*wqi++ = child->ring->tail / sizeof(u64);
    812
    813	write_wqi(wq_desc, ce, wqi_size);
    814
    815	return 0;
    816}
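
/*
 * Illustrative example, not part of the original driver: for a parent with
 * three children, wqi_size above is (3 + 4) * sizeof(u32) = 28 bytes, laid
 * out as one header dword (type + length), the parent's lrca, a dword packing
 * the guc_id and parent ring tail, a fence_id dword, and one ring-tail dword
 * per child. len_dw excludes the header dword, so it is 6 in this case.
 */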
    817
    818static int guc_wq_item_append(struct intel_guc *guc,
    819			      struct i915_request *rq)
    820{
    821	struct intel_context *ce = request_to_scheduling_context(rq);
    822	int ret = 0;
    823
    824	if (likely(!intel_context_is_banned(ce))) {
    825		ret = __guc_wq_item_append(rq);
    826
    827		if (unlikely(ret == -EBUSY)) {
    828			guc->stalled_request = rq;
    829			guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
    830		}
    831	}
    832
    833	return ret;
    834}
    835
    836static bool multi_lrc_submit(struct i915_request *rq)
    837{
    838	struct intel_context *ce = request_to_scheduling_context(rq);
    839
    840	intel_ring_set_tail(rq->ring, rq->tail);
    841
    842	/*
    843	 * We expect the front end (execbuf IOCTL) to set this flag on the last
    844	 * request generated from a multi-BB submission. This indicates to the
    845	 * backend (GuC interface) that we should submit this context thus
    846	 * submitting all the requests generated in parallel.
    847	 */
    848	return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
    849		intel_context_is_banned(ce);
    850}
    851
    852static int guc_dequeue_one_context(struct intel_guc *guc)
    853{
    854	struct i915_sched_engine * const sched_engine = guc->sched_engine;
    855	struct i915_request *last = NULL;
    856	bool submit = false;
    857	struct rb_node *rb;
    858	int ret;
    859
    860	lockdep_assert_held(&sched_engine->lock);
    861
    862	if (guc->stalled_request) {
    863		submit = true;
    864		last = guc->stalled_request;
    865
    866		switch (guc->submission_stall_reason) {
    867		case STALL_REGISTER_CONTEXT:
    868			goto register_context;
    869		case STALL_MOVE_LRC_TAIL:
    870			goto move_lrc_tail;
    871		case STALL_ADD_REQUEST:
    872			goto add_request;
    873		default:
    874			MISSING_CASE(guc->submission_stall_reason);
    875		}
    876	}
    877
    878	while ((rb = rb_first_cached(&sched_engine->queue))) {
    879		struct i915_priolist *p = to_priolist(rb);
    880		struct i915_request *rq, *rn;
    881
    882		priolist_for_each_request_consume(rq, rn, p) {
    883			if (last && !can_merge_rq(rq, last))
    884				goto register_context;
    885
    886			list_del_init(&rq->sched.link);
    887
    888			__i915_request_submit(rq);
    889
    890			trace_i915_request_in(rq, 0);
    891			last = rq;
    892
    893			if (is_multi_lrc_rq(rq)) {
    894				/*
    895				 * We need to coalesce all multi-lrc requests in
    896				 * a relationship into a single H2G. We are
    897				 * guaranteed that all of these requests will be
    898				 * submitted sequentially.
    899				 */
    900				if (multi_lrc_submit(rq)) {
    901					submit = true;
    902					goto register_context;
    903				}
    904			} else {
    905				submit = true;
    906			}
    907		}
    908
    909		rb_erase_cached(&p->node, &sched_engine->queue);
    910		i915_priolist_free(p);
    911	}
    912
    913register_context:
    914	if (submit) {
    915		struct intel_context *ce = request_to_scheduling_context(last);
    916
    917		if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
    918			     !intel_context_is_banned(ce))) {
    919			ret = try_context_registration(ce, false);
    920			if (unlikely(ret == -EPIPE)) {
    921				goto deadlk;
    922			} else if (ret == -EBUSY) {
    923				guc->stalled_request = last;
    924				guc->submission_stall_reason =
    925					STALL_REGISTER_CONTEXT;
    926				goto schedule_tasklet;
    927			} else if (ret != 0) {
    928				GEM_WARN_ON(ret);	/* Unexpected */
    929				goto deadlk;
    930			}
    931		}
    932
    933move_lrc_tail:
    934		if (is_multi_lrc_rq(last)) {
    935			ret = guc_wq_item_append(guc, last);
    936			if (ret == -EBUSY) {
    937				goto schedule_tasklet;
    938			} else if (ret != 0) {
    939				GEM_WARN_ON(ret);	/* Unexpected */
    940				goto deadlk;
    941			}
    942		} else {
    943			guc_set_lrc_tail(last);
    944		}
    945
    946add_request:
    947		ret = guc_add_request(guc, last);
    948		if (unlikely(ret == -EPIPE)) {
    949			goto deadlk;
    950		} else if (ret == -EBUSY) {
    951			goto schedule_tasklet;
    952		} else if (ret != 0) {
    953			GEM_WARN_ON(ret);	/* Unexpected */
    954			goto deadlk;
    955		}
    956	}
    957
    958	guc->stalled_request = NULL;
    959	guc->submission_stall_reason = STALL_NONE;
    960	return submit;
    961
    962deadlk:
    963	sched_engine->tasklet.callback = NULL;
    964	tasklet_disable_nosync(&sched_engine->tasklet);
    965	return false;
    966
    967schedule_tasklet:
    968	tasklet_schedule(&sched_engine->tasklet);
    969	return false;
    970}
    971
    972static void guc_submission_tasklet(struct tasklet_struct *t)
    973{
    974	struct i915_sched_engine *sched_engine =
    975		from_tasklet(sched_engine, t, tasklet);
    976	unsigned long flags;
    977	bool loop;
    978
    979	spin_lock_irqsave(&sched_engine->lock, flags);
    980
    981	do {
    982		loop = guc_dequeue_one_context(sched_engine->private_data);
    983	} while (loop);
    984
    985	i915_sched_engine_reset_on_empty(sched_engine);
    986
    987	spin_unlock_irqrestore(&sched_engine->lock, flags);
    988}
    989
    990static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
    991{
    992	if (iir & GT_RENDER_USER_INTERRUPT)
    993		intel_engine_signal_breadcrumbs(engine);
    994}
    995
    996static void __guc_context_destroy(struct intel_context *ce);
    997static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
    998static void guc_signal_context_fence(struct intel_context *ce);
    999static void guc_cancel_context_requests(struct intel_context *ce);
   1000static void guc_blocked_fence_complete(struct intel_context *ce);
   1001
   1002static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
   1003{
   1004	struct intel_context *ce;
   1005	unsigned long index, flags;
   1006	bool pending_disable, pending_enable, deregister, destroyed, banned;
   1007
   1008	xa_lock_irqsave(&guc->context_lookup, flags);
   1009	xa_for_each(&guc->context_lookup, index, ce) {
   1010		/*
   1011		 * Corner case where the ref count on the object is zero but the
   1012		 * deregister G2H was lost. In this case we don't touch the ref
   1013		 * count and finish destroying the context.
   1014		 */
   1015		bool do_put = kref_get_unless_zero(&ce->ref);
   1016
   1017		xa_unlock(&guc->context_lookup);
   1018
   1019		spin_lock(&ce->guc_state.lock);
   1020
   1021		/*
   1022		 * Once we are at this point submission_disabled() is guaranteed
   1023		 * to be visible to all callers who set the below flags (see above
   1024		 * flush and flushes in reset_prepare). If submission_disabled()
   1025		 * is set, the caller shouldn't set these flags.
   1026		 */
   1027
   1028		destroyed = context_destroyed(ce);
   1029		pending_enable = context_pending_enable(ce);
   1030		pending_disable = context_pending_disable(ce);
   1031		deregister = context_wait_for_deregister_to_register(ce);
   1032		banned = context_banned(ce);
   1033		init_sched_state(ce);
   1034
   1035		spin_unlock(&ce->guc_state.lock);
   1036
   1037		if (pending_enable || destroyed || deregister) {
   1038			decr_outstanding_submission_g2h(guc);
   1039			if (deregister)
   1040				guc_signal_context_fence(ce);
   1041			if (destroyed) {
   1042				intel_gt_pm_put_async(guc_to_gt(guc));
   1043				release_guc_id(guc, ce);
   1044				__guc_context_destroy(ce);
   1045			}
   1046			if (pending_enable || deregister)
   1047				intel_context_put(ce);
   1048		}
   1049
   1050		/* Not mutually exclusive with the above if statement. */
   1051		if (pending_disable) {
   1052			guc_signal_context_fence(ce);
   1053			if (banned) {
   1054				guc_cancel_context_requests(ce);
   1055				intel_engine_signal_breadcrumbs(ce->engine);
   1056			}
   1057			intel_context_sched_disable_unpin(ce);
   1058			decr_outstanding_submission_g2h(guc);
   1059
   1060			spin_lock(&ce->guc_state.lock);
   1061			guc_blocked_fence_complete(ce);
   1062			spin_unlock(&ce->guc_state.lock);
   1063
   1064			intel_context_put(ce);
   1065		}
   1066
   1067		if (do_put)
   1068			intel_context_put(ce);
   1069		xa_lock(&guc->context_lookup);
   1070	}
   1071	xa_unlock_irqrestore(&guc->context_lookup, flags);
   1072}
   1073
   1074/*
   1075 * GuC stores busyness stats for each engine at context in/out boundaries. A
   1076 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
   1077 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
   1078 * GuC.
   1079 *
   1080 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
   1081 * is valid (!= ~0) and start is non-zero, the engine is considered to be
   1082 * active. For an active engine total busyness = total + (now - start), where
   1083 * 'now' is the time at which the busyness is sampled. For an inactive engine,
   1084 * total busyness = total.
   1085 *
   1086 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
   1087 *
   1088 * The start and total values provided by GuC are 32 bits and wrap around in a
   1089 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
   1090 * increasing ns values, there is a need for this implementation to account for
   1091 * overflows and extend the GuC provided values to 64 bits before returning
   1092 * busyness to the user. In order to do that, a worker runs periodically with
   1093 * a period of 1/8th the time it takes for the timestamp to wrap (i.e. once in
   1094 * about 27 seconds for a gt clock frequency of 19.2 MHz).
   1095 */
   1096
   1097#define WRAP_TIME_CLKS U32_MAX
   1098#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)
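
/*
 * Illustrative arithmetic, not part of the original driver: at a gt clock of
 * 19.2 MHz the 32-bit timestamp wraps every U32_MAX / 19.2e6 ~= 224 seconds,
 * so POLL_TIME_CLKS corresponds to roughly 224 / 8 ~= 28 seconds, i.e. the
 * ~27 second ping period quoted in the comment above.
 */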
   1099
   1100static void
   1101__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
   1102{
   1103	u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
   1104	u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);
   1105
   1106	if (new_start == lower_32_bits(*prev_start))
   1107		return;
   1108
   1109	/*
   1110	 * When gt is unparked, we update the gt timestamp and start the ping
   1111	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
   1112	 * is unparked, all switched in contexts will have a start time that is
   1113	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
   1114	 *
   1115	 * If neither gt_stamp nor new_start has rolled over, then the
   1116	 * gt_stamp_hi does not need to be adjusted, however if one of them has
   1117	 * rolled over, we need to adjust gt_stamp_hi accordingly.
   1118	 *
   1119	 * The below conditions address the cases of new_start rollover and
   1120	 * gt_stamp_last rollover respectively.
   1121	 */
   1122	if (new_start < gt_stamp_last &&
   1123	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
   1124		gt_stamp_hi++;
   1125
   1126	if (new_start > gt_stamp_last &&
   1127	    (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
   1128		gt_stamp_hi--;
   1129
   1130	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
   1131}
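
/*
 * Illustrative example, not part of the original driver: suppose
 * guc->timestamp.gt_stamp is 0x1_00000010 (gt_stamp_hi = 1, gt_stamp_last =
 * 0x10) and GuC reports new_start = 0xfffffff0, i.e. the context switched in
 * just before the 32-bit counter wrapped. Then new_start > gt_stamp_last and
 * (gt_stamp_last - new_start) == 0x20 in u32 arithmetic, well within
 * POLL_TIME_CLKS, so gt_stamp_hi is decremented and *prev_start becomes
 * 0x0_fffffff0 - the correct 64-bit extension of the start time.
 */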
   1132
   1133#define record_read(map_, field_) \
   1134	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)
   1135
   1136/*
   1137 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
   1138 * we run into a race where the value read is inconsistent. Sometimes the
   1139 * inconsistency is in reading the upper bytes of the last_in value when this
   1140 * race occurs. Two kinds of corruption are seen - either the upper 8 bits or
   1141 * the upper 24 bits are zero. Since the corrupted values are still non-zero,
   1142 * it is non-trivial to determine their validity. Instead we read the values
   1143 * multiple times until they are consistent. In test runs, 3 attempts result in
   1144 * consistent values. The upper bound is set to 6 attempts and may need to be
   1145 * tuned as per any new occurrences.
   1146 */
   1147static void __get_engine_usage_record(struct intel_engine_cs *engine,
   1148				      u32 *last_in, u32 *id, u32 *total)
   1149{
   1150	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
   1151	int i = 0;
   1152
   1153	do {
   1154		*last_in = record_read(&rec_map, last_switch_in_stamp);
   1155		*id = record_read(&rec_map, current_context_index);
   1156		*total = record_read(&rec_map, total_runtime);
   1157
   1158		if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
   1159		    record_read(&rec_map, current_context_index) == *id &&
   1160		    record_read(&rec_map, total_runtime) == *total)
   1161			break;
   1162	} while (++i < 6);
   1163}
   1164
   1165static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
   1166{
   1167	struct intel_engine_guc_stats *stats = &engine->stats.guc;
   1168	struct intel_guc *guc = &engine->gt->uc.guc;
   1169	u32 last_switch, ctx_id, total;
   1170
   1171	lockdep_assert_held(&guc->timestamp.lock);
   1172
   1173	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
   1174
   1175	stats->running = ctx_id != ~0U && last_switch;
   1176	if (stats->running)
   1177		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);
   1178
   1179	/*
   1180	 * Instead of adjusting the total for overflow, just add the
   1181	 * difference from the previous sample to stats->total_gt_clks.
   1182	 */
   1183	if (total && total != ~0U) {
   1184		stats->total_gt_clks += (u32)(total - stats->prev_total);
   1185		stats->prev_total = total;
   1186	}
   1187}
   1188
   1189static u32 gpm_timestamp_shift(struct intel_gt *gt)
   1190{
   1191	intel_wakeref_t wakeref;
   1192	u32 reg, shift;
   1193
   1194	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
   1195		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
   1196
   1197	shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
   1198		GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
   1199
   1200	return 3 - shift;
   1201}
   1202
   1203static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
   1204{
   1205	struct intel_gt *gt = guc_to_gt(guc);
   1206	u32 gt_stamp_lo, gt_stamp_hi;
   1207	u64 gpm_ts;
   1208
   1209	lockdep_assert_held(&guc->timestamp.lock);
   1210
   1211	gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
   1212	gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
   1213					  MISC_STATUS1) >> guc->timestamp.shift;
   1214	gt_stamp_lo = lower_32_bits(gpm_ts);
   1215	*now = ktime_get();
   1216
   1217	if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
   1218		gt_stamp_hi++;
   1219
   1220	guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
   1221}
   1222
   1223/*
   1224 * Unlike the execlist mode of submission, total and active times are in terms of
   1225 * gt clocks. The *now parameter is retained to return the cpu time at which the
   1226 * busyness was sampled.
   1227 */
   1228static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
   1229{
   1230	struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
   1231	struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
   1232	struct intel_gt *gt = engine->gt;
   1233	struct intel_guc *guc = &gt->uc.guc;
   1234	u64 total, gt_stamp_saved;
   1235	unsigned long flags;
   1236	u32 reset_count;
   1237	bool in_reset;
   1238
   1239	spin_lock_irqsave(&guc->timestamp.lock, flags);
   1240
   1241	/*
   1242	 * If a reset happened, we risk reading partially updated engine
   1243	 * busyness from GuC, so we just use the driver stored copy of busyness.
   1244	 * Synchronize with gt reset using reset_count and the
   1245	 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
   1246	 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
   1247	 * usable by checking the flag afterwards.
   1248	 */
   1249	reset_count = i915_reset_count(gpu_error);
   1250	in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);
   1251
   1252	*now = ktime_get();
   1253
   1254	/*
   1255	 * The active busyness depends on start_gt_clk and gt_stamp.
   1256	 * gt_stamp is updated by i915 only when gt is awake and the
   1257	 * start_gt_clk is derived from GuC state. To get a consistent
   1258	 * view of activity, we query the GuC state only if gt is awake.
   1259	 */
   1260	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
   1261		stats_saved = *stats;
   1262		gt_stamp_saved = guc->timestamp.gt_stamp;
   1263		/*
   1264		 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
   1265		 * start_gt_clk' calculation below for active engines.
   1266		 */
   1267		guc_update_engine_gt_clks(engine);
   1268		guc_update_pm_timestamp(guc, now);
   1269		intel_gt_pm_put_async(gt);
   1270		if (i915_reset_count(gpu_error) != reset_count) {
   1271			*stats = stats_saved;
   1272			guc->timestamp.gt_stamp = gt_stamp_saved;
   1273		}
   1274	}
   1275
   1276	total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
   1277	if (stats->running) {
   1278		u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
   1279
   1280		total += intel_gt_clock_interval_to_ns(gt, clk);
   1281	}
   1282
   1283	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
   1284
   1285	return ns_to_ktime(total);
   1286}
   1287
   1288static void __reset_guc_busyness_stats(struct intel_guc *guc)
   1289{
   1290	struct intel_gt *gt = guc_to_gt(guc);
   1291	struct intel_engine_cs *engine;
   1292	enum intel_engine_id id;
   1293	unsigned long flags;
   1294	ktime_t unused;
   1295
   1296	cancel_delayed_work_sync(&guc->timestamp.work);
   1297
   1298	spin_lock_irqsave(&guc->timestamp.lock, flags);
   1299
   1300	guc_update_pm_timestamp(guc, &unused);
   1301	for_each_engine(engine, gt, id) {
   1302		guc_update_engine_gt_clks(engine);
   1303		engine->stats.guc.prev_total = 0;
   1304	}
   1305
   1306	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
   1307}
   1308
   1309static void __update_guc_busyness_stats(struct intel_guc *guc)
   1310{
   1311	struct intel_gt *gt = guc_to_gt(guc);
   1312	struct intel_engine_cs *engine;
   1313	enum intel_engine_id id;
   1314	unsigned long flags;
   1315	ktime_t unused;
   1316
   1317	spin_lock_irqsave(&guc->timestamp.lock, flags);
   1318
   1319	guc_update_pm_timestamp(guc, &unused);
   1320	for_each_engine(engine, gt, id)
   1321		guc_update_engine_gt_clks(engine);
   1322
   1323	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
   1324}
   1325
   1326static void guc_timestamp_ping(struct work_struct *wrk)
   1327{
   1328	struct intel_guc *guc = container_of(wrk, typeof(*guc),
   1329					     timestamp.work.work);
   1330	struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
   1331	struct intel_gt *gt = guc_to_gt(guc);
   1332	intel_wakeref_t wakeref;
   1333	int srcu, ret;
   1334
   1335	/*
   1336	 * Synchronize with gt reset to make sure the worker does not
   1337	 * corrupt the engine/guc stats.
   1338	 */
   1339	ret = intel_gt_reset_trylock(gt, &srcu);
   1340	if (ret)
   1341		return;
   1342
   1343	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
   1344		__update_guc_busyness_stats(guc);
   1345
   1346	intel_gt_reset_unlock(gt, srcu);
   1347
   1348	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
   1349			 guc->timestamp.ping_delay);
   1350}
   1351
   1352static int guc_action_enable_usage_stats(struct intel_guc *guc)
   1353{
   1354	u32 offset = intel_guc_engine_usage_offset(guc);
   1355	u32 action[] = {
   1356		INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
   1357		offset,
   1358		0,
   1359	};
   1360
   1361	return intel_guc_send(guc, action, ARRAY_SIZE(action));
   1362}
   1363
   1364static void guc_init_engine_stats(struct intel_guc *guc)
   1365{
   1366	struct intel_gt *gt = guc_to_gt(guc);
   1367	intel_wakeref_t wakeref;
   1368
   1369	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
   1370			 guc->timestamp.ping_delay);
   1371
   1372	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
   1373		int ret = guc_action_enable_usage_stats(guc);
   1374
   1375		if (ret)
   1376			drm_err(&gt->i915->drm,
   1377				"Failed to enable usage stats: %d!\n", ret);
   1378	}
   1379}
   1380
   1381void intel_guc_busyness_park(struct intel_gt *gt)
   1382{
   1383	struct intel_guc *guc = &gt->uc.guc;
   1384
   1385	if (!guc_submission_initialized(guc))
   1386		return;
   1387
   1388	cancel_delayed_work(&guc->timestamp.work);
   1389	__update_guc_busyness_stats(guc);
   1390}
   1391
   1392void intel_guc_busyness_unpark(struct intel_gt *gt)
   1393{
   1394	struct intel_guc *guc = &gt->uc.guc;
   1395	unsigned long flags;
   1396	ktime_t unused;
   1397
   1398	if (!guc_submission_initialized(guc))
   1399		return;
   1400
   1401	spin_lock_irqsave(&guc->timestamp.lock, flags);
   1402	guc_update_pm_timestamp(guc, &unused);
   1403	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
   1404	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
   1405			 guc->timestamp.ping_delay);
   1406}
   1407
   1408static inline bool
   1409submission_disabled(struct intel_guc *guc)
   1410{
   1411	struct i915_sched_engine * const sched_engine = guc->sched_engine;
   1412
   1413	return unlikely(!sched_engine ||
   1414			!__tasklet_is_enabled(&sched_engine->tasklet) ||
   1415			intel_gt_is_wedged(guc_to_gt(guc)));
   1416}
   1417
   1418static void disable_submission(struct intel_guc *guc)
   1419{
   1420	struct i915_sched_engine * const sched_engine = guc->sched_engine;
   1421
   1422	if (__tasklet_is_enabled(&sched_engine->tasklet)) {
   1423		GEM_BUG_ON(!guc->ct.enabled);
   1424		__tasklet_disable_sync_once(&sched_engine->tasklet);
   1425		sched_engine->tasklet.callback = NULL;
   1426	}
   1427}
   1428
   1429static void enable_submission(struct intel_guc *guc)
   1430{
   1431	struct i915_sched_engine * const sched_engine = guc->sched_engine;
   1432	unsigned long flags;
   1433
   1434	spin_lock_irqsave(&guc->sched_engine->lock, flags);
   1435	sched_engine->tasklet.callback = guc_submission_tasklet;
   1436	wmb();	/* Make sure callback visible */
   1437	if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
   1438	    __tasklet_enable(&sched_engine->tasklet)) {
   1439		GEM_BUG_ON(!guc->ct.enabled);
   1440
   1441		/* And kick in case we missed a new request submission. */
   1442		tasklet_hi_schedule(&sched_engine->tasklet);
   1443	}
   1444	spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
   1445}
   1446
   1447static void guc_flush_submissions(struct intel_guc *guc)
   1448{
   1449	struct i915_sched_engine * const sched_engine = guc->sched_engine;
   1450	unsigned long flags;
   1451
   1452	spin_lock_irqsave(&sched_engine->lock, flags);
   1453	spin_unlock_irqrestore(&sched_engine->lock, flags);
   1454}
   1455
   1456static void guc_flush_destroyed_contexts(struct intel_guc *guc);
   1457
   1458void intel_guc_submission_reset_prepare(struct intel_guc *guc)
   1459{
   1460	if (unlikely(!guc_submission_initialized(guc))) {
   1461		/* Reset called during driver load? GuC not yet initialised! */
   1462		return;
   1463	}
   1464
   1465	intel_gt_park_heartbeats(guc_to_gt(guc));
   1466	disable_submission(guc);
   1467	guc->interrupts.disable(guc);
   1468	__reset_guc_busyness_stats(guc);
   1469
   1470	/* Flush IRQ handler */
   1471	spin_lock_irq(&guc_to_gt(guc)->irq_lock);
   1472	spin_unlock_irq(&guc_to_gt(guc)->irq_lock);
   1473
   1474	guc_flush_submissions(guc);
   1475	guc_flush_destroyed_contexts(guc);
   1476	flush_work(&guc->ct.requests.worker);
   1477
   1478	scrub_guc_desc_for_outstanding_g2h(guc);
   1479}
   1480
   1481static struct intel_engine_cs *
   1482guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
   1483{
   1484	struct intel_engine_cs *engine;
   1485	intel_engine_mask_t tmp, mask = ve->mask;
   1486	unsigned int num_siblings = 0;
   1487
   1488	for_each_engine_masked(engine, ve->gt, mask, tmp)
   1489		if (num_siblings++ == sibling)
   1490			return engine;
   1491
   1492	return NULL;
   1493}
   1494
   1495static inline struct intel_engine_cs *
   1496__context_to_physical_engine(struct intel_context *ce)
   1497{
   1498	struct intel_engine_cs *engine = ce->engine;
   1499
   1500	if (intel_engine_is_virtual(engine))
   1501		engine = guc_virtual_get_sibling(engine, 0);
   1502
   1503	return engine;
   1504}
   1505
   1506static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
   1507{
   1508	struct intel_engine_cs *engine = __context_to_physical_engine(ce);
   1509
   1510	if (intel_context_is_banned(ce))
   1511		return;
   1512
   1513	GEM_BUG_ON(!intel_context_is_pinned(ce));
   1514
   1515	/*
   1516	 * We want a simple context + ring to execute the breadcrumb update.
   1517	 * We cannot rely on the context being intact across the GPU hang,
   1518	 * so clear it and rebuild just what we need for the breadcrumb.
   1519	 * All pending requests for this context will be zapped, and any
   1520	 * future request will be after userspace has had the opportunity
   1521	 * to recreate its own state.
   1522	 */
   1523	if (scrub)
   1524		lrc_init_regs(ce, engine, true);
   1525
   1526	/* Rerun the request; its payload has been neutered (if guilty). */
   1527	lrc_update_regs(ce, engine, head);
   1528}
   1529
   1530static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
   1531{
   1532	static const i915_reg_t _reg[I915_NUM_ENGINES] = {
   1533		[RCS0] = MSG_IDLE_CS,
   1534		[BCS0] = MSG_IDLE_BCS,
   1535		[VCS0] = MSG_IDLE_VCS0,
   1536		[VCS1] = MSG_IDLE_VCS1,
   1537		[VCS2] = MSG_IDLE_VCS2,
   1538		[VCS3] = MSG_IDLE_VCS3,
   1539		[VCS4] = MSG_IDLE_VCS4,
   1540		[VCS5] = MSG_IDLE_VCS5,
   1541		[VCS6] = MSG_IDLE_VCS6,
   1542		[VCS7] = MSG_IDLE_VCS7,
   1543		[VECS0] = MSG_IDLE_VECS0,
   1544		[VECS1] = MSG_IDLE_VECS1,
   1545		[VECS2] = MSG_IDLE_VECS2,
   1546		[VECS3] = MSG_IDLE_VECS3,
   1547		[CCS0] = MSG_IDLE_CS,
   1548		[CCS1] = MSG_IDLE_CS,
   1549		[CCS2] = MSG_IDLE_CS,
   1550		[CCS3] = MSG_IDLE_CS,
   1551	};
   1552	u32 val;
   1553
   1554	if (!_reg[engine->id].reg)
   1555		return 0;
   1556
   1557	val = intel_uncore_read(engine->uncore, _reg[engine->id]);
   1558
   1559	/* bits[29:25] & bits[13:9] >> shift */
   1560	return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
   1561}
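
/*
 * Illustrative note, not part of the original driver: per the workaround
 * comment below, the pending-forcewake status lives in bits 13:9 of MSG_IDLE
 * and its mask in bits 29:25, exactly 16 bits higher. 'val & (val >> 16)'
 * therefore lines the mask up over the status bits so that only masked-in
 * status bits survive, and the final shift by MSG_IDLE_FW_SHIFT returns them
 * as a plain bitmask of pending forcewake domains.
 */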
   1562
   1563static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
   1564{
   1565	int ret;
   1566
   1567	/* Ensure GPM receives fw up/down after CS is stopped */
   1568	udelay(1);
   1569
   1570	/* Wait for forcewake request to complete in GPM */
   1571	ret =  __intel_wait_for_register_fw(gt->uncore,
   1572					    GEN9_PWRGT_DOMAIN_STATUS,
   1573					    fw_mask, fw_mask, 5000, 0, NULL);
   1574
   1575	/* Ensure CS receives fw ack from GPM */
   1576	udelay(1);
   1577
   1578	if (ret)
   1579		GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
   1580}
   1581
   1582/*
   1583 * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
   1584 * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
    1585	 * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the
   1586 * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
   1587 * are concerned only with the gt reset here, we use a logical OR of pending
   1588 * forcewakeups from all reset domains and then wait for them to complete by
   1589 * querying PWRGT_DOMAIN_STATUS.
   1590 */
   1591static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
   1592{
   1593	u32 fw_pending;
   1594
   1595	if (GRAPHICS_VER(engine->i915) != 12)
   1596		return;
   1597
   1598	/*
   1599	 * Wa_22011802037
   1600	 * TODO: Occasionally trying to stop the cs times out, but does not
   1601	 * adversely affect functionality. The timeout is set as a config
   1602	 * parameter that defaults to 100ms. Assuming that this timeout is
   1603	 * sufficient for any pending MI_FORCEWAKEs to complete, ignore the
   1604	 * timeout returned here until it is root caused.
   1605	 */
   1606	intel_engine_stop_cs(engine);
   1607
   1608	fw_pending = __cs_pending_mi_force_wakes(engine);
   1609	if (fw_pending)
   1610		__gpm_wait_for_fw_complete(engine->gt, fw_pending);
   1611}
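
/*
 * Illustrative sketch (not part of the driver, kept under #if 0): models the
 * MSG_IDLE decode performed by __cs_pending_mi_force_wakes() above. Pending
 * forcewake requests sit in bits[13:9] and their valid mask in bits[29:25],
 * exactly 16 bits higher, so "val & (val >> 16)" keeps only the valid pending
 * bits. The mask/shift constants below are assumptions standing in for
 * MSG_IDLE_FW_MASK / MSG_IDLE_FW_SHIFT.
 */
#if 0
static u32 example_msg_idle_pending_fw(u32 val)
{
	const u32 example_fw_mask = 0x3e00;	/* assumed: bits[13:9] */
	const u32 example_fw_shift = 9;		/* assumed: shift down to bit 0 */

	return (val & (val >> 16) & example_fw_mask) >> example_fw_shift;
}
#endif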
   1612
   1613static void guc_reset_nop(struct intel_engine_cs *engine)
   1614{
   1615}
   1616
   1617static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
   1618{
   1619}
   1620
   1621static void
   1622__unwind_incomplete_requests(struct intel_context *ce)
   1623{
   1624	struct i915_request *rq, *rn;
   1625	struct list_head *pl;
   1626	int prio = I915_PRIORITY_INVALID;
   1627	struct i915_sched_engine * const sched_engine =
   1628		ce->engine->sched_engine;
   1629	unsigned long flags;
   1630
   1631	spin_lock_irqsave(&sched_engine->lock, flags);
   1632	spin_lock(&ce->guc_state.lock);
   1633	list_for_each_entry_safe_reverse(rq, rn,
   1634					 &ce->guc_state.requests,
   1635					 sched.link) {
   1636		if (i915_request_completed(rq))
   1637			continue;
   1638
   1639		list_del_init(&rq->sched.link);
   1640		__i915_request_unsubmit(rq);
   1641
   1642		/* Push the request back into the queue for later resubmission. */
   1643		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
   1644		if (rq_prio(rq) != prio) {
   1645			prio = rq_prio(rq);
   1646			pl = i915_sched_lookup_priolist(sched_engine, prio);
   1647		}
   1648		GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
   1649
   1650		list_add(&rq->sched.link, pl);
   1651		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
   1652	}
   1653	spin_unlock(&ce->guc_state.lock);
   1654	spin_unlock_irqrestore(&sched_engine->lock, flags);
   1655}
   1656
   1657static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
   1658{
   1659	bool guilty;
   1660	struct i915_request *rq;
   1661	unsigned long flags;
   1662	u32 head;
   1663	int i, number_children = ce->parallel.number_children;
   1664	struct intel_context *parent = ce;
   1665
   1666	GEM_BUG_ON(intel_context_is_child(ce));
   1667
   1668	intel_context_get(ce);
   1669
   1670	/*
   1671	 * GuC will implicitly mark the context as non-schedulable when it sends
   1672	 * the reset notification. Make sure our state reflects this change. The
   1673	 * context will be marked enabled on resubmission.
   1674	 */
   1675	spin_lock_irqsave(&ce->guc_state.lock, flags);
   1676	clr_context_enabled(ce);
   1677	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   1678
   1679	/*
    1680	 * For each context in the relationship, find the hanging request,
    1681	 * resetting each context / request as needed.
   1682	 */
   1683	for (i = 0; i < number_children + 1; ++i) {
   1684		if (!intel_context_is_pinned(ce))
   1685			goto next_context;
   1686
   1687		guilty = false;
   1688		rq = intel_context_find_active_request(ce);
   1689		if (!rq) {
   1690			head = ce->ring->tail;
   1691			goto out_replay;
   1692		}
   1693
   1694		if (i915_request_started(rq))
   1695			guilty = stalled & ce->engine->mask;
   1696
   1697		GEM_BUG_ON(i915_active_is_idle(&ce->active));
   1698		head = intel_ring_wrap(ce->ring, rq->head);
   1699
   1700		__i915_request_reset(rq, guilty);
   1701out_replay:
   1702		guc_reset_state(ce, head, guilty);
   1703next_context:
   1704		if (i != number_children)
   1705			ce = list_next_entry(ce, parallel.child_link);
   1706	}
   1707
   1708	__unwind_incomplete_requests(parent);
   1709	intel_context_put(parent);
   1710}
   1711
   1712void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
   1713{
   1714	struct intel_context *ce;
   1715	unsigned long index;
   1716	unsigned long flags;
   1717
   1718	if (unlikely(!guc_submission_initialized(guc))) {
   1719		/* Reset called during driver load? GuC not yet initialised! */
   1720		return;
   1721	}
   1722
   1723	xa_lock_irqsave(&guc->context_lookup, flags);
   1724	xa_for_each(&guc->context_lookup, index, ce) {
   1725		if (!kref_get_unless_zero(&ce->ref))
   1726			continue;
   1727
   1728		xa_unlock(&guc->context_lookup);
   1729
   1730		if (intel_context_is_pinned(ce) &&
   1731		    !intel_context_is_child(ce))
   1732			__guc_reset_context(ce, stalled);
   1733
   1734		intel_context_put(ce);
   1735
   1736		xa_lock(&guc->context_lookup);
   1737	}
   1738	xa_unlock_irqrestore(&guc->context_lookup, flags);
   1739
   1740	/* GuC is blown away, drop all references to contexts */
   1741	xa_destroy(&guc->context_lookup);
   1742}
   1743
   1744static void guc_cancel_context_requests(struct intel_context *ce)
   1745{
   1746	struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
   1747	struct i915_request *rq;
   1748	unsigned long flags;
   1749
   1750	/* Mark all executing requests as skipped. */
   1751	spin_lock_irqsave(&sched_engine->lock, flags);
   1752	spin_lock(&ce->guc_state.lock);
   1753	list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
   1754		i915_request_put(i915_request_mark_eio(rq));
   1755	spin_unlock(&ce->guc_state.lock);
   1756	spin_unlock_irqrestore(&sched_engine->lock, flags);
   1757}
   1758
   1759static void
   1760guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
   1761{
   1762	struct i915_request *rq, *rn;
   1763	struct rb_node *rb;
   1764	unsigned long flags;
   1765
   1766	/* Can be called during boot if GuC fails to load */
   1767	if (!sched_engine)
   1768		return;
   1769
   1770	/*
   1771	 * Before we call engine->cancel_requests(), we should have exclusive
   1772	 * access to the submission state. This is arranged for us by the
   1773	 * caller disabling the interrupt generation, the tasklet and other
   1774	 * threads that may then access the same state, giving us a free hand
   1775	 * to reset state. However, we still need to let lockdep be aware that
   1776	 * we know this state may be accessed in hardirq context, so we
   1777	 * disable the irq around this manipulation and we want to keep
   1778	 * the spinlock focused on its duties and not accidentally conflate
   1779	 * coverage to the submission's irq state. (Similarly, although we
   1780	 * shouldn't need to disable irq around the manipulation of the
   1781	 * submission's irq state, we also wish to remind ourselves that
   1782	 * it is irq state.)
   1783	 */
   1784	spin_lock_irqsave(&sched_engine->lock, flags);
   1785
   1786	/* Flush the queued requests to the timeline list (for retiring). */
   1787	while ((rb = rb_first_cached(&sched_engine->queue))) {
   1788		struct i915_priolist *p = to_priolist(rb);
   1789
   1790		priolist_for_each_request_consume(rq, rn, p) {
   1791			list_del_init(&rq->sched.link);
   1792
   1793			__i915_request_submit(rq);
   1794
   1795			i915_request_put(i915_request_mark_eio(rq));
   1796		}
   1797
   1798		rb_erase_cached(&p->node, &sched_engine->queue);
   1799		i915_priolist_free(p);
   1800	}
   1801
   1802	/* Remaining _unready_ requests will be nop'ed when submitted */
   1803
   1804	sched_engine->queue_priority_hint = INT_MIN;
   1805	sched_engine->queue = RB_ROOT_CACHED;
   1806
   1807	spin_unlock_irqrestore(&sched_engine->lock, flags);
   1808}
   1809
   1810void intel_guc_submission_cancel_requests(struct intel_guc *guc)
   1811{
   1812	struct intel_context *ce;
   1813	unsigned long index;
   1814	unsigned long flags;
   1815
   1816	xa_lock_irqsave(&guc->context_lookup, flags);
   1817	xa_for_each(&guc->context_lookup, index, ce) {
   1818		if (!kref_get_unless_zero(&ce->ref))
   1819			continue;
   1820
   1821		xa_unlock(&guc->context_lookup);
   1822
   1823		if (intel_context_is_pinned(ce) &&
   1824		    !intel_context_is_child(ce))
   1825			guc_cancel_context_requests(ce);
   1826
   1827		intel_context_put(ce);
   1828
   1829		xa_lock(&guc->context_lookup);
   1830	}
   1831	xa_unlock_irqrestore(&guc->context_lookup, flags);
   1832
   1833	guc_cancel_sched_engine_requests(guc->sched_engine);
   1834
   1835	/* GuC is blown away, drop all references to contexts */
   1836	xa_destroy(&guc->context_lookup);
   1837}
   1838
   1839void intel_guc_submission_reset_finish(struct intel_guc *guc)
   1840{
   1841	/* Reset called during driver load or during wedge? */
   1842	if (unlikely(!guc_submission_initialized(guc) ||
   1843		     intel_gt_is_wedged(guc_to_gt(guc)))) {
   1844		return;
   1845	}
   1846
   1847	/*
    1848	 * Technically possible for either of these values to be non-zero here,
    1849	 * but very unlikely + harmless. Regardless, add a warning so we can see
    1850	 * in CI if this happens frequently or is a precursor to taking down the
    1851	 * machine.
   1852	 */
   1853	GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
   1854	atomic_set(&guc->outstanding_submission_g2h, 0);
   1855
   1856	intel_guc_global_policies_update(guc);
   1857	enable_submission(guc);
   1858	intel_gt_unpark_heartbeats(guc_to_gt(guc));
   1859}
   1860
   1861static void destroyed_worker_func(struct work_struct *w);
   1862static void reset_fail_worker_func(struct work_struct *w);
   1863
   1864/*
   1865 * Set up the memory resources to be shared with the GuC (via the GGTT)
   1866 * at firmware loading time.
   1867 */
   1868int intel_guc_submission_init(struct intel_guc *guc)
   1869{
   1870	struct intel_gt *gt = guc_to_gt(guc);
   1871
   1872	if (guc->submission_initialized)
   1873		return 0;
   1874
   1875	guc->submission_state.guc_ids_bitmap =
   1876		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
   1877	if (!guc->submission_state.guc_ids_bitmap)
   1878		return -ENOMEM;
   1879
   1880	guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
   1881	guc->timestamp.shift = gpm_timestamp_shift(gt);
   1882	guc->submission_initialized = true;
   1883
   1884	return 0;
   1885}
   1886
   1887void intel_guc_submission_fini(struct intel_guc *guc)
   1888{
   1889	if (!guc->submission_initialized)
   1890		return;
   1891
   1892	guc_flush_destroyed_contexts(guc);
   1893	i915_sched_engine_put(guc->sched_engine);
   1894	bitmap_free(guc->submission_state.guc_ids_bitmap);
   1895	guc->submission_initialized = false;
   1896}
   1897
   1898static inline void queue_request(struct i915_sched_engine *sched_engine,
   1899				 struct i915_request *rq,
   1900				 int prio)
   1901{
   1902	GEM_BUG_ON(!list_empty(&rq->sched.link));
   1903	list_add_tail(&rq->sched.link,
   1904		      i915_sched_lookup_priolist(sched_engine, prio));
   1905	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
   1906	tasklet_hi_schedule(&sched_engine->tasklet);
   1907}
   1908
   1909static int guc_bypass_tasklet_submit(struct intel_guc *guc,
   1910				     struct i915_request *rq)
   1911{
   1912	int ret = 0;
   1913
   1914	__i915_request_submit(rq);
   1915
   1916	trace_i915_request_in(rq, 0);
   1917
   1918	if (is_multi_lrc_rq(rq)) {
   1919		if (multi_lrc_submit(rq)) {
   1920			ret = guc_wq_item_append(guc, rq);
   1921			if (!ret)
   1922				ret = guc_add_request(guc, rq);
   1923		}
   1924	} else {
   1925		guc_set_lrc_tail(rq);
   1926		ret = guc_add_request(guc, rq);
   1927	}
   1928
   1929	if (unlikely(ret == -EPIPE))
   1930		disable_submission(guc);
   1931
   1932	return ret;
   1933}
   1934
   1935static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
   1936{
   1937	struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
   1938	struct intel_context *ce = request_to_scheduling_context(rq);
   1939
   1940	return submission_disabled(guc) || guc->stalled_request ||
   1941		!i915_sched_engine_is_empty(sched_engine) ||
   1942		!ctx_id_mapped(guc, ce->guc_id.id);
   1943}
   1944
   1945static void guc_submit_request(struct i915_request *rq)
   1946{
   1947	struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
   1948	struct intel_guc *guc = &rq->engine->gt->uc.guc;
   1949	unsigned long flags;
   1950
   1951	/* Will be called from irq-context when using foreign fences. */
   1952	spin_lock_irqsave(&sched_engine->lock, flags);
   1953
   1954	if (need_tasklet(guc, rq))
   1955		queue_request(sched_engine, rq, rq_prio(rq));
   1956	else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
   1957		tasklet_hi_schedule(&sched_engine->tasklet);
   1958
   1959	spin_unlock_irqrestore(&sched_engine->lock, flags);
   1960}
   1961
   1962static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
   1963{
   1964	int ret;
   1965
   1966	GEM_BUG_ON(intel_context_is_child(ce));
   1967
   1968	if (intel_context_is_parent(ce))
   1969		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
   1970					      NUMBER_MULTI_LRC_GUC_ID(guc),
   1971					      order_base_2(ce->parallel.number_children
   1972							   + 1));
   1973	else
   1974		ret = ida_simple_get(&guc->submission_state.guc_ids,
   1975				     NUMBER_MULTI_LRC_GUC_ID(guc),
   1976				     guc->submission_state.num_guc_ids,
   1977				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
   1978				     __GFP_NOWARN);
   1979	if (unlikely(ret < 0))
   1980		return ret;
   1981
   1982	ce->guc_id.id = ret;
   1983	return 0;
   1984}
   1985
   1986static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
   1987{
   1988	GEM_BUG_ON(intel_context_is_child(ce));
   1989
   1990	if (!context_guc_id_invalid(ce)) {
   1991		if (intel_context_is_parent(ce))
   1992			bitmap_release_region(guc->submission_state.guc_ids_bitmap,
   1993					      ce->guc_id.id,
   1994					      order_base_2(ce->parallel.number_children
   1995							   + 1));
   1996		else
   1997			ida_simple_remove(&guc->submission_state.guc_ids,
   1998					  ce->guc_id.id);
   1999		clr_ctx_id_mapping(guc, ce->guc_id.id);
   2000		set_context_guc_id_invalid(ce);
   2001	}
   2002	if (!list_empty(&ce->guc_id.link))
   2003		list_del_init(&ce->guc_id.link);
   2004}
   2005
   2006static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
   2007{
   2008	unsigned long flags;
   2009
   2010	spin_lock_irqsave(&guc->submission_state.lock, flags);
   2011	__release_guc_id(guc, ce);
   2012	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
   2013}
   2014
   2015static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
   2016{
   2017	struct intel_context *cn;
   2018
   2019	lockdep_assert_held(&guc->submission_state.lock);
   2020	GEM_BUG_ON(intel_context_is_child(ce));
   2021	GEM_BUG_ON(intel_context_is_parent(ce));
   2022
   2023	if (!list_empty(&guc->submission_state.guc_id_list)) {
   2024		cn = list_first_entry(&guc->submission_state.guc_id_list,
   2025				      struct intel_context,
   2026				      guc_id.link);
   2027
   2028		GEM_BUG_ON(atomic_read(&cn->guc_id.ref));
   2029		GEM_BUG_ON(context_guc_id_invalid(cn));
   2030		GEM_BUG_ON(intel_context_is_child(cn));
   2031		GEM_BUG_ON(intel_context_is_parent(cn));
   2032
   2033		list_del_init(&cn->guc_id.link);
   2034		ce->guc_id.id = cn->guc_id.id;
   2035
   2036		spin_lock(&cn->guc_state.lock);
   2037		clr_context_registered(cn);
   2038		spin_unlock(&cn->guc_state.lock);
   2039
   2040		set_context_guc_id_invalid(cn);
   2041
   2042#ifdef CONFIG_DRM_I915_SELFTEST
   2043		guc->number_guc_id_stolen++;
   2044#endif
   2045
   2046		return 0;
   2047	} else {
   2048		return -EAGAIN;
   2049	}
   2050}
   2051
   2052static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
   2053{
   2054	int ret;
   2055
   2056	lockdep_assert_held(&guc->submission_state.lock);
   2057	GEM_BUG_ON(intel_context_is_child(ce));
   2058
   2059	ret = new_guc_id(guc, ce);
   2060	if (unlikely(ret < 0)) {
   2061		if (intel_context_is_parent(ce))
   2062			return -ENOSPC;
   2063
   2064		ret = steal_guc_id(guc, ce);
   2065		if (ret < 0)
   2066			return ret;
   2067	}
   2068
   2069	if (intel_context_is_parent(ce)) {
   2070		struct intel_context *child;
   2071		int i = 1;
   2072
   2073		for_each_child(ce, child)
   2074			child->guc_id.id = ce->guc_id.id + i++;
   2075	}
   2076
   2077	return 0;
   2078}
   2079
   2080#define PIN_GUC_ID_TRIES	4
   2081static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
   2082{
   2083	int ret = 0;
   2084	unsigned long flags, tries = PIN_GUC_ID_TRIES;
   2085
   2086	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
   2087
   2088try_again:
   2089	spin_lock_irqsave(&guc->submission_state.lock, flags);
   2090
   2091	might_lock(&ce->guc_state.lock);
   2092
   2093	if (context_guc_id_invalid(ce)) {
   2094		ret = assign_guc_id(guc, ce);
   2095		if (ret)
   2096			goto out_unlock;
    2097	ret = 1;	/* Indicates newly assigned guc_id */
   2098	}
   2099	if (!list_empty(&ce->guc_id.link))
   2100		list_del_init(&ce->guc_id.link);
   2101	atomic_inc(&ce->guc_id.ref);
   2102
   2103out_unlock:
   2104	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
   2105
   2106	/*
    2107	 * -EAGAIN indicates no guc_ids are available, so retire any
    2108	 * outstanding requests to see if that frees up a guc_id. If the first
    2109	 * retire didn't help, insert a sleep with the timeslice duration before
    2110	 * attempting to retire more requests. Double the sleep period each
    2111	 * subsequent pass before finally giving up. The sleep period has a
    2112	 * maximum of 100ms and a minimum of 1ms.
   2113	 */
   2114	if (ret == -EAGAIN && --tries) {
   2115		if (PIN_GUC_ID_TRIES - tries > 1) {
   2116			unsigned int timeslice_shifted =
   2117				ce->engine->props.timeslice_duration_ms <<
   2118				(PIN_GUC_ID_TRIES - tries - 2);
   2119			unsigned int max = min_t(unsigned int, 100,
   2120						 timeslice_shifted);
   2121
   2122			msleep(max_t(unsigned int, max, 1));
   2123		}
   2124		intel_gt_retire_requests(guc_to_gt(guc));
   2125		goto try_again;
   2126	}
   2127
   2128	return ret;
   2129}
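
/*
 * Illustrative sketch (not part of the driver, kept under #if 0): a
 * hypothetical model of the -EAGAIN backoff in pin_guc_id() above. The first
 * retry only retires requests; each later retry sleeps the engine timeslice
 * doubled per pass, clamped to the 1ms..100ms window described in the comment.
 */
#if 0
static unsigned int example_pin_guc_id_backoff_ms(unsigned int timeslice_ms,
						  unsigned int retry)
{
	unsigned int ms;

	if (retry < 2)
		return 0;	/* first retry: retire requests only, no sleep */

	ms = timeslice_ms << (retry - 2);
	if (ms > 100)
		ms = 100;

	return ms ? ms : 1;
}
#endif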
   2130
   2131static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
   2132{
   2133	unsigned long flags;
   2134
   2135	GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
   2136	GEM_BUG_ON(intel_context_is_child(ce));
   2137
   2138	if (unlikely(context_guc_id_invalid(ce) ||
   2139		     intel_context_is_parent(ce)))
   2140		return;
   2141
   2142	spin_lock_irqsave(&guc->submission_state.lock, flags);
   2143	if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
   2144	    !atomic_read(&ce->guc_id.ref))
   2145		list_add_tail(&ce->guc_id.link,
   2146			      &guc->submission_state.guc_id_list);
   2147	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
   2148}
   2149
   2150static int __guc_action_register_multi_lrc(struct intel_guc *guc,
   2151					   struct intel_context *ce,
   2152					   struct guc_ctxt_registration_info *info,
   2153					   bool loop)
   2154{
   2155	struct intel_context *child;
   2156	u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
   2157	int len = 0;
   2158	u32 next_id;
   2159
   2160	GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
   2161
   2162	action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
   2163	action[len++] = info->flags;
   2164	action[len++] = info->context_idx;
   2165	action[len++] = info->engine_class;
   2166	action[len++] = info->engine_submit_mask;
   2167	action[len++] = info->wq_desc_lo;
   2168	action[len++] = info->wq_desc_hi;
   2169	action[len++] = info->wq_base_lo;
   2170	action[len++] = info->wq_base_hi;
   2171	action[len++] = info->wq_size;
   2172	action[len++] = ce->parallel.number_children + 1;
   2173	action[len++] = info->hwlrca_lo;
   2174	action[len++] = info->hwlrca_hi;
   2175
   2176	next_id = info->context_idx + 1;
   2177	for_each_child(ce, child) {
   2178		GEM_BUG_ON(next_id++ != child->guc_id.id);
   2179
   2180		/*
   2181		 * NB: GuC interface supports 64 bit LRCA even though i915/HW
   2182		 * only supports 32 bit currently.
   2183		 */
   2184		action[len++] = lower_32_bits(child->lrc.lrca);
   2185		action[len++] = upper_32_bits(child->lrc.lrca);
   2186	}
   2187
   2188	GEM_BUG_ON(len > ARRAY_SIZE(action));
   2189
   2190	return guc_submission_send_busy_loop(guc, action, len, 0, loop);
   2191}
   2192
   2193static int __guc_action_register_context(struct intel_guc *guc,
   2194					 struct guc_ctxt_registration_info *info,
   2195					 bool loop)
   2196{
   2197	u32 action[] = {
   2198		INTEL_GUC_ACTION_REGISTER_CONTEXT,
   2199		info->flags,
   2200		info->context_idx,
   2201		info->engine_class,
   2202		info->engine_submit_mask,
   2203		info->wq_desc_lo,
   2204		info->wq_desc_hi,
   2205		info->wq_base_lo,
   2206		info->wq_base_hi,
   2207		info->wq_size,
   2208		info->hwlrca_lo,
   2209		info->hwlrca_hi,
   2210	};
   2211
   2212	return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
   2213					     0, loop);
   2214}
   2215
   2216static void prepare_context_registration_info(struct intel_context *ce,
   2217					      struct guc_ctxt_registration_info *info);
   2218
   2219static int register_context(struct intel_context *ce, bool loop)
   2220{
   2221	struct guc_ctxt_registration_info info;
   2222	struct intel_guc *guc = ce_to_guc(ce);
   2223	int ret;
   2224
   2225	GEM_BUG_ON(intel_context_is_child(ce));
   2226	trace_intel_context_register(ce);
   2227
   2228	prepare_context_registration_info(ce, &info);
   2229
   2230	if (intel_context_is_parent(ce))
   2231		ret = __guc_action_register_multi_lrc(guc, ce, &info, loop);
   2232	else
   2233		ret = __guc_action_register_context(guc, &info, loop);
   2234	if (likely(!ret)) {
   2235		unsigned long flags;
   2236
   2237		spin_lock_irqsave(&ce->guc_state.lock, flags);
   2238		set_context_registered(ce);
   2239		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   2240
   2241		guc_context_policy_init(ce, loop);
   2242	}
   2243
   2244	return ret;
   2245}
   2246
   2247static int __guc_action_deregister_context(struct intel_guc *guc,
   2248					   u32 guc_id)
   2249{
   2250	u32 action[] = {
   2251		INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
   2252		guc_id,
   2253	};
   2254
   2255	return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
   2256					     G2H_LEN_DW_DEREGISTER_CONTEXT,
   2257					     true);
   2258}
   2259
   2260static int deregister_context(struct intel_context *ce, u32 guc_id)
   2261{
   2262	struct intel_guc *guc = ce_to_guc(ce);
   2263
   2264	GEM_BUG_ON(intel_context_is_child(ce));
   2265	trace_intel_context_deregister(ce);
   2266
   2267	return __guc_action_deregister_context(guc, guc_id);
   2268}
   2269
   2270static inline void clear_children_join_go_memory(struct intel_context *ce)
   2271{
   2272	struct parent_scratch *ps = __get_parent_scratch(ce);
   2273	int i;
   2274
   2275	ps->go.semaphore = 0;
   2276	for (i = 0; i < ce->parallel.number_children + 1; ++i)
   2277		ps->join[i].semaphore = 0;
   2278}
   2279
   2280static inline u32 get_children_go_value(struct intel_context *ce)
   2281{
   2282	return __get_parent_scratch(ce)->go.semaphore;
   2283}
   2284
   2285static inline u32 get_children_join_value(struct intel_context *ce,
   2286					  u8 child_index)
   2287{
   2288	return __get_parent_scratch(ce)->join[child_index].semaphore;
   2289}
   2290
   2291struct context_policy {
   2292	u32 count;
   2293	struct guc_update_context_policy h2g;
   2294};
   2295
   2296static u32 __guc_context_policy_action_size(struct context_policy *policy)
   2297{
   2298	size_t bytes = sizeof(policy->h2g.header) +
   2299		       (sizeof(policy->h2g.klv[0]) * policy->count);
   2300
   2301	return bytes / sizeof(u32);
   2302}
   2303
   2304static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id)
   2305{
   2306	policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
   2307	policy->h2g.header.ctx_id = guc_id;
   2308	policy->count = 0;
   2309}
   2310
   2311#define MAKE_CONTEXT_POLICY_ADD(func, id) \
   2312static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \
   2313{ \
   2314	GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
   2315	policy->h2g.klv[policy->count].kl = \
   2316		FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
   2317		FIELD_PREP(GUC_KLV_0_LEN, 1); \
   2318	policy->h2g.klv[policy->count].value = data; \
   2319	policy->count++; \
   2320}
   2321
   2322MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
   2323MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
   2324MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
   2325MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
   2326
   2327#undef MAKE_CONTEXT_POLICY_ADD
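
/*
 * Illustrative sketch (not part of the driver, kept under #if 0): the shape of
 * one KLV (key/length/value) entry as built by the MAKE_CONTEXT_POLICY_ADD()
 * helpers above - a key selecting the policy plus a length of one dword,
 * followed by the value itself. The 16-bit key position is an assumption for
 * illustration, not the definitive GUC_KLV_0_* layout.
 */
#if 0
struct example_klv {
	u32 kl;		/* key in the upper bits, length (in dwords) below */
	u32 value;
};

static struct example_klv example_make_klv(u16 key, u32 value)
{
	struct example_klv klv = {
		.kl = ((u32)key << 16) | 1,	/* assumed key shift; len = 1 */
		.value = value,
	};

	return klv;
}
#endif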
   2328
   2329static int __guc_context_set_context_policies(struct intel_guc *guc,
   2330					      struct context_policy *policy,
   2331					      bool loop)
   2332{
   2333	return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g,
   2334					__guc_context_policy_action_size(policy),
   2335					0, loop);
   2336}
   2337
   2338static int guc_context_policy_init(struct intel_context *ce, bool loop)
   2339{
   2340	struct intel_engine_cs *engine = ce->engine;
   2341	struct intel_guc *guc = &engine->gt->uc.guc;
   2342	struct context_policy policy;
   2343	u32 execution_quantum;
   2344	u32 preemption_timeout;
   2345	bool missing = false;
   2346	unsigned long flags;
   2347	int ret;
   2348
   2349	/* NB: For both of these, zero means disabled. */
   2350	execution_quantum = engine->props.timeslice_duration_ms * 1000;
   2351	preemption_timeout = engine->props.preempt_timeout_ms * 1000;
   2352
   2353	__guc_context_policy_start_klv(&policy, ce->guc_id.id);
   2354
   2355	__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
   2356	__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
   2357	__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
   2358
   2359	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
   2360		__guc_context_policy_add_preempt_to_idle(&policy, 1);
   2361
   2362	ret = __guc_context_set_context_policies(guc, &policy, loop);
   2363	missing = ret != 0;
   2364
   2365	if (!missing && intel_context_is_parent(ce)) {
   2366		struct intel_context *child;
   2367
   2368		for_each_child(ce, child) {
   2369			__guc_context_policy_start_klv(&policy, child->guc_id.id);
   2370
   2371			if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
   2372				__guc_context_policy_add_preempt_to_idle(&policy, 1);
   2373
   2374			child->guc_state.prio = ce->guc_state.prio;
   2375			__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
   2376			__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
   2377			__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
   2378
   2379			ret = __guc_context_set_context_policies(guc, &policy, loop);
   2380			if (ret) {
   2381				missing = true;
   2382				break;
   2383			}
   2384		}
   2385	}
   2386
   2387	spin_lock_irqsave(&ce->guc_state.lock, flags);
   2388	if (missing)
   2389		set_context_policy_required(ce);
   2390	else
   2391		clr_context_policy_required(ce);
   2392	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   2393
   2394	return ret;
   2395}
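
/*
 * Illustrative sketch (not part of the driver, kept under #if 0): the policy
 * values above are kept in milliseconds in the engine properties but sent to
 * the GuC in microseconds, and zero means "disabled" on both sides, so the
 * conversion must preserve it; this hypothetical helper mirrors that.
 */
#if 0
static u32 example_policy_us_from_ms(u32 ms)
{
	return ms * 1000;	/* 0 (disabled) stays 0 */
}
#endif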
   2396
   2397static void prepare_context_registration_info(struct intel_context *ce,
   2398					      struct guc_ctxt_registration_info *info)
   2399{
   2400	struct intel_engine_cs *engine = ce->engine;
   2401	struct intel_guc *guc = &engine->gt->uc.guc;
   2402	u32 ctx_id = ce->guc_id.id;
   2403
   2404	GEM_BUG_ON(!engine->mask);
   2405
   2406	/*
    2407	 * Ensure the LRC and CT vmas are in the same region, as the write
    2408	 * barrier is done based on the CT vma region.
   2409	 */
   2410	GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
   2411		   i915_gem_object_is_lmem(ce->ring->vma->obj));
   2412
   2413	memset(info, 0, sizeof(*info));
   2414	info->context_idx = ctx_id;
   2415	info->engine_class = engine_class_to_guc_class(engine->class);
   2416	info->engine_submit_mask = engine->logical_mask;
   2417	/*
   2418	 * NB: GuC interface supports 64 bit LRCA even though i915/HW
   2419	 * only supports 32 bit currently.
   2420	 */
   2421	info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
   2422	info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
   2423	info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
   2424
   2425	/*
    2426	 * If the context is a parent, we need to register a process descriptor
   2427	 * describing a work queue and register all child contexts.
   2428	 */
   2429	if (intel_context_is_parent(ce)) {
   2430		struct guc_sched_wq_desc *wq_desc;
   2431		u64 wq_desc_offset, wq_base_offset;
   2432
   2433		ce->parallel.guc.wqi_tail = 0;
   2434		ce->parallel.guc.wqi_head = 0;
   2435
   2436		wq_desc_offset = i915_ggtt_offset(ce->state) +
   2437				 __get_parent_scratch_offset(ce);
   2438		wq_base_offset = i915_ggtt_offset(ce->state) +
   2439				 __get_wq_offset(ce);
   2440		info->wq_desc_lo = lower_32_bits(wq_desc_offset);
   2441		info->wq_desc_hi = upper_32_bits(wq_desc_offset);
   2442		info->wq_base_lo = lower_32_bits(wq_base_offset);
   2443		info->wq_base_hi = upper_32_bits(wq_base_offset);
   2444		info->wq_size = WQ_SIZE;
   2445
   2446		wq_desc = __get_wq_desc(ce);
   2447		memset(wq_desc, 0, sizeof(*wq_desc));
   2448		wq_desc->wq_status = WQ_STATUS_ACTIVE;
   2449
   2450		clear_children_join_go_memory(ce);
   2451	}
   2452}
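
/*
 * Illustrative sketch (not part of the driver, kept under #if 0): the
 * registration info above hands 64-bit GGTT addresses (LRCA, work queue
 * descriptor and base) to the GuC as lo/hi dword pairs; this hypothetical
 * helper shows the split done by lower_32_bits()/upper_32_bits().
 */
#if 0
static void example_split_u64(u64 addr, u32 *lo, u32 *hi)
{
	*lo = (u32)(addr & 0xffffffff);
	*hi = (u32)(addr >> 32);
}
#endif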
   2453
   2454static int try_context_registration(struct intel_context *ce, bool loop)
   2455{
   2456	struct intel_engine_cs *engine = ce->engine;
   2457	struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
   2458	struct intel_guc *guc = &engine->gt->uc.guc;
   2459	intel_wakeref_t wakeref;
   2460	u32 ctx_id = ce->guc_id.id;
   2461	bool context_registered;
   2462	int ret = 0;
   2463
   2464	GEM_BUG_ON(!sched_state_is_init(ce));
   2465
   2466	context_registered = ctx_id_mapped(guc, ctx_id);
   2467
   2468	clr_ctx_id_mapping(guc, ctx_id);
   2469	set_ctx_id_mapping(guc, ctx_id, ce);
   2470
   2471	/*
   2472	 * The context_lookup xarray is used to determine if the hardware
   2473	 * context is currently registered. There are two cases in which it
    2474	 * could be registered: either the guc_id has been stolen from another
    2475	 * context, or the LRC descriptor address of this context has changed.
    2476	 * In either case the context needs to be deregistered with the GuC
    2477	 * before registering this context.
   2478	 */
   2479	if (context_registered) {
   2480		bool disabled;
   2481		unsigned long flags;
   2482
   2483		trace_intel_context_steal_guc_id(ce);
   2484		GEM_BUG_ON(!loop);
   2485
   2486		/* Seal race with Reset */
   2487		spin_lock_irqsave(&ce->guc_state.lock, flags);
   2488		disabled = submission_disabled(guc);
   2489		if (likely(!disabled)) {
   2490			set_context_wait_for_deregister_to_register(ce);
   2491			intel_context_get(ce);
   2492		}
   2493		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   2494		if (unlikely(disabled)) {
   2495			clr_ctx_id_mapping(guc, ctx_id);
   2496			return 0;	/* Will get registered later */
   2497		}
   2498
   2499		/*
   2500		 * If stealing the guc_id, this ce has the same guc_id as the
   2501		 * context whose guc_id was stolen.
   2502		 */
   2503		with_intel_runtime_pm(runtime_pm, wakeref)
   2504			ret = deregister_context(ce, ce->guc_id.id);
   2505		if (unlikely(ret == -ENODEV))
   2506			ret = 0;	/* Will get registered later */
   2507	} else {
   2508		with_intel_runtime_pm(runtime_pm, wakeref)
   2509			ret = register_context(ce, loop);
   2510		if (unlikely(ret == -EBUSY)) {
   2511			clr_ctx_id_mapping(guc, ctx_id);
   2512		} else if (unlikely(ret == -ENODEV)) {
   2513			clr_ctx_id_mapping(guc, ctx_id);
   2514			ret = 0;	/* Will get registered later */
   2515		}
   2516	}
   2517
   2518	return ret;
   2519}
   2520
   2521static int __guc_context_pre_pin(struct intel_context *ce,
   2522				 struct intel_engine_cs *engine,
   2523				 struct i915_gem_ww_ctx *ww,
   2524				 void **vaddr)
   2525{
   2526	return lrc_pre_pin(ce, engine, ww, vaddr);
   2527}
   2528
   2529static int __guc_context_pin(struct intel_context *ce,
   2530			     struct intel_engine_cs *engine,
   2531			     void *vaddr)
   2532{
   2533	if (i915_ggtt_offset(ce->state) !=
   2534	    (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
   2535		set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
   2536
   2537	/*
   2538	 * GuC context gets pinned in guc_request_alloc. See that function for
    2539	 * an explanation of why.
   2540	 */
   2541
   2542	return lrc_pin(ce, engine, vaddr);
   2543}
   2544
   2545static int guc_context_pre_pin(struct intel_context *ce,
   2546			       struct i915_gem_ww_ctx *ww,
   2547			       void **vaddr)
   2548{
   2549	return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
   2550}
   2551
   2552static int guc_context_pin(struct intel_context *ce, void *vaddr)
   2553{
   2554	int ret = __guc_context_pin(ce, ce->engine, vaddr);
   2555
   2556	if (likely(!ret && !intel_context_is_barrier(ce)))
   2557		intel_engine_pm_get(ce->engine);
   2558
   2559	return ret;
   2560}
   2561
   2562static void guc_context_unpin(struct intel_context *ce)
   2563{
   2564	struct intel_guc *guc = ce_to_guc(ce);
   2565
   2566	unpin_guc_id(guc, ce);
   2567	lrc_unpin(ce);
   2568
   2569	if (likely(!intel_context_is_barrier(ce)))
   2570		intel_engine_pm_put_async(ce->engine);
   2571}
   2572
   2573static void guc_context_post_unpin(struct intel_context *ce)
   2574{
   2575	lrc_post_unpin(ce);
   2576}
   2577
   2578static void __guc_context_sched_enable(struct intel_guc *guc,
   2579				       struct intel_context *ce)
   2580{
   2581	u32 action[] = {
   2582		INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
   2583		ce->guc_id.id,
   2584		GUC_CONTEXT_ENABLE
   2585	};
   2586
   2587	trace_intel_context_sched_enable(ce);
   2588
   2589	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
   2590				      G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
   2591}
   2592
   2593static void __guc_context_sched_disable(struct intel_guc *guc,
   2594					struct intel_context *ce,
   2595					u16 guc_id)
   2596{
   2597	u32 action[] = {
   2598		INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
   2599		guc_id,	/* ce->guc_id.id not stable */
   2600		GUC_CONTEXT_DISABLE
   2601	};
   2602
   2603	GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID);
   2604
   2605	GEM_BUG_ON(intel_context_is_child(ce));
   2606	trace_intel_context_sched_disable(ce);
   2607
   2608	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
   2609				      G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
   2610}
   2611
   2612static void guc_blocked_fence_complete(struct intel_context *ce)
   2613{
   2614	lockdep_assert_held(&ce->guc_state.lock);
   2615
   2616	if (!i915_sw_fence_done(&ce->guc_state.blocked))
   2617		i915_sw_fence_complete(&ce->guc_state.blocked);
   2618}
   2619
   2620static void guc_blocked_fence_reinit(struct intel_context *ce)
   2621{
   2622	lockdep_assert_held(&ce->guc_state.lock);
   2623	GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
   2624
   2625	/*
   2626	 * This fence is always complete unless a pending schedule disable is
   2627	 * outstanding. We arm the fence here and complete it when we receive
   2628	 * the pending schedule disable complete message.
   2629	 */
   2630	i915_sw_fence_fini(&ce->guc_state.blocked);
   2631	i915_sw_fence_reinit(&ce->guc_state.blocked);
   2632	i915_sw_fence_await(&ce->guc_state.blocked);
   2633	i915_sw_fence_commit(&ce->guc_state.blocked);
   2634}
   2635
   2636static u16 prep_context_pending_disable(struct intel_context *ce)
   2637{
   2638	lockdep_assert_held(&ce->guc_state.lock);
   2639
   2640	set_context_pending_disable(ce);
   2641	clr_context_enabled(ce);
   2642	guc_blocked_fence_reinit(ce);
   2643	intel_context_get(ce);
   2644
   2645	return ce->guc_id.id;
   2646}
   2647
   2648static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
   2649{
   2650	struct intel_guc *guc = ce_to_guc(ce);
   2651	unsigned long flags;
   2652	struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
   2653	intel_wakeref_t wakeref;
   2654	u16 guc_id;
   2655	bool enabled;
   2656
   2657	GEM_BUG_ON(intel_context_is_child(ce));
   2658
   2659	spin_lock_irqsave(&ce->guc_state.lock, flags);
   2660
   2661	incr_context_blocked(ce);
   2662
   2663	enabled = context_enabled(ce);
   2664	if (unlikely(!enabled || submission_disabled(guc))) {
   2665		if (enabled)
   2666			clr_context_enabled(ce);
   2667		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   2668		return &ce->guc_state.blocked;
   2669	}
   2670
   2671	/*
   2672	 * We add +2 here as the schedule disable complete CTB handler calls
   2673	 * intel_context_sched_disable_unpin (-2 to pin_count).
   2674	 */
   2675	atomic_add(2, &ce->pin_count);
   2676
   2677	guc_id = prep_context_pending_disable(ce);
   2678
   2679	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   2680
   2681	with_intel_runtime_pm(runtime_pm, wakeref)
   2682		__guc_context_sched_disable(guc, ce, guc_id);
   2683
   2684	return &ce->guc_state.blocked;
   2685}
   2686
   2687#define SCHED_STATE_MULTI_BLOCKED_MASK \
   2688	(SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
   2689#define SCHED_STATE_NO_UNBLOCK \
   2690	(SCHED_STATE_MULTI_BLOCKED_MASK | \
   2691	 SCHED_STATE_PENDING_DISABLE | \
   2692	 SCHED_STATE_BANNED)
   2693
   2694static bool context_cant_unblock(struct intel_context *ce)
   2695{
   2696	lockdep_assert_held(&ce->guc_state.lock);
   2697
   2698	return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
   2699		context_guc_id_invalid(ce) ||
   2700		!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) ||
   2701		!intel_context_is_pinned(ce);
   2702}
   2703
   2704static void guc_context_unblock(struct intel_context *ce)
   2705{
   2706	struct intel_guc *guc = ce_to_guc(ce);
   2707	unsigned long flags;
   2708	struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
   2709	intel_wakeref_t wakeref;
   2710	bool enable;
   2711
   2712	GEM_BUG_ON(context_enabled(ce));
   2713	GEM_BUG_ON(intel_context_is_child(ce));
   2714
   2715	spin_lock_irqsave(&ce->guc_state.lock, flags);
   2716
   2717	if (unlikely(submission_disabled(guc) ||
   2718		     context_cant_unblock(ce))) {
   2719		enable = false;
   2720	} else {
   2721		enable = true;
   2722		set_context_pending_enable(ce);
   2723		set_context_enabled(ce);
   2724		intel_context_get(ce);
   2725	}
   2726
   2727	decr_context_blocked(ce);
   2728
   2729	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   2730
   2731	if (enable) {
   2732		with_intel_runtime_pm(runtime_pm, wakeref)
   2733			__guc_context_sched_enable(guc, ce);
   2734	}
   2735}
   2736
   2737static void guc_context_cancel_request(struct intel_context *ce,
   2738				       struct i915_request *rq)
   2739{
   2740	struct intel_context *block_context =
   2741		request_to_scheduling_context(rq);
   2742
   2743	if (i915_sw_fence_signaled(&rq->submit)) {
   2744		struct i915_sw_fence *fence;
   2745
   2746		intel_context_get(ce);
   2747		fence = guc_context_block(block_context);
   2748		i915_sw_fence_wait(fence);
   2749		if (!i915_request_completed(rq)) {
   2750			__i915_request_skip(rq);
   2751			guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
   2752					true);
   2753		}
   2754
   2755		guc_context_unblock(block_context);
   2756		intel_context_put(ce);
   2757	}
   2758}
   2759
   2760static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
   2761						 u16 guc_id,
   2762						 u32 preemption_timeout)
   2763{
   2764	struct context_policy policy;
   2765
   2766	__guc_context_policy_start_klv(&policy, guc_id);
   2767	__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
   2768	__guc_context_set_context_policies(guc, &policy, true);
   2769}
   2770
   2771static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
   2772{
   2773	struct intel_guc *guc = ce_to_guc(ce);
   2774	struct intel_runtime_pm *runtime_pm =
   2775		&ce->engine->gt->i915->runtime_pm;
   2776	intel_wakeref_t wakeref;
   2777	unsigned long flags;
   2778
   2779	GEM_BUG_ON(intel_context_is_child(ce));
   2780
   2781	guc_flush_submissions(guc);
   2782
   2783	spin_lock_irqsave(&ce->guc_state.lock, flags);
   2784	set_context_banned(ce);
   2785
   2786	if (submission_disabled(guc) ||
   2787	    (!context_enabled(ce) && !context_pending_disable(ce))) {
   2788		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   2789
   2790		guc_cancel_context_requests(ce);
   2791		intel_engine_signal_breadcrumbs(ce->engine);
   2792	} else if (!context_pending_disable(ce)) {
   2793		u16 guc_id;
   2794
   2795		/*
   2796		 * We add +2 here as the schedule disable complete CTB handler
   2797		 * calls intel_context_sched_disable_unpin (-2 to pin_count).
   2798		 */
   2799		atomic_add(2, &ce->pin_count);
   2800
   2801		guc_id = prep_context_pending_disable(ce);
   2802		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   2803
   2804		/*
   2805		 * In addition to disabling scheduling, set the preemption
   2806		 * timeout to the minimum value (1 us) so the banned context
   2807		 * gets kicked off the HW ASAP.
   2808		 */
   2809		with_intel_runtime_pm(runtime_pm, wakeref) {
   2810			__guc_context_set_preemption_timeout(guc, guc_id, 1);
   2811			__guc_context_sched_disable(guc, ce, guc_id);
   2812		}
   2813	} else {
   2814		if (!context_guc_id_invalid(ce))
   2815			with_intel_runtime_pm(runtime_pm, wakeref)
   2816				__guc_context_set_preemption_timeout(guc,
   2817								     ce->guc_id.id,
   2818								     1);
   2819		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   2820	}
   2821}
   2822
   2823static void guc_context_sched_disable(struct intel_context *ce)
   2824{
   2825	struct intel_guc *guc = ce_to_guc(ce);
   2826	unsigned long flags;
   2827	struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
   2828	intel_wakeref_t wakeref;
   2829	u16 guc_id;
   2830
   2831	GEM_BUG_ON(intel_context_is_child(ce));
   2832
   2833	spin_lock_irqsave(&ce->guc_state.lock, flags);
   2834
   2835	/*
   2836	 * We have to check if the context has been disabled by another thread,
    2837	 * check if submission has been disabled to seal a race with reset and
    2838	 * finally check if any more requests have been committed to the
    2839	 * context, ensuring that a request doesn't slip through the
   2840	 * 'context_pending_disable' fence.
   2841	 */
   2842	if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
   2843		     context_has_committed_requests(ce))) {
   2844		clr_context_enabled(ce);
   2845		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   2846		goto unpin;
   2847	}
   2848	guc_id = prep_context_pending_disable(ce);
   2849
   2850	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   2851
   2852	with_intel_runtime_pm(runtime_pm, wakeref)
   2853		__guc_context_sched_disable(guc, ce, guc_id);
   2854
   2855	return;
   2856unpin:
   2857	intel_context_sched_disable_unpin(ce);
   2858}
   2859
   2860static inline void guc_lrc_desc_unpin(struct intel_context *ce)
   2861{
   2862	struct intel_guc *guc = ce_to_guc(ce);
   2863	struct intel_gt *gt = guc_to_gt(guc);
   2864	unsigned long flags;
   2865	bool disabled;
   2866
   2867	GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
   2868	GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
   2869	GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
   2870	GEM_BUG_ON(context_enabled(ce));
   2871
   2872	/* Seal race with Reset */
   2873	spin_lock_irqsave(&ce->guc_state.lock, flags);
   2874	disabled = submission_disabled(guc);
   2875	if (likely(!disabled)) {
   2876		__intel_gt_pm_get(gt);
   2877		set_context_destroyed(ce);
   2878		clr_context_registered(ce);
   2879	}
   2880	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   2881	if (unlikely(disabled)) {
   2882		release_guc_id(guc, ce);
   2883		__guc_context_destroy(ce);
   2884		return;
   2885	}
   2886
   2887	deregister_context(ce, ce->guc_id.id);
   2888}
   2889
   2890static void __guc_context_destroy(struct intel_context *ce)
   2891{
   2892	GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
   2893		   ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
   2894		   ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
   2895		   ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
   2896	GEM_BUG_ON(ce->guc_state.number_committed_requests);
   2897
   2898	lrc_fini(ce);
   2899	intel_context_fini(ce);
   2900
   2901	if (intel_engine_is_virtual(ce->engine)) {
   2902		struct guc_virtual_engine *ve =
   2903			container_of(ce, typeof(*ve), context);
   2904
   2905		if (ve->base.breadcrumbs)
   2906			intel_breadcrumbs_put(ve->base.breadcrumbs);
   2907
   2908		kfree(ve);
   2909	} else {
   2910		intel_context_free(ce);
   2911	}
   2912}
   2913
   2914static void guc_flush_destroyed_contexts(struct intel_guc *guc)
   2915{
   2916	struct intel_context *ce;
   2917	unsigned long flags;
   2918
   2919	GEM_BUG_ON(!submission_disabled(guc) &&
   2920		   guc_submission_initialized(guc));
   2921
   2922	while (!list_empty(&guc->submission_state.destroyed_contexts)) {
   2923		spin_lock_irqsave(&guc->submission_state.lock, flags);
   2924		ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
   2925					      struct intel_context,
   2926					      destroyed_link);
   2927		if (ce)
   2928			list_del_init(&ce->destroyed_link);
   2929		spin_unlock_irqrestore(&guc->submission_state.lock, flags);
   2930
   2931		if (!ce)
   2932			break;
   2933
   2934		release_guc_id(guc, ce);
   2935		__guc_context_destroy(ce);
   2936	}
   2937}
   2938
   2939static void deregister_destroyed_contexts(struct intel_guc *guc)
   2940{
   2941	struct intel_context *ce;
   2942	unsigned long flags;
   2943
   2944	while (!list_empty(&guc->submission_state.destroyed_contexts)) {
   2945		spin_lock_irqsave(&guc->submission_state.lock, flags);
   2946		ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
   2947					      struct intel_context,
   2948					      destroyed_link);
   2949		if (ce)
   2950			list_del_init(&ce->destroyed_link);
   2951		spin_unlock_irqrestore(&guc->submission_state.lock, flags);
   2952
   2953		if (!ce)
   2954			break;
   2955
   2956		guc_lrc_desc_unpin(ce);
   2957	}
   2958}
   2959
   2960static void destroyed_worker_func(struct work_struct *w)
   2961{
   2962	struct intel_guc *guc = container_of(w, struct intel_guc,
   2963					     submission_state.destroyed_worker);
   2964	struct intel_gt *gt = guc_to_gt(guc);
   2965	int tmp;
   2966
   2967	with_intel_gt_pm(gt, tmp)
   2968		deregister_destroyed_contexts(guc);
   2969}
   2970
   2971static void guc_context_destroy(struct kref *kref)
   2972{
   2973	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
   2974	struct intel_guc *guc = ce_to_guc(ce);
   2975	unsigned long flags;
   2976	bool destroy;
   2977
   2978	/*
    2979	 * If the guc_id is invalid, this context has been stolen and we can free
    2980	 * it immediately. It can also be freed immediately if the context is not
    2981	 * registered with the GuC or the GuC is in the middle of a reset.
   2982	 */
   2983	spin_lock_irqsave(&guc->submission_state.lock, flags);
   2984	destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
   2985		!ctx_id_mapped(guc, ce->guc_id.id);
   2986	if (likely(!destroy)) {
   2987		if (!list_empty(&ce->guc_id.link))
   2988			list_del_init(&ce->guc_id.link);
   2989		list_add_tail(&ce->destroyed_link,
   2990			      &guc->submission_state.destroyed_contexts);
   2991	} else {
   2992		__release_guc_id(guc, ce);
   2993	}
   2994	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
   2995	if (unlikely(destroy)) {
   2996		__guc_context_destroy(ce);
   2997		return;
   2998	}
   2999
   3000	/*
    3001	 * We use a worker to issue the H2G to deregister the context, as this
    3002	 * may take the GT PM for the first time, which isn't allowed from an
    3003	 * atomic context.
   3004	 */
   3005	queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
   3006}
   3007
   3008static int guc_context_alloc(struct intel_context *ce)
   3009{
   3010	return lrc_alloc(ce, ce->engine);
   3011}
   3012
   3013static void __guc_context_set_prio(struct intel_guc *guc,
   3014				   struct intel_context *ce)
   3015{
   3016	struct context_policy policy;
   3017
   3018	__guc_context_policy_start_klv(&policy, ce->guc_id.id);
   3019	__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
   3020	__guc_context_set_context_policies(guc, &policy, true);
   3021}
   3022
   3023static void guc_context_set_prio(struct intel_guc *guc,
   3024				 struct intel_context *ce,
   3025				 u8 prio)
   3026{
   3027	GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
   3028		   prio > GUC_CLIENT_PRIORITY_NORMAL);
   3029	lockdep_assert_held(&ce->guc_state.lock);
   3030
   3031	if (ce->guc_state.prio == prio || submission_disabled(guc) ||
   3032	    !context_registered(ce)) {
   3033		ce->guc_state.prio = prio;
   3034		return;
   3035	}
   3036
   3037	ce->guc_state.prio = prio;
   3038	__guc_context_set_prio(guc, ce);
   3039
   3040	trace_intel_context_set_prio(ce);
   3041}
   3042
   3043static inline u8 map_i915_prio_to_guc_prio(int prio)
   3044{
   3045	if (prio == I915_PRIORITY_NORMAL)
   3046		return GUC_CLIENT_PRIORITY_KMD_NORMAL;
   3047	else if (prio < I915_PRIORITY_NORMAL)
   3048		return GUC_CLIENT_PRIORITY_NORMAL;
   3049	else if (prio < I915_PRIORITY_DISPLAY)
   3050		return GUC_CLIENT_PRIORITY_HIGH;
   3051	else
   3052		return GUC_CLIENT_PRIORITY_KMD_HIGH;
   3053}
   3054
   3055static inline void add_context_inflight_prio(struct intel_context *ce,
   3056					     u8 guc_prio)
   3057{
   3058	lockdep_assert_held(&ce->guc_state.lock);
   3059	GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
   3060
   3061	++ce->guc_state.prio_count[guc_prio];
   3062
   3063	/* Overflow protection */
   3064	GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
   3065}
   3066
   3067static inline void sub_context_inflight_prio(struct intel_context *ce,
   3068					     u8 guc_prio)
   3069{
   3070	lockdep_assert_held(&ce->guc_state.lock);
   3071	GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
   3072
   3073	/* Underflow protection */
   3074	GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
   3075
   3076	--ce->guc_state.prio_count[guc_prio];
   3077}
   3078
   3079static inline void update_context_prio(struct intel_context *ce)
   3080{
   3081	struct intel_guc *guc = &ce->engine->gt->uc.guc;
   3082	int i;
   3083
   3084	BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
   3085	BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
   3086
   3087	lockdep_assert_held(&ce->guc_state.lock);
   3088
   3089	for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) {
   3090		if (ce->guc_state.prio_count[i]) {
   3091			guc_context_set_prio(guc, ce, i);
   3092			break;
   3093		}
   3094	}
   3095}
   3096
   3097static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
   3098{
   3099	/* Lower value is higher priority */
   3100	return new_guc_prio < old_guc_prio;
   3101}
   3102
   3103static void add_to_context(struct i915_request *rq)
   3104{
   3105	struct intel_context *ce = request_to_scheduling_context(rq);
   3106	u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
   3107
   3108	GEM_BUG_ON(intel_context_is_child(ce));
   3109	GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
   3110
   3111	spin_lock(&ce->guc_state.lock);
   3112	list_move_tail(&rq->sched.link, &ce->guc_state.requests);
   3113
   3114	if (rq->guc_prio == GUC_PRIO_INIT) {
   3115		rq->guc_prio = new_guc_prio;
   3116		add_context_inflight_prio(ce, rq->guc_prio);
   3117	} else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
   3118		sub_context_inflight_prio(ce, rq->guc_prio);
   3119		rq->guc_prio = new_guc_prio;
   3120		add_context_inflight_prio(ce, rq->guc_prio);
   3121	}
   3122	update_context_prio(ce);
   3123
   3124	spin_unlock(&ce->guc_state.lock);
   3125}
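
/*
 * Illustrative sketch (not part of the driver, kept under #if 0): a
 * hypothetical model of the in-flight priority tracking above. Lower GuC
 * priority values mean higher priority, so given the per-context histogram
 * maintained by add_context_inflight_prio()/sub_context_inflight_prio(), the
 * context inherits the lowest-valued level that still has requests in flight,
 * as update_context_prio() does.
 */
#if 0
static int example_effective_guc_prio(const u32 *prio_count, int levels)
{
	int i;

	for (i = 0; i < levels; i++)
		if (prio_count[i])
			return i;

	return -1;	/* nothing in flight; the driver leaves prio unchanged */
}
#endif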
   3126
   3127static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
   3128{
   3129	lockdep_assert_held(&ce->guc_state.lock);
   3130
   3131	if (rq->guc_prio != GUC_PRIO_INIT &&
   3132	    rq->guc_prio != GUC_PRIO_FINI) {
   3133		sub_context_inflight_prio(ce, rq->guc_prio);
   3134		update_context_prio(ce);
   3135	}
   3136	rq->guc_prio = GUC_PRIO_FINI;
   3137}
   3138
   3139static void remove_from_context(struct i915_request *rq)
   3140{
   3141	struct intel_context *ce = request_to_scheduling_context(rq);
   3142
   3143	GEM_BUG_ON(intel_context_is_child(ce));
   3144
   3145	spin_lock_irq(&ce->guc_state.lock);
   3146
   3147	list_del_init(&rq->sched.link);
   3148	clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
   3149
   3150	/* Prevent further __await_execution() registering a cb, then flush */
   3151	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
   3152
   3153	guc_prio_fini(rq, ce);
   3154
   3155	decr_context_committed_requests(ce);
   3156
   3157	spin_unlock_irq(&ce->guc_state.lock);
   3158
   3159	atomic_dec(&ce->guc_id.ref);
   3160	i915_request_notify_execute_cb_imm(rq);
   3161}
   3162
   3163static const struct intel_context_ops guc_context_ops = {
   3164	.alloc = guc_context_alloc,
   3165
   3166	.pre_pin = guc_context_pre_pin,
   3167	.pin = guc_context_pin,
   3168	.unpin = guc_context_unpin,
   3169	.post_unpin = guc_context_post_unpin,
   3170
   3171	.ban = guc_context_ban,
   3172
   3173	.cancel_request = guc_context_cancel_request,
   3174
   3175	.enter = intel_context_enter_engine,
   3176	.exit = intel_context_exit_engine,
   3177
   3178	.sched_disable = guc_context_sched_disable,
   3179
   3180	.reset = lrc_reset,
   3181	.destroy = guc_context_destroy,
   3182
   3183	.create_virtual = guc_create_virtual,
   3184	.create_parallel = guc_create_parallel,
   3185};
   3186
   3187static void submit_work_cb(struct irq_work *wrk)
   3188{
   3189	struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work);
   3190
   3191	might_lock(&rq->engine->sched_engine->lock);
   3192	i915_sw_fence_complete(&rq->submit);
   3193}
   3194
   3195static void __guc_signal_context_fence(struct intel_context *ce)
   3196{
   3197	struct i915_request *rq, *rn;
   3198
   3199	lockdep_assert_held(&ce->guc_state.lock);
   3200
   3201	if (!list_empty(&ce->guc_state.fences))
   3202		trace_intel_context_fence_release(ce);
   3203
   3204	/*
   3205	 * Use an IRQ to ensure locking order of sched_engine->lock ->
   3206	 * ce->guc_state.lock is preserved.
   3207	 */
   3208	list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,
   3209				 guc_fence_link) {
   3210		list_del(&rq->guc_fence_link);
   3211		irq_work_queue(&rq->submit_work);
   3212	}
   3213
   3214	INIT_LIST_HEAD(&ce->guc_state.fences);
   3215}
   3216
   3217static void guc_signal_context_fence(struct intel_context *ce)
   3218{
   3219	unsigned long flags;
   3220
   3221	GEM_BUG_ON(intel_context_is_child(ce));
   3222
   3223	spin_lock_irqsave(&ce->guc_state.lock, flags);
   3224	clr_context_wait_for_deregister_to_register(ce);
   3225	__guc_signal_context_fence(ce);
   3226	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   3227}
   3228
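/*
 * A context must be (re)registered with the GuC if it has just been assigned
 * a new guc_id, its LRCA is dirty, or its guc_id is no longer mapped in the
 * GuC (and only if submission isn't disabled).
 */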
   3229static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
   3230{
   3231	return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
   3232		!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) &&
   3233		!submission_disabled(ce_to_guc(ce));
   3234}
   3235
   3236static void guc_context_init(struct intel_context *ce)
   3237{
   3238	const struct i915_gem_context *ctx;
   3239	int prio = I915_CONTEXT_DEFAULT_PRIORITY;
   3240
   3241	rcu_read_lock();
   3242	ctx = rcu_dereference(ce->gem_context);
   3243	if (ctx)
   3244		prio = ctx->sched.priority;
   3245	rcu_read_unlock();
   3246
   3247	ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
   3248	set_bit(CONTEXT_GUC_INIT, &ce->flags);
   3249}
   3250
   3251static int guc_request_alloc(struct i915_request *rq)
   3252{
   3253	struct intel_context *ce = request_to_scheduling_context(rq);
   3254	struct intel_guc *guc = ce_to_guc(ce);
   3255	unsigned long flags;
   3256	int ret;
   3257
   3258	GEM_BUG_ON(!intel_context_is_pinned(rq->context));
   3259
   3260	/*
   3261	 * Flush enough space to reduce the likelihood of waiting after
   3262	 * we start building the request - in which case we will just
   3263	 * have to repeat work.
   3264	 */
   3265	rq->reserved_space += GUC_REQUEST_SIZE;
   3266
   3267	/*
   3268	 * Note that after this point, we have committed to using
   3269	 * this request as it is being used to both track the
   3270	 * state of engine initialisation and liveness of the
   3271	 * golden renderstate above. Think twice before you try
   3272	 * to cancel/unwind this request now.
   3273	 */
   3274
   3275	/* Unconditionally invalidate GPU caches and TLBs. */
   3276	ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
   3277	if (ret)
   3278		return ret;
   3279
   3280	rq->reserved_space -= GUC_REQUEST_SIZE;
   3281
   3282	if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
   3283		guc_context_init(ce);
   3284
   3285	/*
   3286	 * Call pin_guc_id here rather than in the pinning step as with
   3287	 * dma_resv, contexts can be repeatedly pinned / unpinned, trashing the
   3288	 * guc_id and creating horrible race conditions. This is especially bad
   3289	 * when guc_ids are being stolen due to over subscription. By the time
   3290	 * this function is reached, it is guaranteed that the guc_id will be
   3291	 * persistent until the generated request is retired, thus sealing these
   3292	 * race conditions. It is still safe to fail here if guc_ids are
   3293	 * exhausted and return -EAGAIN to the user indicating that they can try
   3294	 * again in the future.
   3295	 *
   3296	 * There is no need for a lock here as the timeline mutex ensures at
   3297	 * most one context can be executing this code path at once. The
   3298	 * guc_id_ref is incremented once for every request in flight and
   3299	 * decremented on each retire. When it is zero, a lock around the
   3300	 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
   3301	 */
   3302	if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
   3303		goto out;
   3304
   3305	ret = pin_guc_id(guc, ce);	/* returns 1 if new guc_id assigned */
   3306	if (unlikely(ret < 0))
   3307		return ret;
   3308	if (context_needs_register(ce, !!ret)) {
   3309		ret = try_context_registration(ce, true);
   3310		if (unlikely(ret)) {	/* unwind */
   3311			if (ret == -EPIPE) {
   3312				disable_submission(guc);
   3313				goto out;	/* GPU will be reset */
   3314			}
   3315			atomic_dec(&ce->guc_id.ref);
   3316			unpin_guc_id(guc, ce);
   3317			return ret;
   3318		}
   3319	}
   3320
   3321	clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
   3322
   3323out:
   3324	/*
   3325	 * We block all requests on this context if a G2H is pending for a
   3326	 * schedule disable or context deregistration as the GuC will fail a
   3327	 * schedule enable or context registration if either G2H is pending
   3328	 * respectively. Once a G2H returns, the fence that is blocking these
   3329	 * requests is released (see guc_signal_context_fence).
   3330	 */
   3331	spin_lock_irqsave(&ce->guc_state.lock, flags);
   3332	if (context_wait_for_deregister_to_register(ce) ||
   3333	    context_pending_disable(ce)) {
   3334		init_irq_work(&rq->submit_work, submit_work_cb);
   3335		i915_sw_fence_await(&rq->submit);
   3336
   3337		list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
   3338	}
   3339	incr_context_committed_requests(ce);
   3340	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   3341
   3342	return 0;
   3343}
   3344
   3345static int guc_virtual_context_pre_pin(struct intel_context *ce,
   3346				       struct i915_gem_ww_ctx *ww,
   3347				       void **vaddr)
   3348{
   3349	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
   3350
   3351	return __guc_context_pre_pin(ce, engine, ww, vaddr);
   3352}
   3353
   3354static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
   3355{
   3356	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
   3357	int ret = __guc_context_pin(ce, engine, vaddr);
   3358	intel_engine_mask_t tmp, mask = ce->engine->mask;
   3359
   3360	if (likely(!ret))
   3361		for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
   3362			intel_engine_pm_get(engine);
   3363
   3364	return ret;
   3365}
   3366
   3367static void guc_virtual_context_unpin(struct intel_context *ce)
   3368{
   3369	intel_engine_mask_t tmp, mask = ce->engine->mask;
   3370	struct intel_engine_cs *engine;
   3371	struct intel_guc *guc = ce_to_guc(ce);
   3372
   3373	GEM_BUG_ON(context_enabled(ce));
   3374	GEM_BUG_ON(intel_context_is_barrier(ce));
   3375
   3376	unpin_guc_id(guc, ce);
   3377	lrc_unpin(ce);
   3378
   3379	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
   3380		intel_engine_pm_put_async(engine);
   3381}
   3382
   3383static void guc_virtual_context_enter(struct intel_context *ce)
   3384{
   3385	intel_engine_mask_t tmp, mask = ce->engine->mask;
   3386	struct intel_engine_cs *engine;
   3387
   3388	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
   3389		intel_engine_pm_get(engine);
   3390
   3391	intel_timeline_enter(ce->timeline);
   3392}
   3393
   3394static void guc_virtual_context_exit(struct intel_context *ce)
   3395{
   3396	intel_engine_mask_t tmp, mask = ce->engine->mask;
   3397	struct intel_engine_cs *engine;
   3398
   3399	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
   3400		intel_engine_pm_put(engine);
   3401
   3402	intel_timeline_exit(ce->timeline);
   3403}
   3404
   3405static int guc_virtual_context_alloc(struct intel_context *ce)
   3406{
   3407	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
   3408
   3409	return lrc_alloc(ce, engine);
   3410}
   3411
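/* Context operations for GuC virtual engines (see guc_create_virtual) */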
   3412static const struct intel_context_ops virtual_guc_context_ops = {
   3413	.alloc = guc_virtual_context_alloc,
   3414
   3415	.pre_pin = guc_virtual_context_pre_pin,
   3416	.pin = guc_virtual_context_pin,
   3417	.unpin = guc_virtual_context_unpin,
   3418	.post_unpin = guc_context_post_unpin,
   3419
   3420	.ban = guc_context_ban,
   3421
   3422	.cancel_request = guc_context_cancel_request,
   3423
   3424	.enter = guc_virtual_context_enter,
   3425	.exit = guc_virtual_context_exit,
   3426
   3427	.sched_disable = guc_context_sched_disable,
   3428
   3429	.destroy = guc_context_destroy,
   3430
   3431	.get_sibling = guc_virtual_get_sibling,
   3432};
   3433
   3434static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
   3435{
   3436	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
   3437	struct intel_guc *guc = ce_to_guc(ce);
   3438	int ret;
   3439
   3440	GEM_BUG_ON(!intel_context_is_parent(ce));
   3441	GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
   3442
   3443	ret = pin_guc_id(guc, ce);
   3444	if (unlikely(ret < 0))
   3445		return ret;
   3446
   3447	return __guc_context_pin(ce, engine, vaddr);
   3448}
   3449
   3450static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
   3451{
   3452	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
   3453
   3454	GEM_BUG_ON(!intel_context_is_child(ce));
   3455	GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
   3456
   3457	__intel_context_pin(ce->parallel.parent);
   3458	return __guc_context_pin(ce, engine, vaddr);
   3459}
   3460
   3461static void guc_parent_context_unpin(struct intel_context *ce)
   3462{
   3463	struct intel_guc *guc = ce_to_guc(ce);
   3464
   3465	GEM_BUG_ON(context_enabled(ce));
   3466	GEM_BUG_ON(intel_context_is_barrier(ce));
   3467	GEM_BUG_ON(!intel_context_is_parent(ce));
   3468	GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
   3469
   3470	unpin_guc_id(guc, ce);
   3471	lrc_unpin(ce);
   3472}
   3473
   3474static void guc_child_context_unpin(struct intel_context *ce)
   3475{
   3476	GEM_BUG_ON(context_enabled(ce));
   3477	GEM_BUG_ON(intel_context_is_barrier(ce));
   3478	GEM_BUG_ON(!intel_context_is_child(ce));
   3479	GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
   3480
   3481	lrc_unpin(ce);
   3482}
   3483
   3484static void guc_child_context_post_unpin(struct intel_context *ce)
   3485{
   3486	GEM_BUG_ON(!intel_context_is_child(ce));
   3487	GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
   3488	GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
   3489
   3490	lrc_post_unpin(ce);
   3491	intel_context_unpin(ce->parallel.parent);
   3492}
   3493
   3494static void guc_child_context_destroy(struct kref *kref)
   3495{
   3496	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
   3497
   3498	__guc_context_destroy(ce);
   3499}
   3500
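/*
 * Parent / child context operations used for parallel (multi-LRC) submission;
 * a child context keeps its parent pinned for as long as it is pinned itself.
 */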
   3501static const struct intel_context_ops virtual_parent_context_ops = {
   3502	.alloc = guc_virtual_context_alloc,
   3503
   3504	.pre_pin = guc_context_pre_pin,
   3505	.pin = guc_parent_context_pin,
   3506	.unpin = guc_parent_context_unpin,
   3507	.post_unpin = guc_context_post_unpin,
   3508
   3509	.ban = guc_context_ban,
   3510
   3511	.cancel_request = guc_context_cancel_request,
   3512
   3513	.enter = guc_virtual_context_enter,
   3514	.exit = guc_virtual_context_exit,
   3515
   3516	.sched_disable = guc_context_sched_disable,
   3517
   3518	.destroy = guc_context_destroy,
   3519
   3520	.get_sibling = guc_virtual_get_sibling,
   3521};
   3522
   3523static const struct intel_context_ops virtual_child_context_ops = {
   3524	.alloc = guc_virtual_context_alloc,
   3525
   3526	.pre_pin = guc_context_pre_pin,
   3527	.pin = guc_child_context_pin,
   3528	.unpin = guc_child_context_unpin,
   3529	.post_unpin = guc_child_context_post_unpin,
   3530
   3531	.cancel_request = guc_context_cancel_request,
   3532
   3533	.enter = guc_virtual_context_enter,
   3534	.exit = guc_virtual_context_exit,
   3535
   3536	.destroy = guc_child_context_destroy,
   3537
   3538	.get_sibling = guc_virtual_get_sibling,
   3539};
   3540
   3541/*
   3542 * The below override of the breadcrumbs is enabled when the user configures a
   3543 * context for parallel submission (multi-lrc, parent-child).
   3544 *
   3545 * The overridden breadcrumbs implement an algorithm which allows the GuC to
   3546 * safely preempt all the hw contexts configured for parallel submission
   3547 * between each BB. The contract between the i915 and GuC is that if the parent
   3548 * context can be preempted, all the children can be preempted, and the GuC will
   3549 * always try to preempt the parent before the children. A handshake between the
   3550 * parent / children breadcrumbs ensures the i915 holds up its end of the deal,
   3551 * creating a window to preempt between each set of BBs.
   3552 */
   3553static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
   3554						     u64 offset, u32 len,
   3555						     const unsigned int flags);
   3556static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
   3557						    u64 offset, u32 len,
   3558						    const unsigned int flags);
   3559static u32 *
   3560emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
   3561						 u32 *cs);
   3562static u32 *
   3563emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
   3564						u32 *cs);
   3565
   3566static struct intel_context *
   3567guc_create_parallel(struct intel_engine_cs **engines,
   3568		    unsigned int num_siblings,
   3569		    unsigned int width)
   3570{
   3571	struct intel_engine_cs **siblings = NULL;
   3572	struct intel_context *parent = NULL, *ce, *err;
   3573	int i, j;
   3574
   3575	siblings = kmalloc_array(num_siblings,
   3576				 sizeof(*siblings),
   3577				 GFP_KERNEL);
   3578	if (!siblings)
   3579		return ERR_PTR(-ENOMEM);
   3580
   3581	for (i = 0; i < width; ++i) {
   3582		for (j = 0; j < num_siblings; ++j)
   3583			siblings[j] = engines[i * num_siblings + j];
   3584
   3585		ce = intel_engine_create_virtual(siblings, num_siblings,
   3586						 FORCE_VIRTUAL);
   3587		if (IS_ERR(ce)) {
   3588			err = ERR_CAST(ce);
   3589			goto unwind;
   3590		}
   3591
   3592		if (i == 0) {
   3593			parent = ce;
   3594			parent->ops = &virtual_parent_context_ops;
   3595		} else {
   3596			ce->ops = &virtual_child_context_ops;
   3597			intel_context_bind_parent_child(parent, ce);
   3598		}
   3599	}
   3600
   3601	parent->parallel.fence_context = dma_fence_context_alloc(1);
   3602
   3603	parent->engine->emit_bb_start =
   3604		emit_bb_start_parent_no_preempt_mid_batch;
   3605	parent->engine->emit_fini_breadcrumb =
   3606		emit_fini_breadcrumb_parent_no_preempt_mid_batch;
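	/*
	 * Fini breadcrumb dword counts: the parent emits a fixed 12 dwords
	 * plus a 4 dword semaphore wait per child, each child emits a fixed
	 * 16 dwords (see the *_no_preempt_mid_batch helpers below).
	 */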
   3607	parent->engine->emit_fini_breadcrumb_dw =
   3608		12 + 4 * parent->parallel.number_children;
   3609	for_each_child(parent, ce) {
   3610		ce->engine->emit_bb_start =
   3611			emit_bb_start_child_no_preempt_mid_batch;
   3612		ce->engine->emit_fini_breadcrumb =
   3613			emit_fini_breadcrumb_child_no_preempt_mid_batch;
   3614		ce->engine->emit_fini_breadcrumb_dw = 16;
   3615	}
   3616
   3617	kfree(siblings);
   3618	return parent;
   3619
   3620unwind:
   3621	if (parent)
   3622		intel_context_put(parent);
   3623	kfree(siblings);
   3624	return err;
   3625}
   3626
   3627static bool
   3628guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
   3629{
   3630	struct intel_engine_cs *sibling;
   3631	intel_engine_mask_t tmp, mask = b->engine_mask;
   3632	bool result = false;
   3633
   3634	for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
   3635		result |= intel_engine_irq_enable(sibling);
   3636
   3637	return result;
   3638}
   3639
   3640static void
   3641guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b)
   3642{
   3643	struct intel_engine_cs *sibling;
   3644	intel_engine_mask_t tmp, mask = b->engine_mask;
   3645
   3646	for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
   3647		intel_engine_irq_disable(sibling);
   3648}
   3649
   3650static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
   3651{
   3652	int i;
   3653
   3654	/*
   3655	 * In GuC submission mode we do not know which physical engine a request
   3656	 * will be scheduled on; this creates a problem because the breadcrumb
   3657	 * interrupt is per physical engine. To work around this we attach
   3658	 * requests and direct all breadcrumb interrupts to the first instance
   3659	 * of an engine per class. In addition, all breadcrumb interrupts are
   3660	 * enabled / disabled across an engine class in unison.
   3661	 */
   3662	for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) {
   3663		struct intel_engine_cs *sibling =
   3664			engine->gt->engine_class[engine->class][i];
   3665
   3666		if (sibling) {
   3667			if (engine->breadcrumbs != sibling->breadcrumbs) {
   3668				intel_breadcrumbs_put(engine->breadcrumbs);
   3669				engine->breadcrumbs =
   3670					intel_breadcrumbs_get(sibling->breadcrumbs);
   3671			}
   3672			break;
   3673		}
   3674	}
   3675
   3676	if (engine->breadcrumbs) {
   3677		engine->breadcrumbs->engine_mask |= engine->mask;
   3678		engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs;
   3679		engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs;
   3680	}
   3681}
   3682
   3683static void guc_bump_inflight_request_prio(struct i915_request *rq,
   3684					   int prio)
   3685{
   3686	struct intel_context *ce = request_to_scheduling_context(rq);
   3687	u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
   3688
   3689	/* Short circuit if the priority bump would have no effect */
   3690	if (prio < I915_PRIORITY_NORMAL ||
   3691	    rq->guc_prio == GUC_PRIO_FINI ||
   3692	    (rq->guc_prio != GUC_PRIO_INIT &&
   3693	     !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
   3694		return;
   3695
   3696	spin_lock(&ce->guc_state.lock);
   3697	if (rq->guc_prio != GUC_PRIO_FINI) {
   3698		if (rq->guc_prio != GUC_PRIO_INIT)
   3699			sub_context_inflight_prio(ce, rq->guc_prio);
   3700		rq->guc_prio = new_guc_prio;
   3701		add_context_inflight_prio(ce, rq->guc_prio);
   3702		update_context_prio(ce);
   3703	}
   3704	spin_unlock(&ce->guc_state.lock);
   3705}
   3706
   3707static void guc_retire_inflight_request_prio(struct i915_request *rq)
   3708{
   3709	struct intel_context *ce = request_to_scheduling_context(rq);
   3710
   3711	spin_lock(&ce->guc_state.lock);
   3712	guc_prio_fini(rq, ce);
   3713	spin_unlock(&ce->guc_state.lock);
   3714}
   3715
   3716static void sanitize_hwsp(struct intel_engine_cs *engine)
   3717{
   3718	struct intel_timeline *tl;
   3719
   3720	list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
   3721		intel_timeline_reset_seqno(tl);
   3722}
   3723
   3724static void guc_sanitize(struct intel_engine_cs *engine)
   3725{
   3726	/*
   3727	 * Poison residual state on resume, in case the suspend didn't!
   3728	 *
   3729	 * We have to assume that across suspend/resume (or other loss
   3730	 * of control) the contents of our pinned buffers have been
   3731	 * lost, replaced by garbage. Since this doesn't always happen,
   3732	 * let's poison such state so that we more quickly spot when
   3733	 * we falsely assume it has been preserved.
   3734	 */
   3735	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
   3736		memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
   3737
   3738	/*
   3739	 * The kernel_context HWSP is stored in the status_page. As above,
   3740	 * that may be lost on resume/initialisation, and so we need to
   3741	 * reset the value in the HWSP.
   3742	 */
   3743	sanitize_hwsp(engine);
   3744
   3745	/* And scrub the dirty cachelines for the HWSP */
   3746	drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
   3747
   3748	intel_engine_reset_pinned_contexts(engine);
   3749}
   3750
   3751static void setup_hwsp(struct intel_engine_cs *engine)
   3752{
   3753	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
   3754
   3755	ENGINE_WRITE_FW(engine,
   3756			RING_HWS_PGA,
   3757			i915_ggtt_offset(engine->status_page.vma));
   3758}
   3759
   3760static void start_engine(struct intel_engine_cs *engine)
   3761{
   3762	ENGINE_WRITE_FW(engine,
   3763			RING_MODE_GEN7,
   3764			_MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
   3765
   3766	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
   3767	ENGINE_POSTING_READ(engine, RING_MI_MODE);
   3768}
   3769
   3770static int guc_resume(struct intel_engine_cs *engine)
   3771{
   3772	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
   3773
   3774	intel_mocs_init_engine(engine);
   3775
   3776	intel_breadcrumbs_reset(engine->breadcrumbs);
   3777
   3778	setup_hwsp(engine);
   3779	start_engine(engine);
   3780
   3781	if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
   3782		xehp_enable_ccs_engines(engine);
   3783
   3784	return 0;
   3785}
   3786
   3787static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
   3788{
   3789	return !sched_engine->tasklet.callback;
   3790}
   3791
   3792static void guc_set_default_submission(struct intel_engine_cs *engine)
   3793{
   3794	engine->submit_request = guc_submit_request;
   3795}
   3796
   3797static inline void guc_kernel_context_pin(struct intel_guc *guc,
   3798					  struct intel_context *ce)
   3799{
   3800	/*
   3801	 * Note: we purposefully do not check the returns below because
   3802	 * the registration can only fail if a reset is just starting.
   3803	 * This is called at the end of reset so presumably another reset
   3804	 * isn't happening and even if it did this code would be run again.
   3805	 */
   3806
   3807	if (context_guc_id_invalid(ce))
   3808		pin_guc_id(guc, ce);
   3809
   3810	try_context_registration(ce, true);
   3811}
   3812
   3813static inline void guc_init_lrc_mapping(struct intel_guc *guc)
   3814{
   3815	struct intel_gt *gt = guc_to_gt(guc);
   3816	struct intel_engine_cs *engine;
   3817	enum intel_engine_id id;
   3818
   3819	/* make sure all descriptors are clean... */
   3820	xa_destroy(&guc->context_lookup);
   3821
   3822	/*
   3823	 * Some contexts might have been pinned before we enabled GuC
   3824	 * submission, so we need to add them to the GuC bookkeeping.
   3825	 * Also, after a reset of the GuC we want to make sure that the
   3826	 * information shared with GuC is properly reset. The kernel LRCs are
   3827	 * not attached to the gem_context, so they need to be added separately.
   3828	 */
   3829	for_each_engine(engine, gt, id) {
   3830		struct intel_context *ce;
   3831
   3832		list_for_each_entry(ce, &engine->pinned_contexts_list,
   3833				    pinned_contexts_link)
   3834			guc_kernel_context_pin(guc, ce);
   3835	}
   3836}
   3837
   3838static void guc_release(struct intel_engine_cs *engine)
   3839{
   3840	engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
   3841
   3842	intel_engine_cleanup_common(engine);
   3843	lrc_fini_wa_ctx(engine);
   3844}
   3845
   3846static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
   3847{
   3848	struct intel_engine_cs *e;
   3849	intel_engine_mask_t tmp, mask = engine->mask;
   3850
   3851	for_each_engine_masked(e, engine->gt, mask, tmp)
   3852		e->serial++;
   3853}
   3854
   3855static void guc_default_vfuncs(struct intel_engine_cs *engine)
   3856{
   3857	/* Default vfuncs which can be overridden by each engine. */
   3858
   3859	engine->resume = guc_resume;
   3860
   3861	engine->cops = &guc_context_ops;
   3862	engine->request_alloc = guc_request_alloc;
   3863	engine->add_active_request = add_to_context;
   3864	engine->remove_active_request = remove_from_context;
   3865
   3866	engine->sched_engine->schedule = i915_schedule;
   3867
   3868	engine->reset.prepare = guc_engine_reset_prepare;
   3869	engine->reset.rewind = guc_rewind_nop;
   3870	engine->reset.cancel = guc_reset_nop;
   3871	engine->reset.finish = guc_reset_nop;
   3872
   3873	engine->emit_flush = gen8_emit_flush_xcs;
   3874	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
   3875	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
   3876	if (GRAPHICS_VER(engine->i915) >= 12) {
   3877		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
   3878		engine->emit_flush = gen12_emit_flush_xcs;
   3879	}
   3880	engine->set_default_submission = guc_set_default_submission;
   3881	engine->busyness = guc_engine_busyness;
   3882
   3883	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
   3884	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
   3885	engine->flags |= I915_ENGINE_HAS_TIMESLICES;
   3886
   3887	/* Wa_14014475959:dg2 */
   3888	if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS)
   3889		engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
   3890
   3891	/*
   3892	 * TODO: GuC supports timeslicing and semaphores as well, but they're
   3893	 * handled by the firmware so some minor tweaks are required before
   3894	 * enabling.
   3895	 *
   3896	 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
   3897	 */
   3898
   3899	engine->emit_bb_start = gen8_emit_bb_start;
   3900	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
   3901		engine->emit_bb_start = gen125_emit_bb_start;
   3902}
   3903
   3904static void rcs_submission_override(struct intel_engine_cs *engine)
   3905{
   3906	switch (GRAPHICS_VER(engine->i915)) {
   3907	case 12:
   3908		engine->emit_flush = gen12_emit_flush_rcs;
   3909		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
   3910		break;
   3911	case 11:
   3912		engine->emit_flush = gen11_emit_flush_rcs;
   3913		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
   3914		break;
   3915	default:
   3916		engine->emit_flush = gen8_emit_flush_rcs;
   3917		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
   3918		break;
   3919	}
   3920}
   3921
   3922static inline void guc_default_irqs(struct intel_engine_cs *engine)
   3923{
   3924	engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
   3925	intel_engine_set_irq_handler(engine, cs_irq_handler);
   3926}
   3927
   3928static void guc_sched_engine_destroy(struct kref *kref)
   3929{
   3930	struct i915_sched_engine *sched_engine =
   3931		container_of(kref, typeof(*sched_engine), ref);
   3932	struct intel_guc *guc = sched_engine->private_data;
   3933
   3934	guc->sched_engine = NULL;
   3935	tasklet_kill(&sched_engine->tasklet); /* flush the callback */
   3936	kfree(sched_engine);
   3937}
   3938
   3939int intel_guc_submission_setup(struct intel_engine_cs *engine)
   3940{
   3941	struct drm_i915_private *i915 = engine->i915;
   3942	struct intel_guc *guc = &engine->gt->uc.guc;
   3943
   3944	/*
   3945	 * The setup relies on several assumptions (e.g. irqs always enabled)
   3946	 * that are only valid on gen11+
   3947	 */
   3948	GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
   3949
   3950	if (!guc->sched_engine) {
   3951		guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
   3952		if (!guc->sched_engine)
   3953			return -ENOMEM;
   3954
   3955		guc->sched_engine->schedule = i915_schedule;
   3956		guc->sched_engine->disabled = guc_sched_engine_disabled;
   3957		guc->sched_engine->private_data = guc;
   3958		guc->sched_engine->destroy = guc_sched_engine_destroy;
   3959		guc->sched_engine->bump_inflight_request_prio =
   3960			guc_bump_inflight_request_prio;
   3961		guc->sched_engine->retire_inflight_request_prio =
   3962			guc_retire_inflight_request_prio;
   3963		tasklet_setup(&guc->sched_engine->tasklet,
   3964			      guc_submission_tasklet);
   3965	}
   3966	i915_sched_engine_put(engine->sched_engine);
   3967	engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
   3968
   3969	guc_default_vfuncs(engine);
   3970	guc_default_irqs(engine);
   3971	guc_init_breadcrumbs(engine);
   3972
   3973	if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
   3974		rcs_submission_override(engine);
   3975
   3976	lrc_init_wa_ctx(engine);
   3977
   3978	/* Finally, take ownership and responsibility for cleanup! */
   3979	engine->sanitize = guc_sanitize;
   3980	engine->release = guc_release;
   3981
   3982	return 0;
   3983}
   3984
   3985void intel_guc_submission_enable(struct intel_guc *guc)
   3986{
   3987	guc_init_lrc_mapping(guc);
   3988	guc_init_engine_stats(guc);
   3989}
   3990
   3991void intel_guc_submission_disable(struct intel_guc *guc)
   3992{
   3993	/* Note: By the time we're here, GuC may have already been reset */
   3994}
   3995
   3996static bool __guc_submission_supported(struct intel_guc *guc)
   3997{
   3998	/* GuC submission is unavailable for pre-Gen11 */
   3999	return intel_guc_is_supported(guc) &&
   4000	       GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
   4001}
   4002
   4003static bool __guc_submission_selected(struct intel_guc *guc)
   4004{
   4005	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
   4006
   4007	if (!intel_guc_submission_is_supported(guc))
   4008		return false;
   4009
   4010	return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
   4011}
   4012
   4013void intel_guc_submission_init_early(struct intel_guc *guc)
   4014{
   4015	xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
   4016
   4017	spin_lock_init(&guc->submission_state.lock);
   4018	INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
   4019	ida_init(&guc->submission_state.guc_ids);
   4020	INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
   4021	INIT_WORK(&guc->submission_state.destroyed_worker,
   4022		  destroyed_worker_func);
   4023	INIT_WORK(&guc->submission_state.reset_fail_worker,
   4024		  reset_fail_worker_func);
   4025
   4026	spin_lock_init(&guc->timestamp.lock);
   4027	INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
   4028
   4029	guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
   4030	guc->submission_supported = __guc_submission_supported(guc);
   4031	guc->submission_selected = __guc_submission_selected(guc);
   4032}
   4033
   4034static inline struct intel_context *
   4035g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
   4036{
   4037	struct intel_context *ce;
   4038
   4039	if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
   4040		drm_err(&guc_to_gt(guc)->i915->drm,
   4041			"Invalid ctx_id %u\n", ctx_id);
   4042		return NULL;
   4043	}
   4044
   4045	ce = __get_context(guc, ctx_id);
   4046	if (unlikely(!ce)) {
   4047		drm_err(&guc_to_gt(guc)->i915->drm,
   4048			"Context is NULL, ctx_id %u\n", ctx_id);
   4049		return NULL;
   4050	}
   4051
   4052	if (unlikely(intel_context_is_child(ce))) {
   4053		drm_err(&guc_to_gt(guc)->i915->drm,
   4054			"Context is child, ctx_id %u\n", ctx_id);
   4055		return NULL;
   4056	}
   4057
   4058	return ce;
   4059}
   4060
   4061int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
   4062					  const u32 *msg,
   4063					  u32 len)
   4064{
   4065	struct intel_context *ce;
   4066	u32 ctx_id;
   4067
   4068	if (unlikely(len < 1)) {
   4069		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
   4070		return -EPROTO;
   4071	}
   4072	ctx_id = msg[0];
   4073
   4074	ce = g2h_context_lookup(guc, ctx_id);
   4075	if (unlikely(!ce))
   4076		return -EPROTO;
   4077
   4078	trace_intel_context_deregister_done(ce);
   4079
   4080#ifdef CONFIG_DRM_I915_SELFTEST
   4081	if (unlikely(ce->drop_deregister)) {
   4082		ce->drop_deregister = false;
   4083		return 0;
   4084	}
   4085#endif
   4086
   4087	if (context_wait_for_deregister_to_register(ce)) {
   4088		struct intel_runtime_pm *runtime_pm =
   4089			&ce->engine->gt->i915->runtime_pm;
   4090		intel_wakeref_t wakeref;
   4091
   4092		/*
   4093		 * The previous owner of this guc_id has been deregistered, so it
   4094		 * is now safe to register this context.
   4095		 */
   4096		with_intel_runtime_pm(runtime_pm, wakeref)
   4097			register_context(ce, true);
   4098		guc_signal_context_fence(ce);
   4099		intel_context_put(ce);
   4100	} else if (context_destroyed(ce)) {
   4101		/* Context has been destroyed */
   4102		intel_gt_pm_put_async(guc_to_gt(guc));
   4103		release_guc_id(guc, ce);
   4104		__guc_context_destroy(ce);
   4105	}
   4106
   4107	decr_outstanding_submission_g2h(guc);
   4108
   4109	return 0;
   4110}
   4111
   4112int intel_guc_sched_done_process_msg(struct intel_guc *guc,
   4113				     const u32 *msg,
   4114				     u32 len)
   4115{
   4116	struct intel_context *ce;
   4117	unsigned long flags;
   4118	u32 ctx_id;
   4119
   4120	if (unlikely(len < 2)) {
   4121		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
   4122		return -EPROTO;
   4123	}
   4124	ctx_id = msg[0];
   4125
   4126	ce = g2h_context_lookup(guc, ctx_id);
   4127	if (unlikely(!ce))
   4128		return -EPROTO;
   4129
   4130	if (unlikely(context_destroyed(ce) ||
   4131		     (!context_pending_enable(ce) &&
   4132		     !context_pending_disable(ce)))) {
   4133		drm_err(&guc_to_gt(guc)->i915->drm,
   4134			"Bad context sched_state 0x%x, ctx_id %u\n",
   4135			ce->guc_state.sched_state, ctx_id);
   4136		return -EPROTO;
   4137	}
   4138
   4139	trace_intel_context_sched_done(ce);
   4140
   4141	if (context_pending_enable(ce)) {
   4142#ifdef CONFIG_DRM_I915_SELFTEST
   4143		if (unlikely(ce->drop_schedule_enable)) {
   4144			ce->drop_schedule_enable = false;
   4145			return 0;
   4146		}
   4147#endif
   4148
   4149		spin_lock_irqsave(&ce->guc_state.lock, flags);
   4150		clr_context_pending_enable(ce);
   4151		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   4152	} else if (context_pending_disable(ce)) {
   4153		bool banned;
   4154
   4155#ifdef CONFIG_DRM_I915_SELFTEST
   4156		if (unlikely(ce->drop_schedule_disable)) {
   4157			ce->drop_schedule_disable = false;
   4158			return 0;
   4159		}
   4160#endif
   4161
   4162		/*
   4163		 * Unpin must be done before __guc_signal_context_fence,
   4164		 * otherwise a race exists between the requests getting
   4165		 * submitted + retired before this unpin completes resulting in
   4166		 * the pin_count going to zero and the context still being
   4167		 * enabled.
   4168		 */
   4169		intel_context_sched_disable_unpin(ce);
   4170
   4171		spin_lock_irqsave(&ce->guc_state.lock, flags);
   4172		banned = context_banned(ce);
   4173		clr_context_banned(ce);
   4174		clr_context_pending_disable(ce);
   4175		__guc_signal_context_fence(ce);
   4176		guc_blocked_fence_complete(ce);
   4177		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   4178
   4179		if (banned) {
   4180			guc_cancel_context_requests(ce);
   4181			intel_engine_signal_breadcrumbs(ce->engine);
   4182		}
   4183	}
   4184
   4185	decr_outstanding_submission_g2h(guc);
   4186	intel_context_put(ce);
   4187
   4188	return 0;
   4189}
   4190
   4191static void capture_error_state(struct intel_guc *guc,
   4192				struct intel_context *ce)
   4193{
   4194	struct intel_gt *gt = guc_to_gt(guc);
   4195	struct drm_i915_private *i915 = gt->i915;
   4196	struct intel_engine_cs *engine = __context_to_physical_engine(ce);
   4197	intel_wakeref_t wakeref;
   4198
   4199	intel_engine_set_hung_context(engine, ce);
   4200	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
   4201		i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
   4202	atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
   4203}
   4204
   4205static void guc_context_replay(struct intel_context *ce)
   4206{
   4207	struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
   4208
   4209	__guc_reset_context(ce, ce->engine->mask);
   4210	tasklet_hi_schedule(&sched_engine->tasklet);
   4211}
   4212
   4213static void guc_handle_context_reset(struct intel_guc *guc,
   4214				     struct intel_context *ce)
   4215{
   4216	trace_intel_context_reset(ce);
   4217
   4218	if (likely(!intel_context_is_banned(ce))) {
   4219		capture_error_state(guc, ce);
   4220		guc_context_replay(ce);
   4221	} else {
   4222		drm_info(&guc_to_gt(guc)->i915->drm,
   4223			 "Ignoring context reset notification of banned context 0x%04X on %s",
   4224			 ce->guc_id.id, ce->engine->name);
   4225	}
   4226}
   4227
   4228int intel_guc_context_reset_process_msg(struct intel_guc *guc,
   4229					const u32 *msg, u32 len)
   4230{
   4231	struct intel_context *ce;
   4232	unsigned long flags;
   4233	int ctx_id;
   4234
   4235	if (unlikely(len != 1)) {
   4236		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
   4237		return -EPROTO;
   4238	}
   4239
   4240	ctx_id = msg[0];
   4241
   4242	/*
   4243	 * The context lookup uses the xarray but lookups only require an RCU lock,
   4244	 * not the full spinlock. So take the lock explicitly and keep it until a
   4245	 * reference to the context has been taken, to ensure it can't be destroyed
   4246	 * asynchronously before the reset is done.
   4247	 */
   4248	xa_lock_irqsave(&guc->context_lookup, flags);
   4249	ce = g2h_context_lookup(guc, ctx_id);
   4250	if (ce)
   4251		intel_context_get(ce);
   4252	xa_unlock_irqrestore(&guc->context_lookup, flags);
   4253
   4254	if (unlikely(!ce))
   4255		return -EPROTO;
   4256
   4257	guc_handle_context_reset(guc, ce);
   4258	intel_context_put(ce);
   4259
   4260	return 0;
   4261}
   4262
   4263int intel_guc_error_capture_process_msg(struct intel_guc *guc,
   4264					const u32 *msg, u32 len)
   4265{
   4266	u32 status;
   4267
   4268	if (unlikely(len != 1)) {
   4269		drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
   4270		return -EPROTO;
   4271	}
   4272
   4273	status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
   4274	if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
   4275		drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space");
   4276
   4277	intel_guc_capture_process(guc);
   4278
   4279	return 0;
   4280}
   4281
   4282struct intel_engine_cs *
   4283intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
   4284{
   4285	struct intel_gt *gt = guc_to_gt(guc);
   4286	u8 engine_class = guc_class_to_engine_class(guc_class);
   4287
   4288	/* Class index is checked in class converter */
   4289	GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
   4290
   4291	return gt->engine_class[engine_class][instance];
   4292}
   4293
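/*
 * Worker to trigger a GT reset for engines the GuC reported it failed to
 * reset; scheduled from intel_guc_engine_failure_process_msg() because a GT
 * reset flushes the G2H handler that received the notification.
 */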
   4294static void reset_fail_worker_func(struct work_struct *w)
   4295{
   4296	struct intel_guc *guc = container_of(w, struct intel_guc,
   4297					     submission_state.reset_fail_worker);
   4298	struct intel_gt *gt = guc_to_gt(guc);
   4299	intel_engine_mask_t reset_fail_mask;
   4300	unsigned long flags;
   4301
   4302	spin_lock_irqsave(&guc->submission_state.lock, flags);
   4303	reset_fail_mask = guc->submission_state.reset_fail_mask;
   4304	guc->submission_state.reset_fail_mask = 0;
   4305	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
   4306
   4307	if (likely(reset_fail_mask))
   4308		intel_gt_handle_error(gt, reset_fail_mask,
   4309				      I915_ERROR_CAPTURE,
   4310				      "GuC failed to reset engine mask=0x%x\n",
   4311				      reset_fail_mask);
   4312}
   4313
   4314int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
   4315					 const u32 *msg, u32 len)
   4316{
   4317	struct intel_engine_cs *engine;
   4318	struct intel_gt *gt = guc_to_gt(guc);
   4319	u8 guc_class, instance;
   4320	u32 reason;
   4321	unsigned long flags;
   4322
   4323	if (unlikely(len != 3)) {
   4324		drm_err(&gt->i915->drm, "Invalid length %u", len);
   4325		return -EPROTO;
   4326	}
   4327
   4328	guc_class = msg[0];
   4329	instance = msg[1];
   4330	reason = msg[2];
   4331
   4332	engine = intel_guc_lookup_engine(guc, guc_class, instance);
   4333	if (unlikely(!engine)) {
   4334		drm_err(&gt->i915->drm,
   4335			"Invalid engine %d:%d", guc_class, instance);
   4336		return -EPROTO;
   4337	}
   4338
   4339	/*
   4340	 * This is an unexpected failure of a hardware feature. So, log a real
   4341	 * error message, not just the informational one that comes with the reset.
   4342	 */
   4343	drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
   4344		guc_class, instance, engine->name, reason);
   4345
   4346	spin_lock_irqsave(&guc->submission_state.lock, flags);
   4347	guc->submission_state.reset_fail_mask |= engine->mask;
   4348	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
   4349
   4350	/*
   4351	 * A GT reset flushes this worker queue (G2H handler) so we must use
   4352	 * another worker to trigger a GT reset.
   4353	 */
   4354	queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);
   4355
   4356	return 0;
   4357}
   4358
   4359void intel_guc_find_hung_context(struct intel_engine_cs *engine)
   4360{
   4361	struct intel_guc *guc = &engine->gt->uc.guc;
   4362	struct intel_context *ce;
   4363	struct i915_request *rq;
   4364	unsigned long index;
   4365	unsigned long flags;
   4366
   4367	/* Reset called during driver load? GuC not yet initialised! */
   4368	if (unlikely(!guc_submission_initialized(guc)))
   4369		return;
   4370
   4371	xa_lock_irqsave(&guc->context_lookup, flags);
   4372	xa_for_each(&guc->context_lookup, index, ce) {
   4373		if (!kref_get_unless_zero(&ce->ref))
   4374			continue;
   4375
   4376		xa_unlock(&guc->context_lookup);
   4377
   4378		if (!intel_context_is_pinned(ce))
   4379			goto next;
   4380
   4381		if (intel_engine_is_virtual(ce->engine)) {
   4382			if (!(ce->engine->mask & engine->mask))
   4383				goto next;
   4384		} else {
   4385			if (ce->engine != engine)
   4386				goto next;
   4387		}
   4388
   4389		list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
   4390			if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
   4391				continue;
   4392
   4393			intel_engine_set_hung_context(engine, ce);
   4394
   4395			/* Can only cope with one hang at a time... */
   4396			intel_context_put(ce);
   4397			xa_lock(&guc->context_lookup);
   4398			goto done;
   4399		}
   4400next:
   4401		intel_context_put(ce);
   4402		xa_lock(&guc->context_lookup);
   4403	}
   4404done:
   4405	xa_unlock_irqrestore(&guc->context_lookup, flags);
   4406}
   4407
   4408void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
   4409				    struct i915_request *hung_rq,
   4410				    struct drm_printer *m)
   4411{
   4412	struct intel_guc *guc = &engine->gt->uc.guc;
   4413	struct intel_context *ce;
   4414	unsigned long index;
   4415	unsigned long flags;
   4416
   4417	/* Reset called during driver load? GuC not yet initialised! */
   4418	if (unlikely(!guc_submission_initialized(guc)))
   4419		return;
   4420
   4421	xa_lock_irqsave(&guc->context_lookup, flags);
   4422	xa_for_each(&guc->context_lookup, index, ce) {
   4423		if (!kref_get_unless_zero(&ce->ref))
   4424			continue;
   4425
   4426		xa_unlock(&guc->context_lookup);
   4427
   4428		if (!intel_context_is_pinned(ce))
   4429			goto next;
   4430
   4431		if (intel_engine_is_virtual(ce->engine)) {
   4432			if (!(ce->engine->mask & engine->mask))
   4433				goto next;
   4434		} else {
   4435			if (ce->engine != engine)
   4436				goto next;
   4437		}
   4438
   4439		spin_lock(&ce->guc_state.lock);
   4440		intel_engine_dump_active_requests(&ce->guc_state.requests,
   4441						  hung_rq, m);
   4442		spin_unlock(&ce->guc_state.lock);
   4443
   4444next:
   4445		intel_context_put(ce);
   4446		xa_lock(&guc->context_lookup);
   4447	}
   4448	xa_unlock_irqrestore(&guc->context_lookup, flags);
   4449}
   4450
   4451void intel_guc_submission_print_info(struct intel_guc *guc,
   4452				     struct drm_printer *p)
   4453{
   4454	struct i915_sched_engine *sched_engine = guc->sched_engine;
   4455	struct rb_node *rb;
   4456	unsigned long flags;
   4457
   4458	if (!sched_engine)
   4459		return;
   4460
   4461	drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
   4462		   atomic_read(&guc->outstanding_submission_g2h));
   4463	drm_printf(p, "GuC tasklet count: %u\n\n",
   4464		   atomic_read(&sched_engine->tasklet.count));
   4465
   4466	spin_lock_irqsave(&sched_engine->lock, flags);
   4467	drm_printf(p, "Requests in GuC submit tasklet:\n");
   4468	for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
   4469		struct i915_priolist *pl = to_priolist(rb);
   4470		struct i915_request *rq;
   4471
   4472		priolist_for_each_request(rq, pl)
   4473			drm_printf(p, "guc_id=%u, seqno=%llu\n",
   4474				   rq->context->guc_id.id,
   4475				   rq->fence.seqno);
   4476	}
   4477	spin_unlock_irqrestore(&sched_engine->lock, flags);
   4478	drm_printf(p, "\n");
   4479}
   4480
   4481static inline void guc_log_context_priority(struct drm_printer *p,
   4482					    struct intel_context *ce)
   4483{
   4484	int i;
   4485
   4486	drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
   4487	drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
   4488	for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
   4489	     i < GUC_CLIENT_PRIORITY_NUM; ++i) {
   4490		drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
   4491			   i, ce->guc_state.prio_count[i]);
   4492	}
   4493	drm_printf(p, "\n");
   4494}
   4495
   4496static inline void guc_log_context(struct drm_printer *p,
   4497				   struct intel_context *ce)
   4498{
   4499	drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
   4500	drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
   4501	drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
   4502		   ce->ring->head,
   4503		   ce->lrc_reg_state[CTX_RING_HEAD]);
   4504	drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
   4505		   ce->ring->tail,
   4506		   ce->lrc_reg_state[CTX_RING_TAIL]);
   4507	drm_printf(p, "\t\tContext Pin Count: %u\n",
   4508		   atomic_read(&ce->pin_count));
   4509	drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
   4510		   atomic_read(&ce->guc_id.ref));
   4511	drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
   4512		   ce->guc_state.sched_state);
   4513}
   4514
   4515void intel_guc_submission_print_context_info(struct intel_guc *guc,
   4516					     struct drm_printer *p)
   4517{
   4518	struct intel_context *ce;
   4519	unsigned long index;
   4520	unsigned long flags;
   4521
   4522	xa_lock_irqsave(&guc->context_lookup, flags);
   4523	xa_for_each(&guc->context_lookup, index, ce) {
   4524		GEM_BUG_ON(intel_context_is_child(ce));
   4525
   4526		guc_log_context(p, ce);
   4527		guc_log_context_priority(p, ce);
   4528
   4529		if (intel_context_is_parent(ce)) {
   4530			struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
   4531			struct intel_context *child;
   4532
   4533			drm_printf(p, "\t\tNumber children: %u\n",
   4534				   ce->parallel.number_children);
   4535			drm_printf(p, "\t\tWQI Head: %u\n",
   4536				   READ_ONCE(wq_desc->head));
   4537			drm_printf(p, "\t\tWQI Tail: %u\n",
   4538				   READ_ONCE(wq_desc->tail));
   4539			drm_printf(p, "\t\tWQI Status: %u\n\n",
   4540				   READ_ONCE(wq_desc->wq_status));
   4541
   4542			if (ce->engine->emit_bb_start ==
   4543			    emit_bb_start_parent_no_preempt_mid_batch) {
   4544				u8 i;
   4545
   4546				drm_printf(p, "\t\tChildren Go: %u\n\n",
   4547					   get_children_go_value(ce));
   4548				for (i = 0; i < ce->parallel.number_children; ++i)
   4549					drm_printf(p, "\t\tChildren Join: %u\n",
   4550						   get_children_join_value(ce, i));
   4551			}
   4552
   4553			for_each_child(ce, child)
   4554				guc_log_context(p, child);
   4555		}
   4556	}
   4557	xa_unlock_irqrestore(&guc->context_lookup, flags);
   4558}
   4559
   4560static inline u32 get_children_go_addr(struct intel_context *ce)
   4561{
   4562	GEM_BUG_ON(!intel_context_is_parent(ce));
   4563
   4564	return i915_ggtt_offset(ce->state) +
   4565		__get_parent_scratch_offset(ce) +
   4566		offsetof(struct parent_scratch, go.semaphore);
   4567}
   4568
   4569static inline u32 get_children_join_addr(struct intel_context *ce,
   4570					 u8 child_index)
   4571{
   4572	GEM_BUG_ON(!intel_context_is_parent(ce));
   4573
   4574	return i915_ggtt_offset(ce->state) +
   4575		__get_parent_scratch_offset(ce) +
   4576		offsetof(struct parent_scratch, join[child_index].semaphore);
   4577}
   4578
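/*
 * Semaphore token values for the parent / child handshakes: each child signals
 * PARENT_GO_* into its join semaphore and polls the parent's go semaphore for
 * CHILD_GO_*, once around the BB start and once around the fini breadcrumb.
 */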
   4579#define PARENT_GO_BB			1
   4580#define PARENT_GO_FINI_BREADCRUMB	0
   4581#define CHILD_GO_BB			1
   4582#define CHILD_GO_FINI_BREADCRUMB	0
   4583static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
   4584						     u64 offset, u32 len,
   4585						     const unsigned int flags)
   4586{
   4587	struct intel_context *ce = rq->context;
   4588	u32 *cs;
   4589	u8 i;
   4590
   4591	GEM_BUG_ON(!intel_context_is_parent(ce));
   4592
   4593	cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
   4594	if (IS_ERR(cs))
   4595		return PTR_ERR(cs);
   4596
   4597	/* Wait on children */
   4598	for (i = 0; i < ce->parallel.number_children; ++i) {
   4599		*cs++ = (MI_SEMAPHORE_WAIT |
   4600			 MI_SEMAPHORE_GLOBAL_GTT |
   4601			 MI_SEMAPHORE_POLL |
   4602			 MI_SEMAPHORE_SAD_EQ_SDD);
   4603		*cs++ = PARENT_GO_BB;
   4604		*cs++ = get_children_join_addr(ce, i);
   4605		*cs++ = 0;
   4606	}
   4607
   4608	/* Turn off preemption */
   4609	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   4610	*cs++ = MI_NOOP;
   4611
   4612	/* Tell children go */
   4613	cs = gen8_emit_ggtt_write(cs,
   4614				  CHILD_GO_BB,
   4615				  get_children_go_addr(ce),
   4616				  0);
   4617
   4618	/* Jump to batch */
   4619	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
   4620		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
   4621	*cs++ = lower_32_bits(offset);
   4622	*cs++ = upper_32_bits(offset);
   4623	*cs++ = MI_NOOP;
   4624
   4625	intel_ring_advance(rq, cs);
   4626
   4627	return 0;
   4628}
   4629
   4630static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
   4631						    u64 offset, u32 len,
   4632						    const unsigned int flags)
   4633{
   4634	struct intel_context *ce = rq->context;
   4635	struct intel_context *parent = intel_context_to_parent(ce);
   4636	u32 *cs;
   4637
   4638	GEM_BUG_ON(!intel_context_is_child(ce));
   4639
   4640	cs = intel_ring_begin(rq, 12);
   4641	if (IS_ERR(cs))
   4642		return PTR_ERR(cs);
   4643
   4644	/* Signal parent */
   4645	cs = gen8_emit_ggtt_write(cs,
   4646				  PARENT_GO_BB,
   4647				  get_children_join_addr(parent,
   4648							 ce->parallel.child_index),
   4649				  0);
   4650
   4651	/* Wait on parent for go */
   4652	*cs++ = (MI_SEMAPHORE_WAIT |
   4653		 MI_SEMAPHORE_GLOBAL_GTT |
   4654		 MI_SEMAPHORE_POLL |
   4655		 MI_SEMAPHORE_SAD_EQ_SDD);
   4656	*cs++ = CHILD_GO_BB;
   4657	*cs++ = get_children_go_addr(parent);
   4658	*cs++ = 0;
   4659
   4660	/* Turn off preemption */
   4661	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   4662
   4663	/* Jump to batch */
   4664	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
   4665		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
   4666	*cs++ = lower_32_bits(offset);
   4667	*cs++ = upper_32_bits(offset);
   4668
   4669	intel_ring_advance(rq, cs);
   4670
   4671	return 0;
   4672}
   4673
   4674static u32 *
   4675__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
   4676						   u32 *cs)
   4677{
   4678	struct intel_context *ce = rq->context;
   4679	u8 i;
   4680
   4681	GEM_BUG_ON(!intel_context_is_parent(ce));
   4682
   4683	/* Wait on children */
   4684	for (i = 0; i < ce->parallel.number_children; ++i) {
   4685		*cs++ = (MI_SEMAPHORE_WAIT |
   4686			 MI_SEMAPHORE_GLOBAL_GTT |
   4687			 MI_SEMAPHORE_POLL |
   4688			 MI_SEMAPHORE_SAD_EQ_SDD);
   4689		*cs++ = PARENT_GO_FINI_BREADCRUMB;
   4690		*cs++ = get_children_join_addr(ce, i);
   4691		*cs++ = 0;
   4692	}
   4693
   4694	/* Turn on preemption */
   4695	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   4696	*cs++ = MI_NOOP;
   4697
   4698	/* Tell children go */
   4699	cs = gen8_emit_ggtt_write(cs,
   4700				  CHILD_GO_FINI_BREADCRUMB,
   4701				  get_children_go_addr(ce),
   4702				  0);
   4703
   4704	return cs;
   4705}
   4706
   4707/*
   4708 * If this is true, a submission of multi-lrc requests had an error and the
   4709 * requests need to be skipped. The front end (execbuf IOCTL) should've called
   4710 * i915_request_skip which squashes the BB but we still need to emit the fini
   4711 * breadcrumb seqno write. At this point we don't know how many of the
   4712 * requests in the multi-lrc submission were generated so we can't do the
   4713 * handshake between the parent and children (e.g. if 4 requests should be
   4714 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend).
   4715 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
   4716 * has occurred on any of the requests in the submission / relationship.
   4717 */
   4718static inline bool skip_handshake(struct i915_request *rq)
   4719{
   4720	return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
   4721}
   4722
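/*
 * Number of dwords emitted unconditionally at the end of the fini breadcrumb,
 * even when the handshake is skipped: the seqno GGTT write plus the user
 * interrupt and trailing NOOP.
 */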
   4723#define NON_SKIP_LEN	6
   4724static u32 *
   4725emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
   4726						 u32 *cs)
   4727{
   4728	struct intel_context *ce = rq->context;
   4729	__maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
   4730	__maybe_unused u32 *start_fini_breadcrumb_cs = cs;
   4731
   4732	GEM_BUG_ON(!intel_context_is_parent(ce));
   4733
   4734	if (unlikely(skip_handshake(rq))) {
   4735		/*
   4736		 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
   4737		 * the NON_SKIP_LEN comes from the length of the emits below.
   4738		 */
   4739		memset(cs, 0, sizeof(u32) *
   4740		       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
   4741		cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
   4742	} else {
   4743		cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
   4744	}
   4745
   4746	/* Emit fini breadcrumb */
   4747	before_fini_breadcrumb_user_interrupt_cs = cs;
   4748	cs = gen8_emit_ggtt_write(cs,
   4749				  rq->fence.seqno,
   4750				  i915_request_active_timeline(rq)->hwsp_offset,
   4751				  0);
   4752
   4753	/* User interrupt */
   4754	*cs++ = MI_USER_INTERRUPT;
   4755	*cs++ = MI_NOOP;
   4756
   4757	/* Ensure our math for skip + emit is correct */
   4758	GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
   4759		   cs);
   4760	GEM_BUG_ON(start_fini_breadcrumb_cs +
   4761		   ce->engine->emit_fini_breadcrumb_dw != cs);
   4762
   4763	rq->tail = intel_ring_offset(rq, cs);
   4764
   4765	return cs;
   4766}
   4767
   4768static u32 *
   4769__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
   4770						  u32 *cs)
   4771{
   4772	struct intel_context *ce = rq->context;
   4773	struct intel_context *parent = intel_context_to_parent(ce);
   4774
   4775	GEM_BUG_ON(!intel_context_is_child(ce));
   4776
   4777	/* Turn on preemption */
   4778	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   4779	*cs++ = MI_NOOP;
   4780
   4781	/* Signal parent */
   4782	cs = gen8_emit_ggtt_write(cs,
   4783				  PARENT_GO_FINI_BREADCRUMB,
   4784				  get_children_join_addr(parent,
   4785							 ce->parallel.child_index),
   4786				  0);
   4787
   4788	/* Wait on parent for go */
   4789	*cs++ = (MI_SEMAPHORE_WAIT |
   4790		 MI_SEMAPHORE_GLOBAL_GTT |
   4791		 MI_SEMAPHORE_POLL |
   4792		 MI_SEMAPHORE_SAD_EQ_SDD);
   4793	*cs++ = CHILD_GO_FINI_BREADCRUMB;
   4794	*cs++ = get_children_go_addr(parent);
   4795	*cs++ = 0;
   4796
   4797	return cs;
   4798}
   4799
   4800static u32 *
   4801emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
   4802						u32 *cs)
   4803{
   4804	struct intel_context *ce = rq->context;
   4805	__maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
   4806	__maybe_unused u32 *start_fini_breadcrumb_cs = cs;
   4807
   4808	GEM_BUG_ON(!intel_context_is_child(ce));
   4809
   4810	if (unlikely(skip_handshake(rq))) {
   4811		/*
   4812		 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
   4813		 * the NON_SKIP_LEN comes from the length of the emits below.
   4814		 */
   4815		memset(cs, 0, sizeof(u32) *
   4816		       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
   4817		cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
   4818	} else {
   4819		cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
   4820	}
   4821
   4822	/* Emit fini breadcrumb */
   4823	before_fini_breadcrumb_user_interrupt_cs = cs;
   4824	cs = gen8_emit_ggtt_write(cs,
   4825				  rq->fence.seqno,
   4826				  i915_request_active_timeline(rq)->hwsp_offset,
   4827				  0);
   4828
   4829	/* User interrupt */
   4830	*cs++ = MI_USER_INTERRUPT;
   4831	*cs++ = MI_NOOP;
   4832
   4833	/* Ensure our math for skip + emit is correct */
   4834	GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
   4835		   cs);
   4836	GEM_BUG_ON(start_fini_breadcrumb_cs +
   4837		   ce->engine->emit_fini_breadcrumb_dw != cs);
   4838
   4839	rq->tail = intel_ring_offset(rq, cs);
   4840
   4841	return cs;
   4842}
   4843
   4844#undef NON_SKIP_LEN
   4845
   4846static struct intel_context *
   4847guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
   4848		   unsigned long flags)
   4849{
   4850	struct guc_virtual_engine *ve;
   4851	struct intel_guc *guc;
   4852	unsigned int n;
   4853	int err;
   4854
   4855	ve = kzalloc(sizeof(*ve), GFP_KERNEL);
   4856	if (!ve)
   4857		return ERR_PTR(-ENOMEM);
   4858
   4859	guc = &siblings[0]->gt->uc.guc;
   4860
   4861	ve->base.i915 = siblings[0]->i915;
   4862	ve->base.gt = siblings[0]->gt;
   4863	ve->base.uncore = siblings[0]->uncore;
   4864	ve->base.id = -1;
   4865
   4866	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
   4867	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
   4868	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
   4869	ve->base.saturated = ALL_ENGINES;
   4870
   4871	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
   4872
   4873	ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);
   4874
   4875	ve->base.cops = &virtual_guc_context_ops;
   4876	ve->base.request_alloc = guc_request_alloc;
   4877	ve->base.bump_serial = virtual_guc_bump_serial;
   4878
   4879	ve->base.submit_request = guc_submit_request;
   4880
   4881	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
   4882
   4883	intel_context_init(&ve->context, &ve->base);
   4884
   4885	for (n = 0; n < count; n++) {
   4886		struct intel_engine_cs *sibling = siblings[n];
   4887
   4888		GEM_BUG_ON(!is_power_of_2(sibling->mask));
   4889		if (sibling->mask & ve->base.mask) {
   4890			DRM_DEBUG("duplicate %s entry in load balancer\n",
   4891				  sibling->name);
   4892			err = -EINVAL;
   4893			goto err_put;
   4894		}
   4895
   4896		ve->base.mask |= sibling->mask;
   4897		ve->base.logical_mask |= sibling->logical_mask;
   4898
   4899		if (n != 0 && ve->base.class != sibling->class) {
   4900			DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
   4901				  sibling->class, ve->base.class);
   4902			err = -EINVAL;
   4903			goto err_put;
   4904		} else if (n == 0) {
   4905			ve->base.class = sibling->class;
   4906			ve->base.uabi_class = sibling->uabi_class;
   4907			snprintf(ve->base.name, sizeof(ve->base.name),
   4908				 "v%dx%d", ve->base.class, count);
   4909			ve->base.context_size = sibling->context_size;
   4910
   4911			ve->base.add_active_request =
   4912				sibling->add_active_request;
   4913			ve->base.remove_active_request =
   4914				sibling->remove_active_request;
   4915			ve->base.emit_bb_start = sibling->emit_bb_start;
   4916			ve->base.emit_flush = sibling->emit_flush;
   4917			ve->base.emit_init_breadcrumb =
   4918				sibling->emit_init_breadcrumb;
   4919			ve->base.emit_fini_breadcrumb =
   4920				sibling->emit_fini_breadcrumb;
   4921			ve->base.emit_fini_breadcrumb_dw =
   4922				sibling->emit_fini_breadcrumb_dw;
   4923			ve->base.breadcrumbs =
   4924				intel_breadcrumbs_get(sibling->breadcrumbs);
   4925
   4926			ve->base.flags |= sibling->flags;
   4927
   4928			ve->base.props.timeslice_duration_ms =
   4929				sibling->props.timeslice_duration_ms;
   4930			ve->base.props.preempt_timeout_ms =
   4931				sibling->props.preempt_timeout_ms;
   4932		}
   4933	}
   4934
   4935	return &ve->context;
   4936
   4937err_put:
   4938	intel_context_put(&ve->context);
   4939	return ERR_PTR(err);
   4940}
   4941
   4942bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
   4943{
   4944	struct intel_engine_cs *engine;
   4945	intel_engine_mask_t tmp, mask = ve->mask;
   4946
   4947	for_each_engine_masked(engine, ve->gt, mask, tmp)
   4948		if (READ_ONCE(engine->props.heartbeat_interval_ms))
   4949			return true;
   4950
   4951	return false;
   4952}
   4953
   4954#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
   4955#include "selftest_guc.c"
   4956#include "selftest_guc_multi_lrc.c"
   4957#endif