cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

intel_breadcrumbs.c (13684B)


// SPDX-License-Identifier: MIT
/*
 * Copyright © 2015-2021 Intel Corporation
 */

#include <linux/kthread.h>
#include <linux/string_helpers.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

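/*
 * Default irq hooks installed by intel_breadcrumbs_create(): forward to
 * intel_engine_irq_enable()/intel_engine_irq_disable() for the engine
 * that drives these breadcrumbs.
 */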
static bool irq_enable(struct intel_breadcrumbs *b)
{
	return intel_engine_irq_enable(b->irq_engine);
}

static void irq_disable(struct intel_breadcrumbs *b)
{
	intel_engine_irq_disable(b->irq_engine);
}

static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, true);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && b->irq_enable(b))
		irq_work_queue(&b->irq_work);
}

static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

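/*
 * Drop our interrupt reference, disabling the irq once the last reference
 * is gone, and release the GT wakeref taken when the irq was armed.
 * Called with b->irq_lock held.
 */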
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		b->irq_disable(b);

	WRITE_ONCE(b->irq_armed, false);
	intel_gt_pm_put_async(b->irq_engine->gt);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}

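/*
 * Contexts with pending breadcrumbs are kept on b->signalers so that
 * signal_irq_work() can walk them under RCU. Addition/removal of a context
 * is serialised by b->signalers_lock, while the per-context list of
 * requests is protected by ce->signal_lock.
 */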
static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}

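/*
 * Sanity check (only used inside GEM_BUG_ON, hence __maybe_unused) that rq
 * is in seqno order with respect to its neighbours on ce->signals.
 */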
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}

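/*
 * Open-coded pieces of dma_fence_signal(): claim the SIGNALED bit, stamp
 * the completion time, then run the attached callbacks.
 */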
static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

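/* Queue the timeline for retirement on the irq engine, if there is one. */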
static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

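/*
 * Prepend a node to a caller-local singly linked list reusing the
 * llist_node storage; no atomics needed as the list is private.
 */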
static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}

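/*
 * irq_work handler: pick up any requests parked on signaled_requests, walk
 * every signaling context under RCU collecting completed requests onto a
 * local list, then signal their fences and run the dma-fence callbacks
 * outside of that walk. Also lazily disarms/rearms the breadcrumb interrupt.
 */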
static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */
	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	rcu_read_lock();
	atomic_inc(&b->signaler_active);
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			if (!__i915_request_is_complete(rq))
				break;

			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);
			if (release) {
				if (intel_timeline_is_last(ce->timeline, rq))
					add_retire(b, ce->timeline);
				intel_context_put(ce);
			}

			if (__dma_fence_signal(&rq->fence))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);
			else
				i915_request_put(rq);
		}
	}
	atomic_dec(&b->signaler_active);
	rcu_read_unlock();

	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		if (rq->engine->sched_engine->retire_inflight_request_prio)
			rq->engine->sched_engine->retire_inflight_request_prio(rq);

		spin_lock(&rq->lock);
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		i915_request_put(rq);
	}

	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);
}

struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		return NULL;

	kref_init(&b->ref);

	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	spin_lock_init(&b->irq_lock);
	init_irq_work(&b->irq_work, signal_irq_work);

	b->irq_engine = irq_engine;
	b->irq_enable = irq_enable;
	b->irq_disable = irq_disable;

	return b;
}

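/*
 * Re-apply the software irq_enabled state to the hardware, e.g. after a
 * reset has clobbered the engine's interrupt mask.
 */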
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		b->irq_enable(b);
	else
		b->irq_disable(b);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

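/*
 * Flush the irq worker and keep running it by hand until the interrupt is
 * disarmed (or the breadcrumbs become active again), so nothing is left
 * armed while the engine is parked.
 */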
void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	if (!READ_ONCE(b->irq_armed))
		return;

	/* Kick the work once more to drain the signalers, and disarm the irq */
	irq_work_sync(&b->irq_work);
	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
		local_irq_disable();
		signal_irq_work(&b->irq_work);
		local_irq_enable();
		cond_resched();
	}
}

void intel_breadcrumbs_free(struct kref *kref)
{
	struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);

	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);

	kfree(b);
}

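/*
 * Hand an already completed request to the irq worker for signaling: claim
 * the fence, take a reference, add it to signaled_requests and kick the
 * worker if the list was previously empty.
 */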
static void irq_signal_request(struct i915_request *rq,
			       struct intel_breadcrumbs *b)
{
	if (!__dma_fence_signal(&rq->fence))
		return;

	i915_request_get(rq);
	if (llist_add(&rq->signal_node, &b->signaled_requests))
		irq_work_queue(&b->irq_work);
}

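/*
 * Add the request to its context's signal list, kept in seqno order, and
 * mark it with I915_FENCE_FLAG_SIGNAL; the irq worker is then kicked so a
 * completion that raced with the insertion is not missed.
 */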
static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__i915_request_is_complete(rq)) {
		irq_signal_request(rq, b);
		return;
	}

	if (list_empty(&ce->signals)) {
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
	}

	i915_request_get(rq);
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	irq_work_queue(&b->irq_work);
}

bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	bool release;

	spin_lock(&ce->signal_lock);
	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
		spin_unlock(&ce->signal_lock);
		return;
	}

	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(b, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	if (__i915_request_is_complete(rq))
		irq_signal_request(rq, b);

	i915_request_put(rq);
}

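/*
 * Strip every remaining breadcrumb from the context: each request still on
 * ce->signals must already be complete and is handed to the irq worker for
 * signaling.
 */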
void intel_context_remove_breadcrumbs(struct intel_context *ce,
				      struct intel_breadcrumbs *b)
{
	struct i915_request *rq, *rn;
	bool release = false;
	unsigned long flags;

	spin_lock_irqsave(&ce->signal_lock, flags);

	if (list_empty(&ce->signals))
		goto unlock;

	list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
		GEM_BUG_ON(!__i915_request_is_complete(rq));
		if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
					&rq->fence.flags))
			continue;

		list_del_rcu(&rq->signal_link);
		irq_signal_request(rq, b);
		i915_request_put(rq);
	}
	release = remove_signaling_context(b, ce);

unlock:
	spin_unlock_irqrestore(&ce->signal_lock, flags);
	if (release)
		intel_context_put(ce);

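	/*
	 * Make sure any signal_irq_work() that raced with us has finished its
	 * RCU walk of the signalers before we return.
	 */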
	while (atomic_read(&b->signaler_active))
		cpu_relax();
}

static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   __i915_request_is_complete(rq) ? "!" :
				   __i915_request_has_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", str_enabled_disabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}