cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

submit.c (5595B)


      1// SPDX-License-Identifier: GPL-2.0
      2/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
      3#include <linux/init.h>
      4#include <linux/kernel.h>
      5#include <linux/module.h>
      6#include <linux/pci.h>
      7#include <uapi/linux/idxd.h>
      8#include "idxd.h"
      9#include "registers.h"
     10
     11static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
     12{
     13	struct idxd_desc *desc;
     14	struct idxd_device *idxd = wq->idxd;
     15
     16	desc = wq->descs[idx];
     17	memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
     18	memset(desc->completion, 0, idxd->data->compl_size);
     19	desc->cpu = cpu;
     20
     21	if (device_pasid_enabled(idxd))
     22		desc->hw->pasid = idxd->pasid;
     23
     24	return desc;
     25}
     26
     27struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
     28{
     29	int cpu, idx;
     30	struct idxd_device *idxd = wq->idxd;
     31	DEFINE_SBQ_WAIT(wait);
     32	struct sbq_wait_state *ws;
     33	struct sbitmap_queue *sbq;
     34
     35	if (idxd->state != IDXD_DEV_ENABLED)
     36		return ERR_PTR(-EIO);
     37
     38	sbq = &wq->sbq;
     39	idx = sbitmap_queue_get(sbq, &cpu);
     40	if (idx < 0) {
     41		if (optype == IDXD_OP_NONBLOCK)
     42			return ERR_PTR(-EAGAIN);
     43	} else {
     44		return __get_desc(wq, idx, cpu);
     45	}
     46
     47	ws = &sbq->ws[0];
     48	for (;;) {
     49		sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
     50		if (signal_pending_state(TASK_INTERRUPTIBLE, current))
     51			break;
     52		idx = sbitmap_queue_get(sbq, &cpu);
     53		if (idx >= 0)
     54			break;
     55		schedule();
     56	}
     57
     58	sbitmap_finish_wait(sbq, ws, &wait);
     59	if (idx < 0)
     60		return ERR_PTR(-EAGAIN);
     61
     62	return __get_desc(wq, idx, cpu);
     63}
     64
     65void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
     66{
     67	int cpu = desc->cpu;
     68
     69	desc->cpu = -1;
     70	sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
     71}
     72
     73static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
     74					 struct idxd_desc *desc)
     75{
     76	struct idxd_desc *d, *n;
     77
     78	lockdep_assert_held(&ie->list_lock);
     79	list_for_each_entry_safe(d, n, &ie->work_list, list) {
     80		if (d == desc) {
     81			list_del(&d->list);
     82			return d;
     83		}
     84	}
     85
     86	/*
     87	 * At this point, the desc needs to be aborted is held by the completion
     88	 * handler where it has taken it off the pending list but has not added to the
     89	 * work list. It will be cleaned up by the interrupt handler when it sees the
     90	 * IDXD_COMP_DESC_ABORT for completion status.
     91	 */
     92	return NULL;
     93}
     94
     95static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
     96			     struct idxd_desc *desc)
     97{
     98	struct idxd_desc *d, *t, *found = NULL;
     99	struct llist_node *head;
    100	LIST_HEAD(flist);
    101
    102	desc->completion->status = IDXD_COMP_DESC_ABORT;
    103	/*
    104	 * Grab the list lock so it will block the irq thread handler. This allows the
    105	 * abort code to locate the descriptor need to be aborted.
    106	 */
    107	spin_lock(&ie->list_lock);
    108	head = llist_del_all(&ie->pending_llist);
    109	if (head) {
    110		llist_for_each_entry_safe(d, t, head, llnode) {
    111			if (d == desc) {
    112				found = desc;
    113				continue;
    114			}
    115
    116			if (d->completion->status)
    117				list_add_tail(&d->list, &flist);
    118			else
    119				list_add_tail(&d->list, &ie->work_list);
    120		}
    121	}
    122
    123	if (!found)
    124		found = list_abort_desc(wq, ie, desc);
    125	spin_unlock(&ie->list_lock);
    126
    127	if (found)
    128		idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
    129
    130	/*
    131	 * completing the descriptor will return desc to allocator and
    132	 * the desc can be acquired by a different process and the
    133	 * desc->list can be modified.  Delete desc from list so the
    134	 * list trasversing does not get corrupted by the other process.
    135	 */
    136	list_for_each_entry_safe(d, t, &flist, list) {
    137		list_del_init(&d->list);
    138		idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true);
    139	}
    140}
    141
    142/*
    143 * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
    144 * has better control of number of descriptors being submitted to a shared wq by limiting
    145 * the number of driver allocated descriptors to the wq size. However, when the swq is
    146 * exported to a guest kernel, it may be shared with multiple guest kernels. This means
    147 * the likelihood of getting busy returned on the swq when submitting goes significantly up.
    148 * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
    149 * up. The sysfs knob can be tuned by the system administrator.
    150 */
    151int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
    152{
    153	unsigned int retries = wq->enqcmds_retries;
    154	int rc;
    155
    156	do {
    157		rc = enqcmds(portal, desc);
    158		if (rc == 0)
    159			break;
    160		cpu_relax();
    161	} while (retries--);
    162
    163	return rc;
    164}
    165
    166int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
    167{
    168	struct idxd_device *idxd = wq->idxd;
    169	struct idxd_irq_entry *ie = NULL;
    170	u32 desc_flags = desc->hw->flags;
    171	void __iomem *portal;
    172	int rc;
    173
    174	if (idxd->state != IDXD_DEV_ENABLED)
    175		return -EIO;
    176
    177	if (!percpu_ref_tryget_live(&wq->wq_active)) {
    178		wait_for_completion(&wq->wq_resurrect);
    179		if (!percpu_ref_tryget_live(&wq->wq_active))
    180			return -ENXIO;
    181	}
    182
    183	portal = idxd_wq_portal_addr(wq);
    184
    185	/*
    186	 * The wmb() flushes writes to coherent DMA data before
    187	 * possibly triggering a DMA read. The wmb() is necessary
    188	 * even on UP because the recipient is a device.
    189	 */
    190	wmb();
    191
    192	/*
    193	 * Pending the descriptor to the lockless list for the irq_entry
    194	 * that we designated the descriptor to.
    195	 */
    196	if (desc_flags & IDXD_OP_FLAG_RCI) {
    197		ie = &wq->ie;
    198		desc->hw->int_handle = ie->int_handle;
    199		llist_add(&desc->llnode, &ie->pending_llist);
    200	}
    201
    202	if (wq_dedicated(wq)) {
    203		iosubmit_cmds512(portal, desc->hw, 1);
    204	} else {
    205		rc = idxd_enqcmds(wq, portal, desc->hw);
    206		if (rc < 0) {
    207			percpu_ref_put(&wq->wq_active);
    208			/* abort operation frees the descriptor */
    209			if (ie)
    210				llist_abort_desc(wq, ie, desc);
    211			return rc;
    212		}
    213	}
    214
    215	percpu_ref_put(&wq->wq_active);
    216	return 0;
    217}