io-pgfault.c (12169B)
// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-sva-lib.h"

/**
 * struct iopf_queue - IO Page Fault queue
 * @wq: the fault workqueue
 * @devices: devices attached to this queue
 * @lock: protects the device list
 */
struct iopf_queue {
	struct workqueue_struct *wq;
	struct list_head devices;
	struct mutex lock;
};

/**
 * struct iopf_device_param - IO Page Fault data attached to a device
 * @dev: the device that owns this param
 * @queue: IOPF queue
 * @queue_list: index into queue->devices
 * @partial: faults that are part of a Page Request Group for which the last
 *	request hasn't been submitted yet.
 */
struct iopf_device_param {
	struct device *dev;
	struct iopf_queue *queue;
	struct list_head queue_list;
	struct list_head partial;
};

struct iopf_fault {
	struct iommu_fault fault;
	struct list_head list;
};

struct iopf_group {
	struct iopf_fault last_fault;
	struct list_head faults;
	struct work_struct work;
	struct device *dev;
};

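/*
 * Send the Page Request Group Response for the group that @iopf terminates,
 * reporting @status to the device. The PASID and group ID are taken from the
 * last fault of the group.
 */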
static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
			       enum iommu_page_response_code status)
{
	struct iommu_page_response resp = {
		.version = IOMMU_PAGE_RESP_VERSION_1,
		.pasid = iopf->fault.prm.pasid,
		.grpid = iopf->fault.prm.grpid,
		.code = status,
	};

	if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) &&
	    (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID))
		resp.flags = IOMMU_PAGE_RESP_PASID_VALID;

	return iommu_page_response(dev, &resp);
}

static enum iommu_page_response_code
iopf_handle_single(struct iopf_fault *iopf)
{
	vm_fault_t ret;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned int access_flags = 0;
	unsigned int fault_flags = FAULT_FLAG_REMOTE;
	struct iommu_fault_page_request *prm = &iopf->fault.prm;
	enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;

	if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
		return status;

	mm = iommu_sva_find(prm->pasid);
	if (IS_ERR_OR_NULL(mm))
		return status;

	mmap_read_lock(mm);

	vma = find_extend_vma(mm, prm->addr);
	if (!vma)
		/* Unmapped area */
		goto out_put_mm;

	if (prm->perm & IOMMU_FAULT_PERM_READ)
		access_flags |= VM_READ;

	if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
		access_flags |= VM_WRITE;
		fault_flags |= FAULT_FLAG_WRITE;
	}

	if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
		access_flags |= VM_EXEC;
		fault_flags |= FAULT_FLAG_INSTRUCTION;
	}

	if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
		fault_flags |= FAULT_FLAG_USER;

	if (access_flags & ~vma->vm_flags)
		/* Access fault */
		goto out_put_mm;

	ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
	status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
		IOMMU_PAGE_RESP_SUCCESS;

out_put_mm:
	mmap_read_unlock(mm);
	mmput(mm);

	return status;
}

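/*
 * Handle all page requests of a group: resolve each fault with
 * iopf_handle_single() and send a single response for the whole group once
 * the last fault has been processed.
 */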
static void iopf_handle_group(struct work_struct *work)
{
	struct iopf_group *group;
	struct iopf_fault *iopf, *next;
	enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;

	group = container_of(work, struct iopf_group, work);

	list_for_each_entry_safe(iopf, next, &group->faults, list) {
		/*
		 * For the moment, errors are sticky: don't handle subsequent
		 * faults in the group if there is an error.
		 */
		if (status == IOMMU_PAGE_RESP_SUCCESS)
			status = iopf_handle_single(iopf);

		if (!(iopf->fault.prm.flags &
		      IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
			kfree(iopf);
	}

	iopf_complete_group(group->dev, &group->last_fault, status);
	kfree(group);
}

/**
 * iommu_queue_iopf - IO Page Fault handler
 * @fault: fault event
 * @cookie: struct device, passed to iommu_register_device_fault_handler.
 *
 * Add a fault to the device workqueue, to be handled by mm.
 *
 * This module doesn't handle PCI PASID Stop Markers; IOMMU drivers must
 * discard them before reporting faults. A PASID Stop Marker (LRW = 0b100)
 * doesn't expect a response. It may be generated when disabling a PASID
 * (issuing a PASID stop request) by some PCI devices.
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses,
 * and instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization.
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the
 * stop request completes, outstanding faults will have been dealt with by the
 * time the PASID is freed.
 *
 * Return: 0 on success and <0 on error.
 */
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
{
	int ret;
	struct iopf_group *group;
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;

	struct device *dev = cookie;
	struct dev_iommu *param = dev->iommu;

	lockdep_assert_held(&param->lock);

	if (fault->type != IOMMU_FAULT_PAGE_REQ)
		/* Not a recoverable page fault */
		return -EOPNOTSUPP;

	/*
	 * As long as we're holding param->lock, the queue can't be unlinked
	 * from the device and therefore cannot disappear.
	 */
	iopf_param = param->iopf_param;
	if (!iopf_param)
		return -ENODEV;

	if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
		iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
		if (!iopf)
			return -ENOMEM;

		iopf->fault = *fault;

		/* Non-last request of a group. Postpone until the last one */
		list_add(&iopf->list, &iopf_param->partial);

		return 0;
	}

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group) {
		/*
		 * The caller will send a response to the hardware. But we do
		 * need to clean up before leaving, otherwise partial faults
		 * will be stuck.
		 */
		ret = -ENOMEM;
		goto cleanup_partial;
	}

	group->dev = dev;
	group->last_fault.fault = *fault;
	INIT_LIST_HEAD(&group->faults);
	list_add(&group->last_fault.list, &group->faults);
	INIT_WORK(&group->work, iopf_handle_group);

	/* See if we have partial faults for this group */
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == fault->prm.grpid)
			/* Insert *before* the last fault */
			list_move(&iopf->list, &group->faults);
	}

	queue_work(iopf_param->queue->wq, &group->work);
	return 0;

cleanup_partial:
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == fault->prm.grpid) {
			list_del(&iopf->list);
			kfree(iopf);
		}
	}
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_queue_iopf);

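/*
 * Reporting path (illustrative, not part of this file): the IOMMU driver
 * registers iommu_queue_iopf() as the fault handler for the device, e.g.
 *
 *	iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
 *
 * and then forwards each Page Request read from its hardware queue with
 * iommu_report_device_fault(), which invokes the handler registered above.
 */
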
/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
	int ret = 0;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param)
		return -ENODEV;

	mutex_lock(&param->lock);
	iopf_param = param->iopf_param;
	if (iopf_param)
		flush_workqueue(iopf_param->queue->wq);
	else
		ret = -ENODEV;
	mutex_unlock(&param->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);

/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, last page faults in a group may have been
 * lost and the IOMMU driver calls this to discard all partial faults. The
 * driver shouldn't be adding new faults to this queue concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;

	if (!queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	list_for_each_entry(iopf_param, &queue->devices, queue_list) {
		list_for_each_entry_safe(iopf, next, &iopf_param->partial,
					 list) {
			list_del(&iopf->list);
			kfree(iopf);
		}
	}
	mutex_unlock(&queue->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);

/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = -EBUSY;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param)
		return -ENODEV;

	iopf_param = kzalloc(sizeof(*iopf_param), GFP_KERNEL);
	if (!iopf_param)
		return -ENOMEM;

	INIT_LIST_HEAD(&iopf_param->partial);
	iopf_param->queue = queue;
	iopf_param->dev = dev;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	if (!param->iopf_param) {
		list_add(&iopf_param->queue_list, &queue->devices);
		param->iopf_param = iopf_param;
		ret = 0;
	}
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);

	if (ret)
		kfree(iopf_param);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);

/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * Caller makes sure that no more faults are reported for this device.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = -EINVAL;
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param || !queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	iopf_param = param->iopf_param;
	if (iopf_param && iopf_param->queue == queue) {
		list_del(&iopf_param->queue_list);
		param->iopf_param = NULL;
		ret = 0;
	}
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);
	if (ret)
		return ret;

	/* Just in case some faults are still stuck */
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list)
		kfree(iopf);

	kfree(iopf_param);

	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);

/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
	struct iopf_queue *queue;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;

	/*
	 * The WQ is unordered because the low-level handler enqueues faults by
	 * group. PRI requests within a group have to be ordered, but once
	 * that's dealt with, the high-level function can handle groups out of
	 * order.
	 */
	queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
	if (!queue->wq) {
		kfree(queue);
		return NULL;
	}

	INIT_LIST_HEAD(&queue->devices);
	mutex_init(&queue->lock);

	return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);

/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
	struct iopf_device_param *iopf_param, *next;

	if (!queue)
		return;

	list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
		iopf_queue_remove_device(queue, iopf_param->dev);

	destroy_workqueue(queue->wq);
	kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);
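
Usage sketch (not part of io-pgfault.c): the snippet below illustrates how an IOMMU driver might wire a PRI-capable device into a fault queue and tear it down again. The struct my_iommu container, the two function names and the simplified error handling are invented for the illustration; only the iopf_queue_*() and iommu_*() calls come from the API above, with the includes mirroring this file (iommu-sva-lib.h is assumed to provide the iommu_queue_iopf() prototype, as in this tree).

#include <linux/device.h>
#include <linux/iommu.h>

#include "iommu-sva-lib.h"	/* iommu_queue_iopf() */

struct my_iommu {
	struct iopf_queue *iopf_queue;	/* one queue shared by our devices */
};

static int my_iommu_enable_iopf(struct my_iommu *iommu, struct device *dev)
{
	int ret;

	/* One workqueue per IOMMU is typically enough; name it after the device */
	iommu->iopf_queue = iopf_queue_alloc(dev_name(dev));
	if (!iommu->iopf_queue)
		return -ENOMEM;

	/* Attach the endpoint so its partial faults have somewhere to live */
	ret = iopf_queue_add_device(iommu->iopf_queue, dev);
	if (ret)
		goto err_free_queue;

	/* Page requests reported for dev now land in iommu_queue_iopf() */
	ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
	if (ret)
		goto err_remove_dev;

	return 0;

err_remove_dev:
	iopf_queue_remove_device(iommu->iopf_queue, dev);
err_free_queue:
	iopf_queue_free(iommu->iopf_queue);
	return ret;
}

static void my_iommu_disable_iopf(struct my_iommu *iommu, struct device *dev)
{
	/*
	 * The caller must already have quiesced the hardware PRI queue so
	 * that no new fault gets reported for dev.
	 */
	iopf_queue_flush_dev(dev);
	iommu_unregister_device_fault_handler(dev);
	iopf_queue_remove_device(iommu->iopf_queue, dev);
	iopf_queue_free(iommu->iopf_queue);
}

A single queue can serve several devices: iopf_queue_add_device() links each device's iopf_device_param into queue->devices, and iopf_queue_free() detaches whatever is still attached before destroying the workqueue.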