cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

svm.c (24946B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/intel-iommu.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/intel-svm.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>
#include <trace/events/intel_iommu.h>

#include "pasid.h"
#include "perf.h"
#include "../iommu-sva-lib.h"

static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);
#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)

static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
{
	return xa_alloc(&pasid_private_array, &pasid, priv,
			XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

static void pasid_private_remove(ioasid_t pasid)
{
	xa_erase(&pasid_private_array, pasid);
}

static void *pasid_private_find(ioasid_t pasid)
{
	return xa_load(&pasid_private_array, pasid);
}

static struct intel_svm_dev *
svm_lookup_device_by_sid(struct intel_svm *svm, u16 sid)
{
	struct intel_svm_dev *sdev = NULL, *t;

	rcu_read_lock();
	list_for_each_entry_rcu(t, &svm->devs, list) {
		if (t->sid == sid) {
			sdev = t;
			break;
		}
	}
	rcu_read_unlock();

	return sdev;
}

static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
	struct intel_svm_dev *sdev = NULL, *t;

	rcu_read_lock();
	list_for_each_entry_rcu(t, &svm->devs, list) {
		if (t->dev == dev) {
			sdev = t;
			break;
		}
	}
	rcu_read_unlock();

	return sdev;
}

int intel_svm_enable_prq(struct intel_iommu *iommu)
{
	struct iopf_queue *iopfq;
	struct page *pages;
	int irq, ret;

	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
	if (!pages) {
		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
			iommu->name);
		return -ENOMEM;
	}
	iommu->prq = page_address(pages);

	irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
	if (irq <= 0) {
		pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
		       iommu->name);
		ret = -EINVAL;
		goto free_prq;
	}
	iommu->pr_irq = irq;

	snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
		 "dmar%d-iopfq", iommu->seq_id);
	iopfq = iopf_queue_alloc(iommu->iopfq_name);
	if (!iopfq) {
		pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
		ret = -ENOMEM;
		goto free_hwirq;
	}
	iommu->iopf_queue = iopfq;

	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
				   iommu->prq_name, iommu);
	if (ret) {
		pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
		       iommu->name);
		goto free_iopfq;
	}
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

	init_completion(&iommu->prq_complete);

	return 0;

free_iopfq:
	iopf_queue_free(iommu->iopf_queue);
	iommu->iopf_queue = NULL;
free_hwirq:
	dmar_free_hwirq(irq);
	iommu->pr_irq = 0;
free_prq:
	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return ret;
}

int intel_svm_finish_prq(struct intel_iommu *iommu)
{
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

	if (iommu->pr_irq) {
		free_irq(iommu->pr_irq, iommu);
		dmar_free_hwirq(iommu->pr_irq);
		iommu->pr_irq = 0;
	}

	if (iommu->iopf_queue) {
		iopf_queue_free(iommu->iopf_queue);
		iommu->iopf_queue = NULL;
	}

	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return 0;
}

void intel_svm_check(struct intel_iommu *iommu)
{
	if (!pasid_supported(iommu))
		return;

	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
	    !cap_fl1gp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible 1GB page capability\n",
		       iommu->name);
		return;
	}

	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    !cap_5lp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible paging mode\n",
		       iommu->name);
		return;
	}

	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

static void __flush_svm_range_dev(struct intel_svm *svm,
				  struct intel_svm_dev *sdev,
				  unsigned long address,
				  unsigned long pages, int ih)
{
	struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);

	if (WARN_ON(!pages))
		return;

	qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
	if (info->ats_enabled)
		qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
					 svm->pasid, sdev->qdep, address,
					 order_base_2(pages));
}

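/*
 * Descriptive note: the helper below splits the requested range into
 * naturally aligned, power-of-two sized chunks, so that each call to
 * __flush_svm_range_dev() describes one aligned region whose size can be
 * expressed as the (address, order) pair used by the invalidation
 * descriptors above.
 */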
static void intel_flush_svm_range_dev(struct intel_svm *svm,
				      struct intel_svm_dev *sdev,
				      unsigned long address,
				      unsigned long pages, int ih)
{
	unsigned long shift = ilog2(__roundup_pow_of_two(pages));
	unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
	unsigned long start = ALIGN_DOWN(address, align);
	unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

	while (start < end) {
		__flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
		start += align;
	}
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
				unsigned long pages, int ih)
{
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
	rcu_read_unlock();
}

/* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn,
				   struct mm_struct *mm,
				   unsigned long start, unsigned long end)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

	intel_flush_svm_range(svm, start,
			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
	struct intel_svm_dev *sdev;

	/* This might end up being called from exit_mmap(), *before* the page
	 * tables are cleared. And __mmu_notifier_release() will delete us from
	 * the list of notifiers so that our invalidate_range() callback doesn't
	 * get called when the page tables are cleared. So we need to protect
	 * against hardware accessing those page tables.
	 *
	 * We do it by clearing the entry in the PASID table and then flushing
	 * the IOTLB and the PASID table caches. This might upset hardware;
	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
	 * page) so that we end up taking a fault that the hardware really
	 * *has* to handle gracefully without affecting other processes.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
					    svm->pasid, true);
	rcu_read_unlock();

}

static const struct mmu_notifier_ops intel_mmuops = {
	.release = intel_mm_release,
	.invalidate_range = intel_invalidate_range,
};

static DEFINE_MUTEX(pasid_mutex);

static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
			     struct intel_svm **rsvm,
			     struct intel_svm_dev **rsdev)
{
	struct intel_svm_dev *sdev = NULL;
	struct intel_svm *svm;

	/* The caller should hold the pasid_mutex lock */
	if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
		return -EINVAL;

	if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
		return -EINVAL;

	svm = pasid_private_find(pasid);
	if (IS_ERR(svm))
		return PTR_ERR(svm);

	if (!svm)
		goto out;

	/*
	 * If we found svm for the PASID, there must be at least one device
	 * bond.
	 */
	if (WARN_ON(list_empty(&svm->devs)))
		return -EINVAL;
	sdev = svm_lookup_device_by_dev(svm, dev);

out:
	*rsvm = svm;
	*rsdev = sdev;

	return 0;
}

static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
				 unsigned int flags)
{
	ioasid_t max_pasid = dev_is_pci(dev) ?
			pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;

	return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
}

static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
					   struct device *dev,
					   struct mm_struct *mm,
					   unsigned int flags)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	unsigned long iflags, sflags;
	struct intel_svm_dev *sdev;
	struct intel_svm *svm;
	int ret = 0;

	svm = pasid_private_find(mm->pasid);
	if (!svm) {
		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
		if (!svm)
			return ERR_PTR(-ENOMEM);

		svm->pasid = mm->pasid;
		svm->mm = mm;
		svm->flags = flags;
		INIT_LIST_HEAD_RCU(&svm->devs);

		if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
			svm->notifier.ops = &intel_mmuops;
			ret = mmu_notifier_register(&svm->notifier, mm);
			if (ret) {
				kfree(svm);
				return ERR_PTR(ret);
			}
		}

		ret = pasid_private_add(svm->pasid, svm);
		if (ret) {
			if (svm->notifier.ops)
				mmu_notifier_unregister(&svm->notifier, mm);
			kfree(svm);
			return ERR_PTR(ret);
		}
	}

	/* Find the matching device in svm list */
	sdev = svm_lookup_device_by_dev(svm, dev);
	if (sdev) {
		sdev->users++;
		goto success;
	}

	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	if (!sdev) {
		ret = -ENOMEM;
		goto free_svm;
	}

	sdev->dev = dev;
	sdev->iommu = iommu;
	sdev->did = FLPT_DEFAULT_DID;
	sdev->sid = PCI_DEVID(info->bus, info->devfn);
	sdev->users = 1;
	sdev->pasid = svm->pasid;
	sdev->sva.dev = dev;
	init_rcu_head(&sdev->rcu);
	if (info->ats_enabled) {
		sdev->dev_iotlb = 1;
		sdev->qdep = info->ats_qdep;
		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
			sdev->qdep = 0;
	}

	/* Setup the pasid table: */
	sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
			PASID_FLAG_SUPERVISOR_MODE : 0;
	sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
	spin_lock_irqsave(&iommu->lock, iflags);
	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
					    FLPT_DEFAULT_DID, sflags);
	spin_unlock_irqrestore(&iommu->lock, iflags);

	if (ret)
		goto free_sdev;

	list_add_rcu(&sdev->list, &svm->devs);
success:
	return &sdev->sva;

free_sdev:
	kfree(sdev);
free_svm:
	if (list_empty(&svm->devs)) {
		if (svm->notifier.ops)
			mmu_notifier_unregister(&svm->notifier, mm);
		pasid_private_remove(mm->pasid);
		kfree(svm);
	}

	return ERR_PTR(ret);
}

/* Caller must hold pasid_mutex */
static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
{
	struct intel_svm_dev *sdev;
	struct intel_iommu *iommu;
	struct intel_svm *svm;
	struct mm_struct *mm;
	int ret = -EINVAL;

	iommu = device_to_iommu(dev, NULL, NULL);
	if (!iommu)
		goto out;

	ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
	if (ret)
		goto out;
	mm = svm->mm;

	if (sdev) {
		sdev->users--;
		if (!sdev->users) {
			list_del_rcu(&sdev->list);
			/* Flush the PASID cache and IOTLB for this device.
			 * Note that we do depend on the hardware *not* using
			 * the PASID any more. Just as we depend on other
			 * devices never using PASIDs that they have no right
			 * to use. We have a *shared* PASID table, because it's
			 * large and has to be physically contiguous. So it's
			 * hard to be as defensive as we might like. */
			intel_pasid_tear_down_entry(iommu, dev,
						    svm->pasid, false);
			intel_svm_drain_prq(dev, svm->pasid);
			kfree_rcu(sdev, rcu);

			if (list_empty(&svm->devs)) {
				if (svm->notifier.ops)
					mmu_notifier_unregister(&svm->notifier, mm);
				pasid_private_remove(svm->pasid);
				/* We mandate that no page faults may be outstanding
				 * for the PASID when intel_svm_unbind_mm() is called.
				 * If that is not obeyed, subtle errors will happen.
				 * Let's make them less subtle... */
				memset(svm, 0x6b, sizeof(*svm));
				kfree(svm);
			}
		}
	}
out:
	return ret;
}

/* Page request queue descriptor */
struct page_req_dsc {
	union {
		struct {
			u64 type:8;
			u64 pasid_present:1;
			u64 priv_data_present:1;
			u64 rsvd:6;
			u64 rid:16;
			u64 pasid:20;
			u64 exe_req:1;
			u64 pm_req:1;
			u64 rsvd2:10;
		};
		u64 qw_0;
	};
	union {
		struct {
			u64 rd_req:1;
			u64 wr_req:1;
			u64 lpig:1;
			u64 prg_index:9;
			u64 addr:52;
		};
		u64 qw_1;
	};
	u64 priv_data[2];
};

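/*
 * Descriptive note: a canonical address has bits [63:__VIRTUAL_MASK_SHIFT]
 * equal to bit __VIRTUAL_MASK_SHIFT. Shifting the value left and then
 * arithmetically right by the same amount sign-extends that bit, so the
 * round trip reproduces the original value only for canonical addresses.
 */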
static bool is_canonical_address(u64 addr)
{
	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
	long saddr = (long) addr;

	return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_svm_drain_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. Then follow the steps
 * described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
 */
static void intel_svm_drain_prq(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	struct qi_desc desc[3];
	struct pci_dev *pdev;
	int head, tail;
	u16 sid, did;
	int qdep;

	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return;

	if (!info->pri_enabled)
		return;

	iommu = info->iommu;
	domain = info->domain;
	pdev = to_pci_dev(dev);
	sid = PCI_DEVID(info->bus, info->devfn);
	did = domain->iommu_did[iommu->seq_id];
	qdep = pci_ats_queue_depth(pdev);

	/*
	 * Check and wait until all pending page requests in the queue are
	 * handled by the prq handling thread.
	 */
prq_retry:
	reinit_completion(&iommu->prq_complete);
	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	while (head != tail) {
		struct page_req_dsc *req;

		req = &iommu->prq[head / sizeof(*req)];
		if (!req->pasid_present || req->pasid != pasid) {
			head = (head + sizeof(*req)) & PRQ_RING_MASK;
			continue;
		}

		wait_for_completion(&iommu->prq_complete);
		goto prq_retry;
	}

	/*
	 * A work in IO page fault workqueue may try to lock pasid_mutex now.
	 * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for
	 * all works in the workqueue to finish may cause deadlock.
	 *
	 * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev().
	 * Unlock it to allow the works to be handled while waiting for
	 * them to finish.
	 */
	lockdep_assert_held(&pasid_mutex);
	mutex_unlock(&pasid_mutex);
	iopf_queue_flush_dev(dev);
	mutex_lock(&pasid_mutex);

	/*
	 * Perform steps described in VT-d spec CH7.10 to drain page
	 * requests and responses in hardware.
	 */
	memset(desc, 0, sizeof(desc));
	desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
			QI_IWD_FENCE |
			QI_IWD_TYPE;
	desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
			QI_EIOTLB_DID(did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
			QI_EIOTLB_TYPE;
	desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
			QI_DEV_EIOTLB_SID(sid) |
			QI_DEV_EIOTLB_QDEP(qdep) |
			QI_DEIOTLB_TYPE |
			QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
	reinit_completion(&iommu->prq_complete);
	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		wait_for_completion(&iommu->prq_complete);
		goto qi_retry;
	}
}

static int prq_to_iommu_prot(struct page_req_dsc *req)
{
	int prot = 0;

	if (req->rd_req)
		prot |= IOMMU_FAULT_PERM_READ;
	if (req->wr_req)
		prot |= IOMMU_FAULT_PERM_WRITE;
	if (req->exe_req)
		prot |= IOMMU_FAULT_PERM_EXEC;
	if (req->pm_req)
		prot |= IOMMU_FAULT_PERM_PRIV;

	return prot;
}

static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
				struct page_req_dsc *desc)
{
	struct iommu_fault_event event;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	/* Fill in event data for device specific processing */
	memset(&event, 0, sizeof(struct iommu_fault_event));
	event.fault.type = IOMMU_FAULT_PAGE_REQ;
	event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
	event.fault.prm.pasid = desc->pasid;
	event.fault.prm.grpid = desc->prg_index;
	event.fault.prm.perm = prq_to_iommu_prot(desc);

	if (desc->lpig)
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
	if (desc->pasid_present) {
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
	}
	if (desc->priv_data_present) {
		/*
		 * Set last page in group bit if private data is present,
		 * page response is required as it does for LPIG.
		 * iommu_report_device_fault() doesn't understand this vendor
		 * specific requirement thus we set last_page as a workaround.
		 */
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
		event.fault.prm.private_data[0] = desc->priv_data[0];
		event.fault.prm.private_data[1] = desc->priv_data[1];
	} else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
		/*
		 * If the private data fields are not used by hardware, use it
		 * to monitor the prq handle latency.
		 */
		event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
	}

	return iommu_report_device_fault(dev, &event);
}

static void handle_bad_prq_event(struct intel_iommu *iommu,
				 struct page_req_dsc *req, int result)
{
	struct qi_desc desc;

	pr_err("%s: Invalid page request: %08llx %08llx\n",
	       iommu->name, ((unsigned long long *)req)[0],
	       ((unsigned long long *)req)[1]);

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must
	 * respond with page group response if private data
	 * is present (PDP) or last page in group (LPIG) bit
	 * is set. This is an additional VT-d feature beyond
	 * PCI ATS spec.
	 */
	if (!req->lpig && !req->priv_data_present)
		return;

	desc.qw0 = QI_PGRP_PASID(req->pasid) |
			QI_PGRP_DID(req->rid) |
			QI_PGRP_PASID_P(req->pasid_present) |
			QI_PGRP_PDP(req->priv_data_present) |
			QI_PGRP_RESP_CODE(result) |
			QI_PGRP_RESP_TYPE;
	desc.qw1 = QI_PGRP_IDX(req->prg_index) |
			QI_PGRP_LPIG(req->lpig);

	if (req->priv_data_present) {
		desc.qw2 = req->priv_data[0];
		desc.qw3 = req->priv_data[1];
	} else {
		desc.qw2 = 0;
		desc.qw3 = 0;
	}

	qi_submit_sync(iommu, &desc, 1, 0);
}

static irqreturn_t prq_event_thread(int irq, void *d)
{
	struct intel_svm_dev *sdev = NULL;
	struct intel_iommu *iommu = d;
	struct intel_svm *svm = NULL;
	struct page_req_dsc *req;
	int head, tail, handled;
	u64 address;

	/*
	 * Clear PPR bit before reading head/tail registers, to ensure that
	 * we get a new interrupt if needed.
	 */
	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	handled = (head != tail);
	while (head != tail) {
		req = &iommu->prq[head / sizeof(*req)];
		address = (u64)req->addr << VTD_PAGE_SHIFT;

		if (unlikely(!req->pasid_present)) {
			pr_err("IOMMU: %s: Page request without PASID\n",
			       iommu->name);
bad_req:
			svm = NULL;
			sdev = NULL;
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
			goto prq_advance;
		}

		if (unlikely(!is_canonical_address(address))) {
			pr_err("IOMMU: %s: Address is not canonical\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
			pr_err("IOMMU: %s: Page request in Privilege Mode\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->exe_req && req->rd_req)) {
			pr_err("IOMMU: %s: Execution request not supported\n",
			       iommu->name);
			goto bad_req;
		}

		/* Drop Stop Marker message. No need for a response. */
		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
			goto prq_advance;

		if (!svm || svm->pasid != req->pasid) {
			/*
			 * It can't go away, because the driver is not permitted
			 * to unbind the mm while any page faults are outstanding.
			 */
			svm = pasid_private_find(req->pasid);
			if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE))
				goto bad_req;
		}

		if (!sdev || sdev->sid != req->rid) {
			sdev = svm_lookup_device_by_sid(svm, req->rid);
			if (!sdev)
				goto bad_req;
		}

		sdev->prq_seq_number++;

		/*
		 * If prq is to be handled outside iommu driver via receiver of
		 * the fault notifiers, we skip the page response here.
		 */
		if (intel_svm_prq_report(iommu, sdev->dev, req))
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);

		trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1,
				 req->priv_data[0], req->priv_data[1],
				 sdev->prq_seq_number);
prq_advance:
		head = (head + sizeof(*req)) & PRQ_RING_MASK;
	}

	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

	/*
	 * Clear the page request overflow bit and wake up all threads that
	 * are waiting for the completion of this handling.
	 */
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
				    iommu->name);
		head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
		tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
		if (head == tail) {
			iopf_queue_discard_partial(iommu->iopf_queue);
			writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
			pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
					    iommu->name);
		}
	}

	if (!completion_done(&iommu->prq_complete))
		complete(&iommu->prq_complete);

	return IRQ_RETVAL(handled);
}

struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
{
	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
	unsigned int flags = 0;
	struct iommu_sva *sva;
	int ret;

	if (drvdata)
		flags = *(unsigned int *)drvdata;

	if (flags & SVM_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap)) {
			dev_err(dev, "%s: Supervisor PASID not supported\n",
				iommu->name);
			return ERR_PTR(-EOPNOTSUPP);
		}

		if (mm) {
			dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
				iommu->name);
			return ERR_PTR(-EINVAL);
		}

		mm = &init_mm;
	}

	mutex_lock(&pasid_mutex);
	ret = intel_svm_alloc_pasid(dev, mm, flags);
	if (ret) {
		mutex_unlock(&pasid_mutex);
		return ERR_PTR(ret);
	}

	sva = intel_svm_bind_mm(iommu, dev, mm, flags);
	mutex_unlock(&pasid_mutex);

	return sva;
}

void intel_svm_unbind(struct iommu_sva *sva)
{
	struct intel_svm_dev *sdev = to_intel_svm_dev(sva);

	mutex_lock(&pasid_mutex);
	intel_svm_unbind_mm(sdev->dev, sdev->pasid);
	mutex_unlock(&pasid_mutex);
}

u32 intel_svm_get_pasid(struct iommu_sva *sva)
{
	struct intel_svm_dev *sdev;
	u32 pasid;

	mutex_lock(&pasid_mutex);
	sdev = to_intel_svm_dev(sva);
	pasid = sdev->pasid;
	mutex_unlock(&pasid_mutex);

	return pasid;
}

int intel_svm_page_response(struct device *dev,
			    struct iommu_fault_event *evt,
			    struct iommu_page_response *msg)
{
	struct iommu_fault_page_request *prm;
	struct intel_svm_dev *sdev = NULL;
	struct intel_svm *svm = NULL;
	struct intel_iommu *iommu;
	bool private_present;
	bool pasid_present;
	bool last_page;
	u8 bus, devfn;
	int ret = 0;
	u16 sid;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (!msg || !evt)
		return -EINVAL;

	mutex_lock(&pasid_mutex);

	prm = &evt->fault.prm;
	sid = PCI_DEVID(bus, devfn);
	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
	private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

	if (!pasid_present) {
		ret = -EINVAL;
		goto out;
	}

	if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
		ret = -EINVAL;
		goto out;
	}

	ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
	if (ret || !sdev) {
		ret = -ENODEV;
		goto out;
	}

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must respond
	 * with page group response if private data is present (PDP)
	 * or last page in group (LPIG) bit is set. This is an
	 * additional VT-d requirement beyond PCI ATS spec.
	 */
	if (last_page || private_present) {
		struct qi_desc desc;

		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
				QI_PGRP_PASID_P(pasid_present) |
				QI_PGRP_PDP(private_present) |
				QI_PGRP_RESP_CODE(msg->code) |
				QI_PGRP_RESP_TYPE;
		desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
		desc.qw2 = 0;
		desc.qw3 = 0;

		if (private_present) {
			desc.qw2 = prm->private_data[0];
			desc.qw3 = prm->private_data[1];
		} else if (prm->private_data[0]) {
			dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
				ktime_to_ns(ktime_get()) - prm->private_data[0]);
		}

		qi_submit_sync(iommu, &desc, 1, 0);
	}
out:
	mutex_unlock(&pasid_mutex);
	return ret;
}