cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

irq_remapping.c (38317B)


      1// SPDX-License-Identifier: GPL-2.0
      2
      3#define pr_fmt(fmt)     "DMAR-IR: " fmt
      4
      5#include <linux/interrupt.h>
      6#include <linux/dmar.h>
      7#include <linux/spinlock.h>
      8#include <linux/slab.h>
      9#include <linux/jiffies.h>
     10#include <linux/hpet.h>
     11#include <linux/pci.h>
     12#include <linux/irq.h>
     13#include <linux/intel-iommu.h>
     14#include <linux/acpi.h>
     15#include <linux/irqdomain.h>
     16#include <linux/crash_dump.h>
     17#include <asm/io_apic.h>
     18#include <asm/apic.h>
     19#include <asm/smp.h>
     20#include <asm/cpu.h>
     21#include <asm/irq_remapping.h>
     22#include <asm/pci-direct.h>
     23
     24#include "../irq_remapping.h"
     25#include "cap_audit.h"
     26
     27enum irq_mode {
     28	IRQ_REMAPPING,
     29	IRQ_POSTING,
     30};
     31
     32struct ioapic_scope {
     33	struct intel_iommu *iommu;
     34	unsigned int id;
     35	unsigned int bus;	/* PCI bus number */
     36	unsigned int devfn;	/* PCI devfn number */
     37};
     38
     39struct hpet_scope {
     40	struct intel_iommu *iommu;
     41	u8 id;
     42	unsigned int bus;
     43	unsigned int devfn;
     44};
     45
     46struct irq_2_iommu {
     47	struct intel_iommu *iommu;
     48	u16 irte_index;
     49	u16 sub_handle;
     50	u8  irte_mask;
     51	enum irq_mode mode;
     52};
     53
     54struct intel_ir_data {
     55	struct irq_2_iommu			irq_2_iommu;
     56	struct irte				irte_entry;
     57	union {
     58		struct msi_msg			msi_entry;
     59	};
     60};
     61
     62#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
     63#define IRTE_DEST(dest) ((eim_mode) ? dest : dest << 8)
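/*
 * In xAPIC mode (eim_mode == 0) the IRTE destination field carries the
 * 8-bit APIC ID in bits 15:8, hence the shift in IRTE_DEST(); with EIM
 * (x2APIC) enabled the full 32-bit destination ID is used as-is.
 */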
     64
     65static int __read_mostly eim_mode;
     66static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
     67static struct hpet_scope ir_hpet[MAX_HPET_TBS];
     68
     69/*
     70 * Lock ordering:
     71 * ->dmar_global_lock
     72 *	->irq_2_ir_lock
     73 *		->qi->q_lock
     74 *	->iommu->register_lock
     75 * Note:
     76 * intel_irq_remap_ops.{supported,prepare,enable,disable,reenable} are called
      77 * in a single-threaded environment with interrupts disabled, so no need to take
     78 * the dmar_global_lock.
     79 */
     80DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
     81static const struct irq_domain_ops intel_ir_domain_ops;
     82
     83static void iommu_disable_irq_remapping(struct intel_iommu *iommu);
     84static int __init parse_ioapics_under_ir(void);
     85
     86static bool ir_pre_enabled(struct intel_iommu *iommu)
     87{
     88	return (iommu->flags & VTD_FLAG_IRQ_REMAP_PRE_ENABLED);
     89}
     90
     91static void clear_ir_pre_enabled(struct intel_iommu *iommu)
     92{
     93	iommu->flags &= ~VTD_FLAG_IRQ_REMAP_PRE_ENABLED;
     94}
     95
     96static void init_ir_status(struct intel_iommu *iommu)
     97{
     98	u32 gsts;
     99
    100	gsts = readl(iommu->reg + DMAR_GSTS_REG);
    101	if (gsts & DMA_GSTS_IRES)
    102		iommu->flags |= VTD_FLAG_IRQ_REMAP_PRE_ENABLED;
    103}
    104
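/*
 * Reserve a naturally aligned, power-of-two sized block of IRTEs for
 * 'count' interrupts. Callers address individual entries within the block
 * as irte_index + sub_handle; see intel_irq_remapping_alloc(), which hands
 * the i-th interrupt of a multi-MSI allocation sub_handle i.
 */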
    105static int alloc_irte(struct intel_iommu *iommu,
    106		      struct irq_2_iommu *irq_iommu, u16 count)
    107{
    108	struct ir_table *table = iommu->ir_table;
    109	unsigned int mask = 0;
    110	unsigned long flags;
    111	int index;
    112
    113	if (!count || !irq_iommu)
    114		return -1;
    115
    116	if (count > 1) {
    117		count = __roundup_pow_of_two(count);
    118		mask = ilog2(count);
    119	}
    120
    121	if (mask > ecap_max_handle_mask(iommu->ecap)) {
    122		pr_err("Requested mask %x exceeds the max invalidation handle"
    123		       " mask value %Lx\n", mask,
    124		       ecap_max_handle_mask(iommu->ecap));
    125		return -1;
    126	}
    127
    128	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
    129	index = bitmap_find_free_region(table->bitmap,
    130					INTR_REMAP_TABLE_ENTRIES, mask);
    131	if (index < 0) {
    132		pr_warn("IR%d: can't allocate an IRTE\n", iommu->seq_id);
    133	} else {
    134		irq_iommu->iommu = iommu;
    135		irq_iommu->irte_index =  index;
    136		irq_iommu->sub_handle = 0;
    137		irq_iommu->irte_mask = mask;
    138		irq_iommu->mode = IRQ_REMAPPING;
    139	}
    140	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
    141
    142	return index;
    143}
    144
    145static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
    146{
    147	struct qi_desc desc;
    148
    149	desc.qw0 = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
    150		   | QI_IEC_SELECTIVE;
    151	desc.qw1 = 0;
    152	desc.qw2 = 0;
    153	desc.qw3 = 0;
    154
    155	return qi_submit_sync(iommu, &desc, 1, 0);
    156}
    157
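/*
 * Write a new value into an IRTE, flush it to memory and invalidate the
 * interrupt entry cache (qi_flush_iec()) so the hardware picks up the
 * change. Posted-format entries are swapped with cmpxchg16b because the
 * update must be atomic across the full 128-bit entry.
 */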
    158static int modify_irte(struct irq_2_iommu *irq_iommu,
    159		       struct irte *irte_modified)
    160{
    161	struct intel_iommu *iommu;
    162	unsigned long flags;
    163	struct irte *irte;
    164	int rc, index;
    165
    166	if (!irq_iommu)
    167		return -1;
    168
    169	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
    170
    171	iommu = irq_iommu->iommu;
    172
    173	index = irq_iommu->irte_index + irq_iommu->sub_handle;
    174	irte = &iommu->ir_table->base[index];
    175
    176#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
    177	if ((irte->pst == 1) || (irte_modified->pst == 1)) {
    178		bool ret;
    179
    180		ret = cmpxchg_double(&irte->low, &irte->high,
    181				     irte->low, irte->high,
    182				     irte_modified->low, irte_modified->high);
    183		/*
    184		 * We use cmpxchg16 to atomically update the 128-bit IRTE,
     185		 * and it cannot be updated behind our back by the hardware or other
     186		 * processors, so the compare-and-exchange is expected to always
     187		 * succeed.
    188		 */
    189		WARN_ON(!ret);
    190	} else
    191#endif
    192	{
    193		set_64bit(&irte->low, irte_modified->low);
    194		set_64bit(&irte->high, irte_modified->high);
    195	}
    196	__iommu_flush_cache(iommu, irte, sizeof(*irte));
    197
    198	rc = qi_flush_iec(iommu, index, 0);
    199
    200	/* Update iommu mode according to the IRTE mode */
    201	irq_iommu->mode = irte->pst ? IRQ_POSTING : IRQ_REMAPPING;
    202	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
    203
    204	return rc;
    205}
    206
    207static struct intel_iommu *map_hpet_to_iommu(u8 hpet_id)
    208{
    209	int i;
    210
    211	for (i = 0; i < MAX_HPET_TBS; i++) {
    212		if (ir_hpet[i].id == hpet_id && ir_hpet[i].iommu)
    213			return ir_hpet[i].iommu;
    214	}
    215	return NULL;
    216}
    217
    218static struct intel_iommu *map_ioapic_to_iommu(int apic)
    219{
    220	int i;
    221
    222	for (i = 0; i < MAX_IO_APICS; i++) {
    223		if (ir_ioapic[i].id == apic && ir_ioapic[i].iommu)
    224			return ir_ioapic[i].iommu;
    225	}
    226	return NULL;
    227}
    228
    229static struct irq_domain *map_dev_to_ir(struct pci_dev *dev)
    230{
    231	struct dmar_drhd_unit *drhd = dmar_find_matched_drhd_unit(dev);
    232
    233	return drhd ? drhd->iommu->ir_msi_domain : NULL;
    234}
    235
    236static int clear_entries(struct irq_2_iommu *irq_iommu)
    237{
    238	struct irte *start, *entry, *end;
    239	struct intel_iommu *iommu;
    240	int index;
    241
    242	if (irq_iommu->sub_handle)
    243		return 0;
    244
    245	iommu = irq_iommu->iommu;
    246	index = irq_iommu->irte_index;
    247
    248	start = iommu->ir_table->base + index;
    249	end = start + (1 << irq_iommu->irte_mask);
    250
    251	for (entry = start; entry < end; entry++) {
    252		set_64bit(&entry->low, 0);
    253		set_64bit(&entry->high, 0);
    254	}
    255	bitmap_release_region(iommu->ir_table->bitmap, index,
    256			      irq_iommu->irte_mask);
    257
    258	return qi_flush_iec(iommu, index, irq_iommu->irte_mask);
    259}
    260
    261/*
    262 * source validation type
    263 */
    264#define SVT_NO_VERIFY		0x0  /* no verification is required */
    265#define SVT_VERIFY_SID_SQ	0x1  /* verify using SID and SQ fields */
    266#define SVT_VERIFY_BUS		0x2  /* verify bus of request-id */
    267
    268/*
    269 * source-id qualifier
    270 */
    271#define SQ_ALL_16	0x0  /* verify all 16 bits of request-id */
    272#define SQ_13_IGNORE_1	0x1  /* verify most significant 13 bits, ignore
    273			      * the third least significant bit
    274			      */
    275#define SQ_13_IGNORE_2	0x2  /* verify most significant 13 bits, ignore
    276			      * the second and third least significant bits
    277			      */
    278#define SQ_13_IGNORE_3	0x3  /* verify most significant 13 bits, ignore
     279			      * the three least significant bits
    280			      */
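/*
 * Example: SVT_VERIFY_SID_SQ with SQ_ALL_16 accepts an interrupt request
 * only if its requester-id matches irte->sid exactly, while SVT_VERIFY_BUS
 * with sid = (start_bus << 8) | end_bus (as built by set_irte_verify_bus()
 * below) only checks that the requester's bus number lies in
 * [start_bus, end_bus].
 */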
    281
    282/*
    283 * set SVT, SQ and SID fields of irte to verify
    284 * source ids of interrupt requests
    285 */
    286static void set_irte_sid(struct irte *irte, unsigned int svt,
    287			 unsigned int sq, unsigned int sid)
    288{
    289	if (disable_sourceid_checking)
    290		svt = SVT_NO_VERIFY;
    291	irte->svt = svt;
    292	irte->sq = sq;
    293	irte->sid = sid;
    294}
    295
    296/*
    297 * Set an IRTE to match only the bus number. Interrupt requests that reference
     298 * this IRTE must have a requester-id whose bus number lies between the
     299 * start_bus and end_bus arguments, inclusive.
    300 */
    301static void set_irte_verify_bus(struct irte *irte, unsigned int start_bus,
    302				unsigned int end_bus)
    303{
    304	set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
    305		     (start_bus << 8) | end_bus);
    306}
    307
    308static int set_ioapic_sid(struct irte *irte, int apic)
    309{
    310	int i;
    311	u16 sid = 0;
    312
    313	if (!irte)
    314		return -1;
    315
    316	down_read(&dmar_global_lock);
    317	for (i = 0; i < MAX_IO_APICS; i++) {
    318		if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) {
    319			sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
    320			break;
    321		}
    322	}
    323	up_read(&dmar_global_lock);
    324
    325	if (sid == 0) {
    326		pr_warn("Failed to set source-id of IOAPIC (%d)\n", apic);
    327		return -1;
    328	}
    329
    330	set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, sid);
    331
    332	return 0;
    333}
    334
    335static int set_hpet_sid(struct irte *irte, u8 id)
    336{
    337	int i;
    338	u16 sid = 0;
    339
    340	if (!irte)
    341		return -1;
    342
    343	down_read(&dmar_global_lock);
    344	for (i = 0; i < MAX_HPET_TBS; i++) {
    345		if (ir_hpet[i].iommu && ir_hpet[i].id == id) {
    346			sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
    347			break;
    348		}
    349	}
    350	up_read(&dmar_global_lock);
    351
    352	if (sid == 0) {
    353		pr_warn("Failed to set source-id of HPET block (%d)\n", id);
    354		return -1;
    355	}
    356
    357	/*
    358	 * Should really use SQ_ALL_16. Some platforms are broken.
    359	 * While we figure out the right quirks for these broken platforms, use
    360	 * SQ_13_IGNORE_3 for now.
    361	 */
    362	set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, sid);
    363
    364	return 0;
    365}
    366
    367struct set_msi_sid_data {
    368	struct pci_dev *pdev;
    369	u16 alias;
    370	int count;
    371	int busmatch_count;
    372};
    373
    374static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque)
    375{
    376	struct set_msi_sid_data *data = opaque;
    377
    378	if (data->count == 0 || PCI_BUS_NUM(alias) == PCI_BUS_NUM(data->alias))
    379		data->busmatch_count++;
    380
    381	data->pdev = pdev;
    382	data->alias = alias;
    383	data->count++;
    384
    385	return 0;
    386}
    387
    388static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
    389{
    390	struct set_msi_sid_data data;
    391
    392	if (!irte || !dev)
    393		return -1;
    394
    395	data.count = 0;
    396	data.busmatch_count = 0;
    397	pci_for_each_dma_alias(dev, set_msi_sid_cb, &data);
    398
    399	/*
    400	 * DMA alias provides us with a PCI device and alias.  The only case
     401	 * where it will return an alias on a different bus than the
    402	 * device is the case of a PCIe-to-PCI bridge, where the alias is for
    403	 * the subordinate bus.  In this case we can only verify the bus.
    404	 *
    405	 * If there are multiple aliases, all with the same bus number,
    406	 * then all we can do is verify the bus. This is typical in NTB
     407	 * hardware which uses proxy IDs where the device will generate traffic
    408	 * from multiple devfn numbers on the same bus.
    409	 *
    410	 * If the alias device is on a different bus than our source device
    411	 * then we have a topology based alias, use it.
    412	 *
    413	 * Otherwise, the alias is for a device DMA quirk and we cannot
    414	 * assume that MSI uses the same requester ID.  Therefore use the
    415	 * original device.
    416	 */
    417	if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number)
    418		set_irte_verify_bus(irte, PCI_BUS_NUM(data.alias),
    419				    dev->bus->number);
    420	else if (data.count >= 2 && data.busmatch_count == data.count)
    421		set_irte_verify_bus(irte, dev->bus->number, dev->bus->number);
    422	else if (data.pdev->bus->number != dev->bus->number)
    423		set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias);
    424	else
    425		set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
    426			     pci_dev_id(dev));
    427
    428	return 0;
    429}
    430
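/*
 * Inherit the interrupt remapping table left behind by the previous kernel
 * (kdump case): devices may still raise interrupts that are translated
 * through the old table, so copy it and mark its in-use entries in the
 * bitmap before this kernel takes over.
 */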
    431static int iommu_load_old_irte(struct intel_iommu *iommu)
    432{
    433	struct irte *old_ir_table;
    434	phys_addr_t irt_phys;
    435	unsigned int i;
    436	size_t size;
    437	u64 irta;
    438
    439	/* Check whether the old ir-table has the same size as ours */
    440	irta = dmar_readq(iommu->reg + DMAR_IRTA_REG);
    441	if ((irta & INTR_REMAP_TABLE_REG_SIZE_MASK)
    442	     != INTR_REMAP_TABLE_REG_SIZE)
    443		return -EINVAL;
    444
    445	irt_phys = irta & VTD_PAGE_MASK;
    446	size     = INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte);
    447
    448	/* Map the old IR table */
    449	old_ir_table = memremap(irt_phys, size, MEMREMAP_WB);
    450	if (!old_ir_table)
    451		return -ENOMEM;
    452
    453	/* Copy data over */
    454	memcpy(iommu->ir_table->base, old_ir_table, size);
    455
    456	__iommu_flush_cache(iommu, iommu->ir_table->base, size);
    457
    458	/*
    459	 * Now check the table for used entries and mark those as
    460	 * allocated in the bitmap
    461	 */
    462	for (i = 0; i < INTR_REMAP_TABLE_ENTRIES; i++) {
    463		if (iommu->ir_table->base[i].present)
    464			bitmap_set(iommu->ir_table->bitmap, i, 1);
    465	}
    466
    467	memunmap(old_ir_table);
    468
    469	return 0;
    470}
    471
    472
    473static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode)
    474{
    475	unsigned long flags;
    476	u64 addr;
    477	u32 sts;
    478
    479	addr = virt_to_phys((void *)iommu->ir_table->base);
    480
    481	raw_spin_lock_irqsave(&iommu->register_lock, flags);
    482
    483	dmar_writeq(iommu->reg + DMAR_IRTA_REG,
    484		    (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
    485
    486	/* Set interrupt-remapping table pointer */
    487	writel(iommu->gcmd | DMA_GCMD_SIRTP, iommu->reg + DMAR_GCMD_REG);
    488
    489	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
    490		      readl, (sts & DMA_GSTS_IRTPS), sts);
    491	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
    492
    493	/*
    494	 * Global invalidation of interrupt entry cache to make sure the
    495	 * hardware uses the new irq remapping table.
    496	 */
    497	qi_global_iec(iommu);
    498}
    499
    500static void iommu_enable_irq_remapping(struct intel_iommu *iommu)
    501{
    502	unsigned long flags;
    503	u32 sts;
    504
    505	raw_spin_lock_irqsave(&iommu->register_lock, flags);
    506
    507	/* Enable interrupt-remapping */
    508	iommu->gcmd |= DMA_GCMD_IRE;
    509	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
    510	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
    511		      readl, (sts & DMA_GSTS_IRES), sts);
    512
    513	/* Block compatibility-format MSIs */
    514	if (sts & DMA_GSTS_CFIS) {
    515		iommu->gcmd &= ~DMA_GCMD_CFI;
    516		writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
    517		IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
    518			      readl, !(sts & DMA_GSTS_CFIS), sts);
    519	}
    520
    521	/*
    522	 * With CFI clear in the Global Command register, we should be
    523	 * protected from dangerous (i.e. compatibility) interrupts
    524	 * regardless of x2apic status.  Check just to be sure.
    525	 */
    526	if (sts & DMA_GSTS_CFIS)
    527		WARN(1, KERN_WARNING
    528			"Compatibility-format IRQs enabled despite intr remapping;\n"
    529			"you are vulnerable to IRQ injection.\n");
    530
    531	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
    532}
    533
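/*
 * Allocate the per-IOMMU interrupt remapping table (a page allocation of
 * order INTR_REMAP_PAGE_ORDER holding INTR_REMAP_TABLE_ENTRIES IRTEs) and
 * its allocation bitmap, create the IR irqdomain hierarchy, and program the
 * table address into DMAR_IRTA_REG via iommu_set_irq_remapping().
 */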
    534static int intel_setup_irq_remapping(struct intel_iommu *iommu)
    535{
    536	struct ir_table *ir_table;
    537	struct fwnode_handle *fn;
    538	unsigned long *bitmap;
    539	struct page *pages;
    540
    541	if (iommu->ir_table)
    542		return 0;
    543
    544	ir_table = kzalloc(sizeof(struct ir_table), GFP_KERNEL);
    545	if (!ir_table)
    546		return -ENOMEM;
    547
    548	pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO,
    549				 INTR_REMAP_PAGE_ORDER);
    550	if (!pages) {
    551		pr_err("IR%d: failed to allocate pages of order %d\n",
    552		       iommu->seq_id, INTR_REMAP_PAGE_ORDER);
    553		goto out_free_table;
    554	}
    555
    556	bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_ATOMIC);
    557	if (bitmap == NULL) {
    558		pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id);
    559		goto out_free_pages;
    560	}
    561
    562	fn = irq_domain_alloc_named_id_fwnode("INTEL-IR", iommu->seq_id);
    563	if (!fn)
    564		goto out_free_bitmap;
    565
    566	iommu->ir_domain =
    567		irq_domain_create_hierarchy(arch_get_ir_parent_domain(),
    568					    0, INTR_REMAP_TABLE_ENTRIES,
    569					    fn, &intel_ir_domain_ops,
    570					    iommu);
    571	if (!iommu->ir_domain) {
    572		pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
    573		goto out_free_fwnode;
    574	}
    575	iommu->ir_msi_domain =
    576		arch_create_remap_msi_irq_domain(iommu->ir_domain,
    577						 "INTEL-IR-MSI",
    578						 iommu->seq_id);
    579
    580	ir_table->base = page_address(pages);
    581	ir_table->bitmap = bitmap;
    582	iommu->ir_table = ir_table;
    583
    584	/*
    585	 * If the queued invalidation is already initialized,
     586	 * we shouldn't disable it.
    587	 */
    588	if (!iommu->qi) {
    589		/*
    590		 * Clear previous faults.
    591		 */
    592		dmar_fault(-1, iommu);
    593		dmar_disable_qi(iommu);
    594
    595		if (dmar_enable_qi(iommu)) {
    596			pr_err("Failed to enable queued invalidation\n");
    597			goto out_free_ir_domain;
    598		}
    599	}
    600
    601	init_ir_status(iommu);
    602
    603	if (ir_pre_enabled(iommu)) {
    604		if (!is_kdump_kernel()) {
    605			pr_warn("IRQ remapping was enabled on %s but we are not in kdump mode\n",
    606				iommu->name);
    607			clear_ir_pre_enabled(iommu);
    608			iommu_disable_irq_remapping(iommu);
    609		} else if (iommu_load_old_irte(iommu))
    610			pr_err("Failed to copy IR table for %s from previous kernel\n",
    611			       iommu->name);
    612		else
    613			pr_info("Copied IR table for %s from previous kernel\n",
    614				iommu->name);
    615	}
    616
    617	iommu_set_irq_remapping(iommu, eim_mode);
    618
    619	return 0;
    620
    621out_free_ir_domain:
    622	if (iommu->ir_msi_domain)
    623		irq_domain_remove(iommu->ir_msi_domain);
    624	iommu->ir_msi_domain = NULL;
    625	irq_domain_remove(iommu->ir_domain);
    626	iommu->ir_domain = NULL;
    627out_free_fwnode:
    628	irq_domain_free_fwnode(fn);
    629out_free_bitmap:
    630	bitmap_free(bitmap);
    631out_free_pages:
    632	__free_pages(pages, INTR_REMAP_PAGE_ORDER);
    633out_free_table:
    634	kfree(ir_table);
    635
    636	iommu->ir_table  = NULL;
    637
    638	return -ENOMEM;
    639}
    640
    641static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
    642{
    643	struct fwnode_handle *fn;
    644
    645	if (iommu && iommu->ir_table) {
    646		if (iommu->ir_msi_domain) {
    647			fn = iommu->ir_msi_domain->fwnode;
    648
    649			irq_domain_remove(iommu->ir_msi_domain);
    650			irq_domain_free_fwnode(fn);
    651			iommu->ir_msi_domain = NULL;
    652		}
    653		if (iommu->ir_domain) {
    654			fn = iommu->ir_domain->fwnode;
    655
    656			irq_domain_remove(iommu->ir_domain);
    657			irq_domain_free_fwnode(fn);
    658			iommu->ir_domain = NULL;
    659		}
    660		free_pages((unsigned long)iommu->ir_table->base,
    661			   INTR_REMAP_PAGE_ORDER);
    662		bitmap_free(iommu->ir_table->bitmap);
    663		kfree(iommu->ir_table);
    664		iommu->ir_table = NULL;
    665	}
    666}
    667
    668/*
    669 * Disable Interrupt Remapping.
    670 */
    671static void iommu_disable_irq_remapping(struct intel_iommu *iommu)
    672{
    673	unsigned long flags;
    674	u32 sts;
    675
    676	if (!ecap_ir_support(iommu->ecap))
    677		return;
    678
    679	/*
    680	 * global invalidation of interrupt entry cache before disabling
    681	 * interrupt-remapping.
    682	 */
    683	qi_global_iec(iommu);
    684
    685	raw_spin_lock_irqsave(&iommu->register_lock, flags);
    686
    687	sts = readl(iommu->reg + DMAR_GSTS_REG);
    688	if (!(sts & DMA_GSTS_IRES))
    689		goto end;
    690
    691	iommu->gcmd &= ~DMA_GCMD_IRE;
    692	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
    693
    694	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
    695		      readl, !(sts & DMA_GSTS_IRES), sts);
    696
    697end:
    698	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
    699}
    700
    701static int __init dmar_x2apic_optout(void)
    702{
    703	struct acpi_table_dmar *dmar;
    704	dmar = (struct acpi_table_dmar *)dmar_tbl;
    705	if (!dmar || no_x2apic_optout)
    706		return 0;
    707	return dmar->flags & DMAR_X2APIC_OPT_OUT;
    708}
    709
    710static void __init intel_cleanup_irq_remapping(void)
    711{
    712	struct dmar_drhd_unit *drhd;
    713	struct intel_iommu *iommu;
    714
    715	for_each_iommu(iommu, drhd) {
    716		if (ecap_ir_support(iommu->ecap)) {
    717			iommu_disable_irq_remapping(iommu);
    718			intel_teardown_irq_remapping(iommu);
    719		}
    720	}
    721
    722	if (x2apic_supported())
    723		pr_warn("Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n");
    724}
    725
    726static int __init intel_prepare_irq_remapping(void)
    727{
    728	struct dmar_drhd_unit *drhd;
    729	struct intel_iommu *iommu;
    730	int eim = 0;
    731
    732	if (irq_remap_broken) {
    733		pr_warn("This system BIOS has enabled interrupt remapping\n"
    734			"on a chipset that contains an erratum making that\n"
    735			"feature unstable.  To maintain system stability\n"
    736			"interrupt remapping is being disabled.  Please\n"
    737			"contact your BIOS vendor for an update\n");
    738		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
    739		return -ENODEV;
    740	}
    741
    742	if (dmar_table_init() < 0)
    743		return -ENODEV;
    744
    745	if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL))
    746		return -ENODEV;
    747
    748	if (!dmar_ir_support())
    749		return -ENODEV;
    750
    751	if (parse_ioapics_under_ir()) {
    752		pr_info("Not enabling interrupt remapping\n");
    753		goto error;
    754	}
    755
    756	/* First make sure all IOMMUs support IRQ remapping */
    757	for_each_iommu(iommu, drhd)
    758		if (!ecap_ir_support(iommu->ecap))
    759			goto error;
    760
    761	/* Detect remapping mode: lapic or x2apic */
    762	if (x2apic_supported()) {
    763		eim = !dmar_x2apic_optout();
    764		if (!eim) {
    765			pr_info("x2apic is disabled because BIOS sets x2apic opt out bit.");
    766			pr_info("Use 'intremap=no_x2apic_optout' to override the BIOS setting.\n");
    767		}
    768	}
    769
    770	for_each_iommu(iommu, drhd) {
    771		if (eim && !ecap_eim_support(iommu->ecap)) {
    772			pr_info("%s does not support EIM\n", iommu->name);
    773			eim = 0;
    774		}
    775	}
    776
    777	eim_mode = eim;
    778	if (eim)
    779		pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
    780
    781	/* Do the initializations early */
    782	for_each_iommu(iommu, drhd) {
    783		if (intel_setup_irq_remapping(iommu)) {
    784			pr_err("Failed to setup irq remapping for %s\n",
    785			       iommu->name);
    786			goto error;
    787		}
    788	}
    789
    790	return 0;
    791
    792error:
    793	intel_cleanup_irq_remapping();
    794	return -ENODEV;
    795}
    796
    797/*
    798 * Set Posted-Interrupts capability.
    799 */
    800static inline void set_irq_posting_cap(void)
    801{
    802	struct dmar_drhd_unit *drhd;
    803	struct intel_iommu *iommu;
    804
    805	if (!disable_irq_post) {
    806		/*
     807		 * If the IRTE is in posted format, the 'pda' field crosses the
     808		 * 64-bit boundary, so we need to use cmpxchg16b to update it
     809		 * atomically. We only expose posted interrupts when X86_FEATURE_CX16
    810		 * is supported. Actually, hardware platforms supporting PI
     811		 * should have X86_FEATURE_CX16 support; this has been confirmed
    812		 * with Intel hardware guys.
    813		 */
    814		if (boot_cpu_has(X86_FEATURE_CX16))
    815			intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
    816
    817		for_each_iommu(iommu, drhd)
    818			if (!cap_pi_support(iommu->cap)) {
    819				intel_irq_remap_ops.capability &=
    820						~(1 << IRQ_POSTING_CAP);
    821				break;
    822			}
    823	}
    824}
    825
    826static int __init intel_enable_irq_remapping(void)
    827{
    828	struct dmar_drhd_unit *drhd;
    829	struct intel_iommu *iommu;
    830	bool setup = false;
    831
    832	/*
     833	 * Set up interrupt remapping for all the DRHDs now.
    834	 */
    835	for_each_iommu(iommu, drhd) {
    836		if (!ir_pre_enabled(iommu))
    837			iommu_enable_irq_remapping(iommu);
    838		setup = true;
    839	}
    840
    841	if (!setup)
    842		goto error;
    843
    844	irq_remapping_enabled = 1;
    845
    846	set_irq_posting_cap();
    847
    848	pr_info("Enabled IRQ remapping in %s mode\n", eim_mode ? "x2apic" : "xapic");
    849
    850	return eim_mode ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
    851
    852error:
    853	intel_cleanup_irq_remapping();
    854	return -1;
    855}
    856
    857static int ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope,
    858				   struct intel_iommu *iommu,
    859				   struct acpi_dmar_hardware_unit *drhd)
    860{
    861	struct acpi_dmar_pci_path *path;
    862	u8 bus;
    863	int count, free = -1;
    864
    865	bus = scope->bus;
    866	path = (struct acpi_dmar_pci_path *)(scope + 1);
    867	count = (scope->length - sizeof(struct acpi_dmar_device_scope))
    868		/ sizeof(struct acpi_dmar_pci_path);
    869
    870	while (--count > 0) {
    871		/*
     872		 * Access PCI directly because the PCI
    873		 * subsystem isn't initialized yet.
    874		 */
    875		bus = read_pci_config_byte(bus, path->device, path->function,
    876					   PCI_SECONDARY_BUS);
    877		path++;
    878	}
    879
    880	for (count = 0; count < MAX_HPET_TBS; count++) {
    881		if (ir_hpet[count].iommu == iommu &&
    882		    ir_hpet[count].id == scope->enumeration_id)
    883			return 0;
    884		else if (ir_hpet[count].iommu == NULL && free == -1)
    885			free = count;
    886	}
    887	if (free == -1) {
    888		pr_warn("Exceeded Max HPET blocks\n");
    889		return -ENOSPC;
    890	}
    891
    892	ir_hpet[free].iommu = iommu;
    893	ir_hpet[free].id    = scope->enumeration_id;
    894	ir_hpet[free].bus   = bus;
    895	ir_hpet[free].devfn = PCI_DEVFN(path->device, path->function);
    896	pr_info("HPET id %d under DRHD base 0x%Lx\n",
    897		scope->enumeration_id, drhd->address);
    898
    899	return 0;
    900}
    901
    902static int ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
    903				     struct intel_iommu *iommu,
    904				     struct acpi_dmar_hardware_unit *drhd)
    905{
    906	struct acpi_dmar_pci_path *path;
    907	u8 bus;
    908	int count, free = -1;
    909
    910	bus = scope->bus;
    911	path = (struct acpi_dmar_pci_path *)(scope + 1);
    912	count = (scope->length - sizeof(struct acpi_dmar_device_scope))
    913		/ sizeof(struct acpi_dmar_pci_path);
    914
    915	while (--count > 0) {
    916		/*
     917		 * Access PCI directly because the PCI
    918		 * subsystem isn't initialized yet.
    919		 */
    920		bus = read_pci_config_byte(bus, path->device, path->function,
    921					   PCI_SECONDARY_BUS);
    922		path++;
    923	}
    924
    925	for (count = 0; count < MAX_IO_APICS; count++) {
    926		if (ir_ioapic[count].iommu == iommu &&
    927		    ir_ioapic[count].id == scope->enumeration_id)
    928			return 0;
    929		else if (ir_ioapic[count].iommu == NULL && free == -1)
    930			free = count;
    931	}
    932	if (free == -1) {
    933		pr_warn("Exceeded Max IO APICS\n");
    934		return -ENOSPC;
    935	}
    936
    937	ir_ioapic[free].bus   = bus;
    938	ir_ioapic[free].devfn = PCI_DEVFN(path->device, path->function);
    939	ir_ioapic[free].iommu = iommu;
    940	ir_ioapic[free].id    = scope->enumeration_id;
    941	pr_info("IOAPIC id %d under DRHD base  0x%Lx IOMMU %d\n",
    942		scope->enumeration_id, drhd->address, iommu->seq_id);
    943
    944	return 0;
    945}
    946
    947static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header,
    948				      struct intel_iommu *iommu)
    949{
    950	int ret = 0;
    951	struct acpi_dmar_hardware_unit *drhd;
    952	struct acpi_dmar_device_scope *scope;
    953	void *start, *end;
    954
    955	drhd = (struct acpi_dmar_hardware_unit *)header;
    956	start = (void *)(drhd + 1);
    957	end = ((void *)drhd) + header->length;
    958
    959	while (start < end && ret == 0) {
    960		scope = start;
    961		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC)
    962			ret = ir_parse_one_ioapic_scope(scope, iommu, drhd);
    963		else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET)
    964			ret = ir_parse_one_hpet_scope(scope, iommu, drhd);
    965		start += scope->length;
    966	}
    967
    968	return ret;
    969}
    970
    971static void ir_remove_ioapic_hpet_scope(struct intel_iommu *iommu)
    972{
    973	int i;
    974
    975	for (i = 0; i < MAX_HPET_TBS; i++)
    976		if (ir_hpet[i].iommu == iommu)
    977			ir_hpet[i].iommu = NULL;
    978
    979	for (i = 0; i < MAX_IO_APICS; i++)
    980		if (ir_ioapic[i].iommu == iommu)
    981			ir_ioapic[i].iommu = NULL;
    982}
    983
    984/*
     985 * Find the association between IOAPICs and their interrupt-remapping
     986 * hardware units.
    987 */
    988static int __init parse_ioapics_under_ir(void)
    989{
    990	struct dmar_drhd_unit *drhd;
    991	struct intel_iommu *iommu;
    992	bool ir_supported = false;
    993	int ioapic_idx;
    994
    995	for_each_iommu(iommu, drhd) {
    996		int ret;
    997
    998		if (!ecap_ir_support(iommu->ecap))
    999			continue;
   1000
   1001		ret = ir_parse_ioapic_hpet_scope(drhd->hdr, iommu);
   1002		if (ret)
   1003			return ret;
   1004
   1005		ir_supported = true;
   1006	}
   1007
   1008	if (!ir_supported)
   1009		return -ENODEV;
   1010
   1011	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
   1012		int ioapic_id = mpc_ioapic_id(ioapic_idx);
   1013		if (!map_ioapic_to_iommu(ioapic_id)) {
   1014			pr_err(FW_BUG "ioapic %d has no mapping iommu, "
   1015			       "interrupt remapping will be disabled\n",
   1016			       ioapic_id);
   1017			return -1;
   1018		}
   1019	}
   1020
   1021	return 0;
   1022}
   1023
   1024static int __init ir_dev_scope_init(void)
   1025{
   1026	int ret;
   1027
   1028	if (!irq_remapping_enabled)
   1029		return 0;
   1030
   1031	down_write(&dmar_global_lock);
   1032	ret = dmar_dev_scope_init();
   1033	up_write(&dmar_global_lock);
   1034
   1035	return ret;
   1036}
   1037rootfs_initcall(ir_dev_scope_init);
   1038
   1039static void disable_irq_remapping(void)
   1040{
   1041	struct dmar_drhd_unit *drhd;
   1042	struct intel_iommu *iommu = NULL;
   1043
   1044	/*
    1045	 * Disable interrupt remapping for all the DRHDs now.
   1046	 */
   1047	for_each_iommu(iommu, drhd) {
   1048		if (!ecap_ir_support(iommu->ecap))
   1049			continue;
   1050
   1051		iommu_disable_irq_remapping(iommu);
   1052	}
   1053
   1054	/*
   1055	 * Clear Posted-Interrupts capability.
   1056	 */
   1057	if (!disable_irq_post)
   1058		intel_irq_remap_ops.capability &= ~(1 << IRQ_POSTING_CAP);
   1059}
   1060
   1061static int reenable_irq_remapping(int eim)
   1062{
   1063	struct dmar_drhd_unit *drhd;
   1064	bool setup = false;
   1065	struct intel_iommu *iommu = NULL;
   1066
   1067	for_each_iommu(iommu, drhd)
   1068		if (iommu->qi)
   1069			dmar_reenable_qi(iommu);
   1070
   1071	/*
    1072	 * Set up interrupt remapping for all the DRHDs now.
   1073	 */
   1074	for_each_iommu(iommu, drhd) {
   1075		if (!ecap_ir_support(iommu->ecap))
   1076			continue;
   1077
    1078		/* Set up interrupt remapping for iommu. */
   1079		iommu_set_irq_remapping(iommu, eim);
   1080		iommu_enable_irq_remapping(iommu);
   1081		setup = true;
   1082	}
   1083
   1084	if (!setup)
   1085		goto error;
   1086
   1087	set_irq_posting_cap();
   1088
   1089	return 0;
   1090
   1091error:
   1092	/*
   1093	 * handle error condition gracefully here!
   1094	 */
   1095	return -1;
   1096}
   1097
   1098/*
   1099 * Store the MSI remapping domain pointer in the device if enabled.
   1100 *
   1101 * This is called from dmar_pci_bus_add_dev() so it works even when DMA
   1102 * remapping is disabled. Only update the pointer if the device is not
    1103 * already handled by a non-default PCI/MSI interrupt domain. This protects
   1104 * e.g. VMD devices.
   1105 */
   1106void intel_irq_remap_add_device(struct dmar_pci_notify_info *info)
   1107{
   1108	if (!irq_remapping_enabled || pci_dev_has_special_msi_domain(info->dev))
   1109		return;
   1110
   1111	dev_set_msi_domain(&info->dev->dev, map_dev_to_ir(info->dev));
   1112}
   1113
   1114static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
   1115{
   1116	memset(irte, 0, sizeof(*irte));
   1117
   1118	irte->present = 1;
   1119	irte->dst_mode = apic->dest_mode_logical;
   1120	/*
   1121	 * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
    1122	 * actual level or edge trigger will be set up in the IO-APIC
    1123	 * RTE. This will help simplify level triggered irq migration.
    1124	 * For more details, see the comments (in io_apic.c) explaining IO-APIC
    1125	 * irq migration in the presence of interrupt-remapping.
    1126	 */
   1127	irte->trigger_mode = 0;
   1128	irte->dlvry_mode = apic->delivery_mode;
   1129	irte->vector = vector;
   1130	irte->dest_id = IRTE_DEST(dest);
   1131	irte->redir_hint = 1;
   1132}
   1133
   1134struct irq_remap_ops intel_irq_remap_ops = {
   1135	.prepare		= intel_prepare_irq_remapping,
   1136	.enable			= intel_enable_irq_remapping,
   1137	.disable		= disable_irq_remapping,
   1138	.reenable		= reenable_irq_remapping,
   1139	.enable_faulting	= enable_drhd_fault_handling,
   1140};
   1141
   1142static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
   1143{
   1144	struct intel_ir_data *ir_data = irqd->chip_data;
   1145	struct irte *irte = &ir_data->irte_entry;
   1146	struct irq_cfg *cfg = irqd_cfg(irqd);
   1147
   1148	/*
    1149	 * Atomically update the IRTE with the new destination and vector,
    1150	 * then flush the interrupt entry cache.
   1151	 */
   1152	irte->vector = cfg->vector;
   1153	irte->dest_id = IRTE_DEST(cfg->dest_apicid);
   1154
   1155	/* Update the hardware only if the interrupt is in remapped mode. */
   1156	if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
   1157		modify_irte(&ir_data->irq_2_iommu, irte);
   1158}
   1159
   1160/*
   1161 * Migrate the IO-APIC irq in the presence of intr-remapping.
   1162 *
   1163 * For both level and edge triggered, irq migration is a simple atomic
    1164 * update (of vector and cpu destination) of the IRTE and a hardware cache flush.
   1165 *
   1166 * For level triggered, we eliminate the io-apic RTE modification (with the
   1167 * updated vector information), by using a virtual vector (io-apic pin number).
   1168 * Real vector that is used for interrupting cpu will be coming from
   1169 * the interrupt-remapping table entry.
   1170 *
   1171 * As the migration is a simple atomic update of IRTE, the same mechanism
   1172 * is used to migrate MSI irq's in the presence of interrupt-remapping.
   1173 */
   1174static int
   1175intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask,
   1176		      bool force)
   1177{
   1178	struct irq_data *parent = data->parent_data;
   1179	struct irq_cfg *cfg = irqd_cfg(data);
   1180	int ret;
   1181
   1182	ret = parent->chip->irq_set_affinity(parent, mask, force);
   1183	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
   1184		return ret;
   1185
   1186	intel_ir_reconfigure_irte(data, false);
   1187	/*
   1188	 * After this point, all the interrupts will start arriving
   1189	 * at the new destination. So, time to cleanup the previous
    1190	 * at the new destination. So, time to clean up the previous
   1191	 */
   1192	send_cleanup_vector(cfg);
   1193
   1194	return IRQ_SET_MASK_OK_DONE;
   1195}
   1196
   1197static void intel_ir_compose_msi_msg(struct irq_data *irq_data,
   1198				     struct msi_msg *msg)
   1199{
   1200	struct intel_ir_data *ir_data = irq_data->chip_data;
   1201
   1202	*msg = ir_data->msi_entry;
   1203}
   1204
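/*
 * Switch an IRTE between remapped and posted format for VT-d posted
 * interrupts. The posted-interrupt descriptor address is split across
 * pda_l and pda_h because it straddles the 64-bit boundary of the 128-bit
 * IRTE, which is why modify_irte() needs cmpxchg16b for posted entries.
 */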
   1205static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
   1206{
   1207	struct intel_ir_data *ir_data = data->chip_data;
   1208	struct vcpu_data *vcpu_pi_info = info;
   1209
   1210	/* stop posting interrupts, back to remapping mode */
   1211	if (!vcpu_pi_info) {
   1212		modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry);
   1213	} else {
   1214		struct irte irte_pi;
   1215
   1216		/*
   1217		 * We are not caching the posted interrupt entry. We
   1218		 * copy the data from the remapped entry and modify
   1219		 * the fields which are relevant for posted mode. The
   1220		 * cached remapped entry is used for switching back to
   1221		 * remapped mode.
   1222		 */
   1223		memset(&irte_pi, 0, sizeof(irte_pi));
   1224		dmar_copy_shared_irte(&irte_pi, &ir_data->irte_entry);
   1225
   1226		/* Update the posted mode fields */
   1227		irte_pi.p_pst = 1;
   1228		irte_pi.p_urgent = 0;
   1229		irte_pi.p_vector = vcpu_pi_info->vector;
   1230		irte_pi.pda_l = (vcpu_pi_info->pi_desc_addr >>
   1231				(32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT);
   1232		irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) &
   1233				~(-1UL << PDA_HIGH_BIT);
   1234
   1235		modify_irte(&ir_data->irq_2_iommu, &irte_pi);
   1236	}
   1237
   1238	return 0;
   1239}
   1240
   1241static struct irq_chip intel_ir_chip = {
   1242	.name			= "INTEL-IR",
   1243	.irq_ack		= apic_ack_irq,
   1244	.irq_set_affinity	= intel_ir_set_affinity,
   1245	.irq_compose_msi_msg	= intel_ir_compose_msi_msg,
   1246	.irq_set_vcpu_affinity	= intel_ir_set_vcpu_affinity,
   1247};
   1248
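/*
 * Build an MSI message in remappable format: rather than encoding the
 * destination and vector directly, the address encodes the IRTE handle
 * (index, with bit 15 stored separately) and the data carries the
 * subhandle; the IOMMU then takes vector and destination from the IRTE.
 */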
   1249static void fill_msi_msg(struct msi_msg *msg, u32 index, u32 subhandle)
   1250{
   1251	memset(msg, 0, sizeof(*msg));
   1252
   1253	msg->arch_addr_lo.dmar_base_address = X86_MSI_BASE_ADDRESS_LOW;
   1254	msg->arch_addr_lo.dmar_subhandle_valid = true;
   1255	msg->arch_addr_lo.dmar_format = true;
   1256	msg->arch_addr_lo.dmar_index_0_14 = index & 0x7FFF;
   1257	msg->arch_addr_lo.dmar_index_15 = !!(index & 0x8000);
   1258
   1259	msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
   1260
   1261	msg->arch_data.dmar_subhandle = subhandle;
   1262}
   1263
   1264static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
   1265					     struct irq_cfg *irq_cfg,
   1266					     struct irq_alloc_info *info,
   1267					     int index, int sub_handle)
   1268{
   1269	struct irte *irte = &data->irte_entry;
   1270
   1271	prepare_irte(irte, irq_cfg->vector, irq_cfg->dest_apicid);
   1272
   1273	switch (info->type) {
   1274	case X86_IRQ_ALLOC_TYPE_IOAPIC:
   1275		/* Set source-id of interrupt request */
   1276		set_ioapic_sid(irte, info->devid);
   1277		apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n",
   1278			info->devid, irte->present, irte->fpd,
   1279			irte->dst_mode, irte->redir_hint,
   1280			irte->trigger_mode, irte->dlvry_mode,
   1281			irte->avail, irte->vector, irte->dest_id,
   1282			irte->sid, irte->sq, irte->svt);
   1283		sub_handle = info->ioapic.pin;
   1284		break;
   1285	case X86_IRQ_ALLOC_TYPE_HPET:
   1286		set_hpet_sid(irte, info->devid);
   1287		break;
   1288	case X86_IRQ_ALLOC_TYPE_PCI_MSI:
   1289	case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
   1290		set_msi_sid(irte,
   1291			    pci_real_dma_dev(msi_desc_to_pci_dev(info->desc)));
   1292		break;
   1293	default:
   1294		BUG_ON(1);
   1295		break;
   1296	}
   1297	fill_msi_msg(&data->msi_entry, index, sub_handle);
   1298}
   1299
   1300static void intel_free_irq_resources(struct irq_domain *domain,
   1301				     unsigned int virq, unsigned int nr_irqs)
   1302{
   1303	struct irq_data *irq_data;
   1304	struct intel_ir_data *data;
   1305	struct irq_2_iommu *irq_iommu;
   1306	unsigned long flags;
   1307	int i;
   1308	for (i = 0; i < nr_irqs; i++) {
   1309		irq_data = irq_domain_get_irq_data(domain, virq  + i);
   1310		if (irq_data && irq_data->chip_data) {
   1311			data = irq_data->chip_data;
   1312			irq_iommu = &data->irq_2_iommu;
   1313			raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
   1314			clear_entries(irq_iommu);
   1315			raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
   1316			irq_domain_reset_irq_data(irq_data);
   1317			kfree(data);
   1318		}
   1319	}
   1320}
   1321
   1322static int intel_irq_remapping_alloc(struct irq_domain *domain,
   1323				     unsigned int virq, unsigned int nr_irqs,
   1324				     void *arg)
   1325{
   1326	struct intel_iommu *iommu = domain->host_data;
   1327	struct irq_alloc_info *info = arg;
   1328	struct intel_ir_data *data, *ird;
   1329	struct irq_data *irq_data;
   1330	struct irq_cfg *irq_cfg;
   1331	int i, ret, index;
   1332
   1333	if (!info || !iommu)
   1334		return -EINVAL;
   1335	if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI &&
   1336	    info->type != X86_IRQ_ALLOC_TYPE_PCI_MSIX)
   1337		return -EINVAL;
   1338
   1339	/*
    1340	 * With IRQ remapping enabled, we don't need contiguous CPU vectors
   1341	 * to support multiple MSI interrupts.
   1342	 */
   1343	if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI)
   1344		info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
   1345
   1346	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
   1347	if (ret < 0)
   1348		return ret;
   1349
   1350	ret = -ENOMEM;
   1351	data = kzalloc(sizeof(*data), GFP_KERNEL);
   1352	if (!data)
   1353		goto out_free_parent;
   1354
   1355	down_read(&dmar_global_lock);
   1356	index = alloc_irte(iommu, &data->irq_2_iommu, nr_irqs);
   1357	up_read(&dmar_global_lock);
   1358	if (index < 0) {
   1359		pr_warn("Failed to allocate IRTE\n");
   1360		kfree(data);
   1361		goto out_free_parent;
   1362	}
   1363
   1364	for (i = 0; i < nr_irqs; i++) {
   1365		irq_data = irq_domain_get_irq_data(domain, virq + i);
   1366		irq_cfg = irqd_cfg(irq_data);
   1367		if (!irq_data || !irq_cfg) {
   1368			if (!i)
   1369				kfree(data);
   1370			ret = -EINVAL;
   1371			goto out_free_data;
   1372		}
   1373
   1374		if (i > 0) {
   1375			ird = kzalloc(sizeof(*ird), GFP_KERNEL);
   1376			if (!ird)
   1377				goto out_free_data;
   1378			/* Initialize the common data */
   1379			ird->irq_2_iommu = data->irq_2_iommu;
   1380			ird->irq_2_iommu.sub_handle = i;
   1381		} else {
   1382			ird = data;
   1383		}
   1384
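		/* hwirq encodes the IRTE block index (upper bits) and the sub_handle. */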
   1385		irq_data->hwirq = (index << 16) + i;
   1386		irq_data->chip_data = ird;
   1387		irq_data->chip = &intel_ir_chip;
   1388		intel_irq_remapping_prepare_irte(ird, irq_cfg, info, index, i);
   1389		irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
   1390	}
   1391	return 0;
   1392
   1393out_free_data:
   1394	intel_free_irq_resources(domain, virq, i);
   1395out_free_parent:
   1396	irq_domain_free_irqs_common(domain, virq, nr_irqs);
   1397	return ret;
   1398}
   1399
   1400static void intel_irq_remapping_free(struct irq_domain *domain,
   1401				     unsigned int virq, unsigned int nr_irqs)
   1402{
   1403	intel_free_irq_resources(domain, virq, nr_irqs);
   1404	irq_domain_free_irqs_common(domain, virq, nr_irqs);
   1405}
   1406
   1407static int intel_irq_remapping_activate(struct irq_domain *domain,
   1408					struct irq_data *irq_data, bool reserve)
   1409{
   1410	intel_ir_reconfigure_irte(irq_data, true);
   1411	return 0;
   1412}
   1413
   1414static void intel_irq_remapping_deactivate(struct irq_domain *domain,
   1415					   struct irq_data *irq_data)
   1416{
   1417	struct intel_ir_data *data = irq_data->chip_data;
   1418	struct irte entry;
   1419
   1420	memset(&entry, 0, sizeof(entry));
   1421	modify_irte(&data->irq_2_iommu, &entry);
   1422}
   1423
   1424static int intel_irq_remapping_select(struct irq_domain *d,
   1425				      struct irq_fwspec *fwspec,
   1426				      enum irq_domain_bus_token bus_token)
   1427{
   1428	struct intel_iommu *iommu = NULL;
   1429
   1430	if (x86_fwspec_is_ioapic(fwspec))
   1431		iommu = map_ioapic_to_iommu(fwspec->param[0]);
   1432	else if (x86_fwspec_is_hpet(fwspec))
   1433		iommu = map_hpet_to_iommu(fwspec->param[0]);
   1434
   1435	return iommu && d == iommu->ir_domain;
   1436}
   1437
   1438static const struct irq_domain_ops intel_ir_domain_ops = {
   1439	.select = intel_irq_remapping_select,
   1440	.alloc = intel_irq_remapping_alloc,
   1441	.free = intel_irq_remapping_free,
   1442	.activate = intel_irq_remapping_activate,
   1443	.deactivate = intel_irq_remapping_deactivate,
   1444};
   1445
   1446/*
   1447 * Support of Interrupt Remapping Unit Hotplug
   1448 */
   1449static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu)
   1450{
   1451	int ret;
   1452	int eim = x2apic_enabled();
   1453
   1454	ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_IRQR, iommu);
   1455	if (ret)
   1456		return ret;
   1457
   1458	if (eim && !ecap_eim_support(iommu->ecap)) {
   1459		pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n",
   1460			iommu->reg_phys, iommu->ecap);
   1461		return -ENODEV;
   1462	}
   1463
   1464	if (ir_parse_ioapic_hpet_scope(dmaru->hdr, iommu)) {
   1465		pr_warn("DRHD %Lx: failed to parse managed IOAPIC/HPET\n",
   1466			iommu->reg_phys);
   1467		return -ENODEV;
   1468	}
   1469
   1470	/* TODO: check all IOAPICs are covered by IOMMU */
   1471
    1472	/* Set up interrupt remapping now. */
   1473	ret = intel_setup_irq_remapping(iommu);
   1474	if (ret) {
   1475		pr_err("Failed to setup irq remapping for %s\n",
   1476		       iommu->name);
   1477		intel_teardown_irq_remapping(iommu);
   1478		ir_remove_ioapic_hpet_scope(iommu);
   1479	} else {
   1480		iommu_enable_irq_remapping(iommu);
   1481	}
   1482
   1483	return ret;
   1484}
   1485
   1486int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
   1487{
   1488	int ret = 0;
   1489	struct intel_iommu *iommu = dmaru->iommu;
   1490
   1491	if (!irq_remapping_enabled)
   1492		return 0;
   1493	if (iommu == NULL)
   1494		return -EINVAL;
   1495	if (!ecap_ir_support(iommu->ecap))
   1496		return 0;
   1497	if (irq_remapping_cap(IRQ_POSTING_CAP) &&
   1498	    !cap_pi_support(iommu->cap))
   1499		return -EBUSY;
   1500
   1501	if (insert) {
   1502		if (!iommu->ir_table)
   1503			ret = dmar_ir_add(dmaru, iommu);
   1504	} else {
   1505		if (iommu->ir_table) {
   1506			if (!bitmap_empty(iommu->ir_table->bitmap,
   1507					  INTR_REMAP_TABLE_ENTRIES)) {
   1508				ret = -EBUSY;
   1509			} else {
   1510				iommu_disable_irq_remapping(iommu);
   1511				intel_teardown_irq_remapping(iommu);
   1512				ir_remove_ioapic_hpet_scope(iommu);
   1513			}
   1514		}
   1515	}
   1516
   1517	return ret;
   1518}