cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

init.c (90379B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
      4 * Author: Joerg Roedel <jroedel@suse.de>
      5 *         Leo Duran <leo.duran@amd.com>
      6 */
      7
      8#define pr_fmt(fmt)     "AMD-Vi: " fmt
      9#define dev_fmt(fmt)    pr_fmt(fmt)
     10
     11#include <linux/pci.h>
     12#include <linux/acpi.h>
     13#include <linux/list.h>
     14#include <linux/bitmap.h>
     15#include <linux/slab.h>
     16#include <linux/syscore_ops.h>
     17#include <linux/interrupt.h>
     18#include <linux/msi.h>
     19#include <linux/irq.h>
     20#include <linux/amd-iommu.h>
     21#include <linux/export.h>
     22#include <linux/kmemleak.h>
     23#include <linux/cc_platform.h>
     24#include <linux/iopoll.h>
     25#include <asm/pci-direct.h>
     26#include <asm/iommu.h>
     27#include <asm/apic.h>
     28#include <asm/gart.h>
     29#include <asm/x86_init.h>
     30#include <asm/io_apic.h>
     31#include <asm/irq_remapping.h>
     32#include <asm/set_memory.h>
     33
     34#include <linux/crash_dump.h>
     35
     36#include "amd_iommu.h"
     37#include "../irq_remapping.h"
     38
     39/*
     40 * definitions for the ACPI scanning code
     41 */
     42#define IVRS_HEADER_LENGTH 48
     43
     44#define ACPI_IVHD_TYPE_MAX_SUPPORTED	0x40
     45#define ACPI_IVMD_TYPE_ALL              0x20
     46#define ACPI_IVMD_TYPE                  0x21
     47#define ACPI_IVMD_TYPE_RANGE            0x22
     48
     49#define IVHD_DEV_ALL                    0x01
     50#define IVHD_DEV_SELECT                 0x02
     51#define IVHD_DEV_SELECT_RANGE_START     0x03
     52#define IVHD_DEV_RANGE_END              0x04
     53#define IVHD_DEV_ALIAS                  0x42
     54#define IVHD_DEV_ALIAS_RANGE            0x43
     55#define IVHD_DEV_EXT_SELECT             0x46
     56#define IVHD_DEV_EXT_SELECT_RANGE       0x47
     57#define IVHD_DEV_SPECIAL		0x48
     58#define IVHD_DEV_ACPI_HID		0xf0
     59
     60#define UID_NOT_PRESENT                 0
     61#define UID_IS_INTEGER                  1
     62#define UID_IS_CHARACTER                2
     63
     64#define IVHD_SPECIAL_IOAPIC		1
     65#define IVHD_SPECIAL_HPET		2
     66
     67#define IVHD_FLAG_HT_TUN_EN_MASK        0x01
     68#define IVHD_FLAG_PASSPW_EN_MASK        0x02
     69#define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
     70#define IVHD_FLAG_ISOC_EN_MASK          0x08
     71
     72#define IVMD_FLAG_EXCL_RANGE            0x08
     73#define IVMD_FLAG_IW                    0x04
     74#define IVMD_FLAG_IR                    0x02
     75#define IVMD_FLAG_UNITY_MAP             0x01
     76
     77#define ACPI_DEVFLAG_INITPASS           0x01
     78#define ACPI_DEVFLAG_EXTINT             0x02
     79#define ACPI_DEVFLAG_NMI                0x04
     80#define ACPI_DEVFLAG_SYSMGT1            0x10
     81#define ACPI_DEVFLAG_SYSMGT2            0x20
     82#define ACPI_DEVFLAG_LINT0              0x40
     83#define ACPI_DEVFLAG_LINT1              0x80
     84#define ACPI_DEVFLAG_ATSDIS             0x10000000
     85
     86#define LOOP_TIMEOUT	2000000
     87
      88#define IVRS_GET_SBDF_ID(seg, bus, dev, fn)	(((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
      89						 | ((dev & 0x1f) << 3) | (fn & 0x7))
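/*
 * Editor's note (illustrative, not part of the original source): the macro
 * above packs a segment:bus:device.function address into one 32-bit value,
 * with the PCI segment in bits 31:16 and the classic 16-bit BDF in the low
 * half (bus 15:8, device 7:3, function 2:0). For example, assuming
 * seg=0x0001, bus=0xa0, dev=0x03, fn=0x2:
 *
 *   IVRS_GET_SBDF_ID(0x0001, 0xa0, 0x03, 0x2)
 *     == (0x0001 << 16) | (0xa0 << 8) | (0x03 << 3) | 0x2
 *     == 0x0001a01a
 */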
     90
     91/*
     92 * ACPI table definitions
     93 *
     94 * These data structures are laid over the table to parse the important values
     95 * out of it.
     96 */
     97
     98extern const struct iommu_ops amd_iommu_ops;
     99
    100/*
    101 * structure describing one IOMMU in the ACPI table. Typically followed by one
     102 * or more ivhd_entry structures.
    103 */
    104struct ivhd_header {
    105	u8 type;
    106	u8 flags;
    107	u16 length;
    108	u16 devid;
    109	u16 cap_ptr;
    110	u64 mmio_phys;
    111	u16 pci_seg;
    112	u16 info;
    113	u32 efr_attr;
    114
    115	/* Following only valid on IVHD type 11h and 40h */
    116	u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
    117	u64 efr_reg2;
    118} __attribute__((packed));
    119
    120/*
    121 * A device entry describing which devices a specific IOMMU translates and
    122 * which requestor ids they use.
    123 */
    124struct ivhd_entry {
    125	u8 type;
    126	u16 devid;
    127	u8 flags;
    128	struct_group(ext_hid,
    129		u32 ext;
    130		u32 hidh;
    131	);
    132	u64 cid;
    133	u8 uidf;
    134	u8 uidl;
    135	u8 uid;
    136} __attribute__((packed));
    137
    138/*
    139 * An AMD IOMMU memory definition structure. It defines things like exclusion
    140 * ranges for devices and regions that should be unity mapped.
    141 */
    142struct ivmd_header {
    143	u8 type;
    144	u8 flags;
    145	u16 length;
    146	u16 devid;
    147	u16 aux;
    148	u16 pci_seg;
    149	u8  resv[6];
    150	u64 range_start;
    151	u64 range_length;
    152} __attribute__((packed));
    153
    154bool amd_iommu_dump;
    155bool amd_iommu_irq_remap __read_mostly;
    156
    157enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
    158
    159int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
    160static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
    161
    162static bool amd_iommu_detected;
    163static bool amd_iommu_disabled __initdata;
    164static bool amd_iommu_force_enable __initdata;
    165static int amd_iommu_target_ivhd_type;
    166
    167/* Global EFR and EFR2 registers */
    168u64 amd_iommu_efr;
    169u64 amd_iommu_efr2;
    170
    171/* SNP is enabled on the system? */
    172bool amd_iommu_snp_en;
    173EXPORT_SYMBOL(amd_iommu_snp_en);
    174
    175LIST_HEAD(amd_iommu_pci_seg_list);	/* list of all PCI segments */
    176LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
    177					   system */
    178
     180/* Array to assign indices to IOMMUs */
    180struct amd_iommu *amd_iommus[MAX_IOMMUS];
    181
    182/* Number of IOMMUs present in the system */
    183static int amd_iommus_present;
    184
    185/* IOMMUs have a non-present cache? */
    186bool amd_iommu_np_cache __read_mostly;
    187bool amd_iommu_iotlb_sup __read_mostly = true;
    188
    189u32 amd_iommu_max_pasid __read_mostly = ~0;
    190
    191bool amd_iommu_v2_present __read_mostly;
    192static bool amd_iommu_pc_present __read_mostly;
    193bool amdr_ivrs_remap_support __read_mostly;
    194
    195bool amd_iommu_force_isolation __read_mostly;
    196
    197/*
    198 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
    199 * to know which ones are already in use.
    200 */
    201unsigned long *amd_iommu_pd_alloc_bitmap;
    202
    203enum iommu_init_state {
    204	IOMMU_START_STATE,
    205	IOMMU_IVRS_DETECTED,
    206	IOMMU_ACPI_FINISHED,
    207	IOMMU_ENABLED,
    208	IOMMU_PCI_INIT,
    209	IOMMU_INTERRUPTS_EN,
    210	IOMMU_INITIALIZED,
    211	IOMMU_NOT_FOUND,
    212	IOMMU_INIT_ERROR,
    213	IOMMU_CMDLINE_DISABLED,
    214};
    215
    216/* Early ioapic and hpet maps from kernel command line */
    217#define EARLY_MAP_SIZE		4
    218static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
    219static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
    220static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
    221
    222static int __initdata early_ioapic_map_size;
    223static int __initdata early_hpet_map_size;
    224static int __initdata early_acpihid_map_size;
    225
    226static bool __initdata cmdline_maps;
    227
    228static enum iommu_init_state init_state = IOMMU_START_STATE;
    229
    230static int amd_iommu_enable_interrupts(void);
    231static int __init iommu_go_to_state(enum iommu_init_state state);
    232static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);
    233
    234static bool amd_iommu_pre_enabled = true;
    235
    236static u32 amd_iommu_ivinfo __initdata;
    237
    238bool translation_pre_enabled(struct amd_iommu *iommu)
    239{
    240	return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
    241}
    242
    243static void clear_translation_pre_enabled(struct amd_iommu *iommu)
    244{
    245	iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
    246}
    247
    248static void init_translation_status(struct amd_iommu *iommu)
    249{
    250	u64 ctrl;
    251
    252	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
    253	if (ctrl & (1<<CONTROL_IOMMU_EN))
    254		iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
    255}
    256
    257static inline unsigned long tbl_size(int entry_size, int last_bdf)
    258{
    259	unsigned shift = PAGE_SHIFT +
    260			 get_order((last_bdf + 1) * entry_size);
    261
    262	return 1UL << shift;
    263}
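/*
 * Editor's note (worked example, assuming 4 KiB pages, i.e. PAGE_SHIFT == 12,
 * and a 32-byte device table entry): for last_bdf == 0xffff,
 * (0xffff + 1) * 32 == 2 MiB, get_order(2 MiB) == 9, so tbl_size() returns
 * 1UL << (12 + 9) == 2 MiB. The result is always rounded up to a
 * power-of-two number of pages.
 */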
    264
    265int amd_iommu_get_num_iommus(void)
    266{
    267	return amd_iommus_present;
    268}
    269
    270/*
     271 * Iterate through all the IOMMUs to compute the common EFR
     272 * masks among them and warn if any inconsistency is found.
    273 */
    274static void get_global_efr(void)
    275{
    276	struct amd_iommu *iommu;
    277
    278	for_each_iommu(iommu) {
    279		u64 tmp = iommu->features;
    280		u64 tmp2 = iommu->features2;
    281
    282		if (list_is_first(&iommu->list, &amd_iommu_list)) {
    283			amd_iommu_efr = tmp;
    284			amd_iommu_efr2 = tmp2;
    285			continue;
    286		}
    287
    288		if (amd_iommu_efr == tmp &&
    289		    amd_iommu_efr2 == tmp2)
    290			continue;
    291
    292		pr_err(FW_BUG
    293		       "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
    294		       tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
    295		       iommu->index, iommu->pci_seg->id,
    296		       PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
    297		       PCI_FUNC(iommu->devid));
    298
    299		amd_iommu_efr &= tmp;
    300		amd_iommu_efr2 &= tmp2;
    301	}
    302
    303	pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
    304}
    305
    306static bool check_feature_on_all_iommus(u64 mask)
    307{
    308	return !!(amd_iommu_efr & mask);
    309}
    310
    311/*
    312 * For IVHD type 0x11/0x40, EFR is also available via IVHD.
    313 * Default to IVHD EFR since it is available sooner
    314 * (i.e. before PCI init).
    315 */
    316static void __init early_iommu_features_init(struct amd_iommu *iommu,
    317					     struct ivhd_header *h)
    318{
    319	if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
    320		iommu->features = h->efr_reg;
    321		iommu->features2 = h->efr_reg2;
    322	}
    323	if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
    324		amdr_ivrs_remap_support = true;
    325}
    326
    327/* Access to l1 and l2 indexed register spaces */
    328
    329static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
    330{
    331	u32 val;
    332
    333	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
    334	pci_read_config_dword(iommu->dev, 0xfc, &val);
    335	return val;
    336}
    337
    338static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
    339{
    340	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
    341	pci_write_config_dword(iommu->dev, 0xfc, val);
    342	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
    343}
    344
    345static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
    346{
    347	u32 val;
    348
    349	pci_write_config_dword(iommu->dev, 0xf0, address);
    350	pci_read_config_dword(iommu->dev, 0xf4, &val);
    351	return val;
    352}
    353
    354static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
    355{
    356	pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
    357	pci_write_config_dword(iommu->dev, 0xf4, val);
    358}
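/*
 * Editor's note: the four helpers above use the indirect (index/data) access
 * scheme visible in the code itself: the target register address is written
 * to an index port in PCI config space (0xf8 for the L1 space, 0xf0 for L2)
 * and the value is then transferred through the matching data port (0xfc and
 * 0xf4 respectively). Setting the extra bit in the index write (1 << 31 for
 * L1, 1 << 8 for L2) enables the write path before the data port is written.
 */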
    359
    360/****************************************************************************
    361 *
    362 * AMD IOMMU MMIO register space handling functions
    363 *
    364 * These functions are used to program the IOMMU device registers in
    365 * MMIO space required for that driver.
    366 *
    367 ****************************************************************************/
    368
    369/*
     370 * This function sets the exclusion range in the IOMMU. DMA accesses to the
     371 * exclusion range are passed through untranslated.
    372 */
    373static void iommu_set_exclusion_range(struct amd_iommu *iommu)
    374{
    375	u64 start = iommu->exclusion_start & PAGE_MASK;
    376	u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
    377	u64 entry;
    378
    379	if (!iommu->exclusion_start)
    380		return;
    381
    382	entry = start | MMIO_EXCL_ENABLE_MASK;
    383	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
    384			&entry, sizeof(entry));
    385
    386	entry = limit;
    387	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
    388			&entry, sizeof(entry));
    389}
    390
    391static void iommu_set_cwwb_range(struct amd_iommu *iommu)
    392{
    393	u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
    394	u64 entry = start & PM_ADDR_MASK;
    395
    396	if (!check_feature_on_all_iommus(FEATURE_SNP))
    397		return;
    398
    399	/* Note:
    400	 * Re-purpose Exclusion base/limit registers for Completion wait
    401	 * write-back base/limit.
    402	 */
    403	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
    404		    &entry, sizeof(entry));
    405
    406	/* Note:
    407	 * Default to 4 Kbytes, which can be specified by setting base
    408	 * address equal to the limit address.
    409	 */
    410	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
    411		    &entry, sizeof(entry));
    412}
    413
    414/* Programs the physical address of the device table into the IOMMU hardware */
    415static void iommu_set_device_table(struct amd_iommu *iommu)
    416{
    417	u64 entry;
    418	u32 dev_table_size = iommu->pci_seg->dev_table_size;
    419	void *dev_table = (void *)get_dev_table(iommu);
    420
    421	BUG_ON(iommu->mmio_base == NULL);
    422
    423	entry = iommu_virt_to_phys(dev_table);
    424	entry |= (dev_table_size >> 12) - 1;
    425	memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
    426			&entry, sizeof(entry));
    427}
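/*
 * Editor's note (illustrative): the low bits of the device table base
 * register hold the table size encoded as "number of 4 KiB pages minus one",
 * which is what the (dev_table_size >> 12) - 1 term above computes. E.g. for
 * a 2 MiB table this is (0x200000 >> 12) - 1 == 0x1ff.
 */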
    428
    429/* Generic functions to enable/disable certain features of the IOMMU. */
    430static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
    431{
    432	u64 ctrl;
    433
    434	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
    435	ctrl |= (1ULL << bit);
    436	writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
    437}
    438
    439static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
    440{
    441	u64 ctrl;
    442
    443	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
    444	ctrl &= ~(1ULL << bit);
    445	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
    446}
    447
    448static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
    449{
    450	u64 ctrl;
    451
    452	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
    453	ctrl &= ~CTRL_INV_TO_MASK;
    454	ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
    455	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
    456}
    457
    458/* Function to enable the hardware */
    459static void iommu_enable(struct amd_iommu *iommu)
    460{
    461	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
    462}
    463
    464static void iommu_disable(struct amd_iommu *iommu)
    465{
    466	if (!iommu->mmio_base)
    467		return;
    468
    469	/* Disable command buffer */
    470	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
    471
    472	/* Disable event logging and event interrupts */
    473	iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
    474	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
    475
    476	/* Disable IOMMU GA_LOG */
    477	iommu_feature_disable(iommu, CONTROL_GALOG_EN);
    478	iommu_feature_disable(iommu, CONTROL_GAINT_EN);
    479
    480	/* Disable IOMMU hardware itself */
    481	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
    482}
    483
    484/*
    485 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
    486 * the system has one.
    487 */
    488static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
    489{
    490	if (!request_mem_region(address, end, "amd_iommu")) {
    491		pr_err("Can not reserve memory region %llx-%llx for mmio\n",
    492			address, end);
    493		pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
    494		return NULL;
    495	}
    496
    497	return (u8 __iomem *)ioremap(address, end);
    498}
    499
    500static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
    501{
    502	if (iommu->mmio_base)
    503		iounmap(iommu->mmio_base);
    504	release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
    505}
    506
    507static inline u32 get_ivhd_header_size(struct ivhd_header *h)
    508{
    509	u32 size = 0;
    510
    511	switch (h->type) {
    512	case 0x10:
    513		size = 24;
    514		break;
    515	case 0x11:
    516	case 0x40:
    517		size = 40;
    518		break;
    519	}
    520	return size;
    521}
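/*
 * Editor's note: these sizes follow directly from struct ivhd_header above:
 * the packed fields up to and including efr_attr add up to 24 bytes (IVHD
 * type 0x10), while types 0x11 and 0x40 additionally carry the 8-byte
 * efr_reg and efr_reg2 fields for a total of 40 bytes.
 */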
    522
    523/****************************************************************************
    524 *
    525 * The functions below belong to the first pass of AMD IOMMU ACPI table
    526 * parsing. In this pass we try to find out the highest device id this
     527 * code has to handle. Based on this information the size of the shared data
    528 * structures is determined later.
    529 *
    530 ****************************************************************************/
    531
    532/*
    533 * This function calculates the length of a given IVHD entry
    534 */
    535static inline int ivhd_entry_length(u8 *ivhd)
    536{
    537	u32 type = ((struct ivhd_entry *)ivhd)->type;
    538
    539	if (type < 0x80) {
    540		return 0x04 << (*ivhd >> 6);
    541	} else if (type == IVHD_DEV_ACPI_HID) {
    542		/* For ACPI_HID, offset 21 is uid len */
    543		return *((u8 *)ivhd + 21) + 22;
    544	}
    545	return 0;
    546}
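/*
 * Editor's note (illustrative): for entry types below 0x80 the top two bits
 * of the type byte encode the entry length as 4 << (type >> 6), i.e. 4, 8,
 * 16 or 32 bytes. For example IVHD_DEV_SELECT (0x02) is a 4-byte entry and
 * IVHD_DEV_ALIAS (0x42) an 8-byte entry, while IVHD_DEV_ACPI_HID (0xf0)
 * entries are variable-length: 22 bytes plus the UID length stored at
 * offset 21.
 */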
    547
    548/*
    549 * After reading the highest device id from the IOMMU PCI capability header
     550 * this function checks whether a higher device id is defined in the ACPI table
    551 */
    552static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
    553{
    554	u8 *p = (void *)h, *end = (void *)h;
    555	struct ivhd_entry *dev;
    556	int last_devid = -EINVAL;
    557
    558	u32 ivhd_size = get_ivhd_header_size(h);
    559
    560	if (!ivhd_size) {
    561		pr_err("Unsupported IVHD type %#x\n", h->type);
    562		return -EINVAL;
    563	}
    564
    565	p += ivhd_size;
    566	end += h->length;
    567
    568	while (p < end) {
    569		dev = (struct ivhd_entry *)p;
    570		switch (dev->type) {
    571		case IVHD_DEV_ALL:
    572			/* Use maximum BDF value for DEV_ALL */
    573			return 0xffff;
    574		case IVHD_DEV_SELECT:
    575		case IVHD_DEV_RANGE_END:
    576		case IVHD_DEV_ALIAS:
    577		case IVHD_DEV_EXT_SELECT:
    578			/* all the above subfield types refer to device ids */
    579			if (dev->devid > last_devid)
    580				last_devid = dev->devid;
    581			break;
    582		default:
    583			break;
    584		}
    585		p += ivhd_entry_length(p);
    586	}
    587
    588	WARN_ON(p != end);
    589
    590	return last_devid;
    591}
    592
    593static int __init check_ivrs_checksum(struct acpi_table_header *table)
    594{
    595	int i;
    596	u8 checksum = 0, *p = (u8 *)table;
    597
    598	for (i = 0; i < table->length; ++i)
    599		checksum += p[i];
    600	if (checksum != 0) {
    601		/* ACPI table corrupt */
    602		pr_err(FW_BUG "IVRS invalid checksum\n");
    603		return -ENODEV;
    604	}
    605
    606	return 0;
    607}
    608
    609/*
    610 * Iterate over all IVHD entries in the ACPI table and find the highest device
    611 * id which we need to handle. This is the first of three functions which parse
    612 * the ACPI table. So we check the checksum here.
    613 */
    614static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg)
    615{
    616	u8 *p = (u8 *)table, *end = (u8 *)table;
    617	struct ivhd_header *h;
    618	int last_devid, last_bdf = 0;
    619
    620	p += IVRS_HEADER_LENGTH;
    621
    622	end += table->length;
    623	while (p < end) {
    624		h = (struct ivhd_header *)p;
    625		if (h->pci_seg == pci_seg &&
    626		    h->type == amd_iommu_target_ivhd_type) {
    627			last_devid = find_last_devid_from_ivhd(h);
    628
    629			if (last_devid < 0)
    630				return -EINVAL;
    631			if (last_devid > last_bdf)
    632				last_bdf = last_devid;
    633		}
    634		p += h->length;
    635	}
    636	WARN_ON(p != end);
    637
    638	return last_bdf;
    639}
    640
    641/****************************************************************************
    642 *
    643 * The following functions belong to the code path which parses the ACPI table
    644 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
    645 * data structures, initialize the per PCI segment device/alias/rlookup table
     646 * and also perform the basic hardware initialization.
    647 *
    648 ****************************************************************************/
    649
    650/* Allocate per PCI segment device table */
    651static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
    652{
    653	pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
    654						      get_order(pci_seg->dev_table_size));
    655	if (!pci_seg->dev_table)
    656		return -ENOMEM;
    657
    658	return 0;
    659}
    660
    661static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
    662{
    663	free_pages((unsigned long)pci_seg->dev_table,
    664		    get_order(pci_seg->dev_table_size));
    665	pci_seg->dev_table = NULL;
    666}
    667
    668/* Allocate per PCI segment IOMMU rlookup table. */
    669static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
    670{
    671	pci_seg->rlookup_table = (void *)__get_free_pages(
    672						GFP_KERNEL | __GFP_ZERO,
    673						get_order(pci_seg->rlookup_table_size));
    674	if (pci_seg->rlookup_table == NULL)
    675		return -ENOMEM;
    676
    677	return 0;
    678}
    679
    680static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
    681{
    682	free_pages((unsigned long)pci_seg->rlookup_table,
    683		   get_order(pci_seg->rlookup_table_size));
    684	pci_seg->rlookup_table = NULL;
    685}
    686
    687static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
    688{
    689	pci_seg->irq_lookup_table = (void *)__get_free_pages(
    690					     GFP_KERNEL | __GFP_ZERO,
    691					     get_order(pci_seg->rlookup_table_size));
    692	kmemleak_alloc(pci_seg->irq_lookup_table,
    693		       pci_seg->rlookup_table_size, 1, GFP_KERNEL);
    694	if (pci_seg->irq_lookup_table == NULL)
    695		return -ENOMEM;
    696
    697	return 0;
    698}
    699
    700static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
    701{
    702	kmemleak_free(pci_seg->irq_lookup_table);
    703	free_pages((unsigned long)pci_seg->irq_lookup_table,
    704		   get_order(pci_seg->rlookup_table_size));
    705	pci_seg->irq_lookup_table = NULL;
    706}
    707
    708static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
    709{
    710	int i;
    711
    712	pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL,
    713					get_order(pci_seg->alias_table_size));
    714	if (!pci_seg->alias_table)
    715		return -ENOMEM;
    716
    717	/*
     718	 * let all alias entries point to themselves
    719	 */
    720	for (i = 0; i <= pci_seg->last_bdf; ++i)
    721		pci_seg->alias_table[i] = i;
    722
    723	return 0;
    724}
    725
    726static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
    727{
    728	free_pages((unsigned long)pci_seg->alias_table,
    729		   get_order(pci_seg->alias_table_size));
    730	pci_seg->alias_table = NULL;
    731}
    732
    733/*
    734 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
    735 * write commands to that buffer later and the IOMMU will execute them
    736 * asynchronously
    737 */
    738static int __init alloc_command_buffer(struct amd_iommu *iommu)
    739{
    740	iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
    741						  get_order(CMD_BUFFER_SIZE));
    742
    743	return iommu->cmd_buf ? 0 : -ENOMEM;
    744}
    745
    746/*
    747 * This function restarts event logging in case the IOMMU experienced
    748 * an event log buffer overflow.
    749 */
    750void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
    751{
    752	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
    753	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
    754}
    755
    756/*
    757 * This function resets the command buffer if the IOMMU stopped fetching
    758 * commands from it.
    759 */
    760static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
    761{
    762	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
    763
    764	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
    765	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
    766	iommu->cmd_buf_head = 0;
    767	iommu->cmd_buf_tail = 0;
    768
    769	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
    770}
    771
    772/*
    773 * This function writes the command buffer address to the hardware and
    774 * enables it.
    775 */
    776static void iommu_enable_command_buffer(struct amd_iommu *iommu)
    777{
    778	u64 entry;
    779
    780	BUG_ON(iommu->cmd_buf == NULL);
    781
    782	entry = iommu_virt_to_phys(iommu->cmd_buf);
    783	entry |= MMIO_CMD_SIZE_512;
    784
    785	memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
    786		    &entry, sizeof(entry));
    787
    788	amd_iommu_reset_cmd_buffer(iommu);
    789}
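/*
 * Editor's note (hedged; the constants live in amd_iommu_types.h and are not
 * shown here): MMIO_CMD_SIZE_512 is the size-field encoding for a 512-entry
 * command buffer. Assuming the architectural 16-byte command format, this
 * corresponds to the 8 KiB CMD_BUFFER_SIZE allocated in
 * alloc_command_buffer() above.
 */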
    790
    791/*
    792 * This function disables the command buffer
    793 */
    794static void iommu_disable_command_buffer(struct amd_iommu *iommu)
    795{
    796	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
    797}
    798
    799static void __init free_command_buffer(struct amd_iommu *iommu)
    800{
    801	free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
    802}
    803
    804static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
    805					 gfp_t gfp, size_t size)
    806{
    807	int order = get_order(size);
    808	void *buf = (void *)__get_free_pages(gfp, order);
    809
    810	if (buf &&
    811	    check_feature_on_all_iommus(FEATURE_SNP) &&
    812	    set_memory_4k((unsigned long)buf, (1 << order))) {
    813		free_pages((unsigned long)buf, order);
    814		buf = NULL;
    815	}
    816
    817	return buf;
    818}
    819
    820/* allocates the memory where the IOMMU will log its events to */
    821static int __init alloc_event_buffer(struct amd_iommu *iommu)
    822{
    823	iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
    824					      EVT_BUFFER_SIZE);
    825
    826	return iommu->evt_buf ? 0 : -ENOMEM;
    827}
    828
    829static void iommu_enable_event_buffer(struct amd_iommu *iommu)
    830{
    831	u64 entry;
    832
    833	BUG_ON(iommu->evt_buf == NULL);
    834
    835	entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
    836
    837	memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
    838		    &entry, sizeof(entry));
    839
    840	/* set head and tail to zero manually */
    841	writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
    842	writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
    843
    844	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
    845}
    846
    847/*
    848 * This function disables the event log buffer
    849 */
    850static void iommu_disable_event_buffer(struct amd_iommu *iommu)
    851{
    852	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
    853}
    854
    855static void __init free_event_buffer(struct amd_iommu *iommu)
    856{
    857	free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
    858}
    859
     860/* allocates the memory where the IOMMU will store PPR log entries */
    861static int __init alloc_ppr_log(struct amd_iommu *iommu)
    862{
    863	iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
    864					      PPR_LOG_SIZE);
    865
    866	return iommu->ppr_log ? 0 : -ENOMEM;
    867}
    868
    869static void iommu_enable_ppr_log(struct amd_iommu *iommu)
    870{
    871	u64 entry;
    872
    873	if (iommu->ppr_log == NULL)
    874		return;
    875
    876	entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
    877
    878	memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
    879		    &entry, sizeof(entry));
    880
    881	/* set head and tail to zero manually */
    882	writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
    883	writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
    884
    885	iommu_feature_enable(iommu, CONTROL_PPRLOG_EN);
    886	iommu_feature_enable(iommu, CONTROL_PPR_EN);
    887}
    888
    889static void __init free_ppr_log(struct amd_iommu *iommu)
    890{
    891	free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
    892}
    893
    894static void free_ga_log(struct amd_iommu *iommu)
    895{
    896#ifdef CONFIG_IRQ_REMAP
    897	free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE));
    898	free_pages((unsigned long)iommu->ga_log_tail, get_order(8));
    899#endif
    900}
    901
    902static int iommu_ga_log_enable(struct amd_iommu *iommu)
    903{
    904#ifdef CONFIG_IRQ_REMAP
    905	u32 status, i;
    906	u64 entry;
    907
    908	if (!iommu->ga_log)
    909		return -EINVAL;
    910
    911	/* Check if already running */
    912	status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
    913	if (WARN_ON(status & (MMIO_STATUS_GALOG_RUN_MASK)))
    914		return 0;
    915
    916	entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
    917	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
    918		    &entry, sizeof(entry));
    919	entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
    920		 (BIT_ULL(52)-1)) & ~7ULL;
    921	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
    922		    &entry, sizeof(entry));
    923	writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
    924	writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
    925
    926
    927	iommu_feature_enable(iommu, CONTROL_GAINT_EN);
    928	iommu_feature_enable(iommu, CONTROL_GALOG_EN);
    929
    930	for (i = 0; i < LOOP_TIMEOUT; ++i) {
    931		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
    932		if (status & (MMIO_STATUS_GALOG_RUN_MASK))
    933			break;
    934		udelay(10);
    935	}
    936
    937	if (WARN_ON(i >= LOOP_TIMEOUT))
    938		return -EINVAL;
    939#endif /* CONFIG_IRQ_REMAP */
    940	return 0;
    941}
    942
    943static int iommu_init_ga_log(struct amd_iommu *iommu)
    944{
    945#ifdef CONFIG_IRQ_REMAP
    946	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
    947		return 0;
    948
    949	iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
    950					get_order(GA_LOG_SIZE));
    951	if (!iommu->ga_log)
    952		goto err_out;
    953
    954	iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
    955					get_order(8));
    956	if (!iommu->ga_log_tail)
    957		goto err_out;
    958
    959	return 0;
    960err_out:
    961	free_ga_log(iommu);
    962	return -EINVAL;
    963#else
    964	return 0;
    965#endif /* CONFIG_IRQ_REMAP */
    966}
    967
    968static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
    969{
    970	iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1);
    971
    972	return iommu->cmd_sem ? 0 : -ENOMEM;
    973}
    974
    975static void __init free_cwwb_sem(struct amd_iommu *iommu)
    976{
    977	if (iommu->cmd_sem)
    978		free_page((unsigned long)iommu->cmd_sem);
    979}
    980
    981static void iommu_enable_xt(struct amd_iommu *iommu)
    982{
    983#ifdef CONFIG_IRQ_REMAP
    984	/*
    985	 * XT mode (32-bit APIC destination ID) requires
    986	 * GA mode (128-bit IRTE support) as a prerequisite.
    987	 */
    988	if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
    989	    amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
    990		iommu_feature_enable(iommu, CONTROL_XT_EN);
    991#endif /* CONFIG_IRQ_REMAP */
    992}
    993
    994static void iommu_enable_gt(struct amd_iommu *iommu)
    995{
    996	if (!iommu_feature(iommu, FEATURE_GT))
    997		return;
    998
    999	iommu_feature_enable(iommu, CONTROL_GT_EN);
   1000}
   1001
   1002/* sets a specific bit in the device table entry. */
   1003static void __set_dev_entry_bit(struct dev_table_entry *dev_table,
   1004				u16 devid, u8 bit)
   1005{
   1006	int i = (bit >> 6) & 0x03;
   1007	int _bit = bit & 0x3f;
   1008
   1009	dev_table[devid].data[i] |= (1UL << _bit);
   1010}
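/*
 * Editor's note (worked example): a device table entry is four 64-bit words
 * (data[0..3]), so a flat bit number selects word (bit >> 6) and bit
 * position (bit & 0x3f) within it. E.g. bit 98 lands in data[1], bit 34.
 */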
   1011
   1012static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
   1013{
   1014	struct dev_table_entry *dev_table = get_dev_table(iommu);
   1015
   1016	return __set_dev_entry_bit(dev_table, devid, bit);
   1017}
   1018
   1019static int __get_dev_entry_bit(struct dev_table_entry *dev_table,
   1020			       u16 devid, u8 bit)
   1021{
   1022	int i = (bit >> 6) & 0x03;
   1023	int _bit = bit & 0x3f;
   1024
   1025	return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
   1026}
   1027
   1028static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
   1029{
   1030	struct dev_table_entry *dev_table = get_dev_table(iommu);
   1031
   1032	return __get_dev_entry_bit(dev_table, devid, bit);
   1033}
   1034
   1035static bool __copy_device_table(struct amd_iommu *iommu)
   1036{
   1037	u64 int_ctl, int_tab_len, entry = 0;
   1038	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
   1039	struct dev_table_entry *old_devtb = NULL;
   1040	u32 lo, hi, devid, old_devtb_size;
   1041	phys_addr_t old_devtb_phys;
   1042	u16 dom_id, dte_v, irq_v;
   1043	gfp_t gfp_flag;
   1044	u64 tmp;
   1045
    1046	/* Each IOMMU uses a separate device table of the same size */
   1047	lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
   1048	hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
   1049	entry = (((u64) hi) << 32) + lo;
   1050
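	/*
	 * Editor's note (illustrative): the low bits of the base register
	 * read above encode the old table size as (pages - 1), so the
	 * computation below recovers it as (field + 1) * 4 KiB; a field
	 * value of 0x1ff therefore means a 2 MiB table.
	 */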
   1051	old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
   1052	if (old_devtb_size != pci_seg->dev_table_size) {
   1053		pr_err("The device table size of IOMMU:%d is not expected!\n",
   1054			iommu->index);
   1055		return false;
   1056	}
   1057
   1058	/*
    1059	 * When SME is enabled in the first kernel, the entry includes the
    1060	 * memory encryption mask (sme_me_mask); we must clear it to obtain
    1061	 * the true physical address in the kdump kernel.
   1062	 */
   1063	old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
   1064
   1065	if (old_devtb_phys >= 0x100000000ULL) {
   1066		pr_err("The address of old device table is above 4G, not trustworthy!\n");
   1067		return false;
   1068	}
   1069	old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel())
   1070		    ? (__force void *)ioremap_encrypted(old_devtb_phys,
   1071							pci_seg->dev_table_size)
   1072		    : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB);
   1073
   1074	if (!old_devtb)
   1075		return false;
   1076
   1077	gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
   1078	pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
   1079						    get_order(pci_seg->dev_table_size));
   1080	if (pci_seg->old_dev_tbl_cpy == NULL) {
   1081		pr_err("Failed to allocate memory for copying old device table!\n");
   1082		memunmap(old_devtb);
   1083		return false;
   1084	}
   1085
   1086	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
   1087		pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid];
   1088		dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
   1089		dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
   1090
   1091		if (dte_v && dom_id) {
   1092			pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
   1093			pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
   1094			__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
   1095			/* If gcr3 table existed, mask it out */
   1096			if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
   1097				tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
   1098				tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
   1099				pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
   1100				tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
   1101				tmp |= DTE_FLAG_GV;
   1102				pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
   1103			}
   1104		}
   1105
   1106		irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
   1107		int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
   1108		int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
   1109		if (irq_v && (int_ctl || int_tab_len)) {
   1110			if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
   1111			    (int_tab_len != DTE_INTTABLEN)) {
   1112				pr_err("Wrong old irq remapping flag: %#x\n", devid);
   1113				memunmap(old_devtb);
   1114				return false;
   1115			}
   1116
   1117			pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
   1118		}
   1119	}
   1120	memunmap(old_devtb);
   1121
   1122	return true;
   1123}
   1124
   1125static bool copy_device_table(void)
   1126{
   1127	struct amd_iommu *iommu;
   1128	struct amd_iommu_pci_seg *pci_seg;
   1129
   1130	if (!amd_iommu_pre_enabled)
   1131		return false;
   1132
   1133	pr_warn("Translation is already enabled - trying to copy translation structures\n");
   1134
   1135	/*
    1136	 * All IOMMUs within a PCI segment share a common device table.
    1137	 * Hence copy the device table only once per PCI segment.
   1138	 */
   1139	for_each_pci_segment(pci_seg) {
   1140		for_each_iommu(iommu) {
   1141			if (pci_seg->id != iommu->pci_seg->id)
   1142				continue;
   1143			if (!__copy_device_table(iommu))
   1144				return false;
   1145			break;
   1146		}
   1147	}
   1148
   1149	return true;
   1150}
   1151
   1152void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid)
   1153{
   1154	int sysmgt;
   1155
   1156	sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) |
   1157		 (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1);
   1158
   1159	if (sysmgt == 0x01)
   1160		set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW);
   1161}
   1162
   1163/*
   1164 * This function takes the device specific flags read from the ACPI
   1165 * table and sets up the device table entry with that information
   1166 */
   1167static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
   1168					   u16 devid, u32 flags, u32 ext_flags)
   1169{
   1170	if (flags & ACPI_DEVFLAG_INITPASS)
   1171		set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
   1172	if (flags & ACPI_DEVFLAG_EXTINT)
   1173		set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
   1174	if (flags & ACPI_DEVFLAG_NMI)
   1175		set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
   1176	if (flags & ACPI_DEVFLAG_SYSMGT1)
   1177		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
   1178	if (flags & ACPI_DEVFLAG_SYSMGT2)
   1179		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
   1180	if (flags & ACPI_DEVFLAG_LINT0)
   1181		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
   1182	if (flags & ACPI_DEVFLAG_LINT1)
   1183		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
   1184
   1185	amd_iommu_apply_erratum_63(iommu, devid);
   1186
   1187	amd_iommu_set_rlookup_table(iommu, devid);
   1188}
   1189
   1190int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
   1191{
   1192	struct devid_map *entry;
   1193	struct list_head *list;
   1194
   1195	if (type == IVHD_SPECIAL_IOAPIC)
   1196		list = &ioapic_map;
   1197	else if (type == IVHD_SPECIAL_HPET)
   1198		list = &hpet_map;
   1199	else
   1200		return -EINVAL;
   1201
   1202	list_for_each_entry(entry, list, list) {
   1203		if (!(entry->id == id && entry->cmd_line))
   1204			continue;
   1205
   1206		pr_info("Command-line override present for %s id %d - ignoring\n",
   1207			type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
   1208
   1209		*devid = entry->devid;
   1210
   1211		return 0;
   1212	}
   1213
   1214	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
   1215	if (!entry)
   1216		return -ENOMEM;
   1217
   1218	entry->id	= id;
   1219	entry->devid	= *devid;
   1220	entry->cmd_line	= cmd_line;
   1221
   1222	list_add_tail(&entry->list, list);
   1223
   1224	return 0;
   1225}
   1226
   1227static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
   1228				      bool cmd_line)
   1229{
   1230	struct acpihid_map_entry *entry;
   1231	struct list_head *list = &acpihid_map;
   1232
   1233	list_for_each_entry(entry, list, list) {
   1234		if (strcmp(entry->hid, hid) ||
   1235		    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
   1236		    !entry->cmd_line)
   1237			continue;
   1238
   1239		pr_info("Command-line override for hid:%s uid:%s\n",
   1240			hid, uid);
   1241		*devid = entry->devid;
   1242		return 0;
   1243	}
   1244
   1245	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
   1246	if (!entry)
   1247		return -ENOMEM;
   1248
   1249	memcpy(entry->uid, uid, strlen(uid));
   1250	memcpy(entry->hid, hid, strlen(hid));
   1251	entry->devid = *devid;
   1252	entry->cmd_line	= cmd_line;
   1253	entry->root_devid = (entry->devid & (~0x7));
   1254
   1255	pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
   1256		entry->cmd_line ? "cmd" : "ivrs",
   1257		entry->hid, entry->uid, entry->root_devid);
   1258
   1259	list_add_tail(&entry->list, list);
   1260	return 0;
   1261}
   1262
   1263static int __init add_early_maps(void)
   1264{
   1265	int i, ret;
   1266
   1267	for (i = 0; i < early_ioapic_map_size; ++i) {
   1268		ret = add_special_device(IVHD_SPECIAL_IOAPIC,
   1269					 early_ioapic_map[i].id,
   1270					 &early_ioapic_map[i].devid,
   1271					 early_ioapic_map[i].cmd_line);
   1272		if (ret)
   1273			return ret;
   1274	}
   1275
   1276	for (i = 0; i < early_hpet_map_size; ++i) {
   1277		ret = add_special_device(IVHD_SPECIAL_HPET,
   1278					 early_hpet_map[i].id,
   1279					 &early_hpet_map[i].devid,
   1280					 early_hpet_map[i].cmd_line);
   1281		if (ret)
   1282			return ret;
   1283	}
   1284
   1285	for (i = 0; i < early_acpihid_map_size; ++i) {
   1286		ret = add_acpi_hid_device(early_acpihid_map[i].hid,
   1287					  early_acpihid_map[i].uid,
   1288					  &early_acpihid_map[i].devid,
   1289					  early_acpihid_map[i].cmd_line);
   1290		if (ret)
   1291			return ret;
   1292	}
   1293
   1294	return 0;
   1295}
   1296
   1297/*
   1298 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
   1299 * initializes the hardware and our data structures with it.
   1300 */
   1301static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
   1302					struct ivhd_header *h)
   1303{
   1304	u8 *p = (u8 *)h;
   1305	u8 *end = p, flags = 0;
   1306	u16 devid = 0, devid_start = 0, devid_to = 0, seg_id;
   1307	u32 dev_i, ext_flags = 0;
   1308	bool alias = false;
   1309	struct ivhd_entry *e;
   1310	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
   1311	u32 ivhd_size;
   1312	int ret;
   1313
   1314
   1315	ret = add_early_maps();
   1316	if (ret)
   1317		return ret;
   1318
   1319	amd_iommu_apply_ivrs_quirks();
   1320
   1321	/*
   1322	 * First save the recommended feature enable bits from ACPI
   1323	 */
   1324	iommu->acpi_flags = h->flags;
   1325
   1326	/*
   1327	 * Done. Now parse the device entries
   1328	 */
   1329	ivhd_size = get_ivhd_header_size(h);
   1330	if (!ivhd_size) {
   1331		pr_err("Unsupported IVHD type %#x\n", h->type);
   1332		return -EINVAL;
   1333	}
   1334
   1335	p += ivhd_size;
   1336
   1337	end += h->length;
   1338
   1339
   1340	while (p < end) {
   1341		e = (struct ivhd_entry *)p;
   1342		seg_id = pci_seg->id;
   1343
   1344		switch (e->type) {
   1345		case IVHD_DEV_ALL:
   1346
   1347			DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
   1348
   1349			for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i)
   1350				set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
   1351			break;
   1352		case IVHD_DEV_SELECT:
   1353
   1354			DUMP_printk("  DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x "
   1355				    "flags: %02x\n",
   1356				    seg_id, PCI_BUS_NUM(e->devid),
   1357				    PCI_SLOT(e->devid),
   1358				    PCI_FUNC(e->devid),
   1359				    e->flags);
   1360
   1361			devid = e->devid;
   1362			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
   1363			break;
   1364		case IVHD_DEV_SELECT_RANGE_START:
   1365
   1366			DUMP_printk("  DEV_SELECT_RANGE_START\t "
   1367				    "devid: %04x:%02x:%02x.%x flags: %02x\n",
   1368				    seg_id, PCI_BUS_NUM(e->devid),
   1369				    PCI_SLOT(e->devid),
   1370				    PCI_FUNC(e->devid),
   1371				    e->flags);
   1372
   1373			devid_start = e->devid;
   1374			flags = e->flags;
   1375			ext_flags = 0;
   1376			alias = false;
   1377			break;
   1378		case IVHD_DEV_ALIAS:
   1379
   1380			DUMP_printk("  DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x "
   1381				    "flags: %02x devid_to: %02x:%02x.%x\n",
   1382				    seg_id, PCI_BUS_NUM(e->devid),
   1383				    PCI_SLOT(e->devid),
   1384				    PCI_FUNC(e->devid),
   1385				    e->flags,
   1386				    PCI_BUS_NUM(e->ext >> 8),
   1387				    PCI_SLOT(e->ext >> 8),
   1388				    PCI_FUNC(e->ext >> 8));
   1389
   1390			devid = e->devid;
   1391			devid_to = e->ext >> 8;
   1392			set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
   1393			set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
   1394			pci_seg->alias_table[devid] = devid_to;
   1395			break;
   1396		case IVHD_DEV_ALIAS_RANGE:
   1397
   1398			DUMP_printk("  DEV_ALIAS_RANGE\t\t "
   1399				    "devid: %04x:%02x:%02x.%x flags: %02x "
   1400				    "devid_to: %04x:%02x:%02x.%x\n",
   1401				    seg_id, PCI_BUS_NUM(e->devid),
   1402				    PCI_SLOT(e->devid),
   1403				    PCI_FUNC(e->devid),
   1404				    e->flags,
   1405				    seg_id, PCI_BUS_NUM(e->ext >> 8),
   1406				    PCI_SLOT(e->ext >> 8),
   1407				    PCI_FUNC(e->ext >> 8));
   1408
   1409			devid_start = e->devid;
   1410			flags = e->flags;
   1411			devid_to = e->ext >> 8;
   1412			ext_flags = 0;
   1413			alias = true;
   1414			break;
   1415		case IVHD_DEV_EXT_SELECT:
   1416
   1417			DUMP_printk("  DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x "
   1418				    "flags: %02x ext: %08x\n",
   1419				    seg_id, PCI_BUS_NUM(e->devid),
   1420				    PCI_SLOT(e->devid),
   1421				    PCI_FUNC(e->devid),
   1422				    e->flags, e->ext);
   1423
   1424			devid = e->devid;
   1425			set_dev_entry_from_acpi(iommu, devid, e->flags,
   1426						e->ext);
   1427			break;
   1428		case IVHD_DEV_EXT_SELECT_RANGE:
   1429
   1430			DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
   1431				    "%04x:%02x:%02x.%x flags: %02x ext: %08x\n",
   1432				    seg_id, PCI_BUS_NUM(e->devid),
   1433				    PCI_SLOT(e->devid),
   1434				    PCI_FUNC(e->devid),
   1435				    e->flags, e->ext);
   1436
   1437			devid_start = e->devid;
   1438			flags = e->flags;
   1439			ext_flags = e->ext;
   1440			alias = false;
   1441			break;
   1442		case IVHD_DEV_RANGE_END:
   1443
   1444			DUMP_printk("  DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n",
   1445				    seg_id, PCI_BUS_NUM(e->devid),
   1446				    PCI_SLOT(e->devid),
   1447				    PCI_FUNC(e->devid));
   1448
   1449			devid = e->devid;
   1450			for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
   1451				if (alias) {
   1452					pci_seg->alias_table[dev_i] = devid_to;
   1453					set_dev_entry_from_acpi(iommu,
   1454						devid_to, flags, ext_flags);
   1455				}
   1456				set_dev_entry_from_acpi(iommu, dev_i,
   1457							flags, ext_flags);
   1458			}
   1459			break;
   1460		case IVHD_DEV_SPECIAL: {
   1461			u8 handle, type;
   1462			const char *var;
   1463			u32 devid;
   1464			int ret;
   1465
   1466			handle = e->ext & 0xff;
   1467			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8));
   1468			type   = (e->ext >> 24) & 0xff;
   1469
   1470			if (type == IVHD_SPECIAL_IOAPIC)
   1471				var = "IOAPIC";
   1472			else if (type == IVHD_SPECIAL_HPET)
   1473				var = "HPET";
   1474			else
   1475				var = "UNKNOWN";
   1476
   1477			DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n",
   1478				    var, (int)handle,
   1479				    seg_id, PCI_BUS_NUM(devid),
   1480				    PCI_SLOT(devid),
   1481				    PCI_FUNC(devid));
   1482
   1483			ret = add_special_device(type, handle, &devid, false);
   1484			if (ret)
   1485				return ret;
   1486
   1487			/*
   1488			 * add_special_device might update the devid in case a
   1489			 * command-line override is present. So call
   1490			 * set_dev_entry_from_acpi after add_special_device.
   1491			 */
   1492			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
   1493
   1494			break;
   1495		}
   1496		case IVHD_DEV_ACPI_HID: {
   1497			u32 devid;
   1498			u8 hid[ACPIHID_HID_LEN];
   1499			u8 uid[ACPIHID_UID_LEN];
   1500			int ret;
   1501
   1502			if (h->type != 0x40) {
   1503				pr_err(FW_BUG "Invalid IVHD device type %#x\n",
   1504				       e->type);
   1505				break;
   1506			}
   1507
   1508			BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1);
   1509			memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1);
   1510			hid[ACPIHID_HID_LEN - 1] = '\0';
   1511
   1512			if (!(*hid)) {
   1513				pr_err(FW_BUG "Invalid HID.\n");
   1514				break;
   1515			}
   1516
   1517			uid[0] = '\0';
   1518			switch (e->uidf) {
   1519			case UID_NOT_PRESENT:
   1520
   1521				if (e->uidl != 0)
   1522					pr_warn(FW_BUG "Invalid UID length.\n");
   1523
   1524				break;
   1525			case UID_IS_INTEGER:
   1526
   1527				sprintf(uid, "%d", e->uid);
   1528
   1529				break;
   1530			case UID_IS_CHARACTER:
   1531
   1532				memcpy(uid, &e->uid, e->uidl);
   1533				uid[e->uidl] = '\0';
   1534
   1535				break;
   1536			default:
   1537				break;
   1538			}
   1539
   1540			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
   1541			DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n",
   1542				    hid, uid, seg_id,
   1543				    PCI_BUS_NUM(devid),
   1544				    PCI_SLOT(devid),
   1545				    PCI_FUNC(devid));
   1546
   1547			flags = e->flags;
   1548
   1549			ret = add_acpi_hid_device(hid, uid, &devid, false);
   1550			if (ret)
   1551				return ret;
   1552
   1553			/*
    1554			 * add_acpi_hid_device might update the devid in case a
    1555			 * command-line override is present. So call
    1556			 * set_dev_entry_from_acpi after add_acpi_hid_device.
   1557			 */
   1558			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
   1559
   1560			break;
   1561		}
   1562		default:
   1563			break;
   1564		}
   1565
   1566		p += ivhd_entry_length(p);
   1567	}
   1568
   1569	return 0;
   1570}
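/*
 * Editor's note (illustrative): range entries are handled in two steps, as
 * can be seen above: a DEV_SELECT_RANGE_START (or DEV_ALIAS_RANGE /
 * DEV_EXT_SELECT_RANGE) entry only records devid_start, flags and ext_flags,
 * and the following DEV_RANGE_END entry then programs every device table
 * entry from devid_start up to its own devid with those flags. E.g. a range
 * start at devid 0x0010 followed by a range end at devid 0x001f sets up DTEs
 * 0x0010 through 0x001f.
 */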
   1571
   1572/* Allocate PCI segment data structure */
   1573static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
   1574					  struct acpi_table_header *ivrs_base)
   1575{
   1576	struct amd_iommu_pci_seg *pci_seg;
   1577	int last_bdf;
   1578
   1579	/*
   1580	 * First parse ACPI tables to find the largest Bus/Dev/Func we need to
    1581	 * handle in this PCI segment. Based on this information the shared data
   1582	 * structures for the PCI segments in the system will be allocated.
   1583	 */
   1584	last_bdf = find_last_devid_acpi(ivrs_base, id);
   1585	if (last_bdf < 0)
   1586		return NULL;
   1587
   1588	pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL);
   1589	if (pci_seg == NULL)
   1590		return NULL;
   1591
   1592	pci_seg->last_bdf = last_bdf;
   1593	DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
   1594	pci_seg->dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf);
   1595	pci_seg->alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf);
   1596	pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf);
   1597
   1598	pci_seg->id = id;
   1599	init_llist_head(&pci_seg->dev_data_list);
   1600	INIT_LIST_HEAD(&pci_seg->unity_map);
   1601	list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list);
   1602
   1603	if (alloc_dev_table(pci_seg))
   1604		return NULL;
   1605	if (alloc_alias_table(pci_seg))
   1606		return NULL;
   1607	if (alloc_rlookup_table(pci_seg))
   1608		return NULL;
   1609
   1610	return pci_seg;
   1611}
   1612
   1613static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id,
   1614					struct acpi_table_header *ivrs_base)
   1615{
   1616	struct amd_iommu_pci_seg *pci_seg;
   1617
   1618	for_each_pci_segment(pci_seg) {
   1619		if (pci_seg->id == id)
   1620			return pci_seg;
   1621	}
   1622
   1623	return alloc_pci_segment(id, ivrs_base);
   1624}
   1625
   1626static void __init free_pci_segments(void)
   1627{
   1628	struct amd_iommu_pci_seg *pci_seg, *next;
   1629
   1630	for_each_pci_segment_safe(pci_seg, next) {
   1631		list_del(&pci_seg->list);
   1632		free_irq_lookup_table(pci_seg);
   1633		free_rlookup_table(pci_seg);
   1634		free_alias_table(pci_seg);
   1635		free_dev_table(pci_seg);
   1636		kfree(pci_seg);
   1637	}
   1638}
   1639
   1640static void __init free_iommu_one(struct amd_iommu *iommu)
   1641{
   1642	free_cwwb_sem(iommu);
   1643	free_command_buffer(iommu);
   1644	free_event_buffer(iommu);
   1645	free_ppr_log(iommu);
   1646	free_ga_log(iommu);
   1647	iommu_unmap_mmio_space(iommu);
   1648}
   1649
   1650static void __init free_iommu_all(void)
   1651{
   1652	struct amd_iommu *iommu, *next;
   1653
   1654	for_each_iommu_safe(iommu, next) {
   1655		list_del(&iommu->list);
   1656		free_iommu_one(iommu);
   1657		kfree(iommu);
   1658	}
   1659}
   1660
   1661/*
   1662 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
   1663 * Workaround:
    1664 *     BIOS should disable L2B miscellaneous clock gating by setting
   1665 *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
   1666 */
   1667static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
   1668{
   1669	u32 value;
   1670
   1671	if ((boot_cpu_data.x86 != 0x15) ||
   1672	    (boot_cpu_data.x86_model < 0x10) ||
   1673	    (boot_cpu_data.x86_model > 0x1f))
   1674		return;
   1675
   1676	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
   1677	pci_read_config_dword(iommu->dev, 0xf4, &value);
   1678
   1679	if (value & BIT(2))
   1680		return;
   1681
   1682	/* Select NB indirect register 0x90 and enable writing */
   1683	pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
   1684
   1685	pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
   1686	pci_info(iommu->dev, "Applying erratum 746 workaround\n");
   1687
   1688	/* Clear the enable writing bit */
   1689	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
   1690}
   1691
   1692/*
   1693 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
   1694 * Workaround:
   1695 *     BIOS should enable ATS write permission check by setting
   1696 *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
   1697 */
   1698static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
   1699{
   1700	u32 value;
   1701
   1702	if ((boot_cpu_data.x86 != 0x15) ||
   1703	    (boot_cpu_data.x86_model < 0x30) ||
   1704	    (boot_cpu_data.x86_model > 0x3f))
   1705		return;
   1706
   1707	/* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
   1708	value = iommu_read_l2(iommu, 0x47);
   1709
   1710	if (value & BIT(0))
   1711		return;
   1712
   1713	/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
   1714	iommu_write_l2(iommu, 0x47, value | BIT(0));
   1715
   1716	pci_info(iommu->dev, "Applying ATS write check workaround\n");
   1717}
   1718
   1719/*
    1720 * This function glues the initialization for one IOMMU together and
    1721 * programs the hardware from the ACPI table data. It does NOT enable
    1722 * the IOMMU; that is done afterwards.
   1723 */
   1724static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
   1725				 struct acpi_table_header *ivrs_base)
   1726{
   1727	struct amd_iommu_pci_seg *pci_seg;
   1728
   1729	pci_seg = get_pci_segment(h->pci_seg, ivrs_base);
   1730	if (pci_seg == NULL)
   1731		return -ENOMEM;
   1732	iommu->pci_seg = pci_seg;
   1733
   1734	raw_spin_lock_init(&iommu->lock);
   1735	iommu->cmd_sem_val = 0;
   1736
   1737	/* Add IOMMU to internal data structures */
   1738	list_add_tail(&iommu->list, &amd_iommu_list);
   1739	iommu->index = amd_iommus_present++;
   1740
   1741	if (unlikely(iommu->index >= MAX_IOMMUS)) {
   1742		WARN(1, "System has more IOMMUs than supported by this driver\n");
   1743		return -ENOSYS;
   1744	}
   1745
   1746	/* Index is fine - add IOMMU to the array */
   1747	amd_iommus[iommu->index] = iommu;
   1748
   1749	/*
   1750	 * Copy data from ACPI table entry to the iommu struct
   1751	 */
   1752	iommu->devid   = h->devid;
   1753	iommu->cap_ptr = h->cap_ptr;
   1754	iommu->mmio_phys = h->mmio_phys;
   1755
   1756	switch (h->type) {
   1757	case 0x10:
   1758		/* Check if IVHD EFR contains proper max banks/counters */
   1759		if ((h->efr_attr != 0) &&
   1760		    ((h->efr_attr & (0xF << 13)) != 0) &&
   1761		    ((h->efr_attr & (0x3F << 17)) != 0))
   1762			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
   1763		else
   1764			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
   1765
   1766		/*
    1767		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b support.
   1768		 * GAM also requires GA mode. Therefore, we need to
   1769		 * check cmpxchg16b support before enabling it.
   1770		 */
   1771		if (!boot_cpu_has(X86_FEATURE_CX16) ||
   1772		    ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
   1773			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
   1774		break;
   1775	case 0x11:
   1776	case 0x40:
   1777		if (h->efr_reg & (1 << 9))
   1778			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
   1779		else
   1780			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
   1781
   1782		/*
    1783		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b support.
    1784		 * XT and GAM also require GA mode. Therefore, we need to
   1785		 * check cmpxchg16b support before enabling them.
   1786		 */
   1787		if (!boot_cpu_has(X86_FEATURE_CX16) ||
   1788		    ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) {
   1789			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
   1790			break;
   1791		}
   1792
   1793		if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
   1794			amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
   1795
   1796		early_iommu_features_init(iommu, h);
   1797
   1798		break;
   1799	default:
   1800		return -EINVAL;
   1801	}
   1802
   1803	iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
   1804						iommu->mmio_phys_end);
   1805	if (!iommu->mmio_base)
   1806		return -ENOMEM;
   1807
   1808	return init_iommu_from_acpi(iommu, h);
   1809}
   1810
   1811static int __init init_iommu_one_late(struct amd_iommu *iommu)
   1812{
   1813	int ret;
   1814
   1815	if (alloc_cwwb_sem(iommu))
   1816		return -ENOMEM;
   1817
   1818	if (alloc_command_buffer(iommu))
   1819		return -ENOMEM;
   1820
   1821	if (alloc_event_buffer(iommu))
   1822		return -ENOMEM;
   1823
   1824	iommu->int_enabled = false;
   1825
   1826	init_translation_status(iommu);
   1827	if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
   1828		iommu_disable(iommu);
   1829		clear_translation_pre_enabled(iommu);
   1830		pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
   1831			iommu->index);
   1832	}
   1833	if (amd_iommu_pre_enabled)
   1834		amd_iommu_pre_enabled = translation_pre_enabled(iommu);
   1835
   1836	if (amd_iommu_irq_remap) {
   1837		ret = amd_iommu_create_irq_domain(iommu);
   1838		if (ret)
   1839			return ret;
   1840	}
   1841
   1842	/*
   1843	 * Make sure IOMMU is not considered to translate itself. The IVRS
   1844	 * table tells us so, but this is a lie!
   1845	 */
   1846	iommu->pci_seg->rlookup_table[iommu->devid] = NULL;
   1847
   1848	return 0;
   1849}
   1850
   1851/**
   1852 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
   1853 * @ivrs: Pointer to the IVRS header
   1854 *
    1855 * This function searches through all IVHD blocks for the maximum supported IVHD type.
   1856 */
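        /*
         * For example, if the IVRS exposes both a type 0x10 and a type 0x11
         * IVHD block for the same devid, the later 0x11 block is returned,
         * provided its type does not exceed ACPI_IVHD_TYPE_MAX_SUPPORTED.
         */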
   1857static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
   1858{
   1859	u8 *base = (u8 *)ivrs;
   1860	struct ivhd_header *ivhd = (struct ivhd_header *)
   1861					(base + IVRS_HEADER_LENGTH);
   1862	u8 last_type = ivhd->type;
   1863	u16 devid = ivhd->devid;
   1864
   1865	while (((u8 *)ivhd - base < ivrs->length) &&
   1866	       (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
   1867		u8 *p = (u8 *) ivhd;
   1868
   1869		if (ivhd->devid == devid)
   1870			last_type = ivhd->type;
   1871		ivhd = (struct ivhd_header *)(p + ivhd->length);
   1872	}
   1873
   1874	return last_type;
   1875}
   1876
   1877/*
   1878 * Iterates over all IOMMU entries in the ACPI table, allocates the
   1879 * IOMMU structure and initializes it with init_iommu_one()
   1880 */
   1881static int __init init_iommu_all(struct acpi_table_header *table)
   1882{
   1883	u8 *p = (u8 *)table, *end = (u8 *)table;
   1884	struct ivhd_header *h;
   1885	struct amd_iommu *iommu;
   1886	int ret;
   1887
   1888	end += table->length;
   1889	p += IVRS_HEADER_LENGTH;
   1890
   1891	/* Phase 1: Process all IVHD blocks */
   1892	while (p < end) {
   1893		h = (struct ivhd_header *)p;
   1894		if (*p == amd_iommu_target_ivhd_type) {
   1895
   1896			DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x "
   1897				    "flags: %01x info %04x\n",
   1898				    h->pci_seg, PCI_BUS_NUM(h->devid),
   1899				    PCI_SLOT(h->devid), PCI_FUNC(h->devid),
   1900				    h->cap_ptr, h->flags, h->info);
   1901			DUMP_printk("       mmio-addr: %016llx\n",
   1902				    h->mmio_phys);
   1903
   1904			iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
   1905			if (iommu == NULL)
   1906				return -ENOMEM;
   1907
   1908			ret = init_iommu_one(iommu, h, table);
   1909			if (ret)
   1910				return ret;
   1911		}
   1912		p += h->length;
   1913
   1914	}
   1915	WARN_ON(p != end);
   1916
    1917	/* Phase 2: Early feature support check */
   1918	get_global_efr();
   1919
    1920	/* Phase 3: Enabling IOMMU features */
   1921	for_each_iommu(iommu) {
   1922		ret = init_iommu_one_late(iommu);
   1923		if (ret)
   1924			return ret;
   1925	}
   1926
   1927	return 0;
   1928}
   1929
   1930static void init_iommu_perf_ctr(struct amd_iommu *iommu)
   1931{
   1932	u64 val;
   1933	struct pci_dev *pdev = iommu->dev;
   1934
   1935	if (!iommu_feature(iommu, FEATURE_PC))
   1936		return;
   1937
   1938	amd_iommu_pc_present = true;
   1939
   1940	pci_info(pdev, "IOMMU performance counters supported\n");
   1941
   1942	val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
   1943	iommu->max_banks = (u8) ((val >> 12) & 0x3f);
   1944	iommu->max_counters = (u8) ((val >> 7) & 0xf);
   1945
   1946	return;
   1947}
   1948
   1949static ssize_t amd_iommu_show_cap(struct device *dev,
   1950				  struct device_attribute *attr,
   1951				  char *buf)
   1952{
   1953	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
   1954	return sprintf(buf, "%x\n", iommu->cap);
   1955}
   1956static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
   1957
   1958static ssize_t amd_iommu_show_features(struct device *dev,
   1959				       struct device_attribute *attr,
   1960				       char *buf)
   1961{
   1962	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
   1963	return sprintf(buf, "%llx:%llx\n", iommu->features2, iommu->features);
   1964}
   1965static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
   1966
   1967static struct attribute *amd_iommu_attrs[] = {
   1968	&dev_attr_cap.attr,
   1969	&dev_attr_features.attr,
   1970	NULL,
   1971};
   1972
   1973static struct attribute_group amd_iommu_group = {
   1974	.name = "amd-iommu",
   1975	.attrs = amd_iommu_attrs,
   1976};
   1977
   1978static const struct attribute_group *amd_iommu_groups[] = {
   1979	&amd_iommu_group,
   1980	NULL,
   1981};
   1982
   1983/*
    1984 * Note: IVHD types 0x11 and 0x40 also contain an exact copy
   1985 * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
   1986 * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
   1987 */
   1988static void __init late_iommu_features_init(struct amd_iommu *iommu)
   1989{
   1990	u64 features, features2;
   1991
   1992	if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
   1993		return;
   1994
   1995	/* read extended feature bits */
   1996	features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
   1997	features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
   1998
   1999	if (!iommu->features) {
   2000		iommu->features = features;
   2001		iommu->features2 = features2;
   2002		return;
   2003	}
   2004
   2005	/*
   2006	 * Sanity check and warn if EFR values from
   2007	 * IVHD and MMIO conflict.
   2008	 */
   2009	if (features != iommu->features ||
   2010	    features2 != iommu->features2) {
   2011		pr_warn(FW_WARN
   2012			"EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
   2013			features, iommu->features,
   2014			features2, iommu->features2);
   2015	}
   2016}
   2017
   2018static int __init iommu_init_pci(struct amd_iommu *iommu)
   2019{
   2020	int cap_ptr = iommu->cap_ptr;
   2021
   2022	iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
   2023						 PCI_BUS_NUM(iommu->devid),
   2024						 iommu->devid & 0xff);
   2025	if (!iommu->dev)
   2026		return -ENODEV;
   2027
   2028	/* Prevent binding other PCI device drivers to IOMMU devices */
   2029	iommu->dev->match_driver = false;
   2030
   2031	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
   2032			      &iommu->cap);
   2033
   2034	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
   2035		amd_iommu_iotlb_sup = false;
   2036
   2037	late_iommu_features_init(iommu);
   2038
   2039	if (iommu_feature(iommu, FEATURE_GT)) {
   2040		int glxval;
   2041		u32 max_pasid;
   2042		u64 pasmax;
   2043
   2044		pasmax = iommu->features & FEATURE_PASID_MASK;
   2045		pasmax >>= FEATURE_PASID_SHIFT;
   2046		max_pasid  = (1 << (pasmax + 1)) - 1;
   2047
   2048		amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
   2049
   2050		BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
   2051
   2052		glxval   = iommu->features & FEATURE_GLXVAL_MASK;
   2053		glxval >>= FEATURE_GLXVAL_SHIFT;
   2054
   2055		if (amd_iommu_max_glx_val == -1)
   2056			amd_iommu_max_glx_val = glxval;
   2057		else
   2058			amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
   2059	}
   2060
   2061	if (iommu_feature(iommu, FEATURE_GT) &&
   2062	    iommu_feature(iommu, FEATURE_PPR)) {
   2063		iommu->is_iommu_v2   = true;
   2064		amd_iommu_v2_present = true;
   2065	}
   2066
   2067	if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
   2068		return -ENOMEM;
   2069
   2070	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
   2071		pr_info("Using strict mode due to virtualization\n");
   2072		iommu_set_dma_strict();
   2073		amd_iommu_np_cache = true;
   2074	}
   2075
   2076	init_iommu_perf_ctr(iommu);
   2077
   2078	if (is_rd890_iommu(iommu->dev)) {
   2079		int i, j;
   2080
   2081		iommu->root_pdev =
   2082			pci_get_domain_bus_and_slot(iommu->pci_seg->id,
   2083						    iommu->dev->bus->number,
   2084						    PCI_DEVFN(0, 0));
   2085
   2086		/*
   2087		 * Some rd890 systems may not be fully reconfigured by the
   2088		 * BIOS, so it's necessary for us to store this information so
   2089		 * it can be reprogrammed on resume
   2090		 */
   2091		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
   2092				&iommu->stored_addr_lo);
   2093		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
   2094				&iommu->stored_addr_hi);
   2095
   2096		/* Low bit locks writes to configuration space */
   2097		iommu->stored_addr_lo &= ~1;
   2098
   2099		for (i = 0; i < 6; i++)
   2100			for (j = 0; j < 0x12; j++)
   2101				iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
   2102
   2103		for (i = 0; i < 0x83; i++)
   2104			iommu->stored_l2[i] = iommu_read_l2(iommu, i);
   2105	}
   2106
   2107	amd_iommu_erratum_746_workaround(iommu);
   2108	amd_iommu_ats_write_check_workaround(iommu);
   2109
   2110	iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
   2111			       amd_iommu_groups, "ivhd%d", iommu->index);
   2112	iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
   2113
   2114	return pci_enable_device(iommu->dev);
   2115}
   2116
   2117static void print_iommu_info(void)
   2118{
   2119	static const char * const feat_str[] = {
   2120		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
   2121		"IA", "GA", "HE", "PC"
   2122	};
   2123	struct amd_iommu *iommu;
   2124
   2125	for_each_iommu(iommu) {
   2126		struct pci_dev *pdev = iommu->dev;
   2127		int i;
   2128
   2129		pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
   2130
   2131		if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
   2132			pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2);
   2133
   2134			for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
   2135				if (iommu_feature(iommu, (1ULL << i)))
   2136					pr_cont(" %s", feat_str[i]);
   2137			}
   2138
   2139			if (iommu->features & FEATURE_GAM_VAPIC)
   2140				pr_cont(" GA_vAPIC");
   2141
   2142			if (iommu->features & FEATURE_SNP)
   2143				pr_cont(" SNP");
   2144
   2145			pr_cont("\n");
   2146		}
   2147	}
   2148	if (irq_remapping_enabled) {
   2149		pr_info("Interrupt remapping enabled\n");
   2150		if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
   2151			pr_info("X2APIC enabled\n");
   2152	}
   2153}
   2154
   2155static int __init amd_iommu_init_pci(void)
   2156{
   2157	struct amd_iommu *iommu;
   2158	struct amd_iommu_pci_seg *pci_seg;
   2159	int ret;
   2160
   2161	for_each_iommu(iommu) {
   2162		ret = iommu_init_pci(iommu);
   2163		if (ret) {
   2164			pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n",
   2165			       iommu->index, ret);
   2166			goto out;
   2167		}
    2168		/* Need to set up the range after PCI init */
   2169		iommu_set_cwwb_range(iommu);
   2170	}
   2171
   2172	/*
   2173	 * Order is important here to make sure any unity map requirements are
   2174	 * fulfilled. The unity mappings are created and written to the device
   2175	 * table during the amd_iommu_init_api() call.
   2176	 *
   2177	 * After that we call init_device_table_dma() to make sure any
   2178	 * uninitialized DTE will block DMA, and in the end we flush the caches
   2179	 * of all IOMMUs to make sure the changes to the device table are
   2180	 * active.
   2181	 */
   2182	ret = amd_iommu_init_api();
   2183	if (ret) {
   2184		pr_err("IOMMU: Failed to initialize IOMMU-API interface (error=%d)!\n",
   2185		       ret);
   2186		goto out;
   2187	}
   2188
   2189	for_each_pci_segment(pci_seg)
   2190		init_device_table_dma(pci_seg);
   2191
   2192	for_each_iommu(iommu)
   2193		iommu_flush_all_caches(iommu);
   2194
   2195	print_iommu_info();
   2196
   2197out:
   2198	return ret;
   2199}
   2200
   2201/****************************************************************************
   2202 *
   2203 * The following functions initialize the MSI interrupts for all IOMMUs
   2204 * in the system. It's a bit challenging because there could be multiple
   2205 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
   2206 * pci_dev.
   2207 *
   2208 ****************************************************************************/
   2209
   2210static int iommu_setup_msi(struct amd_iommu *iommu)
   2211{
   2212	int r;
   2213
   2214	r = pci_enable_msi(iommu->dev);
   2215	if (r)
   2216		return r;
   2217
   2218	r = request_threaded_irq(iommu->dev->irq,
   2219				 amd_iommu_int_handler,
   2220				 amd_iommu_int_thread,
   2221				 0, "AMD-Vi",
   2222				 iommu);
   2223
   2224	if (r) {
   2225		pci_disable_msi(iommu->dev);
   2226		return r;
   2227	}
   2228
   2229	return 0;
   2230}
   2231
   2232union intcapxt {
   2233	u64	capxt;
   2234	struct {
   2235		u64	reserved_0		:  2,
   2236			dest_mode_logical	:  1,
   2237			reserved_1		:  5,
   2238			destid_0_23		: 24,
   2239			vector			:  8,
   2240			reserved_2		: 16,
   2241			destid_24_31		:  8;
   2242	};
   2243} __attribute__ ((packed));
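        /*
         * For illustration, a physical destination APIC id of 0x01234567
         * with vector 0x41 would be packed as:
         *
         *	xt.vector       = 0x41;     (bits 39:32)
         *	xt.destid_0_23  = 0x234567; (bits 31:8, low 24 bits of the id)
         *	xt.destid_24_31 = 0x01;     (bits 63:56, high 8 bits of the id)
         *
         * This matches the GENMASK(23, 0) / ">> 24" split done in
         * intcapxt_unmask_irq() below.
         */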
   2244
   2245
   2246static struct irq_chip intcapxt_controller;
   2247
   2248static int intcapxt_irqdomain_activate(struct irq_domain *domain,
   2249				       struct irq_data *irqd, bool reserve)
   2250{
   2251	return 0;
   2252}
   2253
   2254static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
   2255					  struct irq_data *irqd)
   2256{
   2257}
   2258
   2259
   2260static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
   2261				    unsigned int nr_irqs, void *arg)
   2262{
   2263	struct irq_alloc_info *info = arg;
   2264	int i, ret;
   2265
   2266	if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
   2267		return -EINVAL;
   2268
   2269	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
   2270	if (ret < 0)
   2271		return ret;
   2272
   2273	for (i = virq; i < virq + nr_irqs; i++) {
   2274		struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
   2275
   2276		irqd->chip = &intcapxt_controller;
   2277		irqd->chip_data = info->data;
   2278		__irq_set_handler(i, handle_edge_irq, 0, "edge");
   2279	}
   2280
   2281	return ret;
   2282}
   2283
   2284static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq,
   2285				    unsigned int nr_irqs)
   2286{
   2287	irq_domain_free_irqs_top(domain, virq, nr_irqs);
   2288}
   2289
   2290
   2291static void intcapxt_unmask_irq(struct irq_data *irqd)
   2292{
   2293	struct amd_iommu *iommu = irqd->chip_data;
   2294	struct irq_cfg *cfg = irqd_cfg(irqd);
   2295	union intcapxt xt;
   2296
   2297	xt.capxt = 0ULL;
   2298	xt.dest_mode_logical = apic->dest_mode_logical;
   2299	xt.vector = cfg->vector;
   2300	xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
   2301	xt.destid_24_31 = cfg->dest_apicid >> 24;
   2302
    2303	/*
   2304	 * Current IOMMU implementation uses the same IRQ for all
   2305	 * 3 IOMMU interrupts.
   2306	 */
   2307	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
   2308	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
   2309	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
   2310}
   2311
   2312static void intcapxt_mask_irq(struct irq_data *irqd)
   2313{
   2314	struct amd_iommu *iommu = irqd->chip_data;
   2315
   2316	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
   2317	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
   2318	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
   2319}
   2320
   2321
   2322static int intcapxt_set_affinity(struct irq_data *irqd,
   2323				 const struct cpumask *mask, bool force)
   2324{
   2325	struct irq_data *parent = irqd->parent_data;
   2326	int ret;
   2327
   2328	ret = parent->chip->irq_set_affinity(parent, mask, force);
   2329	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
   2330		return ret;
   2331	return 0;
   2332}
   2333
   2334static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on)
   2335{
   2336	return on ? -EOPNOTSUPP : 0;
   2337}
   2338
   2339static struct irq_chip intcapxt_controller = {
   2340	.name			= "IOMMU-MSI",
   2341	.irq_unmask		= intcapxt_unmask_irq,
   2342	.irq_mask		= intcapxt_mask_irq,
   2343	.irq_ack		= irq_chip_ack_parent,
   2344	.irq_retrigger		= irq_chip_retrigger_hierarchy,
   2345	.irq_set_affinity       = intcapxt_set_affinity,
   2346	.irq_set_wake		= intcapxt_set_wake,
   2347	.flags			= IRQCHIP_MASK_ON_SUSPEND,
   2348};
   2349
   2350static const struct irq_domain_ops intcapxt_domain_ops = {
   2351	.alloc			= intcapxt_irqdomain_alloc,
   2352	.free			= intcapxt_irqdomain_free,
   2353	.activate		= intcapxt_irqdomain_activate,
   2354	.deactivate		= intcapxt_irqdomain_deactivate,
   2355};
   2356
   2357
   2358static struct irq_domain *iommu_irqdomain;
   2359
   2360static struct irq_domain *iommu_get_irqdomain(void)
   2361{
   2362	struct fwnode_handle *fn;
   2363
   2364	/* No need for locking here (yet) as the init is single-threaded */
   2365	if (iommu_irqdomain)
   2366		return iommu_irqdomain;
   2367
   2368	fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
   2369	if (!fn)
   2370		return NULL;
   2371
   2372	iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
   2373						      fn, &intcapxt_domain_ops,
   2374						      NULL);
   2375	if (!iommu_irqdomain)
   2376		irq_domain_free_fwnode(fn);
   2377
   2378	return iommu_irqdomain;
   2379}
   2380
   2381static int iommu_setup_intcapxt(struct amd_iommu *iommu)
   2382{
   2383	struct irq_domain *domain;
   2384	struct irq_alloc_info info;
   2385	int irq, ret;
   2386
   2387	domain = iommu_get_irqdomain();
   2388	if (!domain)
   2389		return -ENXIO;
   2390
   2391	init_irq_alloc_info(&info, NULL);
   2392	info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
   2393	info.data = iommu;
   2394
   2395	irq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info);
   2396	if (irq < 0) {
   2397		irq_domain_remove(domain);
   2398		return irq;
   2399	}
   2400
   2401	ret = request_threaded_irq(irq, amd_iommu_int_handler,
   2402				   amd_iommu_int_thread, 0, "AMD-Vi", iommu);
   2403	if (ret) {
   2404		irq_domain_free_irqs(irq, 1);
   2405		irq_domain_remove(domain);
   2406		return ret;
   2407	}
   2408
   2409	return 0;
   2410}
   2411
   2412static int iommu_init_irq(struct amd_iommu *iommu)
   2413{
   2414	int ret;
   2415
   2416	if (iommu->int_enabled)
   2417		goto enable_faults;
   2418
   2419	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
   2420		ret = iommu_setup_intcapxt(iommu);
   2421	else if (iommu->dev->msi_cap)
   2422		ret = iommu_setup_msi(iommu);
   2423	else
   2424		ret = -ENODEV;
   2425
   2426	if (ret)
   2427		return ret;
   2428
   2429	iommu->int_enabled = true;
   2430enable_faults:
   2431
   2432	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
   2433		iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
   2434
   2435	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
   2436
   2437	if (iommu->ppr_log != NULL)
   2438		iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
   2439	return 0;
   2440}
   2441
   2442/****************************************************************************
   2443 *
    2444 * The next functions belong to the last pass of parsing the ACPI
    2445 * table. In this pass the memory mapping requirements are
   2446 * gathered (like exclusion and unity mapping ranges).
   2447 *
   2448 ****************************************************************************/
   2449
   2450static void __init free_unity_maps(void)
   2451{
   2452	struct unity_map_entry *entry, *next;
   2453	struct amd_iommu_pci_seg *p, *pci_seg;
   2454
   2455	for_each_pci_segment_safe(pci_seg, p) {
   2456		list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) {
   2457			list_del(&entry->list);
   2458			kfree(entry);
   2459		}
   2460	}
   2461}
   2462
   2463/* called for unity map ACPI definition */
   2464static int __init init_unity_map_range(struct ivmd_header *m,
   2465				       struct acpi_table_header *ivrs_base)
   2466{
   2467	struct unity_map_entry *e = NULL;
   2468	struct amd_iommu_pci_seg *pci_seg;
   2469	char *s;
   2470
   2471	pci_seg = get_pci_segment(m->pci_seg, ivrs_base);
   2472	if (pci_seg == NULL)
   2473		return -ENOMEM;
   2474
   2475	e = kzalloc(sizeof(*e), GFP_KERNEL);
   2476	if (e == NULL)
   2477		return -ENOMEM;
   2478
   2479	switch (m->type) {
   2480	default:
   2481		kfree(e);
   2482		return 0;
   2483	case ACPI_IVMD_TYPE:
    2484		s = "IVMD_TYPE\t\t\t";
   2485		e->devid_start = e->devid_end = m->devid;
   2486		break;
   2487	case ACPI_IVMD_TYPE_ALL:
   2488		s = "IVMD_TYPE_ALL\t\t";
   2489		e->devid_start = 0;
   2490		e->devid_end = pci_seg->last_bdf;
   2491		break;
   2492	case ACPI_IVMD_TYPE_RANGE:
   2493		s = "IVMD_TYPE_RANGE\t\t";
   2494		e->devid_start = m->devid;
   2495		e->devid_end = m->aux;
   2496		break;
   2497	}
   2498	e->address_start = PAGE_ALIGN(m->range_start);
   2499	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
   2500	e->prot = m->flags >> 1;
   2501
   2502	/*
   2503	 * Treat per-device exclusion ranges as r/w unity-mapped regions
    2504	 * since some buggy BIOSes might overwrite the exclusion range
    2505	 * (the exclusion_start and exclusion_length members). This
    2506	 * happens when there are multiple exclusion ranges (IVMD entries)
    2507	 * defined in the ACPI table.
   2508	 */
   2509	if (m->flags & IVMD_FLAG_EXCL_RANGE)
   2510		e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
   2511
   2512	DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: "
   2513		    "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx"
   2514		    " flags: %x\n", s, m->pci_seg,
   2515		    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
   2516		    PCI_FUNC(e->devid_start), m->pci_seg,
   2517		    PCI_BUS_NUM(e->devid_end),
   2518		    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
   2519		    e->address_start, e->address_end, m->flags);
   2520
   2521	list_add_tail(&e->list, &pci_seg->unity_map);
   2522
   2523	return 0;
   2524}
   2525
   2526/* iterates over all memory definitions we find in the ACPI table */
   2527static int __init init_memory_definitions(struct acpi_table_header *table)
   2528{
   2529	u8 *p = (u8 *)table, *end = (u8 *)table;
   2530	struct ivmd_header *m;
   2531
   2532	end += table->length;
   2533	p += IVRS_HEADER_LENGTH;
   2534
   2535	while (p < end) {
   2536		m = (struct ivmd_header *)p;
   2537		if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
   2538			init_unity_map_range(m, table);
   2539
   2540		p += m->length;
   2541	}
   2542
   2543	return 0;
   2544}
   2545
   2546/*
   2547 * Init the device table to not allow DMA access for devices
   2548 */
   2549static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
   2550{
   2551	u32 devid;
   2552	struct dev_table_entry *dev_table = pci_seg->dev_table;
   2553
   2554	if (dev_table == NULL)
   2555		return;
   2556
   2557	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
   2558		__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID);
   2559		if (!amd_iommu_snp_en)
   2560			__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION);
   2561	}
   2562}
   2563
   2564static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
   2565{
   2566	u32 devid;
   2567	struct dev_table_entry *dev_table = pci_seg->dev_table;
   2568
   2569	if (dev_table == NULL)
   2570		return;
   2571
   2572	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
   2573		dev_table[devid].data[0] = 0ULL;
   2574		dev_table[devid].data[1] = 0ULL;
   2575	}
   2576}
   2577
   2578static void init_device_table(void)
   2579{
   2580	struct amd_iommu_pci_seg *pci_seg;
   2581	u32 devid;
   2582
   2583	if (!amd_iommu_irq_remap)
   2584		return;
   2585
   2586	for_each_pci_segment(pci_seg) {
   2587		for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
   2588			__set_dev_entry_bit(pci_seg->dev_table,
   2589					    devid, DEV_ENTRY_IRQ_TBL_EN);
   2590	}
   2591}
   2592
   2593static void iommu_init_flags(struct amd_iommu *iommu)
   2594{
   2595	iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
   2596		iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
   2597		iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
   2598
   2599	iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
   2600		iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
   2601		iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
   2602
   2603	iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
   2604		iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
   2605		iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
   2606
   2607	iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
   2608		iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
   2609		iommu_feature_disable(iommu, CONTROL_ISOC_EN);
   2610
   2611	/*
   2612	 * make IOMMU memory accesses cache coherent
   2613	 */
   2614	iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
   2615
   2616	/* Set IOTLB invalidation timeout to 1s */
   2617	iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
   2618}
   2619
   2620static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
   2621{
   2622	int i, j;
   2623	u32 ioc_feature_control;
   2624	struct pci_dev *pdev = iommu->root_pdev;
   2625
   2626	/* RD890 BIOSes may not have completely reconfigured the iommu */
   2627	if (!is_rd890_iommu(iommu->dev) || !pdev)
   2628		return;
   2629
   2630	/*
   2631	 * First, we need to ensure that the iommu is enabled. This is
   2632	 * controlled by a register in the northbridge
   2633	 */
   2634
   2635	/* Select Northbridge indirect register 0x75 and enable writing */
   2636	pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
   2637	pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
   2638
   2639	/* Enable the iommu */
   2640	if (!(ioc_feature_control & 0x1))
   2641		pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
   2642
   2643	/* Restore the iommu BAR */
   2644	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
   2645			       iommu->stored_addr_lo);
   2646	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
   2647			       iommu->stored_addr_hi);
   2648
   2649	/* Restore the l1 indirect regs for each of the 6 l1s */
   2650	for (i = 0; i < 6; i++)
   2651		for (j = 0; j < 0x12; j++)
   2652			iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
   2653
   2654	/* Restore the l2 indirect regs */
   2655	for (i = 0; i < 0x83; i++)
   2656		iommu_write_l2(iommu, i, iommu->stored_l2[i]);
   2657
   2658	/* Lock PCI setup registers */
   2659	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
   2660			       iommu->stored_addr_lo | 1);
   2661}
   2662
   2663static void iommu_enable_ga(struct amd_iommu *iommu)
   2664{
   2665#ifdef CONFIG_IRQ_REMAP
   2666	switch (amd_iommu_guest_ir) {
   2667	case AMD_IOMMU_GUEST_IR_VAPIC:
   2668	case AMD_IOMMU_GUEST_IR_LEGACY_GA:
   2669		iommu_feature_enable(iommu, CONTROL_GA_EN);
   2670		iommu->irte_ops = &irte_128_ops;
   2671		break;
   2672	default:
   2673		iommu->irte_ops = &irte_32_ops;
   2674		break;
   2675	}
   2676#endif
   2677}
   2678
   2679static void early_enable_iommu(struct amd_iommu *iommu)
   2680{
   2681	iommu_disable(iommu);
   2682	iommu_init_flags(iommu);
   2683	iommu_set_device_table(iommu);
   2684	iommu_enable_command_buffer(iommu);
   2685	iommu_enable_event_buffer(iommu);
   2686	iommu_set_exclusion_range(iommu);
   2687	iommu_enable_ga(iommu);
   2688	iommu_enable_xt(iommu);
   2689	iommu_enable(iommu);
   2690	iommu_flush_all_caches(iommu);
   2691}
   2692
   2693/*
   2694 * This function finally enables all IOMMUs found in the system after
   2695 * they have been initialized.
   2696 *
    2697 * Or, if this is a kdump kernel and all IOMMUs are pre-enabled, try to
    2698 * copy the old contents of the device table entries. If that is not the
    2699 * case, or the copy fails, just continue as a normal kernel does.
   2700 */
   2701static void early_enable_iommus(void)
   2702{
   2703	struct amd_iommu *iommu;
   2704	struct amd_iommu_pci_seg *pci_seg;
   2705
   2706	if (!copy_device_table()) {
   2707		/*
    2708		 * If we get here because copying the device table from the old
    2709		 * kernel (with all IOMMUs enabled) failed, print an error message
    2710		 * and try to free the allocated old_dev_tbl_cpy.
   2711		 */
   2712		if (amd_iommu_pre_enabled)
   2713			pr_err("Failed to copy DEV table from previous kernel.\n");
   2714
   2715		for_each_pci_segment(pci_seg) {
   2716			if (pci_seg->old_dev_tbl_cpy != NULL) {
   2717				free_pages((unsigned long)pci_seg->old_dev_tbl_cpy,
   2718						get_order(pci_seg->dev_table_size));
   2719				pci_seg->old_dev_tbl_cpy = NULL;
   2720			}
   2721		}
   2722
   2723		for_each_iommu(iommu) {
   2724			clear_translation_pre_enabled(iommu);
   2725			early_enable_iommu(iommu);
   2726		}
   2727	} else {
   2728		pr_info("Copied DEV table from previous kernel.\n");
   2729
   2730		for_each_pci_segment(pci_seg) {
   2731			free_pages((unsigned long)pci_seg->dev_table,
   2732				   get_order(pci_seg->dev_table_size));
   2733			pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
   2734		}
   2735
   2736		for_each_iommu(iommu) {
   2737			iommu_disable_command_buffer(iommu);
   2738			iommu_disable_event_buffer(iommu);
   2739			iommu_enable_command_buffer(iommu);
   2740			iommu_enable_event_buffer(iommu);
   2741			iommu_enable_ga(iommu);
   2742			iommu_enable_xt(iommu);
   2743			iommu_set_device_table(iommu);
   2744			iommu_flush_all_caches(iommu);
   2745		}
   2746	}
   2747}
   2748
   2749static void enable_iommus_v2(void)
   2750{
   2751	struct amd_iommu *iommu;
   2752
   2753	for_each_iommu(iommu) {
   2754		iommu_enable_ppr_log(iommu);
   2755		iommu_enable_gt(iommu);
   2756	}
   2757}
   2758
   2759static void enable_iommus_vapic(void)
   2760{
   2761#ifdef CONFIG_IRQ_REMAP
   2762	struct amd_iommu *iommu;
   2763
   2764	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
   2765	    !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) {
   2766		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
   2767		return;
   2768	}
   2769
   2770	if (amd_iommu_snp_en &&
   2771	    !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) {
   2772		pr_warn("Force to disable Virtual APIC due to SNP\n");
   2773		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
   2774		return;
   2775	}
   2776
   2777	/* Enabling GAM and SNPAVIC support */
   2778	for_each_iommu(iommu) {
   2779		if (iommu_init_ga_log(iommu) || iommu_ga_log_enable(iommu))
   2780			return;
   2781
   2782		iommu_feature_enable(iommu, CONTROL_GAM_EN);
   2783		if (amd_iommu_snp_en)
   2784			iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN);
   2785	}
   2786
   2787	amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
   2788	pr_info("Virtual APIC enabled\n");
   2789#endif
   2790}
   2791
   2792static void enable_iommus(void)
   2793{
   2794	early_enable_iommus();
   2795	enable_iommus_vapic();
   2796	enable_iommus_v2();
   2797}
   2798
   2799static void disable_iommus(void)
   2800{
   2801	struct amd_iommu *iommu;
   2802
   2803	for_each_iommu(iommu)
   2804		iommu_disable(iommu);
   2805
   2806#ifdef CONFIG_IRQ_REMAP
   2807	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
   2808		amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
   2809#endif
   2810}
   2811
   2812/*
   2813 * Suspend/Resume support
   2814 * disable suspend until real resume implemented
   2815 */
   2816
   2817static void amd_iommu_resume(void)
   2818{
   2819	struct amd_iommu *iommu;
   2820
   2821	for_each_iommu(iommu)
   2822		iommu_apply_resume_quirks(iommu);
   2823
   2824	/* re-load the hardware */
   2825	enable_iommus();
   2826
   2827	amd_iommu_enable_interrupts();
   2828}
   2829
   2830static int amd_iommu_suspend(void)
   2831{
   2832	/* disable IOMMUs to go out of the way for BIOS */
   2833	disable_iommus();
   2834
   2835	return 0;
   2836}
   2837
   2838static struct syscore_ops amd_iommu_syscore_ops = {
   2839	.suspend = amd_iommu_suspend,
   2840	.resume = amd_iommu_resume,
   2841};
   2842
   2843static void __init free_iommu_resources(void)
   2844{
   2845	kmem_cache_destroy(amd_iommu_irq_cache);
   2846	amd_iommu_irq_cache = NULL;
   2847
   2848	free_iommu_all();
   2849	free_pci_segments();
   2850}
   2851
   2852/* SB IOAPIC is always on this device in AMD systems */
   2853#define IOAPIC_SB_DEVID		((0x00 << 8) | PCI_DEVFN(0x14, 0))
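        /*
         * Expanded, IOAPIC_SB_DEVID is 0x00a0: PCI_DEVFN(0x14, 0) is
         * (0x14 << 3) | 0 == 0xa0, i.e. the southbridge device at 00:14.0.
         */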
   2854
   2855static bool __init check_ioapic_information(void)
   2856{
   2857	const char *fw_bug = FW_BUG;
   2858	bool ret, has_sb_ioapic;
   2859	int idx;
   2860
   2861	has_sb_ioapic = false;
   2862	ret           = false;
   2863
   2864	/*
   2865	 * If we have map overrides on the kernel command line the
   2866	 * messages in this function might not describe firmware bugs
   2867	 * anymore - so be careful
   2868	 */
   2869	if (cmdline_maps)
   2870		fw_bug = "";
   2871
   2872	for (idx = 0; idx < nr_ioapics; idx++) {
   2873		int devid, id = mpc_ioapic_id(idx);
   2874
   2875		devid = get_ioapic_devid(id);
   2876		if (devid < 0) {
   2877			pr_err("%s: IOAPIC[%d] not in IVRS table\n",
   2878				fw_bug, id);
   2879			ret = false;
   2880		} else if (devid == IOAPIC_SB_DEVID) {
   2881			has_sb_ioapic = true;
   2882			ret           = true;
   2883		}
   2884	}
   2885
   2886	if (!has_sb_ioapic) {
   2887		/*
   2888		 * We expect the SB IOAPIC to be listed in the IVRS
   2889		 * table. The system timer is connected to the SB IOAPIC
   2890		 * and if we don't have it in the list the system will
   2891		 * panic at boot time.  This situation usually happens
   2892		 * when the BIOS is buggy and provides us the wrong
   2893		 * device id for the IOAPIC in the system.
   2894		 */
   2895		pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
   2896	}
   2897
   2898	if (!ret)
   2899		pr_err("Disabling interrupt remapping\n");
   2900
   2901	return ret;
   2902}
   2903
   2904static void __init free_dma_resources(void)
   2905{
   2906	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
   2907		   get_order(MAX_DOMAIN_ID/8));
   2908	amd_iommu_pd_alloc_bitmap = NULL;
   2909
   2910	free_unity_maps();
   2911}
   2912
   2913static void __init ivinfo_init(void *ivrs)
   2914{
   2915	amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
   2916}
   2917
   2918/*
   2919 * This is the hardware init function for AMD IOMMU in the system.
   2920 * This function is called either from amd_iommu_init or from the interrupt
   2921 * remapping setup code.
   2922 *
   2923 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
   2924 * four times:
   2925 *
   2926 *	1 pass) Discover the most comprehensive IVHD type to use.
   2927 *
   2928 *	2 pass) Find the highest PCI device id the driver has to handle.
    2929 *		Based on this information, the size of the data
    2930 *		structures that need to be allocated is determined.
   2931 *
   2932 *	3 pass) Initialize the data structures just allocated with the
   2933 *		information in the ACPI table about available AMD IOMMUs
   2934 *		in the system. It also maps the PCI devices in the
   2935 *		system to specific IOMMUs
   2936 *
   2937 *	4 pass) After the basic data structures are allocated and
   2938 *		initialized we update them with information about memory
   2939 *		remapping requirements parsed out of the ACPI table in
   2940 *		this last pass.
   2941 *
   2942 * After everything is set up the IOMMUs are enabled and the necessary
   2943 * hotplug and suspend notifiers are registered.
   2944 */
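        /*
         * Roughly, pass 1 corresponds to get_highest_supported_ivhd_type(),
         * passes 2 and 3 are covered by init_iommu_all() (the per-segment
         * device id limits are determined when the PCI segments are first
         * allocated), and pass 4 corresponds to init_memory_definitions().
         */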
   2945static int __init early_amd_iommu_init(void)
   2946{
   2947	struct acpi_table_header *ivrs_base;
   2948	int remap_cache_sz, ret;
   2949	acpi_status status;
   2950
   2951	if (!amd_iommu_detected)
   2952		return -ENODEV;
   2953
   2954	status = acpi_get_table("IVRS", 0, &ivrs_base);
   2955	if (status == AE_NOT_FOUND)
   2956		return -ENODEV;
   2957	else if (ACPI_FAILURE(status)) {
   2958		const char *err = acpi_format_exception(status);
   2959		pr_err("IVRS table error: %s\n", err);
   2960		return -EINVAL;
   2961	}
   2962
   2963	/*
   2964	 * Validate checksum here so we don't need to do it when
   2965	 * we actually parse the table
   2966	 */
   2967	ret = check_ivrs_checksum(ivrs_base);
   2968	if (ret)
   2969		goto out;
   2970
   2971	ivinfo_init(ivrs_base);
   2972
   2973	amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
   2974	DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
   2975
   2976	/* Device table - directly used by all IOMMUs */
   2977	ret = -ENOMEM;
   2978
   2979	amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
   2980					    GFP_KERNEL | __GFP_ZERO,
   2981					    get_order(MAX_DOMAIN_ID/8));
   2982	if (amd_iommu_pd_alloc_bitmap == NULL)
   2983		goto out;
   2984
   2985	/*
    2986	 * never allocate domain 0 because it's used as the non-allocated and
   2987	 * error value placeholder
   2988	 */
   2989	__set_bit(0, amd_iommu_pd_alloc_bitmap);
   2990
   2991	/*
    2992	 * now that the data structures are allocated and basically
    2993	 * initialized, start the real ACPI table scan
   2994	 */
   2995	ret = init_iommu_all(ivrs_base);
   2996	if (ret)
   2997		goto out;
   2998
   2999	/* Disable any previously enabled IOMMUs */
   3000	if (!is_kdump_kernel() || amd_iommu_disabled)
   3001		disable_iommus();
   3002
   3003	if (amd_iommu_irq_remap)
   3004		amd_iommu_irq_remap = check_ioapic_information();
   3005
   3006	if (amd_iommu_irq_remap) {
   3007		struct amd_iommu_pci_seg *pci_seg;
   3008		/*
   3009		 * Interrupt remapping enabled, create kmem_cache for the
   3010		 * remapping tables.
   3011		 */
   3012		ret = -ENOMEM;
   3013		if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
   3014			remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
   3015		else
   3016			remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
   3017		amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
   3018							remap_cache_sz,
   3019							DTE_INTTAB_ALIGNMENT,
   3020							0, NULL);
   3021		if (!amd_iommu_irq_cache)
   3022			goto out;
   3023
   3024		for_each_pci_segment(pci_seg) {
   3025			if (alloc_irq_lookup_table(pci_seg))
   3026				goto out;
   3027		}
   3028	}
   3029
   3030	ret = init_memory_definitions(ivrs_base);
   3031	if (ret)
   3032		goto out;
   3033
   3034	/* init the device table */
   3035	init_device_table();
   3036
   3037out:
   3038	/* Don't leak any ACPI memory */
   3039	acpi_put_table(ivrs_base);
   3040
   3041	return ret;
   3042}
   3043
   3044static int amd_iommu_enable_interrupts(void)
   3045{
   3046	struct amd_iommu *iommu;
   3047	int ret = 0;
   3048
   3049	for_each_iommu(iommu) {
   3050		ret = iommu_init_irq(iommu);
   3051		if (ret)
   3052			goto out;
   3053	}
   3054
   3055out:
   3056	return ret;
   3057}
   3058
   3059static bool __init detect_ivrs(void)
   3060{
   3061	struct acpi_table_header *ivrs_base;
   3062	acpi_status status;
   3063	int i;
   3064
   3065	status = acpi_get_table("IVRS", 0, &ivrs_base);
   3066	if (status == AE_NOT_FOUND)
   3067		return false;
   3068	else if (ACPI_FAILURE(status)) {
   3069		const char *err = acpi_format_exception(status);
   3070		pr_err("IVRS table error: %s\n", err);
   3071		return false;
   3072	}
   3073
   3074	acpi_put_table(ivrs_base);
   3075
   3076	if (amd_iommu_force_enable)
   3077		goto out;
   3078
   3079	/* Don't use IOMMU if there is Stoney Ridge graphics */
   3080	for (i = 0; i < 32; i++) {
   3081		u32 pci_id;
   3082
   3083		pci_id = read_pci_config(0, i, 0, 0);
   3084		if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
   3085			pr_info("Disable IOMMU on Stoney Ridge\n");
   3086			return false;
   3087		}
   3088	}
   3089
   3090out:
   3091	/* Make sure ACS will be enabled during PCI probe */
   3092	pci_request_acs();
   3093
   3094	return true;
   3095}
   3096
   3097/****************************************************************************
   3098 *
   3099 * AMD IOMMU Initialization State Machine
   3100 *
   3101 ****************************************************************************/
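        /*
         * The normal forward progression implemented in state_next() is:
         *
         *	IOMMU_START_STATE -> IOMMU_IVRS_DETECTED -> IOMMU_ACPI_FINISHED ->
         *	IOMMU_ENABLED -> IOMMU_PCI_INIT -> IOMMU_INTERRUPTS_EN ->
         *	IOMMU_INITIALIZED
         *
         * with IOMMU_NOT_FOUND, IOMMU_INIT_ERROR and IOMMU_CMDLINE_DISABLED
         * as terminal error states.
         */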
   3102
   3103static int __init state_next(void)
   3104{
   3105	int ret = 0;
   3106
   3107	switch (init_state) {
   3108	case IOMMU_START_STATE:
   3109		if (!detect_ivrs()) {
   3110			init_state	= IOMMU_NOT_FOUND;
   3111			ret		= -ENODEV;
   3112		} else {
   3113			init_state	= IOMMU_IVRS_DETECTED;
   3114		}
   3115		break;
   3116	case IOMMU_IVRS_DETECTED:
   3117		if (amd_iommu_disabled) {
   3118			init_state = IOMMU_CMDLINE_DISABLED;
   3119			ret = -EINVAL;
   3120		} else {
   3121			ret = early_amd_iommu_init();
   3122			init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
   3123		}
   3124		break;
   3125	case IOMMU_ACPI_FINISHED:
   3126		early_enable_iommus();
   3127		x86_platform.iommu_shutdown = disable_iommus;
   3128		init_state = IOMMU_ENABLED;
   3129		break;
   3130	case IOMMU_ENABLED:
   3131		register_syscore_ops(&amd_iommu_syscore_ops);
   3132		ret = amd_iommu_init_pci();
   3133		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
   3134		enable_iommus_vapic();
   3135		enable_iommus_v2();
   3136		break;
   3137	case IOMMU_PCI_INIT:
   3138		ret = amd_iommu_enable_interrupts();
   3139		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
   3140		break;
   3141	case IOMMU_INTERRUPTS_EN:
   3142		init_state = IOMMU_INITIALIZED;
   3143		break;
   3144	case IOMMU_INITIALIZED:
   3145		/* Nothing to do */
   3146		break;
   3147	case IOMMU_NOT_FOUND:
   3148	case IOMMU_INIT_ERROR:
   3149	case IOMMU_CMDLINE_DISABLED:
   3150		/* Error states => do nothing */
   3151		ret = -EINVAL;
   3152		break;
   3153	default:
   3154		/* Unknown state */
   3155		BUG();
   3156	}
   3157
   3158	if (ret) {
   3159		free_dma_resources();
   3160		if (!irq_remapping_enabled) {
   3161			disable_iommus();
   3162			free_iommu_resources();
   3163		} else {
   3164			struct amd_iommu *iommu;
   3165			struct amd_iommu_pci_seg *pci_seg;
   3166
   3167			for_each_pci_segment(pci_seg)
   3168				uninit_device_table_dma(pci_seg);
   3169
   3170			for_each_iommu(iommu)
   3171				iommu_flush_all_caches(iommu);
   3172		}
   3173	}
   3174	return ret;
   3175}
   3176
   3177static int __init iommu_go_to_state(enum iommu_init_state state)
   3178{
   3179	int ret = -EINVAL;
   3180
   3181	while (init_state != state) {
   3182		if (init_state == IOMMU_NOT_FOUND         ||
   3183		    init_state == IOMMU_INIT_ERROR        ||
   3184		    init_state == IOMMU_CMDLINE_DISABLED)
   3185			break;
   3186		ret = state_next();
   3187	}
   3188
   3189	return ret;
   3190}
   3191
   3192#ifdef CONFIG_IRQ_REMAP
   3193int __init amd_iommu_prepare(void)
   3194{
   3195	int ret;
   3196
   3197	amd_iommu_irq_remap = true;
   3198
   3199	ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
   3200	if (ret) {
   3201		amd_iommu_irq_remap = false;
   3202		return ret;
   3203	}
   3204
   3205	return amd_iommu_irq_remap ? 0 : -ENODEV;
   3206}
   3207
   3208int __init amd_iommu_enable(void)
   3209{
   3210	int ret;
   3211
   3212	ret = iommu_go_to_state(IOMMU_ENABLED);
   3213	if (ret)
   3214		return ret;
   3215
   3216	irq_remapping_enabled = 1;
   3217	return amd_iommu_xt_mode;
   3218}
   3219
   3220void amd_iommu_disable(void)
   3221{
   3222	amd_iommu_suspend();
   3223}
   3224
   3225int amd_iommu_reenable(int mode)
   3226{
   3227	amd_iommu_resume();
   3228
   3229	return 0;
   3230}
   3231
   3232int __init amd_iommu_enable_faulting(void)
   3233{
   3234	/* We enable MSI later when PCI is initialized */
   3235	return 0;
   3236}
   3237#endif
   3238
   3239/*
   3240 * This is the core init function for AMD IOMMU hardware in the system.
   3241 * This function is called from the generic x86 DMA layer initialization
   3242 * code.
   3243 */
   3244static int __init amd_iommu_init(void)
   3245{
   3246	struct amd_iommu *iommu;
   3247	int ret;
   3248
   3249	ret = iommu_go_to_state(IOMMU_INITIALIZED);
   3250#ifdef CONFIG_GART_IOMMU
   3251	if (ret && list_empty(&amd_iommu_list)) {
   3252		/*
   3253		 * We failed to initialize the AMD IOMMU - try fallback
   3254		 * to GART if possible.
   3255		 */
   3256		gart_iommu_init();
   3257	}
   3258#endif
   3259
   3260	for_each_iommu(iommu)
   3261		amd_iommu_debugfs_setup(iommu);
   3262
   3263	return ret;
   3264}
   3265
   3266static bool amd_iommu_sme_check(void)
   3267{
   3268	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) ||
   3269	    (boot_cpu_data.x86 != 0x17))
   3270		return true;
   3271
   3272	/* For Fam17h, a specific level of support is required */
   3273	if (boot_cpu_data.microcode >= 0x08001205)
   3274		return true;
   3275
   3276	if ((boot_cpu_data.microcode >= 0x08001126) &&
   3277	    (boot_cpu_data.microcode <= 0x080011ff))
   3278		return true;
   3279
   3280	pr_notice("IOMMU not currently supported when SME is active\n");
   3281
   3282	return false;
   3283}
   3284
   3285/****************************************************************************
   3286 *
   3287 * Early detect code. This code runs at IOMMU detection time in the DMA
   3288 * layer. It just looks if there is an IVRS ACPI table to detect AMD
   3289 * IOMMUs
   3290 *
   3291 ****************************************************************************/
   3292int __init amd_iommu_detect(void)
   3293{
   3294	int ret;
   3295
   3296	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
   3297		return -ENODEV;
   3298
   3299	if (!amd_iommu_sme_check())
   3300		return -ENODEV;
   3301
   3302	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
   3303	if (ret)
   3304		return ret;
   3305
   3306	amd_iommu_detected = true;
   3307	iommu_detected = 1;
   3308	x86_init.iommu.iommu_init = amd_iommu_init;
   3309
   3310	return 1;
   3311}
   3312
   3313/****************************************************************************
   3314 *
   3315 * Parsing functions for the AMD IOMMU specific kernel command line
   3316 * options.
   3317 *
   3318 ****************************************************************************/
   3319
   3320static int __init parse_amd_iommu_dump(char *str)
   3321{
   3322	amd_iommu_dump = true;
   3323
   3324	return 1;
   3325}
   3326
   3327static int __init parse_amd_iommu_intr(char *str)
   3328{
   3329	for (; *str; ++str) {
   3330		if (strncmp(str, "legacy", 6) == 0) {
   3331			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
   3332			break;
   3333		}
   3334		if (strncmp(str, "vapic", 5) == 0) {
   3335			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
   3336			break;
   3337		}
   3338	}
   3339	return 1;
   3340}
   3341
   3342static int __init parse_amd_iommu_options(char *str)
   3343{
   3344	for (; *str; ++str) {
   3345		if (strncmp(str, "fullflush", 9) == 0) {
   3346			pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
   3347			iommu_set_dma_strict();
   3348		}
   3349		if (strncmp(str, "force_enable", 12) == 0)
   3350			amd_iommu_force_enable = true;
   3351		if (strncmp(str, "off", 3) == 0)
   3352			amd_iommu_disabled = true;
   3353		if (strncmp(str, "force_isolation", 15) == 0)
   3354			amd_iommu_force_isolation = true;
   3355	}
   3356
   3357	return 1;
   3358}
   3359
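        /*
         * The ivrs_ioapic and ivrs_hpet options below accept either
         * "[id]=bus:dev.fn" or "[id]=seg:bus:dev.fn", with a decimal id and
         * hex bus/dev/fn values. For example, "ivrs_ioapic[32]=00:14.0" maps
         * IOAPIC id 32 to devid 0x00a0 on segment 0, since
         * IVRS_GET_SBDF_ID(0, 0, 0x14, 0) evaluates to (0x14 << 3) | 0.
         */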
   3360static int __init parse_ivrs_ioapic(char *str)
   3361{
   3362	u32 seg = 0, bus, dev, fn;
   3363	int ret, id, i;
   3364	u32 devid;
   3365
   3366	ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
   3367	if (ret != 4) {
   3368		ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn);
   3369		if (ret != 5) {
   3370			pr_err("Invalid command line: ivrs_ioapic%s\n", str);
   3371			return 1;
   3372		}
   3373	}
   3374
   3375	if (early_ioapic_map_size == EARLY_MAP_SIZE) {
   3376		pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
   3377			str);
   3378		return 1;
   3379	}
   3380
   3381	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
   3382
   3383	cmdline_maps			= true;
   3384	i				= early_ioapic_map_size++;
   3385	early_ioapic_map[i].id		= id;
   3386	early_ioapic_map[i].devid	= devid;
   3387	early_ioapic_map[i].cmd_line	= true;
   3388
   3389	return 1;
   3390}
   3391
   3392static int __init parse_ivrs_hpet(char *str)
   3393{
   3394	u32 seg = 0, bus, dev, fn;
   3395	int ret, id, i;
   3396	u32 devid;
   3397
   3398	ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
   3399	if (ret != 4) {
   3400		ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn);
   3401		if (ret != 5) {
   3402			pr_err("Invalid command line: ivrs_hpet%s\n", str);
   3403			return 1;
   3404		}
   3405	}
   3406
   3407	if (early_hpet_map_size == EARLY_MAP_SIZE) {
   3408		pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
   3409			str);
   3410		return 1;
   3411	}
   3412
   3413	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
   3414
   3415	cmdline_maps			= true;
   3416	i				= early_hpet_map_size++;
   3417	early_hpet_map[i].id		= id;
   3418	early_hpet_map[i].devid		= devid;
   3419	early_hpet_map[i].cmd_line	= true;
   3420
   3421	return 1;
   3422}
   3423
   3424static int __init parse_ivrs_acpihid(char *str)
   3425{
   3426	u32 seg = 0, bus, dev, fn;
   3427	char *hid, *uid, *p;
   3428	char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
   3429	int ret, i;
   3430
   3431	ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
   3432	if (ret != 4) {
   3433		ret = sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid);
   3434		if (ret != 5) {
   3435			pr_err("Invalid command line: ivrs_acpihid(%s)\n", str);
   3436			return 1;
   3437		}
   3438	}
   3439
   3440	p = acpiid;
   3441	hid = strsep(&p, ":");
   3442	uid = p;
   3443
   3444	if (!hid || !(*hid) || !uid) {
   3445		pr_err("Invalid command line: hid or uid\n");
   3446		return 1;
   3447	}
   3448
   3449	i = early_acpihid_map_size++;
   3450	memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
   3451	memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
   3452	early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
   3453	early_acpihid_map[i].cmd_line	= true;
   3454
   3455	return 1;
   3456}
   3457
   3458__setup("amd_iommu_dump",	parse_amd_iommu_dump);
   3459__setup("amd_iommu=",		parse_amd_iommu_options);
   3460__setup("amd_iommu_intr=",	parse_amd_iommu_intr);
   3461__setup("ivrs_ioapic",		parse_ivrs_ioapic);
   3462__setup("ivrs_hpet",		parse_ivrs_hpet);
   3463__setup("ivrs_acpihid",		parse_ivrs_acpihid);
   3464
   3465bool amd_iommu_v2_supported(void)
   3466{
   3467	/*
    3468	 * Since DTE[Mode]=0 is prohibited on SNP-enabled systems
    3469	 * (i.e. EFR[SNPSup]=1), the IOMMUv2 page table cannot be used
    3470	 * without setting up an IOMMUv1 page table.
   3471	 */
   3472	return amd_iommu_v2_present && !amd_iommu_snp_en;
   3473}
   3474EXPORT_SYMBOL(amd_iommu_v2_supported);
   3475
   3476struct amd_iommu *get_amd_iommu(unsigned int idx)
   3477{
   3478	unsigned int i = 0;
   3479	struct amd_iommu *iommu;
   3480
   3481	for_each_iommu(iommu)
   3482		if (i++ == idx)
   3483			return iommu;
   3484	return NULL;
   3485}
   3486
   3487/****************************************************************************
   3488 *
   3489 * IOMMU EFR Performance Counter support functionality. This code allows
   3490 * access to the IOMMU PC functionality.
   3491 *
   3492 ****************************************************************************/
   3493
   3494u8 amd_iommu_pc_get_max_banks(unsigned int idx)
   3495{
   3496	struct amd_iommu *iommu = get_amd_iommu(idx);
   3497
   3498	if (iommu)
   3499		return iommu->max_banks;
   3500
   3501	return 0;
   3502}
   3503EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
   3504
   3505bool amd_iommu_pc_supported(void)
   3506{
   3507	return amd_iommu_pc_present;
   3508}
   3509EXPORT_SYMBOL(amd_iommu_pc_supported);
   3510
   3511u8 amd_iommu_pc_get_max_counters(unsigned int idx)
   3512{
   3513	struct amd_iommu *iommu = get_amd_iommu(idx);
   3514
   3515	if (iommu)
   3516		return iommu->max_counters;
   3517
   3518	return 0;
   3519}
   3520EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
   3521
   3522static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
   3523				u8 fxn, u64 *value, bool is_write)
   3524{
   3525	u32 offset;
   3526	u32 max_offset_lim;
   3527
   3528	/* Make sure the IOMMU PC resource is available */
   3529	if (!amd_iommu_pc_present)
   3530		return -ENODEV;
   3531
   3532	/* Check for valid iommu and pc register indexing */
   3533	if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
   3534		return -ENODEV;
   3535
   3536	offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
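        	/*
        	 * Example: bank 1, counter 2, register offset 0x10 gives
        	 * ((0x40 | 1) << 12) | (2 << 8) | 0x10 == 0x41210, i.e. each
        	 * bank occupies a 4K block starting at 0x40000 and each counter
        	 * a 256-byte sub-block within it.
        	 */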
   3537
   3538	/* Limit the offset to the hw defined mmio region aperture */
   3539	max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
   3540				(iommu->max_counters << 8) | 0x28);
   3541	if ((offset < MMIO_CNTR_REG_OFFSET) ||
   3542	    (offset > max_offset_lim))
   3543		return -EINVAL;
   3544
   3545	if (is_write) {
   3546		u64 val = *value & GENMASK_ULL(47, 0);
   3547
   3548		writel((u32)val, iommu->mmio_base + offset);
   3549		writel((val >> 32), iommu->mmio_base + offset + 4);
   3550	} else {
   3551		*value = readl(iommu->mmio_base + offset + 4);
   3552		*value <<= 32;
   3553		*value |= readl(iommu->mmio_base + offset);
   3554		*value &= GENMASK_ULL(47, 0);
   3555	}
   3556
   3557	return 0;
   3558}
   3559
   3560int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
   3561{
   3562	if (!iommu)
   3563		return -EINVAL;
   3564
   3565	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
   3566}
   3567
   3568int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
   3569{
   3570	if (!iommu)
   3571		return -EINVAL;
   3572
   3573	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
   3574}
   3575
   3576#ifdef CONFIG_AMD_MEM_ENCRYPT
   3577int amd_iommu_snp_enable(void)
   3578{
   3579	/*
    3580	 * SNP support requires that the IOMMU is enabled and not
    3581	 * configured in passthrough mode.
   3582	 */
   3583	if (no_iommu || iommu_default_passthrough()) {
   3584		pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported");
   3585		return -EINVAL;
   3586	}
   3587
   3588	/*
    3589	 * Prevent enabling SNP after the IOMMU_ENABLED state because this
    3590	 * process affects how the IOMMU driver sets up data structures and
    3591	 * configures the IOMMU hardware.
   3592	 */
   3593	if (init_state > IOMMU_ENABLED) {
   3594		pr_err("SNP: Too late to enable SNP for IOMMU.\n");
   3595		return -EINVAL;
   3596	}
   3597
   3598	amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
   3599	if (!amd_iommu_snp_en)
   3600		return -EINVAL;
   3601
   3602	pr_info("SNP enabled\n");
   3603
   3604	/* Enforce IOMMU v1 pagetable when SNP is enabled. */
   3605	if (amd_iommu_pgtable != AMD_IOMMU_V1) {
   3606		pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n");
   3607		amd_iommu_pgtable = AMD_IOMMU_V1;
   3608	}
   3609
   3610	return 0;
   3611}
   3612#endif