cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

arm-smmu-v3.c (105490B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * IOMMU API for ARM architected SMMUv3 implementations.
      4 *
      5 * Copyright (C) 2015 ARM Limited
      6 *
      7 * Author: Will Deacon <will.deacon@arm.com>
      8 *
      9 * This driver is powered by bad coffee and bombay mix.
     10 */
     11
     12#include <linux/acpi.h>
     13#include <linux/acpi_iort.h>
     14#include <linux/bitops.h>
     15#include <linux/crash_dump.h>
     16#include <linux/delay.h>
     17#include <linux/dma-iommu.h>
     18#include <linux/err.h>
     19#include <linux/interrupt.h>
     20#include <linux/io-pgtable.h>
     21#include <linux/iopoll.h>
     22#include <linux/module.h>
     23#include <linux/msi.h>
     24#include <linux/of.h>
     25#include <linux/of_address.h>
     26#include <linux/of_platform.h>
     27#include <linux/pci.h>
     28#include <linux/pci-ats.h>
     29#include <linux/platform_device.h>
     30
     31#include <linux/amba/bus.h>
     32
     33#include "arm-smmu-v3.h"
     34#include "../../iommu-sva-lib.h"
     35
     36static bool disable_bypass = true;
     37module_param(disable_bypass, bool, 0444);
     38MODULE_PARM_DESC(disable_bypass,
     39	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
     40
     41static bool disable_msipolling;
     42module_param(disable_msipolling, bool, 0444);
     43MODULE_PARM_DESC(disable_msipolling,
     44	"Disable MSI-based polling for CMD_SYNC completion.");
     45
     46enum arm_smmu_msi_index {
     47	EVTQ_MSI_INDEX,
     48	GERROR_MSI_INDEX,
     49	PRIQ_MSI_INDEX,
     50	ARM_SMMU_MAX_MSIS,
     51};
     52
     53static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
     54	[EVTQ_MSI_INDEX] = {
     55		ARM_SMMU_EVTQ_IRQ_CFG0,
     56		ARM_SMMU_EVTQ_IRQ_CFG1,
     57		ARM_SMMU_EVTQ_IRQ_CFG2,
     58	},
     59	[GERROR_MSI_INDEX] = {
     60		ARM_SMMU_GERROR_IRQ_CFG0,
     61		ARM_SMMU_GERROR_IRQ_CFG1,
     62		ARM_SMMU_GERROR_IRQ_CFG2,
     63	},
     64	[PRIQ_MSI_INDEX] = {
     65		ARM_SMMU_PRIQ_IRQ_CFG0,
     66		ARM_SMMU_PRIQ_IRQ_CFG1,
     67		ARM_SMMU_PRIQ_IRQ_CFG2,
     68	},
     69};
     70
     71struct arm_smmu_option_prop {
     72	u32 opt;
     73	const char *prop;
     74};
     75
     76DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
     77DEFINE_MUTEX(arm_smmu_asid_lock);
     78
     79/*
     80 * Special value used by SVA when a process dies, to quiesce a CD without
     81 * disabling it.
     82 */
     83struct arm_smmu_ctx_desc quiet_cd = { 0 };
     84
     85static struct arm_smmu_option_prop arm_smmu_options[] = {
     86	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
     87	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
     88	{ 0, NULL},
     89};
     90
     91static void parse_driver_options(struct arm_smmu_device *smmu)
     92{
     93	int i = 0;
     94
     95	do {
     96		if (of_property_read_bool(smmu->dev->of_node,
     97						arm_smmu_options[i].prop)) {
     98			smmu->options |= arm_smmu_options[i].opt;
     99			dev_notice(smmu->dev, "option %s\n",
    100				arm_smmu_options[i].prop);
    101		}
    102	} while (arm_smmu_options[++i].opt);
    103}
    104
    105/* Low-level queue manipulation functions */
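/*
 * A brief sketch of the prod/cons encoding these helpers assume (the exact
 * field positions live in arm-smmu-v3.h): each value packs an entry index
 * in its low max_n_shift bits (Q_IDX), a wrap bit just above that (Q_WRP),
 * and an overflow flag (Q_OVF) that is preserved across increments. Equal
 * indices with equal wrap bits mean the queue is empty; equal indices with
 * differing wrap bits mean it is full. This is the usual trick for telling
 * full from empty without sacrificing a queue slot.
 */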
    106static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
    107{
    108	u32 space, prod, cons;
    109
    110	prod = Q_IDX(q, q->prod);
    111	cons = Q_IDX(q, q->cons);
    112
    113	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
    114		space = (1 << q->max_n_shift) - (prod - cons);
    115	else
    116		space = cons - prod;
    117
    118	return space >= n;
    119}
    120
    121static bool queue_full(struct arm_smmu_ll_queue *q)
    122{
    123	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
    124	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
    125}
    126
    127static bool queue_empty(struct arm_smmu_ll_queue *q)
    128{
    129	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
    130	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
    131}
    132
    133static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
    134{
    135	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
    136		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
    137	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
    138		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
    139}
    140
    141static void queue_sync_cons_out(struct arm_smmu_queue *q)
    142{
    143	/*
    144	 * Ensure that all CPU accesses (reads and writes) to the queue
    145	 * are complete before we update the cons pointer.
    146	 */
    147	__iomb();
    148	writel_relaxed(q->llq.cons, q->cons_reg);
    149}
    150
    151static void queue_inc_cons(struct arm_smmu_ll_queue *q)
    152{
    153	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
    154	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
    155}
    156
    157static int queue_sync_prod_in(struct arm_smmu_queue *q)
    158{
    159	u32 prod;
    160	int ret = 0;
    161
    162	/*
    163	 * We can't use the _relaxed() variant here, as we must prevent
    164	 * speculative reads of the queue before we have determined that
    165	 * prod has indeed moved.
    166	 */
    167	prod = readl(q->prod_reg);
    168
    169	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
    170		ret = -EOVERFLOW;
    171
    172	q->llq.prod = prod;
    173	return ret;
    174}
    175
    176static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
    177{
    178	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
    179	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
    180}
    181
    182static void queue_poll_init(struct arm_smmu_device *smmu,
    183			    struct arm_smmu_queue_poll *qp)
    184{
    185	qp->delay = 1;
    186	qp->spin_cnt = 0;
    187	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
    188	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
    189}
    190
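/*
 * queue_poll() backs off progressively: wait for a SEV-signalled event if
 * the SMMU supports it, otherwise spin with cpu_relax() for up to
 * ARM_SMMU_POLL_SPIN_COUNT iterations, then fall back to udelay() with the
 * delay doubling on each pass, and give up with -ETIMEDOUT once
 * ARM_SMMU_POLL_TIMEOUT_US has elapsed since queue_poll_init().
 */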
    191static int queue_poll(struct arm_smmu_queue_poll *qp)
    192{
    193	if (ktime_compare(ktime_get(), qp->timeout) > 0)
    194		return -ETIMEDOUT;
    195
    196	if (qp->wfe) {
    197		wfe();
    198	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
    199		cpu_relax();
    200	} else {
    201		udelay(qp->delay);
    202		qp->delay *= 2;
    203		qp->spin_cnt = 0;
    204	}
    205
    206	return 0;
    207}
    208
    209static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
    210{
    211	int i;
    212
    213	for (i = 0; i < n_dwords; ++i)
    214		*dst++ = cpu_to_le64(*src++);
    215}
    216
    217static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
    218{
    219	int i;
    220
    221	for (i = 0; i < n_dwords; ++i)
    222		*dst++ = le64_to_cpu(*src++);
    223}
    224
    225static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
    226{
    227	if (queue_empty(&q->llq))
    228		return -EAGAIN;
    229
    230	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
    231	queue_inc_cons(&q->llq);
    232	queue_sync_cons_out(q);
    233	return 0;
    234}
    235
    236/* High-level queue accessors */
    237static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
    238{
    239	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
    240	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
    241
    242	switch (ent->opcode) {
    243	case CMDQ_OP_TLBI_EL2_ALL:
    244	case CMDQ_OP_TLBI_NSNH_ALL:
    245		break;
    246	case CMDQ_OP_PREFETCH_CFG:
    247		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
    248		break;
    249	case CMDQ_OP_CFGI_CD:
    250		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
    251		fallthrough;
    252	case CMDQ_OP_CFGI_STE:
    253		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
    254		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
    255		break;
    256	case CMDQ_OP_CFGI_CD_ALL:
    257		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
    258		break;
    259	case CMDQ_OP_CFGI_ALL:
    260		/* Cover the entire SID range */
    261		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
    262		break;
    263	case CMDQ_OP_TLBI_NH_VA:
    264		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
    265		fallthrough;
    266	case CMDQ_OP_TLBI_EL2_VA:
    267		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
    268		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
    269		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
    270		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
    271		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
    272		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
    273		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
    274		break;
    275	case CMDQ_OP_TLBI_S2_IPA:
    276		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
    277		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
    278		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
    279		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
    280		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
    281		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
    282		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
    283		break;
    284	case CMDQ_OP_TLBI_NH_ASID:
    285		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
    286		fallthrough;
    287	case CMDQ_OP_TLBI_S12_VMALL:
    288		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
    289		break;
    290	case CMDQ_OP_TLBI_EL2_ASID:
    291		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
    292		break;
    293	case CMDQ_OP_ATC_INV:
    294		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
    295		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
    296		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
    297		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
    298		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
    299		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
    300		break;
    301	case CMDQ_OP_PRI_RESP:
    302		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
    303		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
    304		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
    305		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
    306		switch (ent->pri.resp) {
    307		case PRI_RESP_DENY:
    308		case PRI_RESP_FAIL:
    309		case PRI_RESP_SUCC:
    310			break;
    311		default:
    312			return -EINVAL;
    313		}
    314		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
    315		break;
    316	case CMDQ_OP_RESUME:
    317		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
    318		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
    319		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
    320		break;
    321	case CMDQ_OP_CMD_SYNC:
    322		if (ent->sync.msiaddr) {
    323			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
    324			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
    325		} else {
    326			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
    327		}
    328		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
    329		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
    330		break;
    331	default:
    332		return -ENOENT;
    333	}
    334
    335	return 0;
    336}
    337
    338static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
    339{
    340	return &smmu->cmdq;
    341}
    342
    343static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
    344					 struct arm_smmu_queue *q, u32 prod)
    345{
    346	struct arm_smmu_cmdq_ent ent = {
    347		.opcode = CMDQ_OP_CMD_SYNC,
    348	};
    349
    350	/*
    351	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
    352	 * payload, so the write will zero the entire command on that platform.
    353	 */
    354	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
    355		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
    356				   q->ent_dwords * 8;
    357	}
    358
    359	arm_smmu_cmdq_build_cmd(cmd, &ent);
    360}
    361
    362static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
    363				     struct arm_smmu_queue *q)
    364{
    365	static const char * const cerror_str[] = {
    366		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
    367		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
    368		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
    369		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
    370	};
    371
    372	int i;
    373	u64 cmd[CMDQ_ENT_DWORDS];
    374	u32 cons = readl_relaxed(q->cons_reg);
    375	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
    376	struct arm_smmu_cmdq_ent cmd_sync = {
    377		.opcode = CMDQ_OP_CMD_SYNC,
    378	};
    379
    380	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
    381		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
    382
    383	switch (idx) {
    384	case CMDQ_ERR_CERROR_ABT_IDX:
    385		dev_err(smmu->dev, "retrying command fetch\n");
    386		return;
    387	case CMDQ_ERR_CERROR_NONE_IDX:
    388		return;
    389	case CMDQ_ERR_CERROR_ATC_INV_IDX:
    390		/*
    391		 * ATC Invalidation Completion timeout. CONS is still pointing
    392		 * at the CMD_SYNC. Attempt to complete other pending commands
    393		 * by repeating the CMD_SYNC, though we might well end up back
    394		 * here since the ATC invalidation may still be pending.
    395		 */
    396		return;
    397	case CMDQ_ERR_CERROR_ILL_IDX:
    398	default:
    399		break;
    400	}
    401
    402	/*
    403	 * We may have concurrent producers, so we need to be careful
    404	 * not to touch any of the shadow cmdq state.
    405	 */
    406	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
    407	dev_err(smmu->dev, "skipping command in error state:\n");
    408	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
    409		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
    410
    411	/* Convert the erroneous command into a CMD_SYNC */
    412	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
    413
    414	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
    415}
    416
    417static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
    418{
    419	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
    420}
    421
    422/*
    423 * Command queue locking.
    424 * This is a form of bastardised rwlock with the following major changes:
    425 *
    426 * - The only LOCK routines are exclusive_trylock() and shared_lock().
    427 *   Neither have barrier semantics, and instead provide only a control
    428 *   dependency.
    429 *
    430 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
    431 *   fails if the caller appears to be the last lock holder (yes, this is
    432 *   racy). All successful UNLOCK routines have RELEASE semantics.
    433 */
    434static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
    435{
    436	int val;
    437
    438	/*
    439	 * We can try to avoid the cmpxchg() loop by simply incrementing the
    440	 * lock counter. When held in exclusive state, the lock counter is set
    441	 * to INT_MIN so these increments won't hurt as the value will remain
    442	 * negative.
    443	 */
    444	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
    445		return;
    446
    447	do {
    448		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
    449	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
    450}
    451
    452static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
    453{
    454	(void)atomic_dec_return_release(&cmdq->lock);
    455}
    456
    457static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
    458{
    459	if (atomic_read(&cmdq->lock) == 1)
    460		return false;
    461
    462	arm_smmu_cmdq_shared_unlock(cmdq);
    463	return true;
    464}
    465
    466#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
    467({									\
    468	bool __ret;							\
    469	local_irq_save(flags);						\
    470	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
    471	if (!__ret)							\
    472		local_irq_restore(flags);				\
    473	__ret;								\
    474})
    475
    476#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
    477({									\
    478	atomic_set_release(&cmdq->lock, 0);				\
    479	local_irq_restore(flags);					\
    480})
    481
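/*
 * A rough usage sketch of the lock above, mirroring what
 * arm_smmu_cmdq_issue_cmdlist() and arm_smmu_cmdq_poll_until_not_full()
 * do below (not a separate API):
 *
 *	// CMD_SYNC inserters pin the queue state before publishing:
 *	arm_smmu_cmdq_shared_lock(cmdq);
 *	... mark slots valid, wait for the sync to complete ...
 *	if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *		// last holder out: safe to update the shadow cons
 *		WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
 *		arm_smmu_cmdq_shared_unlock(cmdq);
 *	}
 *
 *	// Refreshing the cached cons from hardware requires exclusivity:
 *	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
 *		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
 *		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
 *	}
 */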
    482
    483/*
    484 * Command queue insertion.
    485 * This is made fiddly by our attempts to achieve some sort of scalability
    486 * since there is one queue shared amongst all of the CPUs in the system.  If
    487 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
    488 * then you'll *love* this monstrosity.
    489 *
    490 * The basic idea is to split the queue up into ranges of commands that are
    491 * owned by a given CPU; the owner may not have written all of the commands
    492 * itself, but is responsible for advancing the hardware prod pointer when
    493 * the time comes. The algorithm is roughly:
    494 *
    495 * 	1. Allocate some space in the queue. At this point we also discover
    496 *	   whether the head of the queue is currently owned by another CPU,
    497 *	   or whether we are the owner.
    498 *
    499 *	2. Write our commands into our allocated slots in the queue.
    500 *
    501 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
    502 *
    503 *	4. If we are an owner:
    504 *		a. Wait for the previous owner to finish.
    505 *		b. Mark the queue head as unowned, which tells us the range
    506 *		   that we are responsible for publishing.
    507 *		c. Wait for all commands in our owned range to become valid.
    508 *		d. Advance the hardware prod pointer.
    509 *		e. Tell the next owner we've finished.
    510 *
    511 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
    512 *	   owner), then we need to stick around until it has completed:
    513 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
    514 *		   to clear the first 4 bytes.
    515 *		b. Otherwise, we spin waiting for the hardware cons pointer to
    516 *		   advance past our command.
    517 *
    518 * The devil is in the details, particularly the use of locking for handling
    519 * SYNC completion and freeing up space in the queue before we think that it is
    520 * full.
    521 */
    522static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
    523					       u32 sprod, u32 eprod, bool set)
    524{
    525	u32 swidx, sbidx, ewidx, ebidx;
    526	struct arm_smmu_ll_queue llq = {
    527		.max_n_shift	= cmdq->q.llq.max_n_shift,
    528		.prod		= sprod,
    529	};
    530
    531	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
    532	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
    533
    534	while (llq.prod != eprod) {
    535		unsigned long mask;
    536		atomic_long_t *ptr;
    537		u32 limit = BITS_PER_LONG;
    538
    539		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
    540		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
    541
    542		ptr = &cmdq->valid_map[swidx];
    543
    544		if ((swidx == ewidx) && (sbidx < ebidx))
    545			limit = ebidx;
    546
    547		mask = GENMASK(limit - 1, sbidx);
    548
    549		/*
    550		 * The valid bit is the inverse of the wrap bit. This means
    551		 * that a zero-initialised queue is invalid and, after marking
    552		 * all entries as valid, they become invalid again when we
    553		 * wrap.
    554		 */
    555		if (set) {
    556			atomic_long_xor(mask, ptr);
    557		} else { /* Poll */
    558			unsigned long valid;
    559
    560			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
    561			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
    562		}
    563
    564		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
    565	}
    566}
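/*
 * Worked example of the valid-map trick above: the map starts out zeroed.
 * On the first lap around the queue (wrap bit clear) writers XOR their
 * bits to 1 and pollers wait for every bit under the mask to read as 1;
 * on the next lap (wrap bit set) the same XOR flips those bits back to 0
 * and pollers wait for 0. The expected value is always the inverse of the
 * producer's wrap bit, which is what (ULONG_MAX + !!Q_WRP(...)) & mask
 * computes.
 */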
    567
    568/* Mark all entries in the range [sprod, eprod) as valid */
    569static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
    570					u32 sprod, u32 eprod)
    571{
    572	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
    573}
    574
    575/* Wait for all entries in the range [sprod, eprod) to become valid */
    576static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
    577					 u32 sprod, u32 eprod)
    578{
    579	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
    580}
    581
    582/* Wait for the command queue to become non-full */
    583static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
    584					     struct arm_smmu_ll_queue *llq)
    585{
    586	unsigned long flags;
    587	struct arm_smmu_queue_poll qp;
    588	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
    589	int ret = 0;
    590
    591	/*
    592	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
    593	 * that fails, spin until somebody else updates it for us.
    594	 */
    595	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
    596		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
    597		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
    598		llq->val = READ_ONCE(cmdq->q.llq.val);
    599		return 0;
    600	}
    601
    602	queue_poll_init(smmu, &qp);
    603	do {
    604		llq->val = READ_ONCE(cmdq->q.llq.val);
    605		if (!queue_full(llq))
    606			break;
    607
    608		ret = queue_poll(&qp);
    609	} while (!ret);
    610
    611	return ret;
    612}
    613
    614/*
    615 * Wait until the SMMU signals a CMD_SYNC completion MSI.
    616 * Must be called with the cmdq lock held in some capacity.
    617 */
    618static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
    619					  struct arm_smmu_ll_queue *llq)
    620{
    621	int ret = 0;
    622	struct arm_smmu_queue_poll qp;
    623	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
    624	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
    625
    626	queue_poll_init(smmu, &qp);
    627
    628	/*
    629	 * The MSI won't generate an event, since it's being written back
    630	 * into the command queue.
    631	 */
    632	qp.wfe = false;
    633	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
    634	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
    635	return ret;
    636}
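/*
 * The CMD_SYNC here was built by arm_smmu_cmdq_build_sync_cmd() with its
 * MSI address pointing back at the command's own slot in the queue, so
 * completion is observed as the first 32-bit word of that entry being
 * cleared by the SMMU's MSI write -- the !VAL condition waited on above.
 */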
    637
    638/*
    639 * Wait until the SMMU cons index passes llq->prod.
    640 * Must be called with the cmdq lock held in some capacity.
    641 */
    642static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
    643					       struct arm_smmu_ll_queue *llq)
    644{
    645	struct arm_smmu_queue_poll qp;
    646	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
    647	u32 prod = llq->prod;
    648	int ret = 0;
    649
    650	queue_poll_init(smmu, &qp);
    651	llq->val = READ_ONCE(cmdq->q.llq.val);
    652	do {
    653		if (queue_consumed(llq, prod))
    654			break;
    655
    656		ret = queue_poll(&qp);
    657
    658		/*
    659		 * This needs to be a readl() so that our subsequent call
    660		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
    661		 *
    662		 * Specifically, we need to ensure that we observe all
    663		 * shared_lock()s by other CMD_SYNCs that share our owner,
    664		 * so that a failing call to tryunlock() means that we're
    665		 * the last one out and therefore we can safely advance
    666		 * cmdq->q.llq.cons. Roughly speaking:
    667		 *
    668		 * CPU 0		CPU1			CPU2 (us)
    669		 *
    670		 * if (sync)
    671		 * 	shared_lock();
    672		 *
    673		 * dma_wmb();
    674		 * set_valid_map();
    675		 *
    676		 * 			if (owner) {
    677		 *				poll_valid_map();
    678		 *				<control dependency>
    679		 *				writel(prod_reg);
    680		 *
    681		 *						readl(cons_reg);
    682		 *						tryunlock();
    683		 *
    684		 * Requires us to see CPU 0's shared_lock() acquisition.
    685		 */
    686		llq->cons = readl(cmdq->q.cons_reg);
    687	} while (!ret);
    688
    689	return ret;
    690}
    691
    692static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
    693					 struct arm_smmu_ll_queue *llq)
    694{
    695	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
    696		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
    697
    698	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
    699}
    700
    701static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
    702					u32 prod, int n)
    703{
    704	int i;
    705	struct arm_smmu_ll_queue llq = {
    706		.max_n_shift	= cmdq->q.llq.max_n_shift,
    707		.prod		= prod,
    708	};
    709
    710	for (i = 0; i < n; ++i) {
    711		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
    712
    713		prod = queue_inc_prod_n(&llq, i);
    714		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
    715	}
    716}
    717
    718/*
    719 * This is the actual insertion function, and provides the following
    720 * ordering guarantees to callers:
    721 *
    722 * - There is a dma_wmb() before publishing any commands to the queue.
    723 *   This can be relied upon to order prior writes to data structures
    724 *   in memory (such as a CD or an STE) before the command.
    725 *
    726 * - On completion of a CMD_SYNC, there is a control dependency.
    727 *   This can be relied upon to order subsequent writes to memory (e.g.
    728 *   freeing an IOVA) after completion of the CMD_SYNC.
    729 *
    730 * - Command insertion is totally ordered, so if two CPUs each race to
    731 *   insert their own list of commands then all of the commands from one
    732 *   CPU will appear before any of the commands from the other CPU.
    733 */
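/*
 * As a concrete (if simplified) example of relying on these guarantees:
 * an unmap path can clear its page-table entries, then submit the
 * corresponding TLBI commands followed by a CMD_SYNC. The dma_wmb()
 * ensures the SMMU cannot observe the invalidation commands before the
 * PTE updates are visible, and the CMD_SYNC completion orders the freeing
 * of the IOVA after the invalidation, so the address cannot be recycled
 * while stale translations may still be cached.
 */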
    734static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
    735				       u64 *cmds, int n, bool sync)
    736{
    737	u64 cmd_sync[CMDQ_ENT_DWORDS];
    738	u32 prod;
    739	unsigned long flags;
    740	bool owner;
    741	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
    742	struct arm_smmu_ll_queue llq, head;
    743	int ret = 0;
    744
    745	llq.max_n_shift = cmdq->q.llq.max_n_shift;
    746
    747	/* 1. Allocate some space in the queue */
    748	local_irq_save(flags);
    749	llq.val = READ_ONCE(cmdq->q.llq.val);
    750	do {
    751		u64 old;
    752
    753		while (!queue_has_space(&llq, n + sync)) {
    754			local_irq_restore(flags);
    755			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
    756				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
    757			local_irq_save(flags);
    758		}
    759
    760		head.cons = llq.cons;
    761		head.prod = queue_inc_prod_n(&llq, n + sync) |
    762					     CMDQ_PROD_OWNED_FLAG;
    763
    764		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
    765		if (old == llq.val)
    766			break;
    767
    768		llq.val = old;
    769	} while (1);
    770	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
    771	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
    772	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
    773
    774	/*
    775	 * 2. Write our commands into the queue
    776	 * Dependency ordering from the cmpxchg() loop above.
    777	 */
    778	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
    779	if (sync) {
    780		prod = queue_inc_prod_n(&llq, n);
    781		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
    782		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
    783
    784		/*
    785		 * In order to determine completion of our CMD_SYNC, we must
    786		 * ensure that the queue can't wrap twice without us noticing.
    787		 * We achieve that by taking the cmdq lock as shared before
    788		 * marking our slot as valid.
    789		 */
    790		arm_smmu_cmdq_shared_lock(cmdq);
    791	}
    792
    793	/* 3. Mark our slots as valid, ensuring commands are visible first */
    794	dma_wmb();
    795	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
    796
    797	/* 4. If we are the owner, take control of the SMMU hardware */
    798	if (owner) {
    799		/* a. Wait for previous owner to finish */
    800		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
    801
    802		/* b. Stop gathering work by clearing the owned flag */
    803		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
    804						   &cmdq->q.llq.atomic.prod);
    805		prod &= ~CMDQ_PROD_OWNED_FLAG;
    806
    807		/*
    808		 * c. Wait for any gathered work to be written to the queue.
    809		 * Note that we read our own entries so that we have the control
    810		 * dependency required by (d).
    811		 */
    812		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
    813
    814		/*
    815		 * d. Advance the hardware prod pointer
    816		 * Control dependency ordering from the entries becoming valid.
    817		 */
    818		writel_relaxed(prod, cmdq->q.prod_reg);
    819
    820		/*
    821		 * e. Tell the next owner we're done
    822		 * Make sure we've updated the hardware first, so that we don't
    823		 * race to update prod and potentially move it backwards.
    824		 */
    825		atomic_set_release(&cmdq->owner_prod, prod);
    826	}
    827
    828	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
    829	if (sync) {
    830		llq.prod = queue_inc_prod_n(&llq, n);
    831		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
    832		if (ret) {
    833			dev_err_ratelimited(smmu->dev,
    834					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
    835					    llq.prod,
    836					    readl_relaxed(cmdq->q.prod_reg),
    837					    readl_relaxed(cmdq->q.cons_reg));
    838		}
    839
    840		/*
    841		 * Try to unlock the cmdq lock. This will fail if we're the last
    842		 * reader, in which case we can safely update cmdq->q.llq.cons
    843		 */
    844		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
    845			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
    846			arm_smmu_cmdq_shared_unlock(cmdq);
    847		}
    848	}
    849
    850	local_irq_restore(flags);
    851	return ret;
    852}
    853
    854static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
    855				     struct arm_smmu_cmdq_ent *ent,
    856				     bool sync)
    857{
    858	u64 cmd[CMDQ_ENT_DWORDS];
    859
    860	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
    861		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
    862			 ent->opcode);
    863		return -EINVAL;
    864	}
    865
    866	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
    867}
    868
    869static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
    870				   struct arm_smmu_cmdq_ent *ent)
    871{
    872	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
    873}
    874
    875static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
    876					     struct arm_smmu_cmdq_ent *ent)
    877{
    878	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
    879}
    880
    881static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
    882				    struct arm_smmu_cmdq_batch *cmds,
    883				    struct arm_smmu_cmdq_ent *cmd)
    884{
    885	int index;
    886
    887	if (cmds->num == CMDQ_BATCH_ENTRIES) {
    888		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
    889		cmds->num = 0;
    890	}
    891
    892	index = cmds->num * CMDQ_ENT_DWORDS;
    893	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
    894		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
    895			 cmd->opcode);
    896		return;
    897	}
    898
    899	cmds->num++;
    900}
    901
    902static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
    903				      struct arm_smmu_cmdq_batch *cmds)
    904{
    905	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
    906}
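/*
 * Typical batching pattern, as used by arm_smmu_sync_cd() below: start
 * with cmds.num = 0, call arm_smmu_cmdq_batch_add() once per command (it
 * transparently flushes a full batch of CMDQ_BATCH_ENTRIES without a
 * sync), then finish with arm_smmu_cmdq_batch_submit(), which issues the
 * remaining commands together with a CMD_SYNC and waits for completion.
 */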
    907
    908static int arm_smmu_page_response(struct device *dev,
    909				  struct iommu_fault_event *unused,
    910				  struct iommu_page_response *resp)
    911{
    912	struct arm_smmu_cmdq_ent cmd = {0};
    913	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
    914	int sid = master->streams[0].id;
    915
    916	if (master->stall_enabled) {
    917		cmd.opcode		= CMDQ_OP_RESUME;
    918		cmd.resume.sid		= sid;
    919		cmd.resume.stag		= resp->grpid;
    920		switch (resp->code) {
    921		case IOMMU_PAGE_RESP_INVALID:
    922		case IOMMU_PAGE_RESP_FAILURE:
    923			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
    924			break;
    925		case IOMMU_PAGE_RESP_SUCCESS:
    926			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
    927			break;
    928		default:
    929			return -EINVAL;
    930		}
    931	} else {
    932		return -ENODEV;
    933	}
    934
    935	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
    936	/*
    937	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
    938	 * RESUME consumption guarantees that the stalled transaction will be
    939	 * terminated... at some point in the future. PRI_RESP is fire and
    940	 * forget.
    941	 */
    942
    943	return 0;
    944}
    945
    946/* Context descriptor manipulation functions */
    947void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
    948{
    949	struct arm_smmu_cmdq_ent cmd = {
    950		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
    951			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
    952		.tlbi.asid = asid,
    953	};
    954
    955	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
    956}
    957
    958static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
    959			     int ssid, bool leaf)
    960{
    961	size_t i;
    962	unsigned long flags;
    963	struct arm_smmu_master *master;
    964	struct arm_smmu_cmdq_batch cmds;
    965	struct arm_smmu_device *smmu = smmu_domain->smmu;
    966	struct arm_smmu_cmdq_ent cmd = {
    967		.opcode	= CMDQ_OP_CFGI_CD,
    968		.cfgi	= {
    969			.ssid	= ssid,
    970			.leaf	= leaf,
    971		},
    972	};
    973
    974	cmds.num = 0;
    975
    976	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
    977	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
    978		for (i = 0; i < master->num_streams; i++) {
    979			cmd.cfgi.sid = master->streams[i].id;
    980			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
    981		}
    982	}
    983	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
    984
    985	arm_smmu_cmdq_batch_submit(smmu, &cmds);
    986}
    987
    988static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
    989					struct arm_smmu_l1_ctx_desc *l1_desc)
    990{
    991	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
    992
    993	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
    994					     &l1_desc->l2ptr_dma, GFP_KERNEL);
    995	if (!l1_desc->l2ptr) {
    996		dev_warn(smmu->dev,
    997			 "failed to allocate context descriptor table\n");
    998		return -ENOMEM;
    999	}
   1000	return 0;
   1001}
   1002
   1003static void arm_smmu_write_cd_l1_desc(__le64 *dst,
   1004				      struct arm_smmu_l1_ctx_desc *l1_desc)
   1005{
   1006	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
   1007		  CTXDESC_L1_DESC_V;
   1008
   1009	/* See comment in arm_smmu_write_ctx_desc() */
   1010	WRITE_ONCE(*dst, cpu_to_le64(val));
   1011}
   1012
   1013static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
   1014				   u32 ssid)
   1015{
   1016	__le64 *l1ptr;
   1017	unsigned int idx;
   1018	struct arm_smmu_l1_ctx_desc *l1_desc;
   1019	struct arm_smmu_device *smmu = smmu_domain->smmu;
   1020	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
   1021
   1022	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
   1023		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
   1024
   1025	idx = ssid >> CTXDESC_SPLIT;
   1026	l1_desc = &cdcfg->l1_desc[idx];
   1027	if (!l1_desc->l2ptr) {
   1028		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
   1029			return NULL;
   1030
   1031		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
   1032		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
   1033		/* An invalid L1CD can be cached */
   1034		arm_smmu_sync_cd(smmu_domain, ssid, false);
   1035	}
   1036	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
   1037	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
   1038}
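/*
 * Two-level CD lookup in short: the upper SSID bits (ssid >> CTXDESC_SPLIT)
 * select an L1 descriptor whose leaf table is allocated lazily on first
 * use, and the low bits (ssid masked by CTXDESC_L2_ENTRIES - 1) index the
 * CD within that leaf. The CFGI_CD sync issued after installing a new L1
 * descriptor matters because the SMMU may have cached the
 * previously-invalid L1 entry.
 */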
   1039
   1040int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
   1041			    struct arm_smmu_ctx_desc *cd)
   1042{
   1043	/*
   1044	 * This function handles the following cases:
   1045	 *
   1046	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
   1047	 * (2) Install a secondary CD, for SID+SSID traffic.
   1048	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
   1049	 *     CD, then invalidate the old entry and mappings.
   1050	 * (4) Quiesce the context without clearing the valid bit. Disable
   1051	 *     translation, and ignore any translation fault.
   1052	 * (5) Remove a secondary CD.
   1053	 */
   1054	u64 val;
   1055	bool cd_live;
   1056	__le64 *cdptr;
   1057
   1058	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
   1059		return -E2BIG;
   1060
   1061	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
   1062	if (!cdptr)
   1063		return -ENOMEM;
   1064
   1065	val = le64_to_cpu(cdptr[0]);
   1066	cd_live = !!(val & CTXDESC_CD_0_V);
   1067
   1068	if (!cd) { /* (5) */
   1069		val = 0;
   1070	} else if (cd == &quiet_cd) { /* (4) */
   1071		val |= CTXDESC_CD_0_TCR_EPD0;
   1072	} else if (cd_live) { /* (3) */
   1073		val &= ~CTXDESC_CD_0_ASID;
   1074		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
   1075		/*
   1076		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
   1077		 * this substream's traffic
   1078		 */
   1079	} else { /* (1) and (2) */
   1080		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
   1081		cdptr[2] = 0;
   1082		cdptr[3] = cpu_to_le64(cd->mair);
   1083
   1084		/*
   1085		 * STE is live, and the SMMU might read dwords of this CD in any
   1086		 * order. Ensure that it observes valid values before reading
   1087		 * V=1.
   1088		 */
   1089		arm_smmu_sync_cd(smmu_domain, ssid, true);
   1090
   1091		val = cd->tcr |
   1092#ifdef __BIG_ENDIAN
   1093			CTXDESC_CD_0_ENDI |
   1094#endif
   1095			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
   1096			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
   1097			CTXDESC_CD_0_AA64 |
   1098			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
   1099			CTXDESC_CD_0_V;
   1100
   1101		if (smmu_domain->stall_enabled)
   1102			val |= CTXDESC_CD_0_S;
   1103	}
   1104
   1105	/*
   1106	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
   1107	 * "Configuration structures and configuration invalidation completion"
   1108	 *
   1109	 *   The size of single-copy atomic reads made by the SMMU is
   1110	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
   1111	 *   field within an aligned 64-bit span of a structure can be altered
   1112	 *   without first making the structure invalid.
   1113	 */
   1114	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
   1115	arm_smmu_sync_cd(smmu_domain, ssid, true);
   1116	return 0;
   1117}
   1118
   1119static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
   1120{
   1121	int ret;
   1122	size_t l1size;
   1123	size_t max_contexts;
   1124	struct arm_smmu_device *smmu = smmu_domain->smmu;
   1125	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
   1126	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
   1127
   1128	max_contexts = 1 << cfg->s1cdmax;
   1129
   1130	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
   1131	    max_contexts <= CTXDESC_L2_ENTRIES) {
   1132		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
   1133		cdcfg->num_l1_ents = max_contexts;
   1134
   1135		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
   1136	} else {
   1137		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
   1138		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
   1139						  CTXDESC_L2_ENTRIES);
   1140
   1141		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
   1142					      sizeof(*cdcfg->l1_desc),
   1143					      GFP_KERNEL);
   1144		if (!cdcfg->l1_desc)
   1145			return -ENOMEM;
   1146
   1147		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
   1148	}
   1149
   1150	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
   1151					   GFP_KERNEL);
   1152	if (!cdcfg->cdtab) {
   1153		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
   1154		ret = -ENOMEM;
   1155		goto err_free_l1;
   1156	}
   1157
   1158	return 0;
   1159
   1160err_free_l1:
   1161	if (cdcfg->l1_desc) {
   1162		devm_kfree(smmu->dev, cdcfg->l1_desc);
   1163		cdcfg->l1_desc = NULL;
   1164	}
   1165	return ret;
   1166}
   1167
   1168static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
   1169{
   1170	int i;
   1171	size_t size, l1size;
   1172	struct arm_smmu_device *smmu = smmu_domain->smmu;
   1173	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
   1174
   1175	if (cdcfg->l1_desc) {
   1176		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
   1177
   1178		for (i = 0; i < cdcfg->num_l1_ents; i++) {
   1179			if (!cdcfg->l1_desc[i].l2ptr)
   1180				continue;
   1181
   1182			dmam_free_coherent(smmu->dev, size,
   1183					   cdcfg->l1_desc[i].l2ptr,
   1184					   cdcfg->l1_desc[i].l2ptr_dma);
   1185		}
   1186		devm_kfree(smmu->dev, cdcfg->l1_desc);
   1187		cdcfg->l1_desc = NULL;
   1188
   1189		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
   1190	} else {
   1191		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
   1192	}
   1193
   1194	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
   1195	cdcfg->cdtab_dma = 0;
   1196	cdcfg->cdtab = NULL;
   1197}
   1198
   1199bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
   1200{
   1201	bool free;
   1202	struct arm_smmu_ctx_desc *old_cd;
   1203
   1204	if (!cd->asid)
   1205		return false;
   1206
   1207	free = refcount_dec_and_test(&cd->refs);
   1208	if (free) {
   1209		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
   1210		WARN_ON(old_cd != cd);
   1211	}
   1212	return free;
   1213}
   1214
   1215/* Stream table manipulation functions */
   1216static void
   1217arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
   1218{
   1219	u64 val = 0;
   1220
   1221	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
   1222	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
   1223
   1224	/* See comment in arm_smmu_write_ctx_desc() */
   1225	WRITE_ONCE(*dst, cpu_to_le64(val));
   1226}
   1227
   1228static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
   1229{
   1230	struct arm_smmu_cmdq_ent cmd = {
   1231		.opcode	= CMDQ_OP_CFGI_STE,
   1232		.cfgi	= {
   1233			.sid	= sid,
   1234			.leaf	= true,
   1235		},
   1236	};
   1237
   1238	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
   1239}
   1240
   1241static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
   1242				      __le64 *dst)
   1243{
   1244	/*
   1245	 * This is hideously complicated, but we only really care about
   1246	 * three cases at the moment:
   1247	 *
   1248	 * 1. Invalid (all zero) -> bypass/fault (init)
   1249	 * 2. Bypass/fault -> translation/bypass (attach)
   1250	 * 3. Translation/bypass -> bypass/fault (detach)
   1251	 *
   1252	 * Given that we can't update the STE atomically and the SMMU
   1253	 * doesn't read the thing in a defined order, that leaves us
   1254	 * with the following maintenance requirements:
   1255	 *
   1256	 * 1. Update Config, return (init time STEs aren't live)
   1257	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
   1258	 * 3. Update Config, sync
   1259	 */
   1260	u64 val = le64_to_cpu(dst[0]);
   1261	bool ste_live = false;
   1262	struct arm_smmu_device *smmu = NULL;
   1263	struct arm_smmu_s1_cfg *s1_cfg = NULL;
   1264	struct arm_smmu_s2_cfg *s2_cfg = NULL;
   1265	struct arm_smmu_domain *smmu_domain = NULL;
   1266	struct arm_smmu_cmdq_ent prefetch_cmd = {
   1267		.opcode		= CMDQ_OP_PREFETCH_CFG,
   1268		.prefetch	= {
   1269			.sid	= sid,
   1270		},
   1271	};
   1272
   1273	if (master) {
   1274		smmu_domain = master->domain;
   1275		smmu = master->smmu;
   1276	}
   1277
   1278	if (smmu_domain) {
   1279		switch (smmu_domain->stage) {
   1280		case ARM_SMMU_DOMAIN_S1:
   1281			s1_cfg = &smmu_domain->s1_cfg;
   1282			break;
   1283		case ARM_SMMU_DOMAIN_S2:
   1284		case ARM_SMMU_DOMAIN_NESTED:
   1285			s2_cfg = &smmu_domain->s2_cfg;
   1286			break;
   1287		default:
   1288			break;
   1289		}
   1290	}
   1291
   1292	if (val & STRTAB_STE_0_V) {
   1293		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
   1294		case STRTAB_STE_0_CFG_BYPASS:
   1295			break;
   1296		case STRTAB_STE_0_CFG_S1_TRANS:
   1297		case STRTAB_STE_0_CFG_S2_TRANS:
   1298			ste_live = true;
   1299			break;
   1300		case STRTAB_STE_0_CFG_ABORT:
   1301			BUG_ON(!disable_bypass);
   1302			break;
   1303		default:
   1304			BUG(); /* STE corruption */
   1305		}
   1306	}
   1307
   1308	/* Nuke the existing STE_0 value, as we're going to rewrite it */
   1309	val = STRTAB_STE_0_V;
   1310
   1311	/* Bypass/fault */
   1312	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
   1313		if (!smmu_domain && disable_bypass)
   1314			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
   1315		else
   1316			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
   1317
   1318		dst[0] = cpu_to_le64(val);
   1319		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
   1320						STRTAB_STE_1_SHCFG_INCOMING));
   1321		dst[2] = 0; /* Nuke the VMID */
   1322		/*
   1323		 * The SMMU can perform negative caching, so we must sync
   1324		 * the STE regardless of whether the old value was live.
   1325		 */
   1326		if (smmu)
   1327			arm_smmu_sync_ste_for_sid(smmu, sid);
   1328		return;
   1329	}
   1330
   1331	if (s1_cfg) {
   1332		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
   1333			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
   1334
   1335		BUG_ON(ste_live);
   1336		dst[1] = cpu_to_le64(
   1337			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
   1338			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
   1339			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
   1340			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
   1341			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
   1342
   1343		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
   1344		    !master->stall_enabled)
   1345			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
   1346
   1347		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
   1348			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
   1349			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
   1350			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
   1351	}
   1352
   1353	if (s2_cfg) {
   1354		BUG_ON(ste_live);
   1355		dst[2] = cpu_to_le64(
   1356			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
   1357			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
   1358#ifdef __BIG_ENDIAN
   1359			 STRTAB_STE_2_S2ENDI |
   1360#endif
   1361			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
   1362			 STRTAB_STE_2_S2R);
   1363
   1364		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
   1365
   1366		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
   1367	}
   1368
   1369	if (master->ats_enabled)
   1370		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
   1371						 STRTAB_STE_1_EATS_TRANS));
   1372
   1373	arm_smmu_sync_ste_for_sid(smmu, sid);
   1374	/* See comment in arm_smmu_write_ctx_desc() */
   1375	WRITE_ONCE(dst[0], cpu_to_le64(val));
   1376	arm_smmu_sync_ste_for_sid(smmu, sid);
   1377
   1378	/* It's likely that we'll want to use the new STE soon */
   1379	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
   1380		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
   1381}
   1382
   1383static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
   1384{
   1385	unsigned int i;
   1386
   1387	for (i = 0; i < nent; ++i) {
   1388		arm_smmu_write_strtab_ent(NULL, -1, strtab);
   1389		strtab += STRTAB_STE_DWORDS;
   1390	}
   1391}
   1392
   1393static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
   1394{
   1395	size_t size;
   1396	void *strtab;
   1397	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
   1398	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
   1399
   1400	if (desc->l2ptr)
   1401		return 0;
   1402
   1403	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
   1404	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
   1405
   1406	desc->span = STRTAB_SPLIT + 1;
   1407	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
   1408					  GFP_KERNEL);
   1409	if (!desc->l2ptr) {
   1410		dev_err(smmu->dev,
   1411			"failed to allocate l2 stream table for SID %u\n",
   1412			sid);
   1413		return -ENOMEM;
   1414	}
   1415
   1416	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
   1417	arm_smmu_write_strtab_l1_desc(strtab, desc);
   1418	return 0;
   1419}
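/*
 * The stream table mirrors the same two-level scheme: the SID bits above
 * STRTAB_SPLIT select an L1 descriptor, and a leaf of 1 << STRTAB_SPLIT
 * STEs is allocated on demand here, pre-initialised via
 * arm_smmu_init_bypass_stes() (bypass or abort, depending on
 * disable_bypass) before the L1 descriptor is made to point at it.
 */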
   1420
   1421static struct arm_smmu_master *
   1422arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
   1423{
   1424	struct rb_node *node;
   1425	struct arm_smmu_stream *stream;
   1426
   1427	lockdep_assert_held(&smmu->streams_mutex);
   1428
   1429	node = smmu->streams.rb_node;
   1430	while (node) {
   1431		stream = rb_entry(node, struct arm_smmu_stream, node);
   1432		if (stream->id < sid)
   1433			node = node->rb_right;
   1434		else if (stream->id > sid)
   1435			node = node->rb_left;
   1436		else
   1437			return stream->master;
   1438	}
   1439
   1440	return NULL;
   1441}
   1442
   1443/* IRQ and event handlers */
   1444static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
   1445{
   1446	int ret;
   1447	u32 reason;
   1448	u32 perm = 0;
   1449	struct arm_smmu_master *master;
   1450	bool ssid_valid = evt[0] & EVTQ_0_SSV;
   1451	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
   1452	struct iommu_fault_event fault_evt = { };
   1453	struct iommu_fault *flt = &fault_evt.fault;
   1454
   1455	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
   1456	case EVT_ID_TRANSLATION_FAULT:
   1457		reason = IOMMU_FAULT_REASON_PTE_FETCH;
   1458		break;
   1459	case EVT_ID_ADDR_SIZE_FAULT:
   1460		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
   1461		break;
   1462	case EVT_ID_ACCESS_FAULT:
   1463		reason = IOMMU_FAULT_REASON_ACCESS;
   1464		break;
   1465	case EVT_ID_PERMISSION_FAULT:
   1466		reason = IOMMU_FAULT_REASON_PERMISSION;
   1467		break;
   1468	default:
   1469		return -EOPNOTSUPP;
   1470	}
   1471
   1472	/* Stage-2 is always pinned at the moment */
   1473	if (evt[1] & EVTQ_1_S2)
   1474		return -EFAULT;
   1475
   1476	if (evt[1] & EVTQ_1_RnW)
   1477		perm |= IOMMU_FAULT_PERM_READ;
   1478	else
   1479		perm |= IOMMU_FAULT_PERM_WRITE;
   1480
   1481	if (evt[1] & EVTQ_1_InD)
   1482		perm |= IOMMU_FAULT_PERM_EXEC;
   1483
   1484	if (evt[1] & EVTQ_1_PnU)
   1485		perm |= IOMMU_FAULT_PERM_PRIV;
   1486
   1487	if (evt[1] & EVTQ_1_STALL) {
   1488		flt->type = IOMMU_FAULT_PAGE_REQ;
   1489		flt->prm = (struct iommu_fault_page_request) {
   1490			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
   1491			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
   1492			.perm = perm,
   1493			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
   1494		};
   1495
   1496		if (ssid_valid) {
   1497			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
   1498			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
   1499		}
   1500	} else {
   1501		flt->type = IOMMU_FAULT_DMA_UNRECOV;
   1502		flt->event = (struct iommu_fault_unrecoverable) {
   1503			.reason = reason,
   1504			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
   1505			.perm = perm,
   1506			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
   1507		};
   1508
   1509		if (ssid_valid) {
   1510			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
   1511			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
   1512		}
   1513	}
   1514
   1515	mutex_lock(&smmu->streams_mutex);
   1516	master = arm_smmu_find_master(smmu, sid);
   1517	if (!master) {
   1518		ret = -EINVAL;
   1519		goto out_unlock;
   1520	}
   1521
   1522	ret = iommu_report_device_fault(master->dev, &fault_evt);
   1523	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
   1524		/* Nobody cared, abort the access */
   1525		struct iommu_page_response resp = {
   1526			.pasid		= flt->prm.pasid,
   1527			.grpid		= flt->prm.grpid,
   1528			.code		= IOMMU_PAGE_RESP_FAILURE,
   1529		};
   1530		arm_smmu_page_response(master->dev, &fault_evt, &resp);
   1531	}
   1532
   1533out_unlock:
   1534	mutex_unlock(&smmu->streams_mutex);
   1535	return ret;
   1536}
   1537
   1538static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
   1539{
   1540	int i, ret;
   1541	struct arm_smmu_device *smmu = dev;
   1542	struct arm_smmu_queue *q = &smmu->evtq.q;
   1543	struct arm_smmu_ll_queue *llq = &q->llq;
   1544	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
   1545				      DEFAULT_RATELIMIT_BURST);
   1546	u64 evt[EVTQ_ENT_DWORDS];
   1547
   1548	do {
   1549		while (!queue_remove_raw(q, evt)) {
   1550			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
   1551
   1552			ret = arm_smmu_handle_evt(smmu, evt);
   1553			if (!ret || !__ratelimit(&rs))
   1554				continue;
   1555
   1556			dev_info(smmu->dev, "event 0x%02x received:\n", id);
   1557			for (i = 0; i < ARRAY_SIZE(evt); ++i)
   1558				dev_info(smmu->dev, "\t0x%016llx\n",
   1559					 (unsigned long long)evt[i]);
   1560
   1561			cond_resched();
   1562		}
   1563
   1564		/*
   1565		 * Not much we can do on overflow, so scream and pretend we're
   1566		 * trying harder.
   1567		 */
   1568		if (queue_sync_prod_in(q) == -EOVERFLOW)
   1569			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
   1570	} while (!queue_empty(llq));
   1571
   1572	/* Sync our overflow flag, as we believe we're up to speed */
   1573	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
   1574		    Q_IDX(llq, llq->cons);
   1575	return IRQ_HANDLED;
   1576}
   1577
   1578static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
   1579{
   1580	u32 sid, ssid;
   1581	u16 grpid;
   1582	bool ssv, last;
   1583
   1584	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
   1585	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
   1586	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
   1587	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
   1588	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
   1589
   1590	dev_info(smmu->dev, "unexpected PRI request received:\n");
   1591	dev_info(smmu->dev,
   1592		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
   1593		 sid, ssid, grpid, last ? "L" : "",
   1594		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
   1595		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
   1596		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
   1597		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
   1598		 evt[1] & PRIQ_1_ADDR_MASK);
   1599
   1600	if (last) {
   1601		struct arm_smmu_cmdq_ent cmd = {
   1602			.opcode			= CMDQ_OP_PRI_RESP,
   1603			.substream_valid	= ssv,
   1604			.pri			= {
   1605				.sid	= sid,
   1606				.ssid	= ssid,
   1607				.grpid	= grpid,
   1608				.resp	= PRI_RESP_DENY,
   1609			},
   1610		};
   1611
   1612		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
   1613	}
   1614}
   1615
   1616static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
   1617{
   1618	struct arm_smmu_device *smmu = dev;
   1619	struct arm_smmu_queue *q = &smmu->priq.q;
   1620	struct arm_smmu_ll_queue *llq = &q->llq;
   1621	u64 evt[PRIQ_ENT_DWORDS];
   1622
   1623	do {
   1624		while (!queue_remove_raw(q, evt))
   1625			arm_smmu_handle_ppr(smmu, evt);
   1626
   1627		if (queue_sync_prod_in(q) == -EOVERFLOW)
   1628			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
   1629	} while (!queue_empty(llq));
   1630
   1631	/* Sync our overflow flag, as we believe we're up to speed */
   1632	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
   1633		      Q_IDX(llq, llq->cons);
   1634	queue_sync_cons_out(q);
   1635	return IRQ_HANDLED;
   1636}
   1637
   1638static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
   1639
   1640static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
   1641{
   1642	u32 gerror, gerrorn, active;
   1643	struct arm_smmu_device *smmu = dev;
   1644
   1645	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
   1646	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
   1647
   1648	active = gerror ^ gerrorn;
   1649	if (!(active & GERROR_ERR_MASK))
   1650		return IRQ_NONE; /* No errors pending */
   1651
   1652	dev_warn(smmu->dev,
   1653		 "unexpected global error reported (0x%08x), this could be serious\n",
   1654		 active);
   1655
   1656	if (active & GERROR_SFM_ERR) {
   1657		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
   1658		arm_smmu_device_disable(smmu);
   1659	}
   1660
   1661	if (active & GERROR_MSI_GERROR_ABT_ERR)
   1662		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
   1663
   1664	if (active & GERROR_MSI_PRIQ_ABT_ERR)
   1665		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
   1666
   1667	if (active & GERROR_MSI_EVTQ_ABT_ERR)
   1668		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
   1669
   1670	if (active & GERROR_MSI_CMDQ_ABT_ERR)
   1671		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
   1672
   1673	if (active & GERROR_PRIQ_ABT_ERR)
   1674		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
   1675
   1676	if (active & GERROR_EVTQ_ABT_ERR)
   1677		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
   1678
   1679	if (active & GERROR_CMDQ_ERR)
   1680		arm_smmu_cmdq_skip_err(smmu);
   1681
   1682	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
   1683	return IRQ_HANDLED;
   1684}
   1685
   1686static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
   1687{
   1688	struct arm_smmu_device *smmu = dev;
   1689
   1690	arm_smmu_evtq_thread(irq, dev);
   1691	if (smmu->features & ARM_SMMU_FEAT_PRI)
   1692		arm_smmu_priq_thread(irq, dev);
   1693
   1694	return IRQ_HANDLED;
   1695}
   1696
   1697static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
   1698{
   1699	arm_smmu_gerror_handler(irq, dev);
   1700	return IRQ_WAKE_THREAD;
   1701}
   1702
   1703static void
   1704arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
   1705			struct arm_smmu_cmdq_ent *cmd)
   1706{
   1707	size_t log2_span;
   1708	size_t span_mask;
    1709	/* ATC invalidates are always on 4096-byte pages */
   1710	size_t inval_grain_shift = 12;
   1711	unsigned long page_start, page_end;
   1712
   1713	/*
   1714	 * ATS and PASID:
   1715	 *
   1716	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
   1717	 * prefix. In that case all ATC entries within the address range are
   1718	 * invalidated, including those that were requested with a PASID! There
   1719	 * is no way to invalidate only entries without PASID.
   1720	 *
   1721	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
   1722	 * traffic), translation requests without PASID create ATC entries
   1723	 * without PASID, which must be invalidated with substream_valid clear.
   1724	 * This has the unpleasant side-effect of invalidating all PASID-tagged
   1725	 * ATC entries within the address range.
   1726	 */
   1727	*cmd = (struct arm_smmu_cmdq_ent) {
   1728		.opcode			= CMDQ_OP_ATC_INV,
   1729		.substream_valid	= !!ssid,
   1730		.atc.ssid		= ssid,
   1731	};
   1732
   1733	if (!size) {
   1734		cmd->atc.size = ATC_INV_SIZE_ALL;
   1735		return;
   1736	}
   1737
   1738	page_start	= iova >> inval_grain_shift;
   1739	page_end	= (iova + size - 1) >> inval_grain_shift;
   1740
   1741	/*
   1742	 * In an ATS Invalidate Request, the address must be aligned on the
    1743	 * range size, which must be a power-of-two number of pages. We
   1744	 * thus have to choose between grossly over-invalidating the region, or
   1745	 * splitting the invalidation into multiple commands. For simplicity
   1746	 * we'll go with the first solution, but should refine it in the future
   1747	 * if multiple commands are shown to be more efficient.
   1748	 *
   1749	 * Find the smallest power of two that covers the range. The most
   1750	 * significant differing bit between the start and end addresses,
   1751	 * fls(start ^ end), indicates the required span. For example:
   1752	 *
   1753	 * We want to invalidate pages [8; 11]. This is already the ideal range:
   1754	 *		x = 0b1000 ^ 0b1011 = 0b11
   1755	 *		span = 1 << fls(x) = 4
   1756	 *
   1757	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
   1758	 *		x = 0b0111 ^ 0b1010 = 0b1101
   1759	 *		span = 1 << fls(x) = 16
   1760	 */
   1761	log2_span	= fls_long(page_start ^ page_end);
   1762	span_mask	= (1ULL << log2_span) - 1;
   1763
   1764	page_start	&= ~span_mask;
   1765
   1766	cmd->atc.addr	= page_start << inval_grain_shift;
   1767	cmd->atc.size	= log2_span;
   1768}
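
        /*
         * Worked example (illustrative, using the 4KiB ATS granule above):
         * invalidating iova 0x7000 with size 0x4000 gives page_start = 7 and
         * page_end = 10, so
         *
         *	log2_span = fls_long(0b0111 ^ 0b1010) = 4
         *	span_mask = 0xf, page_start &= ~0xf = 0
         *
         * and the command is built with atc.addr = 0x0 and atc.size = 4,
         * i.e. a 16-page (64KiB) invalidation covering [0x0; 0xffff].
         */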
   1769
   1770static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
   1771{
   1772	int i;
   1773	struct arm_smmu_cmdq_ent cmd;
   1774	struct arm_smmu_cmdq_batch cmds;
   1775
   1776	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
   1777
   1778	cmds.num = 0;
   1779	for (i = 0; i < master->num_streams; i++) {
   1780		cmd.atc.sid = master->streams[i].id;
   1781		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
   1782	}
   1783
   1784	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
   1785}
   1786
   1787int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
   1788			    unsigned long iova, size_t size)
   1789{
   1790	int i;
   1791	unsigned long flags;
   1792	struct arm_smmu_cmdq_ent cmd;
   1793	struct arm_smmu_master *master;
   1794	struct arm_smmu_cmdq_batch cmds;
   1795
   1796	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
   1797		return 0;
   1798
   1799	/*
   1800	 * Ensure that we've completed prior invalidation of the main TLBs
   1801	 * before we read 'nr_ats_masters' in case of a concurrent call to
   1802	 * arm_smmu_enable_ats():
   1803	 *
   1804	 *	// unmap()			// arm_smmu_enable_ats()
   1805	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
   1806	 *	smp_mb();			[...]
   1807	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
   1808	 *
   1809	 * Ensures that we always see the incremented 'nr_ats_masters' count if
   1810	 * ATS was enabled at the PCI device before completion of the TLBI.
   1811	 */
   1812	smp_mb();
   1813	if (!atomic_read(&smmu_domain->nr_ats_masters))
   1814		return 0;
   1815
   1816	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
   1817
   1818	cmds.num = 0;
   1819
   1820	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
   1821	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
   1822		if (!master->ats_enabled)
   1823			continue;
   1824
   1825		for (i = 0; i < master->num_streams; i++) {
   1826			cmd.atc.sid = master->streams[i].id;
   1827			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
   1828		}
   1829	}
   1830	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
   1831
   1832	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
   1833}
   1834
   1835/* IO_PGTABLE API */
   1836static void arm_smmu_tlb_inv_context(void *cookie)
   1837{
   1838	struct arm_smmu_domain *smmu_domain = cookie;
   1839	struct arm_smmu_device *smmu = smmu_domain->smmu;
   1840	struct arm_smmu_cmdq_ent cmd;
   1841
   1842	/*
   1843	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
   1844	 * PTEs previously cleared by unmaps on the current CPU not yet visible
   1845	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
   1846	 * insertion to guarantee those are observed before the TLBI. Do be
   1847	 * careful, 007.
   1848	 */
   1849	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
   1850		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
   1851	} else {
   1852		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
   1853		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
   1854		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
   1855	}
   1856	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
   1857}
   1858
   1859static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
   1860				     unsigned long iova, size_t size,
   1861				     size_t granule,
   1862				     struct arm_smmu_domain *smmu_domain)
   1863{
   1864	struct arm_smmu_device *smmu = smmu_domain->smmu;
   1865	unsigned long end = iova + size, num_pages = 0, tg = 0;
   1866	size_t inv_range = granule;
   1867	struct arm_smmu_cmdq_batch cmds;
   1868
   1869	if (!size)
   1870		return;
   1871
   1872	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
   1873		/* Get the leaf page size */
   1874		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
   1875
   1876		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
   1877		cmd->tlbi.tg = (tg - 10) / 2;
   1878
   1879		/* Determine what level the granule is at */
   1880		cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
   1881
   1882		num_pages = size >> tg;
   1883	}
   1884
   1885	cmds.num = 0;
   1886
   1887	while (iova < end) {
   1888		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
   1889			/*
   1890			 * On each iteration of the loop, the range is 5 bits
   1891			 * worth of the aligned size remaining.
   1892			 * The range in pages is:
   1893			 *
   1894			 * range = (num_pages & (0x1f << __ffs(num_pages)))
   1895			 */
   1896			unsigned long scale, num;
   1897
   1898			/* Determine the power of 2 multiple number of pages */
   1899			scale = __ffs(num_pages);
   1900			cmd->tlbi.scale = scale;
   1901
   1902			/* Determine how many chunks of 2^scale size we have */
   1903			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
   1904			cmd->tlbi.num = num - 1;
   1905
   1906			/* range is num * 2^scale * pgsize */
   1907			inv_range = num << (scale + tg);
   1908
   1909			/* Clear out the lower order bits for the next iteration */
   1910			num_pages -= num << scale;
   1911		}
   1912
   1913		cmd->tlbi.addr = iova;
   1914		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
   1915		iova += inv_range;
   1916	}
   1917	arm_smmu_cmdq_batch_submit(smmu, &cmds);
   1918}
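
        /*
         * Worked example of the loop above (illustrative, assuming a 4KiB
         * leaf granule, i.e. tg = 12, and a 5-bit NUM field so
         * CMDQ_TLBI_RANGE_NUM_MAX = 31): invalidating 33 pages
         * (size = 0x21000, granule = 0x1000) on an SMMU with
         * ARM_SMMU_FEAT_RANGE_INV sets tlbi.tg = (12 - 10) / 2 = 1,
         * tlbi.ttl = 4 - (9 / 9) = 3 and num_pages = 33, then queues two
         * range commands:
         *
         *	scale = __ffs(33) = 0, num = 33 & 31 = 1  ->  1 page   (4KiB)
         *	scale = __ffs(32) = 5, num = 1            -> 32 pages (128KiB)
         *
         * Without range invalidation support, the same call would instead
         * queue 33 single-page TLBI commands, one per granule.
         */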
   1919
   1920static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
   1921					  size_t granule, bool leaf,
   1922					  struct arm_smmu_domain *smmu_domain)
   1923{
   1924	struct arm_smmu_cmdq_ent cmd = {
   1925		.tlbi = {
   1926			.leaf	= leaf,
   1927		},
   1928	};
   1929
   1930	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
   1931		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
   1932				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
   1933		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
   1934	} else {
   1935		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
   1936		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
   1937	}
   1938	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
   1939
   1940	/*
   1941	 * Unfortunately, this can't be leaf-only since we may have
   1942	 * zapped an entire table.
   1943	 */
   1944	arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
   1945}
   1946
   1947void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
   1948				 size_t granule, bool leaf,
   1949				 struct arm_smmu_domain *smmu_domain)
   1950{
   1951	struct arm_smmu_cmdq_ent cmd = {
   1952		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
   1953			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
   1954		.tlbi = {
   1955			.asid	= asid,
   1956			.leaf	= leaf,
   1957		},
   1958	};
   1959
   1960	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
   1961}
   1962
   1963static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
   1964					 unsigned long iova, size_t granule,
   1965					 void *cookie)
   1966{
   1967	struct arm_smmu_domain *smmu_domain = cookie;
   1968	struct iommu_domain *domain = &smmu_domain->domain;
   1969
   1970	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
   1971}
   1972
   1973static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
   1974				  size_t granule, void *cookie)
   1975{
   1976	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
   1977}
   1978
   1979static const struct iommu_flush_ops arm_smmu_flush_ops = {
   1980	.tlb_flush_all	= arm_smmu_tlb_inv_context,
   1981	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
   1982	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
   1983};
   1984
   1985/* IOMMU API */
   1986static bool arm_smmu_capable(enum iommu_cap cap)
   1987{
   1988	switch (cap) {
   1989	case IOMMU_CAP_CACHE_COHERENCY:
   1990		return true;
   1991	case IOMMU_CAP_NOEXEC:
   1992		return true;
   1993	default:
   1994		return false;
   1995	}
   1996}
   1997
   1998static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
   1999{
   2000	struct arm_smmu_domain *smmu_domain;
   2001
   2002	if (type != IOMMU_DOMAIN_UNMANAGED &&
   2003	    type != IOMMU_DOMAIN_DMA &&
   2004	    type != IOMMU_DOMAIN_DMA_FQ &&
   2005	    type != IOMMU_DOMAIN_IDENTITY)
   2006		return NULL;
   2007
   2008	/*
   2009	 * Allocate the domain and initialise some of its data structures.
   2010	 * We can't really do anything meaningful until we've added a
   2011	 * master.
   2012	 */
   2013	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
   2014	if (!smmu_domain)
   2015		return NULL;
   2016
   2017	mutex_init(&smmu_domain->init_mutex);
   2018	INIT_LIST_HEAD(&smmu_domain->devices);
   2019	spin_lock_init(&smmu_domain->devices_lock);
   2020	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
   2021
   2022	return &smmu_domain->domain;
   2023}
   2024
   2025static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
   2026{
   2027	int idx, size = 1 << span;
   2028
   2029	do {
   2030		idx = find_first_zero_bit(map, size);
   2031		if (idx == size)
   2032			return -ENOSPC;
   2033	} while (test_and_set_bit(idx, map));
   2034
   2035	return idx;
   2036}
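
        /*
         * Used below for VMID allocation: e.g. with vmid_bits = 16 the bitmap
         * spans 65536 bits, and bit 0 is reserved at init time for the
         * stage-2 bypass STEs, so domains receive VMIDs from 1 upwards.
         */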
   2037
   2038static void arm_smmu_bitmap_free(unsigned long *map, int idx)
   2039{
   2040	clear_bit(idx, map);
   2041}
   2042
   2043static void arm_smmu_domain_free(struct iommu_domain *domain)
   2044{
   2045	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
   2046	struct arm_smmu_device *smmu = smmu_domain->smmu;
   2047
   2048	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
   2049
   2050	/* Free the CD and ASID, if we allocated them */
   2051	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
   2052		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
   2053
   2054		/* Prevent SVA from touching the CD while we're freeing it */
   2055		mutex_lock(&arm_smmu_asid_lock);
   2056		if (cfg->cdcfg.cdtab)
   2057			arm_smmu_free_cd_tables(smmu_domain);
   2058		arm_smmu_free_asid(&cfg->cd);
   2059		mutex_unlock(&arm_smmu_asid_lock);
   2060	} else {
   2061		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
   2062		if (cfg->vmid)
   2063			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
   2064	}
   2065
   2066	kfree(smmu_domain);
   2067}
   2068
   2069static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
   2070				       struct arm_smmu_master *master,
   2071				       struct io_pgtable_cfg *pgtbl_cfg)
   2072{
   2073	int ret;
   2074	u32 asid;
   2075	struct arm_smmu_device *smmu = smmu_domain->smmu;
   2076	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
   2077	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
   2078
   2079	refcount_set(&cfg->cd.refs, 1);
   2080
   2081	/* Prevent SVA from modifying the ASID until it is written to the CD */
   2082	mutex_lock(&arm_smmu_asid_lock);
   2083	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
   2084		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
   2085	if (ret)
   2086		goto out_unlock;
   2087
   2088	cfg->s1cdmax = master->ssid_bits;
   2089
   2090	smmu_domain->stall_enabled = master->stall_enabled;
   2091
   2092	ret = arm_smmu_alloc_cd_tables(smmu_domain);
   2093	if (ret)
   2094		goto out_free_asid;
   2095
   2096	cfg->cd.asid	= (u16)asid;
   2097	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
   2098	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
   2099			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
   2100			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
   2101			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
   2102			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
   2103			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
   2104			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
   2105	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
   2106
   2107	/*
   2108	 * Note that this will end up calling arm_smmu_sync_cd() before
   2109	 * the master has been added to the devices list for this domain.
   2110	 * This isn't an issue because the STE hasn't been installed yet.
   2111	 */
   2112	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
   2113	if (ret)
   2114		goto out_free_cd_tables;
   2115
   2116	mutex_unlock(&arm_smmu_asid_lock);
   2117	return 0;
   2118
   2119out_free_cd_tables:
   2120	arm_smmu_free_cd_tables(smmu_domain);
   2121out_free_asid:
   2122	arm_smmu_free_asid(&cfg->cd);
   2123out_unlock:
   2124	mutex_unlock(&arm_smmu_asid_lock);
   2125	return ret;
   2126}
   2127
   2128static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
   2129				       struct arm_smmu_master *master,
   2130				       struct io_pgtable_cfg *pgtbl_cfg)
   2131{
   2132	int vmid;
   2133	struct arm_smmu_device *smmu = smmu_domain->smmu;
   2134	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
   2135	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
   2136
   2137	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
   2138	if (vmid < 0)
   2139		return vmid;
   2140
   2141	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
   2142	cfg->vmid	= (u16)vmid;
   2143	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
   2144	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
   2145			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
   2146			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
   2147			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
   2148			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
   2149			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
   2150			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
   2151	return 0;
   2152}
   2153
   2154static int arm_smmu_domain_finalise(struct iommu_domain *domain,
   2155				    struct arm_smmu_master *master)
   2156{
   2157	int ret;
   2158	unsigned long ias, oas;
   2159	enum io_pgtable_fmt fmt;
   2160	struct io_pgtable_cfg pgtbl_cfg;
   2161	struct io_pgtable_ops *pgtbl_ops;
   2162	int (*finalise_stage_fn)(struct arm_smmu_domain *,
   2163				 struct arm_smmu_master *,
   2164				 struct io_pgtable_cfg *);
   2165	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
   2166	struct arm_smmu_device *smmu = smmu_domain->smmu;
   2167
   2168	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
   2169		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
   2170		return 0;
   2171	}
   2172
   2173	/* Restrict the stage to what we can actually support */
   2174	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
   2175		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
   2176	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
   2177		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
   2178
   2179	switch (smmu_domain->stage) {
   2180	case ARM_SMMU_DOMAIN_S1:
   2181		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
   2182		ias = min_t(unsigned long, ias, VA_BITS);
   2183		oas = smmu->ias;
   2184		fmt = ARM_64_LPAE_S1;
   2185		finalise_stage_fn = arm_smmu_domain_finalise_s1;
   2186		break;
   2187	case ARM_SMMU_DOMAIN_NESTED:
   2188	case ARM_SMMU_DOMAIN_S2:
   2189		ias = smmu->ias;
   2190		oas = smmu->oas;
   2191		fmt = ARM_64_LPAE_S2;
   2192		finalise_stage_fn = arm_smmu_domain_finalise_s2;
   2193		break;
   2194	default:
   2195		return -EINVAL;
   2196	}
   2197
   2198	pgtbl_cfg = (struct io_pgtable_cfg) {
   2199		.pgsize_bitmap	= smmu->pgsize_bitmap,
   2200		.ias		= ias,
   2201		.oas		= oas,
   2202		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
   2203		.tlb		= &arm_smmu_flush_ops,
   2204		.iommu_dev	= smmu->dev,
   2205	};
   2206
   2207	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
   2208	if (!pgtbl_ops)
   2209		return -ENOMEM;
   2210
   2211	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
   2212	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
   2213	domain->geometry.force_aperture = true;
   2214
   2215	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
   2216	if (ret < 0) {
   2217		free_io_pgtable_ops(pgtbl_ops);
   2218		return ret;
   2219	}
   2220
   2221	smmu_domain->pgtbl_ops = pgtbl_ops;
   2222	return 0;
   2223}
   2224
   2225static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
   2226{
   2227	__le64 *step;
   2228	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
   2229
   2230	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
   2231		struct arm_smmu_strtab_l1_desc *l1_desc;
   2232		int idx;
   2233
   2234		/* Two-level walk */
   2235		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
   2236		l1_desc = &cfg->l1_desc[idx];
   2237		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
   2238		step = &l1_desc->l2ptr[idx];
   2239	} else {
   2240		/* Simple linear lookup */
   2241		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
   2242	}
   2243
   2244	return step;
   2245}
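
        /*
         * For example (illustrative, assuming STRTAB_SPLIT = 8 and 8-dword
         * STEs): with a two-level stream table, SID 0x1234 selects L1
         * descriptor 0x12, and the step returned above points at the first
         * dword of STE 0x34 within that descriptor's level-2 table. With a
         * linear table, the same SID simply indexes STE 0x1234 directly.
         */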
   2246
   2247static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
   2248{
   2249	int i, j;
   2250	struct arm_smmu_device *smmu = master->smmu;
   2251
   2252	for (i = 0; i < master->num_streams; ++i) {
   2253		u32 sid = master->streams[i].id;
   2254		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
   2255
   2256		/* Bridged PCI devices may end up with duplicated IDs */
   2257		for (j = 0; j < i; j++)
   2258			if (master->streams[j].id == sid)
   2259				break;
   2260		if (j < i)
   2261			continue;
   2262
   2263		arm_smmu_write_strtab_ent(master, sid, step);
   2264	}
   2265}
   2266
   2267static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
   2268{
   2269	struct device *dev = master->dev;
   2270	struct arm_smmu_device *smmu = master->smmu;
   2271	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
   2272
   2273	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
   2274		return false;
   2275
   2276	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
   2277		return false;
   2278
   2279	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
   2280}
   2281
   2282static void arm_smmu_enable_ats(struct arm_smmu_master *master)
   2283{
   2284	size_t stu;
   2285	struct pci_dev *pdev;
   2286	struct arm_smmu_device *smmu = master->smmu;
   2287	struct arm_smmu_domain *smmu_domain = master->domain;
   2288
   2289	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
   2290	if (!master->ats_enabled)
   2291		return;
   2292
   2293	/* Smallest Translation Unit: log2 of the smallest supported granule */
   2294	stu = __ffs(smmu->pgsize_bitmap);
   2295	pdev = to_pci_dev(master->dev);
   2296
   2297	atomic_inc(&smmu_domain->nr_ats_masters);
   2298	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
   2299	if (pci_enable_ats(pdev, stu))
   2300		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
   2301}
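
        /*
         * For a typical pgsize_bitmap whose smallest granule is 4KiB, stu is
         * 12; pci_enable_ats() takes this log2 page size and programs the
         * endpoint's ATS capability accordingly, so a failure above is
         * reported with the offending shift (e.g. "STU 12").
         */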
   2302
   2303static void arm_smmu_disable_ats(struct arm_smmu_master *master)
   2304{
   2305	struct arm_smmu_domain *smmu_domain = master->domain;
   2306
   2307	if (!master->ats_enabled)
   2308		return;
   2309
   2310	pci_disable_ats(to_pci_dev(master->dev));
   2311	/*
   2312	 * Ensure ATS is disabled at the endpoint before we issue the
   2313	 * ATC invalidation via the SMMU.
   2314	 */
   2315	wmb();
   2316	arm_smmu_atc_inv_master(master);
   2317	atomic_dec(&smmu_domain->nr_ats_masters);
   2318}
   2319
   2320static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
   2321{
   2322	int ret;
   2323	int features;
   2324	int num_pasids;
   2325	struct pci_dev *pdev;
   2326
   2327	if (!dev_is_pci(master->dev))
   2328		return -ENODEV;
   2329
   2330	pdev = to_pci_dev(master->dev);
   2331
   2332	features = pci_pasid_features(pdev);
   2333	if (features < 0)
   2334		return features;
   2335
   2336	num_pasids = pci_max_pasids(pdev);
   2337	if (num_pasids <= 0)
   2338		return num_pasids;
   2339
   2340	ret = pci_enable_pasid(pdev, features);
   2341	if (ret) {
   2342		dev_err(&pdev->dev, "Failed to enable PASID\n");
   2343		return ret;
   2344	}
   2345
   2346	master->ssid_bits = min_t(u8, ilog2(num_pasids),
   2347				  master->smmu->ssid_bits);
   2348	return 0;
   2349}
   2350
   2351static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
   2352{
   2353	struct pci_dev *pdev;
   2354
   2355	if (!dev_is_pci(master->dev))
   2356		return;
   2357
   2358	pdev = to_pci_dev(master->dev);
   2359
   2360	if (!pdev->pasid_enabled)
   2361		return;
   2362
   2363	master->ssid_bits = 0;
   2364	pci_disable_pasid(pdev);
   2365}
   2366
   2367static void arm_smmu_detach_dev(struct arm_smmu_master *master)
   2368{
   2369	unsigned long flags;
   2370	struct arm_smmu_domain *smmu_domain = master->domain;
   2371
   2372	if (!smmu_domain)
   2373		return;
   2374
   2375	arm_smmu_disable_ats(master);
   2376
   2377	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
   2378	list_del(&master->domain_head);
   2379	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
   2380
   2381	master->domain = NULL;
   2382	master->ats_enabled = false;
   2383	arm_smmu_install_ste_for_dev(master);
   2384}
   2385
   2386static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
   2387{
   2388	int ret = 0;
   2389	unsigned long flags;
   2390	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
   2391	struct arm_smmu_device *smmu;
   2392	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
   2393	struct arm_smmu_master *master;
   2394
   2395	if (!fwspec)
   2396		return -ENOENT;
   2397
   2398	master = dev_iommu_priv_get(dev);
   2399	smmu = master->smmu;
   2400
   2401	/*
   2402	 * Checking that SVA is disabled ensures that this device isn't bound to
   2403	 * any mm, and can be safely detached from its old domain. Bonds cannot
   2404	 * be removed concurrently since we're holding the group mutex.
   2405	 */
   2406	if (arm_smmu_master_sva_enabled(master)) {
   2407		dev_err(dev, "cannot attach - SVA enabled\n");
   2408		return -EBUSY;
   2409	}
   2410
   2411	arm_smmu_detach_dev(master);
   2412
   2413	mutex_lock(&smmu_domain->init_mutex);
   2414
   2415	if (!smmu_domain->smmu) {
   2416		smmu_domain->smmu = smmu;
   2417		ret = arm_smmu_domain_finalise(domain, master);
   2418		if (ret) {
   2419			smmu_domain->smmu = NULL;
   2420			goto out_unlock;
   2421		}
   2422	} else if (smmu_domain->smmu != smmu) {
   2423		dev_err(dev,
   2424			"cannot attach to SMMU %s (upstream of %s)\n",
   2425			dev_name(smmu_domain->smmu->dev),
   2426			dev_name(smmu->dev));
   2427		ret = -ENXIO;
   2428		goto out_unlock;
   2429	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
   2430		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
   2431		dev_err(dev,
   2432			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
   2433			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
   2434		ret = -EINVAL;
   2435		goto out_unlock;
   2436	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
   2437		   smmu_domain->stall_enabled != master->stall_enabled) {
   2438		dev_err(dev, "cannot attach to stall-%s domain\n",
   2439			smmu_domain->stall_enabled ? "enabled" : "disabled");
   2440		ret = -EINVAL;
   2441		goto out_unlock;
   2442	}
   2443
   2444	master->domain = smmu_domain;
   2445
   2446	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
   2447		master->ats_enabled = arm_smmu_ats_supported(master);
   2448
   2449	arm_smmu_install_ste_for_dev(master);
   2450
   2451	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
   2452	list_add(&master->domain_head, &smmu_domain->devices);
   2453	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
   2454
   2455	arm_smmu_enable_ats(master);
   2456
   2457out_unlock:
   2458	mutex_unlock(&smmu_domain->init_mutex);
   2459	return ret;
   2460}
   2461
   2462static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
   2463			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
   2464			      int prot, gfp_t gfp, size_t *mapped)
   2465{
   2466	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
   2467
   2468	if (!ops)
   2469		return -ENODEV;
   2470
   2471	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
   2472}
   2473
   2474static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
   2475				   size_t pgsize, size_t pgcount,
   2476				   struct iommu_iotlb_gather *gather)
   2477{
   2478	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
   2479	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
   2480
   2481	if (!ops)
   2482		return 0;
   2483
   2484	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
   2485}
   2486
   2487static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
   2488{
   2489	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
   2490
   2491	if (smmu_domain->smmu)
   2492		arm_smmu_tlb_inv_context(smmu_domain);
   2493}
   2494
   2495static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
   2496				struct iommu_iotlb_gather *gather)
   2497{
   2498	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
   2499
   2500	if (!gather->pgsize)
   2501		return;
   2502
   2503	arm_smmu_tlb_inv_range_domain(gather->start,
   2504				      gather->end - gather->start + 1,
   2505				      gather->pgsize, true, smmu_domain);
   2506}
   2507
   2508static phys_addr_t
   2509arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
   2510{
   2511	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
   2512
   2513	if (!ops)
   2514		return 0;
   2515
   2516	return ops->iova_to_phys(ops, iova);
   2517}
   2518
   2519static struct platform_driver arm_smmu_driver;
   2520
   2521static
   2522struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
   2523{
   2524	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
   2525							  fwnode);
   2526	put_device(dev);
   2527	return dev ? dev_get_drvdata(dev) : NULL;
   2528}
   2529
   2530static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
   2531{
   2532	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
   2533
   2534	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
   2535		limit *= 1UL << STRTAB_SPLIT;
   2536
   2537	return sid < limit;
   2538}
   2539
   2540static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
   2541				  struct arm_smmu_master *master)
   2542{
   2543	int i;
   2544	int ret = 0;
   2545	struct arm_smmu_stream *new_stream, *cur_stream;
   2546	struct rb_node **new_node, *parent_node = NULL;
   2547	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
   2548
   2549	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
   2550				  GFP_KERNEL);
   2551	if (!master->streams)
   2552		return -ENOMEM;
   2553	master->num_streams = fwspec->num_ids;
   2554
   2555	mutex_lock(&smmu->streams_mutex);
   2556	for (i = 0; i < fwspec->num_ids; i++) {
   2557		u32 sid = fwspec->ids[i];
   2558
   2559		new_stream = &master->streams[i];
   2560		new_stream->id = sid;
   2561		new_stream->master = master;
   2562
   2563		/*
   2564		 * Check the SIDs are in range of the SMMU and our stream table
   2565		 */
   2566		if (!arm_smmu_sid_in_range(smmu, sid)) {
   2567			ret = -ERANGE;
   2568			break;
   2569		}
   2570
   2571		/* Ensure l2 strtab is initialised */
   2572		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
   2573			ret = arm_smmu_init_l2_strtab(smmu, sid);
   2574			if (ret)
   2575				break;
   2576		}
   2577
   2578		/* Insert into SID tree */
   2579		new_node = &(smmu->streams.rb_node);
   2580		while (*new_node) {
   2581			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
   2582					      node);
   2583			parent_node = *new_node;
   2584			if (cur_stream->id > new_stream->id) {
   2585				new_node = &((*new_node)->rb_left);
   2586			} else if (cur_stream->id < new_stream->id) {
   2587				new_node = &((*new_node)->rb_right);
   2588			} else {
   2589				dev_warn(master->dev,
   2590					 "stream %u already in tree\n",
   2591					 cur_stream->id);
   2592				ret = -EINVAL;
   2593				break;
   2594			}
   2595		}
   2596		if (ret)
   2597			break;
   2598
   2599		rb_link_node(&new_stream->node, parent_node, new_node);
   2600		rb_insert_color(&new_stream->node, &smmu->streams);
   2601	}
   2602
   2603	if (ret) {
   2604		for (i--; i >= 0; i--)
   2605			rb_erase(&master->streams[i].node, &smmu->streams);
   2606		kfree(master->streams);
   2607	}
   2608	mutex_unlock(&smmu->streams_mutex);
   2609
   2610	return ret;
   2611}
   2612
   2613static void arm_smmu_remove_master(struct arm_smmu_master *master)
   2614{
   2615	int i;
   2616	struct arm_smmu_device *smmu = master->smmu;
   2617	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
   2618
   2619	if (!smmu || !master->streams)
   2620		return;
   2621
   2622	mutex_lock(&smmu->streams_mutex);
   2623	for (i = 0; i < fwspec->num_ids; i++)
   2624		rb_erase(&master->streams[i].node, &smmu->streams);
   2625	mutex_unlock(&smmu->streams_mutex);
   2626
   2627	kfree(master->streams);
   2628}
   2629
   2630static struct iommu_ops arm_smmu_ops;
   2631
   2632static struct iommu_device *arm_smmu_probe_device(struct device *dev)
   2633{
   2634	int ret;
   2635	struct arm_smmu_device *smmu;
   2636	struct arm_smmu_master *master;
   2637	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
   2638
   2639	if (!fwspec || fwspec->ops != &arm_smmu_ops)
   2640		return ERR_PTR(-ENODEV);
   2641
   2642	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
   2643		return ERR_PTR(-EBUSY);
   2644
   2645	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
   2646	if (!smmu)
   2647		return ERR_PTR(-ENODEV);
   2648
   2649	master = kzalloc(sizeof(*master), GFP_KERNEL);
   2650	if (!master)
   2651		return ERR_PTR(-ENOMEM);
   2652
   2653	master->dev = dev;
   2654	master->smmu = smmu;
   2655	INIT_LIST_HEAD(&master->bonds);
   2656	dev_iommu_priv_set(dev, master);
   2657
   2658	ret = arm_smmu_insert_master(smmu, master);
   2659	if (ret)
   2660		goto err_free_master;
   2661
   2662	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
   2663	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
   2664
   2665	/*
   2666	 * Note that PASID must be enabled before, and disabled after ATS:
   2667	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
   2668	 *
   2669	 *   Behavior is undefined if this bit is Set and the value of the PASID
   2670	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
   2671	 *   are changed.
   2672	 */
   2673	arm_smmu_enable_pasid(master);
   2674
   2675	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
   2676		master->ssid_bits = min_t(u8, master->ssid_bits,
   2677					  CTXDESC_LINEAR_CDMAX);
   2678
   2679	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
   2680	     device_property_read_bool(dev, "dma-can-stall")) ||
   2681	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
   2682		master->stall_enabled = true;
   2683
   2684	return &smmu->iommu;
   2685
   2686err_free_master:
   2687	kfree(master);
   2688	dev_iommu_priv_set(dev, NULL);
   2689	return ERR_PTR(ret);
   2690}
   2691
   2692static void arm_smmu_release_device(struct device *dev)
   2693{
   2694	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
   2695	struct arm_smmu_master *master;
   2696
   2697	if (!fwspec || fwspec->ops != &arm_smmu_ops)
   2698		return;
   2699
   2700	master = dev_iommu_priv_get(dev);
   2701	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
   2702		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
   2703	arm_smmu_detach_dev(master);
   2704	arm_smmu_disable_pasid(master);
   2705	arm_smmu_remove_master(master);
   2706	kfree(master);
   2707	iommu_fwspec_free(dev);
   2708}
   2709
   2710static struct iommu_group *arm_smmu_device_group(struct device *dev)
   2711{
   2712	struct iommu_group *group;
   2713
   2714	/*
   2715	 * We don't support devices sharing stream IDs other than PCI RID
   2716	 * aliases, since the necessary ID-to-device lookup becomes rather
   2717	 * impractical given a potential sparse 32-bit stream ID space.
   2718	 */
   2719	if (dev_is_pci(dev))
   2720		group = pci_device_group(dev);
   2721	else
   2722		group = generic_device_group(dev);
   2723
   2724	return group;
   2725}
   2726
   2727static int arm_smmu_enable_nesting(struct iommu_domain *domain)
   2728{
   2729	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
   2730	int ret = 0;
   2731
   2732	mutex_lock(&smmu_domain->init_mutex);
   2733	if (smmu_domain->smmu)
   2734		ret = -EPERM;
   2735	else
   2736		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
   2737	mutex_unlock(&smmu_domain->init_mutex);
   2738
   2739	return ret;
   2740}
   2741
   2742static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
   2743{
   2744	return iommu_fwspec_add_ids(dev, args->args, 1);
   2745}
   2746
   2747static void arm_smmu_get_resv_regions(struct device *dev,
   2748				      struct list_head *head)
   2749{
   2750	struct iommu_resv_region *region;
   2751	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
   2752
   2753	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
   2754					 prot, IOMMU_RESV_SW_MSI);
   2755	if (!region)
   2756		return;
   2757
   2758	list_add_tail(&region->list, head);
   2759
   2760	iommu_dma_get_resv_regions(dev, head);
   2761}
   2762
   2763static bool arm_smmu_dev_has_feature(struct device *dev,
   2764				     enum iommu_dev_features feat)
   2765{
   2766	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
   2767
   2768	if (!master)
   2769		return false;
   2770
   2771	switch (feat) {
   2772	case IOMMU_DEV_FEAT_IOPF:
   2773		return arm_smmu_master_iopf_supported(master);
   2774	case IOMMU_DEV_FEAT_SVA:
   2775		return arm_smmu_master_sva_supported(master);
   2776	default:
   2777		return false;
   2778	}
   2779}
   2780
   2781static bool arm_smmu_dev_feature_enabled(struct device *dev,
   2782					 enum iommu_dev_features feat)
   2783{
   2784	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
   2785
   2786	if (!master)
   2787		return false;
   2788
   2789	switch (feat) {
   2790	case IOMMU_DEV_FEAT_IOPF:
   2791		return master->iopf_enabled;
   2792	case IOMMU_DEV_FEAT_SVA:
   2793		return arm_smmu_master_sva_enabled(master);
   2794	default:
   2795		return false;
   2796	}
   2797}
   2798
   2799static int arm_smmu_dev_enable_feature(struct device *dev,
   2800				       enum iommu_dev_features feat)
   2801{
   2802	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
   2803
   2804	if (!arm_smmu_dev_has_feature(dev, feat))
   2805		return -ENODEV;
   2806
   2807	if (arm_smmu_dev_feature_enabled(dev, feat))
   2808		return -EBUSY;
   2809
   2810	switch (feat) {
   2811	case IOMMU_DEV_FEAT_IOPF:
   2812		master->iopf_enabled = true;
   2813		return 0;
   2814	case IOMMU_DEV_FEAT_SVA:
   2815		return arm_smmu_master_enable_sva(master);
   2816	default:
   2817		return -EINVAL;
   2818	}
   2819}
   2820
   2821static int arm_smmu_dev_disable_feature(struct device *dev,
   2822					enum iommu_dev_features feat)
   2823{
   2824	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
   2825
   2826	if (!arm_smmu_dev_feature_enabled(dev, feat))
   2827		return -EINVAL;
   2828
   2829	switch (feat) {
   2830	case IOMMU_DEV_FEAT_IOPF:
   2831		if (master->sva_enabled)
   2832			return -EBUSY;
   2833		master->iopf_enabled = false;
   2834		return 0;
   2835	case IOMMU_DEV_FEAT_SVA:
   2836		return arm_smmu_master_disable_sva(master);
   2837	default:
   2838		return -EINVAL;
   2839	}
   2840}
   2841
   2842static struct iommu_ops arm_smmu_ops = {
   2843	.capable		= arm_smmu_capable,
   2844	.domain_alloc		= arm_smmu_domain_alloc,
   2845	.probe_device		= arm_smmu_probe_device,
   2846	.release_device		= arm_smmu_release_device,
   2847	.device_group		= arm_smmu_device_group,
   2848	.of_xlate		= arm_smmu_of_xlate,
   2849	.get_resv_regions	= arm_smmu_get_resv_regions,
   2850	.put_resv_regions	= generic_iommu_put_resv_regions,
   2851	.dev_has_feat		= arm_smmu_dev_has_feature,
   2852	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
   2853	.dev_enable_feat	= arm_smmu_dev_enable_feature,
   2854	.dev_disable_feat	= arm_smmu_dev_disable_feature,
   2855	.sva_bind		= arm_smmu_sva_bind,
   2856	.sva_unbind		= arm_smmu_sva_unbind,
   2857	.sva_get_pasid		= arm_smmu_sva_get_pasid,
   2858	.page_response		= arm_smmu_page_response,
   2859	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
   2860	.owner			= THIS_MODULE,
   2861	.default_domain_ops = &(const struct iommu_domain_ops) {
   2862		.attach_dev		= arm_smmu_attach_dev,
   2863		.map_pages		= arm_smmu_map_pages,
   2864		.unmap_pages		= arm_smmu_unmap_pages,
   2865		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
   2866		.iotlb_sync		= arm_smmu_iotlb_sync,
   2867		.iova_to_phys		= arm_smmu_iova_to_phys,
   2868		.enable_nesting		= arm_smmu_enable_nesting,
   2869		.free			= arm_smmu_domain_free,
   2870	}
   2871};
   2872
   2873/* Probing and initialisation functions */
   2874static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
   2875				   struct arm_smmu_queue *q,
   2876				   void __iomem *page,
   2877				   unsigned long prod_off,
   2878				   unsigned long cons_off,
   2879				   size_t dwords, const char *name)
   2880{
   2881	size_t qsz;
   2882
   2883	do {
   2884		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
   2885		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
   2886					      GFP_KERNEL);
   2887		if (q->base || qsz < PAGE_SIZE)
   2888			break;
   2889
   2890		q->llq.max_n_shift--;
   2891	} while (1);
   2892
   2893	if (!q->base) {
   2894		dev_err(smmu->dev,
   2895			"failed to allocate queue (0x%zx bytes) for %s\n",
   2896			qsz, name);
   2897		return -ENOMEM;
   2898	}
   2899
   2900	if (!WARN_ON(q->base_dma & (qsz - 1))) {
   2901		dev_info(smmu->dev, "allocated %u entries for %s\n",
   2902			 1 << q->llq.max_n_shift, name);
   2903	}
   2904
   2905	q->prod_reg	= page + prod_off;
   2906	q->cons_reg	= page + cons_off;
   2907	q->ent_dwords	= dwords;
   2908
   2909	q->q_base  = Q_BASE_RWA;
   2910	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
   2911	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
   2912
   2913	q->llq.prod = q->llq.cons = 0;
   2914	return 0;
   2915}
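
        /*
         * Queue sizing example (illustrative, assuming 16-byte commands, i.e.
         * CMDQ_ENT_DWORDS = 2, and 4KiB pages): a max_n_shift of 8 gives
         * qsz = 256 * 16 = 4KiB. If a larger allocation fails, max_n_shift is
         * decremented and the allocation retried; we only give up once even a
         * sub-page allocation has failed. dmam_alloc_coherent() is expected
         * to return a buffer naturally aligned to these power-of-two sizes,
         * hence the WARN_ON() above rather than a hard error.
         */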
   2916
   2917static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
   2918{
   2919	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
   2920	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
   2921
   2922	atomic_set(&cmdq->owner_prod, 0);
   2923	atomic_set(&cmdq->lock, 0);
   2924
   2925	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
   2926							      GFP_KERNEL);
   2927	if (!cmdq->valid_map)
   2928		return -ENOMEM;
   2929
   2930	return 0;
   2931}
   2932
   2933static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
   2934{
   2935	int ret;
   2936
   2937	/* cmdq */
   2938	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
   2939				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
   2940				      CMDQ_ENT_DWORDS, "cmdq");
   2941	if (ret)
   2942		return ret;
   2943
   2944	ret = arm_smmu_cmdq_init(smmu);
   2945	if (ret)
   2946		return ret;
   2947
   2948	/* evtq */
   2949	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
   2950				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
   2951				      EVTQ_ENT_DWORDS, "evtq");
   2952	if (ret)
   2953		return ret;
   2954
   2955	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
   2956	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
   2957		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
   2958		if (!smmu->evtq.iopf)
   2959			return -ENOMEM;
   2960	}
   2961
   2962	/* priq */
   2963	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
   2964		return 0;
   2965
   2966	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
   2967				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
   2968				       PRIQ_ENT_DWORDS, "priq");
   2969}
   2970
   2971static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
   2972{
   2973	unsigned int i;
   2974	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
   2975	void *strtab = smmu->strtab_cfg.strtab;
   2976
   2977	cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
   2978				    sizeof(*cfg->l1_desc), GFP_KERNEL);
   2979	if (!cfg->l1_desc)
   2980		return -ENOMEM;
   2981
   2982	for (i = 0; i < cfg->num_l1_ents; ++i) {
   2983		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
   2984		strtab += STRTAB_L1_DESC_DWORDS << 3;
   2985	}
   2986
   2987	return 0;
   2988}
   2989
   2990static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
   2991{
   2992	void *strtab;
   2993	u64 reg;
   2994	u32 size, l1size;
   2995	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
   2996
   2997	/* Calculate the L1 size, capped to the SIDSIZE. */
   2998	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
   2999	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
   3000	cfg->num_l1_ents = 1 << size;
   3001
   3002	size += STRTAB_SPLIT;
   3003	if (size < smmu->sid_bits)
   3004		dev_warn(smmu->dev,
   3005			 "2-level strtab only covers %u/%u bits of SID\n",
   3006			 size, smmu->sid_bits);
   3007
   3008	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
   3009	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
   3010				     GFP_KERNEL);
   3011	if (!strtab) {
   3012		dev_err(smmu->dev,
   3013			"failed to allocate l1 stream table (%u bytes)\n",
   3014			l1size);
   3015		return -ENOMEM;
   3016	}
   3017	cfg->strtab = strtab;
   3018
   3019	/* Configure strtab_base_cfg for 2 levels */
   3020	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
   3021	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
   3022	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
   3023	cfg->strtab_base_cfg = reg;
   3024
   3025	return arm_smmu_init_l1_strtab(smmu);
   3026}
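
        /*
         * Sizing example (illustrative, assuming STRTAB_L1_SZ_SHIFT = 20,
         * 8-byte L1 descriptors and STRTAB_SPLIT = 8): the cap above limits
         * the L1 table to 2^17 descriptors (1MiB). With SIDSIZE = 32 that
         * covers 17 + 8 = 25 bits of SID, so the "only covers 25/32 bits"
         * warning fires and StreamIDs of 2^25 and above are out of range.
         * With SIDSIZE = 16, the L1 table shrinks to 2^8 descriptors (2KiB)
         * and covers the full SID space.
         */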
   3027
   3028static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
   3029{
   3030	void *strtab;
   3031	u64 reg;
   3032	u32 size;
   3033	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
   3034
   3035	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
   3036	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
   3037				     GFP_KERNEL);
   3038	if (!strtab) {
   3039		dev_err(smmu->dev,
   3040			"failed to allocate linear stream table (%u bytes)\n",
   3041			size);
   3042		return -ENOMEM;
   3043	}
   3044	cfg->strtab = strtab;
   3045	cfg->num_l1_ents = 1 << smmu->sid_bits;
   3046
   3047	/* Configure strtab_base_cfg for a linear table covering all SIDs */
   3048	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
   3049	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
   3050	cfg->strtab_base_cfg = reg;
   3051
   3052	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
   3053	return 0;
   3054}
   3055
   3056static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
   3057{
   3058	u64 reg;
   3059	int ret;
   3060
   3061	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
   3062		ret = arm_smmu_init_strtab_2lvl(smmu);
   3063	else
   3064		ret = arm_smmu_init_strtab_linear(smmu);
   3065
   3066	if (ret)
   3067		return ret;
   3068
   3069	/* Set the strtab base address */
   3070	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
   3071	reg |= STRTAB_BASE_RA;
   3072	smmu->strtab_cfg.strtab_base = reg;
   3073
   3074	/* Allocate the first VMID for stage-2 bypass STEs */
   3075	set_bit(0, smmu->vmid_map);
   3076	return 0;
   3077}
   3078
   3079static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
   3080{
   3081	int ret;
   3082
   3083	mutex_init(&smmu->streams_mutex);
   3084	smmu->streams = RB_ROOT;
   3085
   3086	ret = arm_smmu_init_queues(smmu);
   3087	if (ret)
   3088		return ret;
   3089
   3090	return arm_smmu_init_strtab(smmu);
   3091}
   3092
   3093static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
   3094				   unsigned int reg_off, unsigned int ack_off)
   3095{
   3096	u32 reg;
   3097
   3098	writel_relaxed(val, smmu->base + reg_off);
   3099	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
   3100					  1, ARM_SMMU_POLL_TIMEOUT_US);
   3101}
   3102
   3103/* GBPA is "special" */
   3104static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
   3105{
   3106	int ret;
   3107	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
   3108
   3109	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
   3110					 1, ARM_SMMU_POLL_TIMEOUT_US);
   3111	if (ret)
   3112		return ret;
   3113
   3114	reg &= ~clr;
   3115	reg |= set;
   3116	writel_relaxed(reg | GBPA_UPDATE, gbpa);
   3117	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
   3118					 1, ARM_SMMU_POLL_TIMEOUT_US);
   3119
   3120	if (ret)
   3121		dev_err(smmu->dev, "GBPA not responding to update\n");
   3122	return ret;
   3123}
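
        /*
         * "Special" because GBPA has no separate ACK register: the UPDATE bit
         * is written as 1 together with the new fields and reads back as 0
         * once the SMMU has latched them, hence the poll-write-poll sequence
         * above. arm_smmu_device_reset() uses this to flip the global bypass
         * behaviour between abort (GBPA_ABORT set) and bypass while SMMUEN is
         * clear.
         */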
   3124
   3125static void arm_smmu_free_msis(void *data)
   3126{
   3127	struct device *dev = data;
   3128	platform_msi_domain_free_irqs(dev);
   3129}
   3130
   3131static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
   3132{
   3133	phys_addr_t doorbell;
   3134	struct device *dev = msi_desc_to_dev(desc);
   3135	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
   3136	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
   3137
   3138	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
   3139	doorbell &= MSI_CFG0_ADDR_MASK;
   3140
   3141	writeq_relaxed(doorbell, smmu->base + cfg[0]);
   3142	writel_relaxed(msg->data, smmu->base + cfg[1]);
   3143	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
   3144}
   3145
   3146static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
   3147{
   3148	int ret, nvec = ARM_SMMU_MAX_MSIS;
   3149	struct device *dev = smmu->dev;
   3150
   3151	/* Clear the MSI address regs */
   3152	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
   3153	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
   3154
   3155	if (smmu->features & ARM_SMMU_FEAT_PRI)
   3156		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
   3157	else
   3158		nvec--;
   3159
   3160	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
   3161		return;
   3162
   3163	if (!dev->msi.domain) {
   3164		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
   3165		return;
   3166	}
   3167
   3168	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
   3169	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
   3170	if (ret) {
   3171		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
   3172		return;
   3173	}
   3174
   3175	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
   3176	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
   3177	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
   3178
   3179	/* Add callback to free MSIs on teardown */
   3180	devm_add_action(dev, arm_smmu_free_msis, dev);
   3181}
   3182
   3183static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
   3184{
   3185	int irq, ret;
   3186
   3187	arm_smmu_setup_msis(smmu);
   3188
   3189	/* Request interrupt lines */
   3190	irq = smmu->evtq.q.irq;
   3191	if (irq) {
   3192		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
   3193						arm_smmu_evtq_thread,
   3194						IRQF_ONESHOT,
   3195						"arm-smmu-v3-evtq", smmu);
   3196		if (ret < 0)
   3197			dev_warn(smmu->dev, "failed to enable evtq irq\n");
   3198	} else {
   3199		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
   3200	}
   3201
   3202	irq = smmu->gerr_irq;
   3203	if (irq) {
   3204		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
   3205				       0, "arm-smmu-v3-gerror", smmu);
   3206		if (ret < 0)
   3207			dev_warn(smmu->dev, "failed to enable gerror irq\n");
   3208	} else {
   3209		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
   3210	}
   3211
   3212	if (smmu->features & ARM_SMMU_FEAT_PRI) {
   3213		irq = smmu->priq.q.irq;
   3214		if (irq) {
   3215			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
   3216							arm_smmu_priq_thread,
   3217							IRQF_ONESHOT,
   3218							"arm-smmu-v3-priq",
   3219							smmu);
   3220			if (ret < 0)
   3221				dev_warn(smmu->dev,
   3222					 "failed to enable priq irq\n");
   3223		} else {
   3224			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
   3225		}
   3226	}
   3227}
   3228
   3229static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
   3230{
   3231	int ret, irq;
   3232	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
   3233
   3234	/* Disable IRQs first */
   3235	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
   3236				      ARM_SMMU_IRQ_CTRLACK);
   3237	if (ret) {
   3238		dev_err(smmu->dev, "failed to disable irqs\n");
   3239		return ret;
   3240	}
   3241
   3242	irq = smmu->combined_irq;
   3243	if (irq) {
   3244		/*
   3245		 * Cavium ThunderX2 implementation doesn't support unique irq
   3246		 * lines. Use a single irq line for all the SMMUv3 interrupts.
   3247		 */
   3248		ret = devm_request_threaded_irq(smmu->dev, irq,
   3249					arm_smmu_combined_irq_handler,
   3250					arm_smmu_combined_irq_thread,
   3251					IRQF_ONESHOT,
   3252					"arm-smmu-v3-combined-irq", smmu);
   3253		if (ret < 0)
   3254			dev_warn(smmu->dev, "failed to enable combined irq\n");
   3255	} else
   3256		arm_smmu_setup_unique_irqs(smmu);
   3257
   3258	if (smmu->features & ARM_SMMU_FEAT_PRI)
   3259		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
   3260
   3261	/* Enable interrupt generation on the SMMU */
   3262	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
   3263				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
   3264	if (ret)
   3265		dev_warn(smmu->dev, "failed to enable irqs\n");
   3266
   3267	return 0;
   3268}
   3269
   3270static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
   3271{
   3272	int ret;
   3273
   3274	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
   3275	if (ret)
   3276		dev_err(smmu->dev, "failed to clear cr0\n");
   3277
   3278	return ret;
   3279}
   3280
   3281static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
   3282{
   3283	int ret;
   3284	u32 reg, enables;
   3285	struct arm_smmu_cmdq_ent cmd;
   3286
   3287	/* Clear CR0 and sync (disables SMMU and queue processing) */
   3288	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
   3289	if (reg & CR0_SMMUEN) {
   3290		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
   3291		WARN_ON(is_kdump_kernel() && !disable_bypass);
   3292		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
   3293	}
   3294
   3295	ret = arm_smmu_device_disable(smmu);
   3296	if (ret)
   3297		return ret;
   3298
   3299	/* CR1 (table and queue memory attributes) */
   3300	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
   3301	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
   3302	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
   3303	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
   3304	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
   3305	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
   3306	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
   3307
   3308	/* CR2 (random crap) */
   3309	reg = CR2_PTM | CR2_RECINVSID;
   3310
   3311	if (smmu->features & ARM_SMMU_FEAT_E2H)
   3312		reg |= CR2_E2H;
   3313
   3314	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
   3315
   3316	/* Stream table */
   3317	writeq_relaxed(smmu->strtab_cfg.strtab_base,
   3318		       smmu->base + ARM_SMMU_STRTAB_BASE);
   3319	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
   3320		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
   3321
   3322	/* Command queue */
   3323	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
   3324	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
   3325	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
   3326
   3327	enables = CR0_CMDQEN;
   3328	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
   3329				      ARM_SMMU_CR0ACK);
   3330	if (ret) {
   3331		dev_err(smmu->dev, "failed to enable command queue\n");
   3332		return ret;
   3333	}
   3334
   3335	/* Invalidate any cached configuration */
   3336	cmd.opcode = CMDQ_OP_CFGI_ALL;
   3337	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
   3338
   3339	/* Invalidate any stale TLB entries */
   3340	if (smmu->features & ARM_SMMU_FEAT_HYP) {
   3341		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
   3342		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
   3343	}
   3344
   3345	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
   3346	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
   3347
   3348	/* Event queue */
   3349	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
   3350	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
   3351	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
   3352
   3353	enables |= CR0_EVTQEN;
   3354	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
   3355				      ARM_SMMU_CR0ACK);
   3356	if (ret) {
   3357		dev_err(smmu->dev, "failed to enable event queue\n");
   3358		return ret;
   3359	}
   3360
   3361	/* PRI queue */
   3362	if (smmu->features & ARM_SMMU_FEAT_PRI) {
   3363		writeq_relaxed(smmu->priq.q.q_base,
   3364			       smmu->base + ARM_SMMU_PRIQ_BASE);
   3365		writel_relaxed(smmu->priq.q.llq.prod,
   3366			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
   3367		writel_relaxed(smmu->priq.q.llq.cons,
   3368			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
   3369
   3370		enables |= CR0_PRIQEN;
   3371		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
   3372					      ARM_SMMU_CR0ACK);
   3373		if (ret) {
   3374			dev_err(smmu->dev, "failed to enable PRI queue\n");
   3375			return ret;
   3376		}
   3377	}
   3378
   3379	if (smmu->features & ARM_SMMU_FEAT_ATS) {
   3380		enables |= CR0_ATSCHK;
   3381		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
   3382					      ARM_SMMU_CR0ACK);
   3383		if (ret) {
   3384			dev_err(smmu->dev, "failed to enable ATS check\n");
   3385			return ret;
   3386		}
   3387	}
   3388
   3389	ret = arm_smmu_setup_irqs(smmu);
   3390	if (ret) {
   3391		dev_err(smmu->dev, "failed to set up irqs\n");
   3392		return ret;
   3393	}
   3394
   3395	if (is_kdump_kernel())
   3396		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
   3397
   3398	/* Enable the SMMU interface, or ensure bypass */
   3399	if (!bypass || disable_bypass) {
   3400		enables |= CR0_SMMUEN;
   3401	} else {
   3402		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
   3403		if (ret)
   3404			return ret;
   3405	}
   3406	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
   3407				      ARM_SMMU_CR0ACK);
   3408	if (ret) {
   3409		dev_err(smmu->dev, "failed to enable SMMU interface\n");
   3410		return ret;
   3411	}
   3412
   3413	return 0;
   3414}
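
/*
 * Note on arm_smmu_device_reset() above: ordering matters.  Each queue's
 * base, producer and consumer registers are programmed before its enable
 * bit is set in CR0, the configuration and TLB invalidations are issued
 * once the command queue is live, IRQs are set up next, and only then is
 * SMMUEN set (or, for bypass, GBPA updated instead), so that no traffic is
 * translated against half-initialised state.
 */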
   3415
   3416static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
   3417{
   3418	u32 reg;
   3419	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
   3420
   3421	/* IDR0 */
   3422	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
   3423
   3424	/* 2-level structures */
   3425	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
   3426		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
   3427
   3428	if (reg & IDR0_CD2L)
   3429		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
   3430
   3431	/*
   3432	 * Translation table endianness.
   3433	 * We currently require the same endianness as the CPU, but this
   3434	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
   3435	 */
   3436	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
   3437	case IDR0_TTENDIAN_MIXED:
   3438		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
   3439		break;
   3440#ifdef __BIG_ENDIAN
   3441	case IDR0_TTENDIAN_BE:
   3442		smmu->features |= ARM_SMMU_FEAT_TT_BE;
   3443		break;
   3444#else
   3445	case IDR0_TTENDIAN_LE:
   3446		smmu->features |= ARM_SMMU_FEAT_TT_LE;
   3447		break;
   3448#endif
   3449	default:
   3450		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
   3451		return -ENXIO;
   3452	}
   3453
   3454	/* Boolean feature flags */
   3455	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
   3456		smmu->features |= ARM_SMMU_FEAT_PRI;
   3457
   3458	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
   3459		smmu->features |= ARM_SMMU_FEAT_ATS;
   3460
   3461	if (reg & IDR0_SEV)
   3462		smmu->features |= ARM_SMMU_FEAT_SEV;
   3463
   3464	if (reg & IDR0_MSI) {
   3465		smmu->features |= ARM_SMMU_FEAT_MSI;
   3466		if (coherent && !disable_msipolling)
   3467			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
   3468	}
   3469
   3470	if (reg & IDR0_HYP) {
   3471		smmu->features |= ARM_SMMU_FEAT_HYP;
   3472		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
   3473			smmu->features |= ARM_SMMU_FEAT_E2H;
   3474	}
   3475
   3476	/*
   3477	 * The coherency feature as set by FW is used in preference to the ID
   3478	 * register, but warn on mismatch.
   3479	 */
   3480	if (!!(reg & IDR0_COHACC) != coherent)
   3481		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
   3482			 coherent ? "true" : "false");
   3483
   3484	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
   3485	case IDR0_STALL_MODEL_FORCE:
   3486		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
   3487		fallthrough;
   3488	case IDR0_STALL_MODEL_STALL:
   3489		smmu->features |= ARM_SMMU_FEAT_STALLS;
   3490	}
   3491
   3492	if (reg & IDR0_S1P)
   3493		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
   3494
   3495	if (reg & IDR0_S2P)
   3496		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
   3497
   3498	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
   3499		dev_err(smmu->dev, "no translation support!\n");
   3500		return -ENXIO;
   3501	}
   3502
   3503	/* We only support the AArch64 table format at present */
   3504	switch (FIELD_GET(IDR0_TTF, reg)) {
   3505	case IDR0_TTF_AARCH32_64:
   3506		smmu->ias = 40;
   3507		fallthrough;
   3508	case IDR0_TTF_AARCH64:
   3509		break;
   3510	default:
   3511		dev_err(smmu->dev, "AArch64 table format not supported!\n");
   3512		return -ENXIO;
   3513	}
   3514
   3515	/* ASID/VMID sizes */
   3516	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
   3517	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
   3518
   3519	/* IDR1 */
   3520	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
   3521	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
   3522		dev_err(smmu->dev, "embedded implementation not supported\n");
   3523		return -ENXIO;
   3524	}
   3525
   3526	/* Queue sizes, capped to ensure natural alignment */
   3527	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
   3528					     FIELD_GET(IDR1_CMDQS, reg));
   3529	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
   3530		/*
   3531		 * We don't support splitting up batches, so one batch of
   3532		 * commands plus an extra sync needs to fit inside the command
   3533		 * queue. There's also no way we can handle the weird alignment
   3534		 * restrictions on the base pointer for a unit-length queue.
   3535		 */
   3536		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
   3537			CMDQ_BATCH_ENTRIES);
   3538		return -ENXIO;
   3539	}
   3540
   3541	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
   3542					     FIELD_GET(IDR1_EVTQS, reg));
   3543	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
   3544					     FIELD_GET(IDR1_PRIQS, reg));
   3545
   3546	/* SID/SSID sizes */
   3547	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
   3548	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
   3549
   3550	/*
   3551	 * If the SMMU supports fewer bits than would fill a single L2 stream
   3552	 * table, use a linear table instead.
   3553	 */
   3554	if (smmu->sid_bits <= STRTAB_SPLIT)
   3555		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
   3556
   3557	/* IDR3 */
   3558	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
   3559	if (FIELD_GET(IDR3_RIL, reg))
   3560		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
   3561
   3562	/* IDR5 */
   3563	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
   3564
   3565	/* Maximum number of outstanding stalls */
   3566	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
   3567
   3568	/* Page sizes */
   3569	if (reg & IDR5_GRAN64K)
   3570		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
   3571	if (reg & IDR5_GRAN16K)
   3572		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
   3573	if (reg & IDR5_GRAN4K)
   3574		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
   3575
   3576	/* Input address size */
   3577	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
   3578		smmu->features |= ARM_SMMU_FEAT_VAX;
   3579
   3580	/* Output address size */
   3581	switch (FIELD_GET(IDR5_OAS, reg)) {
   3582	case IDR5_OAS_32_BIT:
   3583		smmu->oas = 32;
   3584		break;
   3585	case IDR5_OAS_36_BIT:
   3586		smmu->oas = 36;
   3587		break;
   3588	case IDR5_OAS_40_BIT:
   3589		smmu->oas = 40;
   3590		break;
   3591	case IDR5_OAS_42_BIT:
   3592		smmu->oas = 42;
   3593		break;
   3594	case IDR5_OAS_44_BIT:
   3595		smmu->oas = 44;
   3596		break;
   3597	case IDR5_OAS_52_BIT:
   3598		smmu->oas = 52;
   3599		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
   3600		break;
   3601	default:
   3602		dev_info(smmu->dev,
   3603			"unknown output address size. Truncating to 48-bit\n");
   3604		fallthrough;
   3605	case IDR5_OAS_48_BIT:
   3606		smmu->oas = 48;
   3607	}
   3608
   3609	if (arm_smmu_ops.pgsize_bitmap == -1UL)
   3610		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
   3611	else
   3612		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
   3613
   3614	/* Set the DMA mask for our table walker */
   3615	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
   3616		dev_warn(smmu->dev,
   3617			 "failed to set DMA mask for table walker\n");
   3618
   3619	smmu->ias = max(smmu->ias, smmu->oas);
   3620
   3621	if (arm_smmu_sva_supported(smmu))
   3622		smmu->features |= ARM_SMMU_FEAT_SVA;
   3623
   3624	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
   3625		 smmu->ias, smmu->oas, smmu->features);
   3626	return 0;
   3627}
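
/*
 * Illustrative sketch, not part of the driver: the FIELD_GET()-based ID
 * register decoding used throughout arm_smmu_device_hw_probe() above.
 * FIELD_GET() extracts the field described by a GENMASK()-style constant
 * such as IDR5_OAS.  The helper name "my_decode_idr5" is a placeholder.
 */
#if 0	/* illustration only, never compiled */
static void my_decode_idr5(struct arm_smmu_device *smmu)
{
	u32 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);

	dev_dbg(smmu->dev, "IDR5: OAS encoding %lu, 4K granule %s\n",
		FIELD_GET(IDR5_OAS, reg),
		(reg & IDR5_GRAN4K) ? "yes" : "no");
}
#endif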
   3628
   3629#ifdef CONFIG_ACPI
   3630static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
   3631{
   3632	switch (model) {
   3633	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
   3634		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
   3635		break;
   3636	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
   3637		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
   3638		break;
   3639	}
   3640
   3641	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
   3642}
   3643
   3644static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
   3645				      struct arm_smmu_device *smmu)
   3646{
   3647	struct acpi_iort_smmu_v3 *iort_smmu;
   3648	struct device *dev = smmu->dev;
   3649	struct acpi_iort_node *node;
   3650
   3651	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
   3652
   3653	/* Retrieve SMMUv3 specific data */
   3654	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
   3655
   3656	acpi_smmu_get_options(iort_smmu->model, smmu);
   3657
   3658	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
   3659		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
   3660
   3661	return 0;
   3662}
   3663#else
   3664static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
   3665					     struct arm_smmu_device *smmu)
   3666{
   3667	return -ENODEV;
   3668}
   3669#endif
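
/*
 * Note: both firmware probe paths share a return convention that
 * arm_smmu_device_probe() below relies on.  A -ENODEV from the ACPI path
 * (no firmware description at all) aborts the probe, while any other
 * non-zero return merely forces the SMMU into bypass/abort mode via
 * "bypass = !!ret".
 */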
   3670
   3671static int arm_smmu_device_dt_probe(struct platform_device *pdev,
   3672				    struct arm_smmu_device *smmu)
   3673{
   3674	struct device *dev = &pdev->dev;
   3675	u32 cells;
   3676	int ret = -EINVAL;
   3677
   3678	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
   3679		dev_err(dev, "missing #iommu-cells property\n");
   3680	else if (cells != 1)
   3681		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
   3682	else
   3683		ret = 0;
   3684
   3685	parse_driver_options(smmu);
   3686
   3687	if (of_dma_is_coherent(dev->of_node))
   3688		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
   3689
   3690	return ret;
   3691}
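
/*
 * Illustrative sketch, not part of the driver: a minimal devicetree node
 * that would satisfy arm_smmu_device_dt_probe() and the IRQ lookup in
 * arm_smmu_device_probe() below, assuming the standard "arm,smmu-v3"
 * binding.  The address and interrupt specifiers are placeholders.
 *
 *	smmu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>;
 *		interrupt-names = "eventq", "priq", "gerror";
 *		#iommu-cells = <1>;
 *		dma-coherent;
 *	};
 */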
   3692
   3693static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
   3694{
   3695	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
   3696		return SZ_64K;
   3697	else
   3698		return SZ_128K;
   3699}
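
/*
 * Note: with ARM_SMMU_OPT_PAGE0_REGS_ONLY (the Cavium CN99xx erratum
 * handled above), only the first 64K register page is mapped and
 * smmu->page1 is set to alias smmu->base in arm_smmu_device_probe(), so the
 * page-1 relative offsets (EVTQ/PRIQ PROD and CONS) end up targeting page 0,
 * which is where that implementation exposes those registers.
 */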
   3700
   3701static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
   3702{
   3703	int err;
   3704
   3705#ifdef CONFIG_PCI
   3706	if (pci_bus_type.iommu_ops != ops) {
   3707		err = bus_set_iommu(&pci_bus_type, ops);
   3708		if (err)
   3709			return err;
   3710	}
   3711#endif
   3712#ifdef CONFIG_ARM_AMBA
   3713	if (amba_bustype.iommu_ops != ops) {
   3714		err = bus_set_iommu(&amba_bustype, ops);
   3715		if (err)
   3716			goto err_reset_pci_ops;
   3717	}
   3718#endif
   3719	if (platform_bus_type.iommu_ops != ops) {
   3720		err = bus_set_iommu(&platform_bus_type, ops);
   3721		if (err)
   3722			goto err_reset_amba_ops;
   3723	}
   3724
   3725	return 0;
   3726
   3727err_reset_amba_ops:
   3728#ifdef CONFIG_ARM_AMBA
   3729	bus_set_iommu(&amba_bustype, NULL);
   3730#endif
   3731err_reset_pci_ops: __maybe_unused;
   3732#ifdef CONFIG_PCI
   3733	bus_set_iommu(&pci_bus_type, NULL);
   3734#endif
   3735	return err;
   3736}
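
/*
 * Note: the error unwind above mirrors the registration order, and the
 * __maybe_unused on the err_reset_pci_ops label only exists to silence the
 * unused-label warning when CONFIG_ARM_AMBA is disabled and the goto that
 * targets it is compiled out.
 */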
   3737
   3738static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
   3739				      resource_size_t size)
   3740{
   3741	struct resource res = DEFINE_RES_MEM(start, size);
   3742
   3743	return devm_ioremap_resource(dev, &res);
   3744}
   3745
   3746static int arm_smmu_device_probe(struct platform_device *pdev)
   3747{
   3748	int irq, ret;
   3749	struct resource *res;
   3750	resource_size_t ioaddr;
   3751	struct arm_smmu_device *smmu;
   3752	struct device *dev = &pdev->dev;
   3753	bool bypass;
   3754
   3755	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
   3756	if (!smmu)
   3757		return -ENOMEM;
   3758	smmu->dev = dev;
   3759
   3760	if (dev->of_node) {
   3761		ret = arm_smmu_device_dt_probe(pdev, smmu);
   3762	} else {
   3763		ret = arm_smmu_device_acpi_probe(pdev, smmu);
   3764		if (ret == -ENODEV)
   3765			return ret;
   3766	}
   3767
   3768	/* Set bypass mode according to firmware probing result */
   3769	bypass = !!ret;
   3770
   3771	/* Base address */
   3772	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
   3773	if (!res)
   3774		return -EINVAL;
   3775	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
   3776		dev_err(dev, "MMIO region too small (%pr)\n", res);
   3777		return -EINVAL;
   3778	}
   3779	ioaddr = res->start;
   3780
   3781	/*
   3782	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
   3783	 * the PMCG registers which are reserved by the PMU driver.
   3784	 */
   3785	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
   3786	if (IS_ERR(smmu->base))
   3787		return PTR_ERR(smmu->base);
   3788
   3789	if (arm_smmu_resource_size(smmu) > SZ_64K) {
   3790		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
   3791					       ARM_SMMU_REG_SZ);
   3792		if (IS_ERR(smmu->page1))
   3793			return PTR_ERR(smmu->page1);
   3794	} else {
   3795		smmu->page1 = smmu->base;
   3796	}
   3797
   3798	/* Interrupt lines */
   3799
   3800	irq = platform_get_irq_byname_optional(pdev, "combined");
   3801	if (irq > 0)
   3802		smmu->combined_irq = irq;
   3803	else {
   3804		irq = platform_get_irq_byname_optional(pdev, "eventq");
   3805		if (irq > 0)
   3806			smmu->evtq.q.irq = irq;
   3807
   3808		irq = platform_get_irq_byname_optional(pdev, "priq");
   3809		if (irq > 0)
   3810			smmu->priq.q.irq = irq;
   3811
   3812		irq = platform_get_irq_byname_optional(pdev, "gerror");
   3813		if (irq > 0)
   3814			smmu->gerr_irq = irq;
   3815	}
   3816	/* Probe the hardware */
   3817	ret = arm_smmu_device_hw_probe(smmu);
   3818	if (ret)
   3819		return ret;
   3820
   3821	/* Initialise in-memory data structures */
   3822	ret = arm_smmu_init_structures(smmu);
   3823	if (ret)
   3824		return ret;
   3825
   3826	/* Record our private device structure */
   3827	platform_set_drvdata(pdev, smmu);
   3828
   3829	/* Reset the device */
   3830	ret = arm_smmu_device_reset(smmu, bypass);
   3831	if (ret)
   3832		return ret;
   3833
   3834	/* And we're up. Go go go! */
   3835	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
   3836				     "smmu3.%pa", &ioaddr);
   3837	if (ret)
   3838		return ret;
   3839
   3840	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
   3841	if (ret) {
   3842		dev_err(dev, "Failed to register iommu\n");
   3843		goto err_sysfs_remove;
   3844	}
   3845
   3846	ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
   3847	if (ret)
   3848		goto err_unregister_device;
   3849
   3850	return 0;
   3851
   3852err_unregister_device:
   3853	iommu_device_unregister(&smmu->iommu);
   3854err_sysfs_remove:
   3855	iommu_device_sysfs_remove(&smmu->iommu);
   3856	return ret;
   3857}
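
/*
 * Note on arm_smmu_device_probe() above: the firmware description (DT or
 * ACPI/IORT) is parsed first, the register pages are mapped, the hardware
 * is probed, the in-memory stream table and queues are built, the device is
 * reset and enabled, and only then is the IOMMU registered and installed on
 * the buses.  arm_smmu_device_remove() tears this down in reverse order.
 */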
   3858
   3859static int arm_smmu_device_remove(struct platform_device *pdev)
   3860{
   3861	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
   3862
   3863	arm_smmu_set_bus_ops(NULL);
   3864	iommu_device_unregister(&smmu->iommu);
   3865	iommu_device_sysfs_remove(&smmu->iommu);
   3866	arm_smmu_device_disable(smmu);
   3867	iopf_queue_free(smmu->evtq.iopf);
   3868
   3869	return 0;
   3870}
   3871
   3872static void arm_smmu_device_shutdown(struct platform_device *pdev)
   3873{
   3874	arm_smmu_device_remove(pdev);
   3875}
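
/*
 * Note: the shutdown hook reuses the remove path so that the SMMU is
 * disabled and DMA is quiesced before a reboot or kexec, rather than
 * leaving translations live for the next kernel.
 */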
   3876
   3877static const struct of_device_id arm_smmu_of_match[] = {
   3878	{ .compatible = "arm,smmu-v3", },
   3879	{ },
   3880};
   3881MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
   3882
   3883static void arm_smmu_driver_unregister(struct platform_driver *drv)
   3884{
   3885	arm_smmu_sva_notifier_synchronize();
   3886	platform_driver_unregister(drv);
   3887}
   3888
   3889static struct platform_driver arm_smmu_driver = {
   3890	.driver	= {
   3891		.name			= "arm-smmu-v3",
   3892		.of_match_table		= arm_smmu_of_match,
   3893		.suppress_bind_attrs	= true,
   3894	},
   3895	.probe	= arm_smmu_device_probe,
   3896	.remove	= arm_smmu_device_remove,
   3897	.shutdown = arm_smmu_device_shutdown,
   3898};
   3899module_driver(arm_smmu_driver, platform_driver_register,
   3900	      arm_smmu_driver_unregister);
   3901
   3902MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
   3903MODULE_AUTHOR("Will Deacon <will@kernel.org>");
   3904MODULE_ALIAS("platform:arm-smmu-v3");
   3905MODULE_LICENSE("GPL v2");