cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vgic-its.c (73102B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * GICv3 ITS emulation
      4 *
      5 * Copyright (C) 2015,2016 ARM Ltd.
      6 * Author: Andre Przywara <andre.przywara@arm.com>
      7 */
      8
      9#include <linux/cpu.h>
     10#include <linux/kvm.h>
     11#include <linux/kvm_host.h>
     12#include <linux/interrupt.h>
     13#include <linux/list.h>
     14#include <linux/uaccess.h>
     15#include <linux/list_sort.h>
     16
     17#include <linux/irqchip/arm-gic-v3.h>
     18
     19#include <asm/kvm_emulate.h>
     20#include <asm/kvm_arm.h>
     21#include <asm/kvm_mmu.h>
     22
     23#include "vgic.h"
     24#include "vgic-mmio.h"
     25
     26static int vgic_its_save_tables_v0(struct vgic_its *its);
     27static int vgic_its_restore_tables_v0(struct vgic_its *its);
     28static int vgic_its_commit_v0(struct vgic_its *its);
     29static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
     30			     struct kvm_vcpu *filter_vcpu, bool needs_inv);
     31
     32/*
     33 * Creates a new (reference to a) struct vgic_irq for a given LPI.
     34 * If this LPI is already mapped on another ITS, we increase its refcount
     35 * and return a pointer to the existing structure.
     36 * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq.
     37 * This function returns a pointer to the _unlocked_ structure.
     38 */
     39static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
     40				     struct kvm_vcpu *vcpu)
     41{
     42	struct vgic_dist *dist = &kvm->arch.vgic;
     43	struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq;
     44	unsigned long flags;
     45	int ret;
     46
     47	/* In this case there is no put, since we keep the reference. */
     48	if (irq)
     49		return irq;
     50
     51	irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT);
     52	if (!irq)
     53		return ERR_PTR(-ENOMEM);
     54
     55	INIT_LIST_HEAD(&irq->lpi_list);
     56	INIT_LIST_HEAD(&irq->ap_list);
     57	raw_spin_lock_init(&irq->irq_lock);
     58
     59	irq->config = VGIC_CONFIG_EDGE;
     60	kref_init(&irq->refcount);
     61	irq->intid = intid;
     62	irq->target_vcpu = vcpu;
     63	irq->group = 1;
     64
     65	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
     66
     67	/*
     68	 * There could be a race with another vgic_add_lpi(), so we need to
     69	 * check that we don't add a second list entry with the same LPI.
     70	 */
     71	list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) {
     72		if (oldirq->intid != intid)
     73			continue;
     74
      75		/* Someone was faster with adding this LPI, let's use that. */
     76		kfree(irq);
     77		irq = oldirq;
     78
     79		/*
     80		 * This increases the refcount, the caller is expected to
     81		 * call vgic_put_irq() on the returned pointer once it's
     82		 * finished with the IRQ.
     83		 */
     84		vgic_get_irq_kref(irq);
     85
     86		goto out_unlock;
     87	}
     88
     89	list_add_tail(&irq->lpi_list, &dist->lpi_list_head);
     90	dist->lpi_list_count++;
     91
     92out_unlock:
     93	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
     94
     95	/*
      96	 * We "cache" the configuration table entries in our struct vgic_irq structures.
      97	 * However, we only have those structs for mapped IRQs, so we read in
     98	 * the respective config data from memory here upon mapping the LPI.
     99	 *
    100	 * Should any of these fail, behave as if we couldn't create the LPI
    101	 * by dropping the refcount and returning the error.
    102	 */
    103	ret = update_lpi_config(kvm, irq, NULL, false);
    104	if (ret) {
    105		vgic_put_irq(kvm, irq);
    106		return ERR_PTR(ret);
    107	}
    108
    109	ret = vgic_v3_lpi_sync_pending_status(kvm, irq);
    110	if (ret) {
    111		vgic_put_irq(kvm, irq);
    112		return ERR_PTR(ret);
    113	}
    114
    115	return irq;
    116}
    117
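/*
 * Shadow of a guest device table entry: tracks the guest's ITT address,
 * the event ID width and the list of ITEs created for this device.
 */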
    118struct its_device {
    119	struct list_head dev_list;
    120
    121	/* the head for the list of ITTEs */
    122	struct list_head itt_head;
    123	u32 num_eventid_bits;
    124	gpa_t itt_addr;
    125	u32 device_id;
    126};
    127
    128#define COLLECTION_NOT_MAPPED ((u32)~0)
    129
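/*
 * Shadow of a guest collection table entry. target_addr is the linear
 * index of the target VCPU (GITS_TYPER.PTA is 0), or COLLECTION_NOT_MAPPED
 * while the collection is unmapped.
 */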
    130struct its_collection {
    131	struct list_head coll_list;
    132
    133	u32 collection_id;
    134	u32 target_addr;
    135};
    136
    137#define its_is_collection_mapped(coll) ((coll) && \
    138				((coll)->target_addr != COLLECTION_NOT_MAPPED))
    139
    140struct its_ite {
    141	struct list_head ite_list;
    142
    143	struct vgic_irq *irq;
    144	struct its_collection *collection;
    145	u32 event_id;
    146};
    147
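/*
 * One entry of the LRU cache used to short-cut doorbell/devid/eventid to
 * vgic_irq translations. An entry with a NULL irq has never been used.
 */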
    148struct vgic_translation_cache_entry {
    149	struct list_head	entry;
    150	phys_addr_t		db;
    151	u32			devid;
    152	u32			eventid;
    153	struct vgic_irq		*irq;
    154};
    155
    156/**
    157 * struct vgic_its_abi - ITS abi ops and settings
    158 * @cte_esz: collection table entry size
    159 * @dte_esz: device table entry size
    160 * @ite_esz: interrupt translation table entry size
     161 * @save_tables: save the ITS tables into guest RAM
    162 * @restore_tables: restore the ITS internal structs from tables
    163 *  stored in guest RAM
    164 * @commit: initialize the registers which expose the ABI settings,
    165 *  especially the entry sizes
    166 */
    167struct vgic_its_abi {
    168	int cte_esz;
    169	int dte_esz;
    170	int ite_esz;
    171	int (*save_tables)(struct vgic_its *its);
    172	int (*restore_tables)(struct vgic_its *its);
    173	int (*commit)(struct vgic_its *its);
    174};
    175
    176#define ABI_0_ESZ	8
    177#define ESZ_MAX		ABI_0_ESZ
    178
    179static const struct vgic_its_abi its_table_abi_versions[] = {
    180	[0] = {
    181	 .cte_esz = ABI_0_ESZ,
    182	 .dte_esz = ABI_0_ESZ,
    183	 .ite_esz = ABI_0_ESZ,
    184	 .save_tables = vgic_its_save_tables_v0,
    185	 .restore_tables = vgic_its_restore_tables_v0,
    186	 .commit = vgic_its_commit_v0,
    187	},
    188};
    189
    190#define NR_ITS_ABIS	ARRAY_SIZE(its_table_abi_versions)
    191
    192inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its)
    193{
    194	return &its_table_abi_versions[its->abi_rev];
    195}
    196
    197static int vgic_its_set_abi(struct vgic_its *its, u32 rev)
    198{
    199	const struct vgic_its_abi *abi;
    200
    201	its->abi_rev = rev;
    202	abi = vgic_its_get_abi(its);
    203	return abi->commit(its);
    204}
    205
    206/*
     207 * Finds and returns a device in the device table for an ITS.
    208 * Must be called with the its_lock mutex held.
    209 */
    210static struct its_device *find_its_device(struct vgic_its *its, u32 device_id)
    211{
    212	struct its_device *device;
    213
    214	list_for_each_entry(device, &its->device_list, dev_list)
    215		if (device_id == device->device_id)
    216			return device;
    217
    218	return NULL;
    219}
    220
    221/*
     222 * Finds and returns an interrupt translation table entry (ITTE) for a given
    223 * Device ID/Event ID pair on an ITS.
    224 * Must be called with the its_lock mutex held.
    225 */
    226static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
    227				  u32 event_id)
    228{
    229	struct its_device *device;
    230	struct its_ite *ite;
    231
    232	device = find_its_device(its, device_id);
    233	if (device == NULL)
    234		return NULL;
    235
    236	list_for_each_entry(ite, &device->itt_head, ite_list)
    237		if (ite->event_id == event_id)
    238			return ite;
    239
    240	return NULL;
    241}
    242
     243/* To be used as an iterator, this macro lacks the enclosing parentheses */
    244#define for_each_lpi_its(dev, ite, its) \
    245	list_for_each_entry(dev, &(its)->device_list, dev_list) \
    246		list_for_each_entry(ite, &(dev)->itt_head, ite_list)
    247
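/*
 * LPI INTIDs start at 8192. We advertise 16 bits worth of device IDs and
 * event IDs via GITS_TYPER (see vgic_mmio_read_its_typer()).
 */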
    248#define GIC_LPI_OFFSET 8192
    249
    250#define VITS_TYPER_IDBITS 16
    251#define VITS_TYPER_DEVBITS 16
    252#define VITS_DTE_MAX_DEVID_OFFSET	(BIT(14) - 1)
    253#define VITS_ITE_MAX_EVENTID_OFFSET	(BIT(16) - 1)
    254
    255/*
    256 * Finds and returns a collection in the ITS collection table.
    257 * Must be called with the its_lock mutex held.
    258 */
    259static struct its_collection *find_collection(struct vgic_its *its, int coll_id)
    260{
    261	struct its_collection *collection;
    262
    263	list_for_each_entry(collection, &its->collection_list, coll_list) {
    264		if (coll_id == collection->collection_id)
    265			return collection;
    266	}
    267
    268	return NULL;
    269}
    270
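/*
 * Each LPI has one configuration byte in the guest's property table:
 * bit 0 is the enable bit, bits [7:2] hold the priority.
 */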
    271#define LPI_PROP_ENABLE_BIT(p)	((p) & LPI_PROP_ENABLED)
    272#define LPI_PROP_PRIORITY(p)	((p) & 0xfc)
    273
    274/*
    275 * Reads the configuration data for a given LPI from guest memory and
    276 * updates the fields in struct vgic_irq.
     277 * If filter_vcpu is not NULL, the update is applied only if the IRQ is
     278 * targeting this VCPU; it is applied unconditionally if filter_vcpu is NULL.
    279 */
    280static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
    281			     struct kvm_vcpu *filter_vcpu, bool needs_inv)
    282{
    283	u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
    284	u8 prop;
    285	int ret;
    286	unsigned long flags;
    287
    288	ret = kvm_read_guest_lock(kvm, propbase + irq->intid - GIC_LPI_OFFSET,
    289				  &prop, 1);
    290
    291	if (ret)
    292		return ret;
    293
    294	raw_spin_lock_irqsave(&irq->irq_lock, flags);
    295
    296	if (!filter_vcpu || filter_vcpu == irq->target_vcpu) {
    297		irq->priority = LPI_PROP_PRIORITY(prop);
    298		irq->enabled = LPI_PROP_ENABLE_BIT(prop);
    299
    300		if (!irq->hw) {
    301			vgic_queue_irq_unlock(kvm, irq, flags);
    302			return 0;
    303		}
    304	}
    305
    306	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
    307
    308	if (irq->hw)
    309		return its_prop_update_vlpi(irq->host_irq, prop, needs_inv);
    310
    311	return 0;
    312}
    313
    314/*
    315 * Create a snapshot of the current LPIs targeting @vcpu, so that we can
    316 * enumerate those LPIs without holding any lock.
    317 * Returns their number and puts the kmalloc'ed array into intid_ptr.
    318 */
    319int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
    320{
    321	struct vgic_dist *dist = &kvm->arch.vgic;
    322	struct vgic_irq *irq;
    323	unsigned long flags;
    324	u32 *intids;
    325	int irq_count, i = 0;
    326
    327	/*
    328	 * There is an obvious race between allocating the array and LPIs
    329	 * being mapped/unmapped. If we ended up here as a result of a
    330	 * command, we're safe (locks are held, preventing another
    331	 * command). If coming from another path (such as enabling LPIs),
    332	 * we must be careful not to overrun the array.
    333	 */
    334	irq_count = READ_ONCE(dist->lpi_list_count);
    335	intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT);
    336	if (!intids)
    337		return -ENOMEM;
    338
    339	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
    340	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
    341		if (i == irq_count)
    342			break;
    343		/* We don't need to "get" the IRQ, as we hold the list lock. */
    344		if (vcpu && irq->target_vcpu != vcpu)
    345			continue;
    346		intids[i++] = irq->intid;
    347	}
    348	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
    349
    350	*intid_ptr = intids;
    351	return i;
    352}
    353
    354static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
    355{
    356	int ret = 0;
    357	unsigned long flags;
    358
    359	raw_spin_lock_irqsave(&irq->irq_lock, flags);
    360	irq->target_vcpu = vcpu;
    361	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
    362
    363	if (irq->hw) {
    364		struct its_vlpi_map map;
    365
    366		ret = its_get_vlpi(irq->host_irq, &map);
    367		if (ret)
    368			return ret;
    369
    370		if (map.vpe)
    371			atomic_dec(&map.vpe->vlpi_count);
    372		map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
    373		atomic_inc(&map.vpe->vlpi_count);
    374
    375		ret = its_map_vlpi(irq->host_irq, &map);
    376	}
    377
    378	return ret;
    379}
    380
    381/*
    382 * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI
    383 * is targeting) to the VGIC's view, which deals with target VCPUs.
     384 * Needs to be called whenever either the collection of an LPI has
    385 * changed or the collection itself got retargeted.
    386 */
    387static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite)
    388{
    389	struct kvm_vcpu *vcpu;
    390
    391	if (!its_is_collection_mapped(ite->collection))
    392		return;
    393
    394	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
    395	update_affinity(ite->irq, vcpu);
    396}
    397
    398/*
    399 * Updates the target VCPU for every LPI targeting this collection.
    400 * Must be called with the its_lock mutex held.
    401 */
    402static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its,
    403				       struct its_collection *coll)
    404{
    405	struct its_device *device;
    406	struct its_ite *ite;
    407
    408	for_each_lpi_its(device, ite, its) {
    409		if (!ite->collection || coll != ite->collection)
    410			continue;
    411
    412		update_affinity_ite(kvm, ite);
    413	}
    414}
    415
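/*
 * GICR_PROPBASER.IDbits (bits [4:0]) holds the number of interrupt ID bits
 * minus one; cap it at what the ITS emulation supports.
 */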
    416static u32 max_lpis_propbaser(u64 propbaser)
    417{
    418	int nr_idbits = (propbaser & 0x1f) + 1;
    419
    420	return 1U << min(nr_idbits, INTERRUPT_ID_BITS_ITS);
    421}
    422
    423/*
    424 * Sync the pending table pending bit of LPIs targeting @vcpu
    425 * with our own data structures. This relies on the LPI being
    426 * mapped before.
    427 */
    428static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
    429{
    430	gpa_t pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
    431	struct vgic_irq *irq;
    432	int last_byte_offset = -1;
    433	int ret = 0;
    434	u32 *intids;
    435	int nr_irqs, i;
    436	unsigned long flags;
    437	u8 pendmask;
    438
    439	nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids);
    440	if (nr_irqs < 0)
    441		return nr_irqs;
    442
    443	for (i = 0; i < nr_irqs; i++) {
    444		int byte_offset, bit_nr;
    445
    446		byte_offset = intids[i] / BITS_PER_BYTE;
    447		bit_nr = intids[i] % BITS_PER_BYTE;
    448
    449		/*
    450		 * For contiguously allocated LPIs chances are we just read
    451		 * this very same byte in the last iteration. Reuse that.
    452		 */
    453		if (byte_offset != last_byte_offset) {
    454			ret = kvm_read_guest_lock(vcpu->kvm,
    455						  pendbase + byte_offset,
    456						  &pendmask, 1);
    457			if (ret) {
    458				kfree(intids);
    459				return ret;
    460			}
    461			last_byte_offset = byte_offset;
    462		}
    463
    464		irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
    465		raw_spin_lock_irqsave(&irq->irq_lock, flags);
    466		irq->pending_latch = pendmask & (1U << bit_nr);
    467		vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
    468		vgic_put_irq(vcpu->kvm, irq);
    469	}
    470
    471	kfree(intids);
    472
    473	return ret;
    474}
    475
    476static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
    477					      struct vgic_its *its,
    478					      gpa_t addr, unsigned int len)
    479{
    480	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
    481	u64 reg = GITS_TYPER_PLPIS;
    482
    483	/*
    484	 * We use linear CPU numbers for redistributor addressing,
    485	 * so GITS_TYPER.PTA is 0.
    486	 * Also we force all PROPBASER registers to be the same, so
    487	 * CommonLPIAff is 0 as well.
    488	 * To avoid memory waste in the guest, we keep the number of IDBits and
     489 * DevBits low - at least for the time being.
    490	 */
    491	reg |= GIC_ENCODE_SZ(VITS_TYPER_DEVBITS, 5) << GITS_TYPER_DEVBITS_SHIFT;
    492	reg |= GIC_ENCODE_SZ(VITS_TYPER_IDBITS, 5) << GITS_TYPER_IDBITS_SHIFT;
    493	reg |= GIC_ENCODE_SZ(abi->ite_esz, 4) << GITS_TYPER_ITT_ENTRY_SIZE_SHIFT;
    494
    495	return extract_bytes(reg, addr & 7, len);
    496}
    497
    498static unsigned long vgic_mmio_read_its_iidr(struct kvm *kvm,
    499					     struct vgic_its *its,
    500					     gpa_t addr, unsigned int len)
    501{
    502	u32 val;
    503
    504	val = (its->abi_rev << GITS_IIDR_REV_SHIFT) & GITS_IIDR_REV_MASK;
    505	val |= (PRODUCT_ID_KVM << GITS_IIDR_PRODUCTID_SHIFT) | IMPLEMENTER_ARM;
    506	return val;
    507}
    508
    509static int vgic_mmio_uaccess_write_its_iidr(struct kvm *kvm,
    510					    struct vgic_its *its,
    511					    gpa_t addr, unsigned int len,
    512					    unsigned long val)
    513{
    514	u32 rev = GITS_IIDR_REV(val);
    515
    516	if (rev >= NR_ITS_ABIS)
    517		return -EINVAL;
    518	return vgic_its_set_abi(its, rev);
    519}
    520
    521static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
    522					       struct vgic_its *its,
    523					       gpa_t addr, unsigned int len)
    524{
    525	switch (addr & 0xffff) {
    526	case GITS_PIDR0:
    527		return 0x92;	/* part number, bits[7:0] */
    528	case GITS_PIDR1:
    529		return 0xb4;	/* part number, bits[11:8] */
    530	case GITS_PIDR2:
    531		return GIC_PIDR2_ARCH_GICv3 | 0x0b;
    532	case GITS_PIDR4:
    533		return 0x40;	/* This is a 64K software visible page */
    534	/* The following are the ID registers for (any) GIC. */
    535	case GITS_CIDR0:
    536		return 0x0d;
    537	case GITS_CIDR1:
    538		return 0xf0;
    539	case GITS_CIDR2:
    540		return 0x05;
    541	case GITS_CIDR3:
    542		return 0xb1;
    543	}
    544
    545	return 0;
    546}
    547
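/*
 * Look up (db, devid, eventid) in the translation cache and move a hit to
 * the head of the LRU list. Must be called with the lpi_list_lock held.
 */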
    548static struct vgic_irq *__vgic_its_check_cache(struct vgic_dist *dist,
    549					       phys_addr_t db,
    550					       u32 devid, u32 eventid)
    551{
    552	struct vgic_translation_cache_entry *cte;
    553
    554	list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
    555		/*
    556		 * If we hit a NULL entry, there is nothing after this
    557		 * point.
    558		 */
    559		if (!cte->irq)
    560			break;
    561
    562		if (cte->db != db || cte->devid != devid ||
    563		    cte->eventid != eventid)
    564			continue;
    565
    566		/*
    567		 * Move this entry to the head, as it is the most
    568		 * recently used.
    569		 */
    570		if (!list_is_first(&cte->entry, &dist->lpi_translation_cache))
    571			list_move(&cte->entry, &dist->lpi_translation_cache);
    572
    573		return cte->irq;
    574	}
    575
    576	return NULL;
    577}
    578
    579static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
    580					     u32 devid, u32 eventid)
    581{
    582	struct vgic_dist *dist = &kvm->arch.vgic;
    583	struct vgic_irq *irq;
    584	unsigned long flags;
    585
    586	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
    587	irq = __vgic_its_check_cache(dist, db, devid, eventid);
    588	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
    589
    590	return irq;
    591}
    592
    593static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
    594				       u32 devid, u32 eventid,
    595				       struct vgic_irq *irq)
    596{
    597	struct vgic_dist *dist = &kvm->arch.vgic;
    598	struct vgic_translation_cache_entry *cte;
    599	unsigned long flags;
    600	phys_addr_t db;
    601
    602	/* Do not cache a directly injected interrupt */
    603	if (irq->hw)
    604		return;
    605
    606	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
    607
    608	if (unlikely(list_empty(&dist->lpi_translation_cache)))
    609		goto out;
    610
    611	/*
    612	 * We could have raced with another CPU caching the same
     613	 * translation behind our back, so let's check that it is not
     614	 * in there already
    615	 */
    616	db = its->vgic_its_base + GITS_TRANSLATER;
    617	if (__vgic_its_check_cache(dist, db, devid, eventid))
    618		goto out;
    619
    620	/* Always reuse the last entry (LRU policy) */
    621	cte = list_last_entry(&dist->lpi_translation_cache,
    622			      typeof(*cte), entry);
    623
    624	/*
    625	 * Caching the translation implies having an extra reference
    626	 * to the interrupt, so drop the potential reference on what
    627	 * was in the cache, and increment it on the new interrupt.
    628	 */
    629	if (cte->irq)
    630		__vgic_put_lpi_locked(kvm, cte->irq);
    631
    632	vgic_get_irq_kref(irq);
    633
    634	cte->db		= db;
    635	cte->devid	= devid;
    636	cte->eventid	= eventid;
    637	cte->irq	= irq;
    638
    639	/* Move the new translation to the head of the list */
    640	list_move(&cte->entry, &dist->lpi_translation_cache);
    641
    642out:
    643	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
    644}
    645
    646void vgic_its_invalidate_cache(struct kvm *kvm)
    647{
    648	struct vgic_dist *dist = &kvm->arch.vgic;
    649	struct vgic_translation_cache_entry *cte;
    650	unsigned long flags;
    651
    652	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
    653
    654	list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
    655		/*
    656		 * If we hit a NULL entry, there is nothing after this
    657		 * point.
    658		 */
    659		if (!cte->irq)
    660			break;
    661
    662		__vgic_put_lpi_locked(kvm, cte->irq);
    663		cte->irq = NULL;
    664	}
    665
    666	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
    667}
    668
    669int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
    670			 u32 devid, u32 eventid, struct vgic_irq **irq)
    671{
    672	struct kvm_vcpu *vcpu;
    673	struct its_ite *ite;
    674
    675	if (!its->enabled)
    676		return -EBUSY;
    677
    678	ite = find_ite(its, devid, eventid);
    679	if (!ite || !its_is_collection_mapped(ite->collection))
    680		return E_ITS_INT_UNMAPPED_INTERRUPT;
    681
    682	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
    683	if (!vcpu)
    684		return E_ITS_INT_UNMAPPED_INTERRUPT;
    685
    686	if (!vgic_lpis_enabled(vcpu))
    687		return -EBUSY;
    688
    689	vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq);
    690
    691	*irq = ite->irq;
    692	return 0;
    693}
    694
    695struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi)
    696{
    697	u64 address;
    698	struct kvm_io_device *kvm_io_dev;
    699	struct vgic_io_device *iodev;
    700
    701	if (!vgic_has_its(kvm))
    702		return ERR_PTR(-ENODEV);
    703
    704	if (!(msi->flags & KVM_MSI_VALID_DEVID))
    705		return ERR_PTR(-EINVAL);
    706
    707	address = (u64)msi->address_hi << 32 | msi->address_lo;
    708
    709	kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
    710	if (!kvm_io_dev)
    711		return ERR_PTR(-EINVAL);
    712
    713	if (kvm_io_dev->ops != &kvm_io_gic_ops)
    714		return ERR_PTR(-EINVAL);
    715
    716	iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
    717	if (iodev->iodev_type != IODEV_ITS)
    718		return ERR_PTR(-EINVAL);
    719
    720	return iodev->its;
    721}
    722
    723/*
    724 * Find the target VCPU and the LPI number for a given devid/eventid pair
    725 * and make this IRQ pending, possibly injecting it.
    726 * Must be called with the its_lock mutex held.
    727 * Returns 0 on success, a positive error value for any ITS mapping
    728 * related errors and negative error values for generic errors.
    729 */
    730static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
    731				u32 devid, u32 eventid)
    732{
    733	struct vgic_irq *irq = NULL;
    734	unsigned long flags;
    735	int err;
    736
    737	err = vgic_its_resolve_lpi(kvm, its, devid, eventid, &irq);
    738	if (err)
    739		return err;
    740
    741	if (irq->hw)
    742		return irq_set_irqchip_state(irq->host_irq,
    743					     IRQCHIP_STATE_PENDING, true);
    744
    745	raw_spin_lock_irqsave(&irq->irq_lock, flags);
    746	irq->pending_latch = true;
    747	vgic_queue_irq_unlock(kvm, irq, flags);
    748
    749	return 0;
    750}
    751
    752int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)
    753{
    754	struct vgic_irq *irq;
    755	unsigned long flags;
    756	phys_addr_t db;
    757
    758	db = (u64)msi->address_hi << 32 | msi->address_lo;
    759	irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data);
    760	if (!irq)
    761		return -EWOULDBLOCK;
    762
    763	raw_spin_lock_irqsave(&irq->irq_lock, flags);
    764	irq->pending_latch = true;
    765	vgic_queue_irq_unlock(kvm, irq, flags);
    766
    767	return 0;
    768}
    769
    770/*
    771 * Queries the KVM IO bus framework to get the ITS pointer from the given
    772 * doorbell address.
    773 * We then call vgic_its_trigger_msi() with the decoded data.
     774 * According to the KVM_SIGNAL_MSI API description, this returns 1 on success.
    775 */
    776int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
    777{
    778	struct vgic_its *its;
    779	int ret;
    780
    781	if (!vgic_its_inject_cached_translation(kvm, msi))
    782		return 1;
    783
    784	its = vgic_msi_to_its(kvm, msi);
    785	if (IS_ERR(its))
    786		return PTR_ERR(its);
    787
    788	mutex_lock(&its->its_lock);
    789	ret = vgic_its_trigger_msi(kvm, its, msi->devid, msi->data);
    790	mutex_unlock(&its->its_lock);
    791
    792	if (ret < 0)
    793		return ret;
    794
    795	/*
    796	 * KVM_SIGNAL_MSI demands a return value > 0 for success and 0
    797	 * if the guest has blocked the MSI. So we map any LPI mapping
    798	 * related error to that.
    799	 */
    800	if (ret)
    801		return 0;
    802	else
    803		return 1;
    804}
    805
    806/* Requires the its_lock to be held. */
    807static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
    808{
    809	list_del(&ite->ite_list);
    810
    811	/* This put matches the get in vgic_add_lpi. */
    812	if (ite->irq) {
    813		if (ite->irq->hw)
    814			WARN_ON(its_unmap_vlpi(ite->irq->host_irq));
    815
    816		vgic_put_irq(kvm, ite->irq);
    817	}
    818
    819	kfree(ite);
    820}
    821
    822static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size)
    823{
    824	return (le64_to_cpu(its_cmd[word]) >> shift) & (BIT_ULL(size) - 1);
    825}
    826
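/*
 * Field extractors for the 32-byte ITS commands; each command consists of
 * four little-endian 64-bit words.
 */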
    827#define its_cmd_get_command(cmd)	its_cmd_mask_field(cmd, 0,  0,  8)
    828#define its_cmd_get_deviceid(cmd)	its_cmd_mask_field(cmd, 0, 32, 32)
    829#define its_cmd_get_size(cmd)		(its_cmd_mask_field(cmd, 1,  0,  5) + 1)
    830#define its_cmd_get_id(cmd)		its_cmd_mask_field(cmd, 1,  0, 32)
    831#define its_cmd_get_physical_id(cmd)	its_cmd_mask_field(cmd, 1, 32, 32)
    832#define its_cmd_get_collection(cmd)	its_cmd_mask_field(cmd, 2,  0, 16)
    833#define its_cmd_get_ittaddr(cmd)	(its_cmd_mask_field(cmd, 2,  8, 44) << 8)
    834#define its_cmd_get_target_addr(cmd)	its_cmd_mask_field(cmd, 2, 16, 32)
    835#define its_cmd_get_validbit(cmd)	its_cmd_mask_field(cmd, 2, 63,  1)
    836
    837/*
    838 * The DISCARD command frees an Interrupt Translation Table Entry (ITTE).
    839 * Must be called with the its_lock mutex held.
    840 */
    841static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
    842				       u64 *its_cmd)
    843{
    844	u32 device_id = its_cmd_get_deviceid(its_cmd);
    845	u32 event_id = its_cmd_get_id(its_cmd);
    846	struct its_ite *ite;
    847
    848	ite = find_ite(its, device_id, event_id);
    849	if (ite && its_is_collection_mapped(ite->collection)) {
    850		/*
    851		 * Though the spec talks about removing the pending state, we
    852		 * don't bother here since we clear the ITTE anyway and the
    853		 * pending state is a property of the ITTE struct.
    854		 */
    855		vgic_its_invalidate_cache(kvm);
    856
    857		its_free_ite(kvm, ite);
    858		return 0;
    859	}
    860
    861	return E_ITS_DISCARD_UNMAPPED_INTERRUPT;
    862}
    863
    864/*
    865 * The MOVI command moves an ITTE to a different collection.
    866 * Must be called with the its_lock mutex held.
    867 */
    868static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
    869				    u64 *its_cmd)
    870{
    871	u32 device_id = its_cmd_get_deviceid(its_cmd);
    872	u32 event_id = its_cmd_get_id(its_cmd);
    873	u32 coll_id = its_cmd_get_collection(its_cmd);
    874	struct kvm_vcpu *vcpu;
    875	struct its_ite *ite;
    876	struct its_collection *collection;
    877
    878	ite = find_ite(its, device_id, event_id);
    879	if (!ite)
    880		return E_ITS_MOVI_UNMAPPED_INTERRUPT;
    881
    882	if (!its_is_collection_mapped(ite->collection))
    883		return E_ITS_MOVI_UNMAPPED_COLLECTION;
    884
    885	collection = find_collection(its, coll_id);
    886	if (!its_is_collection_mapped(collection))
    887		return E_ITS_MOVI_UNMAPPED_COLLECTION;
    888
    889	ite->collection = collection;
    890	vcpu = kvm_get_vcpu(kvm, collection->target_addr);
    891
    892	vgic_its_invalidate_cache(kvm);
    893
    894	return update_affinity(ite->irq, vcpu);
    895}
    896
    897static bool __is_visible_gfn_locked(struct vgic_its *its, gpa_t gpa)
    898{
    899	gfn_t gfn = gpa >> PAGE_SHIFT;
    900	int idx;
    901	bool ret;
    902
    903	idx = srcu_read_lock(&its->dev->kvm->srcu);
    904	ret = kvm_is_visible_gfn(its->dev->kvm, gfn);
    905	srcu_read_unlock(&its->dev->kvm->srcu, idx);
    906	return ret;
    907}
    908
    909/*
    910 * Check whether an ID can be stored into the corresponding guest table.
    911 * For a direct table this is pretty easy, but gets a bit nasty for
    912 * indirect tables. We check whether the resulting guest physical address
    913 * is actually valid (covered by a memslot and guest accessible).
    914 * For this we have to read the respective first level entry.
    915 */
    916static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
    917			      gpa_t *eaddr)
    918{
    919	int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
    920	u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
    921	phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser);
    922	int esz = GITS_BASER_ENTRY_SIZE(baser);
    923	int index;
    924
    925	switch (type) {
    926	case GITS_BASER_TYPE_DEVICE:
    927		if (id >= BIT_ULL(VITS_TYPER_DEVBITS))
    928			return false;
    929		break;
    930	case GITS_BASER_TYPE_COLLECTION:
    931		/* as GITS_TYPER.CIL == 0, ITS supports 16-bit collection ID */
    932		if (id >= BIT_ULL(16))
    933			return false;
    934		break;
    935	default:
    936		return false;
    937	}
    938
    939	if (!(baser & GITS_BASER_INDIRECT)) {
    940		phys_addr_t addr;
    941
    942		if (id >= (l1_tbl_size / esz))
    943			return false;
    944
    945		addr = base + id * esz;
    946
    947		if (eaddr)
    948			*eaddr = addr;
    949
    950		return __is_visible_gfn_locked(its, addr);
    951	}
    952
    953	/* calculate and check the index into the 1st level */
    954	index = id / (SZ_64K / esz);
    955	if (index >= (l1_tbl_size / sizeof(u64)))
    956		return false;
    957
    958	/* Each 1st level entry is represented by a 64-bit value. */
    959	if (kvm_read_guest_lock(its->dev->kvm,
    960			   base + index * sizeof(indirect_ptr),
    961			   &indirect_ptr, sizeof(indirect_ptr)))
    962		return false;
    963
    964	indirect_ptr = le64_to_cpu(indirect_ptr);
    965
    966	/* check the valid bit of the first level entry */
    967	if (!(indirect_ptr & BIT_ULL(63)))
    968		return false;
    969
    970	/* Mask the guest physical address and calculate the frame number. */
    971	indirect_ptr &= GENMASK_ULL(51, 16);
    972
    973	/* Find the address of the actual entry */
    974	index = id % (SZ_64K / esz);
    975	indirect_ptr += index * esz;
    976
    977	if (eaddr)
    978		*eaddr = indirect_ptr;
    979
    980	return __is_visible_gfn_locked(its, indirect_ptr);
    981}
    982
    983/*
    984 * Check whether an event ID can be stored in the corresponding Interrupt
    985 * Translation Table, which starts at device->itt_addr.
    986 */
    987static bool vgic_its_check_event_id(struct vgic_its *its, struct its_device *device,
    988		u32 event_id)
    989{
    990	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
    991	int ite_esz = abi->ite_esz;
    992	gpa_t gpa;
    993
    994	/* max table size is: BIT_ULL(device->num_eventid_bits) * ite_esz */
    995	if (event_id >= BIT_ULL(device->num_eventid_bits))
    996		return false;
    997
    998	gpa = device->itt_addr + event_id * ite_esz;
    999	return __is_visible_gfn_locked(its, gpa);
   1000}
   1001
   1002/*
   1003 * Add a new collection into the ITS collection table.
   1004 * Returns 0 on success, and a negative error value for generic errors.
   1005 */
   1006static int vgic_its_alloc_collection(struct vgic_its *its,
   1007				     struct its_collection **colp,
   1008				     u32 coll_id)
   1009{
   1010	struct its_collection *collection;
   1011
   1012	collection = kzalloc(sizeof(*collection), GFP_KERNEL_ACCOUNT);
   1013	if (!collection)
   1014		return -ENOMEM;
   1015
   1016	collection->collection_id = coll_id;
   1017	collection->target_addr = COLLECTION_NOT_MAPPED;
   1018
   1019	list_add_tail(&collection->coll_list, &its->collection_list);
   1020	*colp = collection;
   1021
   1022	return 0;
   1023}
   1024
   1025static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id)
   1026{
   1027	struct its_collection *collection;
   1028	struct its_device *device;
   1029	struct its_ite *ite;
   1030
   1031	/*
   1032	 * Clearing the mapping for that collection ID removes the
   1033	 * entry from the list. If there wasn't any before, we can
   1034	 * go home early.
   1035	 */
   1036	collection = find_collection(its, coll_id);
   1037	if (!collection)
   1038		return;
   1039
   1040	for_each_lpi_its(device, ite, its)
   1041		if (ite->collection &&
   1042		    ite->collection->collection_id == coll_id)
   1043			ite->collection = NULL;
   1044
   1045	list_del(&collection->coll_list);
   1046	kfree(collection);
   1047}
   1048
   1049/* Must be called with its_lock mutex held */
   1050static struct its_ite *vgic_its_alloc_ite(struct its_device *device,
   1051					  struct its_collection *collection,
   1052					  u32 event_id)
   1053{
   1054	struct its_ite *ite;
   1055
   1056	ite = kzalloc(sizeof(*ite), GFP_KERNEL_ACCOUNT);
   1057	if (!ite)
   1058		return ERR_PTR(-ENOMEM);
   1059
   1060	ite->event_id	= event_id;
   1061	ite->collection = collection;
   1062
   1063	list_add_tail(&ite->ite_list, &device->itt_head);
   1064	return ite;
   1065}
   1066
   1067/*
   1068 * The MAPTI and MAPI commands map LPIs to ITTEs.
   1069 * Must be called with its_lock mutex held.
   1070 */
   1071static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
   1072				    u64 *its_cmd)
   1073{
   1074	u32 device_id = its_cmd_get_deviceid(its_cmd);
   1075	u32 event_id = its_cmd_get_id(its_cmd);
   1076	u32 coll_id = its_cmd_get_collection(its_cmd);
   1077	struct its_ite *ite;
   1078	struct kvm_vcpu *vcpu = NULL;
   1079	struct its_device *device;
   1080	struct its_collection *collection, *new_coll = NULL;
   1081	struct vgic_irq *irq;
   1082	int lpi_nr;
   1083
   1084	device = find_its_device(its, device_id);
   1085	if (!device)
   1086		return E_ITS_MAPTI_UNMAPPED_DEVICE;
   1087
   1088	if (!vgic_its_check_event_id(its, device, event_id))
   1089		return E_ITS_MAPTI_ID_OOR;
   1090
   1091	if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI)
   1092		lpi_nr = its_cmd_get_physical_id(its_cmd);
   1093	else
   1094		lpi_nr = event_id;
   1095	if (lpi_nr < GIC_LPI_OFFSET ||
   1096	    lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser))
   1097		return E_ITS_MAPTI_PHYSICALID_OOR;
   1098
   1099	/* If there is an existing mapping, behavior is UNPREDICTABLE. */
   1100	if (find_ite(its, device_id, event_id))
   1101		return 0;
   1102
   1103	collection = find_collection(its, coll_id);
   1104	if (!collection) {
   1105		int ret;
   1106
   1107		if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
   1108			return E_ITS_MAPC_COLLECTION_OOR;
   1109
   1110		ret = vgic_its_alloc_collection(its, &collection, coll_id);
   1111		if (ret)
   1112			return ret;
   1113		new_coll = collection;
   1114	}
   1115
   1116	ite = vgic_its_alloc_ite(device, collection, event_id);
   1117	if (IS_ERR(ite)) {
   1118		if (new_coll)
   1119			vgic_its_free_collection(its, coll_id);
   1120		return PTR_ERR(ite);
   1121	}
   1122
   1123	if (its_is_collection_mapped(collection))
   1124		vcpu = kvm_get_vcpu(kvm, collection->target_addr);
   1125
   1126	irq = vgic_add_lpi(kvm, lpi_nr, vcpu);
   1127	if (IS_ERR(irq)) {
   1128		if (new_coll)
   1129			vgic_its_free_collection(its, coll_id);
   1130		its_free_ite(kvm, ite);
   1131		return PTR_ERR(irq);
   1132	}
   1133	ite->irq = irq;
   1134
   1135	return 0;
   1136}
   1137
   1138/* Requires the its_lock to be held. */
   1139static void vgic_its_free_device(struct kvm *kvm, struct its_device *device)
   1140{
   1141	struct its_ite *ite, *temp;
   1142
   1143	/*
   1144	 * The spec says that unmapping a device with still valid
   1145	 * ITTEs associated is UNPREDICTABLE. We remove all ITTEs,
   1146	 * since we cannot leave the memory unreferenced.
   1147	 */
   1148	list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list)
   1149		its_free_ite(kvm, ite);
   1150
   1151	vgic_its_invalidate_cache(kvm);
   1152
   1153	list_del(&device->dev_list);
   1154	kfree(device);
   1155}
   1156
   1157/* its lock must be held */
   1158static void vgic_its_free_device_list(struct kvm *kvm, struct vgic_its *its)
   1159{
   1160	struct its_device *cur, *temp;
   1161
   1162	list_for_each_entry_safe(cur, temp, &its->device_list, dev_list)
   1163		vgic_its_free_device(kvm, cur);
   1164}
   1165
   1166/* its lock must be held */
   1167static void vgic_its_free_collection_list(struct kvm *kvm, struct vgic_its *its)
   1168{
   1169	struct its_collection *cur, *temp;
   1170
   1171	list_for_each_entry_safe(cur, temp, &its->collection_list, coll_list)
   1172		vgic_its_free_collection(its, cur->collection_id);
   1173}
   1174
   1175/* Must be called with its_lock mutex held */
   1176static struct its_device *vgic_its_alloc_device(struct vgic_its *its,
   1177						u32 device_id, gpa_t itt_addr,
   1178						u8 num_eventid_bits)
   1179{
   1180	struct its_device *device;
   1181
   1182	device = kzalloc(sizeof(*device), GFP_KERNEL_ACCOUNT);
   1183	if (!device)
   1184		return ERR_PTR(-ENOMEM);
   1185
   1186	device->device_id = device_id;
   1187	device->itt_addr = itt_addr;
   1188	device->num_eventid_bits = num_eventid_bits;
   1189	INIT_LIST_HEAD(&device->itt_head);
   1190
   1191	list_add_tail(&device->dev_list, &its->device_list);
   1192	return device;
   1193}
   1194
   1195/*
   1196 * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs).
   1197 * Must be called with the its_lock mutex held.
   1198 */
   1199static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
   1200				    u64 *its_cmd)
   1201{
   1202	u32 device_id = its_cmd_get_deviceid(its_cmd);
   1203	bool valid = its_cmd_get_validbit(its_cmd);
   1204	u8 num_eventid_bits = its_cmd_get_size(its_cmd);
   1205	gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd);
   1206	struct its_device *device;
   1207
   1208	if (!vgic_its_check_id(its, its->baser_device_table, device_id, NULL))
   1209		return E_ITS_MAPD_DEVICE_OOR;
   1210
   1211	if (valid && num_eventid_bits > VITS_TYPER_IDBITS)
   1212		return E_ITS_MAPD_ITTSIZE_OOR;
   1213
   1214	device = find_its_device(its, device_id);
   1215
   1216	/*
   1217	 * The spec says that calling MAPD on an already mapped device
   1218	 * invalidates all cached data for this device. We implement this
   1219	 * by removing the mapping and re-establishing it.
   1220	 */
   1221	if (device)
   1222		vgic_its_free_device(kvm, device);
   1223
   1224	/*
   1225	 * The spec does not say whether unmapping a not-mapped device
   1226	 * is an error, so we are done in any case.
   1227	 */
   1228	if (!valid)
   1229		return 0;
   1230
   1231	device = vgic_its_alloc_device(its, device_id, itt_addr,
   1232				       num_eventid_bits);
   1233
   1234	return PTR_ERR_OR_ZERO(device);
   1235}
   1236
   1237/*
   1238 * The MAPC command maps collection IDs to redistributors.
   1239 * Must be called with the its_lock mutex held.
   1240 */
   1241static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
   1242				    u64 *its_cmd)
   1243{
   1244	u16 coll_id;
   1245	u32 target_addr;
   1246	struct its_collection *collection;
   1247	bool valid;
   1248
   1249	valid = its_cmd_get_validbit(its_cmd);
   1250	coll_id = its_cmd_get_collection(its_cmd);
   1251	target_addr = its_cmd_get_target_addr(its_cmd);
   1252
   1253	if (target_addr >= atomic_read(&kvm->online_vcpus))
   1254		return E_ITS_MAPC_PROCNUM_OOR;
   1255
   1256	if (!valid) {
   1257		vgic_its_free_collection(its, coll_id);
   1258		vgic_its_invalidate_cache(kvm);
   1259	} else {
   1260		collection = find_collection(its, coll_id);
   1261
   1262		if (!collection) {
   1263			int ret;
   1264
   1265			if (!vgic_its_check_id(its, its->baser_coll_table,
   1266						coll_id, NULL))
   1267				return E_ITS_MAPC_COLLECTION_OOR;
   1268
   1269			ret = vgic_its_alloc_collection(its, &collection,
   1270							coll_id);
   1271			if (ret)
   1272				return ret;
   1273			collection->target_addr = target_addr;
   1274		} else {
   1275			collection->target_addr = target_addr;
   1276			update_affinity_collection(kvm, its, collection);
   1277		}
   1278	}
   1279
   1280	return 0;
   1281}
   1282
   1283/*
   1284 * The CLEAR command removes the pending state for a particular LPI.
   1285 * Must be called with the its_lock mutex held.
   1286 */
   1287static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
   1288				     u64 *its_cmd)
   1289{
   1290	u32 device_id = its_cmd_get_deviceid(its_cmd);
   1291	u32 event_id = its_cmd_get_id(its_cmd);
   1292	struct its_ite *ite;
   1293
   1294
   1295	ite = find_ite(its, device_id, event_id);
   1296	if (!ite)
   1297		return E_ITS_CLEAR_UNMAPPED_INTERRUPT;
   1298
   1299	ite->irq->pending_latch = false;
   1300
   1301	if (ite->irq->hw)
   1302		return irq_set_irqchip_state(ite->irq->host_irq,
   1303					     IRQCHIP_STATE_PENDING, false);
   1304
   1305	return 0;
   1306}
   1307
   1308int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq)
   1309{
   1310	return update_lpi_config(kvm, irq, NULL, true);
   1311}
   1312
   1313/*
   1314 * The INV command syncs the configuration bits from the memory table.
   1315 * Must be called with the its_lock mutex held.
   1316 */
   1317static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
   1318				   u64 *its_cmd)
   1319{
   1320	u32 device_id = its_cmd_get_deviceid(its_cmd);
   1321	u32 event_id = its_cmd_get_id(its_cmd);
   1322	struct its_ite *ite;
   1323
   1324
   1325	ite = find_ite(its, device_id, event_id);
   1326	if (!ite)
   1327		return E_ITS_INV_UNMAPPED_INTERRUPT;
   1328
   1329	return vgic_its_inv_lpi(kvm, ite->irq);
   1330}
   1331
   1332/**
    1333 * vgic_its_invall - invalidate all LPIs targeting a given vcpu
    1334 * @vcpu: the vcpu for which the RD is targeted by an invalidation
    1335 *
    1336 * Contrary to the INVALL command, this targets an RD instead of a
   1337 * collection, and we don't need to hold the its_lock, since no ITS is
   1338 * involved here.
   1339 */
   1340int vgic_its_invall(struct kvm_vcpu *vcpu)
   1341{
   1342	struct kvm *kvm = vcpu->kvm;
   1343	int irq_count, i = 0;
   1344	u32 *intids;
   1345
   1346	irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
   1347	if (irq_count < 0)
   1348		return irq_count;
   1349
   1350	for (i = 0; i < irq_count; i++) {
   1351		struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intids[i]);
   1352		if (!irq)
   1353			continue;
   1354		update_lpi_config(kvm, irq, vcpu, false);
   1355		vgic_put_irq(kvm, irq);
   1356	}
   1357
   1358	kfree(intids);
   1359
   1360	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
   1361		its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
   1362
   1363	return 0;
   1364}
   1365
   1366/*
   1367 * The INVALL command requests flushing of all IRQ data in this collection.
   1368 * Find the VCPU mapped to that collection, then iterate over the VM's list
   1369 * of mapped LPIs and update the configuration for each IRQ which targets
   1370 * the specified vcpu. The configuration will be read from the in-memory
   1371 * configuration table.
   1372 * Must be called with the its_lock mutex held.
   1373 */
   1374static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
   1375				      u64 *its_cmd)
   1376{
   1377	u32 coll_id = its_cmd_get_collection(its_cmd);
   1378	struct its_collection *collection;
   1379	struct kvm_vcpu *vcpu;
   1380
   1381	collection = find_collection(its, coll_id);
   1382	if (!its_is_collection_mapped(collection))
   1383		return E_ITS_INVALL_UNMAPPED_COLLECTION;
   1384
   1385	vcpu = kvm_get_vcpu(kvm, collection->target_addr);
   1386	vgic_its_invall(vcpu);
   1387
   1388	return 0;
   1389}
   1390
   1391/*
   1392 * The MOVALL command moves the pending state of all IRQs targeting one
   1393 * redistributor to another. We don't hold the pending state in the VCPUs,
   1394 * but in the IRQs instead, so there is really not much to do for us here.
   1395 * However the spec says that no IRQ must target the old redistributor
   1396 * afterwards, so we make sure that no LPI is using the associated target_vcpu.
   1397 * This command affects all LPIs in the system that target that redistributor.
   1398 */
   1399static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
   1400				      u64 *its_cmd)
   1401{
   1402	u32 target1_addr = its_cmd_get_target_addr(its_cmd);
   1403	u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32);
   1404	struct kvm_vcpu *vcpu1, *vcpu2;
   1405	struct vgic_irq *irq;
   1406	u32 *intids;
   1407	int irq_count, i;
   1408
   1409	if (target1_addr >= atomic_read(&kvm->online_vcpus) ||
   1410	    target2_addr >= atomic_read(&kvm->online_vcpus))
   1411		return E_ITS_MOVALL_PROCNUM_OOR;
   1412
   1413	if (target1_addr == target2_addr)
   1414		return 0;
   1415
   1416	vcpu1 = kvm_get_vcpu(kvm, target1_addr);
   1417	vcpu2 = kvm_get_vcpu(kvm, target2_addr);
   1418
   1419	irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids);
   1420	if (irq_count < 0)
   1421		return irq_count;
   1422
   1423	for (i = 0; i < irq_count; i++) {
   1424		irq = vgic_get_irq(kvm, NULL, intids[i]);
   1425
   1426		update_affinity(irq, vcpu2);
   1427
   1428		vgic_put_irq(kvm, irq);
   1429	}
   1430
   1431	vgic_its_invalidate_cache(kvm);
   1432
   1433	kfree(intids);
   1434	return 0;
   1435}
   1436
   1437/*
   1438 * The INT command injects the LPI associated with that DevID/EvID pair.
   1439 * Must be called with the its_lock mutex held.
   1440 */
   1441static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its,
   1442				   u64 *its_cmd)
   1443{
   1444	u32 msi_data = its_cmd_get_id(its_cmd);
   1445	u64 msi_devid = its_cmd_get_deviceid(its_cmd);
   1446
   1447	return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
   1448}
   1449
   1450/*
   1451 * This function is called with the its_cmd lock held, but the ITS data
   1452 * structure lock dropped.
   1453 */
   1454static int vgic_its_handle_command(struct kvm *kvm, struct vgic_its *its,
   1455				   u64 *its_cmd)
   1456{
   1457	int ret = -ENODEV;
   1458
   1459	mutex_lock(&its->its_lock);
   1460	switch (its_cmd_get_command(its_cmd)) {
   1461	case GITS_CMD_MAPD:
   1462		ret = vgic_its_cmd_handle_mapd(kvm, its, its_cmd);
   1463		break;
   1464	case GITS_CMD_MAPC:
   1465		ret = vgic_its_cmd_handle_mapc(kvm, its, its_cmd);
   1466		break;
   1467	case GITS_CMD_MAPI:
   1468		ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd);
   1469		break;
   1470	case GITS_CMD_MAPTI:
   1471		ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd);
   1472		break;
   1473	case GITS_CMD_MOVI:
   1474		ret = vgic_its_cmd_handle_movi(kvm, its, its_cmd);
   1475		break;
   1476	case GITS_CMD_DISCARD:
   1477		ret = vgic_its_cmd_handle_discard(kvm, its, its_cmd);
   1478		break;
   1479	case GITS_CMD_CLEAR:
   1480		ret = vgic_its_cmd_handle_clear(kvm, its, its_cmd);
   1481		break;
   1482	case GITS_CMD_MOVALL:
   1483		ret = vgic_its_cmd_handle_movall(kvm, its, its_cmd);
   1484		break;
   1485	case GITS_CMD_INT:
   1486		ret = vgic_its_cmd_handle_int(kvm, its, its_cmd);
   1487		break;
   1488	case GITS_CMD_INV:
   1489		ret = vgic_its_cmd_handle_inv(kvm, its, its_cmd);
   1490		break;
   1491	case GITS_CMD_INVALL:
   1492		ret = vgic_its_cmd_handle_invall(kvm, its, its_cmd);
   1493		break;
   1494	case GITS_CMD_SYNC:
   1495		/* we ignore this command: we are in sync all of the time */
   1496		ret = 0;
   1497		break;
   1498	}
   1499	mutex_unlock(&its->its_lock);
   1500
   1501	return ret;
   1502}
   1503
   1504static u64 vgic_sanitise_its_baser(u64 reg)
   1505{
   1506	reg = vgic_sanitise_field(reg, GITS_BASER_SHAREABILITY_MASK,
   1507				  GITS_BASER_SHAREABILITY_SHIFT,
   1508				  vgic_sanitise_shareability);
   1509	reg = vgic_sanitise_field(reg, GITS_BASER_INNER_CACHEABILITY_MASK,
   1510				  GITS_BASER_INNER_CACHEABILITY_SHIFT,
   1511				  vgic_sanitise_inner_cacheability);
   1512	reg = vgic_sanitise_field(reg, GITS_BASER_OUTER_CACHEABILITY_MASK,
   1513				  GITS_BASER_OUTER_CACHEABILITY_SHIFT,
   1514				  vgic_sanitise_outer_cacheability);
   1515
   1516	/* We support only one (ITS) page size: 64K */
   1517	reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K;
   1518
   1519	return reg;
   1520}
   1521
   1522static u64 vgic_sanitise_its_cbaser(u64 reg)
   1523{
   1524	reg = vgic_sanitise_field(reg, GITS_CBASER_SHAREABILITY_MASK,
   1525				  GITS_CBASER_SHAREABILITY_SHIFT,
   1526				  vgic_sanitise_shareability);
   1527	reg = vgic_sanitise_field(reg, GITS_CBASER_INNER_CACHEABILITY_MASK,
   1528				  GITS_CBASER_INNER_CACHEABILITY_SHIFT,
   1529				  vgic_sanitise_inner_cacheability);
   1530	reg = vgic_sanitise_field(reg, GITS_CBASER_OUTER_CACHEABILITY_MASK,
   1531				  GITS_CBASER_OUTER_CACHEABILITY_SHIFT,
   1532				  vgic_sanitise_outer_cacheability);
   1533
   1534	/* Sanitise the physical address to be 64k aligned. */
   1535	reg &= ~GENMASK_ULL(15, 12);
   1536
   1537	return reg;
   1538}
   1539
   1540static unsigned long vgic_mmio_read_its_cbaser(struct kvm *kvm,
   1541					       struct vgic_its *its,
   1542					       gpa_t addr, unsigned int len)
   1543{
   1544	return extract_bytes(its->cbaser, addr & 7, len);
   1545}
   1546
   1547static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its,
   1548				       gpa_t addr, unsigned int len,
   1549				       unsigned long val)
   1550{
   1551	/* When GITS_CTLR.Enable is 1, this register is RO. */
   1552	if (its->enabled)
   1553		return;
   1554
   1555	mutex_lock(&its->cmd_lock);
   1556	its->cbaser = update_64bit_reg(its->cbaser, addr & 7, len, val);
   1557	its->cbaser = vgic_sanitise_its_cbaser(its->cbaser);
   1558	its->creadr = 0;
   1559	/*
   1560	 * CWRITER is architecturally UNKNOWN on reset, but we need to reset
   1561	 * it to CREADR to make sure we start with an empty command buffer.
   1562	 */
   1563	its->cwriter = its->creadr;
   1564	mutex_unlock(&its->cmd_lock);
   1565}
   1566
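/*
 * GITS_CBASER.Size holds the number of 4K pages minus one; each command is
 * 32 bytes, and CREADR/CWRITER are byte offsets aligned to the command size
 * (bits [19:5]).
 */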
   1567#define ITS_CMD_BUFFER_SIZE(baser)	((((baser) & 0xff) + 1) << 12)
   1568#define ITS_CMD_SIZE			32
   1569#define ITS_CMD_OFFSET(reg)		((reg) & GENMASK(19, 5))
   1570
   1571/* Must be called with the cmd_lock held. */
   1572static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
   1573{
   1574	gpa_t cbaser;
   1575	u64 cmd_buf[4];
   1576
   1577	/* Commands are only processed when the ITS is enabled. */
   1578	if (!its->enabled)
   1579		return;
   1580
   1581	cbaser = GITS_CBASER_ADDRESS(its->cbaser);
   1582
   1583	while (its->cwriter != its->creadr) {
   1584		int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr,
   1585					      cmd_buf, ITS_CMD_SIZE);
   1586		/*
   1587		 * If kvm_read_guest() fails, this could be due to the guest
   1588		 * programming a bogus value in CBASER or something else going
   1589		 * wrong from which we cannot easily recover.
   1590		 * According to section 6.3.2 in the GICv3 spec we can just
   1591		 * ignore that command then.
   1592		 */
   1593		if (!ret)
   1594			vgic_its_handle_command(kvm, its, cmd_buf);
   1595
   1596		its->creadr += ITS_CMD_SIZE;
   1597		if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
   1598			its->creadr = 0;
   1599	}
   1600}
   1601
   1602/*
   1603 * By writing to CWRITER the guest announces new commands to be processed.
   1604 * To avoid any races in the first place, we take the its_cmd lock, which
   1605 * protects our ring buffer variables, so that there is only one user
   1606 * per ITS handling commands at a given time.
   1607 */
   1608static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
   1609					gpa_t addr, unsigned int len,
   1610					unsigned long val)
   1611{
   1612	u64 reg;
   1613
   1614	if (!its)
   1615		return;
   1616
   1617	mutex_lock(&its->cmd_lock);
   1618
   1619	reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
   1620	reg = ITS_CMD_OFFSET(reg);
   1621	if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
   1622		mutex_unlock(&its->cmd_lock);
   1623		return;
   1624	}
   1625	its->cwriter = reg;
   1626
   1627	vgic_its_process_commands(kvm, its);
   1628
   1629	mutex_unlock(&its->cmd_lock);
   1630}
   1631
   1632static unsigned long vgic_mmio_read_its_cwriter(struct kvm *kvm,
   1633						struct vgic_its *its,
   1634						gpa_t addr, unsigned int len)
   1635{
   1636	return extract_bytes(its->cwriter, addr & 0x7, len);
   1637}
   1638
   1639static unsigned long vgic_mmio_read_its_creadr(struct kvm *kvm,
   1640					       struct vgic_its *its,
   1641					       gpa_t addr, unsigned int len)
   1642{
   1643	return extract_bytes(its->creadr, addr & 0x7, len);
   1644}
   1645
   1646static int vgic_mmio_uaccess_write_its_creadr(struct kvm *kvm,
   1647					      struct vgic_its *its,
   1648					      gpa_t addr, unsigned int len,
   1649					      unsigned long val)
   1650{
   1651	u32 cmd_offset;
   1652	int ret = 0;
   1653
   1654	mutex_lock(&its->cmd_lock);
   1655
   1656	if (its->enabled) {
   1657		ret = -EBUSY;
   1658		goto out;
   1659	}
   1660
   1661	cmd_offset = ITS_CMD_OFFSET(val);
   1662	if (cmd_offset >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
   1663		ret = -EINVAL;
   1664		goto out;
   1665	}
   1666
   1667	its->creadr = cmd_offset;
   1668out:
   1669	mutex_unlock(&its->cmd_lock);
   1670	return ret;
   1671}
   1672
   1673#define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7)
   1674static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm,
   1675					      struct vgic_its *its,
   1676					      gpa_t addr, unsigned int len)
   1677{
   1678	u64 reg;
   1679
   1680	switch (BASER_INDEX(addr)) {
   1681	case 0:
   1682		reg = its->baser_device_table;
   1683		break;
   1684	case 1:
   1685		reg = its->baser_coll_table;
   1686		break;
   1687	default:
   1688		reg = 0;
   1689		break;
   1690	}
   1691
   1692	return extract_bytes(reg, addr & 7, len);
   1693}
   1694
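/*
 * The entry size (bits [52:48]) and type (bits [58:56]) fields of
 * GITS_BASER are read-only to the guest.
 */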
   1695#define GITS_BASER_RO_MASK	(GENMASK_ULL(52, 48) | GENMASK_ULL(58, 56))
   1696static void vgic_mmio_write_its_baser(struct kvm *kvm,
   1697				      struct vgic_its *its,
   1698				      gpa_t addr, unsigned int len,
   1699				      unsigned long val)
   1700{
   1701	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
   1702	u64 entry_size, table_type;
   1703	u64 reg, *regptr, clearbits = 0;
   1704
   1705	/* When GITS_CTLR.Enable is 1, we ignore write accesses. */
   1706	if (its->enabled)
   1707		return;
   1708
   1709	switch (BASER_INDEX(addr)) {
   1710	case 0:
   1711		regptr = &its->baser_device_table;
   1712		entry_size = abi->dte_esz;
   1713		table_type = GITS_BASER_TYPE_DEVICE;
   1714		break;
   1715	case 1:
   1716		regptr = &its->baser_coll_table;
   1717		entry_size = abi->cte_esz;
   1718		table_type = GITS_BASER_TYPE_COLLECTION;
   1719		clearbits = GITS_BASER_INDIRECT;
   1720		break;
   1721	default:
   1722		return;
   1723	}
   1724
   1725	reg = update_64bit_reg(*regptr, addr & 7, len, val);
   1726	reg &= ~GITS_BASER_RO_MASK;
   1727	reg &= ~clearbits;
   1728
   1729	reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT;
   1730	reg |= table_type << GITS_BASER_TYPE_SHIFT;
   1731	reg = vgic_sanitise_its_baser(reg);
   1732
   1733	*regptr = reg;
   1734
   1735	if (!(reg & GITS_BASER_VALID)) {
   1736		/* Take the its_lock to prevent a race with a save/restore */
   1737		mutex_lock(&its->its_lock);
   1738		switch (table_type) {
   1739		case GITS_BASER_TYPE_DEVICE:
   1740			vgic_its_free_device_list(kvm, its);
   1741			break;
   1742		case GITS_BASER_TYPE_COLLECTION:
   1743			vgic_its_free_collection_list(kvm, its);
   1744			break;
   1745		}
   1746		mutex_unlock(&its->its_lock);
   1747	}
   1748}
   1749
   1750static unsigned long vgic_mmio_read_its_ctlr(struct kvm *kvm,
   1751					     struct vgic_its *its,
   1752					     gpa_t addr, unsigned int len)
   1753{
   1754	u32 reg = 0;
   1755
   1756	mutex_lock(&its->cmd_lock);
   1757	if (its->creadr == its->cwriter)
   1758		reg |= GITS_CTLR_QUIESCENT;
   1759	if (its->enabled)
   1760		reg |= GITS_CTLR_ENABLE;
   1761	mutex_unlock(&its->cmd_lock);
   1762
   1763	return reg;
   1764}
   1765
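       /*
        * Write handler for GITS_CTLR: enabling is refused while CBASER or either
        * BASER is invalid, disabling invalidates the LPI translation cache, and
        * any commands already queued are processed once the ITS is enabled.
        */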
   1766static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
   1767				     gpa_t addr, unsigned int len,
   1768				     unsigned long val)
   1769{
   1770	mutex_lock(&its->cmd_lock);
   1771
   1772	/*
   1773	 * It is UNPREDICTABLE to enable the ITS if the CBASER or the
   1774	 * device/collection BASERs are invalid
   1775	 */
   1776	if (!its->enabled && (val & GITS_CTLR_ENABLE) &&
   1777		(!(its->baser_device_table & GITS_BASER_VALID) ||
   1778		 !(its->baser_coll_table & GITS_BASER_VALID) ||
   1779		 !(its->cbaser & GITS_CBASER_VALID)))
   1780		goto out;
   1781
   1782	its->enabled = !!(val & GITS_CTLR_ENABLE);
   1783	if (!its->enabled)
   1784		vgic_its_invalidate_cache(kvm);
   1785
   1786	/*
   1787	 * Try to process any pending commands. This function bails out early
   1788	 * if the ITS is disabled or no commands have been queued.
   1789	 */
   1790	vgic_its_process_commands(kvm, its);
   1791
   1792out:
   1793	mutex_unlock(&its->cmd_lock);
   1794}
   1795
   1796#define REGISTER_ITS_DESC(off, rd, wr, length, acc)		\
   1797{								\
   1798	.reg_offset = off,					\
   1799	.len = length,						\
   1800	.access_flags = acc,					\
   1801	.its_read = rd,						\
   1802	.its_write = wr,					\
   1803}
   1804
   1805#define REGISTER_ITS_DESC_UACCESS(off, rd, wr, uwr, length, acc)\
   1806{								\
   1807	.reg_offset = off,					\
   1808	.len = length,						\
   1809	.access_flags = acc,					\
   1810	.its_read = rd,						\
   1811	.its_write = wr,					\
   1812	.uaccess_its_write = uwr,				\
   1813}
   1814
   1815static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its,
   1816			      gpa_t addr, unsigned int len, unsigned long val)
   1817{
   1818	/* Ignore */
   1819}
   1820
   1821static struct vgic_register_region its_registers[] = {
   1822	REGISTER_ITS_DESC(GITS_CTLR,
   1823		vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4,
   1824		VGIC_ACCESS_32bit),
   1825	REGISTER_ITS_DESC_UACCESS(GITS_IIDR,
   1826		vgic_mmio_read_its_iidr, its_mmio_write_wi,
   1827		vgic_mmio_uaccess_write_its_iidr, 4,
   1828		VGIC_ACCESS_32bit),
   1829	REGISTER_ITS_DESC(GITS_TYPER,
   1830		vgic_mmio_read_its_typer, its_mmio_write_wi, 8,
   1831		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
   1832	REGISTER_ITS_DESC(GITS_CBASER,
   1833		vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, 8,
   1834		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
   1835	REGISTER_ITS_DESC(GITS_CWRITER,
   1836		vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8,
   1837		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
   1838	REGISTER_ITS_DESC_UACCESS(GITS_CREADR,
   1839		vgic_mmio_read_its_creadr, its_mmio_write_wi,
   1840		vgic_mmio_uaccess_write_its_creadr, 8,
   1841		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
   1842	REGISTER_ITS_DESC(GITS_BASER,
   1843		vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40,
   1844		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
   1845	REGISTER_ITS_DESC(GITS_IDREGS_BASE,
   1846		vgic_mmio_read_its_idregs, its_mmio_write_wi, 0x30,
   1847		VGIC_ACCESS_32bit),
   1848};
   1849
   1850/* This is called on setting the LPI enable bit in the redistributor. */
   1851void vgic_enable_lpis(struct kvm_vcpu *vcpu)
   1852{
   1853	if (!(vcpu->arch.vgic_cpu.pendbaser & GICR_PENDBASER_PTZ))
   1854		its_sync_lpi_pending_table(vcpu);
   1855}
   1856
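       /*
        * Register the ITS MMIO frame (KVM_VGIC_V3_ITS_SIZE bytes) at @addr on the
        * KVM MMIO bus. Fails with -EBUSY if a base address was already set.
        */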
   1857static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its,
   1858				   u64 addr)
   1859{
   1860	struct vgic_io_device *iodev = &its->iodev;
   1861	int ret;
   1862
   1863	mutex_lock(&kvm->slots_lock);
   1864	if (!IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) {
   1865		ret = -EBUSY;
   1866		goto out;
   1867	}
   1868
   1869	its->vgic_its_base = addr;
   1870	iodev->regions = its_registers;
   1871	iodev->nr_regions = ARRAY_SIZE(its_registers);
   1872	kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops);
   1873
   1874	iodev->base_addr = its->vgic_its_base;
   1875	iodev->iodev_type = IODEV_ITS;
   1876	iodev->its = its;
   1877	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr,
   1878				      KVM_VGIC_V3_ITS_SIZE, &iodev->dev);
   1879out:
   1880	mutex_unlock(&kvm->slots_lock);
   1881
   1882	return ret;
   1883}
   1884
   1885/* Default is 16 cached LPIs per vcpu */
   1886#define LPI_DEFAULT_PCPU_CACHE_SIZE	16
   1887
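       /*
        * Pre-allocate the LPI translation cache: LPI_DEFAULT_PCPU_CACHE_SIZE
        * entries per online vCPU. An allocation failure merely shrinks the cache.
        */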
   1888void vgic_lpi_translation_cache_init(struct kvm *kvm)
   1889{
   1890	struct vgic_dist *dist = &kvm->arch.vgic;
   1891	unsigned int sz;
   1892	int i;
   1893
   1894	if (!list_empty(&dist->lpi_translation_cache))
   1895		return;
   1896
   1897	sz = atomic_read(&kvm->online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE;
   1898
   1899	for (i = 0; i < sz; i++) {
   1900		struct vgic_translation_cache_entry *cte;
   1901
   1902		/* An allocation failure is not fatal */
   1903		cte = kzalloc(sizeof(*cte), GFP_KERNEL_ACCOUNT);
   1904		if (WARN_ON(!cte))
   1905			break;
   1906
   1907		INIT_LIST_HEAD(&cte->entry);
   1908		list_add(&cte->entry, &dist->lpi_translation_cache);
   1909	}
   1910}
   1911
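       /* Invalidate the LPI translation cache and free all pre-allocated entries. */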
   1912void vgic_lpi_translation_cache_destroy(struct kvm *kvm)
   1913{
   1914	struct vgic_dist *dist = &kvm->arch.vgic;
   1915	struct vgic_translation_cache_entry *cte, *tmp;
   1916
   1917	vgic_its_invalidate_cache(kvm);
   1918
   1919	list_for_each_entry_safe(cte, tmp,
   1920				 &dist->lpi_translation_cache, entry) {
   1921		list_del(&cte->entry);
   1922		kfree(cte);
   1923	}
   1924}
   1925
   1926#define INITIAL_BASER_VALUE						  \
   1927	(GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)		| \
   1928	 GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner)		| \
   1929	 GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)		| \
   1930	 GITS_BASER_PAGE_SIZE_64K)
   1931
   1932#define INITIAL_PROPBASER_VALUE						  \
   1933	(GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb)		| \
   1934	 GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, SameAsInner)	| \
   1935	 GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable))
   1936
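       /*
        * KVM device "create" callback for KVM_DEV_TYPE_ARM_VGIC_ITS: allocates the
        * ITS state, sets up the default BASER/PROPBASER values and selects the
        * most recent ABI revision (NR_ITS_ABIS - 1).
        */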
   1937static int vgic_its_create(struct kvm_device *dev, u32 type)
   1938{
   1939	struct vgic_its *its;
   1940
   1941	if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
   1942		return -ENODEV;
   1943
   1944	its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL_ACCOUNT);
   1945	if (!its)
   1946		return -ENOMEM;
   1947
   1948	if (vgic_initialized(dev->kvm)) {
   1949		int ret = vgic_v4_init(dev->kvm);
   1950		if (ret < 0) {
   1951			kfree(its);
   1952			return ret;
   1953		}
   1954
   1955		vgic_lpi_translation_cache_init(dev->kvm);
   1956	}
   1957
   1958	mutex_init(&its->its_lock);
   1959	mutex_init(&its->cmd_lock);
   1960
   1961	its->vgic_its_base = VGIC_ADDR_UNDEF;
   1962
   1963	INIT_LIST_HEAD(&its->device_list);
   1964	INIT_LIST_HEAD(&its->collection_list);
   1965
   1966	dev->kvm->arch.vgic.msis_require_devid = true;
   1967	dev->kvm->arch.vgic.has_its = true;
   1968	its->enabled = false;
   1969	its->dev = dev;
   1970
   1971	its->baser_device_table = INITIAL_BASER_VALUE			|
   1972		((u64)GITS_BASER_TYPE_DEVICE << GITS_BASER_TYPE_SHIFT);
   1973	its->baser_coll_table = INITIAL_BASER_VALUE |
   1974		((u64)GITS_BASER_TYPE_COLLECTION << GITS_BASER_TYPE_SHIFT);
   1975	dev->kvm->arch.vgic.propbaser = INITIAL_PROPBASER_VALUE;
   1976
   1977	dev->private = its;
   1978
   1979	return vgic_its_set_abi(its, NR_ITS_ABIS - 1);
   1980}
   1981
   1982static void vgic_its_destroy(struct kvm_device *kvm_dev)
   1983{
   1984	struct kvm *kvm = kvm_dev->kvm;
   1985	struct vgic_its *its = kvm_dev->private;
   1986
   1987	mutex_lock(&its->its_lock);
   1988
   1989	vgic_its_free_device_list(kvm, its);
   1990	vgic_its_free_collection_list(kvm, its);
   1991
   1992	mutex_unlock(&its->its_lock);
   1993	kfree(its);
   1994	kfree(kvm_dev); /* allocated by kvm_ioctl_create_device(), freed by .destroy */
   1995}
   1996
   1997static int vgic_its_has_attr_regs(struct kvm_device *dev,
   1998				  struct kvm_device_attr *attr)
   1999{
   2000	const struct vgic_register_region *region;
   2001	gpa_t offset = attr->attr;
   2002	int align;
   2003
   2004	align = (offset < GITS_TYPER) || (offset >= GITS_PIDR4) ? 0x3 : 0x7;
   2005
   2006	if (offset & align)
   2007		return -EINVAL;
   2008
   2009	region = vgic_find_mmio_region(its_registers,
   2010				       ARRAY_SIZE(its_registers),
   2011				       offset);
   2012	if (!region)
   2013		return -ENXIO;
   2014
   2015	return 0;
   2016}
   2017
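       /*
        * Userspace access to a single ITS register via the
        * KVM_DEV_ARM_VGIC_GRP_ITS_REGS attribute group. Requires the ITS base
        * address to be set and all vCPUs to be lockable.
        */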
   2018static int vgic_its_attr_regs_access(struct kvm_device *dev,
   2019				     struct kvm_device_attr *attr,
   2020				     u64 *reg, bool is_write)
   2021{
   2022	const struct vgic_register_region *region;
   2023	struct vgic_its *its;
   2024	gpa_t addr, offset;
   2025	unsigned int len;
   2026	int align, ret = 0;
   2027
   2028	its = dev->private;
   2029	offset = attr->attr;
   2030
   2031	/*
   2032	 * Although the spec supports upper/lower 32-bit accesses to
   2033	 * 64-bit ITS registers, the userspace ABI requires 64-bit
   2034	 * accesses to all 64-bit wide registers. We therefore only
   2035	 * support 32-bit accesses to GITS_CTLR, GITS_IIDR and GITS ID
   2036	 * registers
   2037	 */
   2038	if ((offset < GITS_TYPER) || (offset >= GITS_PIDR4))
   2039		align = 0x3;
   2040	else
   2041		align = 0x7;
   2042
   2043	if (offset & align)
   2044		return -EINVAL;
   2045
   2046	mutex_lock(&dev->kvm->lock);
   2047
   2048	if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) {
   2049		ret = -ENXIO;
   2050		goto out;
   2051	}
   2052
   2053	region = vgic_find_mmio_region(its_registers,
   2054				       ARRAY_SIZE(its_registers),
   2055				       offset);
   2056	if (!region) {
   2057		ret = -ENXIO;
   2058		goto out;
   2059	}
   2060
   2061	if (!lock_all_vcpus(dev->kvm)) {
   2062		ret = -EBUSY;
   2063		goto out;
   2064	}
   2065
   2066	addr = its->vgic_its_base + offset;
   2067
   2068	len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4;
   2069
   2070	if (is_write) {
   2071		if (region->uaccess_its_write)
   2072			ret = region->uaccess_its_write(dev->kvm, its, addr,
   2073							len, *reg);
   2074		else
   2075			region->its_write(dev->kvm, its, addr, len, *reg);
   2076	} else {
   2077		*reg = region->its_read(dev->kvm, its, addr, len);
   2078	}
   2079	unlock_all_vcpus(dev->kvm);
   2080out:
   2081	mutex_unlock(&dev->kvm->lock);
   2082	return ret;
   2083}
   2084
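       /*
        * Distance (in device IDs) from @dev to the next device on the list,
        * clamped to VITS_DTE_MAX_DEVID_OFFSET; 0 means @dev is the last entry.
        * Used to encode the "next" field of a saved device table entry.
        */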
   2085static u32 compute_next_devid_offset(struct list_head *h,
   2086				     struct its_device *dev)
   2087{
   2088	struct its_device *next;
   2089	u32 next_offset;
   2090
   2091	if (list_is_last(&dev->dev_list, h))
   2092		return 0;
   2093	next = list_next_entry(dev, dev_list);
   2094	next_offset = next->device_id - dev->device_id;
   2095
   2096	return min_t(u32, next_offset, VITS_DTE_MAX_DEVID_OFFSET);
   2097}
   2098
   2099static u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite)
   2100{
   2101	struct its_ite *next;
   2102	u32 next_offset;
   2103
   2104	if (list_is_last(&ite->ite_list, h))
   2105		return 0;
   2106	next = list_next_entry(ite, ite_list);
   2107	next_offset = next->event_id - ite->event_id;
   2108
   2109	return min_t(u32, next_offset, VITS_ITE_MAX_EVENTID_OFFSET);
   2110}
   2111
   2112/**
   2113 * entry_fn_t - Callback called on a table entry restore path
   2114 * @its: its handle
   2115 * @id: id of the entry
   2116 * @entry: pointer to the entry
   2117 * @opaque: pointer to opaque data
   2118 *
   2119 * Return: < 0 on error, 0 if last element was identified, id offset to next
   2120 * element otherwise
   2121 */
   2122typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry,
   2123			  void *opaque);
   2124
   2125/**
   2126 * scan_its_table - Scan a contiguous table in guest RAM and apply a function
   2127 * to each entry
   2128 *
   2129 * @its: its handle
   2130 * @base: base gpa of the table
   2131 * @size: size of the table in bytes
   2132 * @esz: entry size in bytes
   2133 * @start_id: the ID of the first entry in the table
   2134 * (non-zero for 2nd level tables)
   2135 * @fn: function to apply on each entry
   2136 *
   2137 * Return: < 0 on error, 0 if last element was identified, 1 otherwise
   2138 * (the last element may not be found on second level tables)
   2139 */
   2140static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
   2141			  int start_id, entry_fn_t fn, void *opaque)
   2142{
   2143	struct kvm *kvm = its->dev->kvm;
   2144	unsigned long len = size;
   2145	int id = start_id;
   2146	gpa_t gpa = base;
   2147	char entry[ESZ_MAX];
   2148	int ret;
   2149
   2150	memset(entry, 0, esz);
   2151
   2152	while (len > 0) {
   2153		int next_offset;
   2154		size_t byte_offset;
   2155
   2156		ret = kvm_read_guest_lock(kvm, gpa, entry, esz);
   2157		if (ret)
   2158			return ret;
   2159
   2160		next_offset = fn(its, id, entry, opaque);
   2161		if (next_offset <= 0)
   2162			return next_offset;
   2163
   2164		byte_offset = next_offset * esz;
   2165		id += next_offset;
   2166		gpa += byte_offset;
   2167		len -= byte_offset;
   2168	}
   2169	return 1;
   2170}
   2171
   2172/**
   2173 * vgic_its_save_ite - Save an interrupt translation entry at @gpa
   2174 */
   2175static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev,
   2176			      struct its_ite *ite, gpa_t gpa, int ite_esz)
   2177{
   2178	struct kvm *kvm = its->dev->kvm;
   2179	u32 next_offset;
   2180	u64 val;
   2181
   2182	next_offset = compute_next_eventid_offset(&dev->itt_head, ite);
   2183	val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) |
   2184	       ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) |
   2185		ite->collection->collection_id;
   2186	val = cpu_to_le64(val);
   2187	return kvm_write_guest_lock(kvm, gpa, &val, ite_esz);
   2188}
   2189
   2190/**
   2191 * vgic_its_restore_ite - restore an interrupt translation entry
   2192 * @event_id: id used for indexing
   2193 * @ptr: pointer to the ITE entry
   2194 * @opaque: pointer to the its_device
   2195 */
   2196static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
   2197				void *ptr, void *opaque)
   2198{
   2199	struct its_device *dev = opaque;
   2200	struct its_collection *collection;
   2201	struct kvm *kvm = its->dev->kvm;
   2202	struct kvm_vcpu *vcpu = NULL;
   2203	u64 val;
   2204	u64 *p = (u64 *)ptr;
   2205	struct vgic_irq *irq;
   2206	u32 coll_id, lpi_id;
   2207	struct its_ite *ite;
   2208	u32 offset;
   2209
   2210	val = *p;
   2211
   2212	val = le64_to_cpu(val);
   2213
   2214	coll_id = val & KVM_ITS_ITE_ICID_MASK;
   2215	lpi_id = (val & KVM_ITS_ITE_PINTID_MASK) >> KVM_ITS_ITE_PINTID_SHIFT;
   2216
   2217	if (!lpi_id)
   2218		return 1; /* invalid entry, no choice but to scan next entry */
   2219
   2220	if (lpi_id < VGIC_MIN_LPI)
   2221		return -EINVAL;
   2222
   2223	offset = val >> KVM_ITS_ITE_NEXT_SHIFT;
   2224	if (event_id + offset >= BIT_ULL(dev->num_eventid_bits))
   2225		return -EINVAL;
   2226
   2227	collection = find_collection(its, coll_id);
   2228	if (!collection)
   2229		return -EINVAL;
   2230
   2231	if (!vgic_its_check_event_id(its, dev, event_id))
   2232		return -EINVAL;
   2233
   2234	ite = vgic_its_alloc_ite(dev, collection, event_id);
   2235	if (IS_ERR(ite))
   2236		return PTR_ERR(ite);
   2237
   2238	if (its_is_collection_mapped(collection))
   2239		vcpu = kvm_get_vcpu(kvm, collection->target_addr);
   2240
   2241	irq = vgic_add_lpi(kvm, lpi_id, vcpu);
   2242	if (IS_ERR(irq)) {
   2243		its_free_ite(kvm, ite);
   2244		return PTR_ERR(irq);
   2245	}
   2246	ite->irq = irq;
   2247
   2248	return offset;
   2249}
   2250
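       /* list_sort() comparator: order ITEs by ascending event ID before saving. */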
   2251static int vgic_its_ite_cmp(void *priv, const struct list_head *a,
   2252			    const struct list_head *b)
   2253{
   2254	struct its_ite *itea = container_of(a, struct its_ite, ite_list);
   2255	struct its_ite *iteb = container_of(b, struct its_ite, ite_list);
   2256
   2257	if (itea->event_id < iteb->event_id)
   2258		return -1;
   2259	else
   2260		return 1;
   2261}
   2262
   2263static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device)
   2264{
   2265	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
   2266	gpa_t base = device->itt_addr;
   2267	struct its_ite *ite;
   2268	int ret;
   2269	int ite_esz = abi->ite_esz;
   2270
   2271	list_sort(NULL, &device->itt_head, vgic_its_ite_cmp);
   2272
   2273	list_for_each_entry(ite, &device->itt_head, ite_list) {
   2274		gpa_t gpa = base + ite->event_id * ite_esz;
   2275
   2276		/*
   2277		 * If an LPI carries the HW bit, this means that this
   2278		 * interrupt is controlled by GICv4, and we do not
   2279		 * have direct access to that state without GICv4.1.
   2280		 * Let's simply fail the save operation...
   2281		 */
   2282		if (ite->irq->hw && !kvm_vgic_global_state.has_gicv4_1)
   2283			return -EACCES;
   2284
   2285		ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz);
   2286		if (ret)
   2287			return ret;
   2288	}
   2289	return 0;
   2290}
   2291
   2292/**
   2293 * vgic_its_restore_itt - restore the ITT of a device
   2294 *
   2295 * @its: its handle
   2296 * @dev: device handle
   2297 *
   2298 * Return: 0 on success, < 0 on error
   2299 */
   2300static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev)
   2301{
   2302	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
   2303	gpa_t base = dev->itt_addr;
   2304	int ret;
   2305	int ite_esz = abi->ite_esz;
   2306	size_t max_size = BIT_ULL(dev->num_eventid_bits) * ite_esz;
   2307
   2308	ret = scan_its_table(its, base, max_size, ite_esz, 0,
   2309			     vgic_its_restore_ite, dev);
   2310
   2311	/* scan_its_table returns +1 if all ITEs are invalid */
   2312	if (ret > 0)
   2313		ret = 0;
   2314
   2315	return ret;
   2316}
   2317
   2318/**
   2319 * vgic_its_save_dte - Save a device table entry at a given GPA
   2320 *
   2321 * @its: ITS handle
   2322 * @dev: ITS device
   2323 * @ptr: GPA
   2324 */
   2325static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev,
   2326			     gpa_t ptr, int dte_esz)
   2327{
   2328	struct kvm *kvm = its->dev->kvm;
   2329	u64 val, itt_addr_field;
   2330	u32 next_offset;
   2331
   2332	itt_addr_field = dev->itt_addr >> 8;
   2333	next_offset = compute_next_devid_offset(&its->device_list, dev);
   2334	val = (1ULL << KVM_ITS_DTE_VALID_SHIFT |
   2335	       ((u64)next_offset << KVM_ITS_DTE_NEXT_SHIFT) |
   2336	       (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) |
   2337		(dev->num_eventid_bits - 1));
   2338	val = cpu_to_le64(val);
   2339	return kvm_write_guest_lock(kvm, ptr, &val, dte_esz);
   2340}
   2341
   2342/**
   2343 * vgic_its_restore_dte - restore a device table entry
   2344 *
   2345 * @its: its handle
   2346 * @id: device id the DTE corresponds to
   2347 * @ptr: kernel VA where the 8-byte DTE is located
   2348 * @opaque: unused
   2349 *
   2350 * Return: < 0 on error, 0 if the dte is the last one, id offset to the
   2351 * next dte otherwise
   2352 */
   2353static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
   2354				void *ptr, void *opaque)
   2355{
   2356	struct its_device *dev;
   2357	u64 baser = its->baser_device_table;
   2358	gpa_t itt_addr;
   2359	u8 num_eventid_bits;
   2360	u64 entry = *(u64 *)ptr;
   2361	bool valid;
   2362	u32 offset;
   2363	int ret;
   2364
   2365	entry = le64_to_cpu(entry);
   2366
   2367	valid = entry >> KVM_ITS_DTE_VALID_SHIFT;
   2368	num_eventid_bits = (entry & KVM_ITS_DTE_SIZE_MASK) + 1;
   2369	itt_addr = ((entry & KVM_ITS_DTE_ITTADDR_MASK)
   2370			>> KVM_ITS_DTE_ITTADDR_SHIFT) << 8;
   2371
   2372	if (!valid)
   2373		return 1;
   2374
   2375	/* dte entry is valid */
   2376	offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT;
   2377
   2378	if (!vgic_its_check_id(its, baser, id, NULL))
   2379		return -EINVAL;
   2380
   2381	dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits);
   2382	if (IS_ERR(dev))
   2383		return PTR_ERR(dev);
   2384
   2385	ret = vgic_its_restore_itt(its, dev);
   2386	if (ret) {
   2387		vgic_its_free_device(its->dev->kvm, dev);
   2388		return ret;
   2389	}
   2390
   2391	return offset;
   2392}
   2393
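       /* list_sort() comparator: order devices by ascending device ID before saving. */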
   2394static int vgic_its_device_cmp(void *priv, const struct list_head *a,
   2395			       const struct list_head *b)
   2396{
   2397	struct its_device *deva = container_of(a, struct its_device, dev_list);
   2398	struct its_device *devb = container_of(b, struct its_device, dev_list);
   2399
   2400	if (deva->device_id < devb->device_id)
   2401		return -1;
   2402	else
   2403		return 1;
   2404}
   2405
   2406/**
   2407 * vgic_its_save_device_tables - Save the device table and all ITT
   2408 * into guest RAM
   2409 *
   2410 * L1/L2 handling is hidden by the vgic_its_check_id() helper, which directly
   2411 * returns the GPA of the device entry
   2412 */
   2413static int vgic_its_save_device_tables(struct vgic_its *its)
   2414{
   2415	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
   2416	u64 baser = its->baser_device_table;
   2417	struct its_device *dev;
   2418	int dte_esz = abi->dte_esz;
   2419
   2420	if (!(baser & GITS_BASER_VALID))
   2421		return 0;
   2422
   2423	list_sort(NULL, &its->device_list, vgic_its_device_cmp);
   2424
   2425	list_for_each_entry(dev, &its->device_list, dev_list) {
   2426		int ret;
   2427		gpa_t eaddr;
   2428
   2429		if (!vgic_its_check_id(its, baser,
   2430				       dev->device_id, &eaddr))
   2431			return -EINVAL;
   2432
   2433		ret = vgic_its_save_itt(its, dev);
   2434		if (ret)
   2435			return ret;
   2436
   2437		ret = vgic_its_save_dte(its, dev, eaddr, dte_esz);
   2438		if (ret)
   2439			return ret;
   2440	}
   2441	return 0;
   2442}
   2443
   2444/**
   2445 * handle_l1_dte - callback used for L1 device table entries (2 stage case)
   2446 *
   2447 * @its: its handle
   2448 * @id: index of the entry in the L1 table
   2449 * @addr: kernel VA
   2450 * @opaque: unused
   2451 *
   2452 * L1 table entries are scanned one entry at a time.
   2453 * Return: < 0 on error, 0 if the last DTE was found when scanning the L2
   2454 * table, +1 otherwise (meaning the next L1 entry must be scanned)
   2455 */
   2456static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr,
   2457			 void *opaque)
   2458{
   2459	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
   2460	int l2_start_id = id * (SZ_64K / abi->dte_esz);
   2461	u64 entry = *(u64 *)addr;
   2462	int dte_esz = abi->dte_esz;
   2463	gpa_t gpa;
   2464	int ret;
   2465
   2466	entry = le64_to_cpu(entry);
   2467
   2468	if (!(entry & KVM_ITS_L1E_VALID_MASK))
   2469		return 1;
   2470
   2471	gpa = entry & KVM_ITS_L1E_ADDR_MASK;
   2472
   2473	ret = scan_its_table(its, gpa, SZ_64K, dte_esz,
   2474			     l2_start_id, vgic_its_restore_dte, NULL);
   2475
   2476	return ret;
   2477}
   2478
   2479/**
   2480 * vgic_its_restore_device_tables - Restore the device table and all ITT
   2481 * from guest RAM to internal data structs
   2482 */
   2483static int vgic_its_restore_device_tables(struct vgic_its *its)
   2484{
   2485	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
   2486	u64 baser = its->baser_device_table;
   2487	int l1_esz, ret;
   2488	int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
   2489	gpa_t l1_gpa;
   2490
   2491	if (!(baser & GITS_BASER_VALID))
   2492		return 0;
   2493
   2494	l1_gpa = GITS_BASER_ADDR_48_to_52(baser);
   2495
   2496	if (baser & GITS_BASER_INDIRECT) {
   2497		l1_esz = GITS_LVL1_ENTRY_SIZE;
   2498		ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0,
   2499				     handle_l1_dte, NULL);
   2500	} else {
   2501		l1_esz = abi->dte_esz;
   2502		ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0,
   2503				     vgic_its_restore_dte, NULL);
   2504	}
   2505
   2506	/* scan_its_table returns +1 if all entries are invalid */
   2507	if (ret > 0)
   2508		ret = 0;
   2509
   2510	if (ret < 0)
   2511		vgic_its_free_device_list(its->dev->kvm, its);
   2512
   2513	return ret;
   2514}
   2515
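       /*
        * Save a single collection table entry at @gpa: Valid bit, target
        * redistributor (vcpu index) and collection ID, stored little-endian.
        */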
   2516static int vgic_its_save_cte(struct vgic_its *its,
   2517			     struct its_collection *collection,
   2518			     gpa_t gpa, int esz)
   2519{
   2520	u64 val;
   2521
   2522	val = (1ULL << KVM_ITS_CTE_VALID_SHIFT |
   2523	       ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) |
   2524	       collection->collection_id);
   2525	val = cpu_to_le64(val);
   2526	return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz);
   2527}
   2528
   2529/*
   2530 * Restore a collection entry into the ITS collection table.
   2531 * Return +1 on success, 0 if the entry was invalid (which should be
   2532 * interpreted as end-of-table), and a negative error value for generic errors.
   2533 */
   2534static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
   2535{
   2536	struct its_collection *collection;
   2537	struct kvm *kvm = its->dev->kvm;
   2538	u32 target_addr, coll_id;
   2539	u64 val;
   2540	int ret;
   2541
   2542	BUG_ON(esz > sizeof(val));
   2543	ret = kvm_read_guest_lock(kvm, gpa, &val, esz);
   2544	if (ret)
   2545		return ret;
   2546	val = le64_to_cpu(val);
   2547	if (!(val & KVM_ITS_CTE_VALID_MASK))
   2548		return 0;
   2549
   2550	target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT);
   2551	coll_id = val & KVM_ITS_CTE_ICID_MASK;
   2552
   2553	if (target_addr != COLLECTION_NOT_MAPPED &&
   2554	    target_addr >= atomic_read(&kvm->online_vcpus))
   2555		return -EINVAL;
   2556
   2557	collection = find_collection(its, coll_id);
   2558	if (collection)
   2559		return -EEXIST;
   2560
   2561	if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
   2562		return -EINVAL;
   2563
   2564	ret = vgic_its_alloc_collection(its, &collection, coll_id);
   2565	if (ret)
   2566		return ret;
   2567	collection->target_addr = target_addr;
   2568	return 1;
   2569}
   2570
   2571/**
   2572 * vgic_its_save_collection_table - Save the collection table into
   2573 * guest RAM
   2574 */
   2575static int vgic_its_save_collection_table(struct vgic_its *its)
   2576{
   2577	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
   2578	u64 baser = its->baser_coll_table;
   2579	gpa_t gpa = GITS_BASER_ADDR_48_to_52(baser);
   2580	struct its_collection *collection;
   2581	u64 val;
   2582	size_t max_size, filled = 0;
   2583	int ret, cte_esz = abi->cte_esz;
   2584
   2585	if (!(baser & GITS_BASER_VALID))
   2586		return 0;
   2587
   2588	max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
   2589
   2590	list_for_each_entry(collection, &its->collection_list, coll_list) {
   2591		ret = vgic_its_save_cte(its, collection, gpa, cte_esz);
   2592		if (ret)
   2593			return ret;
   2594		gpa += cte_esz;
   2595		filled += cte_esz;
   2596	}
   2597
   2598	if (filled == max_size)
   2599		return 0;
   2600
   2601	/*
   2602	 * The table is not fully filled; add a final dummy element
   2603	 * with the valid bit unset
   2604	 */
   2605	val = 0;
   2606	BUG_ON(cte_esz > sizeof(val));
   2607	ret = kvm_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz);
   2608	return ret;
   2609}
   2610
   2611/**
   2612 * vgic_its_restore_collection_table - reads the collection table
   2613 * in guest memory and restores the ITS internal state. Requires the
   2614 * BASER registers to have been restored beforehand.
   2615 */
   2616static int vgic_its_restore_collection_table(struct vgic_its *its)
   2617{
   2618	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
   2619	u64 baser = its->baser_coll_table;
   2620	int cte_esz = abi->cte_esz;
   2621	size_t max_size, read = 0;
   2622	gpa_t gpa;
   2623	int ret;
   2624
   2625	if (!(baser & GITS_BASER_VALID))
   2626		return 0;
   2627
   2628	gpa = GITS_BASER_ADDR_48_to_52(baser);
   2629
   2630	max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
   2631
   2632	while (read < max_size) {
   2633		ret = vgic_its_restore_cte(its, gpa, cte_esz);
   2634		if (ret <= 0)
   2635			break;
   2636		gpa += cte_esz;
   2637		read += cte_esz;
   2638	}
   2639
   2640	if (ret > 0)
   2641		return 0;
   2642
   2643	if (ret < 0)
   2644		vgic_its_free_collection_list(its->dev->kvm, its);
   2645
   2646	return ret;
   2647}
   2648
   2649/**
   2650 * vgic_its_save_tables_v0 - Save the ITS tables into guest RAM
   2651 * according to v0 ABI
   2652 */
   2653static int vgic_its_save_tables_v0(struct vgic_its *its)
   2654{
   2655	int ret;
   2656
   2657	ret = vgic_its_save_device_tables(its);
   2658	if (ret)
   2659		return ret;
   2660
   2661	return vgic_its_save_collection_table(its);
   2662}
   2663
   2664/**
   2665 * vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM
   2666 * to internal data structs according to v0 ABI
   2667 *
   2668 */
   2669static int vgic_its_restore_tables_v0(struct vgic_its *its)
   2670{
   2671	int ret;
   2672
   2673	ret = vgic_its_restore_collection_table(its);
   2674	if (ret)
   2675		return ret;
   2676
   2677	ret = vgic_its_restore_device_tables(its);
   2678	if (ret)
   2679		vgic_its_free_collection_list(its->dev->kvm, its);
   2680	return ret;
   2681}
   2682
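       /*
        * Commit the v0 ABI: patch the entry sizes advertised in GITS_BASER0/1 to
        * the ABI's device/collection table entry sizes.
        */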
   2683static int vgic_its_commit_v0(struct vgic_its *its)
   2684{
   2685	const struct vgic_its_abi *abi;
   2686
   2687	abi = vgic_its_get_abi(its);
   2688	its->baser_coll_table &= ~GITS_BASER_ENTRY_SIZE_MASK;
   2689	its->baser_device_table &= ~GITS_BASER_ENTRY_SIZE_MASK;
   2690
   2691	its->baser_coll_table |= (GIC_ENCODE_SZ(abi->cte_esz, 5)
   2692					<< GITS_BASER_ENTRY_SIZE_SHIFT);
   2693
   2694	its->baser_device_table |= (GIC_ENCODE_SZ(abi->dte_esz, 5)
   2695					<< GITS_BASER_ENTRY_SIZE_SHIFT);
   2696	return 0;
   2697}
   2698
   2699static void vgic_its_reset(struct kvm *kvm, struct vgic_its *its)
   2700{
   2701	/* We need to keep the ABI-specific field values */
   2702	its->baser_coll_table &= ~GITS_BASER_VALID;
   2703	its->baser_device_table &= ~GITS_BASER_VALID;
   2704	its->cbaser = 0;
   2705	its->creadr = 0;
   2706	its->cwriter = 0;
   2707	its->enabled = 0;
   2708	vgic_its_free_device_list(kvm, its);
   2709	vgic_its_free_collection_list(kvm, its);
   2710}
   2711
   2712static int vgic_its_has_attr(struct kvm_device *dev,
   2713			     struct kvm_device_attr *attr)
   2714{
   2715	switch (attr->group) {
   2716	case KVM_DEV_ARM_VGIC_GRP_ADDR:
   2717		switch (attr->attr) {
   2718		case KVM_VGIC_ITS_ADDR_TYPE:
   2719			return 0;
   2720		}
   2721		break;
   2722	case KVM_DEV_ARM_VGIC_GRP_CTRL:
   2723		switch (attr->attr) {
   2724		case KVM_DEV_ARM_VGIC_CTRL_INIT:
   2725			return 0;
   2726		case KVM_DEV_ARM_ITS_CTRL_RESET:
   2727			return 0;
   2728		case KVM_DEV_ARM_ITS_SAVE_TABLES:
   2729			return 0;
   2730		case KVM_DEV_ARM_ITS_RESTORE_TABLES:
   2731			return 0;
   2732		}
   2733		break;
   2734	case KVM_DEV_ARM_VGIC_GRP_ITS_REGS:
   2735		return vgic_its_has_attr_regs(dev, attr);
   2736	}
   2737	return -ENXIO;
   2738}
   2739
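       /*
        * Handle the KVM_DEV_ARM_VGIC_GRP_CTRL attributes (reset, table
        * save/restore). All vCPUs must be lockable, otherwise -EBUSY is returned;
        * save/restore is dispatched through the current ABI's callbacks.
        */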
   2740static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
   2741{
   2742	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
   2743	int ret = 0;
   2744
   2745	if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */
   2746		return 0;
   2747
   2748	mutex_lock(&kvm->lock);
   2749	mutex_lock(&its->its_lock);
   2750
   2751	if (!lock_all_vcpus(kvm)) {
   2752		mutex_unlock(&its->its_lock);
   2753		mutex_unlock(&kvm->lock);
   2754		return -EBUSY;
   2755	}
   2756
   2757	switch (attr) {
   2758	case KVM_DEV_ARM_ITS_CTRL_RESET:
   2759		vgic_its_reset(kvm, its);
   2760		break;
   2761	case KVM_DEV_ARM_ITS_SAVE_TABLES:
   2762		ret = abi->save_tables(its);
   2763		break;
   2764	case KVM_DEV_ARM_ITS_RESTORE_TABLES:
   2765		ret = abi->restore_tables(its);
   2766		break;
   2767	}
   2768
   2769	unlock_all_vcpus(kvm);
   2770	mutex_unlock(&its->its_lock);
   2771	mutex_unlock(&kvm->lock);
   2772	return ret;
   2773}
   2774
   2775static int vgic_its_set_attr(struct kvm_device *dev,
   2776			     struct kvm_device_attr *attr)
   2777{
   2778	struct vgic_its *its = dev->private;
   2779	int ret;
   2780
   2781	switch (attr->group) {
   2782	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
   2783		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
   2784		unsigned long type = (unsigned long)attr->attr;
   2785		u64 addr;
   2786
   2787		if (type != KVM_VGIC_ITS_ADDR_TYPE)
   2788			return -ENODEV;
   2789
   2790		if (copy_from_user(&addr, uaddr, sizeof(addr)))
   2791			return -EFAULT;
   2792
   2793		ret = vgic_check_iorange(dev->kvm, its->vgic_its_base,
   2794					 addr, SZ_64K, KVM_VGIC_V3_ITS_SIZE);
   2795		if (ret)
   2796			return ret;
   2797
   2798		return vgic_register_its_iodev(dev->kvm, its, addr);
   2799	}
   2800	case KVM_DEV_ARM_VGIC_GRP_CTRL:
   2801		return vgic_its_ctrl(dev->kvm, its, attr->attr);
   2802	case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
   2803		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
   2804		u64 reg;
   2805
   2806		if (get_user(reg, uaddr))
   2807			return -EFAULT;
   2808
   2809		return vgic_its_attr_regs_access(dev, attr, &reg, true);
   2810	}
   2811	}
   2812	return -ENXIO;
   2813}
   2814
   2815static int vgic_its_get_attr(struct kvm_device *dev,
   2816			     struct kvm_device_attr *attr)
   2817{
   2818	switch (attr->group) {
   2819	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
   2820		struct vgic_its *its = dev->private;
   2821		u64 addr = its->vgic_its_base;
   2822		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
   2823		unsigned long type = (unsigned long)attr->attr;
   2824
   2825		if (type != KVM_VGIC_ITS_ADDR_TYPE)
   2826			return -ENODEV;
   2827
   2828		if (copy_to_user(uaddr, &addr, sizeof(addr)))
   2829			return -EFAULT;
   2830		break;
   2831	}
   2832	case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
   2833		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
   2834		u64 reg;
   2835		int ret;
   2836
   2837		ret = vgic_its_attr_regs_access(dev, attr, &reg, false);
   2838		if (ret)
   2839			return ret;
   2840		return put_user(reg, uaddr);
   2841	}
   2842	default:
   2843		return -ENXIO;
   2844	}
   2845
   2846	return 0;
   2847}
   2848
   2849static struct kvm_device_ops kvm_arm_vgic_its_ops = {
   2850	.name = "kvm-arm-vgic-its",
   2851	.create = vgic_its_create,
   2852	.destroy = vgic_its_destroy,
   2853	.set_attr = vgic_its_set_attr,
   2854	.get_attr = vgic_its_get_attr,
   2855	.has_attr = vgic_its_has_attr,
   2856};
   2857
   2858int kvm_vgic_register_its_device(void)
   2859{
   2860	return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
   2861				       KVM_DEV_TYPE_ARM_VGIC_ITS);
   2862}