cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

book3s_xics.c (38014B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright 2012 Michael Ellerman, IBM Corporation.
      4 * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
      5 */
      6
      7#include <linux/kernel.h>
      8#include <linux/kvm_host.h>
      9#include <linux/err.h>
     10#include <linux/gfp.h>
     11#include <linux/anon_inodes.h>
     12#include <linux/spinlock.h>
     13#include <linux/debugfs.h>
     14#include <linux/uaccess.h>
     15
     16#include <asm/kvm_book3s.h>
     17#include <asm/kvm_ppc.h>
     18#include <asm/hvcall.h>
     19#include <asm/xics.h>
     20#include <asm/time.h>
     21
     22#include <linux/seq_file.h>
     23
     24#include "book3s_xics.h"
     25
     26#if 1
     27#define XICS_DBG(fmt...) do { } while (0)
     28#else
     29#define XICS_DBG(fmt...) trace_printk(fmt)
     30#endif
     31
     32#define ENABLE_REALMODE	true
     33#define DEBUG_REALMODE	false
     34
     35/*
     36 * LOCKING
     37 * =======
     38 *
     39 * Each ICS has a spin lock protecting the information about the IRQ
     40 * sources and avoiding simultaneous deliveries of the same interrupt.
     41 *
     42 * ICP operations are done via a single compare & swap transaction
     43 * (most ICP state fits in the union kvmppc_icp_state)
     44 */
     45
     46/*
     47 * TODO
     48 * ====
     49 *
     50 * - To speed up resends, keep a bitmap of "resend" set bits in the
     51 *   ICS
     52 *
     53 * - Speed up server# -> ICP lookup (array? hash table?)
     54 *
     55 * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
     56 *   locks array to improve scalability
     57 */
     58
     59/* -- ICS routines -- */
     60
     61static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
     62			    u32 new_irq, bool check_resend);
     63
     64/*
     65 * Return value ideally indicates how the interrupt was handled, but no
     66 * callers look at it (given that we don't implement KVM_IRQ_LINE_STATUS),
     67 * so just return 0.
     68 */
     69static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
     70{
     71	struct ics_irq_state *state;
     72	struct kvmppc_ics *ics;
     73	u16 src;
     74	u32 pq_old, pq_new;
     75
     76	XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
     77
     78	ics = kvmppc_xics_find_ics(xics, irq, &src);
     79	if (!ics) {
     80		XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
     81		return -EINVAL;
     82	}
     83	state = &ics->irq_state[src];
     84	if (!state->exists)
     85		return -EINVAL;
     86
     87	if (level == KVM_INTERRUPT_SET_LEVEL || level == KVM_INTERRUPT_SET)
     88		level = 1;
     89	else if (level == KVM_INTERRUPT_UNSET)
     90		level = 0;
     91	/*
     92	 * Treat other values the same as 1, consistent with the original code.
     93	 * Maybe WARN here?
     94	 */
     95
     96	if (!state->lsi && level == 0) /* noop for MSI */
     97		return 0;
     98
     99	do {
    100		pq_old = state->pq_state;
    101		if (state->lsi) {
    102			if (level) {
    103				if (pq_old & PQ_PRESENTED)
    104					/* Setting already set LSI ... */
    105					return 0;
    106
    107				pq_new = PQ_PRESENTED;
    108			} else
    109				pq_new = 0;
    110		} else
    111			pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
    112	} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
    113
    114	/* Test P=1, Q=0, this is the only case where we present */
    115	if (pq_new == PQ_PRESENTED)
    116		icp_deliver_irq(xics, NULL, irq, false);
    117
    118	/* Record which CPU this arrived on for passed-through interrupts */
    119	if (state->host_irq)
    120		state->intr_cpu = raw_smp_processor_id();
    121
    122	return 0;
    123}
    124
    125static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
    126			     struct kvmppc_icp *icp)
    127{
    128	int i;
    129
    130	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
    131		struct ics_irq_state *state = &ics->irq_state[i];
    132		if (state->resend) {
    133			XICS_DBG("resend %#x prio %#x\n", state->number,
    134				      state->priority);
    135			icp_deliver_irq(xics, icp, state->number, true);
    136		}
    137	}
    138}
    139
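       /*
        * Update the server/priority (and saved priority) of an interrupt
        * source under the ICS lock.  Returns true if the caller should
        * attempt a delivery, i.e. the source had a masked-pending or
        * to-be-resent interrupt and the new priority is not MASKED.
        */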
    140static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
    141		       struct ics_irq_state *state,
    142		       u32 server, u32 priority, u32 saved_priority)
    143{
    144	bool deliver;
    145	unsigned long flags;
    146
    147	local_irq_save(flags);
    148	arch_spin_lock(&ics->lock);
    149
    150	state->server = server;
    151	state->priority = priority;
    152	state->saved_priority = saved_priority;
    153	deliver = false;
    154	if ((state->masked_pending || state->resend) && priority != MASKED) {
    155		state->masked_pending = 0;
    156		state->resend = 0;
    157		deliver = true;
    158	}
    159
    160	arch_spin_unlock(&ics->lock);
    161	local_irq_restore(flags);
    162
    163	return deliver;
    164}
    165
    166int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
    167{
    168	struct kvmppc_xics *xics = kvm->arch.xics;
    169	struct kvmppc_icp *icp;
    170	struct kvmppc_ics *ics;
    171	struct ics_irq_state *state;
    172	u16 src;
    173
    174	if (!xics)
    175		return -ENODEV;
    176
    177	ics = kvmppc_xics_find_ics(xics, irq, &src);
    178	if (!ics)
    179		return -EINVAL;
    180	state = &ics->irq_state[src];
    181
    182	icp = kvmppc_xics_find_server(kvm, server);
    183	if (!icp)
    184		return -EINVAL;
    185
    186	XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
    187		 irq, server, priority,
    188		 state->masked_pending, state->resend);
    189
    190	if (write_xive(xics, ics, state, server, priority, priority))
    191		icp_deliver_irq(xics, icp, irq, false);
    192
    193	return 0;
    194}
    195
    196int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
    197{
    198	struct kvmppc_xics *xics = kvm->arch.xics;
    199	struct kvmppc_ics *ics;
    200	struct ics_irq_state *state;
    201	u16 src;
    202	unsigned long flags;
    203
    204	if (!xics)
    205		return -ENODEV;
    206
    207	ics = kvmppc_xics_find_ics(xics, irq, &src);
    208	if (!ics)
    209		return -EINVAL;
    210	state = &ics->irq_state[src];
    211
    212	local_irq_save(flags);
    213	arch_spin_lock(&ics->lock);
    214	*server = state->server;
    215	*priority = state->priority;
    216	arch_spin_unlock(&ics->lock);
    217	local_irq_restore(flags);
    218
    219	return 0;
    220}
    221
    222int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
    223{
    224	struct kvmppc_xics *xics = kvm->arch.xics;
    225	struct kvmppc_icp *icp;
    226	struct kvmppc_ics *ics;
    227	struct ics_irq_state *state;
    228	u16 src;
    229
    230	if (!xics)
    231		return -ENODEV;
    232
    233	ics = kvmppc_xics_find_ics(xics, irq, &src);
    234	if (!ics)
    235		return -EINVAL;
    236	state = &ics->irq_state[src];
    237
    238	icp = kvmppc_xics_find_server(kvm, state->server);
    239	if (!icp)
    240		return -EINVAL;
    241
    242	if (write_xive(xics, ics, state, state->server, state->saved_priority,
    243		       state->saved_priority))
    244		icp_deliver_irq(xics, icp, irq, false);
    245
    246	return 0;
    247}
    248
    249int kvmppc_xics_int_off(struct kvm *kvm, u32 irq)
    250{
    251	struct kvmppc_xics *xics = kvm->arch.xics;
    252	struct kvmppc_ics *ics;
    253	struct ics_irq_state *state;
    254	u16 src;
    255
    256	if (!xics)
    257		return -ENODEV;
    258
    259	ics = kvmppc_xics_find_ics(xics, irq, &src);
    260	if (!ics)
    261		return -EINVAL;
    262	state = &ics->irq_state[src];
    263
    264	write_xive(xics, ics, state, state->server, MASKED, state->priority);
    265
    266	return 0;
    267}
    268
    269/* -- ICP routines, including hcalls -- */
    270
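       /*
        * Attempt a single atomic update of the ICP state from 'old' to
        * 'new', recomputing the interrupt output (out_ee) first.  On
        * success, assert the external interrupt on the target vcpu if the
        * output is now set, kicking it unless we are updating our own ICP.
        * Returns false if the state changed under us; the caller must then
        * recompute the new state and retry.
        */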
    271static inline bool icp_try_update(struct kvmppc_icp *icp,
    272				  union kvmppc_icp_state old,
    273				  union kvmppc_icp_state new,
    274				  bool change_self)
    275{
    276	bool success;
    277
    278	/* Calculate new output value */
    279	new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
    280
    281	/* Attempt atomic update */
    282	success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
    283	if (!success)
    284		goto bail;
    285
    286	XICS_DBG("UPD [%04lx] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
    287		 icp->server_num,
    288		 old.cppr, old.mfrr, old.pending_pri, old.xisr,
    289		 old.need_resend, old.out_ee);
    290	XICS_DBG("UPD        - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
    291		 new.cppr, new.mfrr, new.pending_pri, new.xisr,
    292		 new.need_resend, new.out_ee);
    293	/*
    294	 * Check for output state update
    295	 *
    296	 * Note that this is racy since another processor could be updating
    297	 * the state already. This is why we never clear the interrupt output
    298	 * here, we only ever set it. The clear only happens prior to doing
    299	 * an update and only by the processor itself. Currently we do it
    300	 * in Accept (H_XIRR) and Up_CPPR (H_CPPR).
    301	 *
    302	 * We also do not try to figure out whether the EE state has changed;
    303	 * we unconditionally set it if the new state calls for it. The reason
    304	 * for that is that we opportunistically remove the pending interrupt
    305	 * flag when raising CPPR, so we need to set it back here if an
    306	 * interrupt is still pending.
    307	 */
    308	if (new.out_ee) {
    309		kvmppc_book3s_queue_irqprio(icp->vcpu,
    310					    BOOK3S_INTERRUPT_EXTERNAL);
    311		if (!change_self)
    312			kvmppc_fast_vcpu_kick(icp->vcpu);
    313	}
    314 bail:
    315	return success;
    316}
    317
    318static void icp_check_resend(struct kvmppc_xics *xics,
    319			     struct kvmppc_icp *icp)
    320{
    321	u32 icsid;
    322
    323	/* Order this load with the test for need_resend in the caller */
    324	smp_rmb();
    325	for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
    326		struct kvmppc_ics *ics = xics->ics[icsid];
    327
    328		if (!test_and_clear_bit(icsid, icp->resend_map))
    329			continue;
    330		if (!ics)
    331			continue;
    332		ics_check_resend(xics, ics, icp);
    333	}
    334}
    335
    336static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
    337			       u32 *reject)
    338{
    339	union kvmppc_icp_state old_state, new_state;
    340	bool success;
    341
    342	XICS_DBG("try deliver %#x(P:%#x) to server %#lx\n", irq, priority,
    343		 icp->server_num);
    344
    345	do {
    346		old_state = new_state = READ_ONCE(icp->state);
    347
    348		*reject = 0;
    349
    350		/* See if we can deliver */
    351		success = new_state.cppr > priority &&
    352			new_state.mfrr > priority &&
    353			new_state.pending_pri > priority;
    354
    355		/*
    356		 * If we can, check for a rejection and perform the
    357		 * delivery
    358		 */
    359		if (success) {
    360			*reject = new_state.xisr;
    361			new_state.xisr = irq;
    362			new_state.pending_pri = priority;
    363		} else {
    364			/*
    365			 * If we failed to deliver we set need_resend
    366			 * so a subsequent CPPR state change causes us
    367			 * to try a new delivery.
    368			 */
    369			new_state.need_resend = true;
    370		}
    371
    372	} while (!icp_try_update(icp, old_state, new_state, false));
    373
    374	return success;
    375}
    376
    377static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
    378			    u32 new_irq, bool check_resend)
    379{
    380	struct ics_irq_state *state;
    381	struct kvmppc_ics *ics;
    382	u32 reject;
    383	u16 src;
    384	unsigned long flags;
    385
    386	/*
    387	 * This is used both for initial delivery of an interrupt and
    388	 * for subsequent rejection.
    389	 *
    390	 * Rejection can be racy vs. resends. We have evaluated the
    391	 * rejection in an atomic ICP transaction which is now complete,
    392	 * so potentially the ICP can already accept the interrupt again.
    393	 *
    394	 * So we need to retry the delivery. Essentially the reject path
    395	 * boils down to a failed delivery. Always.
    396	 *
    397	 * Now the interrupt could also have moved to a different target,
    398	 * thus we may need to re-do the ICP lookup as well
    399	 */
    400
    401 again:
    402	/* Get the ICS state and lock it */
    403	ics = kvmppc_xics_find_ics(xics, new_irq, &src);
    404	if (!ics) {
    405		XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
    406		return;
    407	}
    408	state = &ics->irq_state[src];
    409
    410	/* Get a lock on the ICS */
    411	local_irq_save(flags);
    412	arch_spin_lock(&ics->lock);
    413
    414	/* Get our server */
    415	if (!icp || state->server != icp->server_num) {
    416		icp = kvmppc_xics_find_server(xics->kvm, state->server);
    417		if (!icp) {
    418			pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n",
    419				new_irq, state->server);
    420			goto out;
    421		}
    422	}
    423
    424	if (check_resend)
    425		if (!state->resend)
    426			goto out;
    427
    428	/* Clear the resend bit of that interrupt */
    429	state->resend = 0;
    430
    431	/*
    432	 * If masked, bail out
    433	 *
    434	 * Note: PAPR doesn't mention anything about masked pending
    435	 * when doing a resend, only when doing a delivery.
    436	 *
    437	 * However that would have the effect of losing a masked
    438	 * interrupt that was rejected and isn't consistent with
    439	 * the whole masked_pending business which is about not
    440	 * losing interrupts that occur while masked.
    441	 *
    442	 * I don't differentiate between normal deliveries and resends; this
    443	 * implementation will differ from PAPR and not lose such
    444	 * interrupts.
    445	 */
    446	if (state->priority == MASKED) {
    447		XICS_DBG("irq %#x masked pending\n", new_irq);
    448		state->masked_pending = 1;
    449		goto out;
    450	}
    451
    452	/*
    453	 * Try the delivery, this will set the need_resend flag
    454	 * in the ICP as part of the atomic transaction if the
    455	 * delivery is not possible.
    456	 *
    457	 * Note that if successful, the new delivery might have itself
    458	 * rejected an interrupt that was "delivered" before we took the
    459	 * ics spin lock.
    460	 *
    461	 * In this case we do the whole sequence all over again for the
    462	 * new guy. We cannot assume that the rejected interrupt is less
    463	 * favored than the new one, and thus doesn't need to be delivered,
    464	 * because by the time we exit icp_try_to_deliver() the target
    465	 * processor may well have already consumed & completed it, and thus
    466	 * the rejected interrupt might actually be already acceptable.
    467	 */
    468	if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
    469		/*
    470		 * Delivery was successful, did we reject somebody else ?
    471		 */
    472		if (reject && reject != XICS_IPI) {
    473			arch_spin_unlock(&ics->lock);
    474			local_irq_restore(flags);
    475			new_irq = reject;
    476			check_resend = false;
    477			goto again;
    478		}
    479	} else {
    480		/*
    481		 * We failed to deliver the interrupt, so we need to set the
    482		 * resend map bit and mark the ICS state as needing a resend.
    483		 */
    484		state->resend = 1;
    485
    486		/*
    487		 * Make sure when checking resend, we don't miss the resend
    488		 * if resend_map bit is seen and cleared.
    489		 */
    490		smp_wmb();
    491		set_bit(ics->icsid, icp->resend_map);
    492
    493		/*
    494		 * If the need_resend flag got cleared in the ICP some time
    495		 * between icp_try_to_deliver() atomic update and now, then
    496		 * we know it might have missed the resend_map bit. So we
    497		 * retry
    498		 */
    499		smp_mb();
    500		if (!icp->state.need_resend) {
    501			state->resend = 0;
    502			arch_spin_unlock(&ics->lock);
    503			local_irq_restore(flags);
    504			check_resend = false;
    505			goto again;
    506		}
    507	}
    508 out:
    509	arch_spin_unlock(&ics->lock);
    510	local_irq_restore(flags);
    511}
    512
    513static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
    514			  u8 new_cppr)
    515{
    516	union kvmppc_icp_state old_state, new_state;
    517	bool resend;
    518
    519	/*
    520	 * This handles several related states in one operation:
    521	 *
    522	 * ICP State: Down_CPPR
    523	 *
    524	 * Load CPPR with new value and if the XISR is 0
    525	 * then check for resends:
    526	 *
    527	 * ICP State: Resend
    528	 *
    529	 * If MFRR is more favored than CPPR, check for IPIs
    530	 * and notify ICS of a potential resend. This is done
    531	 * asynchronously (when used in real mode, we will have
    532	 * to exit here).
    533	 *
    534	 * We do not handle the complete Check_IPI as documented
    535	 * here. In the PAPR, this state will be used for both
    536	 * Set_MFRR and Down_CPPR. However, we know that we aren't
    537	 * changing the MFRR state here so we don't need to handle
    538	 * the case of an MFRR causing a reject of a pending irq,
    539	 * this will have been handled when the MFRR was set in the
    540	 * first place.
    541	 *
    542	 * Thus we don't have to handle rejects, only resends.
    543	 *
    544	 * When implementing real mode for HV KVM, resend will lead to
    545	 * a H_TOO_HARD return and the whole transaction will be handled
    546	 * in virtual mode.
    547	 */
    548	do {
    549		old_state = new_state = READ_ONCE(icp->state);
    550
    551		/* Down_CPPR */
    552		new_state.cppr = new_cppr;
    553
    554		/*
    555		 * Cut down Resend / Check_IPI / IPI
    556		 *
    557		 * The logic is that we cannot have a pending interrupt
    558		 * trumped by an IPI at this point (see above), so we
    559		 * know that either the pending interrupt is already an
    560		 * IPI (in which case we don't care to override it) or
    561		 * it's either more favored than us or non-existent
    562		 */
    563		if (new_state.mfrr < new_cppr &&
    564		    new_state.mfrr <= new_state.pending_pri) {
    565			WARN_ON(new_state.xisr != XICS_IPI &&
    566				new_state.xisr != 0);
    567			new_state.pending_pri = new_state.mfrr;
    568			new_state.xisr = XICS_IPI;
    569		}
    570
    571		/* Latch/clear resend bit */
    572		resend = new_state.need_resend;
    573		new_state.need_resend = 0;
    574
    575	} while (!icp_try_update(icp, old_state, new_state, true));
    576
    577	/*
    578	 * Now handle resend checks. Those are asynchronous to the ICP
    579	 * state update in HW (ie bus transactions) so we can handle them
    580	 * separately here too
    581	 */
    582	if (resend)
    583		icp_check_resend(xics, icp);
    584}
    585
    586static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
    587{
    588	union kvmppc_icp_state old_state, new_state;
    589	struct kvmppc_icp *icp = vcpu->arch.icp;
    590	u32 xirr;
    591
    592	/* First, remove EE from the processor */
    593	kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
    594
    595	/*
    596	 * ICP State: Accept_Interrupt
    597	 *
    598	 * Return the pending interrupt (if any) along with the
    599	 * current CPPR, then clear the XISR & set CPPR to the
    600	 * pending priority
    601	 */
    602	do {
    603		old_state = new_state = READ_ONCE(icp->state);
    604
    605		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
    606		if (!old_state.xisr)
    607			break;
    608		new_state.cppr = new_state.pending_pri;
    609		new_state.pending_pri = 0xff;
    610		new_state.xisr = 0;
    611
    612	} while (!icp_try_update(icp, old_state, new_state, true));
    613
    614	XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
    615
    616	return xirr;
    617}
    618
    619static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
    620				 unsigned long mfrr)
    621{
    622	union kvmppc_icp_state old_state, new_state;
    623	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
    624	struct kvmppc_icp *icp;
    625	u32 reject;
    626	bool resend;
    627	bool local;
    628
    629	XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
    630		 vcpu->vcpu_id, server, mfrr);
    631
    632	icp = vcpu->arch.icp;
    633	local = icp->server_num == server;
    634	if (!local) {
    635		icp = kvmppc_xics_find_server(vcpu->kvm, server);
    636		if (!icp)
    637			return H_PARAMETER;
    638	}
    639
    640	/*
    641	 * ICP state: Set_MFRR
    642	 *
    643	 * If the CPPR is more favored than the new MFRR, then
    644	 * nothing needs to be rejected as there can be no XISR to
    645	 * reject.  If the MFRR is being made less favored then
    646	 * there might be a previously-rejected interrupt needing
    647	 * to be resent.
    648	 *
    649	 * ICP state: Check_IPI
    650	 *
    651	 * If the CPPR is less favored, then we might be replacing
    652	 * an interrupt, and thus need to possibly reject it.
    653	 *
    654	 * ICP State: IPI
    655	 *
    656	 * Besides rejecting any pending interrupts, we also
    657	 * update XISR and pending_pri to mark IPI as pending.
    658	 *
    659	 * PAPR does not describe this state, but if the MFRR is being
    660	 * made less favored than its earlier value, there might be
    661	 * a previously-rejected interrupt needing to be resent.
    662	 * Ideally, we would want to resend only if
    663	 *	prio(pending_interrupt) < mfrr &&
    664	 *	prio(pending_interrupt) < cppr
    665	 * where pending interrupt is the one that was rejected. But
    666	 * we don't have that state, so we simply trigger a resend
    667	 * whenever the MFRR is made less favored.
    668	 */
    669	do {
    670		old_state = new_state = READ_ONCE(icp->state);
    671
    672		/* Set_MFRR */
    673		new_state.mfrr = mfrr;
    674
    675		/* Check_IPI */
    676		reject = 0;
    677		resend = false;
    678		if (mfrr < new_state.cppr) {
    679			/* Reject a pending interrupt if not an IPI */
    680			if (mfrr <= new_state.pending_pri) {
    681				reject = new_state.xisr;
    682				new_state.pending_pri = mfrr;
    683				new_state.xisr = XICS_IPI;
    684			}
    685		}
    686
    687		if (mfrr > old_state.mfrr) {
    688			resend = new_state.need_resend;
    689			new_state.need_resend = 0;
    690		}
    691	} while (!icp_try_update(icp, old_state, new_state, local));
    692
    693	/* Handle reject */
    694	if (reject && reject != XICS_IPI)
    695		icp_deliver_irq(xics, icp, reject, false);
    696
    697	/* Handle resend */
    698	if (resend)
    699		icp_check_resend(xics, icp);
    700
    701	return H_SUCCESS;
    702}
    703
    704static int kvmppc_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
    705{
    706	union kvmppc_icp_state state;
    707	struct kvmppc_icp *icp;
    708
    709	icp = vcpu->arch.icp;
    710	if (icp->server_num != server) {
    711		icp = kvmppc_xics_find_server(vcpu->kvm, server);
    712		if (!icp)
    713			return H_PARAMETER;
    714	}
    715	state = READ_ONCE(icp->state);
    716	kvmppc_set_gpr(vcpu, 4, ((u32)state.cppr << 24) | state.xisr);
    717	kvmppc_set_gpr(vcpu, 5, state.mfrr);
    718	return H_SUCCESS;
    719}
    720
    721static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
    722{
    723	union kvmppc_icp_state old_state, new_state;
    724	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
    725	struct kvmppc_icp *icp = vcpu->arch.icp;
    726	u32 reject;
    727
    728	XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
    729
    730	/*
    731	 * ICP State: Set_CPPR
    732	 *
    733	 * We can safely compare the new value with the current
    734	 * value outside of the transaction as the CPPR is only
    735	 * ever changed by the processor on itself
    736	 */
    737	if (cppr > icp->state.cppr)
    738		icp_down_cppr(xics, icp, cppr);
    739	else if (cppr == icp->state.cppr)
    740		return;
    741
    742	/*
    743	 * ICP State: Up_CPPR
    744	 *
    745	 * The processor is raising its priority, this can result
    746	 * in a rejection of a pending interrupt:
    747	 *
    748	 * ICP State: Reject_Current
    749	 *
    750	 * We can remove EE from the current processor, the update
    751	 * transaction will set it again if needed
    752	 */
    753	kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
    754
    755	do {
    756		old_state = new_state = READ_ONCE(icp->state);
    757
    758		reject = 0;
    759		new_state.cppr = cppr;
    760
    761		if (cppr <= new_state.pending_pri) {
    762			reject = new_state.xisr;
    763			new_state.xisr = 0;
    764			new_state.pending_pri = 0xff;
    765		}
    766
    767	} while (!icp_try_update(icp, old_state, new_state, true));
    768
    769	/*
    770	 * Check for rejects. They are handled by doing a new delivery
    771	 * attempt (see comments in icp_deliver_irq).
    772	 */
    773	if (reject && reject != XICS_IPI)
    774		icp_deliver_irq(xics, icp, reject, false);
    775}
    776
    777static int ics_eoi(struct kvm_vcpu *vcpu, u32 irq)
    778{
    779	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
    780	struct kvmppc_icp *icp = vcpu->arch.icp;
    781	struct kvmppc_ics *ics;
    782	struct ics_irq_state *state;
    783	u16 src;
    784	u32 pq_old, pq_new;
    785
    786	/*
    787	 * ICS EOI handling: For LSI, if P bit is still set, we need to
    788	 * resend it.
    789	 *
    790	 * For MSI, we move Q bit into P (and clear Q). If it is set,
    791	 * resend it.
    792	 */
    793
    794	ics = kvmppc_xics_find_ics(xics, irq, &src);
    795	if (!ics) {
    796		XICS_DBG("ics_eoi: IRQ 0x%06x not found !\n", irq);
    797		return H_PARAMETER;
    798	}
    799	state = &ics->irq_state[src];
    800
    801	if (state->lsi)
    802		pq_new = state->pq_state;
    803	else
    804		do {
    805			pq_old = state->pq_state;
    806			pq_new = pq_old >> 1;
    807		} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
    808
    809	if (pq_new & PQ_PRESENTED)
    810		icp_deliver_irq(xics, icp, irq, false);
    811
    812	kvm_notify_acked_irq(vcpu->kvm, 0, irq);
    813
    814	return H_SUCCESS;
    815}
    816
    817static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
    818{
    819	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
    820	struct kvmppc_icp *icp = vcpu->arch.icp;
    821	u32 irq = xirr & 0x00ffffff;
    822
    823	XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
    824
    825	/*
    826	 * ICP State: EOI
    827	 *
    828	 * Note: If EOI is incorrectly used by SW to lower the CPPR
    829	 * value (ie more favored), we do not check for rejection of
    830	 * a pending interrupt; this is a SW error and PAPR specifies
    831	 * that we don't have to deal with it.
    832	 *
    833	 * The sending of an EOI to the ICS is handled after the
    834	 * CPPR update
    835	 *
    836	 * ICP State: Down_CPPR which we handle
    837	 * in a separate function as it's shared with H_CPPR.
    838	 */
    839	icp_down_cppr(xics, icp, xirr >> 24);
    840
    841	/* IPIs have no EOI */
    842	if (irq == XICS_IPI)
    843		return H_SUCCESS;
    844
    845	return ics_eoi(vcpu, irq);
    846}
    847
    848int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
    849{
    850	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
    851	struct kvmppc_icp *icp = vcpu->arch.icp;
    852
    853	XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
    854		 hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
    855
    856	if (icp->rm_action & XICS_RM_KICK_VCPU) {
    857		icp->n_rm_kick_vcpu++;
    858		kvmppc_fast_vcpu_kick(icp->rm_kick_target);
    859	}
    860	if (icp->rm_action & XICS_RM_CHECK_RESEND) {
    861		icp->n_rm_check_resend++;
    862		icp_check_resend(xics, icp->rm_resend_icp);
    863	}
    864	if (icp->rm_action & XICS_RM_NOTIFY_EOI) {
    865		icp->n_rm_notify_eoi++;
    866		kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq);
    867	}
    868
    869	icp->rm_action = 0;
    870
    871	return H_SUCCESS;
    872}
    873EXPORT_SYMBOL_GPL(kvmppc_xics_rm_complete);
    874
    875int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
    876{
    877	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
    878	unsigned long res;
    879	int rc = H_SUCCESS;
    880
    881	/* Check if we have an ICP */
    882	if (!xics || !vcpu->arch.icp)
    883		return H_HARDWARE;
    884
    885	/* These requests don't have real-mode implementations at present */
    886	switch (req) {
    887	case H_XIRR_X:
    888		res = kvmppc_h_xirr(vcpu);
    889		kvmppc_set_gpr(vcpu, 4, res);
    890		kvmppc_set_gpr(vcpu, 5, get_tb());
    891		return rc;
    892	case H_IPOLL:
    893		rc = kvmppc_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
    894		return rc;
    895	}
    896
    897	/* Check for real mode returning too hard */
    898	if (xics->real_mode && is_kvmppc_hv_enabled(vcpu->kvm))
    899		return kvmppc_xics_rm_complete(vcpu, req);
    900
    901	switch (req) {
    902	case H_XIRR:
    903		res = kvmppc_h_xirr(vcpu);
    904		kvmppc_set_gpr(vcpu, 4, res);
    905		break;
    906	case H_CPPR:
    907		kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
    908		break;
    909	case H_EOI:
    910		rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
    911		break;
    912	case H_IPI:
    913		rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
    914				  kvmppc_get_gpr(vcpu, 5));
    915		break;
    916	}
    917
    918	return rc;
    919}
    920EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);
    921
    922
    923/* -- Initialisation code etc. -- */
    924
    925static void xics_debugfs_irqmap(struct seq_file *m,
    926				struct kvmppc_passthru_irqmap *pimap)
    927{
    928	int i;
    929
    930	if (!pimap)
    931		return;
    932	seq_printf(m, "========\nPIRQ mappings: %d maps\n===========\n",
    933				pimap->n_mapped);
    934	for (i = 0; i < pimap->n_mapped; i++)  {
    935		seq_printf(m, "r_hwirq=%x, v_hwirq=%x\n",
    936			pimap->mapped[i].r_hwirq, pimap->mapped[i].v_hwirq);
    937	}
    938}
    939
    940static int xics_debug_show(struct seq_file *m, void *private)
    941{
    942	struct kvmppc_xics *xics = m->private;
    943	struct kvm *kvm = xics->kvm;
    944	struct kvm_vcpu *vcpu;
    945	int icsid;
    946	unsigned long flags, i;
    947	unsigned long t_rm_kick_vcpu, t_rm_check_resend;
    948	unsigned long t_rm_notify_eoi;
    949	unsigned long t_reject, t_check_resend;
    950
    951	if (!kvm)
    952		return 0;
    953
    954	t_rm_kick_vcpu = 0;
    955	t_rm_notify_eoi = 0;
    956	t_rm_check_resend = 0;
    957	t_check_resend = 0;
    958	t_reject = 0;
    959
    960	xics_debugfs_irqmap(m, kvm->arch.pimap);
    961
    962	seq_printf(m, "=========\nICP state\n=========\n");
    963
    964	kvm_for_each_vcpu(i, vcpu, kvm) {
    965		struct kvmppc_icp *icp = vcpu->arch.icp;
    966		union kvmppc_icp_state state;
    967
    968		if (!icp)
    969			continue;
    970
    971		state.raw = READ_ONCE(icp->state.raw);
    972		seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n",
    973			   icp->server_num, state.xisr,
    974			   state.pending_pri, state.cppr, state.mfrr,
    975			   state.out_ee, state.need_resend);
    976		t_rm_kick_vcpu += icp->n_rm_kick_vcpu;
    977		t_rm_notify_eoi += icp->n_rm_notify_eoi;
    978		t_rm_check_resend += icp->n_rm_check_resend;
    979		t_check_resend += icp->n_check_resend;
    980		t_reject += icp->n_reject;
    981	}
    982
    983	seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu notify_eoi=%lu\n",
    984			t_rm_kick_vcpu, t_rm_check_resend,
    985			t_rm_notify_eoi);
    986	seq_printf(m, "ICP Real Mode totals: check_resend=%lu resend=%lu\n",
    987			t_check_resend, t_reject);
    988	for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
    989		struct kvmppc_ics *ics = xics->ics[icsid];
    990
    991		if (!ics)
    992			continue;
    993
    994		seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
    995			   icsid);
    996
    997		local_irq_save(flags);
    998		arch_spin_lock(&ics->lock);
    999
   1000		for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
   1001			struct ics_irq_state *irq = &ics->irq_state[i];
   1002
   1003			seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x pq_state %d resend %d masked pending %d\n",
   1004				   irq->number, irq->server, irq->priority,
   1005				   irq->saved_priority, irq->pq_state,
   1006				   irq->resend, irq->masked_pending);
   1007
   1008		}
   1009		arch_spin_unlock(&ics->lock);
   1010		local_irq_restore(flags);
   1011	}
   1012	return 0;
   1013}
   1014
   1015DEFINE_SHOW_ATTRIBUTE(xics_debug);
   1016
   1017static void xics_debugfs_init(struct kvmppc_xics *xics)
   1018{
   1019	xics->dentry = debugfs_create_file("xics", 0444, xics->kvm->debugfs_dentry,
   1020					   xics, &xics_debug_fops);
   1021
   1022	pr_debug("%s: created\n", __func__);
   1023}
   1024
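       /*
        * Allocate and publish the ICS that covers 'irq', serialized by
        * kvm->lock.  If another caller installed it first, the existing
        * ICS is returned.
        */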
   1025static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
   1026					struct kvmppc_xics *xics, int irq)
   1027{
   1028	struct kvmppc_ics *ics;
   1029	int i, icsid;
   1030
   1031	icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
   1032
   1033	mutex_lock(&kvm->lock);
   1034
   1035	/* ICS already exists - somebody else got here first */
   1036	if (xics->ics[icsid])
   1037		goto out;
   1038
   1039	/* Create the ICS */
   1040	ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL);
   1041	if (!ics)
   1042		goto out;
   1043
   1044	ics->icsid = icsid;
   1045
   1046	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
   1047		ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i;
   1048		ics->irq_state[i].priority = MASKED;
   1049		ics->irq_state[i].saved_priority = MASKED;
   1050	}
   1051	smp_wmb();
   1052	xics->ics[icsid] = ics;
   1053
   1054	if (icsid > xics->max_icsid)
   1055		xics->max_icsid = icsid;
   1056
   1057 out:
   1058	mutex_unlock(&kvm->lock);
   1059	return xics->ics[icsid];
   1060}
   1061
   1062static int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
   1063{
   1064	struct kvmppc_icp *icp;
   1065
   1066	if (!vcpu->kvm->arch.xics)
   1067		return -ENODEV;
   1068
   1069	if (kvmppc_xics_find_server(vcpu->kvm, server_num))
   1070		return -EEXIST;
   1071
   1072	icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL);
   1073	if (!icp)
   1074		return -ENOMEM;
   1075
   1076	icp->vcpu = vcpu;
   1077	icp->server_num = server_num;
   1078	icp->state.mfrr = MASKED;
   1079	icp->state.pending_pri = MASKED;
   1080	vcpu->arch.icp = icp;
   1081
   1082	XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
   1083
   1084	return 0;
   1085}
   1086
   1087u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu)
   1088{
   1089	struct kvmppc_icp *icp = vcpu->arch.icp;
   1090	union kvmppc_icp_state state;
   1091
   1092	if (!icp)
   1093		return 0;
   1094	state = icp->state;
   1095	return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) |
   1096		((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) |
   1097		((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) |
   1098		((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT);
   1099}
   1100
   1101int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
   1102{
   1103	struct kvmppc_icp *icp = vcpu->arch.icp;
   1104	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
   1105	union kvmppc_icp_state old_state, new_state;
   1106	struct kvmppc_ics *ics;
   1107	u8 cppr, mfrr, pending_pri;
   1108	u32 xisr;
   1109	u16 src;
   1110	bool resend;
   1111
   1112	if (!icp || !xics)
   1113		return -ENOENT;
   1114
   1115	cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
   1116	xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
   1117		KVM_REG_PPC_ICP_XISR_MASK;
   1118	mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
   1119	pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT;
   1120
   1121	/* Require the new state to be internally consistent */
   1122	if (xisr == 0) {
   1123		if (pending_pri != 0xff)
   1124			return -EINVAL;
   1125	} else if (xisr == XICS_IPI) {
   1126		if (pending_pri != mfrr || pending_pri >= cppr)
   1127			return -EINVAL;
   1128	} else {
   1129		if (pending_pri >= mfrr || pending_pri >= cppr)
   1130			return -EINVAL;
   1131		ics = kvmppc_xics_find_ics(xics, xisr, &src);
   1132		if (!ics)
   1133			return -EINVAL;
   1134	}
   1135
   1136	new_state.raw = 0;
   1137	new_state.cppr = cppr;
   1138	new_state.xisr = xisr;
   1139	new_state.mfrr = mfrr;
   1140	new_state.pending_pri = pending_pri;
   1141
   1142	/*
   1143	 * Deassert the CPU interrupt request.
   1144	 * icp_try_update will reassert it if necessary.
   1145	 */
   1146	kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
   1147
   1148	/*
   1149	 * Note that if we displace an interrupt from old_state.xisr,
   1150	 * we don't mark it as rejected.  We expect userspace to set
   1151	 * the state of the interrupt sources to be consistent with
   1152	 * the ICP states (either before or afterwards, which doesn't
   1153	 * matter).  We do handle resends due to CPPR becoming less
   1154	 * favoured because that is necessary to end up with a
   1155	 * favored because that is necessary to end up with a
   1156	 * the ICS states before the ICP states.
   1157	 */
   1158	do {
   1159		old_state = READ_ONCE(icp->state);
   1160
   1161		if (new_state.mfrr <= old_state.mfrr) {
   1162			resend = false;
   1163			new_state.need_resend = old_state.need_resend;
   1164		} else {
   1165			resend = old_state.need_resend;
   1166			new_state.need_resend = 0;
   1167		}
   1168	} while (!icp_try_update(icp, old_state, new_state, false));
   1169
   1170	if (resend)
   1171		icp_check_resend(xics, icp);
   1172
   1173	return 0;
   1174}
   1175
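       /*
        * Read out the state of a single interrupt source for the
        * KVM_DEV_XICS_GRP_SOURCES device attribute group and copy it to
        * userspace as a KVM_XICS_* encoded u64.
        */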
   1176static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
   1177{
   1178	int ret;
   1179	struct kvmppc_ics *ics;
   1180	struct ics_irq_state *irqp;
   1181	u64 __user *ubufp = (u64 __user *) addr;
   1182	u16 idx;
   1183	u64 val, prio;
   1184	unsigned long flags;
   1185
   1186	ics = kvmppc_xics_find_ics(xics, irq, &idx);
   1187	if (!ics)
   1188		return -ENOENT;
   1189
   1190	irqp = &ics->irq_state[idx];
   1191	local_irq_save(flags);
   1192	arch_spin_lock(&ics->lock);
   1193	ret = -ENOENT;
   1194	if (irqp->exists) {
   1195		val = irqp->server;
   1196		prio = irqp->priority;
   1197		if (prio == MASKED) {
   1198			val |= KVM_XICS_MASKED;
   1199			prio = irqp->saved_priority;
   1200		}
   1201		val |= prio << KVM_XICS_PRIORITY_SHIFT;
   1202		if (irqp->lsi) {
   1203			val |= KVM_XICS_LEVEL_SENSITIVE;
   1204			if (irqp->pq_state & PQ_PRESENTED)
   1205				val |= KVM_XICS_PENDING;
   1206		} else if (irqp->masked_pending || irqp->resend)
   1207			val |= KVM_XICS_PENDING;
   1208
   1209		if (irqp->pq_state & PQ_PRESENTED)
   1210			val |= KVM_XICS_PRESENTED;
   1211
   1212		if (irqp->pq_state & PQ_QUEUED)
   1213			val |= KVM_XICS_QUEUED;
   1214
   1215		ret = 0;
   1216	}
   1217	arch_spin_unlock(&ics->lock);
   1218	local_irq_restore(flags);
   1219
   1220	if (!ret && put_user(val, ubufp))
   1221		ret = -EFAULT;
   1222
   1223	return ret;
   1224}
   1225
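       /*
        * Restore the state of a single interrupt source from a KVM_XICS_*
        * encoded u64 supplied by userspace, creating the containing ICS on
        * demand, and redeliver the interrupt if it was saved as pending.
        */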
   1226static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
   1227{
   1228	struct kvmppc_ics *ics;
   1229	struct ics_irq_state *irqp;
   1230	u64 __user *ubufp = (u64 __user *) addr;
   1231	u16 idx;
   1232	u64 val;
   1233	u8 prio;
   1234	u32 server;
   1235	unsigned long flags;
   1236
   1237	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
   1238		return -ENOENT;
   1239
   1240	ics = kvmppc_xics_find_ics(xics, irq, &idx);
   1241	if (!ics) {
   1242		ics = kvmppc_xics_create_ics(xics->kvm, xics, irq);
   1243		if (!ics)
   1244			return -ENOMEM;
   1245	}
   1246	irqp = &ics->irq_state[idx];
   1247	if (get_user(val, ubufp))
   1248		return -EFAULT;
   1249
   1250	server = val & KVM_XICS_DESTINATION_MASK;
   1251	prio = val >> KVM_XICS_PRIORITY_SHIFT;
   1252	if (prio != MASKED &&
   1253	    kvmppc_xics_find_server(xics->kvm, server) == NULL)
   1254		return -EINVAL;
   1255
   1256	local_irq_save(flags);
   1257	arch_spin_lock(&ics->lock);
   1258	irqp->server = server;
   1259	irqp->saved_priority = prio;
   1260	if (val & KVM_XICS_MASKED)
   1261		prio = MASKED;
   1262	irqp->priority = prio;
   1263	irqp->resend = 0;
   1264	irqp->masked_pending = 0;
   1265	irqp->lsi = 0;
   1266	irqp->pq_state = 0;
   1267	if (val & KVM_XICS_LEVEL_SENSITIVE)
   1268		irqp->lsi = 1;
   1269	/* If PENDING, set P in case P is not saved because of old code */
   1270	if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
   1271		irqp->pq_state |= PQ_PRESENTED;
   1272	if (val & KVM_XICS_QUEUED)
   1273		irqp->pq_state |= PQ_QUEUED;
   1274	irqp->exists = 1;
   1275	arch_spin_unlock(&ics->lock);
   1276	local_irq_restore(flags);
   1277
   1278	if (val & KVM_XICS_PENDING)
   1279		icp_deliver_irq(xics, NULL, irqp->number, false);
   1280
   1281	return 0;
   1282}
   1283
   1284int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
   1285			bool line_status)
   1286{
   1287	struct kvmppc_xics *xics = kvm->arch.xics;
   1288
   1289	if (!xics)
   1290		return -ENODEV;
   1291	return ics_deliver_irq(xics, irq, level);
   1292}
   1293
   1294static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
   1295{
   1296	struct kvmppc_xics *xics = dev->private;
   1297
   1298	switch (attr->group) {
   1299	case KVM_DEV_XICS_GRP_SOURCES:
   1300		return xics_set_source(xics, attr->attr, attr->addr);
   1301	}
   1302	return -ENXIO;
   1303}
   1304
   1305static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
   1306{
   1307	struct kvmppc_xics *xics = dev->private;
   1308
   1309	switch (attr->group) {
   1310	case KVM_DEV_XICS_GRP_SOURCES:
   1311		return xics_get_source(xics, attr->attr, attr->addr);
   1312	}
   1313	return -ENXIO;
   1314}
   1315
   1316static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
   1317{
   1318	switch (attr->group) {
   1319	case KVM_DEV_XICS_GRP_SOURCES:
   1320		if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
   1321		    attr->attr < KVMPPC_XICS_NR_IRQS)
   1322			return 0;
   1323		break;
   1324	}
   1325	return -ENXIO;
   1326}
   1327
   1328/*
   1329 * Called when device fd is closed. kvm->lock is held.
   1330 */
   1331static void kvmppc_xics_release(struct kvm_device *dev)
   1332{
   1333	struct kvmppc_xics *xics = dev->private;
   1334	unsigned long i;
   1335	struct kvm *kvm = xics->kvm;
   1336	struct kvm_vcpu *vcpu;
   1337
   1338	pr_devel("Releasing xics device\n");
   1339
   1340	/*
   1341	 * Since this is the device release function, we know that
   1342	 * userspace does not have any open fd referring to the
   1343	 * device.  Therefore none of the device attribute set/get
   1344	 * functions can be executing concurrently, and similarly the
   1345	 * connect_vcpu and set/clr_mapped functions cannot be
   1346	 * executing either.
   1347	 */
   1348
   1349	debugfs_remove(xics->dentry);
   1350
   1351	/*
   1352	 * We should clean up the vCPU interrupt presenters first.
   1353	 */
   1354	kvm_for_each_vcpu(i, vcpu, kvm) {
   1355		/*
   1356		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
   1357		 * (i.e. kvmppc_xics_[gs]et_icp) can be done concurrently.
   1358		 * Holding the vcpu->mutex also means that execution is
   1359		 * excluded for the vcpu until the ICP is freed. When the vcpu
   1360		 * can execute again, vcpu->arch.icp and vcpu->arch.irq_type
   1361		 * have been cleared and the vcpu will not be going into the
   1362		 * XICS code anymore.
   1363		 */
   1364		mutex_lock(&vcpu->mutex);
   1365		kvmppc_xics_free_icp(vcpu);
   1366		mutex_unlock(&vcpu->mutex);
   1367	}
   1368
   1369	if (kvm)
   1370		kvm->arch.xics = NULL;
   1371
   1372	for (i = 0; i <= xics->max_icsid; i++) {
   1373		kfree(xics->ics[i]);
   1374		xics->ics[i] = NULL;
   1375	}
   1376	/*
   1377	 * The kvmppc_xics structure is kept around in the machine's
   1378	 * xics_device pointer for reuse. For now it is freed when
   1379	 * the VM is destroyed, until we fix all the
   1380	 * execution paths.
   1381	 */
   1382	kfree(dev);
   1383}
   1384
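       /*
        * Reuse the kvmppc_xics structure kept on the VM by a previous
        * device release (see kvmppc_xics_release()), or allocate a
        * fresh one.
        */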
   1385static struct kvmppc_xics *kvmppc_xics_get_device(struct kvm *kvm)
   1386{
   1387	struct kvmppc_xics **kvm_xics_device = &kvm->arch.xics_device;
   1388	struct kvmppc_xics *xics = *kvm_xics_device;
   1389
   1390	if (!xics) {
   1391		xics = kzalloc(sizeof(*xics), GFP_KERNEL);
   1392		*kvm_xics_device = xics;
   1393	} else {
   1394		memset(xics, 0, sizeof(*xics));
   1395	}
   1396
   1397	return xics;
   1398}
   1399
   1400static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
   1401{
   1402	struct kvmppc_xics *xics;
   1403	struct kvm *kvm = dev->kvm;
   1404
   1405	pr_devel("Creating xics for partition\n");
   1406
   1407	/* Already there ? */
   1408	if (kvm->arch.xics)
   1409		return -EEXIST;
   1410
   1411	xics = kvmppc_xics_get_device(kvm);
   1412	if (!xics)
   1413		return -ENOMEM;
   1414
   1415	dev->private = xics;
   1416	xics->dev = dev;
   1417	xics->kvm = kvm;
   1418	kvm->arch.xics = xics;
   1419
   1420#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
   1421	if (cpu_has_feature(CPU_FTR_ARCH_206) &&
   1422	    cpu_has_feature(CPU_FTR_HVMODE)) {
   1423		/* Enable real mode support */
   1424		xics->real_mode = ENABLE_REALMODE;
   1425		xics->real_mode_dbg = DEBUG_REALMODE;
   1426	}
   1427#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
   1428
   1429	return 0;
   1430}
   1431
   1432static void kvmppc_xics_init(struct kvm_device *dev)
   1433{
   1434	struct kvmppc_xics *xics = dev->private;
   1435
   1436	xics_debugfs_init(xics);
   1437}
   1438
   1439struct kvm_device_ops kvm_xics_ops = {
   1440	.name = "kvm-xics",
   1441	.create = kvmppc_xics_create,
   1442	.init = kvmppc_xics_init,
   1443	.release = kvmppc_xics_release,
   1444	.set_attr = xics_set_attr,
   1445	.get_attr = xics_get_attr,
   1446	.has_attr = xics_has_attr,
   1447};
   1448
   1449int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
   1450			     u32 xcpu)
   1451{
   1452	struct kvmppc_xics *xics = dev->private;
   1453	int r = -EBUSY;
   1454
   1455	if (dev->ops != &kvm_xics_ops)
   1456		return -EPERM;
   1457	if (xics->kvm != vcpu->kvm)
   1458		return -EPERM;
   1459	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
   1460		return -EBUSY;
   1461
   1462	r = kvmppc_xics_create_icp(vcpu, xcpu);
   1463	if (!r)
   1464		vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
   1465
   1466	return r;
   1467}
   1468
   1469void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
   1470{
   1471	if (!vcpu->arch.icp)
   1472		return;
   1473	kfree(vcpu->arch.icp);
   1474	vcpu->arch.icp = NULL;
   1475	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
   1476}
   1477
   1478void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
   1479			    unsigned long host_irq)
   1480{
   1481	struct kvmppc_xics *xics = kvm->arch.xics;
   1482	struct kvmppc_ics *ics;
   1483	u16 idx;
   1484
   1485	ics = kvmppc_xics_find_ics(xics, irq, &idx);
   1486	if (!ics)
   1487		return;
   1488
   1489	ics->irq_state[idx].host_irq = host_irq;
   1490	ics->irq_state[idx].intr_cpu = -1;
   1491}
   1492EXPORT_SYMBOL_GPL(kvmppc_xics_set_mapped);
   1493
   1494void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long irq,
   1495			    unsigned long host_irq)
   1496{
   1497	struct kvmppc_xics *xics = kvm->arch.xics;
   1498	struct kvmppc_ics *ics;
   1499	u16 idx;
   1500
   1501	ics = kvmppc_xics_find_ics(xics, irq, &idx);
   1502	if (!ics)
   1503		return;
   1504
   1505	ics->irq_state[idx].host_irq = 0;
   1506}
   1507EXPORT_SYMBOL_GPL(kvmppc_xics_clr_mapped);