cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

events_base.c (53723B)

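The listing below is the Xen event-channel core, which maps Xen event-channel ports onto Linux IRQs and hands them to the normal interrupt path. As a rough orientation before the full source, here is a minimal consumer-side sketch of the API this file exports (bind_evtchn_to_irqhandler(), notify_remote_via_irq(), unbind_from_irqhandler()). The handler, device name, and the way the port is obtained are hypothetical assumptions for illustration; a real frontend driver negotiates its port with the backend over xenbus.

/*
 * Minimal sketch, not part of this file: a hypothetical Xen frontend
 * consuming the API implemented below. "evtchn" would come from the
 * driver's xenbus handshake with the backend.
 */
#include <linux/interrupt.h>
#include <xen/events.h>

static irqreturn_t example_evtchn_handler(int irq, void *dev_id)
{
	/* Process the notification sent by the remote domain. */
	return IRQ_HANDLED;
}

static int example_bind(evtchn_port_t evtchn, void *drvdata)
{
	int irq;

	/* Map the port to a dynamically allocated Linux IRQ and request it. */
	irq = bind_evtchn_to_irqhandler(evtchn, example_evtchn_handler,
					0, "example-frontend", drvdata);
	if (irq < 0)
		return irq;

	/* Kick the remote end of the channel through the bound IRQ. */
	notify_remote_via_irq(irq);

	/* On teardown: unbind_from_irqhandler(irq, drvdata); */

	return irq;
}
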

      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Xen event channels
      4 *
      5 * Xen models interrupts with abstract event channels.  Because each
      6 * domain gets 1024 event channels, but NR_IRQ is not that large, we
      7 * must dynamically map irqs<->event channels.  The event channels
      8 * interface with the rest of the kernel by defining a xen interrupt
      9 * chip.  When an event is received, it is mapped to an irq and sent
     10 * through the normal interrupt processing path.
     11 *
     12 * There are four kinds of events which can be mapped to an event
     13 * channel:
     14 *
     15 * 1. Inter-domain notifications.  This includes all the virtual
     16 *    device events, since they're driven by front-ends in another domain
     17 *    (typically dom0).
     18 * 2. VIRQs, typically used for timers.  These are per-cpu events.
     19 * 3. IPIs.
     20 * 4. PIRQs - Hardware interrupts.
     21 *
     22 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
     23 */
     24
     25#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
     26
     27#include <linux/linkage.h>
     28#include <linux/interrupt.h>
     29#include <linux/irq.h>
     30#include <linux/moduleparam.h>
     31#include <linux/string.h>
     32#include <linux/memblock.h>
     33#include <linux/slab.h>
     34#include <linux/irqnr.h>
     35#include <linux/pci.h>
     36#include <linux/spinlock.h>
     37#include <linux/cpuhotplug.h>
     38#include <linux/atomic.h>
     39#include <linux/ktime.h>
     40
     41#ifdef CONFIG_X86
     42#include <asm/desc.h>
     43#include <asm/ptrace.h>
     44#include <asm/idtentry.h>
     45#include <asm/irq.h>
     46#include <asm/io_apic.h>
     47#include <asm/i8259.h>
     48#include <asm/xen/pci.h>
     49#endif
     50#include <asm/sync_bitops.h>
     51#include <asm/xen/hypercall.h>
     52#include <asm/xen/hypervisor.h>
     53#include <xen/page.h>
     54
     55#include <xen/xen.h>
     56#include <xen/hvm.h>
     57#include <xen/xen-ops.h>
     58#include <xen/events.h>
     59#include <xen/interface/xen.h>
     60#include <xen/interface/event_channel.h>
     61#include <xen/interface/hvm/hvm_op.h>
     62#include <xen/interface/hvm/params.h>
     63#include <xen/interface/physdev.h>
     64#include <xen/interface/sched.h>
     65#include <xen/interface/vcpu.h>
     66#include <xen/xenbus.h>
     67#include <asm/hw_irq.h>
     68
     69#include "events_internal.h"
     70
     71#undef MODULE_PARAM_PREFIX
     72#define MODULE_PARAM_PREFIX "xen."
     73
     74/* Interrupt types. */
     75enum xen_irq_type {
     76	IRQT_UNBOUND = 0,
     77	IRQT_PIRQ,
     78	IRQT_VIRQ,
     79	IRQT_IPI,
     80	IRQT_EVTCHN
     81};
     82
     83/*
     84 * Packed IRQ information:
     85 * type - enum xen_irq_type
     86 * event channel - irq->event channel mapping
     87 * cpu - cpu this event channel is bound to
     88 * index - type-specific information:
     89 *    PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM
     90 *           guest, or GSI (real passthrough IRQ) of the device.
     91 *    VIRQ - virq number
     92 *    IPI - IPI vector
     93 *    EVTCHN -
     94 */
     95struct irq_info {
     96	struct list_head list;
     97	struct list_head eoi_list;
     98	short refcnt;
     99	u8 spurious_cnt;
    100	u8 is_accounted;
    101	short type;		/* type: IRQT_* */
    102	u8 mask_reason;		/* Why is event channel masked */
    103#define EVT_MASK_REASON_EXPLICIT	0x01
    104#define EVT_MASK_REASON_TEMPORARY	0x02
    105#define EVT_MASK_REASON_EOI_PENDING	0x04
    106	u8 is_active;		/* Is event just being handled? */
    107	unsigned irq;
    108	evtchn_port_t evtchn;   /* event channel */
    109	unsigned short cpu;     /* cpu bound */
    110	unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
    111	unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
    112	u64 eoi_time;           /* Time in jiffies when to EOI. */
    113	raw_spinlock_t lock;
    114
    115	union {
    116		unsigned short virq;
    117		enum ipi_vector ipi;
    118		struct {
    119			unsigned short pirq;
    120			unsigned short gsi;
    121			unsigned char vector;
    122			unsigned char flags;
    123			uint16_t domid;
    124		} pirq;
    125		struct xenbus_device *interdomain;
    126	} u;
    127};
    128
    129#define PIRQ_NEEDS_EOI	(1 << 0)
    130#define PIRQ_SHAREABLE	(1 << 1)
    131#define PIRQ_MSI_GROUP	(1 << 2)
    132
    133static uint __read_mostly event_loop_timeout = 2;
    134module_param(event_loop_timeout, uint, 0644);
    135
    136static uint __read_mostly event_eoi_delay = 10;
    137module_param(event_eoi_delay, uint, 0644);
    138
    139const struct evtchn_ops *evtchn_ops;
    140
    141/*
    142 * This lock protects updates to the following mapping and reference-count
    143 * arrays. The lock does not need to be acquired to read the mapping tables.
    144 */
    145static DEFINE_MUTEX(irq_mapping_update_lock);
    146
    147/*
    148 * Lock protecting event handling loop against removing event channels.
    149 * Adding of event channels is no issue as the associated IRQ becomes active
    150 * only after everything is setup (before request_[threaded_]irq() the handler
    151 * can't be entered for an event, as the event channel will be unmasked only
    152 * then).
    153 */
    154static DEFINE_RWLOCK(evtchn_rwlock);
    155
    156/*
    157 * Lock hierarchy:
    158 *
    159 * irq_mapping_update_lock
    160 *   evtchn_rwlock
    161 *     IRQ-desc lock
    162 *       percpu eoi_list_lock
    163 *         irq_info->lock
    164 */
    165
    166static LIST_HEAD(xen_irq_list_head);
    167
    168/* IRQ <-> VIRQ mapping. */
    169static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
    170
    171/* IRQ <-> IPI mapping */
    172static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
    173
    174/* Event channel distribution data */
    175static atomic_t channels_on_cpu[NR_CPUS];
    176
    177static int **evtchn_to_irq;
    178#ifdef CONFIG_X86
    179static unsigned long *pirq_eoi_map;
    180#endif
    181static bool (*pirq_needs_eoi)(unsigned irq);
    182
    183#define EVTCHN_ROW(e)  (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
    184#define EVTCHN_COL(e)  (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
    185#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
    186
    187/* Xen will never allocate port zero for any purpose. */
    188#define VALID_EVTCHN(chn)	((chn) != 0)
    189
    190static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
    191
    192static struct irq_chip xen_dynamic_chip;
    193static struct irq_chip xen_lateeoi_chip;
    194static struct irq_chip xen_percpu_chip;
    195static struct irq_chip xen_pirq_chip;
    196static void enable_dynirq(struct irq_data *data);
    197static void disable_dynirq(struct irq_data *data);
    198
    199static DEFINE_PER_CPU(unsigned int, irq_epoch);
    200
    201static void clear_evtchn_to_irq_row(int *evtchn_row)
    202{
    203	unsigned col;
    204
    205	for (col = 0; col < EVTCHN_PER_ROW; col++)
    206		WRITE_ONCE(evtchn_row[col], -1);
    207}
    208
    209static void clear_evtchn_to_irq_all(void)
    210{
    211	unsigned row;
    212
    213	for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
    214		if (evtchn_to_irq[row] == NULL)
    215			continue;
    216		clear_evtchn_to_irq_row(evtchn_to_irq[row]);
    217	}
    218}
    219
    220static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
    221{
    222	unsigned row;
    223	unsigned col;
    224	int *evtchn_row;
    225
    226	if (evtchn >= xen_evtchn_max_channels())
    227		return -EINVAL;
    228
    229	row = EVTCHN_ROW(evtchn);
    230	col = EVTCHN_COL(evtchn);
    231
    232	if (evtchn_to_irq[row] == NULL) {
    233		/* Unallocated irq entries return -1 anyway */
    234		if (irq == -1)
    235			return 0;
    236
    237		evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0);
    238		if (evtchn_row == NULL)
    239			return -ENOMEM;
    240
    241		clear_evtchn_to_irq_row(evtchn_row);
    242
    243		/*
    244		 * We've prepared an empty row for the mapping. If a different
    245		 * thread was faster inserting it, we can drop ours.
    246		 */
    247		if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL)
    248			free_page((unsigned long) evtchn_row);
    249	}
    250
    251	WRITE_ONCE(evtchn_to_irq[row][col], irq);
    252	return 0;
    253}
    254
    255int get_evtchn_to_irq(evtchn_port_t evtchn)
    256{
    257	if (evtchn >= xen_evtchn_max_channels())
    258		return -1;
    259	if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
    260		return -1;
    261	return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
    262}
    263
    264/* Get info for IRQ */
    265static struct irq_info *info_for_irq(unsigned irq)
    266{
    267	if (irq < nr_legacy_irqs())
    268		return legacy_info_ptrs[irq];
    269	else
    270		return irq_get_chip_data(irq);
    271}
    272
    273static void set_info_for_irq(unsigned int irq, struct irq_info *info)
    274{
    275	if (irq < nr_legacy_irqs())
    276		legacy_info_ptrs[irq] = info;
    277	else
    278		irq_set_chip_data(irq, info);
    279}
    280
    281/* Per CPU channel accounting */
    282static void channels_on_cpu_dec(struct irq_info *info)
    283{
    284	if (!info->is_accounted)
    285		return;
    286
    287	info->is_accounted = 0;
    288
    289	if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
    290		return;
    291
    292	WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0));
    293}
    294
    295static void channels_on_cpu_inc(struct irq_info *info)
    296{
    297	if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
    298		return;
    299
    300	if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
    301					    INT_MAX)))
    302		return;
    303
    304	info->is_accounted = 1;
    305}
    306
    307/* Constructors for packed IRQ information. */
    308static int xen_irq_info_common_setup(struct irq_info *info,
    309				     unsigned irq,
    310				     enum xen_irq_type type,
    311				     evtchn_port_t evtchn,
    312				     unsigned short cpu)
    313{
    314	int ret;
    315
    316	BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
    317
    318	info->type = type;
    319	info->irq = irq;
    320	info->evtchn = evtchn;
    321	info->cpu = cpu;
    322	info->mask_reason = EVT_MASK_REASON_EXPLICIT;
    323	raw_spin_lock_init(&info->lock);
    324
    325	ret = set_evtchn_to_irq(evtchn, irq);
    326	if (ret < 0)
    327		return ret;
    328
    329	irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
    330
    331	return xen_evtchn_port_setup(evtchn);
    332}
    333
    334static int xen_irq_info_evtchn_setup(unsigned irq,
    335				     evtchn_port_t evtchn,
    336				     struct xenbus_device *dev)
    337{
    338	struct irq_info *info = info_for_irq(irq);
    339	int ret;
    340
    341	ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
    342	info->u.interdomain = dev;
    343	if (dev)
    344		atomic_inc(&dev->event_channels);
    345
    346	return ret;
    347}
    348
    349static int xen_irq_info_ipi_setup(unsigned cpu,
    350				  unsigned irq,
    351				  evtchn_port_t evtchn,
    352				  enum ipi_vector ipi)
    353{
    354	struct irq_info *info = info_for_irq(irq);
    355
    356	info->u.ipi = ipi;
    357
    358	per_cpu(ipi_to_irq, cpu)[ipi] = irq;
    359
    360	return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
    361}
    362
    363static int xen_irq_info_virq_setup(unsigned cpu,
    364				   unsigned irq,
    365				   evtchn_port_t evtchn,
    366				   unsigned virq)
    367{
    368	struct irq_info *info = info_for_irq(irq);
    369
    370	info->u.virq = virq;
    371
    372	per_cpu(virq_to_irq, cpu)[virq] = irq;
    373
    374	return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
    375}
    376
    377static int xen_irq_info_pirq_setup(unsigned irq,
    378				   evtchn_port_t evtchn,
    379				   unsigned pirq,
    380				   unsigned gsi,
    381				   uint16_t domid,
    382				   unsigned char flags)
    383{
    384	struct irq_info *info = info_for_irq(irq);
    385
    386	info->u.pirq.pirq = pirq;
    387	info->u.pirq.gsi = gsi;
    388	info->u.pirq.domid = domid;
    389	info->u.pirq.flags = flags;
    390
    391	return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
    392}
    393
    394static void xen_irq_info_cleanup(struct irq_info *info)
    395{
    396	set_evtchn_to_irq(info->evtchn, -1);
    397	xen_evtchn_port_remove(info->evtchn, info->cpu);
    398	info->evtchn = 0;
    399	channels_on_cpu_dec(info);
    400}
    401
    402/*
    403 * Accessors for packed IRQ information.
    404 */
    405evtchn_port_t evtchn_from_irq(unsigned irq)
    406{
    407	const struct irq_info *info = NULL;
    408
    409	if (likely(irq < nr_irqs))
    410		info = info_for_irq(irq);
    411	if (!info)
    412		return 0;
    413
    414	return info->evtchn;
    415}
    416
    417unsigned int irq_from_evtchn(evtchn_port_t evtchn)
    418{
    419	return get_evtchn_to_irq(evtchn);
    420}
    421EXPORT_SYMBOL_GPL(irq_from_evtchn);
    422
    423int irq_from_virq(unsigned int cpu, unsigned int virq)
    424{
    425	return per_cpu(virq_to_irq, cpu)[virq];
    426}
    427
    428static enum ipi_vector ipi_from_irq(unsigned irq)
    429{
    430	struct irq_info *info = info_for_irq(irq);
    431
    432	BUG_ON(info == NULL);
    433	BUG_ON(info->type != IRQT_IPI);
    434
    435	return info->u.ipi;
    436}
    437
    438static unsigned virq_from_irq(unsigned irq)
    439{
    440	struct irq_info *info = info_for_irq(irq);
    441
    442	BUG_ON(info == NULL);
    443	BUG_ON(info->type != IRQT_VIRQ);
    444
    445	return info->u.virq;
    446}
    447
    448static unsigned pirq_from_irq(unsigned irq)
    449{
    450	struct irq_info *info = info_for_irq(irq);
    451
    452	BUG_ON(info == NULL);
    453	BUG_ON(info->type != IRQT_PIRQ);
    454
    455	return info->u.pirq.pirq;
    456}
    457
    458static enum xen_irq_type type_from_irq(unsigned irq)
    459{
    460	return info_for_irq(irq)->type;
    461}
    462
    463static unsigned cpu_from_irq(unsigned irq)
    464{
    465	return info_for_irq(irq)->cpu;
    466}
    467
    468unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
    469{
    470	int irq = get_evtchn_to_irq(evtchn);
    471	unsigned ret = 0;
    472
    473	if (irq != -1)
    474		ret = cpu_from_irq(irq);
    475
    476	return ret;
    477}
    478
    479static void do_mask(struct irq_info *info, u8 reason)
    480{
    481	unsigned long flags;
    482
    483	raw_spin_lock_irqsave(&info->lock, flags);
    484
    485	if (!info->mask_reason)
    486		mask_evtchn(info->evtchn);
    487
    488	info->mask_reason |= reason;
    489
    490	raw_spin_unlock_irqrestore(&info->lock, flags);
    491}
    492
    493static void do_unmask(struct irq_info *info, u8 reason)
    494{
    495	unsigned long flags;
    496
    497	raw_spin_lock_irqsave(&info->lock, flags);
    498
    499	info->mask_reason &= ~reason;
    500
    501	if (!info->mask_reason)
    502		unmask_evtchn(info->evtchn);
    503
    504	raw_spin_unlock_irqrestore(&info->lock, flags);
    505}
    506
    507#ifdef CONFIG_X86
    508static bool pirq_check_eoi_map(unsigned irq)
    509{
    510	return test_bit(pirq_from_irq(irq), pirq_eoi_map);
    511}
    512#endif
    513
    514static bool pirq_needs_eoi_flag(unsigned irq)
    515{
    516	struct irq_info *info = info_for_irq(irq);
    517	BUG_ON(info->type != IRQT_PIRQ);
    518
    519	return info->u.pirq.flags & PIRQ_NEEDS_EOI;
    520}
    521
    522static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
    523			       bool force_affinity)
    524{
    525	int irq = get_evtchn_to_irq(evtchn);
    526	struct irq_info *info = info_for_irq(irq);
    527
    528	BUG_ON(irq == -1);
    529
    530	if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
    531		cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
    532		cpumask_copy(irq_get_effective_affinity_mask(irq),
    533			     cpumask_of(cpu));
    534	}
    535
    536	xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
    537
    538	channels_on_cpu_dec(info);
    539	info->cpu = cpu;
    540	channels_on_cpu_inc(info);
    541}
    542
    543/**
    544 * notify_remote_via_irq - send event to remote end of event channel via irq
    545 * @irq: irq of event channel to send event to
    546 *
    547 * Unlike notify_remote_via_evtchn(), this is safe to use across
    548 * save/restore. Notifications on a broken connection are silently
    549 * dropped.
    550 */
    551void notify_remote_via_irq(int irq)
    552{
    553	evtchn_port_t evtchn = evtchn_from_irq(irq);
    554
    555	if (VALID_EVTCHN(evtchn))
    556		notify_remote_via_evtchn(evtchn);
    557}
    558EXPORT_SYMBOL_GPL(notify_remote_via_irq);
    559
    560struct lateeoi_work {
    561	struct delayed_work delayed;
    562	spinlock_t eoi_list_lock;
    563	struct list_head eoi_list;
    564};
    565
    566static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
    567
    568static void lateeoi_list_del(struct irq_info *info)
    569{
    570	struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
    571	unsigned long flags;
    572
    573	spin_lock_irqsave(&eoi->eoi_list_lock, flags);
    574	list_del_init(&info->eoi_list);
    575	spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
    576}
    577
    578static void lateeoi_list_add(struct irq_info *info)
    579{
    580	struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
    581	struct irq_info *elem;
    582	u64 now = get_jiffies_64();
    583	unsigned long delay;
    584	unsigned long flags;
    585
    586	if (now < info->eoi_time)
    587		delay = info->eoi_time - now;
    588	else
    589		delay = 1;
    590
    591	spin_lock_irqsave(&eoi->eoi_list_lock, flags);
    592
    593	if (list_empty(&eoi->eoi_list)) {
    594		list_add(&info->eoi_list, &eoi->eoi_list);
    595		mod_delayed_work_on(info->eoi_cpu, system_wq,
    596				    &eoi->delayed, delay);
    597	} else {
    598		list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
    599			if (elem->eoi_time <= info->eoi_time)
    600				break;
    601		}
    602		list_add(&info->eoi_list, &elem->eoi_list);
    603	}
    604
    605	spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
    606}
    607
    608static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
    609{
    610	evtchn_port_t evtchn;
    611	unsigned int cpu;
    612	unsigned int delay = 0;
    613
    614	evtchn = info->evtchn;
    615	if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
    616		return;
    617
    618	if (spurious) {
    619		struct xenbus_device *dev = info->u.interdomain;
    620		unsigned int threshold = 1;
    621
    622		if (dev && dev->spurious_threshold)
    623			threshold = dev->spurious_threshold;
    624
    625		if ((1 << info->spurious_cnt) < (HZ << 2)) {
    626			if (info->spurious_cnt != 0xFF)
    627				info->spurious_cnt++;
    628		}
    629		if (info->spurious_cnt > threshold) {
    630			delay = 1 << (info->spurious_cnt - 1 - threshold);
    631			if (delay > HZ)
    632				delay = HZ;
    633			if (!info->eoi_time)
    634				info->eoi_cpu = smp_processor_id();
    635			info->eoi_time = get_jiffies_64() + delay;
    636			if (dev)
    637				atomic_add(delay, &dev->jiffies_eoi_delayed);
    638		}
    639		if (dev)
    640			atomic_inc(&dev->spurious_events);
    641	} else {
    642		info->spurious_cnt = 0;
    643	}
    644
    645	cpu = info->eoi_cpu;
    646	if (info->eoi_time &&
    647	    (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
    648		lateeoi_list_add(info);
    649		return;
    650	}
    651
    652	info->eoi_time = 0;
    653
    654	/* is_active hasn't been reset yet, do it now. */
    655	smp_store_release(&info->is_active, 0);
    656	do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
    657}
    658
    659static void xen_irq_lateeoi_worker(struct work_struct *work)
    660{
    661	struct lateeoi_work *eoi;
    662	struct irq_info *info;
    663	u64 now = get_jiffies_64();
    664	unsigned long flags;
    665
    666	eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
    667
    668	read_lock_irqsave(&evtchn_rwlock, flags);
    669
    670	while (true) {
    671		spin_lock(&eoi->eoi_list_lock);
    672
    673		info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
    674						eoi_list);
    675
    676		if (info == NULL || now < info->eoi_time) {
    677			spin_unlock(&eoi->eoi_list_lock);
    678			break;
    679		}
    680
    681		list_del_init(&info->eoi_list);
    682
    683		spin_unlock(&eoi->eoi_list_lock);
    684
    685		info->eoi_time = 0;
    686
    687		xen_irq_lateeoi_locked(info, false);
    688	}
    689
    690	if (info)
    691		mod_delayed_work_on(info->eoi_cpu, system_wq,
    692				    &eoi->delayed, info->eoi_time - now);
    693
    694	read_unlock_irqrestore(&evtchn_rwlock, flags);
    695}
    696
    697static void xen_cpu_init_eoi(unsigned int cpu)
    698{
    699	struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
    700
    701	INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
    702	spin_lock_init(&eoi->eoi_list_lock);
    703	INIT_LIST_HEAD(&eoi->eoi_list);
    704}
    705
    706void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
    707{
    708	struct irq_info *info;
    709	unsigned long flags;
    710
    711	read_lock_irqsave(&evtchn_rwlock, flags);
    712
    713	info = info_for_irq(irq);
    714
    715	if (info)
    716		xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
    717
    718	read_unlock_irqrestore(&evtchn_rwlock, flags);
    719}
    720EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
    721
    722static void xen_irq_init(unsigned irq)
    723{
    724	struct irq_info *info;
    725
    726	info = kzalloc(sizeof(*info), GFP_KERNEL);
    727	if (info == NULL)
    728		panic("Unable to allocate metadata for IRQ%d\n", irq);
    729
    730	info->type = IRQT_UNBOUND;
    731	info->refcnt = -1;
    732
    733	set_info_for_irq(irq, info);
    734	/*
    735	 * Interrupt affinity setting can be immediate. No point
    736	 * in delaying it until an interrupt is handled.
    737	 */
    738	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
    739
    740	INIT_LIST_HEAD(&info->eoi_list);
    741	list_add_tail(&info->list, &xen_irq_list_head);
    742}
    743
    744static int __must_check xen_allocate_irqs_dynamic(int nvec)
    745{
    746	int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
    747
    748	if (irq >= 0) {
    749		for (i = 0; i < nvec; i++)
    750			xen_irq_init(irq + i);
    751	}
    752
    753	return irq;
    754}
    755
    756static inline int __must_check xen_allocate_irq_dynamic(void)
    757{
    758
    759	return xen_allocate_irqs_dynamic(1);
    760}
    761
    762static int __must_check xen_allocate_irq_gsi(unsigned gsi)
    763{
    764	int irq;
    765
    766	/*
    767	 * A PV guest has no concept of a GSI (since it has no ACPI
    768	 * nor access to/knowledge of the physical APICs). Therefore
    769	 * all IRQs are dynamically allocated from the entire IRQ
    770	 * space.
    771	 */
    772	if (xen_pv_domain() && !xen_initial_domain())
    773		return xen_allocate_irq_dynamic();
    774
    775	/* Legacy IRQ descriptors are already allocated by the arch. */
    776	if (gsi < nr_legacy_irqs())
    777		irq = gsi;
    778	else
    779		irq = irq_alloc_desc_at(gsi, -1);
    780
    781	xen_irq_init(irq);
    782
    783	return irq;
    784}
    785
    786static void xen_free_irq(unsigned irq)
    787{
    788	struct irq_info *info = info_for_irq(irq);
    789	unsigned long flags;
    790
    791	if (WARN_ON(!info))
    792		return;
    793
    794	write_lock_irqsave(&evtchn_rwlock, flags);
    795
    796	if (!list_empty(&info->eoi_list))
    797		lateeoi_list_del(info);
    798
    799	list_del(&info->list);
    800
    801	set_info_for_irq(irq, NULL);
    802
    803	WARN_ON(info->refcnt > 0);
    804
    805	write_unlock_irqrestore(&evtchn_rwlock, flags);
    806
    807	kfree(info);
    808
    809	/* Legacy IRQ descriptors are managed by the arch. */
    810	if (irq < nr_legacy_irqs())
    811		return;
    812
    813	irq_free_desc(irq);
    814}
    815
    816static void xen_evtchn_close(evtchn_port_t port)
    817{
    818	struct evtchn_close close;
    819
    820	close.port = port;
    821	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
    822		BUG();
    823}
    824
    825/* Not called for lateeoi events. */
    826static void event_handler_exit(struct irq_info *info)
    827{
    828	smp_store_release(&info->is_active, 0);
    829	clear_evtchn(info->evtchn);
    830}
    831
    832static void pirq_query_unmask(int irq)
    833{
    834	struct physdev_irq_status_query irq_status;
    835	struct irq_info *info = info_for_irq(irq);
    836
    837	BUG_ON(info->type != IRQT_PIRQ);
    838
    839	irq_status.irq = pirq_from_irq(irq);
    840	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
    841		irq_status.flags = 0;
    842
    843	info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
    844	if (irq_status.flags & XENIRQSTAT_needs_eoi)
    845		info->u.pirq.flags |= PIRQ_NEEDS_EOI;
    846}
    847
    848static void eoi_pirq(struct irq_data *data)
    849{
    850	struct irq_info *info = info_for_irq(data->irq);
    851	evtchn_port_t evtchn = info ? info->evtchn : 0;
    852	struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
    853	int rc = 0;
    854
    855	if (!VALID_EVTCHN(evtchn))
    856		return;
    857
    858	event_handler_exit(info);
    859
    860	if (pirq_needs_eoi(data->irq)) {
    861		rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
    862		WARN_ON(rc);
    863	}
    864}
    865
    866static void mask_ack_pirq(struct irq_data *data)
    867{
    868	disable_dynirq(data);
    869	eoi_pirq(data);
    870}
    871
    872static unsigned int __startup_pirq(unsigned int irq)
    873{
    874	struct evtchn_bind_pirq bind_pirq;
    875	struct irq_info *info = info_for_irq(irq);
    876	evtchn_port_t evtchn = evtchn_from_irq(irq);
    877	int rc;
    878
    879	BUG_ON(info->type != IRQT_PIRQ);
    880
    881	if (VALID_EVTCHN(evtchn))
    882		goto out;
    883
    884	bind_pirq.pirq = pirq_from_irq(irq);
    885	/* NB. We are happy to share unless we are probing. */
    886	bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
    887					BIND_PIRQ__WILL_SHARE : 0;
    888	rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
    889	if (rc != 0) {
    890		pr_warn("Failed to obtain physical IRQ %d\n", irq);
    891		return 0;
    892	}
    893	evtchn = bind_pirq.port;
    894
    895	pirq_query_unmask(irq);
    896
    897	rc = set_evtchn_to_irq(evtchn, irq);
    898	if (rc)
    899		goto err;
    900
    901	info->evtchn = evtchn;
    902	bind_evtchn_to_cpu(evtchn, 0, false);
    903
    904	rc = xen_evtchn_port_setup(evtchn);
    905	if (rc)
    906		goto err;
    907
    908out:
    909	do_unmask(info, EVT_MASK_REASON_EXPLICIT);
    910
    911	eoi_pirq(irq_get_irq_data(irq));
    912
    913	return 0;
    914
    915err:
    916	pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
    917	xen_evtchn_close(evtchn);
    918	return 0;
    919}
    920
    921static unsigned int startup_pirq(struct irq_data *data)
    922{
    923	return __startup_pirq(data->irq);
    924}
    925
    926static void shutdown_pirq(struct irq_data *data)
    927{
    928	unsigned int irq = data->irq;
    929	struct irq_info *info = info_for_irq(irq);
    930	evtchn_port_t evtchn = evtchn_from_irq(irq);
    931
    932	BUG_ON(info->type != IRQT_PIRQ);
    933
    934	if (!VALID_EVTCHN(evtchn))
    935		return;
    936
    937	do_mask(info, EVT_MASK_REASON_EXPLICIT);
    938	xen_evtchn_close(evtchn);
    939	xen_irq_info_cleanup(info);
    940}
    941
    942static void enable_pirq(struct irq_data *data)
    943{
    944	enable_dynirq(data);
    945}
    946
    947static void disable_pirq(struct irq_data *data)
    948{
    949	disable_dynirq(data);
    950}
    951
    952int xen_irq_from_gsi(unsigned gsi)
    953{
    954	struct irq_info *info;
    955
    956	list_for_each_entry(info, &xen_irq_list_head, list) {
    957		if (info->type != IRQT_PIRQ)
    958			continue;
    959
    960		if (info->u.pirq.gsi == gsi)
    961			return info->irq;
    962	}
    963
    964	return -1;
    965}
    966EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
    967
    968static void __unbind_from_irq(unsigned int irq)
    969{
    970	evtchn_port_t evtchn = evtchn_from_irq(irq);
    971	struct irq_info *info = info_for_irq(irq);
    972
    973	if (info->refcnt > 0) {
    974		info->refcnt--;
    975		if (info->refcnt != 0)
    976			return;
    977	}
    978
    979	if (VALID_EVTCHN(evtchn)) {
    980		unsigned int cpu = cpu_from_irq(irq);
    981		struct xenbus_device *dev;
    982
    983		xen_evtchn_close(evtchn);
    984
    985		switch (type_from_irq(irq)) {
    986		case IRQT_VIRQ:
    987			per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
    988			break;
    989		case IRQT_IPI:
    990			per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
    991			break;
    992		case IRQT_EVTCHN:
    993			dev = info->u.interdomain;
    994			if (dev)
    995				atomic_dec(&dev->event_channels);
    996			break;
    997		default:
    998			break;
    999		}
   1000
   1001		xen_irq_info_cleanup(info);
   1002	}
   1003
   1004	xen_free_irq(irq);
   1005}
   1006
   1007/*
   1008 * Do not make any assumptions regarding the relationship between the
   1009 * IRQ number returned here and the Xen pirq argument.
   1010 *
   1011 * Note: We don't assign an event channel until the irq actually started
   1012 * up.  Return an existing irq if we've already got one for the gsi.
   1013 *
   1014 * Shareable implies level triggered, not shareable implies edge
   1015 * triggered here.
   1016 */
   1017int xen_bind_pirq_gsi_to_irq(unsigned gsi,
   1018			     unsigned pirq, int shareable, char *name)
   1019{
   1020	int irq;
   1021	struct physdev_irq irq_op;
   1022	int ret;
   1023
   1024	mutex_lock(&irq_mapping_update_lock);
   1025
   1026	irq = xen_irq_from_gsi(gsi);
   1027	if (irq != -1) {
   1028		pr_info("%s: returning irq %d for gsi %u\n",
   1029			__func__, irq, gsi);
   1030		goto out;
   1031	}
   1032
   1033	irq = xen_allocate_irq_gsi(gsi);
   1034	if (irq < 0)
   1035		goto out;
   1036
   1037	irq_op.irq = irq;
   1038	irq_op.vector = 0;
   1039
   1040	/* Only the privileged domain can do this. For non-priv, the pcifront
   1041	 * driver provides a PCI bus that does the call to do exactly
   1042	 * this in the priv domain. */
   1043	if (xen_initial_domain() &&
   1044	    HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
   1045		xen_free_irq(irq);
   1046		irq = -ENOSPC;
   1047		goto out;
   1048	}
   1049
   1050	ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
   1051			       shareable ? PIRQ_SHAREABLE : 0);
   1052	if (ret < 0) {
   1053		__unbind_from_irq(irq);
   1054		irq = ret;
   1055		goto out;
   1056	}
   1057
   1058	pirq_query_unmask(irq);
   1059	/* We try to use the handler with the appropriate semantic for the
   1060	 * type of interrupt: if the interrupt is an edge triggered
   1061	 * interrupt we use handle_edge_irq.
   1062	 *
   1063	 * On the other hand if the interrupt is level triggered we use
   1064	 * handle_fasteoi_irq like the native code does for this kind of
   1065	 * interrupts.
   1066	 *
   1067	 * Depending on the Xen version, pirq_needs_eoi might return true
   1068	 * not only for level triggered interrupts but for edge triggered
   1069	 * interrupts too. In any case Xen always honors the eoi mechanism,
   1070	 * not injecting any more pirqs of the same kind if the first one
   1071	 * hasn't received an eoi yet. Therefore using the fasteoi handler
   1072	 * is the right choice either way.
   1073	 */
   1074	if (shareable)
   1075		irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
   1076				handle_fasteoi_irq, name);
   1077	else
   1078		irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
   1079				handle_edge_irq, name);
   1080
   1081out:
   1082	mutex_unlock(&irq_mapping_update_lock);
   1083
   1084	return irq;
   1085}
   1086
   1087#ifdef CONFIG_PCI_MSI
   1088int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
   1089{
   1090	int rc;
   1091	struct physdev_get_free_pirq op_get_free_pirq;
   1092
   1093	op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
   1094	rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
   1095
   1096	WARN_ONCE(rc == -ENOSYS,
   1097		  "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
   1098
   1099	return rc ? -1 : op_get_free_pirq.pirq;
   1100}
   1101
   1102int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
   1103			     int pirq, int nvec, const char *name, domid_t domid)
   1104{
   1105	int i, irq, ret;
   1106
   1107	mutex_lock(&irq_mapping_update_lock);
   1108
   1109	irq = xen_allocate_irqs_dynamic(nvec);
   1110	if (irq < 0)
   1111		goto out;
   1112
   1113	for (i = 0; i < nvec; i++) {
   1114		irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
   1115
   1116		ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
   1117					      i == 0 ? 0 : PIRQ_MSI_GROUP);
   1118		if (ret < 0)
   1119			goto error_irq;
   1120	}
   1121
   1122	ret = irq_set_msi_desc(irq, msidesc);
   1123	if (ret < 0)
   1124		goto error_irq;
   1125out:
   1126	mutex_unlock(&irq_mapping_update_lock);
   1127	return irq;
   1128error_irq:
   1129	while (nvec--)
   1130		__unbind_from_irq(irq + nvec);
   1131	mutex_unlock(&irq_mapping_update_lock);
   1132	return ret;
   1133}
   1134#endif
   1135
   1136int xen_destroy_irq(int irq)
   1137{
   1138	struct physdev_unmap_pirq unmap_irq;
   1139	struct irq_info *info = info_for_irq(irq);
   1140	int rc = -ENOENT;
   1141
   1142	mutex_lock(&irq_mapping_update_lock);
   1143
   1144	/*
   1145	 * If trying to remove a vector in a MSI group different
   1146	 * than the first one skip the PIRQ unmap unless this vector
   1147	 * is the first one in the group.
   1148	 */
   1149	if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {
   1150		unmap_irq.pirq = info->u.pirq.pirq;
   1151		unmap_irq.domid = info->u.pirq.domid;
   1152		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
   1153		/* If another domain quits without making the pci_disable_msix
   1154		 * call, the Xen hypervisor takes care of freeing the PIRQs
   1155		 * (free_domain_pirqs).
   1156		 */
   1157		if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
   1158			pr_info("domain %d does not have %d anymore\n",
   1159				info->u.pirq.domid, info->u.pirq.pirq);
   1160		else if (rc) {
   1161			pr_warn("unmap irq failed %d\n", rc);
   1162			goto out;
   1163		}
   1164	}
   1165
   1166	xen_free_irq(irq);
   1167
   1168out:
   1169	mutex_unlock(&irq_mapping_update_lock);
   1170	return rc;
   1171}
   1172
   1173int xen_irq_from_pirq(unsigned pirq)
   1174{
   1175	int irq;
   1176
   1177	struct irq_info *info;
   1178
   1179	mutex_lock(&irq_mapping_update_lock);
   1180
   1181	list_for_each_entry(info, &xen_irq_list_head, list) {
   1182		if (info->type != IRQT_PIRQ)
   1183			continue;
   1184		irq = info->irq;
   1185		if (info->u.pirq.pirq == pirq)
   1186			goto out;
   1187	}
   1188	irq = -1;
   1189out:
   1190	mutex_unlock(&irq_mapping_update_lock);
   1191
   1192	return irq;
   1193}
   1194
   1195
   1196int xen_pirq_from_irq(unsigned irq)
   1197{
   1198	return pirq_from_irq(irq);
   1199}
   1200EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
   1201
   1202static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
   1203				   struct xenbus_device *dev)
   1204{
   1205	int irq;
   1206	int ret;
   1207
   1208	if (evtchn >= xen_evtchn_max_channels())
   1209		return -ENOMEM;
   1210
   1211	mutex_lock(&irq_mapping_update_lock);
   1212
   1213	irq = get_evtchn_to_irq(evtchn);
   1214
   1215	if (irq == -1) {
   1216		irq = xen_allocate_irq_dynamic();
   1217		if (irq < 0)
   1218			goto out;
   1219
   1220		irq_set_chip_and_handler_name(irq, chip,
   1221					      handle_edge_irq, "event");
   1222
   1223		ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
   1224		if (ret < 0) {
   1225			__unbind_from_irq(irq);
   1226			irq = ret;
   1227			goto out;
   1228		}
   1229		/*
   1230		 * New interdomain events are initially bound to vCPU0 This
   1231		 * is required to setup the event channel in the first
   1232		 * place and also important for UP guests because the
   1233		 * affinity setting is not invoked on them so nothing would
   1234		 * bind the channel.
   1235		 */
   1236		bind_evtchn_to_cpu(evtchn, 0, false);
   1237	} else {
   1238		struct irq_info *info = info_for_irq(irq);
   1239		WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
   1240	}
   1241
   1242out:
   1243	mutex_unlock(&irq_mapping_update_lock);
   1244
   1245	return irq;
   1246}
   1247
   1248int bind_evtchn_to_irq(evtchn_port_t evtchn)
   1249{
   1250	return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL);
   1251}
   1252EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
   1253
   1254int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
   1255{
   1256	return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL);
   1257}
   1258EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
   1259
   1260static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
   1261{
   1262	struct evtchn_bind_ipi bind_ipi;
   1263	evtchn_port_t evtchn;
   1264	int ret, irq;
   1265
   1266	mutex_lock(&irq_mapping_update_lock);
   1267
   1268	irq = per_cpu(ipi_to_irq, cpu)[ipi];
   1269
   1270	if (irq == -1) {
   1271		irq = xen_allocate_irq_dynamic();
   1272		if (irq < 0)
   1273			goto out;
   1274
   1275		irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
   1276					      handle_percpu_irq, "ipi");
   1277
   1278		bind_ipi.vcpu = xen_vcpu_nr(cpu);
   1279		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
   1280						&bind_ipi) != 0)
   1281			BUG();
   1282		evtchn = bind_ipi.port;
   1283
   1284		ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
   1285		if (ret < 0) {
   1286			__unbind_from_irq(irq);
   1287			irq = ret;
   1288			goto out;
   1289		}
   1290		/*
   1291		 * Force the affinity mask to the target CPU so proc shows
   1292		 * the correct target.
   1293		 */
   1294		bind_evtchn_to_cpu(evtchn, cpu, true);
   1295	} else {
   1296		struct irq_info *info = info_for_irq(irq);
   1297		WARN_ON(info == NULL || info->type != IRQT_IPI);
   1298	}
   1299
   1300 out:
   1301	mutex_unlock(&irq_mapping_update_lock);
   1302	return irq;
   1303}
   1304
   1305static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
   1306					       evtchn_port_t remote_port,
   1307					       struct irq_chip *chip)
   1308{
   1309	struct evtchn_bind_interdomain bind_interdomain;
   1310	int err;
   1311
   1312	bind_interdomain.remote_dom  = dev->otherend_id;
   1313	bind_interdomain.remote_port = remote_port;
   1314
   1315	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
   1316					  &bind_interdomain);
   1317
   1318	return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
   1319					       chip, dev);
   1320}
   1321
   1322int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
   1323					   evtchn_port_t remote_port)
   1324{
   1325	return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
   1326						   &xen_lateeoi_chip);
   1327}
   1328EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
   1329
   1330static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
   1331{
   1332	struct evtchn_status status;
   1333	evtchn_port_t port;
   1334	int rc = -ENOENT;
   1335
   1336	memset(&status, 0, sizeof(status));
   1337	for (port = 0; port < xen_evtchn_max_channels(); port++) {
   1338		status.dom = DOMID_SELF;
   1339		status.port = port;
   1340		rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
   1341		if (rc < 0)
   1342			continue;
   1343		if (status.status != EVTCHNSTAT_virq)
   1344			continue;
   1345		if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
   1346			*evtchn = port;
   1347			break;
   1348		}
   1349	}
   1350	return rc;
   1351}
   1352
   1353/**
   1354 * xen_evtchn_nr_channels - number of usable event channel ports
   1355 *
   1356 * This may be less than the maximum supported by the current
   1357 * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
   1358 * supported.
   1359 */
   1360unsigned xen_evtchn_nr_channels(void)
   1361{
   1362        return evtchn_ops->nr_channels();
   1363}
   1364EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
   1365
   1366int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
   1367{
   1368	struct evtchn_bind_virq bind_virq;
   1369	evtchn_port_t evtchn = 0;
   1370	int irq, ret;
   1371
   1372	mutex_lock(&irq_mapping_update_lock);
   1373
   1374	irq = per_cpu(virq_to_irq, cpu)[virq];
   1375
   1376	if (irq == -1) {
   1377		irq = xen_allocate_irq_dynamic();
   1378		if (irq < 0)
   1379			goto out;
   1380
   1381		if (percpu)
   1382			irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
   1383						      handle_percpu_irq, "virq");
   1384		else
   1385			irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
   1386						      handle_edge_irq, "virq");
   1387
   1388		bind_virq.virq = virq;
   1389		bind_virq.vcpu = xen_vcpu_nr(cpu);
   1390		ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
   1391						&bind_virq);
   1392		if (ret == 0)
   1393			evtchn = bind_virq.port;
   1394		else {
   1395			if (ret == -EEXIST)
   1396				ret = find_virq(virq, cpu, &evtchn);
   1397			BUG_ON(ret < 0);
   1398		}
   1399
   1400		ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
   1401		if (ret < 0) {
   1402			__unbind_from_irq(irq);
   1403			irq = ret;
   1404			goto out;
   1405		}
   1406
   1407		/*
   1408		 * Force the affinity mask for percpu interrupts so proc
   1409		 * shows the correct target.
   1410		 */
   1411		bind_evtchn_to_cpu(evtchn, cpu, percpu);
   1412	} else {
   1413		struct irq_info *info = info_for_irq(irq);
   1414		WARN_ON(info == NULL || info->type != IRQT_VIRQ);
   1415	}
   1416
   1417out:
   1418	mutex_unlock(&irq_mapping_update_lock);
   1419
   1420	return irq;
   1421}
   1422
   1423static void unbind_from_irq(unsigned int irq)
   1424{
   1425	mutex_lock(&irq_mapping_update_lock);
   1426	__unbind_from_irq(irq);
   1427	mutex_unlock(&irq_mapping_update_lock);
   1428}
   1429
   1430static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
   1431					  irq_handler_t handler,
   1432					  unsigned long irqflags,
   1433					  const char *devname, void *dev_id,
   1434					  struct irq_chip *chip)
   1435{
   1436	int irq, retval;
   1437
   1438	irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL);
   1439	if (irq < 0)
   1440		return irq;
   1441	retval = request_irq(irq, handler, irqflags, devname, dev_id);
   1442	if (retval != 0) {
   1443		unbind_from_irq(irq);
   1444		return retval;
   1445	}
   1446
   1447	return irq;
   1448}
   1449
   1450int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
   1451			      irq_handler_t handler,
   1452			      unsigned long irqflags,
   1453			      const char *devname, void *dev_id)
   1454{
   1455	return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
   1456					      devname, dev_id,
   1457					      &xen_dynamic_chip);
   1458}
   1459EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
   1460
   1461int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
   1462				      irq_handler_t handler,
   1463				      unsigned long irqflags,
   1464				      const char *devname, void *dev_id)
   1465{
   1466	return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
   1467					      devname, dev_id,
   1468					      &xen_lateeoi_chip);
   1469}
   1470EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
   1471
   1472static int bind_interdomain_evtchn_to_irqhandler_chip(
   1473		struct xenbus_device *dev, evtchn_port_t remote_port,
   1474		irq_handler_t handler, unsigned long irqflags,
   1475		const char *devname, void *dev_id, struct irq_chip *chip)
   1476{
   1477	int irq, retval;
   1478
   1479	irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip);
   1480	if (irq < 0)
   1481		return irq;
   1482
   1483	retval = request_irq(irq, handler, irqflags, devname, dev_id);
   1484	if (retval != 0) {
   1485		unbind_from_irq(irq);
   1486		return retval;
   1487	}
   1488
   1489	return irq;
   1490}
   1491
   1492int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev,
   1493						  evtchn_port_t remote_port,
   1494						  irq_handler_t handler,
   1495						  unsigned long irqflags,
   1496						  const char *devname,
   1497						  void *dev_id)
   1498{
   1499	return bind_interdomain_evtchn_to_irqhandler_chip(dev,
   1500				remote_port, handler, irqflags, devname,
   1501				dev_id, &xen_lateeoi_chip);
   1502}
   1503EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
   1504
   1505int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
   1506			    irq_handler_t handler,
   1507			    unsigned long irqflags, const char *devname, void *dev_id)
   1508{
   1509	int irq, retval;
   1510
   1511	irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
   1512	if (irq < 0)
   1513		return irq;
   1514	retval = request_irq(irq, handler, irqflags, devname, dev_id);
   1515	if (retval != 0) {
   1516		unbind_from_irq(irq);
   1517		return retval;
   1518	}
   1519
   1520	return irq;
   1521}
   1522EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
   1523
   1524int bind_ipi_to_irqhandler(enum ipi_vector ipi,
   1525			   unsigned int cpu,
   1526			   irq_handler_t handler,
   1527			   unsigned long irqflags,
   1528			   const char *devname,
   1529			   void *dev_id)
   1530{
   1531	int irq, retval;
   1532
   1533	irq = bind_ipi_to_irq(ipi, cpu);
   1534	if (irq < 0)
   1535		return irq;
   1536
   1537	irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
   1538	retval = request_irq(irq, handler, irqflags, devname, dev_id);
   1539	if (retval != 0) {
   1540		unbind_from_irq(irq);
   1541		return retval;
   1542	}
   1543
   1544	return irq;
   1545}
   1546
   1547void unbind_from_irqhandler(unsigned int irq, void *dev_id)
   1548{
   1549	struct irq_info *info = info_for_irq(irq);
   1550
   1551	if (WARN_ON(!info))
   1552		return;
   1553	free_irq(irq, dev_id);
   1554	unbind_from_irq(irq);
   1555}
   1556EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
   1557
   1558/**
   1559 * xen_set_irq_priority() - set an event channel priority.
   1560 * @irq:irq bound to an event channel.
   1561 * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
   1562 */
   1563int xen_set_irq_priority(unsigned irq, unsigned priority)
   1564{
   1565	struct evtchn_set_priority set_priority;
   1566
   1567	set_priority.port = evtchn_from_irq(irq);
   1568	set_priority.priority = priority;
   1569
   1570	return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
   1571					   &set_priority);
   1572}
   1573EXPORT_SYMBOL_GPL(xen_set_irq_priority);
   1574
   1575int evtchn_make_refcounted(evtchn_port_t evtchn)
   1576{
   1577	int irq = get_evtchn_to_irq(evtchn);
   1578	struct irq_info *info;
   1579
   1580	if (irq == -1)
   1581		return -ENOENT;
   1582
   1583	info = info_for_irq(irq);
   1584
   1585	if (!info)
   1586		return -ENOENT;
   1587
   1588	WARN_ON(info->refcnt != -1);
   1589
   1590	info->refcnt = 1;
   1591
   1592	return 0;
   1593}
   1594EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
   1595
   1596int evtchn_get(evtchn_port_t evtchn)
   1597{
   1598	int irq;
   1599	struct irq_info *info;
   1600	int err = -ENOENT;
   1601
   1602	if (evtchn >= xen_evtchn_max_channels())
   1603		return -EINVAL;
   1604
   1605	mutex_lock(&irq_mapping_update_lock);
   1606
   1607	irq = get_evtchn_to_irq(evtchn);
   1608	if (irq == -1)
   1609		goto done;
   1610
   1611	info = info_for_irq(irq);
   1612
   1613	if (!info)
   1614		goto done;
   1615
   1616	err = -EINVAL;
   1617	if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
   1618		goto done;
   1619
   1620	info->refcnt++;
   1621	err = 0;
   1622 done:
   1623	mutex_unlock(&irq_mapping_update_lock);
   1624
   1625	return err;
   1626}
   1627EXPORT_SYMBOL_GPL(evtchn_get);
   1628
   1629void evtchn_put(evtchn_port_t evtchn)
   1630{
   1631	int irq = get_evtchn_to_irq(evtchn);
   1632	if (WARN_ON(irq == -1))
   1633		return;
   1634	unbind_from_irq(irq);
   1635}
   1636EXPORT_SYMBOL_GPL(evtchn_put);
   1637
   1638void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
   1639{
   1640	int irq;
   1641
   1642#ifdef CONFIG_X86
   1643	if (unlikely(vector == XEN_NMI_VECTOR)) {
   1644		int rc =  HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu),
   1645					     NULL);
   1646		if (rc < 0)
   1647			printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
   1648		return;
   1649	}
   1650#endif
   1651	irq = per_cpu(ipi_to_irq, cpu)[vector];
   1652	BUG_ON(irq < 0);
   1653	notify_remote_via_irq(irq);
   1654}
   1655
   1656struct evtchn_loop_ctrl {
   1657	ktime_t timeout;
   1658	unsigned count;
   1659	bool defer_eoi;
   1660};
   1661
   1662void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
   1663{
   1664	int irq;
   1665	struct irq_info *info;
   1666	struct xenbus_device *dev;
   1667
   1668	irq = get_evtchn_to_irq(port);
   1669	if (irq == -1)
   1670		return;
   1671
   1672	/*
   1673	 * Check for timeout every 256 events.
   1674	 * We are setting the timeout value only after the first 256
   1675	 * events in order to not hurt the common case of few loop
   1676	 * iterations. The 256 is basically an arbitrary value.
   1677	 *
   1678	 * In case we are hitting the timeout we need to defer all further
   1679	 * EOIs in order to ensure to leave the event handling loop rather
   1680	 * sooner than later.
   1681	 */
   1682	if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
   1683		ktime_t kt = ktime_get();
   1684
   1685		if (!ctrl->timeout) {
   1686			kt = ktime_add_ms(kt,
   1687					  jiffies_to_msecs(event_loop_timeout));
   1688			ctrl->timeout = kt;
   1689		} else if (kt > ctrl->timeout) {
   1690			ctrl->defer_eoi = true;
   1691		}
   1692	}
   1693
   1694	info = info_for_irq(irq);
   1695	if (xchg_acquire(&info->is_active, 1))
   1696		return;
   1697
   1698	dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
   1699	if (dev)
   1700		atomic_inc(&dev->events);
   1701
   1702	if (ctrl->defer_eoi) {
   1703		info->eoi_cpu = smp_processor_id();
   1704		info->irq_epoch = __this_cpu_read(irq_epoch);
   1705		info->eoi_time = get_jiffies_64() + event_eoi_delay;
   1706	}
   1707
   1708	generic_handle_irq(irq);
   1709}
   1710
   1711static void __xen_evtchn_do_upcall(void)
   1712{
   1713	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
   1714	int cpu = smp_processor_id();
   1715	struct evtchn_loop_ctrl ctrl = { 0 };
   1716
   1717	read_lock(&evtchn_rwlock);
   1718
   1719	do {
   1720		vcpu_info->evtchn_upcall_pending = 0;
   1721
   1722		xen_evtchn_handle_events(cpu, &ctrl);
   1723
   1724		BUG_ON(!irqs_disabled());
   1725
   1726		virt_rmb(); /* Hypervisor can set upcall pending. */
   1727
   1728	} while (vcpu_info->evtchn_upcall_pending);
   1729
   1730	read_unlock(&evtchn_rwlock);
   1731
   1732	/*
   1733	 * Increment irq_epoch only now to defer EOIs only for
   1734	 * xen_irq_lateeoi() invocations occurring from inside the loop
   1735	 * above.
   1736	 */
   1737	__this_cpu_inc(irq_epoch);
   1738}
   1739
   1740void xen_evtchn_do_upcall(struct pt_regs *regs)
   1741{
   1742	struct pt_regs *old_regs = set_irq_regs(regs);
   1743
   1744	irq_enter();
   1745
   1746	__xen_evtchn_do_upcall();
   1747
   1748	irq_exit();
   1749	set_irq_regs(old_regs);
   1750}
   1751
   1752void xen_hvm_evtchn_do_upcall(void)
   1753{
   1754	__xen_evtchn_do_upcall();
   1755}
   1756EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
   1757
   1758/* Rebind a new event channel to an existing irq. */
   1759void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
   1760{
   1761	struct irq_info *info = info_for_irq(irq);
   1762
   1763	if (WARN_ON(!info))
   1764		return;
   1765
   1766	/* Make sure the irq is masked, since the new event channel
   1767	   will also be masked. */
   1768	disable_irq(irq);
   1769
   1770	mutex_lock(&irq_mapping_update_lock);
   1771
   1772	/* After resume the irq<->evtchn mappings are all cleared out */
   1773	BUG_ON(get_evtchn_to_irq(evtchn) != -1);
   1774	/* Expect irq to have been bound before,
   1775	   so there should be a proper type */
   1776	BUG_ON(info->type == IRQT_UNBOUND);
   1777
   1778	(void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
   1779
   1780	mutex_unlock(&irq_mapping_update_lock);
   1781
   1782	bind_evtchn_to_cpu(evtchn, info->cpu, false);
   1783
   1784	/* Unmask the event channel. */
   1785	enable_irq(irq);
   1786}
   1787
   1788/* Rebind an evtchn so that it gets delivered to a specific cpu */
   1789static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
   1790{
   1791	struct evtchn_bind_vcpu bind_vcpu;
   1792	evtchn_port_t evtchn = info ? info->evtchn : 0;
   1793
   1794	if (!VALID_EVTCHN(evtchn))
   1795		return -1;
   1796
   1797	if (!xen_support_evtchn_rebind())
   1798		return -1;
   1799
   1800	/* Send future instances of this interrupt to other vcpu. */
   1801	bind_vcpu.port = evtchn;
   1802	bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
   1803
   1804	/*
   1805	 * Mask the event while changing the VCPU binding to prevent
   1806	 * it being delivered on an unexpected VCPU.
   1807	 */
   1808	do_mask(info, EVT_MASK_REASON_TEMPORARY);
   1809
   1810	/*
   1811	 * If this fails, it usually just indicates that we're dealing with a
   1812	 * virq or IPI channel, which don't actually need to be rebound. Ignore
   1813	 * it, but don't do the xenlinux-level rebind in that case.
   1814	 */
   1815	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
   1816		bind_evtchn_to_cpu(evtchn, tcpu, false);
   1817
   1818	do_unmask(info, EVT_MASK_REASON_TEMPORARY);
   1819
   1820	return 0;
   1821}
   1822
   1823/*
   1824 * Find the CPU within @dest mask which has the least number of channels
   1825 * assigned. This is not precise as the per cpu counts can be modified
   1826 * concurrently.
   1827 */
   1828static unsigned int select_target_cpu(const struct cpumask *dest)
   1829{
   1830	unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
   1831
   1832	for_each_cpu_and(cpu, dest, cpu_online_mask) {
   1833		unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
   1834
   1835		if (curch < minch) {
   1836			minch = curch;
   1837			best_cpu = cpu;
   1838		}
   1839	}
   1840
   1841	/*
   1842	 * Catch the unlikely case that dest contains no online CPUs. Can't
   1843	 * recurse.
   1844	 */
   1845	if (best_cpu == UINT_MAX)
   1846		return select_target_cpu(cpu_online_mask);
   1847
   1848	return best_cpu;
   1849}
   1850
   1851static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
   1852			    bool force)
   1853{
   1854	unsigned int tcpu = select_target_cpu(dest);
   1855	int ret;
   1856
   1857	ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
   1858	if (!ret)
   1859		irq_data_update_effective_affinity(data, cpumask_of(tcpu));
   1860
   1861	return ret;
   1862}
   1863
   1864static void enable_dynirq(struct irq_data *data)
   1865{
   1866	struct irq_info *info = info_for_irq(data->irq);
   1867	evtchn_port_t evtchn = info ? info->evtchn : 0;
   1868
   1869	if (VALID_EVTCHN(evtchn))
   1870		do_unmask(info, EVT_MASK_REASON_EXPLICIT);
   1871}
   1872
   1873static void disable_dynirq(struct irq_data *data)
   1874{
   1875	struct irq_info *info = info_for_irq(data->irq);
   1876	evtchn_port_t evtchn = info ? info->evtchn : 0;
   1877
   1878	if (VALID_EVTCHN(evtchn))
   1879		do_mask(info, EVT_MASK_REASON_EXPLICIT);
   1880}
   1881
   1882static void ack_dynirq(struct irq_data *data)
   1883{
   1884	struct irq_info *info = info_for_irq(data->irq);
   1885	evtchn_port_t evtchn = info ? info->evtchn : 0;
   1886
   1887	if (VALID_EVTCHN(evtchn))
   1888		event_handler_exit(info);
   1889}
   1890
   1891static void mask_ack_dynirq(struct irq_data *data)
   1892{
   1893	disable_dynirq(data);
   1894	ack_dynirq(data);
   1895}
   1896
   1897static void lateeoi_ack_dynirq(struct irq_data *data)
   1898{
   1899	struct irq_info *info = info_for_irq(data->irq);
   1900	evtchn_port_t evtchn = info ? info->evtchn : 0;
   1901
   1902	if (VALID_EVTCHN(evtchn)) {
   1903		do_mask(info, EVT_MASK_REASON_EOI_PENDING);
   1904		/*
   1905		 * Don't call event_handler_exit().
   1906		 * Need to keep is_active non-zero in order to ignore re-raised
   1907		 * events after cpu affinity changes while a lateeoi is pending.
   1908		 */
   1909		clear_evtchn(evtchn);
   1910	}
   1911}
   1912
   1913static void lateeoi_mask_ack_dynirq(struct irq_data *data)
   1914{
   1915	struct irq_info *info = info_for_irq(data->irq);
   1916	evtchn_port_t evtchn = info ? info->evtchn : 0;
   1917
   1918	if (VALID_EVTCHN(evtchn)) {
   1919		do_mask(info, EVT_MASK_REASON_EXPLICIT);
   1920		event_handler_exit(info);
   1921	}
   1922}
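/*
 * Editor's sketch (not part of this file): an interrupt bound with
 * bind_evtchn_to_irqhandler_lateeoi() uses the xen-dyn-lateeoi chip, so after
 * the ack above the channel stays masked until the handler reports completion
 * via xen_irq_lateeoi().  The handler below is hypothetical.
 */
static irqreturn_t __maybe_unused example_lateeoi_handler(int irq, void *dev_id)
{
	bool spurious = false;

	/* ... consume the event; set spurious if no real work was found ... */

	/* Signal the late EOI; spurious events let the core throttle the source. */
	xen_irq_lateeoi(irq, spurious ? XEN_EOI_FLAG_SPURIOUS : 0);
	return IRQ_HANDLED;
}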
   1923
   1924static int retrigger_dynirq(struct irq_data *data)
   1925{
   1926	struct irq_info *info = info_for_irq(data->irq);
   1927	evtchn_port_t evtchn = info ? info->evtchn : 0;
   1928
   1929	if (!VALID_EVTCHN(evtchn))
   1930		return 0;
   1931
   1932	do_mask(info, EVT_MASK_REASON_TEMPORARY);
   1933	set_evtchn(evtchn);
   1934	do_unmask(info, EVT_MASK_REASON_TEMPORARY);
   1935
   1936	return 1;
   1937}
   1938
   1939static void restore_pirqs(void)
   1940{
   1941	int pirq, rc, irq, gsi;
   1942	struct physdev_map_pirq map_irq;
   1943	struct irq_info *info;
   1944
   1945	list_for_each_entry(info, &xen_irq_list_head, list) {
   1946		if (info->type != IRQT_PIRQ)
   1947			continue;
   1948
   1949		pirq = info->u.pirq.pirq;
   1950		gsi = info->u.pirq.gsi;
   1951		irq = info->irq;
   1952
   1953		/* Save/restore of passthrough (PT) devices doesn't work, so at
   1954		 * this point the only devices present are GSI-based emulated ones. */
   1955		if (!gsi)
   1956			continue;
   1957
   1958		map_irq.domid = DOMID_SELF;
   1959		map_irq.type = MAP_PIRQ_TYPE_GSI;
   1960		map_irq.index = gsi;
   1961		map_irq.pirq = pirq;
   1962
   1963		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
   1964		if (rc) {
   1965			pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
   1966				gsi, irq, pirq, rc);
   1967			xen_free_irq(irq);
   1968			continue;
   1969		}
   1970
   1971		printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
   1972
   1973		__startup_pirq(irq);
   1974	}
   1975}
   1976
   1977static void restore_cpu_virqs(unsigned int cpu)
   1978{
   1979	struct evtchn_bind_virq bind_virq;
   1980	evtchn_port_t evtchn;
   1981	int virq, irq;
   1982
   1983	for (virq = 0; virq < NR_VIRQS; virq++) {
   1984		if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
   1985			continue;
   1986
   1987		BUG_ON(virq_from_irq(irq) != virq);
   1988
   1989		/* Get a new binding from Xen. */
   1990		bind_virq.virq = virq;
   1991		bind_virq.vcpu = xen_vcpu_nr(cpu);
   1992		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
   1993						&bind_virq) != 0)
   1994			BUG();
   1995		evtchn = bind_virq.port;
   1996
   1997		/* Record the new mapping. */
   1998		(void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
   1999		/* The affinity mask is still valid */
   2000		bind_evtchn_to_cpu(evtchn, cpu, false);
   2001	}
   2002}
   2003
   2004static void restore_cpu_ipis(unsigned int cpu)
   2005{
   2006	struct evtchn_bind_ipi bind_ipi;
   2007	evtchn_port_t evtchn;
   2008	int ipi, irq;
   2009
   2010	for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
   2011		if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
   2012			continue;
   2013
   2014		BUG_ON(ipi_from_irq(irq) != ipi);
   2015
   2016		/* Get a new binding from Xen. */
   2017		bind_ipi.vcpu = xen_vcpu_nr(cpu);
   2018		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
   2019						&bind_ipi) != 0)
   2020			BUG();
   2021		evtchn = bind_ipi.port;
   2022
   2023		/* Record the new mapping. */
   2024		(void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
   2025		/* The affinity mask is still valid */
   2026		bind_evtchn_to_cpu(evtchn, cpu, false);
   2027	}
   2028}
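/*
 * Editor's note (hedged): only VIRQ and IPI bindings are re-created here
 * because the kernel itself owns them.  Inter-domain channels are typically
 * re-established by the individual front-end/back-end drivers during xenbus
 * resume, either by binding a new channel or via rebind_evtchn_irq().
 */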
   2029
   2030/* Clear an irq's pending state, in preparation for polling on it */
   2031void xen_clear_irq_pending(int irq)
   2032{
   2033	struct irq_info *info = info_for_irq(irq);
   2034	evtchn_port_t evtchn = info ? info->evtchn : 0;
   2035
   2036	if (VALID_EVTCHN(evtchn))
   2037		event_handler_exit(info);
   2038}
   2039EXPORT_SYMBOL(xen_clear_irq_pending);
   2040void xen_set_irq_pending(int irq)
   2041{
   2042	evtchn_port_t evtchn = evtchn_from_irq(irq);
   2043
   2044	if (VALID_EVTCHN(evtchn))
   2045		set_evtchn(evtchn);
   2046}
   2047
   2048bool xen_test_irq_pending(int irq)
   2049{
   2050	evtchn_port_t evtchn = evtchn_from_irq(irq);
   2051	bool ret = false;
   2052
   2053	if (VALID_EVTCHN(evtchn))
   2054		ret = test_evtchn(evtchn);
   2055
   2056	return ret;
   2057}
   2058
   2059/* Poll, with a timeout, for an irq to become pending.  In the usual case
   2060 * the irq will be disabled, so it won't deliver an interrupt. */
   2061void xen_poll_irq_timeout(int irq, u64 timeout)
   2062{
   2063	evtchn_port_t evtchn = evtchn_from_irq(irq);
   2064
   2065	if (VALID_EVTCHN(evtchn)) {
   2066		struct sched_poll poll;
   2067
   2068		poll.nr_ports = 1;
   2069		poll.timeout = timeout;
   2070		set_xen_guest_handle(poll.ports, &evtchn);
   2071
   2072		if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
   2073			BUG();
   2074	}
   2075}
   2076EXPORT_SYMBOL(xen_poll_irq_timeout);
   2077/* Poll waiting for an irq to become pending.  In the usual case, the
   2078 * irq will be disabled so it won't deliver an interrupt. */
   2079void xen_poll_irq(int irq)
   2080{
   2081	xen_poll_irq_timeout(irq, 0 /* no timeout */);
   2082}
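/*
 * Editor's sketch (not part of this file): the usual pattern built on these
 * helpers -- e.g. in the PV spinlock slow path -- is to clear the pending
 * state, re-check the wait condition, and only then block in SCHEDOP_poll.
 * The function name and the "still_blocked" predicate are hypothetical.
 */
static void __maybe_unused example_poll_wait(int irq, bool (*still_blocked)(void))
{
	xen_clear_irq_pending(irq);

	if (!still_blocked())
		return;

	/* Returns once the event channel becomes pending (no timeout). */
	xen_poll_irq(irq);
}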
   2083
   2084/* Check whether the IRQ line is shared with other guests. */
   2085int xen_test_irq_shared(int irq)
   2086{
   2087	struct irq_info *info = info_for_irq(irq);
   2088	struct physdev_irq_status_query irq_status;
   2089
   2090	if (WARN_ON(!info))
   2091		return -ENOENT;
   2092
   2093	irq_status.irq = info->u.pirq.pirq;
   2094
   2095	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
   2096		return 0;
   2097	return !(irq_status.flags & XENIRQSTAT_shared);
   2098}
   2099EXPORT_SYMBOL_GPL(xen_test_irq_shared);
   2100
   2101void xen_irq_resume(void)
   2102{
   2103	unsigned int cpu;
   2104	struct irq_info *info;
   2105
   2106	/* New event-channel space is not 'live' yet. */
   2107	xen_evtchn_resume();
   2108
   2109	/* No IRQ <-> event-channel mappings. */
   2110	list_for_each_entry(info, &xen_irq_list_head, list) {
   2111		/* Zap event-channel binding */
   2112		info->evtchn = 0;
   2113		/* Adjust accounting */
   2114		channels_on_cpu_dec(info);
   2115	}
   2116
   2117	clear_evtchn_to_irq_all();
   2118
   2119	for_each_possible_cpu(cpu) {
   2120		restore_cpu_virqs(cpu);
   2121		restore_cpu_ipis(cpu);
   2122	}
   2123
   2124	restore_pirqs();
   2125}
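/*
 * Editor's note on ordering: the event-channel ABI is re-initialised first,
 * every stale irq<->evtchn mapping is dropped, then the kernel-owned
 * VIRQ/IPI bindings are re-created on each CPU before the hardware PIRQs
 * are remapped and restarted.
 */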
   2126
   2127static struct irq_chip xen_dynamic_chip __read_mostly = {
   2128	.name			= "xen-dyn",
   2129
   2130	.irq_disable		= disable_dynirq,
   2131	.irq_mask		= disable_dynirq,
   2132	.irq_unmask		= enable_dynirq,
   2133
   2134	.irq_ack		= ack_dynirq,
   2135	.irq_mask_ack		= mask_ack_dynirq,
   2136
   2137	.irq_set_affinity	= set_affinity_irq,
   2138	.irq_retrigger		= retrigger_dynirq,
   2139};
   2140
   2141static struct irq_chip xen_lateeoi_chip __read_mostly = {
   2142	/* The chip name needs to contain "xen-dyn" for irqbalance to work. */
   2143	.name			= "xen-dyn-lateeoi",
   2144
   2145	.irq_disable		= disable_dynirq,
   2146	.irq_mask		= disable_dynirq,
   2147	.irq_unmask		= enable_dynirq,
   2148
   2149	.irq_ack		= lateeoi_ack_dynirq,
   2150	.irq_mask_ack		= lateeoi_mask_ack_dynirq,
   2151
   2152	.irq_set_affinity	= set_affinity_irq,
   2153	.irq_retrigger		= retrigger_dynirq,
   2154};
   2155
   2156static struct irq_chip xen_pirq_chip __read_mostly = {
   2157	.name			= "xen-pirq",
   2158
   2159	.irq_startup		= startup_pirq,
   2160	.irq_shutdown		= shutdown_pirq,
   2161	.irq_enable		= enable_pirq,
   2162	.irq_disable		= disable_pirq,
   2163
   2164	.irq_mask		= disable_dynirq,
   2165	.irq_unmask		= enable_dynirq,
   2166
   2167	.irq_ack		= eoi_pirq,
   2168	.irq_eoi		= eoi_pirq,
   2169	.irq_mask_ack		= mask_ack_pirq,
   2170
   2171	.irq_set_affinity	= set_affinity_irq,
   2172
   2173	.irq_retrigger		= retrigger_dynirq,
   2174};
   2175
   2176static struct irq_chip xen_percpu_chip __read_mostly = {
   2177	.name			= "xen-percpu",
   2178
   2179	.irq_disable		= disable_dynirq,
   2180	.irq_mask		= disable_dynirq,
   2181	.irq_unmask		= enable_dynirq,
   2182
   2183	.irq_ack		= ack_dynirq,
   2184};
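/*
 * Editor's summary of the irq_chips above (roughly): xen-dyn handles ordinary
 * dynamic event channels, xen-dyn-lateeoi is used for channels from other
 * (potentially misbehaving) domains so the EOI can be delayed until the
 * handler has really finished, xen-pirq wraps hardware interrupts routed
 * through Xen, and xen-percpu serves the per-CPU VIRQ and IPI bindings.
 */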
   2185
   2186#ifdef CONFIG_XEN_PVHVM
   2187/* Vector callbacks are preferable to PCI interrupts for receiving event
   2188 * channel notifications because they can be delivered on any vcpu and
   2189 * require neither PCI support nor APIC interactions. */
   2190void xen_setup_callback_vector(void)
   2191{
   2192	uint64_t callback_via;
   2193
   2194	if (xen_have_vector_callback) {
   2195		callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
   2196		if (xen_set_callback_via(callback_via)) {
   2197			pr_err("Request for Xen HVM callback vector failed\n");
   2198			xen_have_vector_callback = 0;
   2199		}
   2200	}
   2201}
   2202
   2203static __init void xen_alloc_callback_vector(void)
   2204{
   2205	if (!xen_have_vector_callback)
   2206		return;
   2207
   2208	pr_info("Xen HVM callback vector for event delivery is enabled\n");
   2209	alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
   2210}
   2211#else
   2212void xen_setup_callback_vector(void) {}
   2213static inline void xen_alloc_callback_vector(void) {}
   2214#endif
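/*
 * Editor's note (hedged): when vector callbacks are not available, HVM guests
 * typically fall back to the legacy interrupt of the xen-platform PCI device
 * for event-channel notification, which is why the comment above prefers the
 * vector callback mechanism.
 */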
   2215
   2216bool xen_fifo_events = true;
   2217module_param_named(fifo_events, xen_fifo_events, bool, 0);
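/*
 * Editor's note: the resulting boot parameter is "xen.fifo_events"; passing
 * xen.fifo_events=0 on the kernel command line skips the FIFO ABI and forces
 * the 2-level fallback selected in xen_init_IRQ() below.
 */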
   2218
   2219static int xen_evtchn_cpu_prepare(unsigned int cpu)
   2220{
   2221	int ret = 0;
   2222
   2223	xen_cpu_init_eoi(cpu);
   2224
   2225	if (evtchn_ops->percpu_init)
   2226		ret = evtchn_ops->percpu_init(cpu);
   2227
   2228	return ret;
   2229}
   2230
   2231static int xen_evtchn_cpu_dead(unsigned int cpu)
   2232{
   2233	int ret = 0;
   2234
   2235	if (evtchn_ops->percpu_deinit)
   2236		ret = evtchn_ops->percpu_deinit(cpu);
   2237
   2238	return ret;
   2239}
   2240
   2241void __init xen_init_IRQ(void)
   2242{
   2243	int ret = -EINVAL;
   2244	evtchn_port_t evtchn;
   2245
   2246	if (xen_fifo_events)
   2247		ret = xen_evtchn_fifo_init();
   2248	if (ret < 0) {
   2249		xen_evtchn_2l_init();
   2250		xen_fifo_events = false;
   2251	}
   2252
   2253	xen_cpu_init_eoi(smp_processor_id());
   2254
   2255	cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
   2256				  "xen/evtchn:prepare",
   2257				  xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
   2258
   2259	evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
   2260				sizeof(*evtchn_to_irq), GFP_KERNEL);
   2261	BUG_ON(!evtchn_to_irq);
   2262
   2263	/* No event channels are 'live' right now. */
   2264	for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
   2265		mask_evtchn(evtchn);
   2266
   2267	pirq_needs_eoi = pirq_needs_eoi_flag;
   2268
   2269#ifdef CONFIG_X86
   2270	if (xen_pv_domain()) {
   2271		if (xen_initial_domain())
   2272			pci_xen_initial_domain();
   2273	}
   2274	if (xen_feature(XENFEAT_hvm_callback_vector)) {
   2275		xen_setup_callback_vector();
   2276		xen_alloc_callback_vector();
   2277	}
   2278
   2279	if (xen_hvm_domain()) {
   2280		native_init_IRQ();
   2281		/* pci_xen_hvm_init must be called after native_init_IRQ so that
   2282		 * __acpi_register_gsi can point at the right function */
   2283		pci_xen_hvm_init();
   2284	} else {
   2285		int rc;
   2286		struct physdev_pirq_eoi_gmfn eoi_gmfn;
   2287
   2288		pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
   2289		eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
   2290		rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
   2291		if (rc != 0) {
   2292			free_page((unsigned long) pirq_eoi_map);
   2293			pirq_eoi_map = NULL;
   2294		} else
   2295			pirq_needs_eoi = pirq_check_eoi_map;
   2296	}
   2297#endif
   2298}
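/*
 * Editor's sketch (not part of events_base.c): what a typical front-end
 * driver does with the machinery initialised above -- bind an inter-domain
 * event channel to a handler, notify the remote end, and tear the binding
 * down again.  "port", "dev" and both function names are hypothetical.
 */
static irqreturn_t example_handler(int irq, void *dev_id)
{
	/* ... handle one notification from the other domain ... */
	return IRQ_HANDLED;
}

static int __maybe_unused example_use_evtchn(evtchn_port_t port, void *dev)
{
	int irq;

	irq = bind_evtchn_to_irqhandler(port, example_handler, 0, "example", dev);
	if (irq < 0)
		return irq;

	notify_remote_via_irq(irq);		/* kick the other end */
	unbind_from_irqhandler(irq, dev);	/* release the binding */
	return 0;
}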