cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

iosapic.c (29971B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * I/O SAPIC support.
      4 *
      5 * Copyright (C) 1999 Intel Corp.
      6 * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
      7 * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
      8 * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co.
      9 *	David Mosberger-Tang <davidm@hpl.hp.com>
     10 * Copyright (C) 1999 VA Linux Systems
     11 * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
     12 *
     13 * 00/04/19	D. Mosberger	Rewritten to mirror more closely the x86 I/O
     14 *				APIC code.  In particular, we now have separate
     15 *				handlers for edge and level triggered
     16 *				interrupts.
     17 * 00/10/27	Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector
     18 *				allocation PCI to vector mapping, shared PCI
     19 *				interrupts.
     20 * 00/10/27	D. Mosberger	Document things a bit more to make them more
     21 *				understandable.  Clean up much of the old
     22 *				IOSAPIC cruft.
     23 * 01/07/27	J.I. Lee	PCI irq routing, Platform/Legacy interrupts
     24 *				and fixes for ACPI S5(SoftOff) support.
     25 * 02/01/23	J.I. Lee	iosapic pgm fixes for PCI irq routing from _PRT
     26 * 02/01/07     E. Focht        <efocht@ess.nec.de> Redirectable interrupt
     27 *				vectors in iosapic_set_affinity(),
     28 *				initializations for /proc/irq/#/smp_affinity
     29 * 02/04/02	P. Diefenbaugh	Cleaned up ACPI PCI IRQ routing.
     30 * 02/04/18	J.I. Lee	bug fix in iosapic_init_pci_irq
     31 * 02/04/30	J.I. Lee	bug fix in find_iosapic to fix ACPI PCI IRQ to
     32 *				IOSAPIC mapping error
     33 * 02/07/29	T. Kochi	Allocate interrupt vectors dynamically
     34 * 02/08/04	T. Kochi	Cleaned up terminology (irq, global system
     35 *				interrupt, vector, etc.)
     36 * 02/09/20	D. Mosberger	Simplified by taking advantage of ACPI's
     37 *				pci_irq code.
     38 * 03/02/19	B. Helgaas	Make pcat_compat system-wide, not per-IOSAPIC.
     39 *				Remove iosapic_address & gsi_base from
     40 *				external interfaces.  Rationalize
     41 *				__init/__devinit attributes.
     42 * 04/12/04 Ashok Raj	<ashok.raj@intel.com> Intel Corporation 2004
     43 *				Updated to work with irq migration necessary
     44 *				for CPU Hotplug
     45 */
     46/*
     47 * Here is what the interrupt logic between a PCI device and the kernel looks
     48 * like:
     49 *
     50 * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC,
     51 *     INTD).  The device is uniquely identified by its bus and slot number
     52 *     (the function number does not matter here because all functions share
     53 *     the same interrupt lines).
     54 *
     55 * (2) The motherboard routes the interrupt line to a pin on an IOSAPIC
     56 *     controller.  Multiple interrupt lines may have to share the same
     57 *     IOSAPIC pin (if they're level triggered and use the same polarity).
     58 *     Each interrupt line has a unique Global System Interrupt (GSI) number
     59 *     which can be calculated as the sum of the controller's base GSI number
     60 *     and the IOSAPIC pin number to which the line connects.
     61 *
     62 * (3) The IOSAPIC uses internal routing table entries (RTEs) to map an
     63 *     IOSAPIC pin to an IA-64 interrupt vector.  This interrupt vector is
     64 *     then sent to the CPU.
     65 *
     66 * (4) The kernel recognizes an interrupt as an IRQ.  The IRQ interface is
     67 *     used as the architecture-independent interrupt handling mechanism in
     68 *     Linux.  As an IRQ is a number, we need an IA-64 interrupt vector
     69 *     number <-> IRQ number mapping.  On smaller systems, we use a
     70 *     one-to-one mapping between IA-64 vector and IRQ.
     71 *
     72 * To sum up, there are three levels of mappings involved:
     73 *
     74 *	PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
     75 *
     76 * Note: The term "IRQ" is used loosely throughout the Linux kernel to
     77 * describe interrupts.  Here we use "IRQ" only for Linux IRQs.  ISA IRQ
     78 * (isa_irq) is the only exception in this source code.
     79 */
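
The three-level mapping above can be sketched in a few lines of ordinary C. This is a minimal, userspace-only toy model for illustration; every name (toy_*, TOY_*) and every numeric value here is invented and does not appear in the kernel code below.

/*
 * Toy model of the PCI pin -> GSI -> vector <-> IRQ chain.  Everything in
 * this block is invented for illustration; it is not kernel code.
 */
#include <assert.h>

#define TOY_GSI_BASE	16	/* GSI of the controller's pin 0 (assumed) */
#define TOY_NUM_GSI	24	/* number of pins on the controller (assumed) */

static unsigned char toy_gsi_to_vector[TOY_GSI_BASE + TOY_NUM_GSI];

/* (2) PCI pin -> GSI: the controller's base GSI plus the pin number. */
static unsigned int toy_pin_to_gsi(unsigned int pin)
{
	return TOY_GSI_BASE + pin;
}

/*
 * (3)+(4) GSI -> IA-64 vector <-> IRQ: the vector is programmed into the
 * pin's RTE; on small systems the Linux IRQ number is simply the vector.
 */
static unsigned int toy_route_gsi(unsigned int gsi, unsigned char vector)
{
	toy_gsi_to_vector[gsi] = vector;
	return vector;			/* one-to-one vector <-> IRQ */
}

int main(void)
{
	unsigned int gsi = toy_pin_to_gsi(3);		/* line wired to pin 3 */
	unsigned int irq = toy_route_gsi(gsi, 0x31);	/* vector chosen: 0x31 */

	assert(gsi == 19);
	assert(irq == 0x31);
	return 0;
}
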
     80
     81#include <linux/acpi.h>
     82#include <linux/init.h>
     83#include <linux/irq.h>
     84#include <linux/kernel.h>
     85#include <linux/list.h>
     86#include <linux/pci.h>
     87#include <linux/slab.h>
     88#include <linux/smp.h>
     89#include <linux/string.h>
     90#include <linux/memblock.h>
     91
     92#include <asm/delay.h>
     93#include <asm/hw_irq.h>
     94#include <asm/io.h>
     95#include <asm/iosapic.h>
     96#include <asm/processor.h>
     97#include <asm/ptrace.h>
     98#include <asm/xtp.h>
     99
    100#undef DEBUG_INTERRUPT_ROUTING
    101
    102#ifdef DEBUG_INTERRUPT_ROUTING
    103#define DBG(fmt...)	printk(fmt)
    104#else
    105#define DBG(fmt...)
    106#endif
    107
    108static DEFINE_SPINLOCK(iosapic_lock);
    109
    110/*
    111 * These tables map each IA-64 vector to the IOSAPIC pin that generates
    112 * that vector.
    113 */
    114
    115#define NO_REF_RTE	0
    116
    117static struct iosapic {
    118	char __iomem	*addr;		/* base address of IOSAPIC */
    119	unsigned int	gsi_base;	/* GSI base */
    120	unsigned short	num_rte;	/* # of RTEs on this IOSAPIC */
    121	int		rtes_inuse;	/* # of RTEs in use on this IOSAPIC */
    122#ifdef CONFIG_NUMA
    123	unsigned short	node;		/* numa node association via pxm */
    124#endif
    125	spinlock_t	lock;		/* lock for indirect reg access */
    126} iosapic_lists[NR_IOSAPICS];
    127
    128struct iosapic_rte_info {
    129	struct list_head rte_list;	/* RTEs sharing the same vector */
    130	char		rte_index;	/* IOSAPIC RTE index */
    131	int		refcnt;		/* reference counter */
    132	struct iosapic	*iosapic;
    133} ____cacheline_aligned;
    134
    135static struct iosapic_intr_info {
    136	struct list_head rtes;		/* RTEs using this vector (empty =>
    137					 * not an IOSAPIC interrupt) */
    138	int		count;		/* # of registered RTEs */
    139	u32		low32;		/* current value of low word of
    140					 * Redirection table entry */
    141	unsigned int	dest;		/* destination CPU physical ID */
    142	unsigned char	dmode	: 3;	/* delivery mode (see iosapic.h) */
    143	unsigned char 	polarity: 1;	/* interrupt polarity
    144					 * (see iosapic.h) */
    145	unsigned char	trigger	: 1;	/* trigger mode (see iosapic.h) */
    146} iosapic_intr_info[NR_IRQS];
    147
    148static unsigned char pcat_compat;	/* 8259 compatibility flag */
    149
    150static inline void
    151iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val)
    152{
    153	unsigned long flags;
    154
    155	spin_lock_irqsave(&iosapic->lock, flags);
    156	__iosapic_write(iosapic->addr, reg, val);
    157	spin_unlock_irqrestore(&iosapic->lock, flags);
    158}
    159
    160/*
    161 * Find an IOSAPIC associated with a GSI
    162 */
    163static inline int
    164find_iosapic (unsigned int gsi)
    165{
    166	int i;
    167
    168	for (i = 0; i < NR_IOSAPICS; i++) {
    169		if ((unsigned) (gsi - iosapic_lists[i].gsi_base) <
    170		    iosapic_lists[i].num_rte)
    171			return i;
    172	}
    173
    174	return -1;
    175}
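
find_iosapic() above checks both range bounds with a single comparison: when gsi is below gsi_base, the unsigned subtraction wraps around to a very large value and fails the < num_rte test. A minimal standalone sketch of the idiom (the numbers are invented):

#include <assert.h>

/* Same range test as find_iosapic(): true iff base <= gsi < base + num_rte. */
static int gsi_in_range(unsigned int gsi, unsigned int base, unsigned int num_rte)
{
	return (unsigned int)(gsi - base) < num_rte;
}

int main(void)
{
	assert(gsi_in_range(16, 16, 24));	/* first pin of the controller */
	assert(gsi_in_range(39, 16, 24));	/* last pin: 16 + 24 - 1 */
	assert(!gsi_in_range(40, 16, 24));	/* one past the last pin */
	assert(!gsi_in_range(15, 16, 24));	/* below base: wraps to a huge value */
	return 0;
}
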
    176
    177static inline int __gsi_to_irq(unsigned int gsi)
    178{
    179	int irq;
    180	struct iosapic_intr_info *info;
    181	struct iosapic_rte_info *rte;
    182
    183	for (irq = 0; irq < NR_IRQS; irq++) {
    184		info = &iosapic_intr_info[irq];
    185		list_for_each_entry(rte, &info->rtes, rte_list)
    186			if (rte->iosapic->gsi_base + rte->rte_index == gsi)
    187				return irq;
    188	}
    189	return -1;
    190}
    191
    192int
    193gsi_to_irq (unsigned int gsi)
    194{
    195	unsigned long flags;
    196	int irq;
    197
    198	spin_lock_irqsave(&iosapic_lock, flags);
    199	irq = __gsi_to_irq(gsi);
    200	spin_unlock_irqrestore(&iosapic_lock, flags);
    201	return irq;
    202}
    203
    204static struct iosapic_rte_info *find_rte(unsigned int irq, unsigned int gsi)
    205{
    206	struct iosapic_rte_info *rte;
    207
    208	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list)
    209		if (rte->iosapic->gsi_base + rte->rte_index == gsi)
    210			return rte;
    211	return NULL;
    212}
    213
    214static void
    215set_rte (unsigned int gsi, unsigned int irq, unsigned int dest, int mask)
    216{
    217	unsigned long pol, trigger, dmode;
    218	u32 low32, high32;
    219	int rte_index;
    220	char redir;
    221	struct iosapic_rte_info *rte;
    222	ia64_vector vector = irq_to_vector(irq);
    223
    224	DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
    225
    226	rte = find_rte(irq, gsi);
    227	if (!rte)
    228		return;		/* not an IOSAPIC interrupt */
    229
    230	rte_index = rte->rte_index;
    231	pol     = iosapic_intr_info[irq].polarity;
    232	trigger = iosapic_intr_info[irq].trigger;
    233	dmode   = iosapic_intr_info[irq].dmode;
    234
    235	redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
    236
    237#ifdef CONFIG_SMP
    238	set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
    239#endif
    240
    241	low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
    242		 (trigger << IOSAPIC_TRIGGER_SHIFT) |
    243		 (dmode << IOSAPIC_DELIVERY_SHIFT) |
    244		 ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) |
    245		 vector);
    246
    247	/* dest contains both id and eid */
    248	high32 = (dest << IOSAPIC_DEST_SHIFT);
    249
    250	iosapic_write(rte->iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
    251	iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
    252	iosapic_intr_info[irq].low32 = low32;
    253	iosapic_intr_info[irq].dest = dest;
    254}
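
set_rte() above packs the RTE as two 32-bit words: the low word carries polarity, trigger, delivery mode, the mask bit and the vector, the high word carries the destination id/eid. The sketch below re-creates that packing as standalone C; the TOY_*_SHIFT values follow the usual I/O (S)APIC RTE layout (vector in bits 0-7, delivery mode from bit 8, polarity at bit 13, trigger at bit 15, mask at bit 16), but the authoritative constants live in asm/iosapic.h, which is not part of this listing, so treat them as assumptions.

#include <stdint.h>
#include <stdio.h>

/* Assumed stand-ins for the IOSAPIC_*_SHIFT constants from asm/iosapic.h. */
#define TOY_DELIVERY_SHIFT	8
#define TOY_POLARITY_SHIFT	13
#define TOY_TRIGGER_SHIFT	15
#define TOY_MASK_SHIFT		16
#define TOY_DEST_SHIFT		16	/* high word: destination id/eid */

/* Pack an RTE the way set_rte() does: mode bits plus vector in the low
 * word, destination in the high word. */
static void toy_pack_rte(uint32_t *low, uint32_t *high, unsigned int pol,
			 unsigned int trigger, unsigned int dmode, int mask,
			 uint8_t vector, uint16_t dest)
{
	*low = (pol << TOY_POLARITY_SHIFT) |
	       (trigger << TOY_TRIGGER_SHIFT) |
	       (dmode << TOY_DELIVERY_SHIFT) |
	       ((mask ? 1 : 0) << TOY_MASK_SHIFT) |
	       vector;
	*high = (uint32_t)dest << TOY_DEST_SHIFT;
}

int main(void)
{
	uint32_t low, high;

	/* Invented example: masked, fixed delivery (assumed encoding 0),
	 * active-low, level-triggered, vector 0x31, destination 0x0100. */
	toy_pack_rte(&low, &high, 1, 1, 0, 1, 0x31, 0x0100);
	printf("low32=0x%08x high32=0x%08x\n", (unsigned)low, (unsigned)high);
	return 0;
}
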
    255
    256static void
    257iosapic_nop (struct irq_data *data)
    258{
    259	/* do nothing... */
    260}
    261
    262
    263#ifdef CONFIG_KEXEC
    264void
    265kexec_disable_iosapic(void)
    266{
    267	struct iosapic_intr_info *info;
    268	struct iosapic_rte_info *rte;
    269	ia64_vector vec;
    270	int irq;
    271
    272	for (irq = 0; irq < NR_IRQS; irq++) {
    273		info = &iosapic_intr_info[irq];
    274		vec = irq_to_vector(irq);
    275		list_for_each_entry(rte, &info->rtes,
    276				rte_list) {
    277			iosapic_write(rte->iosapic,
    278					IOSAPIC_RTE_LOW(rte->rte_index),
    279					IOSAPIC_MASK|vec);
    280			iosapic_eoi(rte->iosapic->addr, vec);
    281		}
    282	}
    283}
    284#endif
    285
    286static void
    287mask_irq (struct irq_data *data)
    288{
    289	unsigned int irq = data->irq;
    290	u32 low32;
    291	int rte_index;
    292	struct iosapic_rte_info *rte;
    293
    294	if (!iosapic_intr_info[irq].count)
    295		return;			/* not an IOSAPIC interrupt! */
    296
    297	/* set only the mask bit */
    298	low32 = iosapic_intr_info[irq].low32 |= IOSAPIC_MASK;
    299	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
    300		rte_index = rte->rte_index;
    301		iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
    302	}
    303}
    304
    305static void
    306unmask_irq (struct irq_data *data)
    307{
    308	unsigned int irq = data->irq;
    309	u32 low32;
    310	int rte_index;
    311	struct iosapic_rte_info *rte;
    312
    313	if (!iosapic_intr_info[irq].count)
    314		return;			/* not an IOSAPIC interrupt! */
    315
    316	low32 = iosapic_intr_info[irq].low32 &= ~IOSAPIC_MASK;
    317	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
    318		rte_index = rte->rte_index;
    319		iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
    320	}
    321}
    322
    323
    324static int
    325iosapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
    326		     bool force)
    327{
    328#ifdef CONFIG_SMP
    329	unsigned int irq = data->irq;
    330	u32 high32, low32;
    331	int cpu, dest, rte_index;
    332	int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
    333	struct iosapic_rte_info *rte;
    334	struct iosapic *iosapic;
    335
    336	irq &= (~IA64_IRQ_REDIRECTED);
    337
    338	cpu = cpumask_first_and(cpu_online_mask, mask);
    339	if (cpu >= nr_cpu_ids)
    340		return -1;
    341
    342	if (irq_prepare_move(irq, cpu))
    343		return -1;
    344
    345	dest = cpu_physical_id(cpu);
    346
    347	if (!iosapic_intr_info[irq].count)
    348		return -1;			/* not an IOSAPIC interrupt */
    349
    350	set_irq_affinity_info(irq, dest, redir);
    351
    352	/* dest contains both id and eid */
    353	high32 = dest << IOSAPIC_DEST_SHIFT;
    354
    355	low32 = iosapic_intr_info[irq].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
    356	if (redir)
    357		/* change delivery mode to lowest priority */
    358		low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
    359	else
    360		/* change delivery mode to fixed */
    361		low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
    362	low32 &= IOSAPIC_VECTOR_MASK;
    363	low32 |= irq_to_vector(irq);
    364
    365	iosapic_intr_info[irq].low32 = low32;
    366	iosapic_intr_info[irq].dest = dest;
    367	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
    368		iosapic = rte->iosapic;
    369		rte_index = rte->rte_index;
    370		iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
    371		iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
    372	}
    373
    374#endif
    375	return 0;
    376}
    377
    378/*
    379 * Handlers for level-triggered interrupts.
    380 */
    381
    382static unsigned int
    383iosapic_startup_level_irq (struct irq_data *data)
    384{
    385	unmask_irq(data);
    386	return 0;
    387}
    388
    389static void
    390iosapic_unmask_level_irq (struct irq_data *data)
    391{
    392	unsigned int irq = data->irq;
    393	ia64_vector vec = irq_to_vector(irq);
    394	struct iosapic_rte_info *rte;
    395	int do_unmask_irq = 0;
    396
    397	irq_complete_move(irq);
    398	if (unlikely(irqd_is_setaffinity_pending(data))) {
    399		do_unmask_irq = 1;
    400		mask_irq(data);
    401	} else
    402		unmask_irq(data);
    403
    404	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list)
    405		iosapic_eoi(rte->iosapic->addr, vec);
    406
    407	if (unlikely(do_unmask_irq)) {
    408		irq_move_masked_irq(data);
    409		unmask_irq(data);
    410	}
    411}
    412
    413#define iosapic_shutdown_level_irq	mask_irq
    414#define iosapic_enable_level_irq	unmask_irq
    415#define iosapic_disable_level_irq	mask_irq
    416#define iosapic_ack_level_irq		iosapic_nop
    417
    418static struct irq_chip irq_type_iosapic_level = {
    419	.name =			"IO-SAPIC-level",
    420	.irq_startup =		iosapic_startup_level_irq,
    421	.irq_shutdown =		iosapic_shutdown_level_irq,
    422	.irq_enable =		iosapic_enable_level_irq,
    423	.irq_disable =		iosapic_disable_level_irq,
    424	.irq_ack =		iosapic_ack_level_irq,
    425	.irq_mask =		mask_irq,
    426	.irq_unmask =		iosapic_unmask_level_irq,
    427	.irq_set_affinity =	iosapic_set_affinity
    428};
    429
    430/*
    431 * Handlers for edge-triggered interrupts.
    432 */
    433
    434static unsigned int
    435iosapic_startup_edge_irq (struct irq_data *data)
    436{
    437	unmask_irq(data);
    438	/*
    439	 * IOSAPIC simply drops interrupts pended while the
    440	 * corresponding pin was masked, so we can't know if an
    441	 * interrupt is pending already.  Let's hope not...
    442	 */
    443	return 0;
    444}
    445
    446static void
    447iosapic_ack_edge_irq (struct irq_data *data)
    448{
    449	irq_complete_move(data->irq);
    450	irq_move_irq(data);
    451}
    452
    453#define iosapic_enable_edge_irq		unmask_irq
    454#define iosapic_disable_edge_irq	iosapic_nop
    455
    456static struct irq_chip irq_type_iosapic_edge = {
    457	.name =			"IO-SAPIC-edge",
    458	.irq_startup =		iosapic_startup_edge_irq,
    459	.irq_shutdown =		iosapic_disable_edge_irq,
    460	.irq_enable =		iosapic_enable_edge_irq,
    461	.irq_disable =		iosapic_disable_edge_irq,
    462	.irq_ack =		iosapic_ack_edge_irq,
    463	.irq_mask =		mask_irq,
    464	.irq_unmask =		unmask_irq,
    465	.irq_set_affinity =	iosapic_set_affinity
    466};
    467
    468static unsigned int
    469iosapic_version (char __iomem *addr)
    470{
    471	/*
    472	 * The IOSAPIC Version Register returns a 32-bit structure like:
    473	 * {
    474	 *	unsigned int version   : 8;
    475	 *	unsigned int reserved1 : 8;
    476	 *	unsigned int max_redir : 8;
    477	 *	unsigned int reserved2 : 8;
    478	 * }
    479	 */
    480	return __iosapic_read(addr, IOSAPIC_VERSION);
    481}
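
Per the layout in the comment above, the version register keeps the version in bits 0-7 and the highest RTE index (max_redir) in bits 16-23; iosapic_init() later turns the latter into the pin count with ((ver >> 16) & 0xff) + 1. A small standalone check (the register value is invented):

#include <assert.h>
#include <stdint.h>

static unsigned int ver_version(uint32_t ver)   { return ver & 0xff; }
static unsigned int ver_max_redir(uint32_t ver) { return (ver >> 16) & 0xff; }

int main(void)
{
	/* Invented register value: version 0x20, highest pin index 23. */
	uint32_t ver = (23u << 16) | 0x20;

	assert(ver_version(ver) == 0x20);
	assert(ver_max_redir(ver) == 23);
	assert(ver_max_redir(ver) + 1 == 24);	/* num_rte, as in iosapic_init() */
	return 0;
}
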
    482
    483static int iosapic_find_sharable_irq(unsigned long trigger, unsigned long pol)
    484{
    485	int i, irq = -ENOSPC, min_count = -1;
    486	struct iosapic_intr_info *info;
    487
    488	/*
    489	 * shared vectors for edge-triggered interrupts are not
    490	 * supported yet
    491	 */
    492	if (trigger == IOSAPIC_EDGE)
    493		return -EINVAL;
    494
    495	for (i = 0; i < NR_IRQS; i++) {
    496		info = &iosapic_intr_info[i];
    497		if (info->trigger == trigger && info->polarity == pol &&
    498		    (info->dmode == IOSAPIC_FIXED ||
    499		     info->dmode == IOSAPIC_LOWEST_PRIORITY) &&
    500		    can_request_irq(i, IRQF_SHARED)) {
    501			if (min_count == -1 || info->count < min_count) {
    502				irq = i;
    503				min_count = info->count;
    504			}
    505		}
    506	}
    507	return irq;
    508}
    509
    510/*
    511 * If the given vector is already owned by another interrupt, assign a
    512 * new vector to that one and make this vector available.
    513 */
    514static void __init
    515iosapic_reassign_vector (int irq)
    516{
    517	int new_irq;
    518
    519	if (iosapic_intr_info[irq].count) {
    520		new_irq = create_irq();
    521		if (new_irq < 0)
    522			panic("%s: out of interrupt vectors!\n", __func__);
    523		printk(KERN_INFO "Reassigning vector %d to %d\n",
    524		       irq_to_vector(irq), irq_to_vector(new_irq));
    525		memcpy(&iosapic_intr_info[new_irq], &iosapic_intr_info[irq],
    526		       sizeof(struct iosapic_intr_info));
    527		INIT_LIST_HEAD(&iosapic_intr_info[new_irq].rtes);
    528		list_move(iosapic_intr_info[irq].rtes.next,
    529			  &iosapic_intr_info[new_irq].rtes);
    530		memset(&iosapic_intr_info[irq], 0,
    531		       sizeof(struct iosapic_intr_info));
    532		iosapic_intr_info[irq].low32 = IOSAPIC_MASK;
    533		INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes);
    534	}
    535}
    536
    537static inline int irq_is_shared (int irq)
    538{
    539	return (iosapic_intr_info[irq].count > 1);
    540}
    541
    542struct irq_chip*
    543ia64_native_iosapic_get_irq_chip(unsigned long trigger)
    544{
    545	if (trigger == IOSAPIC_EDGE)
    546		return &irq_type_iosapic_edge;
    547	else
    548		return &irq_type_iosapic_level;
    549}
    550
    551static int
    552register_intr (unsigned int gsi, int irq, unsigned char delivery,
    553	       unsigned long polarity, unsigned long trigger)
    554{
    555	struct irq_chip *chip, *irq_type;
    556	int index;
    557	struct iosapic_rte_info *rte;
    558
    559	index = find_iosapic(gsi);
    560	if (index < 0) {
    561		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
    562		       __func__, gsi);
    563		return -ENODEV;
    564	}
    565
    566	rte = find_rte(irq, gsi);
    567	if (!rte) {
    568		rte = kzalloc(sizeof (*rte), GFP_ATOMIC);
    569		if (!rte) {
    570			printk(KERN_WARNING "%s: cannot allocate memory\n",
    571			       __func__);
    572			return -ENOMEM;
    573		}
    574
    575		rte->iosapic	= &iosapic_lists[index];
    576		rte->rte_index	= gsi - rte->iosapic->gsi_base;
    577		rte->refcnt++;
    578		list_add_tail(&rte->rte_list, &iosapic_intr_info[irq].rtes);
    579		iosapic_intr_info[irq].count++;
    580		iosapic_lists[index].rtes_inuse++;
    581	}
    582	else if (rte->refcnt == NO_REF_RTE) {
    583		struct iosapic_intr_info *info = &iosapic_intr_info[irq];
    584		if (info->count > 0 &&
    585		    (info->trigger != trigger || info->polarity != polarity)){
    586			printk (KERN_WARNING
    587				"%s: cannot override the interrupt\n",
    588				__func__);
    589			return -EINVAL;
    590		}
    591		rte->refcnt++;
    592		iosapic_intr_info[irq].count++;
    593		iosapic_lists[index].rtes_inuse++;
    594	}
    595
    596	iosapic_intr_info[irq].polarity = polarity;
    597	iosapic_intr_info[irq].dmode    = delivery;
    598	iosapic_intr_info[irq].trigger  = trigger;
    599
    600	irq_type = iosapic_get_irq_chip(trigger);
    601
    602	chip = irq_get_chip(irq);
    603	if (irq_type != NULL && chip != irq_type) {
    604		if (chip != &no_irq_chip)
    605			printk(KERN_WARNING
    606			       "%s: changing vector %d from %s to %s\n",
    607			       __func__, irq_to_vector(irq),
    608			       chip->name, irq_type->name);
    609		chip = irq_type;
    610	}
    611	irq_set_chip_handler_name_locked(irq_get_irq_data(irq), chip,
    612		trigger == IOSAPIC_EDGE ? handle_edge_irq : handle_level_irq,
    613		NULL);
    614	return 0;
    615}
    616
    617static unsigned int
    618get_target_cpu (unsigned int gsi, int irq)
    619{
    620#ifdef CONFIG_SMP
    621	static int cpu = -1;
    622	extern int cpe_vector;
    623	cpumask_t domain = irq_to_domain(irq);
    624
    625	/*
    626	 * In the case of a vector shared by multiple RTEs, all RTEs that
    627	 * share the vector need to use the same destination CPU.
    628	 */
    629	if (iosapic_intr_info[irq].count)
    630		return iosapic_intr_info[irq].dest;
    631
    632	/*
    633	 * If the platform supports redirection via XTP, let it
    634	 * distribute interrupts.
    635	 */
    636	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
    637		return cpu_physical_id(smp_processor_id());
    638
    639	/*
    640	 * Some interrupts (ACPI SCI, for instance) are registered
    641	 * before the BSP is marked as online.
    642	 */
    643	if (!cpu_online(smp_processor_id()))
    644		return cpu_physical_id(smp_processor_id());
    645
    646	if (cpe_vector > 0 && irq_to_vector(irq) == IA64_CPEP_VECTOR)
    647		return get_cpei_target_cpu();
    648
    649#ifdef CONFIG_NUMA
    650	{
    651		int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
    652		const struct cpumask *cpu_mask;
    653
    654		iosapic_index = find_iosapic(gsi);
    655		if (iosapic_index < 0 ||
    656		    iosapic_lists[iosapic_index].node == MAX_NUMNODES)
    657			goto skip_numa_setup;
    658
    659		cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node);
    660		num_cpus = 0;
    661		for_each_cpu_and(numa_cpu, cpu_mask, &domain) {
    662			if (cpu_online(numa_cpu))
    663				num_cpus++;
    664		}
    665
    666		if (!num_cpus)
    667			goto skip_numa_setup;
    668
    669		/* Use irq assignment to distribute across cpus in node */
    670		cpu_index = irq % num_cpus;
    671
    672		for_each_cpu_and(numa_cpu, cpu_mask, &domain)
    673			if (cpu_online(numa_cpu) && i++ >= cpu_index)
    674				break;
    675
    676		if (numa_cpu < nr_cpu_ids)
    677			return cpu_physical_id(numa_cpu);
    678	}
    679skip_numa_setup:
    680#endif
    681	/*
    682	 * Otherwise, round-robin interrupt vectors across all the
    683	 * processors.  (It'd be nice if we could be smarter in the
    684	 * case of NUMA.)
    685	 */
    686	do {
    687		if (++cpu >= nr_cpu_ids)
    688			cpu = 0;
    689	} while (!cpu_online(cpu) || !cpumask_test_cpu(cpu, &domain));
    690
    691	return cpu_physical_id(cpu);
    692#else  /* CONFIG_SMP */
    693	return cpu_physical_id(smp_processor_id());
    694#endif
    695}
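
The NUMA branch of get_target_cpu() above spreads vectors over the CPUs of the IOSAPIC's node by taking irq % num_cpus and walking to that online CPU. A standalone sketch of the same selection (CPU lists and online states are invented):

#include <assert.h>

/* Pick the (irq % num_online)-th online CPU from a node's CPU list, as the
 * NUMA branch of get_target_cpu() does.  Returns -1 if none is online. */
static int toy_pick_numa_cpu(const int *node_cpus, const int *online, int n,
			     int irq)
{
	int num_online = 0, idx, i, seen = 0;

	for (i = 0; i < n; i++)
		if (online[node_cpus[i]])
			num_online++;
	if (!num_online)
		return -1;

	idx = irq % num_online;		/* spread IRQs across the node's CPUs */
	for (i = 0; i < n; i++) {
		if (!online[node_cpus[i]])
			continue;
		if (seen++ == idx)
			return node_cpus[i];
	}
	return -1;
}

int main(void)
{
	int node_cpus[] = { 2, 3, 4, 5 };		/* CPUs in the node */
	int online[8]   = { 1, 1, 1, 0, 1, 1, 1, 1 };	/* CPU 3 is offline */

	assert(toy_pick_numa_cpu(node_cpus, online, 4, 0) == 2);
	assert(toy_pick_numa_cpu(node_cpus, online, 4, 1) == 4);
	assert(toy_pick_numa_cpu(node_cpus, online, 4, 3) == 2);  /* 3 % 3 == 0 */
	return 0;
}
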
    696
    697static inline unsigned char choose_dmode(void)
    698{
    699#ifdef CONFIG_SMP
    700	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
    701		return IOSAPIC_LOWEST_PRIORITY;
    702#endif
    703	return IOSAPIC_FIXED;
    704}
    705
    706/*
    707 * ACPI can describe IOSAPIC interrupts via static tables and namespace
    708 * methods.  This provides an interface to register those interrupts and
    709 * program the IOSAPIC RTE.
    710 */
    711int
    712iosapic_register_intr (unsigned int gsi,
    713		       unsigned long polarity, unsigned long trigger)
    714{
    715	int irq, mask = 1, err;
    716	unsigned int dest;
    717	unsigned long flags;
    718	struct iosapic_rte_info *rte;
    719	u32 low32;
    720	unsigned char dmode;
    721	struct irq_desc *desc;
    722
    723	/*
    724	 * If this GSI has already been registered (i.e., it's a
    725	 * shared interrupt, or we lost a race to register it),
    726	 * don't touch the RTE.
    727	 */
    728	spin_lock_irqsave(&iosapic_lock, flags);
    729	irq = __gsi_to_irq(gsi);
    730	if (irq > 0) {
    731		rte = find_rte(irq, gsi);
    732		if(iosapic_intr_info[irq].count == 0) {
    733			assign_irq_vector(irq);
    734			irq_init_desc(irq);
    735		} else if (rte->refcnt != NO_REF_RTE) {
    736			rte->refcnt++;
    737			goto unlock_iosapic_lock;
    738		}
    739	} else
    740		irq = create_irq();
    741
    742	/* If vectors are running out, try to find a sharable vector */
    743	if (irq < 0) {
    744		irq = iosapic_find_sharable_irq(trigger, polarity);
    745		if (irq < 0)
    746			goto unlock_iosapic_lock;
    747	}
    748
    749	desc = irq_to_desc(irq);
    750	raw_spin_lock(&desc->lock);
    751	dest = get_target_cpu(gsi, irq);
    752	dmode = choose_dmode();
    753	err = register_intr(gsi, irq, dmode, polarity, trigger);
    754	if (err < 0) {
    755		raw_spin_unlock(&desc->lock);
    756		irq = err;
    757		goto unlock_iosapic_lock;
    758	}
    759
    760	/*
    761	 * If the vector is shared and already unmasked for other
    762	 * interrupt sources, don't mask it.
    763	 */
    764	low32 = iosapic_intr_info[irq].low32;
    765	if (irq_is_shared(irq) && !(low32 & IOSAPIC_MASK))
    766		mask = 0;
    767	set_rte(gsi, irq, dest, mask);
    768
    769	printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
    770	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
    771	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
    772	       cpu_logical_id(dest), dest, irq_to_vector(irq));
    773
    774	raw_spin_unlock(&desc->lock);
    775 unlock_iosapic_lock:
    776	spin_unlock_irqrestore(&iosapic_lock, flags);
    777	return irq;
    778}
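
For context, a hypothetical in-kernel caller of the registration interface described above might look like the sketch below. It is not from this repository and is not compilable on its own; it assumes asm/iosapic.h exports iosapic_register_intr() and the IOSAPIC_POL_HIGH/IOSAPIC_EDGE constants used elsewhere in this file, and it uses the standard request_irq() API.

#include <linux/interrupt.h>
#include <asm/iosapic.h>

static int demo_token;

static irqreturn_t demo_handler(int irq, void *dev_id)
{
	/* acknowledge/handle the device here */
	return IRQ_HANDLED;
}

static int demo_attach(void)
{
	/* Route GSI 20 as an edge-triggered, active-high interrupt and get
	 * back the Linux IRQ number (or a negative error). */
	int irq = iosapic_register_intr(20, IOSAPIC_POL_HIGH, IOSAPIC_EDGE);

	if (irq < 0)
		return irq;

	return request_irq(irq, demo_handler, 0, "demo", &demo_token);
}
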
    779
    780void
    781iosapic_unregister_intr (unsigned int gsi)
    782{
    783	unsigned long flags;
    784	int irq, index;
    785	u32 low32;
    786	unsigned long trigger, polarity;
    787	unsigned int dest;
    788	struct iosapic_rte_info *rte;
    789
    790	/*
    791	 * If the irq associated with the gsi is not found,
    792	 * iosapic_unregister_intr() is unbalanced. We need to check
    793	 * this again after getting locks.
    794	 */
    795	irq = gsi_to_irq(gsi);
    796	if (irq < 0) {
    797		printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
    798		       gsi);
    799		WARN_ON(1);
    800		return;
    801	}
    802
    803	spin_lock_irqsave(&iosapic_lock, flags);
    804	if ((rte = find_rte(irq, gsi)) == NULL) {
    805		printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
    806		       gsi);
    807		WARN_ON(1);
    808		goto out;
    809	}
    810
    811	if (--rte->refcnt > 0)
    812		goto out;
    813
    814	rte->refcnt = NO_REF_RTE;
    815
    816	/* Mask the interrupt */
    817	low32 = iosapic_intr_info[irq].low32 | IOSAPIC_MASK;
    818	iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), low32);
    819
    820	iosapic_intr_info[irq].count--;
    821	index = find_iosapic(gsi);
    822	iosapic_lists[index].rtes_inuse--;
    823	WARN_ON(iosapic_lists[index].rtes_inuse < 0);
    824
    825	trigger  = iosapic_intr_info[irq].trigger;
    826	polarity = iosapic_intr_info[irq].polarity;
    827	dest     = iosapic_intr_info[irq].dest;
    828	printk(KERN_INFO
    829	       "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n",
    830	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
    831	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
    832	       cpu_logical_id(dest), dest, irq_to_vector(irq));
    833
    834	if (iosapic_intr_info[irq].count == 0) {
    835#ifdef CONFIG_SMP
    836		/* Clear affinity */
    837		cpumask_setall(irq_get_affinity_mask(irq));
    838#endif
    839		/* Clear the interrupt information */
    840		iosapic_intr_info[irq].dest = 0;
    841		iosapic_intr_info[irq].dmode = 0;
    842		iosapic_intr_info[irq].polarity = 0;
    843		iosapic_intr_info[irq].trigger = 0;
    844		iosapic_intr_info[irq].low32 |= IOSAPIC_MASK;
    845
    846		/* Destroy and reserve IRQ */
    847		destroy_and_reserve_irq(irq);
    848	}
    849 out:
    850	spin_unlock_irqrestore(&iosapic_lock, flags);
    851}
    852
    853/*
    854 * ACPI calls this when it finds an entry for a platform interrupt.
    855 */
    856int __init
    857iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
    858				int iosapic_vector, u16 eid, u16 id,
    859				unsigned long polarity, unsigned long trigger)
    860{
    861	static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
    862	unsigned char delivery;
    863	int irq, vector, mask = 0;
    864	unsigned int dest = ((id << 8) | eid) & 0xffff;
    865
    866	switch (int_type) {
    867	      case ACPI_INTERRUPT_PMI:
    868		irq = vector = iosapic_vector;
    869		bind_irq_vector(irq, vector, CPU_MASK_ALL);
    870		/*
    871		 * since the PMI vector is allocated by FW (ACPI), not by the
    872		 * kernel, we need to make sure the vector is available
    873		 */
    874		iosapic_reassign_vector(irq);
    875		delivery = IOSAPIC_PMI;
    876		break;
    877	      case ACPI_INTERRUPT_INIT:
    878		irq = create_irq();
    879		if (irq < 0)
    880			panic("%s: out of interrupt vectors!\n", __func__);
    881		vector = irq_to_vector(irq);
    882		delivery = IOSAPIC_INIT;
    883		break;
    884	      case ACPI_INTERRUPT_CPEI:
    885		irq = vector = IA64_CPE_VECTOR;
    886		BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
    887		delivery = IOSAPIC_FIXED;
    888		mask = 1;
    889		break;
    890	      default:
    891		printk(KERN_ERR "%s: invalid int type 0x%x\n", __func__,
    892		       int_type);
    893		return -1;
    894	}
    895
    896	register_intr(gsi, irq, delivery, polarity, trigger);
    897
    898	printk(KERN_INFO
    899	       "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)"
    900	       " vector %d\n",
    901	       int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
    902	       int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
    903	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
    904	       cpu_logical_id(dest), dest, vector);
    905
    906	set_rte(gsi, irq, dest, mask);
    907	return vector;
    908}
    909
    910/*
    911 * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
    912 */
    913void iosapic_override_isa_irq(unsigned int isa_irq, unsigned int gsi,
    914			      unsigned long polarity, unsigned long trigger)
    915{
    916	int vector, irq;
    917	unsigned int dest = cpu_physical_id(smp_processor_id());
    918	unsigned char dmode;
    919
    920	irq = vector = isa_irq_to_vector(isa_irq);
    921	BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
    922	dmode = choose_dmode();
    923	register_intr(gsi, irq, dmode, polarity, trigger);
    924
    925	DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
    926	    isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
    927	    polarity == IOSAPIC_POL_HIGH ? "high" : "low",
    928	    cpu_logical_id(dest), dest, vector);
    929
    930	set_rte(gsi, irq, dest, 1);
    931}
    932
    933void __init
    934ia64_native_iosapic_pcat_compat_init(void)
    935{
    936	if (pcat_compat) {
    937		/*
    938		 * Disable the compatibility mode interrupts (8259 style); this
    939		 * needs IN/OUT support enabled.
    940		 */
    941		printk(KERN_INFO
    942		       "%s: Disabling PC-AT compatible 8259 interrupts\n",
    943		       __func__);
    944		outb(0xff, 0xA1);
    945		outb(0xff, 0x21);
    946	}
    947}
    948
    949void __init
    950iosapic_system_init (int system_pcat_compat)
    951{
    952	int irq;
    953
    954	for (irq = 0; irq < NR_IRQS; ++irq) {
    955		iosapic_intr_info[irq].low32 = IOSAPIC_MASK;
    956		/* mark as unused */
    957		INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes);
    958
    959		iosapic_intr_info[irq].count = 0;
    960	}
    961
    962	pcat_compat = system_pcat_compat;
    963	if (pcat_compat)
    964		iosapic_pcat_compat_init();
    965}
    966
    967static inline int
    968iosapic_alloc (void)
    969{
    970	int index;
    971
    972	for (index = 0; index < NR_IOSAPICS; index++)
    973		if (!iosapic_lists[index].addr)
    974			return index;
    975
    976	printk(KERN_WARNING "%s: failed to allocate iosapic\n", __func__);
    977	return -1;
    978}
    979
    980static inline void
    981iosapic_free (int index)
    982{
    983	memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0]));
    984}
    985
    986static inline int
    987iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
    988{
    989	int index;
    990	unsigned int gsi_end, base, end;
    991
    992	/* check gsi range */
    993	gsi_end = gsi_base + ((ver >> 16) & 0xff);
    994	for (index = 0; index < NR_IOSAPICS; index++) {
    995		if (!iosapic_lists[index].addr)
    996			continue;
    997
    998		base = iosapic_lists[index].gsi_base;
    999		end  = base + iosapic_lists[index].num_rte - 1;
   1000
   1001		if (gsi_end < base || end < gsi_base)
   1002			continue; /* OK */
   1003
   1004		return -EBUSY;
   1005	}
   1006	return 0;
   1007}
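
iosapic_check_gsi_range() above treats each controller's GSI window as an inclusive range and rejects a newcomer whose window overlaps an existing one; two inclusive ranges are disjoint exactly when one ends before the other begins, which is the gsi_end < base || end < gsi_base test. A standalone sketch of that predicate (values invented):

#include <assert.h>

/* Inclusive-range overlap test, mirroring iosapic_check_gsi_range(). */
static int ranges_overlap(unsigned int a_start, unsigned int a_end,
			  unsigned int b_start, unsigned int b_end)
{
	return !(a_end < b_start || b_end < a_start);
}

int main(void)
{
	/* Existing controller: GSIs 16..39 (24 RTEs). */
	assert(!ranges_overlap(0, 15, 16, 39));		/* legacy block below it */
	assert(ranges_overlap(32, 55, 16, 39));		/* clashes on 32..39 */
	assert(!ranges_overlap(40, 63, 16, 39));	/* starts right after it */
	return 0;
}
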
   1008
   1009static int
   1010iosapic_delete_rte(unsigned int irq, unsigned int gsi)
   1011{
   1012	struct iosapic_rte_info *rte, *temp;
   1013
   1014	list_for_each_entry_safe(rte, temp, &iosapic_intr_info[irq].rtes,
   1015								rte_list) {
   1016		if (rte->iosapic->gsi_base + rte->rte_index == gsi) {
   1017			if (rte->refcnt)
   1018				return -EBUSY;
   1019
   1020			list_del(&rte->rte_list);
   1021			kfree(rte);
   1022			return 0;
   1023		}
   1024	}
   1025
   1026	return -EINVAL;
   1027}
   1028
   1029int iosapic_init(unsigned long phys_addr, unsigned int gsi_base)
   1030{
   1031	int num_rte, err, index;
   1032	unsigned int isa_irq, ver;
   1033	char __iomem *addr;
   1034	unsigned long flags;
   1035
   1036	spin_lock_irqsave(&iosapic_lock, flags);
   1037	index = find_iosapic(gsi_base);
   1038	if (index >= 0) {
   1039		spin_unlock_irqrestore(&iosapic_lock, flags);
   1040		return -EBUSY;
   1041	}
   1042
   1043	addr = ioremap(phys_addr, 0);
   1044	if (addr == NULL) {
   1045		spin_unlock_irqrestore(&iosapic_lock, flags);
   1046		return -ENOMEM;
   1047	}
   1048	ver = iosapic_version(addr);
   1049	if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
   1050		iounmap(addr);
   1051		spin_unlock_irqrestore(&iosapic_lock, flags);
   1052		return err;
   1053	}
   1054
   1055	/*
   1056	 * The MAX_REDIR register holds the highest input pin number
   1057	 * (starting from 0).  We add 1 so that we can use it as the
   1058	 * number of pins (= RTEs).
   1059	 */
   1060	num_rte = ((ver >> 16) & 0xff) + 1;
   1061
   1062	index = iosapic_alloc();
   1063	iosapic_lists[index].addr = addr;
   1064	iosapic_lists[index].gsi_base = gsi_base;
   1065	iosapic_lists[index].num_rte = num_rte;
   1066#ifdef CONFIG_NUMA
   1067	iosapic_lists[index].node = MAX_NUMNODES;
   1068#endif
   1069	spin_lock_init(&iosapic_lists[index].lock);
   1070	spin_unlock_irqrestore(&iosapic_lock, flags);
   1071
   1072	if ((gsi_base == 0) && pcat_compat) {
   1073		/*
   1074		 * Map the legacy ISA devices into the IOSAPIC data.  Some of
   1075		 * these may get reprogrammed later on with data from the ACPI
   1076		 * Interrupt Source Override table.
   1077		 */
   1078		for (isa_irq = 0; isa_irq < 16; ++isa_irq)
   1079			iosapic_override_isa_irq(isa_irq, isa_irq,
   1080						 IOSAPIC_POL_HIGH,
   1081						 IOSAPIC_EDGE);
   1082	}
   1083	return 0;
   1084}
   1085
   1086int iosapic_remove(unsigned int gsi_base)
   1087{
   1088	int i, irq, index, err = 0;
   1089	unsigned long flags;
   1090
   1091	spin_lock_irqsave(&iosapic_lock, flags);
   1092	index = find_iosapic(gsi_base);
   1093	if (index < 0) {
   1094		printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
   1095		       __func__, gsi_base);
   1096		goto out;
   1097	}
   1098
   1099	if (iosapic_lists[index].rtes_inuse) {
   1100		err = -EBUSY;
   1101		printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
   1102		       __func__, gsi_base);
   1103		goto out;
   1104	}
   1105
   1106	for (i = gsi_base; i < gsi_base + iosapic_lists[index].num_rte; i++) {
   1107		irq = __gsi_to_irq(i);
   1108		if (irq < 0)
   1109			continue;
   1110
   1111		err = iosapic_delete_rte(irq, i);
   1112		if (err)
   1113			goto out;
   1114	}
   1115
   1116	iounmap(iosapic_lists[index].addr);
   1117	iosapic_free(index);
   1118 out:
   1119	spin_unlock_irqrestore(&iosapic_lock, flags);
   1120	return err;
   1121}
   1122
   1123#ifdef CONFIG_NUMA
   1124void map_iosapic_to_node(unsigned int gsi_base, int node)
   1125{
   1126	int index;
   1127
   1128	index = find_iosapic(gsi_base);
   1129	if (index < 0) {
   1130		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
   1131		       __func__, gsi_base);
   1132		return;
   1133	}
   1134	iosapic_lists[index].node = node;
   1135	return;
   1136}
   1137#endif