cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

uv_nmi.c (29102B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * SGI NMI support routines
      4 *
      5 * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
      6 * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
      7 * Copyright (c) Mike Travis
      8 */
      9
     10#include <linux/cpu.h>
     11#include <linux/delay.h>
     12#include <linux/kdb.h>
     13#include <linux/kexec.h>
     14#include <linux/kgdb.h>
     15#include <linux/moduleparam.h>
     16#include <linux/nmi.h>
     17#include <linux/sched.h>
     18#include <linux/sched/debug.h>
     19#include <linux/slab.h>
     20#include <linux/clocksource.h>
     21
     22#include <asm/apic.h>
     23#include <asm/current.h>
     24#include <asm/kdebug.h>
     25#include <asm/local64.h>
     26#include <asm/nmi.h>
     27#include <asm/reboot.h>
     28#include <asm/traps.h>
     29#include <asm/uv/uv.h>
     30#include <asm/uv/uv_hub.h>
     31#include <asm/uv/uv_mmrs.h>
     32
     33/*
     34 * UV handler for NMI
     35 *
     36 * Handle system-wide NMI events generated by the global 'power nmi' command.
     37 *
     38 * Basic operation is to field the NMI interrupt on each CPU and wait
     39 * until all CPU's have arrived in the NMI handler.  If some CPU's do not
     40 * make it into the handler, try to force them in with the IPI(NMI) signal.
     41 *
     42 * We also have to lessen UV Hub MMR accesses as much as possible as this
     43 * disrupts the UV Hub's primary mission of directing NumaLink traffic and
     44 * can cause system problems to occur.
     45 *
     46 * To do this we register our primary NMI notifier on the NMI_UNKNOWN
     47 * chain.  This reduces the number of false NMI calls when the perf
     48 * tools are running which generate an enormous number of NMIs per
     49 * second (~4M/s for 1024 CPU threads).  Our secondary NMI handler is
     50 * very short as it only checks whether it has been "pinged" with the
     51 * IPI(NMI) signal as mentioned above, and does not read the UV Hub's MMR.
     52 *
     53 */
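       /*
        * In outline, as implemented below: uv_handle_nmi() fields the NMI and
        * uses uv_check_nmi() to decide whether it is a UV system NMI; the
        * first CPU in becomes the master, uv_nmi_wait() gathers the remaining
        * CPUs (pinging stragglers via uv_nmi_nr_cpus_ping()), the selected
        * action (kdump/dump/ips/kdb/kgdb/health) is carried out, and
        * uv_clear_nmi() plus the master's cleanup reset the per-hub and
        * global state.
        */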
     54
     55static struct uv_hub_nmi_s **uv_hub_nmi_list;
     56
     57DEFINE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
     58
     59/* Newer SMM NMI handler, not present in all systems */
     60static unsigned long uvh_nmi_mmrx;		/* UVH_EVENT_OCCURRED0/1 */
     61static unsigned long uvh_nmi_mmrx_clear;	/* UVH_EVENT_OCCURRED0/1_ALIAS */
     62static int uvh_nmi_mmrx_shift;			/* UVH_EVENT_OCCURRED0/1_EXTIO_INT0_SHFT */
     63static char *uvh_nmi_mmrx_type;			/* "EXTIO_INT0" */
     64
     65/* Non-zero indicates newer SMM NMI handler present */
     66static unsigned long uvh_nmi_mmrx_supported;	/* UVH_EXTIO_INT0_BROADCAST */
     67
     68/* Indicates to BIOS that we want to use the newer SMM NMI handler */
     69static unsigned long uvh_nmi_mmrx_req;		/* UVH_BIOS_KERNEL_MMR_ALIAS_2 */
     70static int uvh_nmi_mmrx_req_shift;		/* 62 */
     71
     72/* UV hubless values */
     73#define NMI_CONTROL_PORT	0x70
     74#define NMI_DUMMY_PORT		0x71
     75#define PAD_OWN_GPP_D_0		0x2c
     76#define GPI_NMI_STS_GPP_D_0	0x164
     77#define GPI_NMI_ENA_GPP_D_0	0x174
     78#define STS_GPP_D_0_MASK	0x1
     79#define PAD_CFG_DW0_GPP_D_0	0x4c0
     80#define GPIROUTNMI		(1ul << 17)
     81#define PCH_PCR_GPIO_1_BASE	0xfdae0000ul
     82#define PCH_PCR_GPIO_ADDRESS(offset) (int *)((u64)(pch_base) | (u64)(offset))
     83
     84static u64 *pch_base;
     85static unsigned long nmi_mmr;
     86static unsigned long nmi_mmr_clear;
     87static unsigned long nmi_mmr_pending;
     88
     89static atomic_t	uv_in_nmi;
     90static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1);
     91static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1);
     92static atomic_t uv_nmi_slave_continue;
     93static cpumask_var_t uv_nmi_cpu_mask;
     94
     95static atomic_t uv_nmi_kexec_failed;
     96
     97/* Values for uv_nmi_slave_continue */
     98#define SLAVE_CLEAR	0
     99#define SLAVE_CONTINUE	1
    100#define SLAVE_EXIT	2
    101
    102/*
    103 * Default is all stack dumps go to the console and buffer.
    104 * Lower level to send to log buffer only.
    105 */
    106static int uv_nmi_loglevel = CONSOLE_LOGLEVEL_DEFAULT;
    107module_param_named(dump_loglevel, uv_nmi_loglevel, int, 0644);
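       /*
        * For example (assuming the usual /sys/module/<name>/parameters layout,
        * with this code built in under the "uv_nmi" name):
        *
        *   echo 1 > /sys/module/uv_nmi/parameters/dump_loglevel
        *
        * keeps the potentially very large stack dumps off the console; they
        * remain available in the kernel log buffer (dmesg).
        */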
    108
    109/*
    110 * The following values show statistics on how perf events are affecting
    111 * this system.
    112 */
    113static int param_get_local64(char *buffer, const struct kernel_param *kp)
    114{
    115	return sprintf(buffer, "%lu\n", local64_read((local64_t *)kp->arg));
    116}
    117
    118static int param_set_local64(const char *val, const struct kernel_param *kp)
    119{
    120	/* Clear on any write */
    121	local64_set((local64_t *)kp->arg, 0);
    122	return 0;
    123}
    124
    125static const struct kernel_param_ops param_ops_local64 = {
    126	.get = param_get_local64,
    127	.set = param_set_local64,
    128};
    129#define param_check_local64(name, p) __param_check(name, p, local64_t)
    130
    131static local64_t uv_nmi_count;
    132module_param_named(nmi_count, uv_nmi_count, local64, 0644);
    133
    134static local64_t uv_nmi_misses;
    135module_param_named(nmi_misses, uv_nmi_misses, local64, 0644);
    136
    137static local64_t uv_nmi_ping_count;
    138module_param_named(ping_count, uv_nmi_ping_count, local64, 0644);
    139
    140static local64_t uv_nmi_ping_misses;
    141module_param_named(ping_misses, uv_nmi_ping_misses, local64, 0644);
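       /*
        * The counters above are readable through the same parameter directory
        * (path assumed as above) and, per param_set_local64(), are cleared by
        * any write:
        *
        *   cat /sys/module/uv_nmi/parameters/nmi_count
        *   echo 0 > /sys/module/uv_nmi/parameters/nmi_count
        */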
    142
    143/*
    144 * Following values allow tuning for large systems under heavy loading
    145 */
    146static int uv_nmi_initial_delay = 100;
    147module_param_named(initial_delay, uv_nmi_initial_delay, int, 0644);
    148
    149static int uv_nmi_slave_delay = 100;
    150module_param_named(slave_delay, uv_nmi_slave_delay, int, 0644);
    151
    152static int uv_nmi_loop_delay = 100;
    153module_param_named(loop_delay, uv_nmi_loop_delay, int, 0644);
    154
    155static int uv_nmi_trigger_delay = 10000;
    156module_param_named(trigger_delay, uv_nmi_trigger_delay, int, 0644);
    157
    158static int uv_nmi_wait_count = 100;
    159module_param_named(wait_count, uv_nmi_wait_count, int, 0644);
    160
    161static int uv_nmi_retry_count = 500;
    162module_param_named(retry_count, uv_nmi_retry_count, int, 0644);
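       /*
        * These delay/count tunables can also be given at boot, e.g. (assuming
        * the built-in parameter prefix is "uv_nmi"):
        *
        *   uv_nmi.initial_delay=200 uv_nmi.retry_count=1000
        *
        * on the kernel command line, or adjusted later through
        * /sys/module/uv_nmi/parameters/.
        */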
    163
    164static bool uv_pch_intr_enable = true;
    165static bool uv_pch_intr_now_enabled;
    166module_param_named(pch_intr_enable, uv_pch_intr_enable, bool, 0644);
    167
    168static bool uv_pch_init_enable = true;
    169module_param_named(pch_init_enable, uv_pch_init_enable, bool, 0644);
    170
    171static int uv_nmi_debug;
    172module_param_named(debug, uv_nmi_debug, int, 0644);
    173
    174#define nmi_debug(fmt, ...)				\
    175	do {						\
    176		if (uv_nmi_debug)			\
    177			pr_info(fmt, ##__VA_ARGS__);	\
    178	} while (0)
    179
    180/* Valid NMI Actions */
    181#define	ACTION_LEN	16
    182static struct nmi_action {
    183	char	*action;
    184	char	*desc;
    185} valid_acts[] = {
    186	{	"kdump",	"do kernel crash dump"			},
    187	{	"dump",		"dump process stack for each cpu"	},
    188	{	"ips",		"dump Inst Ptr info for each cpu"	},
    189	{	"kdb",		"enter KDB (needs kgdboc= assignment)"	},
    190	{	"kgdb",		"enter KGDB (needs gdb target remote)"	},
    191	{	"health",	"check if CPUs respond to NMI"		},
    192};
    193typedef char action_t[ACTION_LEN];
    194static action_t uv_nmi_action = { "dump" };
    195
    196static int param_get_action(char *buffer, const struct kernel_param *kp)
    197{
    198	return sprintf(buffer, "%s\n", uv_nmi_action);
    199}
    200
    201static int param_set_action(const char *val, const struct kernel_param *kp)
    202{
    203	int i;
    204	int n = ARRAY_SIZE(valid_acts);
    205	char arg[ACTION_LEN], *p;
    206
    207	/* (remove possible '\n') */
    208	strncpy(arg, val, ACTION_LEN - 1);
    209	arg[ACTION_LEN - 1] = '\0';
    210	p = strchr(arg, '\n');
    211	if (p)
    212		*p = '\0';
    213
    214	for (i = 0; i < n; i++)
    215		if (!strcmp(arg, valid_acts[i].action))
    216			break;
    217
    218	if (i < n) {
    219		strcpy(uv_nmi_action, arg);
    220		pr_info("UV: New NMI action:%s\n", uv_nmi_action);
    221		return 0;
    222	}
    223
    224	pr_err("UV: Invalid NMI action:%s, valid actions are:\n", arg);
    225	for (i = 0; i < n; i++)
    226		pr_err("UV: %-8s - %s\n",
    227			valid_acts[i].action, valid_acts[i].desc);
    228	return -EINVAL;
    229}
    230
    231static const struct kernel_param_ops param_ops_action = {
    232	.get = param_get_action,
    233	.set = param_set_action,
    234};
    235#define param_check_action(name, p) __param_check(name, p, action_t)
    236
    237module_param_named(action, uv_nmi_action, action, 0644);
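       /*
        * Typical use (path assumed as above) is to select the action before
        * the system-wide NMI is triggered with the 'power nmi' command, e.g.:
        *
        *   echo kdb > /sys/module/uv_nmi/parameters/action
        *
        * Invalid names are rejected and the valid actions are listed in the
        * log by param_set_action() above.
        */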
    238
    239static inline bool uv_nmi_action_is(const char *action)
    240{
    241	return (strncmp(uv_nmi_action, action, strlen(action)) == 0);
    242}
    243
    244/* Setup which NMI support is present in system */
    245static void uv_nmi_setup_mmrs(void)
    246{
    247	bool new_nmi_method_only = false;
    248
    249	/* First determine arch specific MMRs to handshake with BIOS */
    250	if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) {	/* UV2,3,4 setup */
    251		uvh_nmi_mmrx = UVH_EVENT_OCCURRED0;
    252		uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED0_ALIAS;
    253		uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT;
    254		uvh_nmi_mmrx_type = "OCRD0-EXTIO_INT0";
    255
    256		uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST;
    257		uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
    258		uvh_nmi_mmrx_req_shift = 62;
    259
    260	} else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) { /* UV5+ setup */
    261		uvh_nmi_mmrx = UVH_EVENT_OCCURRED1;
    262		uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED1_ALIAS;
    263		uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT;
    264		uvh_nmi_mmrx_type = "OCRD1-EXTIO_INT0";
    265
    266		new_nmi_method_only = true;		/* Newer nmi always valid on UV5+ */
    267		uvh_nmi_mmrx_req = 0;			/* no request bit to clear */
    268
    269	} else {
    270		pr_err("UV:%s:NMI support not available on this system\n", __func__);
    271		return;
    272	}
    273
    274	/* Then find out if new NMI is supported */
    275	if (new_nmi_method_only || uv_read_local_mmr(uvh_nmi_mmrx_supported)) {
    276		if (uvh_nmi_mmrx_req)
    277			uv_write_local_mmr(uvh_nmi_mmrx_req,
    278						1UL << uvh_nmi_mmrx_req_shift);
    279		nmi_mmr = uvh_nmi_mmrx;
    280		nmi_mmr_clear = uvh_nmi_mmrx_clear;
    281		nmi_mmr_pending = 1UL << uvh_nmi_mmrx_shift;
    282		pr_info("UV: SMI NMI support: %s\n", uvh_nmi_mmrx_type);
    283	} else {
    284		nmi_mmr = UVH_NMI_MMR;
    285		nmi_mmr_clear = UVH_NMI_MMR_CLEAR;
    286		nmi_mmr_pending = 1UL << UVH_NMI_MMR_SHIFT;
    287		pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMR_TYPE);
    288	}
    289}
    290
    291/* Read NMI MMR and check if NMI flag was set by BMC. */
    292static inline int uv_nmi_test_mmr(struct uv_hub_nmi_s *hub_nmi)
    293{
    294	hub_nmi->nmi_value = uv_read_local_mmr(nmi_mmr);
    295	atomic_inc(&hub_nmi->read_mmr_count);
    296	return !!(hub_nmi->nmi_value & nmi_mmr_pending);
    297}
    298
    299static inline void uv_local_mmr_clear_nmi(void)
    300{
    301	uv_write_local_mmr(nmi_mmr_clear, nmi_mmr_pending);
    302}
    303
    304/*
    305 * UV hubless NMI handler functions
    306 */
    307static inline void uv_reassert_nmi(void)
    308{
    309	/* (from arch/x86/include/asm/mach_traps.h) */
    310	outb(0x8f, NMI_CONTROL_PORT);
    311	inb(NMI_DUMMY_PORT);		/* dummy read */
    312	outb(0x0f, NMI_CONTROL_PORT);
    313	inb(NMI_DUMMY_PORT);		/* dummy read */
    314}
    315
    316static void uv_init_hubless_pch_io(int offset, int mask, int data)
    317{
    318	int *addr = PCH_PCR_GPIO_ADDRESS(offset);
    319	int readd = readl(addr);
    320
    321	if (mask) {			/* OR in new data */
    322		int writed = (readd & ~mask) | data;
    323
    324		nmi_debug("UV:PCH: %p = %x & %x | %x (%x)\n",
    325			addr, readd, ~mask, data, writed);
    326		writel(writed, addr);
    327	} else if (readd & data) {	/* clear status bit */
    328		nmi_debug("UV:PCH: %p = %x\n", addr, data);
    329		writel(data, addr);
    330	}
    331
    332	(void)readl(addr);		/* flush write data */
    333}
    334
    335static void uv_nmi_setup_hubless_intr(void)
    336{
    337	uv_pch_intr_now_enabled = uv_pch_intr_enable;
    338
    339	uv_init_hubless_pch_io(
    340		PAD_CFG_DW0_GPP_D_0, GPIROUTNMI,
    341		uv_pch_intr_now_enabled ? GPIROUTNMI : 0);
    342
    343	nmi_debug("UV:NMI: GPP_D_0 interrupt %s\n",
    344		uv_pch_intr_now_enabled ? "enabled" : "disabled");
    345}
    346
    347static struct init_nmi {
    348	unsigned int	offset;
    349	unsigned int	mask;
    350	unsigned int	data;
    351} init_nmi[] = {
    352	{	/* HOSTSW_OWN_GPP_D_0 */
    353	.offset = 0x84,
    354	.mask = 0x1,
    355	.data = 0x0,	/* ACPI Mode */
    356	},
    357
    358/* Clear status: */
    359	{	/* GPI_INT_STS_GPP_D_0 */
    360	.offset = 0x104,
    361	.mask = 0x0,
    362	.data = 0x1,	/* Clear Status */
    363	},
    364	{	/* GPI_GPE_STS_GPP_D_0 */
    365	.offset = 0x124,
    366	.mask = 0x0,
    367	.data = 0x1,	/* Clear Status */
    368	},
    369	{	/* GPI_SMI_STS_GPP_D_0 */
    370	.offset = 0x144,
    371	.mask = 0x0,
    372	.data = 0x1,	/* Clear Status */
    373	},
    374	{	/* GPI_NMI_STS_GPP_D_0 */
    375	.offset = 0x164,
    376	.mask = 0x0,
    377	.data = 0x1,	/* Clear Status */
    378	},
    379
    380/* Disable interrupts: */
    381	{	/* GPI_INT_EN_GPP_D_0 */
    382	.offset = 0x114,
    383	.mask = 0x1,
    384	.data = 0x0,	/* Disable interrupt generation */
    385	},
    386	{	/* GPI_GPE_EN_GPP_D_0 */
    387	.offset = 0x134,
    388	.mask = 0x1,
    389	.data = 0x0,	/* Disable interrupt generation */
    390	},
    391	{	/* GPI_SMI_EN_GPP_D_0 */
    392	.offset = 0x154,
    393	.mask = 0x1,
    394	.data = 0x0,	/* Disable interrupt generation */
    395	},
    396	{	/* GPI_NMI_EN_GPP_D_0 */
    397	.offset = 0x174,
    398	.mask = 0x1,
    399	.data = 0x0,	/* Disable interrupt generation */
    400	},
    401
    402/* Setup GPP_D_0 Pad Config: */
    403	{	/* PAD_CFG_DW0_GPP_D_0 */
    404	.offset = 0x4c0,
    405	.mask = 0xffffffff,
    406	.data = 0x82020100,
    407/*
    408 *  31:30 Pad Reset Config (PADRSTCFG): = 2h  # PLTRST# (default)
    409 *
    410 *  29    RX Pad State Select (RXPADSTSEL): = 0 # Raw RX pad state directly
    411 *                                                from RX buffer (default)
    412 *
    413 *  28    RX Raw Override to '1' (RXRAW1): = 0 # No Override
    414 *
    415 *  26:25 RX Level/Edge Configuration (RXEVCFG):
    416 *      = 0h # Level
    417 *      = 1h # Edge
    418 *
    419 *  23    RX Invert (RXINV): = 0 # No Inversion (signal active high)
    420 *
    421 *  20    GPIO Input Route IOxAPIC (GPIROUTIOXAPIC):
    422 * = 0 # Routing does not cause peripheral IRQ...
    423 *     # (we want an NMI not an IRQ)
    424 *
    425 *  19    GPIO Input Route SCI (GPIROUTSCI): = 0 # Routing does not cause SCI.
    426 *  18    GPIO Input Route SMI (GPIROUTSMI): = 0 # Routing does not cause SMI.
    427 *  17    GPIO Input Route NMI (GPIROUTNMI): = 1 # Routing can cause NMI.
    428 *
    429 *  11:10 Pad Mode (PMODE1/0): = 0h = GPIO control the Pad.
    430 *   9    GPIO RX Disable (GPIORXDIS):
    431 * = 0 # Enable the input buffer (active low enable)
    432 *
    433 *   8    GPIO TX Disable (GPIOTXDIS):
    434 * = 1 # Disable the output buffer; i.e. Hi-Z
    435 *
    436 *   1 GPIO RX State (GPIORXSTATE): This is the current internal RX pad state.
    437 *   0 GPIO TX State (GPIOTXSTATE):
    438 * = 0 # (Leave at default)
    439 */
    440	},
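       /*
        * Putting the non-zero fields above together reproduces the value:
        *   (2 << 30) | (1 << 25) | (1 << 17) | (1 << 8)
        *   = 0x80000000 | 0x02000000 | 0x00020000 | 0x00000100 = 0x82020100
        * i.e. PADRSTCFG = PLTRST#, RXEVCFG = Edge, GPIROUTNMI = 1, GPIOTXDIS = 1.
        */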
    441
    442/* Pad Config DW1 */
    443	{	/* PAD_CFG_DW1_GPP_D_0 */
    444	.offset = 0x4c4,
    445	.mask = 0x3c00,
    446	.data = 0,	/* Termination = none (default) */
    447	},
    448};
    449
    450static void uv_init_hubless_pch_d0(void)
    451{
    452	int i, read;
    453
    454	read = *PCH_PCR_GPIO_ADDRESS(PAD_OWN_GPP_D_0);
    455	if (read != 0) {
    456		pr_info("UV: Hubless NMI already configured\n");
    457		return;
    458	}
    459
    460	nmi_debug("UV: Initializing UV Hubless NMI on PCH\n");
    461	for (i = 0; i < ARRAY_SIZE(init_nmi); i++) {
    462		uv_init_hubless_pch_io(init_nmi[i].offset,
    463					init_nmi[i].mask,
    464					init_nmi[i].data);
    465	}
    466}
    467
    468static int uv_nmi_test_hubless(struct uv_hub_nmi_s *hub_nmi)
    469{
    470	int *pstat = PCH_PCR_GPIO_ADDRESS(GPI_NMI_STS_GPP_D_0);
    471	int status = *pstat;
    472
    473	hub_nmi->nmi_value = status;
    474	atomic_inc(&hub_nmi->read_mmr_count);
    475
    476	if (!(status & STS_GPP_D_0_MASK))	/* Not a UV external NMI */
    477		return 0;
    478
    479	*pstat = STS_GPP_D_0_MASK;	/* Is a UV NMI: clear GPP_D_0 status */
    480	(void)*pstat;			/* Flush write */
    481
    482	return 1;
    483}
    484
    485static int uv_test_nmi(struct uv_hub_nmi_s *hub_nmi)
    486{
    487	if (hub_nmi->hub_present)
    488		return uv_nmi_test_mmr(hub_nmi);
    489
    490	if (hub_nmi->pch_owner)		/* Only PCH owner can check status */
    491		return uv_nmi_test_hubless(hub_nmi);
    492
    493	return -1;
    494}
    495
    496/*
    497 * If first CPU in on this hub, set hub_nmi "in_nmi" and "owner" values and
    498 * return true.  If first CPU in on the system, set global "in_nmi" flag.
    499 */
    500static int uv_set_in_nmi(int cpu, struct uv_hub_nmi_s *hub_nmi)
    501{
    502	int first = atomic_add_unless(&hub_nmi->in_nmi, 1, 1);
    503
    504	if (first) {
    505		atomic_set(&hub_nmi->cpu_owner, cpu);
    506		if (atomic_add_unless(&uv_in_nmi, 1, 1))
    507			atomic_set(&uv_nmi_cpu, cpu);
    508
    509		atomic_inc(&hub_nmi->nmi_count);
    510	}
    511	return first;
    512}
    513
    514/* Check if this is a system NMI event */
    515static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi)
    516{
    517	int cpu = smp_processor_id();
    518	int nmi = 0;
    519	int nmi_detected = 0;
    520
    521	local64_inc(&uv_nmi_count);
    522	this_cpu_inc(uv_cpu_nmi.queries);
    523
    524	do {
    525		nmi = atomic_read(&hub_nmi->in_nmi);
    526		if (nmi)
    527			break;
    528
    529		if (raw_spin_trylock(&hub_nmi->nmi_lock)) {
    530			nmi_detected = uv_test_nmi(hub_nmi);
    531
    532			/* Check flag for UV external NMI */
    533			if (nmi_detected > 0) {
    534				uv_set_in_nmi(cpu, hub_nmi);
    535				nmi = 1;
    536				break;
    537			}
    538
    539			/* A non-PCH node in a hubless system waits for NMI */
    540			else if (nmi_detected < 0)
    541				goto slave_wait;
    542
    543			/* MMR/PCH NMI flag is clear */
    544			raw_spin_unlock(&hub_nmi->nmi_lock);
    545
    546		} else {
    547
    548			/* Wait a moment for the HUB NMI locker to set flag */
    549slave_wait:		cpu_relax();
    550			udelay(uv_nmi_slave_delay);
    551
    552			/* Re-check hub in_nmi flag */
    553			nmi = atomic_read(&hub_nmi->in_nmi);
    554			if (nmi)
    555				break;
    556		}
    557
    558		/*
    559		 * Check if this BMC missed setting the MMR NMI flag, or if this is a
    560		 * UV hubless system where only the PCH owner can check the flag
    561		 */
    562		if (!nmi) {
    563			nmi = atomic_read(&uv_in_nmi);
    564			if (nmi)
    565				uv_set_in_nmi(cpu, hub_nmi);
    566		}
    567
    568		/* If we're holding the hub lock, release it now */
    569		if (nmi_detected < 0)
    570			raw_spin_unlock(&hub_nmi->nmi_lock);
    571
    572	} while (0);
    573
    574	if (!nmi)
    575		local64_inc(&uv_nmi_misses);
    576
    577	return nmi;
    578}
    579
    580/* Need to reset the NMI MMR register, but only once per hub. */
    581static inline void uv_clear_nmi(int cpu)
    582{
    583	struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
    584
    585	if (cpu == atomic_read(&hub_nmi->cpu_owner)) {
    586		atomic_set(&hub_nmi->cpu_owner, -1);
    587		atomic_set(&hub_nmi->in_nmi, 0);
    588		if (hub_nmi->hub_present)
    589			uv_local_mmr_clear_nmi();
    590		else
    591			uv_reassert_nmi();
    592		raw_spin_unlock(&hub_nmi->nmi_lock);
    593	}
    594}
    595
    596/* Ping non-responding CPU's, attempting to force them into the NMI handler */
    597static void uv_nmi_nr_cpus_ping(void)
    598{
    599	int cpu;
    600
    601	for_each_cpu(cpu, uv_nmi_cpu_mask)
    602		uv_cpu_nmi_per(cpu).pinging = 1;
    603
    604	apic->send_IPI_mask(uv_nmi_cpu_mask, APIC_DM_NMI);
    605}
    606
    607/* Clean up flags for CPU's that ignored both NMI and ping */
    608static void uv_nmi_cleanup_mask(void)
    609{
    610	int cpu;
    611
    612	for_each_cpu(cpu, uv_nmi_cpu_mask) {
    613		uv_cpu_nmi_per(cpu).pinging =  0;
    614		uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_OUT;
    615		cpumask_clear_cpu(cpu, uv_nmi_cpu_mask);
    616	}
    617}
    618
    619/* Loop waiting as CPU's enter NMI handler */
    620static int uv_nmi_wait_cpus(int first)
    621{
    622	int i, j, k, n = num_online_cpus();
    623	int last_k = 0, waiting = 0;
    624	int cpu = smp_processor_id();
    625
    626	if (first) {
    627		cpumask_copy(uv_nmi_cpu_mask, cpu_online_mask);
    628		k = 0;
    629	} else {
    630		k = n - cpumask_weight(uv_nmi_cpu_mask);
    631	}
    632
    633	/* PCH NMI causes only one CPU to respond */
    634	if (first && uv_pch_intr_now_enabled) {
    635		cpumask_clear_cpu(cpu, uv_nmi_cpu_mask);
    636		return n - k - 1;
    637	}
    638
    639	udelay(uv_nmi_initial_delay);
    640	for (i = 0; i < uv_nmi_retry_count; i++) {
    641		int loop_delay = uv_nmi_loop_delay;
    642
    643		for_each_cpu(j, uv_nmi_cpu_mask) {
    644			if (uv_cpu_nmi_per(j).state) {
    645				cpumask_clear_cpu(j, uv_nmi_cpu_mask);
    646				if (++k >= n)
    647					break;
    648			}
    649		}
    650		if (k >= n) {		/* all in? */
    651			k = n;
    652			break;
    653		}
    654		if (last_k != k) {	/* abort if no new CPU's coming in */
    655			last_k = k;
    656			waiting = 0;
    657		} else if (++waiting > uv_nmi_wait_count)
    658			break;
    659
    660		/* Extend delay if waiting only for CPU 0: */
    661		if (waiting && (n - k) == 1 &&
    662		    cpumask_test_cpu(0, uv_nmi_cpu_mask))
    663			loop_delay *= 100;
    664
    665		udelay(loop_delay);
    666	}
    667	atomic_set(&uv_nmi_cpus_in_nmi, k);
    668	return n - k;
    669}
    670
    671/* Wait until all slave CPU's have entered UV NMI handler */
    672static void uv_nmi_wait(int master)
    673{
    674	/* Indicate this CPU is in: */
    675	this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_IN);
    676
    677	/* If not the first CPU in (the master), then we are a slave CPU */
    678	if (!master)
    679		return;
    680
    681	do {
    682		/* Wait for all other CPU's to gather here */
    683		if (!uv_nmi_wait_cpus(1))
    684			break;
    685
    686		/* If not all made it in, send IPI NMI to them */
    687		pr_alert("UV: Sending NMI IPI to %d CPUs: %*pbl\n",
    688			 cpumask_weight(uv_nmi_cpu_mask),
    689			 cpumask_pr_args(uv_nmi_cpu_mask));
    690
    691		uv_nmi_nr_cpus_ping();
    692
    693		/* If all CPU's are in, then done */
    694		if (!uv_nmi_wait_cpus(0))
    695			break;
    696
    697		pr_alert("UV: %d CPUs not in NMI loop: %*pbl\n",
    698			 cpumask_weight(uv_nmi_cpu_mask),
    699			 cpumask_pr_args(uv_nmi_cpu_mask));
    700	} while (0);
    701
    702	pr_alert("UV: %d of %d CPUs in NMI\n",
    703		atomic_read(&uv_nmi_cpus_in_nmi), num_online_cpus());
    704}
    705
    706/* Dump Instruction Pointer header */
    707static void uv_nmi_dump_cpu_ip_hdr(void)
    708{
    709	pr_info("\nUV: %4s %6s %-32s %s   (Note: PID 0 not listed)\n",
    710		"CPU", "PID", "COMMAND", "IP");
    711}
    712
    713/* Dump Instruction Pointer info */
    714static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
    715{
    716	pr_info("UV: %4d %6d %-32.32s %pS",
    717		cpu, current->pid, current->comm, (void *)regs->ip);
    718}
    719
    720/*
    721 * Dump this CPU's state.  If action was set to "kdump" and the crash_kexec
    722 * failed, then we provide "dump" as an alternate action.  Action "dump" now
    723 * also includes the "ips" (instruction pointers) action, whereas the
    724 * action "ips" only displays instruction pointers for the non-idle CPU's.
    725 * This is an abbreviated form of the "ps" command.
    726 */
    727static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs)
    728{
    729	const char *dots = " ................................. ";
    730
    731	if (cpu == 0)
    732		uv_nmi_dump_cpu_ip_hdr();
    733
    734	if (current->pid != 0 || !uv_nmi_action_is("ips"))
    735		uv_nmi_dump_cpu_ip(cpu, regs);
    736
    737	if (uv_nmi_action_is("dump")) {
    738		pr_info("UV:%sNMI process trace for CPU %d\n", dots, cpu);
    739		show_regs(regs);
    740	}
    741
    742	this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE);
    743}
    744
    745/* Trigger a slave CPU to dump its state */
    746static void uv_nmi_trigger_dump(int cpu)
    747{
    748	int retry = uv_nmi_trigger_delay;
    749
    750	if (uv_cpu_nmi_per(cpu).state != UV_NMI_STATE_IN)
    751		return;
    752
    753	uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP;
    754	do {
    755		cpu_relax();
    756		udelay(10);
    757		if (uv_cpu_nmi_per(cpu).state
    758				!= UV_NMI_STATE_DUMP)
    759			return;
    760	} while (--retry > 0);
    761
    762	pr_crit("UV: CPU %d stuck in process dump function\n", cpu);
    763	uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP_DONE;
    764}
    765
    766/* Wait until all CPU's ready to exit */
    767static void uv_nmi_sync_exit(int master)
    768{
    769	atomic_dec(&uv_nmi_cpus_in_nmi);
    770	if (master) {
    771		while (atomic_read(&uv_nmi_cpus_in_nmi) > 0)
    772			cpu_relax();
    773		atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR);
    774	} else {
    775		while (atomic_read(&uv_nmi_slave_continue))
    776			cpu_relax();
    777	}
    778}
    779
    780/* Current "health" check is to check which CPU's are responsive */
    781static void uv_nmi_action_health(int cpu, struct pt_regs *regs, int master)
    782{
    783	if (master) {
    784		int in = atomic_read(&uv_nmi_cpus_in_nmi);
    785		int out = num_online_cpus() - in;
    786
    787		pr_alert("UV: NMI CPU health check (non-responding:%d)\n", out);
    788		atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
    789	} else {
    790		while (!atomic_read(&uv_nmi_slave_continue))
    791			cpu_relax();
    792	}
    793	uv_nmi_sync_exit(master);
    794}
    795
    796/* Walk through CPU list and dump state of each */
    797static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
    798{
    799	if (master) {
    800		int tcpu;
    801		int ignored = 0;
    802		int saved_console_loglevel = console_loglevel;
    803
    804		pr_alert("UV: tracing %s for %d CPUs from CPU %d\n",
    805			uv_nmi_action_is("ips") ? "IPs" : "processes",
    806			atomic_read(&uv_nmi_cpus_in_nmi), cpu);
    807
    808		console_loglevel = uv_nmi_loglevel;
    809		atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
    810		for_each_online_cpu(tcpu) {
    811			if (cpumask_test_cpu(tcpu, uv_nmi_cpu_mask))
    812				ignored++;
    813			else if (tcpu == cpu)
    814				uv_nmi_dump_state_cpu(tcpu, regs);
    815			else
    816				uv_nmi_trigger_dump(tcpu);
    817		}
    818		if (ignored)
    819			pr_alert("UV: %d CPUs ignored NMI\n", ignored);
    820
    821		console_loglevel = saved_console_loglevel;
    822		pr_alert("UV: process trace complete\n");
    823	} else {
    824		while (!atomic_read(&uv_nmi_slave_continue))
    825			cpu_relax();
    826		while (this_cpu_read(uv_cpu_nmi.state) != UV_NMI_STATE_DUMP)
    827			cpu_relax();
    828		uv_nmi_dump_state_cpu(cpu, regs);
    829	}
    830	uv_nmi_sync_exit(master);
    831}
    832
    833static void uv_nmi_touch_watchdogs(void)
    834{
    835	touch_softlockup_watchdog_sync();
    836	clocksource_touch_watchdog();
    837	rcu_cpu_stall_reset();
    838	touch_nmi_watchdog();
    839}
    840
    841static void uv_nmi_kdump(int cpu, int main, struct pt_regs *regs)
    842{
    843	/* Check if kdump kernel loaded for both main and secondary CPUs */
    844	if (!kexec_crash_image) {
    845		if (main)
    846			pr_err("UV: NMI error: kdump kernel not loaded\n");
    847		return;
    848	}
    849
    850	/* Call crash to dump system state */
    851	if (main) {
    852		pr_emerg("UV: NMI executing crash_kexec on CPU%d\n", cpu);
    853		crash_kexec(regs);
    854
    855		pr_emerg("UV: crash_kexec unexpectedly returned\n");
    856		atomic_set(&uv_nmi_kexec_failed, 1);
    857
    858	} else { /* secondary */
    859
    860		/* If kdump kernel fails, secondaries will exit this loop */
    861		while (atomic_read(&uv_nmi_kexec_failed) == 0) {
    862
    863			/* Once the crash CPU shootdown starts, this call does not return */
    864			run_crash_ipi_callback(regs);
    865
    866			mdelay(10);
    867		}
    868	}
    869}
    870
    871#ifdef CONFIG_KGDB
    872#ifdef CONFIG_KGDB_KDB
    873static inline int uv_nmi_kdb_reason(void)
    874{
    875	return KDB_REASON_SYSTEM_NMI;
    876}
    877#else /* !CONFIG_KGDB_KDB */
    878static inline int uv_nmi_kdb_reason(void)
    879{
    880	/* Ensure user is expecting to attach gdb remote */
    881	if (uv_nmi_action_is("kgdb"))
    882		return 0;
    883
    884	pr_err("UV: NMI error: KDB is not enabled in this kernel\n");
    885	return -1;
    886}
    887#endif /* CONFIG_KGDB_KDB */
    888
    889/*
    890 * Call KGDB/KDB from NMI handler
    891 *
    892 * Note that if both KGDB and KDB are configured, then the action of 'kgdb' or
    893 * 'kdb' has no effect on which is used.  See the KGDB documentation for further
    894 * information.
    895 */
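       /*
        * Both paths assume kgdb has a console to talk over, e.g. a
        * kgdboc=ttyS0,115200 kernel parameter as noted in the action
        * descriptions above; for the 'kgdb' action a remote gdb must also
        * attach to that port ("target remote ...").
        */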
    896static void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
    897{
    898	if (master) {
    899		int reason = uv_nmi_kdb_reason();
    900		int ret;
    901
    902		if (reason < 0)
    903			return;
    904
    905		/* Call KGDB NMI handler as MASTER */
    906		ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, reason,
    907				&uv_nmi_slave_continue);
    908		if (ret) {
    909			pr_alert("KGDB returned error, is kgdboc set?\n");
    910			atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
    911		}
    912	} else {
    913		/* Wait for KGDB signal that it's ready for slaves to enter */
    914		int sig;
    915
    916		do {
    917			cpu_relax();
    918			sig = atomic_read(&uv_nmi_slave_continue);
    919		} while (!sig);
    920
    921		/* Call KGDB as slave */
    922		if (sig == SLAVE_CONTINUE)
    923			kgdb_nmicallback(cpu, regs);
    924	}
    925	uv_nmi_sync_exit(master);
    926}
    927
    928#else /* !CONFIG_KGDB */
    929static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
    930{
    931	pr_err("UV: NMI error: KGDB is not enabled in this kernel\n");
    932}
    933#endif /* !CONFIG_KGDB */
    934
    935/*
    936 * UV NMI handler
    937 */
    938static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
    939{
    940	struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
    941	int cpu = smp_processor_id();
    942	int master = 0;
    943	unsigned long flags;
    944
    945	local_irq_save(flags);
    946
    947	/* If not a UV System NMI, ignore */
    948	if (!this_cpu_read(uv_cpu_nmi.pinging) && !uv_check_nmi(hub_nmi)) {
    949		local_irq_restore(flags);
    950		return NMI_DONE;
    951	}
    952
    953	/* Check whether we are the first CPU into the NMI handler (the master) */
    954	master = (atomic_read(&uv_nmi_cpu) == cpu);
    955
    956	/* If NMI action is "kdump", then attempt to do it */
    957	if (uv_nmi_action_is("kdump")) {
    958		uv_nmi_kdump(cpu, master, regs);
    959
    960		/* Unexpected return, revert action to "dump" */
    961		if (master)
    962			strncpy(uv_nmi_action, "dump", strlen(uv_nmi_action));
    963	}
    964
    965	/* Pause as all CPU's enter the NMI handler */
    966	uv_nmi_wait(master);
    967
    968	/* Process actions other than "kdump": */
    969	if (uv_nmi_action_is("health")) {
    970		uv_nmi_action_health(cpu, regs, master);
    971	} else if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) {
    972		uv_nmi_dump_state(cpu, regs, master);
    973	} else if (uv_nmi_action_is("kdb") || uv_nmi_action_is("kgdb")) {
    974		uv_call_kgdb_kdb(cpu, regs, master);
    975	} else {
    976		if (master)
    977			pr_alert("UV: unknown NMI action: %s\n", uv_nmi_action);
    978		uv_nmi_sync_exit(master);
    979	}
    980
    981	/* Clear per_cpu "in_nmi" flag */
    982	this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_OUT);
    983
    984	/* Clear MMR NMI flag on each hub */
    985	uv_clear_nmi(cpu);
    986
    987	/* Clear global flags */
    988	if (master) {
    989		if (!cpumask_empty(uv_nmi_cpu_mask))
    990			uv_nmi_cleanup_mask();
    991		atomic_set(&uv_nmi_cpus_in_nmi, -1);
    992		atomic_set(&uv_nmi_cpu, -1);
    993		atomic_set(&uv_in_nmi, 0);
    994		atomic_set(&uv_nmi_kexec_failed, 0);
    995		atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR);
    996	}
    997
    998	uv_nmi_touch_watchdogs();
    999	local_irq_restore(flags);
   1000
   1001	return NMI_HANDLED;
   1002}
   1003
   1004/*
   1005 * NMI handler for pulling in CPU's when perf events are grabbing our NMI
   1006 */
   1007static int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs)
   1008{
   1009	int ret;
   1010
   1011	this_cpu_inc(uv_cpu_nmi.queries);
   1012	if (!this_cpu_read(uv_cpu_nmi.pinging)) {
   1013		local64_inc(&uv_nmi_ping_misses);
   1014		return NMI_DONE;
   1015	}
   1016
   1017	this_cpu_inc(uv_cpu_nmi.pings);
   1018	local64_inc(&uv_nmi_ping_count);
   1019	ret = uv_handle_nmi(reason, regs);
   1020	this_cpu_write(uv_cpu_nmi.pinging, 0);
   1021	return ret;
   1022}
   1023
   1024static void uv_register_nmi_notifier(void)
   1025{
   1026	if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
   1027		pr_warn("UV: NMI handler failed to register\n");
   1028
   1029	if (register_nmi_handler(NMI_LOCAL, uv_handle_nmi_ping, 0, "uvping"))
   1030		pr_warn("UV: PING NMI handler failed to register\n");
   1031}
   1032
   1033void uv_nmi_init(void)
   1034{
   1035	unsigned int value;
   1036
   1037	/*
   1038	 * Unmask NMI on all CPU's
   1039	 */
   1040	value = apic_read(APIC_LVT1) | APIC_DM_NMI;
   1041	value &= ~APIC_LVT_MASKED;
   1042	apic_write(APIC_LVT1, value);
   1043}
   1044
   1045/* Setup HUB NMI info */
   1046static void __init uv_nmi_setup_common(bool hubbed)
   1047{
   1048	int size = sizeof(void *) * (1 << NODES_SHIFT);
   1049	int cpu;
   1050
   1051	uv_hub_nmi_list = kzalloc(size, GFP_KERNEL);
   1052	nmi_debug("UV: NMI hub list @ 0x%p (%d)\n", uv_hub_nmi_list, size);
   1053	BUG_ON(!uv_hub_nmi_list);
   1054	size = sizeof(struct uv_hub_nmi_s);
   1055	for_each_present_cpu(cpu) {
   1056		int nid = cpu_to_node(cpu);
   1057		if (uv_hub_nmi_list[nid] == NULL) {
   1058			uv_hub_nmi_list[nid] = kzalloc_node(size,
   1059							    GFP_KERNEL, nid);
   1060			BUG_ON(!uv_hub_nmi_list[nid]);
   1061			raw_spin_lock_init(&(uv_hub_nmi_list[nid]->nmi_lock));
   1062			atomic_set(&uv_hub_nmi_list[nid]->cpu_owner, -1);
   1063			uv_hub_nmi_list[nid]->hub_present = hubbed;
   1064			uv_hub_nmi_list[nid]->pch_owner = (nid == 0);
   1065		}
   1066		uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid];
   1067	}
   1068	BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL));
   1069}
   1070
   1071/* Setup for UV Hub systems */
   1072void __init uv_nmi_setup(void)
   1073{
   1074	uv_nmi_setup_mmrs();
   1075	uv_nmi_setup_common(true);
   1076	uv_register_nmi_notifier();
   1077	pr_info("UV: Hub NMI enabled\n");
   1078}
   1079
   1080/* Setup for UV Hubless systems */
   1081void __init uv_nmi_setup_hubless(void)
   1082{
   1083	uv_nmi_setup_common(false);
   1084	pch_base = xlate_dev_mem_ptr(PCH_PCR_GPIO_1_BASE);
   1085	nmi_debug("UV: PCH base:%p from 0x%lx, GPP_D_0\n",
   1086		pch_base, PCH_PCR_GPIO_1_BASE);
   1087	if (uv_pch_init_enable)
   1088		uv_init_hubless_pch_d0();
   1089	uv_init_hubless_pch_io(GPI_NMI_ENA_GPP_D_0,
   1090				STS_GPP_D_0_MASK, STS_GPP_D_0_MASK);
   1091	uv_nmi_setup_hubless_intr();
   1092	/* Ensure NMI enabled in Processor Interface Reg: */
   1093	uv_reassert_nmi();
   1094	uv_register_nmi_notifier();
   1095	pr_info("UV: PCH NMI enabled\n");
   1096}