watchdog_hld.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
watchdog_hld.c (7862B)
      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Detect hard lockups on a system
      4 *
      5 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
      6 *
      7 * Note: Most of this code is borrowed heavily from the original softlockup
      8 * detector, so thanks to Ingo for the initial implementation.
      9 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
     10 * to those contributors as well.
     11 */
     12
     13#define pr_fmt(fmt) "NMI watchdog: " fmt
     14
     15#include <linux/nmi.h>
     16#include <linux/atomic.h>
     17#include <linux/module.h>
     18#include <linux/sched/debug.h>
     19
     20#include <asm/irq_regs.h>
     21#include <linux/perf_event.h>
     22
     23static DEFINE_PER_CPU(bool, hard_watchdog_warn);
     24static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
     25static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
     26static DEFINE_PER_CPU(struct perf_event *, dead_event);
     27static struct cpumask dead_events_mask;
     28
     29static unsigned long hardlockup_allcpu_dumped;
     30static atomic_t watchdog_cpus = ATOMIC_INIT(0);
     31
     32notrace void arch_touch_nmi_watchdog(void)
     33{
     34	/*
     35	 * Using __raw here because some code paths have
     36	 * preemption enabled.  If preemption is enabled
     37	 * then interrupts should be enabled too, in which
     38	 * case we shouldn't have to worry about the watchdog
     39	 * going off.
     40	 */
     41	raw_cpu_write(watchdog_nmi_touch, true);
     42}
     43EXPORT_SYMBOL(arch_touch_nmi_watchdog);
     44
     45#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
     46static DEFINE_PER_CPU(ktime_t, last_timestamp);
     47static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
     48static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
     49
     50void watchdog_update_hrtimer_threshold(u64 period)
     51{
     52	/*
     53	 * The hrtimer runs with a period of (watchdog_threshold * 2) / 5
     54	 *
     55	 * So it runs effectively with 2.5 times the rate of the NMI
     56	 * watchdog. That means the hrtimer should fire 2-3 times before
     57	 * the NMI watchdog expires. The NMI watchdog on x86 is based on
     58	 * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
     59	 * might run way faster than expected and the NMI fires in a
     60	 * smaller period than the one deduced from the nominal CPU
     61	 * frequency. Depending on the Turbo-Mode factor this might be fast
     62	 * enough to get the NMI period smaller than the hrtimer watchdog
     63	 * period and trigger false positives.
     64	 *
     65	 * The sample threshold is used to check in the NMI handler whether
     66	 * the minimum time between two NMI samples has elapsed. That
     67	 * prevents false positives.
     68	 *
     69	 * Set this to 4/5 of the actual watchdog threshold period so the
     70	 * hrtimer is guaranteed to fire at least once within the real
     71	 * watchdog threshold.
     72	 */
     73	watchdog_hrtimer_sample_threshold = period * 2;
     74}
     75
     76static bool watchdog_check_timestamp(void)
     77{
     78	ktime_t delta, now = ktime_get_mono_fast_ns();
     79
     80	delta = now - __this_cpu_read(last_timestamp);
     81	if (delta < watchdog_hrtimer_sample_threshold) {
     82		/*
     83		 * If ktime is jiffies based, a stalled timer would prevent
     84		 * jiffies from being incremented and the filter would look
     85		 * at a stale timestamp and never trigger.
     86		 */
     87		if (__this_cpu_inc_return(nmi_rearmed) < 10)
     88			return false;
     89	}
     90	__this_cpu_write(nmi_rearmed, 0);
     91	__this_cpu_write(last_timestamp, now);
     92	return true;
     93}
     94#else
     /* Timestamp filtering is compiled out: every NMI sample is accepted. */
     static inline bool watchdog_check_timestamp(void)
     {
     	return true;
     }
     99#endif
    100
    101static struct perf_event_attr wd_hw_attr = {
    102	.type		= PERF_TYPE_HARDWARE,
    103	.config		= PERF_COUNT_HW_CPU_CYCLES,
    104	.size		= sizeof(struct perf_event_attr),
    105	.pinned		= 1,
    106	.disabled	= 1,
    107};
    108
    109/* Callback function for perf event subsystem */
    110static void watchdog_overflow_callback(struct perf_event *event,
    111				       struct perf_sample_data *data,
    112				       struct pt_regs *regs)
    113{
    114	/* Ensure the watchdog never gets throttled */
    115	event->hw.interrupts = 0;
    116
    117	if (__this_cpu_read(watchdog_nmi_touch) == true) {
    118		__this_cpu_write(watchdog_nmi_touch, false);
    119		return;
    120	}
    121
    122	if (!watchdog_check_timestamp())
    123		return;
    124
    125	/* check for a hardlockup
    126	 * This is done by making sure our timer interrupt
    127	 * is incrementing.  The timer interrupt should have
    128	 * fired multiple times before we overflow'd.  If it hasn't
    129	 * then this is a good indication the cpu is stuck
    130	 */
    131	if (is_hardlockup()) {
    132		int this_cpu = smp_processor_id();
    133
    134		/* only print hardlockups once */
    135		if (__this_cpu_read(hard_watchdog_warn) == true)
    136			return;
    137
    138		pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",
    139			 this_cpu);
    140		print_modules();
    141		print_irqtrace_events(current);
    142		if (regs)
    143			show_regs(regs);
    144		else
    145			dump_stack();
    146
    147		/*
    148		 * Perform all-CPU dump only once to avoid multiple hardlockups
    149		 * generating interleaving traces
    150		 */
    151		if (sysctl_hardlockup_all_cpu_backtrace &&
    152				!test_and_set_bit(0, &hardlockup_allcpu_dumped))
    153			trigger_allbutself_cpu_backtrace();
    154
    155		if (hardlockup_panic)
    156			nmi_panic(regs, "Hard LOCKUP");
    157
    158		__this_cpu_write(hard_watchdog_warn, true);
    159		return;
    160	}
    161
    162	__this_cpu_write(hard_watchdog_warn, false);
    163	return;
    164}
    165
    166static int hardlockup_detector_event_create(void)
    167{
    168	unsigned int cpu = smp_processor_id();
    169	struct perf_event_attr *wd_attr;
    170	struct perf_event *evt;
    171
    172	wd_attr = &wd_hw_attr;
    173	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
    174
    175	/* Try to register using hardware perf events */
    176	evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
    177					       watchdog_overflow_callback, NULL);
    178	if (IS_ERR(evt)) {
    179		pr_debug("Perf event create on CPU %d failed with %ld\n", cpu,
    180			 PTR_ERR(evt));
    181		return PTR_ERR(evt);
    182	}
    183	this_cpu_write(watchdog_ev, evt);
    184	return 0;
    185}
    186
    187/**
    188 * hardlockup_detector_perf_enable - Enable the local event
    189 */
    190void hardlockup_detector_perf_enable(void)
    191{
    192	if (hardlockup_detector_event_create())
    193		return;
    194
    195	/* use original value for check */
    196	if (!atomic_fetch_inc(&watchdog_cpus))
    197		pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");
    198
    199	perf_event_enable(this_cpu_read(watchdog_ev));
    200}
    201
    202/**
    203 * hardlockup_detector_perf_disable - Disable the local event
    204 */
    205void hardlockup_detector_perf_disable(void)
    206{
    207	struct perf_event *event = this_cpu_read(watchdog_ev);
    208
    209	if (event) {
    210		perf_event_disable(event);
    211		this_cpu_write(watchdog_ev, NULL);
    212		this_cpu_write(dead_event, event);
    213		cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
    214		atomic_dec(&watchdog_cpus);
    215	}
    216}
    217
    218/**
    219 * hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them
    220 *
    221 * Called from lockup_detector_cleanup(). Serialized by the caller.
    222 */
    223void hardlockup_detector_perf_cleanup(void)
    224{
    225	int cpu;
    226
    227	for_each_cpu(cpu, &dead_events_mask) {
    228		struct perf_event *event = per_cpu(dead_event, cpu);
    229
    230		/*
    231		 * Required because for_each_cpu() reports  unconditionally
    232		 * CPU0 as set on UP kernels. Sigh.
    233		 */
    234		if (event)
    235			perf_event_release_kernel(event);
    236		per_cpu(dead_event, cpu) = NULL;
    237	}
    238	cpumask_clear(&dead_events_mask);
    239}
    240
    241/**
    242 * hardlockup_detector_perf_stop - Globally stop watchdog events
    243 *
    244 * Special interface for x86 to handle the perf HT bug.
    245 */
    246void __init hardlockup_detector_perf_stop(void)
    247{
    248	int cpu;
    249
    250	lockdep_assert_cpus_held();
    251
    252	for_each_online_cpu(cpu) {
    253		struct perf_event *event = per_cpu(watchdog_ev, cpu);
    254
    255		if (event)
    256			perf_event_disable(event);
    257	}
    258}
    259
    260/**
    261 * hardlockup_detector_perf_restart - Globally restart watchdog events
    262 *
    263 * Special interface for x86 to handle the perf HT bug.
    264 */
    265void __init hardlockup_detector_perf_restart(void)
    266{
    267	int cpu;
    268
    269	lockdep_assert_cpus_held();
    270
    271	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
    272		return;
    273
    274	for_each_online_cpu(cpu) {
    275		struct perf_event *event = per_cpu(watchdog_ev, cpu);
    276
    277		if (event)
    278			perf_event_enable(event);
    279	}
    280}
    281
    282/**
    283 * hardlockup_detector_perf_init - Probe whether NMI event is available at all
    284 */
    285int __init hardlockup_detector_perf_init(void)
    286{
    287	int ret = hardlockup_detector_event_create();
    288
    289	if (ret) {
    290		pr_info("Perf NMI watchdog permanently disabled\n");
    291	} else {
    292		perf_event_release_kernel(this_cpu_read(watchdog_ev));
    293		this_cpu_write(watchdog_ev, NULL);
    294	}
    295	return ret;
    296}