cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

tick-broadcast.c (30978B)


// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly;
static cpumask_var_t tmpmask __cpumask_var_read_mostly;
static int tick_broadcast_forced;

static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);

#ifdef CONFIG_TICK_ONESHOT
static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);

static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
# ifdef CONFIG_HOTPLUG_CPU
static void tick_broadcast_oneshot_offline(unsigned int cpu);
# endif
#else
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
# ifdef CONFIG_HOTPLUG_CPU
static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
# endif
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
	return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
	return tick_broadcast_mask;
}

static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);

const struct clock_event_device *tick_get_wakeup_device(int cpu)
{
	return tick_get_oneshot_wakeup_device(cpu);
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (bc)
		tick_setup_periodic(bc, 1);
}

/*
 * Check, if the device can be utilized as broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
					struct clock_event_device *newdev)
{
	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	return !curdev || newdev->rating > curdev->rating;
}
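
/*
 * Illustrative note (not part of the upstream comment): on a typical x86
 * system the local APIC timer carries CLOCK_EVT_FEAT_C3STOP and is
 * rejected by the checks above, while a global device such as the HPET
 * or PIT keeps counting in deep C-states and therefore qualifies; among
 * eligible devices the one with the highest rating is kept.
 */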

#ifdef CONFIG_TICK_ONESHOT
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return per_cpu(tick_oneshot_wakeup_device, cpu);
}

static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
{
	/*
	 * If we woke up early and the tick was reprogrammed in the
	 * meantime then this may be spurious but harmless.
	 */
	tick_receive_broadcast();
}

static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu);

	if (!newdev)
		goto set_device;

	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
		return false;

	if (curdev && newdev->rating <= curdev->rating)
		return false;

	if (!try_module_get(newdev->owner))
		return false;

	newdev->event_handler = tick_oneshot_wakeup_handler;
set_device:
	clockevents_exchange_device(curdev, newdev);
	per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
	return true;
}
#else
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return NULL;
}

static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	return false;
}
#endif

/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *cur = tick_broadcast_device.evtdev;

	if (tick_set_oneshot_wakeup_device(dev, cpu))
		return;

	if (!tick_check_broadcast_device(cur, dev))
		return;

	if (!try_module_get(dev->owner))
		return;

	clockevents_exchange_device(cur, dev);
	if (cur)
		cur->event_handler = clockevents_handle_noop;
	tick_broadcast_device.evtdev = dev;
	if (!cpumask_empty(tick_broadcast_mask))
		tick_broadcast_start_periodic(dev);

	if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
		return;

	/*
	 * If the system already runs in oneshot mode, switch the newly
	 * registered broadcast device to oneshot mode explicitly.
	 */
	if (tick_broadcast_oneshot_active()) {
		tick_broadcast_switch_to_oneshot();
		return;
	}

	/*
	 * Inform all cpus about this. We might be in a situation
	 * where we did not switch to oneshot mode because the per cpu
	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
	 * of a oneshot capable broadcast device. Without that
	 * notification the system stays stuck in periodic mode
	 * forever.
	 */
	tick_clock_notify();
}

/*
 * Check, if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
	return (dev && tick_broadcast_device.evtdev == dev);
}

int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
	int ret = -ENODEV;

	if (tick_is_broadcast_device(dev)) {
		raw_spin_lock(&tick_broadcast_lock);
		ret = __clockevents_update_freq(dev, freq);
		raw_spin_unlock(&tick_broadcast_lock);
	}
	return ret;
}


static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
	if (!dev->broadcast)
		dev->broadcast = tick_broadcast;
	if (!dev->broadcast) {
		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
			     dev->name);
		dev->broadcast = err_broadcast;
	}
}
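
/*
 * Note on the double check above: dev->broadcast is first pointed at
 * tick_broadcast(), the architecture's broadcast IPI hook, which may be
 * a NULL placeholder on configurations without one. In that case the
 * second check installs err_broadcast() so a missed broadcast tick at
 * least logs a warning instead of hanging silently.
 */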

/*
 * Check, if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;
	unsigned long flags;
	int ret = 0;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	/*
	 * Devices might be registered with both periodic and oneshot
	 * mode disabled. This signals, that the device needs to be
	 * operated from the broadcast device and is a placeholder for
	 * the cpu local device.
	 */
	if (!tick_device_is_functional(dev)) {
		dev->event_handler = tick_handle_periodic;
		tick_device_setup_broadcast_func(dev);
		cpumask_set_cpu(cpu, tick_broadcast_mask);
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
			tick_broadcast_start_periodic(bc);
		else
			tick_broadcast_setup_oneshot(bc);
		ret = 1;
	} else {
		/*
		 * Clear the broadcast bit for this cpu if the
		 * device is not power state affected.
		 */
		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);
		else
			tick_device_setup_broadcast_func(dev);

		/*
		 * Clear the broadcast bit if the CPU is not in
		 * periodic broadcast on state.
		 */
		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_ONESHOT:
			/*
			 * If the system is in oneshot mode we can
			 * unconditionally clear the oneshot mask bit,
			 * because the CPU is running and therefore
			 * not in an idle state which causes the power
			 * state affected device to stop. Let the
			 * caller initialize the device.
			 */
			tick_broadcast_clear_oneshot(cpu);
			ret = 0;
			break;

		case TICKDEV_MODE_PERIODIC:
			/*
			 * If the system is in periodic mode, check
			 * whether the broadcast device can be
			 * switched off now.
			 */
			if (cpumask_empty(tick_broadcast_mask) && bc)
				clockevents_shutdown(bc);
			/*
			 * If we kept the cpu in the broadcast mask,
			 * tell the caller to leave the per cpu device
			 * in shutdown state. The periodic interrupt
			 * is delivered by the broadcast device, if
			 * the broadcast device exists and is not
			 * hrtimer based.
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
				ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
			break;
		default:
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
	return ret;
}

int tick_receive_broadcast(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	struct clock_event_device *evt = td->evtdev;

	if (!evt)
		return -ENODEV;

	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}
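
/*
 * Illustrative note: tick_receive_broadcast() is typically invoked from
 * the architecture's broadcast-IPI handler on the woken CPU; it replays
 * the local clock event handler so the CPU behaves as if its own
 * (stopped) timer had fired.
 */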

/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 */
static bool tick_do_broadcast(struct cpumask *mask)
{
	int cpu = smp_processor_id();
	struct tick_device *td;
	bool local = false;

	/*
	 * Check, if the current cpu is in the mask
	 */
	if (cpumask_test_cpu(cpu, mask)) {
		struct clock_event_device *bc = tick_broadcast_device.evtdev;

		cpumask_clear_cpu(cpu, mask);
		/*
		 * We only run the local handler, if the broadcast
		 * device is not hrtimer based. Otherwise we run into
		 * a hrtimer recursion.
		 *
		 * local timer_interrupt()
		 *   local_handler()
		 *     expire_hrtimers()
		 *       bc_handler()
		 *         local_handler()
		 *	     expire_hrtimers()
		 */
		local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
	}

	if (!cpumask_empty(mask)) {
		/*
		 * It might be necessary to actually check whether the devices
		 * have different broadcast functions. For now, just use the
		 * one of the first device. This works as long as we have this
		 * misfeature only on x86 (lapic)
		 */
		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
		td->evtdev->broadcast(mask);
	}
	return local;
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static bool tick_do_periodic_broadcast(void)
{
	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
	return tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);

	/* Handle spurious interrupts gracefully */
	if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
		raw_spin_unlock(&tick_broadcast_lock);
		return;
	}

	bc_local = tick_do_periodic_broadcast();

	if (clockevent_state_oneshot(dev)) {
		ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC);

		clockevents_program_event(dev, next, true);
	}
	raw_spin_unlock(&tick_broadcast_lock);

	/*
	 * We run the handler of the local cpu after dropping
	 * tick_broadcast_lock because the handler might deadlock when
	 * trying to switch to oneshot mode.
	 */
	if (bc_local)
		td->evtdev->event_handler(td->evtdev);
}

/**
 * tick_broadcast_control - Enable/disable or force broadcast mode
 * @mode:	The selected broadcast mode
 *
 * Called when the system enters a state where affected tick devices
 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 */
void tick_broadcast_control(enum tick_broadcast_mode mode)
{
	struct clock_event_device *bc, *dev;
	struct tick_device *td;
	int cpu, bc_stopped;
	unsigned long flags;

	/* Protects also the local clockevent device. */
	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	td = this_cpu_ptr(&tick_cpu_device);
	dev = td->evtdev;

	/*
	 * Is the device not affected by the powerstate ?
	 */
	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
		goto out;

	if (!tick_device_is_functional(dev))
		goto out;

	cpu = smp_processor_id();
	bc = tick_broadcast_device.evtdev;
	bc_stopped = cpumask_empty(tick_broadcast_mask);

	switch (mode) {
	case TICK_BROADCAST_FORCE:
		tick_broadcast_forced = 1;
		fallthrough;
	case TICK_BROADCAST_ON:
		cpumask_set_cpu(cpu, tick_broadcast_on);
		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
			/*
			 * Only shutdown the cpu local device, if:
			 *
			 * - the broadcast device exists
			 * - the broadcast device is not a hrtimer based one
			 * - the broadcast device is in periodic mode to
			 *   avoid a hiccup during switch to oneshot mode
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
			    tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				clockevents_shutdown(dev);
		}
		break;

	case TICK_BROADCAST_OFF:
		if (tick_broadcast_forced)
			break;
		cpumask_clear_cpu(cpu, tick_broadcast_on);
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
			if (tick_broadcast_device.mode ==
			    TICKDEV_MODE_PERIODIC)
				tick_setup_periodic(dev, 0);
		}
		break;
	}

	if (bc) {
		if (cpumask_empty(tick_broadcast_mask)) {
			if (!bc_stopped)
				clockevents_shutdown(bc);
		} else if (bc_stopped) {
			if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				tick_broadcast_start_periodic(bc);
			else
				tick_broadcast_setup_oneshot(bc);
		}
	}
out:
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
EXPORT_SYMBOL_GPL(tick_broadcast_control);
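
/*
 * Usage sketch (illustrative, not part of this file): the helpers
 * tick_broadcast_enable() and tick_broadcast_disable() in <linux/tick.h>
 * are thin wrappers passing TICK_BROADCAST_ON / TICK_BROADCAST_OFF to
 * tick_broadcast_control(). Idle drivers whose local timer stops in deep
 * C-states use them to move a CPU onto and off the broadcast device.
 */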

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
	if (!broadcast)
		dev->event_handler = tick_handle_periodic;
	else
		dev->event_handler = tick_handle_periodic_broadcast;
}

#ifdef CONFIG_HOTPLUG_CPU
static void tick_shutdown_broadcast(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
		if (bc && cpumask_empty(tick_broadcast_mask))
			clockevents_shutdown(bc);
	}
}

/*
 * Remove a CPU from broadcasting
 */
void tick_broadcast_offline(unsigned int cpu)
{
	raw_spin_lock(&tick_broadcast_lock);
	cpumask_clear_cpu(cpu, tick_broadcast_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_on);
	tick_broadcast_oneshot_offline(cpu);
	tick_shutdown_broadcast();
	raw_spin_unlock(&tick_broadcast_lock);
}

#endif

void tick_suspend_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;
	if (bc)
		clockevents_shutdown(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * This is called from tick_resume_local() on a resuming CPU. That's
 * called from the core resume function, tick_unfreeze() and the magic XEN
 * resume hackery.
 *
 * In none of these cases the broadcast device mode can change and the
 * bit of the resuming CPU in the broadcast mask is safe as well.
 */
bool tick_resume_check_broadcast(void)
{
	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
		return false;
	else
		return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
}

void tick_resume_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;

	if (bc) {
		clockevents_tick_resume(bc);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_PERIODIC:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_broadcast_start_periodic(bc);
			break;
		case TICKDEV_MODE_ONESHOT:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_resume_broadcast_oneshot(bc);
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
	return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
int tick_check_broadcast_expired(void)
{
	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
}

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
					const struct cpumask *cpumask)
{
	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
		return;

	if (cpumask_equal(bc->cpumask, cpumask))
		return;

	bc->cpumask = cpumask;
	irq_set_affinity(bc->irq, bc->cpumask);
}

static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
				     ktime_t expires)
{
	if (!clockevent_state_oneshot(bc))
		clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);

	clockevents_program_event(bc, expires, 1);
	tick_broadcast_set_affinity(bc, cpumask_of(cpu));
}

static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
	clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
		struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

		/*
		 * We might be in the middle of switching over from
		 * periodic to oneshot. If the CPU has not yet
		 * switched over, leave the device alone.
		 */
		if (td->mode == TICKDEV_MODE_ONESHOT) {
			clockevents_switch_state(td->evtdev,
					      CLOCK_EVT_STATE_ONESHOT);
		}
	}
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td;
	ktime_t now, next_event;
	int cpu, next_cpu = 0;
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);
	dev->next_event = KTIME_MAX;
	next_event = KTIME_MAX;
	cpumask_clear(tmpmask);
	now = ktime_get();
	/* Find all expired events */
	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
		/*
		 * Required for !SMP because for_each_cpu() unconditionally
		 * reports CPU0 as set on UP kernels.
		 */
		if (!IS_ENABLED(CONFIG_SMP) &&
		    cpumask_empty(tick_broadcast_oneshot_mask))
			break;

		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev->next_event <= now) {
			cpumask_set_cpu(cpu, tmpmask);
			/*
			 * Mark the remote cpu in the pending mask, so
			 * it can avoid reprogramming the cpu local
			 * timer in tick_broadcast_oneshot_control().
			 */
			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
		} else if (td->evtdev->next_event < next_event) {
			next_event = td->evtdev->next_event;
			next_cpu = cpu;
		}
	}

	/*
	 * Remove the current cpu from the pending mask. The event is
	 * delivered immediately in tick_do_broadcast() !
	 */
	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

	/* Take care of enforced broadcast requests */
	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
	cpumask_clear(tick_broadcast_force_mask);

	/*
	 * Sanity check. Catch the case where we try to broadcast to
	 * offline cpus.
	 */
	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
		cpumask_and(tmpmask, tmpmask, cpu_online_mask);

	/*
	 * Wakeup the cpus which have an expired event.
	 */
	bc_local = tick_do_broadcast(tmpmask);

	/*
	 * Two reasons for reprogram:
	 *
	 * - The global event did not expire any CPU local
	 * events. This happens in dyntick mode, as the maximum PIT
	 * delta is quite small.
	 *
	 * - There are pending events on sleeping CPUs which were not
	 * in the event mask
	 */
	if (next_event != KTIME_MAX)
		tick_broadcast_set_event(dev, next_cpu, next_event);

	raw_spin_unlock(&tick_broadcast_lock);

	if (bc_local) {
		td = this_cpu_ptr(&tick_cpu_device);
		td->evtdev->event_handler(td->evtdev);
	}
}

static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return 0;
	if (bc->next_event == KTIME_MAX)
		return 0;
	return bc->bound_on == cpu ? -EBUSY : 0;
}
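
/*
 * Background note (illustrative): CLOCK_EVT_FEAT_HRTIMER marks the pseudo
 * broadcast device built on top of a hrtimer (see tick-broadcast-hrtimer.c)
 * for systems without a real global timer. That device runs on one CPU,
 * recorded in bc->bound_on, so the owning CPU must not enter a deep idle
 * state while an event is programmed; returning -EBUSY here makes the idle
 * code pick a shallower state instead.
 */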

static void broadcast_shutdown_local(struct clock_event_device *bc,
				     struct clock_event_device *dev)
{
	/*
	 * For hrtimer based broadcasting we cannot shutdown the cpu
	 * local device if our own event is the first one to expire or
	 * if we own the broadcast timer.
	 */
	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
		if (broadcast_needs_cpu(bc, smp_processor_id()))
			return;
		if (dev->next_event < bc->next_event)
			return;
	}
	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}

static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state,
					     struct tick_device *td,
					     int cpu)
{
	struct clock_event_device *bc, *dev = td->evtdev;
	int ret = 0;
	ktime_t now;

	raw_spin_lock(&tick_broadcast_lock);
	bc = tick_broadcast_device.evtdev;

	if (state == TICK_BROADCAST_ENTER) {
		/*
		 * If the current CPU owns the hrtimer broadcast
		 * mechanism, it cannot go deep idle and we do not add
		 * the CPU to the broadcast mask. We don't have to go
		 * through the EXIT path as the local timer is not
		 * shutdown.
		 */
		ret = broadcast_needs_cpu(bc, cpu);
		if (ret)
			goto out;

		/*
		 * If the broadcast device is in periodic mode, we
		 * return.
		 */
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
			/* If it is a hrtimer based broadcast, return busy */
			if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
				ret = -EBUSY;
			goto out;
		}

		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));

			/* Conditionally shut down the local timer. */
			broadcast_shutdown_local(bc, dev);

			/*
			 * We only reprogram the broadcast timer if we
			 * did not mark ourself in the force mask and
			 * if the cpu local event is earlier than the
			 * broadcast event. If the current CPU is in
			 * the force mask, then we are going to be
			 * woken by the IPI right away; we return
			 * busy, so the CPU does not try to go deep
			 * idle.
			 */
			if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
				ret = -EBUSY;
			} else if (dev->next_event < bc->next_event) {
				tick_broadcast_set_event(bc, cpu, dev->next_event);
				/*
				 * In case of hrtimer broadcasts the
				 * programming might have moved the
				 * timer to this cpu. If yes, remove
				 * us from the broadcast mask and
				 * return busy.
				 */
				ret = broadcast_needs_cpu(bc, cpu);
				if (ret) {
					cpumask_clear_cpu(cpu,
						tick_broadcast_oneshot_mask);
				}
			}
		}
	} else {
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
			clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
			/*
			 * The cpu which was handling the broadcast
			 * timer marked this cpu in the broadcast
			 * pending mask and fired the broadcast
			 * IPI. So we are going to handle the expired
			 * event anyway via the broadcast IPI
			 * handler. No need to reprogram the timer
			 * with an already expired event.
			 */
			if (cpumask_test_and_clear_cpu(cpu,
				       tick_broadcast_pending_mask))
				goto out;

			/*
			 * Bail out if there is no next event.
			 */
			if (dev->next_event == KTIME_MAX)
				goto out;
			/*
			 * If the pending bit is not set, then we are
			 * either the CPU handling the broadcast
			 * interrupt or we got woken by something else.
			 *
			 * We are no longer in the broadcast mask, so
			 * if the cpu local expiry time is already
			 * reached, we would reprogram the cpu local
			 * timer with an already expired event.
			 *
			 * This can lead to a ping-pong when we return
			 * to idle and therefore rearm the broadcast
			 * timer before the cpu local timer was able
			 * to fire. This happens because the forced
			 * reprogramming makes sure that the event
			 * will happen in the future and depending on
			 * the min_delta setting this might be far
			 * enough out that the ping-pong starts.
			 *
			 * If the cpu local next_event has expired
			 * then we know that the broadcast timer
			 * next_event has expired as well and
			 * broadcast is about to be handled. So we
			 * avoid reprogramming and enforce that the
			 * broadcast handler, which did not run yet,
			 * will invoke the cpu local handler.
			 *
			 * We cannot call the handler directly from
			 * here, because we might be in a NOHZ phase
			 * and we did not go through the irq_enter()
			 * nohz fixups.
			 */
			now = ktime_get();
			if (dev->next_event <= now) {
				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
				goto out;
			}
			/*
			 * We got woken by something else. Reprogram
			 * the cpu local timer device.
			 */
			tick_program_event(dev->next_event, 1);
		}
	}
out:
	raw_spin_unlock(&tick_broadcast_lock);
	return ret;
}

static int tick_oneshot_wakeup_control(enum tick_broadcast_state state,
				       struct tick_device *td,
				       int cpu)
{
	struct clock_event_device *dev, *wd;

	dev = td->evtdev;
	if (td->mode != TICKDEV_MODE_ONESHOT)
		return -EINVAL;

	wd = tick_get_oneshot_wakeup_device(cpu);
	if (!wd)
		return -ENODEV;

	switch (state) {
	case TICK_BROADCAST_ENTER:
		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
		clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT);
		clockevents_program_event(wd, dev->next_event, 1);
		break;
	case TICK_BROADCAST_EXIT:
		/* We may have transitioned to oneshot mode while idle */
		if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT)
			return -ENODEV;
	}

	return 0;
}

int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	int cpu = smp_processor_id();

	if (!tick_oneshot_wakeup_control(state, td, cpu))
		return 0;

	if (tick_broadcast_device.evtdev)
		return ___tick_broadcast_oneshot_control(state, td, cpu);

	/*
	 * If there is no broadcast or wakeup device, tell the caller not
	 * to go into deep idle.
	 */
	return -EBUSY;
}
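
/*
 * Illustrative call path (wrapper names from <linux/tick.h>): cpuidle
 * calls tick_broadcast_enter() before and tick_broadcast_exit() after a
 * deep idle state; these pass TICK_BROADCAST_ENTER / TICK_BROADCAST_EXIT
 * down to the functions above. A non-zero return from the ENTER path
 * means "do not go that deep", and the governor falls back to a state in
 * which the local timer keeps running.
 */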

/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

static void tick_broadcast_init_next_event(struct cpumask *mask,
					   ktime_t expires)
{
	struct tick_device *td;
	int cpu;

	for_each_cpu(cpu, mask) {
		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev)
			td->evtdev->next_event = expires;
	}
}

static inline ktime_t tick_get_next_period(void)
{
	ktime_t next;

	/*
	 * Protect against concurrent updates (store/load tearing on
	 * 32bit). It does not matter if the time is already in the
	 * past. The broadcast device which is about to be programmed will
	 * fire in any case.
	 */
	raw_spin_lock(&jiffies_lock);
	next = tick_next_period;
	raw_spin_unlock(&jiffies_lock);
	return next;
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 */
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
	int cpu = smp_processor_id();

	if (!bc)
		return;

	/* Set it up only once ! */
	if (bc->event_handler != tick_handle_oneshot_broadcast) {
		int was_periodic = clockevent_state_periodic(bc);

		bc->event_handler = tick_handle_oneshot_broadcast;

		/*
		 * We must be careful here. There might be other CPUs
		 * waiting for periodic broadcast. We need to set the
		 * oneshot_mask bits for those and program the
		 * broadcast device to fire.
		 */
		cpumask_copy(tmpmask, tick_broadcast_mask);
		cpumask_clear_cpu(cpu, tmpmask);
		cpumask_or(tick_broadcast_oneshot_mask,
			   tick_broadcast_oneshot_mask, tmpmask);

		if (was_periodic && !cpumask_empty(tmpmask)) {
			ktime_t nextevt = tick_get_next_period();

			clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
			tick_broadcast_init_next_event(tmpmask, nextevt);
			tick_broadcast_set_event(bc, cpu, nextevt);
		} else
			bc->next_event = KTIME_MAX;
	} else {
		/*
		 * The first cpu which switches to oneshot mode sets
		 * the bit for all other cpus which are in the general
		 * (periodic) broadcast mask. So the bit is set and
		 * would prevent the first broadcast enter after this
		 * to program the bc device.
		 */
		tick_broadcast_clear_oneshot(cpu);
	}
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
	bc = tick_broadcast_device.evtdev;
	if (bc)
		tick_broadcast_setup_oneshot(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_HOTPLUG_CPU
void hotplug_cpu__broadcast_tick_pull(int deadcpu)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	bc = tick_broadcast_device.evtdev;

	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
		/* This moves the broadcast assignment to this CPU: */
		clockevents_program_event(bc, bc->next_event, 1);
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Remove a dying CPU from broadcasting
 */
static void tick_broadcast_oneshot_offline(unsigned int cpu)
{
	if (tick_get_oneshot_wakeup_device(cpu))
		tick_set_oneshot_wakeup_device(NULL, cpu);

	/*
	 * Clear the broadcast masks for the dead cpu, but do not stop
	 * the broadcast device!
	 */
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
}
#endif

/*
 * Check, whether the broadcast device is in one shot mode
 */
int tick_broadcast_oneshot_active(void)
{
	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#else
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return -EBUSY;

	return 0;
}
#endif

void __init tick_broadcast_init(void)
{
	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}
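
/*
 * Note (inference, not from the original file): tick_broadcast_init() is
 * called from tick_init() early in start_kernel(), before sleeping
 * allocations are possible, which is why the cpumask allocations above
 * use GFP_NOWAIT.
 */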