cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

preempt.h (12825B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2#ifndef __LINUX_PREEMPT_H
      3#define __LINUX_PREEMPT_H
      4
      5/*
      6 * include/linux/preempt.h - macros for accessing and manipulating
      7 * preempt_count (used for kernel preemption, interrupt count, etc.)
      8 */
      9
     10#include <linux/linkage.h>
     11#include <linux/list.h>
     12
     13/*
     14 * We put the hardirq and softirq counter into the preemption
     15 * counter. The bitmask has the following meaning:
     16 *
     17 * - bits 0-7 are the preemption count (max preemption depth: 256)
     18 * - bits 8-15 are the softirq count (max # of softirqs: 256)
     19 *
     20 * The hardirq count could in theory be the same as the number of
     21 * interrupts in the system, but we run all interrupt handlers with
     22 * interrupts disabled, so we cannot have nesting interrupts. Though
     23 * there are a few palaeontologic drivers which reenable interrupts in
     24 * the handler, so we need more than one bit here.
     25 *
     26 *         PREEMPT_MASK:	0x000000ff
     27 *         SOFTIRQ_MASK:	0x0000ff00
     28 *         HARDIRQ_MASK:	0x000f0000
     29 *             NMI_MASK:	0x00f00000
     30 * PREEMPT_NEED_RESCHED:	0x80000000
     31 */
     32#define PREEMPT_BITS	8	/* bits 0-7: preemption count */
     33#define SOFTIRQ_BITS	8	/* bits 8-15: softirq count */
     34#define HARDIRQ_BITS	4	/* bits 16-19: hardirq count */
     35#define NMI_BITS	4	/* bits 20-23: NMI count */
     36
     37#define PREEMPT_SHIFT	0	/* each field starts where the previous one ends */
     38#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)	/* 8 */
     39#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)	/* 16 */
     40#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)	/* 20 */
     41
     42#define __IRQ_MASK(x)	((1UL << (x))-1)	/* value with the low x bits set */
     43
     44#define PREEMPT_MASK	(__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)	/* 0x000000ff */
     45#define SOFTIRQ_MASK	(__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)	/* 0x0000ff00 */
     46#define HARDIRQ_MASK	(__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)	/* 0x000f0000 */
     47#define NMI_MASK	(__IRQ_MASK(NMI_BITS)     << NMI_SHIFT)	/* 0x00f00000 */
     48
     49#define PREEMPT_OFFSET	(1UL << PREEMPT_SHIFT)	/* lowest bit of the preemption field */
     50#define SOFTIRQ_OFFSET	(1UL << SOFTIRQ_SHIFT)	/* lowest bit of the softirq field */
     51#define HARDIRQ_OFFSET	(1UL << HARDIRQ_SHIFT)	/* lowest bit of the hardirq field */
     52#define NMI_OFFSET	(1UL << NMI_SHIFT)	/* lowest bit of the NMI field */
     53
     54#define SOFTIRQ_DISABLE_OFFSET	(2 * SOFTIRQ_OFFSET)	/* NOTE(review): presumably keeps the SOFTIRQ_OFFSET bit free for in_serving_softirq() - confirm against the softirq code */
     55
     56#define PREEMPT_DISABLED	(PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
     57
     58/*
     59 * Disable preemption until the scheduler is running -- use an unconditional
     60 * value so that it also works on !PREEMPT_COUNT kernels.
     61 *
     62 * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count().
     63 */
     64#define INIT_PREEMPT_COUNT	PREEMPT_OFFSET
     65
     66/*
     67 * Initial preempt_count value; reflects the preempt_count schedule invariant
     68 * which states that during context switches:
     69 *
     70 *    preempt_count() == 2*PREEMPT_DISABLE_OFFSET
     71 *
     72 * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels.
     73 * Note: See finish_task_switch().
     74 */
     75#define FORK_PREEMPT_COUNT	(2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
     76
     77/* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */
     78#include <asm/preempt.h>
     79
     80/**
     81 * interrupt_context_level - return interrupt context level
     82 *
     83 * Returns the current interrupt context level.
     84 *  0 - normal context
     85 *  1 - softirq context
     86 *  2 - hardirq context
     87 *  3 - NMI context
     88 */
     89static __always_inline unsigned char interrupt_context_level(void)
     90{
     91	unsigned long pc = preempt_count();
     92	unsigned char level = 0;
     93
     94	level += !!(pc & (NMI_MASK));
     95	level += !!(pc & (NMI_MASK | HARDIRQ_MASK));
     96	level += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
     97
     98	return level;
     99}
    100
    101#define nmi_count()	(preempt_count() & NMI_MASK)	/* NMI field of preempt_count(), still shifted */
    102#define hardirq_count()	(preempt_count() & HARDIRQ_MASK)	/* hardirq field of preempt_count(), still shifted */
    103#ifdef CONFIG_PREEMPT_RT	/* RT reads the softirq field from current->softirq_disable_cnt instead of preempt_count() */
    104# define softirq_count()	(current->softirq_disable_cnt & SOFTIRQ_MASK)
    105#else
    106# define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
    107#endif
    108#define irq_count()	(nmi_count() | hardirq_count() | softirq_count())	/* non-zero if any of the three fields is non-zero */
    109
    110/*
    111 * Macros to retrieve the current execution context:
    112 *
    113 * in_nmi()		- We're in NMI context
    114 * in_hardirq()		- We're in hard IRQ context
    115 * in_serving_softirq()	- We're in softirq context
    116 * in_task()		- We're in task context
    117 */
    118#define in_nmi()		(nmi_count())
    119#define in_hardirq()		(hardirq_count())
    120#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)
    121#define in_task()		(!(in_nmi() | in_hardirq() | in_serving_softirq()))
    122
    123/*
    124 * The following macros are deprecated and should not be used in new code:
    125 * in_irq()       - Obsolete version of in_hardirq()
    126 * in_softirq()   - We have BH disabled, or are processing softirqs
    127 * in_interrupt() - We're in NMI,IRQ,SoftIRQ context or have BH disabled
    128 */
    129#define in_irq()		(hardirq_count())
    130#define in_softirq()		(softirq_count())
    131#define in_interrupt()		(irq_count())
    132
    133/*
    134 * The preempt_count offset after preempt_disable();
    135 */
    136#if defined(CONFIG_PREEMPT_COUNT)
    137# define PREEMPT_DISABLE_OFFSET	PREEMPT_OFFSET
    138#else
    139# define PREEMPT_DISABLE_OFFSET	0
    140#endif
    141
    142/*
    143 * The preempt_count offset after spin_lock()
    144 */
    145#if !defined(CONFIG_PREEMPT_RT)
    146#define PREEMPT_LOCK_OFFSET		PREEMPT_DISABLE_OFFSET
    147#else
    148/* Locks on RT do not disable preemption */
    149#define PREEMPT_LOCK_OFFSET		0
    150#endif
    151
    152/*
    153 * The preempt_count offset needed for things like:
    154 *
    155 *  spin_lock_bh()
    156 *
    157 * Which need to disable both preemption (CONFIG_PREEMPT_COUNT) and
    158 * softirqs, such that unlock sequences of:
    159 *
    160 *  spin_unlock();
    161 *  local_bh_enable();
    162 *
    163 * Work as expected.
    164 */
    165#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET)
    166
    167/*
    168 * Are we running in atomic context?  WARNING: this macro cannot
    169 * always detect atomic context; in particular, it cannot know about
    170 * held spinlocks in non-preemptible kernels.  Thus it should not be
    171 * used in the general case to determine whether sleeping is possible.
    172 * Do not use in_atomic() in driver code.
    173 */
    174#define in_atomic()	(preempt_count() != 0)
    175
    176/*
    177 * Check whether we were atomic before we did preempt_disable():
    178 * (used by the scheduler)
    179 */
    180#define in_atomic_preempt_off() (preempt_count() != PREEMPT_DISABLE_OFFSET)
    181
    182#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
    183extern void preempt_count_add(int val);	/* real functions in this configuration; defined outside this header */
    184extern void preempt_count_sub(int val);
    185#define preempt_count_dec_and_test() \
    186	({ preempt_count_sub(1); should_resched(0); })	/* subtract one, then yield the value of should_resched(0) */
    187#else	/* neither option set: alias the __preempt_count_*() helpers directly */
    188#define preempt_count_add(val)	__preempt_count_add(val)
    189#define preempt_count_sub(val)	__preempt_count_sub(val)
    190#define preempt_count_dec_and_test() __preempt_count_dec_and_test()
    191#endif
    192
    193#define __preempt_count_inc() __preempt_count_add(1)	/* +1 through the __-prefixed helper */
    194#define __preempt_count_dec() __preempt_count_sub(1)	/* -1 through the __-prefixed helper */
    195
    196#define preempt_count_inc() preempt_count_add(1)	/* +1 through whichever preempt_count_add() was selected above */
    197#define preempt_count_dec() preempt_count_sub(1)	/* -1 through whichever preempt_count_sub() was selected above */
    198
    199#ifdef CONFIG_PREEMPT_COUNT	/* the count is maintained: enable/disable really modify it */
    200
    201#define preempt_disable() /* increment the count first, then barrier() */ \
    202do { \
    203	preempt_count_inc(); \
    204	barrier(); \
    205} while (0)
    206
    207#define sched_preempt_enable_no_resched() /* barrier() first, then decrement; no reschedule check */ \
    208do { \
    209	barrier(); \
    210	preempt_count_dec(); \
    211} while (0)
    212
    213#define preempt_enable_no_resched() sched_preempt_enable_no_resched()	/* plain alias */
    214
    215#define preemptible()	(preempt_count() == 0 && !irqs_disabled())	/* count is zero and IRQs are not disabled */
    216
    217#ifdef CONFIG_PREEMPTION
    218#define preempt_enable() /* decrement; call __preempt_schedule() when the dec-and-test is true */ \
    219do { \
    220	barrier(); \
    221	if (unlikely(preempt_count_dec_and_test())) \
    222		__preempt_schedule(); \
    223} while (0)
    224
    225#define preempt_enable_notrace() /* same shape, using the __-prefixed test and the _notrace schedule call */ \
    226do { \
    227	barrier(); \
    228	if (unlikely(__preempt_count_dec_and_test())) \
    229		__preempt_schedule_notrace(); \
    230} while (0)
    231
    232#define preempt_check_resched() /* no count change; just the should_resched(0) check */ \
    233do { \
    234	if (should_resched(0)) \
    235		__preempt_schedule(); \
    236} while (0)
    237
    238#else /* !CONFIG_PREEMPTION */
    239#define preempt_enable() /* only drops the count; nothing is scheduled from here */ \
    240do { \
    241	barrier(); \
    242	preempt_count_dec(); \
    243} while (0)
    244
    245#define preempt_enable_notrace() /* as above, via __preempt_count_dec() */ \
    246do { \
    247	barrier(); \
    248	__preempt_count_dec(); \
    249} while (0)
    250
    251#define preempt_check_resched() do { } while (0)	/* empty statement in this configuration */
    252#endif /* CONFIG_PREEMPTION */
    253
    254#define preempt_disable_notrace() /* like preempt_disable(), via __preempt_count_inc() */ \
    255do { \
    256	__preempt_count_inc(); \
    257	barrier(); \
    258} while (0)
    259
    260#define preempt_enable_no_resched_notrace() /* barrier(), then __preempt_count_dec() */ \
    261do { \
    262	barrier(); \
    263	__preempt_count_dec(); \
    264} while (0)
    265
    266#else /* !CONFIG_PREEMPT_COUNT */
    267
    268/*
    269 * Even if we don't have any preemption, we need preempt disable/enable
    270 * to be barriers, so that we don't have things like get_user/put_user
    271 * that can cause faults and scheduling migrate into our preempt-protected
    272 * region.
    273 */
    274#define preempt_disable()			barrier()
    275#define sched_preempt_enable_no_resched()	barrier()
    276#define preempt_enable_no_resched()		barrier()
    277#define preempt_enable()			barrier()
    278#define preempt_check_resched()			do { } while (0)
    279
    280#define preempt_disable_notrace()		barrier()
    281#define preempt_enable_no_resched_notrace()	barrier()
    282#define preempt_enable_notrace()		barrier()
    283#define preemptible()				0	/* constant 0 when the count is not maintained */
    284
    285#endif /* CONFIG_PREEMPT_COUNT */
    286
    287#ifdef MODULE
    288/*
    289 * Modules have no business playing preemption tricks.
    290 */
    291#undef sched_preempt_enable_no_resched
    292#undef preempt_enable_no_resched
    293#undef preempt_enable_no_resched_notrace
    294#undef preempt_check_resched
    295#endif
    296
    297#define preempt_set_need_resched() /* unconditionally calls set_preempt_need_resched() */ \
    298do { \
    299	set_preempt_need_resched(); \
    300} while (0)
    301#define preempt_fold_need_resched() /* calls set_preempt_need_resched() only when tif_need_resched() is true */ \
    302do { \
    303	if (tif_need_resched()) \
    304		set_preempt_need_resched(); \
    305} while (0)
    306
    307#ifdef CONFIG_PREEMPT_NOTIFIERS
    308
    309struct preempt_notifier;
    310
    311/**
    312 * struct preempt_ops - notifiers called when a task is preempted and rescheduled
    313 * @sched_in: we're about to be rescheduled:
    314 *    notifier: struct preempt_notifier for the task being scheduled
    315 *    cpu:  cpu we're scheduled on
    316 * @sched_out: we've just been preempted
    317 *    notifier: struct preempt_notifier for the task being preempted
    318 *    next: the task that's kicking us out
    319 *
    320 * Please note that sched_in and sched_out are called under different
    321 * contexts.  sched_out is called with rq lock held and irq disabled
    322 * while sched_in is called without rq lock and irq enabled.  This
    323 * difference is intentional and depended upon by its users.
    324 */
    325struct preempt_ops {
    326	void (*sched_in)(struct preempt_notifier *notifier, int cpu);
    327	void (*sched_out)(struct preempt_notifier *notifier,
    328			  struct task_struct *next);
    329};
    330
    331/**
    332 * struct preempt_notifier - key for installing preemption notifiers
    333 * @link: hlist linkage, internal use
    334 * @ops: defines the notifier functions to be called
    335 *
    336 * Usually embedded in a larger object and recovered with container_of().
    337 */
    338struct preempt_notifier {
    339	struct hlist_node link;
    340	struct preempt_ops *ops;
    341};
    342
    343void preempt_notifier_inc(void);
    344void preempt_notifier_dec(void);
    345void preempt_notifier_register(struct preempt_notifier *notifier);
    346void preempt_notifier_unregister(struct preempt_notifier *notifier);
    347
    348static inline void preempt_notifier_init(struct preempt_notifier *notifier,
    349				     struct preempt_ops *ops)
    350{
    351	INIT_HLIST_NODE(&notifier->link);
    352	notifier->ops = ops;
    353}
    354
    355#endif
    356
    357#ifdef CONFIG_SMP
    358
    359/*
    360 * Migrate-Disable and why it is undesired.
    361 *
    362 * When a preempted task becomes eligible to run under the ideal model (IOW it
    363 * becomes one of the M highest priority tasks), it might still have to wait
    364 * for the preemptee's migrate_disable() section to complete. Thereby suffering
    365 * a reduction in bandwidth in the exact duration of the migrate_disable()
    366 * section.
    367 *
    368 * Per this argument, the change from preempt_disable() to migrate_disable()
    369 * gets us:
    370 *
    371 * - a higher priority task gains reduced wake-up latency; with preempt_disable()
    372 *   it would have had to wait for the lower priority task.
    373 *
    374 * - a lower priority task, which under preempt_disable() could've instantly
    375 *   migrated away when another CPU becomes available, is now constrained
    376 *   by the ability to push the higher priority task away, which might itself be
    377 *   in a migrate_disable() section, reducing its available bandwidth.
    378 *
    379 * IOW it trades latency / moves the interference term, but it stays in the
    380 * system, and as long as it remains unbounded, the system is not fully
    381 * deterministic.
    382 *
    383 *
    384 * The reason we have it anyway.
    385 *
    386 * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a
    387 * number of primitives into becoming preemptible, they would also allow
    388 * migration. This turns out to break a bunch of per-cpu usage. To this end,
    389 * all these primitives employ migrate_disable() to restore this implicit
    390 * assumption.
    391 *
    392 * This is a 'temporary' work-around at best. The correct solution is getting
    393 * rid of the above assumptions and reworking the code to employ explicit
    394 * per-cpu locking or short preempt-disable regions.
    395 *
    396 * The end goal must be to get rid of migrate_disable(), alternatively we need
    397 * a schedulability theory that does not depend on arbitrary migration.
    398 *
    399 *
    400 * Notes on the implementation.
    401 *
    402 * The implementation is particularly tricky since existing code patterns
    403 * dictate neither migrate_disable() nor migrate_enable() is allowed to block.
    404 * This means that it cannot use cpus_read_lock() to serialize against hotplug,
    405 * nor can it easily migrate itself into a pending affinity mask change on
    406 * migrate_enable().
    407 *
    408 *
    409 * Note: even non-work-conserving schedulers like semi-partitioned depend on
    410 *       migration, so migrate_disable() is not only a problem for
    411 *       work-conserving schedulers.
    412 *
    413 */
    414extern void migrate_disable(void);	/* CONFIG_SMP: defined outside this header */
    415extern void migrate_enable(void);
    416
    417#else	/* !CONFIG_SMP */
    418
    419static inline void migrate_disable(void) { }	/* empty stub without CONFIG_SMP */
    420static inline void migrate_enable(void) { }	/* empty stub without CONFIG_SMP */
    421
    422#endif /* CONFIG_SMP */
    423
    424#endif /* __LINUX_PREEMPT_H */