cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

common.c (12232B)


// SPDX-License-Identifier: GPL-2.0

#include <linux/context_tracking.h>
#include <linux/entry-common.h>
#include <linux/resume_user_mode.h>
#include <linux/highmem.h>
#include <linux/jump_label.h>
#include <linux/livepatch.h>
#include <linux/audit.h>
#include <linux/tick.h>

#include "common.h"

#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

/* See comment for enter_from_user_mode() in entry-common.h */
static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
{
        arch_enter_from_user_mode(regs);
        lockdep_hardirqs_off(CALLER_ADDR0);

        CT_WARN_ON(ct_state() != CONTEXT_USER);
        user_exit_irqoff();

        instrumentation_begin();
        trace_hardirqs_off_finish();
        instrumentation_end();
}

void noinstr enter_from_user_mode(struct pt_regs *regs)
{
        __enter_from_user_mode(regs);
}

static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
{
        if (unlikely(audit_context())) {
                unsigned long args[6];

                syscall_get_arguments(current, regs, args);
                audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
        }
}

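/*
 * Entry work for syscalls: syscall user dispatch, ptrace, seccomp,
 * tracepoints and audit.  Returns -1L when the syscall must not be
 * executed (rolled back by user dispatch, aborted by the tracer or
 * denied by seccomp); otherwise returns the syscall number, which the
 * tracer or seccomp may have rewritten in the meantime.
 */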
static long syscall_trace_enter(struct pt_regs *regs, long syscall,
                                unsigned long work)
{
        long ret = 0;

        /*
         * Handle Syscall User Dispatch.  This must come first, since
         * the ABI here can be something that doesn't make sense for
         * other syscall_work features.
         */
        if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
                if (syscall_user_dispatch(regs))
                        return -1L;
        }

        /* Handle ptrace */
        if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
                ret = ptrace_report_syscall_entry(regs);
                if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
                        return -1L;
        }

        /* Do seccomp after ptrace, to catch any tracer changes. */
        if (work & SYSCALL_WORK_SECCOMP) {
                ret = __secure_computing(NULL);
                if (ret == -1L)
                        return ret;
        }

        /* Either of the above might have changed the syscall number */
        syscall = syscall_get_nr(current, regs);

        if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
                trace_sys_enter(regs, syscall);

        syscall_enter_audit(regs, syscall);

        return ret ? : syscall;
}

static __always_inline long
__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
{
        unsigned long work = READ_ONCE(current_thread_info()->syscall_work);

        if (work & SYSCALL_WORK_ENTER)
                syscall = syscall_trace_enter(regs, syscall, work);

        return syscall;
}

long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
{
        return __syscall_enter_from_user_work(regs, syscall);
}

noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
{
        long ret;

        __enter_from_user_mode(regs);

        instrumentation_begin();
        local_irq_enable();
        ret = __syscall_enter_from_user_work(regs, syscall);
        instrumentation_end();

        return ret;
}

noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
{
        __enter_from_user_mode(regs);
        instrumentation_begin();
        local_irq_enable();
        instrumentation_end();
}

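/*
 * Note on the syscall entry variants above: syscall_enter_from_user_mode()
 * combines the user->kernel context transition with the entry work.
 * Architectures that have to do arch specific work in between can instead
 * call syscall_enter_from_user_mode_prepare() (transition plus interrupt
 * enable) and syscall_enter_from_user_mode_work() (entry work) separately.
 */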
/* See comment for exit_to_user_mode() in entry-common.h */
static __always_inline void __exit_to_user_mode(void)
{
        instrumentation_begin();
        trace_hardirqs_on_prepare();
        lockdep_hardirqs_on_prepare();
        instrumentation_end();

        user_enter_irqoff();
        arch_exit_to_user_mode();
        lockdep_hardirqs_on(CALLER_ADDR0);
}

void noinstr exit_to_user_mode(void)
{
        __exit_to_user_mode();
}

/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }

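/*
 * Process pending TIF work before returning to user space.  Called with
 * interrupts disabled; each iteration runs the work handlers with
 * interrupts enabled and then re-reads the thread flags with interrupts
 * disabled again.  Returns the latest work state to the caller.
 */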
static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
                                            unsigned long ti_work)
{
        /*
         * Before returning to user space ensure that all pending work
         * items have been completed.
         */
        while (ti_work & EXIT_TO_USER_MODE_WORK) {

                local_irq_enable_exit_to_user(ti_work);

                if (ti_work & _TIF_NEED_RESCHED)
                        schedule();

                if (ti_work & _TIF_UPROBE)
                        uprobe_notify_resume(regs);

                if (ti_work & _TIF_PATCH_PENDING)
                        klp_update_patch_state(current);

                if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
                        arch_do_signal_or_restart(regs);

                if (ti_work & _TIF_NOTIFY_RESUME)
                        resume_user_mode_work(regs);

                /* Architecture specific TIF work */
                arch_exit_to_user_mode_work(regs, ti_work);

                /*
                 * Disable interrupts and reevaluate the work flags as they
                 * might have changed while interrupts and preemption were
                 * enabled above.
                 */
                local_irq_disable_exit_to_user();

                /* Check if any of the above work has queued a deferred wakeup */
                tick_nohz_user_enter_prepare();

                ti_work = read_thread_flags();
        }

        /* Return the latest work state for arch_exit_to_user_mode() */
        return ti_work;
}

static void exit_to_user_mode_prepare(struct pt_regs *regs)
{
        unsigned long ti_work = read_thread_flags();

        lockdep_assert_irqs_disabled();

        /* Flush pending rcuog wakeup before the last need_resched() check */
        tick_nohz_user_enter_prepare();

        if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
                ti_work = exit_to_user_mode_loop(regs, ti_work);

        arch_exit_to_user_mode_prepare(regs, ti_work);

        /* Ensure that the address limit is intact and no locks are held */
        addr_limit_user_check();
        kmap_assert_nomap();
        lockdep_assert_irqs_disabled();
        lockdep_sys_exit();
}

/*
 * If SYSCALL_EMU is set, then the only reason to report is when
 * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP).  The syscall
 * instruction has already been reported in syscall_enter_from_user_mode().
 */
static inline bool report_single_step(unsigned long work)
{
        if (work & SYSCALL_WORK_SYSCALL_EMU)
                return false;

        return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
}

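/*
 * Exit work for syscalls: audit, the sys_exit tracepoint and ptrace
 * syscall-exit / single-step reporting.  Skipped entirely when the
 * syscall was blocked by syscall user dispatch, since its ABI is unknown
 * to the tracers.
 */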
static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
{
        bool step;

        /*
         * If the syscall was rolled back due to syscall user dispatching,
         * then the tracers below are not invoked, for the same reason the
         * entry side was not invoked in syscall_trace_enter(): the ABI
         * of these syscalls is unknown.
         */
        if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
                if (unlikely(current->syscall_dispatch.on_dispatch)) {
                        current->syscall_dispatch.on_dispatch = false;
                        return;
                }
        }

        audit_syscall_exit(regs);

        if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
                trace_sys_exit(regs, syscall_get_return_value(current, regs));

        step = report_single_step(work);
        if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
                ptrace_report_syscall_exit(regs, step);
}

/*
 * Syscall specific exit to user mode preparation. Runs with interrupts
 * enabled.
 */
static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
{
        unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
        unsigned long nr = syscall_get_nr(current, regs);

        CT_WARN_ON(ct_state() != CONTEXT_KERNEL);

        if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
                if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
                        local_irq_enable();
        }

        rseq_syscall(regs);

        /*
         * Do one-time syscall specific work. If these work items are
         * enabled, we want to run them exactly once per syscall exit with
         * interrupts enabled.
         */
        if (unlikely(work & SYSCALL_WORK_EXIT))
                syscall_exit_work(regs, work);
}

static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
        syscall_exit_to_user_mode_prepare(regs);
        local_irq_disable_exit_to_user();
        exit_to_user_mode_prepare(regs);
}

void syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
        __syscall_exit_to_user_mode_work(regs);
}

__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
{
        instrumentation_begin();
        __syscall_exit_to_user_mode_work(regs);
        instrumentation_end();
        __exit_to_user_mode();
}

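/*
 * Illustrative sketch (not part of this file): the overall shape of an
 * architecture's syscall dispatch built on the helpers above.  The names
 * arch_do_syscall(), arch_sys_call_table[] and arch_nr_syscalls are
 * hypothetical placeholders; see e.g. do_syscall_64() on x86 for a real
 * user of syscall_enter_from_user_mode()/syscall_exit_to_user_mode().
 */
#if 0 /* illustration only, not compiled */
__visible noinstr void arch_do_syscall(struct pt_regs *regs, long nr)
{
        /* Context transition, IRQ enable and entry work (ptrace, seccomp, ...) */
        nr = syscall_enter_from_user_mode(regs, nr);

        instrumentation_begin();
        if (nr >= 0 && nr < arch_nr_syscalls)
                arch_sys_call_table[nr](regs);  /* hypothetical table; the arch stores the return value in regs */
        instrumentation_end();

        /* Exit work (audit, tracepoints, ptrace) and return to user mode */
        syscall_exit_to_user_mode(regs);
}
#endif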
noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
        __enter_from_user_mode(regs);
}

noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
{
        instrumentation_begin();
        exit_to_user_mode_prepare(regs);
        instrumentation_end();
        __exit_to_user_mode();
}

noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
        irqentry_state_t ret = {
                .exit_rcu = false,
        };

        if (user_mode(regs)) {
                irqentry_enter_from_user_mode(regs);
                return ret;
        }

        /*
         * If this entry hit the idle task, invoke rcu_irq_enter() whether
         * RCU is watching or not.
         *
         * Interrupts can nest when the first interrupt invokes softirq
         * processing on return, which enables interrupts.
         *
         * Scheduler ticks in the idle task can mark quiescent state and
         * terminate a grace period, if and only if the timer interrupt is
         * not nested into another interrupt.
         *
         * Checking for rcu_is_watching() here would prevent the nesting
         * interrupt from invoking rcu_irq_enter(). If that nested interrupt
         * is the tick then rcu_flavor_sched_clock_irq() would wrongfully
         * assume that it is the first interrupt and eventually claim
         * quiescent state and end grace periods prematurely.
         *
         * Unconditionally invoke rcu_irq_enter() so RCU state stays
         * consistent.
         *
         * TINY_RCU does not support EQS, so let the compiler eliminate
         * this part when enabled.
         */
        if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
                /*
                 * If RCU is not watching then the same careful
                 * sequence vs. lockdep and tracing is required
                 * as in irqentry_enter_from_user_mode().
                 */
                lockdep_hardirqs_off(CALLER_ADDR0);
                rcu_irq_enter();
                instrumentation_begin();
                trace_hardirqs_off_finish();
                instrumentation_end();

                ret.exit_rcu = true;
                return ret;
        }

        /*
         * If RCU is watching then RCU only wants to check whether it needs
         * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
         * already contains a warning when RCU is not watching, so no point
         * in having another one here.
         */
        lockdep_hardirqs_off(CALLER_ADDR0);
        instrumentation_begin();
        rcu_irq_enter_check_tick();
        trace_hardirqs_off_finish();
        instrumentation_end();

        return ret;
}

void raw_irqentry_exit_cond_resched(void)
{
        if (!preempt_count()) {
                /* Sanity check RCU and thread stack */
                rcu_irq_exit_check_preempt();
                if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
                        WARN_ON_ONCE(!on_thread_stack());
                if (need_resched())
                        preempt_schedule_irq();
        }
}
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_irqentry_exit_cond_resched(void)
{
        if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
                return;
        raw_irqentry_exit_cond_resched();
}
#endif
#endif

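/*
 * With CONFIG_PREEMPT_DYNAMIC the irqentry_exit_cond_resched() call in
 * irqentry_exit() below is routed through either a static call or a
 * static key (definitions above), so the preemption model can be switched
 * at boot time without rebuilding the kernel.
 */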
noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
        lockdep_assert_irqs_disabled();

        /* Check whether this returns to user mode */
        if (user_mode(regs)) {
                irqentry_exit_to_user_mode(regs);
        } else if (!regs_irqs_disabled(regs)) {
                /*
                 * If RCU was not watching on entry this needs to be done
                 * carefully and needs the same ordering of lockdep/tracing
                 * and RCU as the return to user mode path.
                 */
                if (state.exit_rcu) {
                        instrumentation_begin();
                        /* Tell the tracer that IRET will enable interrupts */
                        trace_hardirqs_on_prepare();
                        lockdep_hardirqs_on_prepare();
                        instrumentation_end();
                        rcu_irq_exit();
                        lockdep_hardirqs_on(CALLER_ADDR0);
                        return;
                }

                instrumentation_begin();
                if (IS_ENABLED(CONFIG_PREEMPTION))
                        irqentry_exit_cond_resched();

                /* Covers both tracing and lockdep */
                trace_hardirqs_on();
                instrumentation_end();
        } else {
                /*
                 * IRQ flags state is correct already. Just tell RCU if it
                 * was not watching on entry.
                 */
                if (state.exit_rcu)
                        rcu_irq_exit();
        }
}

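/*
 * Illustrative sketch (not part of this file): an architecture's interrupt
 * or exception handler brackets its body with irqentry_enter() and
 * irqentry_exit().  arch_handle_irq() is a hypothetical placeholder; on
 * x86 this pattern is generated by the DEFINE_IDTENTRY* macros.
 */
#if 0 /* illustration only, not compiled */
static noinstr void arch_irq_entry(struct pt_regs *regs)
{
        irqentry_state_t state = irqentry_enter(regs);

        instrumentation_begin();
        arch_handle_irq(regs);          /* hypothetical device/exception handler */
        instrumentation_end();

        irqentry_exit(regs, state);
}
#endif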
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
{
        irqentry_state_t irq_state;

        irq_state.lockdep = lockdep_hardirqs_enabled();

        __nmi_enter();
        lockdep_hardirqs_off(CALLER_ADDR0);
        lockdep_hardirq_enter();
        rcu_nmi_enter();

        instrumentation_begin();
        trace_hardirqs_off_finish();
        ftrace_nmi_enter();
        instrumentation_end();

        return irq_state;
}

void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
{
        instrumentation_begin();
        ftrace_nmi_exit();
        if (irq_state.lockdep) {
                trace_hardirqs_on_prepare();
                lockdep_hardirqs_on_prepare();
        }
        instrumentation_end();

        rcu_nmi_exit();
        lockdep_hardirq_exit();
        if (irq_state.lockdep)
                lockdep_hardirqs_on(CALLER_ADDR0);
        __nmi_exit();
}
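
/*
 * Illustrative sketch (not part of this file): NMI-like entries pair
 * irqentry_nmi_enter()/irqentry_nmi_exit() around the handler body in the
 * same way.  arch_handle_nmi() is a hypothetical placeholder; see e.g.
 * exc_nmi() on x86 for a real user.
 */
#if 0 /* illustration only, not compiled */
static noinstr void arch_nmi_entry(struct pt_regs *regs)
{
        irqentry_state_t irq_state = irqentry_nmi_enter(regs);

        instrumentation_begin();
        arch_handle_nmi(regs);          /* hypothetical NMI handler */
        instrumentation_end();

        irqentry_nmi_exit(regs, irq_state);
}
#endif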