entry-common.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_ENTRYCOMMON_H
#define __LINUX_ENTRYCOMMON_H

#include <linux/static_call_types.h>
#include <linux/ptrace.h>
#include <linux/syscalls.h>
#include <linux/seccomp.h>
#include <linux/sched.h>

#include <asm/entry-common.h>

/*
 * Define dummy _TIF work flags if not defined by the architecture or for
 * disabled functionality.
 */
#ifndef _TIF_PATCH_PENDING
# define _TIF_PATCH_PENDING		(0)
#endif

#ifndef _TIF_UPROBE
# define _TIF_UPROBE			(0)
#endif

/*
 * SYSCALL_WORK flags handled in syscall_enter_from_user_mode()
 */
#ifndef ARCH_SYSCALL_WORK_ENTER
# define ARCH_SYSCALL_WORK_ENTER	(0)
#endif

/*
 * SYSCALL_WORK flags handled in syscall_exit_to_user_mode()
 */
#ifndef ARCH_SYSCALL_WORK_EXIT
# define ARCH_SYSCALL_WORK_EXIT		(0)
#endif

#define SYSCALL_WORK_ENTER	(SYSCALL_WORK_SECCOMP |			\
				 SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
				 SYSCALL_WORK_SYSCALL_TRACE |		\
				 SYSCALL_WORK_SYSCALL_EMU |		\
				 SYSCALL_WORK_SYSCALL_AUDIT |		\
				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
				 ARCH_SYSCALL_WORK_ENTER)
#define SYSCALL_WORK_EXIT	(SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
				 SYSCALL_WORK_SYSCALL_TRACE |		\
				 SYSCALL_WORK_SYSCALL_AUDIT |		\
				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
				 SYSCALL_WORK_SYSCALL_EXIT_TRAP |	\
				 ARCH_SYSCALL_WORK_EXIT)

/*
 * TIF flags handled in exit_to_user_mode_loop()
 */
#ifndef ARCH_EXIT_TO_USER_MODE_WORK
# define ARCH_EXIT_TO_USER_MODE_WORK	(0)
#endif

#define EXIT_TO_USER_MODE_WORK						\
	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
	 ARCH_EXIT_TO_USER_MODE_WORK)

/**
 * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
 * @regs: Pointer to current's pt_regs
 *
 * Defaults to an empty implementation. Can be replaced by architecture
 * specific code.
 *
 * Invoked from syscall_enter_from_user_mode() in the non-instrumentable
 * section. Use __always_inline so the compiler cannot push it out of line
 * and make it instrumentable.
 */
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs);

#ifndef arch_enter_from_user_mode
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {}
#endif

/**
 * enter_from_user_mode - Establish state when coming from user mode
 * @regs: Pointer to current's pt_regs
 *
 * Syscall/interrupt entry disables interrupts, but user mode is traced as
 * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
 *
 * 1) Tell lockdep that interrupts are disabled
 * 2) Invoke context tracking if enabled to reactivate RCU
 * 3) Trace interrupts off state
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * disabled. The calling code has to be non-instrumentable. When the
 * function returns all state is correct and interrupts are still
 * disabled. The subsequent functions can be instrumented.
 *
 * This is invoked when there is architecture specific functionality to be
 * done between establishing state and enabling interrupts. The caller must
 * enable interrupts before invoking syscall_enter_from_user_mode_work().
 */
void enter_from_user_mode(struct pt_regs *regs);
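
/*
 * Example: a minimal sketch of the split entry sequence described above,
 * for an architecture which has to do work between establishing state and
 * enabling interrupts. The function name arch_syscall_entry() is
 * hypothetical and not part of this API; only the ordering of the helpers
 * is taken from this header:
 *
 *	__visible noinstr void arch_syscall_entry(struct pt_regs *regs)
 *	{
 *		long nr = syscall_get_nr(current, regs);
 *
 *		enter_from_user_mode(regs);
 *		... architecture specific work, interrupts still disabled ...
 *
 *		instrumentation_begin();
 *		local_irq_enable();
 *		nr = syscall_enter_from_user_mode_work(regs, nr);
 *		... dispatch syscall nr ...
 *		instrumentation_end();
 *	}
 */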

/**
 * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
 * @regs: Pointer to current's pt_regs
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * disabled. The calling code has to be non-instrumentable. When the
 * function returns all state is correct, interrupts are enabled and the
 * subsequent functions can be instrumented.
 *
 * This handles lockdep, RCU (context tracking) and tracing state, i.e.
 * the functionality provided by enter_from_user_mode().
 *
 * This is invoked when there is extra architecture specific functionality
 * to be done between establishing state and handling user mode entry work.
 */
void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);

/**
 * syscall_enter_from_user_mode_work - Check and handle work before invoking
 *				       a syscall
 * @regs: Pointer to current's pt_regs
 * @syscall: The syscall number
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
 * architecture specific work.
 *
 * Returns: The original or a modified syscall number
 *
 * If the returned syscall number is -1 then the syscall should be
 * skipped. In this case the caller may invoke syscall_set_error() or
 * syscall_set_return_value() first. If neither of those are called and -1
 * is returned, then the syscall will fail with ENOSYS.
 *
 * It handles the following work items:
 *
 *  1) syscall_work flag dependent invocations of
 *     ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter()
 *  2) Invocation of audit_syscall_entry()
 */
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);

/**
 * syscall_enter_from_user_mode - Establish state and check and handle work
 *				  before invoking a syscall
 * @regs: Pointer to current's pt_regs
 * @syscall: The syscall number
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * disabled. The calling code has to be non-instrumentable. When the
 * function returns all state is correct, interrupts are enabled and the
 * subsequent functions can be instrumented.
 *
 * This is a combination of syscall_enter_from_user_mode_prepare() and
 * syscall_enter_from_user_mode_work().
 *
 * Returns: The original or a modified syscall number. See
 * syscall_enter_from_user_mode_work() for further explanation.
 */
long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
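
/*
 * Example: a sketch of the common case built on the combined helper. The
 * dispatch details (sys_call_table layout, the return value slot in
 * pt_regs, here called "retval") are architecture specific and
 * hypothetical; the -1 handling follows the rules documented for
 * syscall_enter_from_user_mode_work():
 *
 *	__visible noinstr void arch_syscall_entry(struct pt_regs *regs)
 *	{
 *		long nr = syscall_get_nr(current, regs);
 *
 *		nr = syscall_enter_from_user_mode(regs, nr);
 *
 *		instrumentation_begin();
 *		if (nr >= 0 && nr < NR_syscalls)
 *			regs->retval = sys_call_table[nr](regs);
 *		else if (nr != -1)
 *			regs->retval = -ENOSYS;
 *		instrumentation_end();
 *
 *		syscall_exit_to_user_mode(regs);
 *	}
 */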

/**
 * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
 * @ti_work: Cached TIF flags gathered with interrupts disabled
 *
 * Defaults to local_irq_enable(). Can be supplied by architecture specific
 * code.
 */
static inline void local_irq_enable_exit_to_user(unsigned long ti_work);

#ifndef local_irq_enable_exit_to_user
static inline void local_irq_enable_exit_to_user(unsigned long ti_work)
{
	local_irq_enable();
}
#endif

/**
 * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable()
 *
 * Defaults to local_irq_disable(). Can be supplied by architecture specific
 * code.
 */
static inline void local_irq_disable_exit_to_user(void);

#ifndef local_irq_disable_exit_to_user
static inline void local_irq_disable_exit_to_user(void)
{
	local_irq_disable();
}
#endif

/**
 * arch_exit_to_user_mode_work - Architecture specific TIF work for exit
 *				 to user mode.
 * @regs: Pointer to current's pt_regs
 * @ti_work: Cached TIF flags gathered with interrupts disabled
 *
 * Invoked from exit_to_user_mode_loop() with interrupts enabled.
 *
 * Defaults to NOOP. Can be supplied by architecture specific code.
 */
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
					       unsigned long ti_work);

#ifndef arch_exit_to_user_mode_work
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
					       unsigned long ti_work)
{
}
#endif

/**
 * arch_exit_to_user_mode_prepare - Architecture specific preparation for
 *				    exit to user mode.
 * @regs: Pointer to current's pt_regs
 * @ti_work: Cached TIF flags gathered with interrupts disabled
 *
 * Invoked from exit_to_user_mode_prepare() with interrupts disabled as the
 * last function before return. Defaults to NOOP.
 */
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
						  unsigned long ti_work);

#ifndef arch_exit_to_user_mode_prepare
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
						  unsigned long ti_work)
{
}
#endif

/**
 * arch_exit_to_user_mode - Architecture specific final work before
 *			    exit to user mode.
 *
 * Invoked from exit_to_user_mode() with interrupts disabled as the last
 * function before return. Defaults to NOOP.
 *
 * This needs to be __always_inline because it is non-instrumentable code
 * invoked after context tracking switched to user mode.
 *
 * An architecture implementation must not do anything complex, no locking
 * etc. The main purpose is for speculation mitigations.
 */
static __always_inline void arch_exit_to_user_mode(void);

#ifndef arch_exit_to_user_mode
static __always_inline void arch_exit_to_user_mode(void) { }
#endif
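
/*
 * Example: an architecture supplies its own arch_exit_to_user_mode() in
 * asm/entry-common.h and redefines the name so the empty default above is
 * not used. The mitigation helper invoked here is hypothetical; the
 * override pattern matches the #ifndef convention of this header:
 *
 *	static __always_inline void arch_exit_to_user_mode(void)
 *	{
 *		mitigation_barrier();	(hypothetical, must stay simple)
 *	}
 *	#define arch_exit_to_user_mode arch_exit_to_user_mode
 */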

/**
 * arch_do_signal_or_restart - Architecture specific signal delivery function
 * @regs: Pointer to current's pt_regs
 *
 * Invoked from exit_to_user_mode_loop().
 */
void arch_do_signal_or_restart(struct pt_regs *regs);

/**
 * exit_to_user_mode - Fixup state when exiting to user mode
 *
 * Syscall/interrupt exit enables interrupts, but the kernel state is
 * interrupts disabled when this is invoked. Also tell RCU about it.
 *
 * 1) Trace interrupts on state
 * 2) Invoke context tracking if enabled to adjust RCU state
 * 3) Invoke architecture specific last minute exit code, e.g. speculation
 *    mitigations, etc.: arch_exit_to_user_mode()
 * 4) Tell lockdep that interrupts are enabled
 *
 * Invoked from architecture specific code when syscall_exit_to_user_mode()
 * is not suitable as the last step before returning to userspace. Must be
 * invoked with interrupts disabled and the caller must be
 * non-instrumentable.
 * The caller has to invoke syscall_exit_to_user_mode_work() before this.
 */
void exit_to_user_mode(void);

/**
 * syscall_exit_to_user_mode_work - Handle work before returning to user mode
 * @regs: Pointer to current's pt_regs
 *
 * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
 * exit_to_user_mode() to perform the final transition to user mode.
 *
 * Calling convention is the same as for syscall_exit_to_user_mode() and it
 * returns with all work handled and interrupts disabled. The caller must
 * invoke exit_to_user_mode() before actually switching to user mode to
 * make the final state transitions. Interrupts must stay disabled between
 * return from this function and the invocation of exit_to_user_mode().
 */
void syscall_exit_to_user_mode_work(struct pt_regs *regs);

/**
 * syscall_exit_to_user_mode - Handle work before returning to user mode
 * @regs: Pointer to current's pt_regs
 *
 * Invoked with interrupts enabled and fully valid regs. Returns with all
 * work handled, interrupts disabled such that the caller can immediately
 * switch to user mode. Called from architecture specific syscall and ret
 * from fork code.
 *
 * The call order is:
 *  1) One-time syscall exit work:
 *	- rseq syscall exit
 *	- audit
 *	- syscall tracing
 *	- ptrace (single stepping)
 *
 *  2) Preparatory work
 *	- Exit to user mode loop (common TIF handling). Invokes
 *	  arch_exit_to_user_mode_work() for architecture specific TIF work
 *	- Architecture specific one time work arch_exit_to_user_mode_prepare()
 *	- Address limit and lockdep checks
 *
 *  3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
 *     functionality in exit_to_user_mode().
 *
 * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
 * exit_to_user_mode(). This function is preferred unless there is a
 * compelling architectural reason to use the separate functions.
 */
void syscall_exit_to_user_mode(struct pt_regs *regs);
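
/*
 * Example: a sketch of the split exit path for an architecture which needs
 * last minute work between the TIF work handling and the final transition.
 * The architecture specific step shown is hypothetical; the interrupt
 * state rules are the ones documented above:
 *
 *	syscall_exit_to_user_mode_work(regs);
 *	... non-instrumentable architecture specific work,
 *	    interrupts stay disabled ...
 *	exit_to_user_mode();
 *	... fall through to the low level ASM return path ...
 */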

/**
 * irqentry_enter_from_user_mode - Establish state before invoking the irq handler
 * @regs: Pointer to current's pt_regs
 *
 * Invoked from architecture specific entry code with interrupts disabled.
 * Can only be called when the interrupt entry came from user mode. The
 * calling code must be non-instrumentable. When the function returns all
 * state is correct and the subsequent functions can be instrumented.
 *
 * The function establishes state (lockdep, RCU (context tracking), tracing).
 */
void irqentry_enter_from_user_mode(struct pt_regs *regs);

/**
 * irqentry_exit_to_user_mode - Interrupt exit work
 * @regs: Pointer to current's pt_regs
 *
 * Invoked with interrupts disabled and fully valid regs. Returns with all
 * work handled, interrupts disabled such that the caller can immediately
 * switch to user mode. Called from architecture specific interrupt
 * handling code.
 *
 * The call order is #2 and #3 as described in syscall_exit_to_user_mode().
 * Interrupt exit does not invoke #1, which is the syscall specific one-time
 * work.
 */
void irqentry_exit_to_user_mode(struct pt_regs *regs);

#ifndef irqentry_state
/**
 * struct irqentry_state - Opaque object for exception state storage
 * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
 *	      exit path has to invoke rcu_irq_exit().
 * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
 *	     lockdep state is restored correctly on exit from nmi.
 *
 * This opaque object is filled in by the irqentry_*_enter() functions and
 * must be passed back into the corresponding irqentry_*_exit() functions
 * when the exception is complete.
 *
 * Callers of irqentry_*_[enter|exit]() must consider this structure opaque
 * and all members private. Descriptions of the members are provided to aid
 * in the maintenance of the irqentry_*() functions.
 */
typedef struct irqentry_state {
	union {
		bool	exit_rcu;
		bool	lockdep;
	};
} irqentry_state_t;
#endif

/**
 * irqentry_enter - Handle state tracking on ordinary interrupt entries
 * @regs: Pointer to pt_regs of interrupted context
 *
 * Invokes:
 *  - lockdep irqflag state tracking as low level ASM entry disabled
 *    interrupts.
 *
 *  - Context tracking if the exception hit user mode.
 *
 *  - The hardirq tracer to keep the state consistent as low level ASM
 *    entry disabled interrupts.
 *
 * As a precondition, this requires that the entry came from user mode,
 * idle, or a kernel context in which RCU is watching.
 *
 * For kernel mode entries RCU handling is done conditionally. If RCU is
 * watching then the only RCU requirement is to check whether the tick has
 * to be restarted. If RCU is not watching then rcu_irq_enter() has to be
 * invoked on entry and rcu_irq_exit() on exit.
 *
 * Avoiding the rcu_irq_enter/exit() calls is an optimization but also
 * solves the problem of kernel mode pagefaults which can schedule, which
 * is not possible after invoking rcu_irq_enter() without undoing it.
 *
 * For user mode entries irqentry_enter_from_user_mode() is invoked to
 * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
 * would not be possible.
 *
 * Returns: An opaque object that must be passed to irqentry_exit()
 */
irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);

/**
 * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt
 *
 * Conditional reschedule with additional sanity checks.
 */
void raw_irqentry_exit_cond_resched(void);
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#define irqentry_exit_cond_resched_dynamic_enabled	raw_irqentry_exit_cond_resched
#define irqentry_exit_cond_resched_dynamic_disabled	NULL
DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#define irqentry_exit_cond_resched()	static_call(irqentry_exit_cond_resched)()
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_irqentry_exit_cond_resched(void);
#define irqentry_exit_cond_resched()	dynamic_irqentry_exit_cond_resched()
#endif
#else /* CONFIG_PREEMPT_DYNAMIC */
#define irqentry_exit_cond_resched()	raw_irqentry_exit_cond_resched()
#endif /* CONFIG_PREEMPT_DYNAMIC */

/**
 * irqentry_exit - Handle return from exception that used irqentry_enter()
 * @regs: Pointer to pt_regs (exception entry regs)
 * @state: Return value from matching call to irqentry_enter()
 *
 * Depending on the return target (kernel/user) this runs the necessary
 * preemption and work checks if possible and required and returns to
 * the caller with interrupts disabled and no further work pending.
 *
 * This is the last action before returning to the low level ASM code which
 * just needs to return to the appropriate context.
 *
 * Counterpart to irqentry_enter().
 */
void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);
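
/*
 * Example: a typical exception handler wrapper built on the pair above.
 * The wrapper and handler names are hypothetical; the state object must be
 * passed back unmodified, as documented for struct irqentry_state:
 *
 *	__visible noinstr void arch_handle_exception(struct pt_regs *regs)
 *	{
 *		irqentry_state_t state = irqentry_enter(regs);
 *
 *		instrumentation_begin();
 *		handle_exception(regs);		(hypothetical C handler)
 *		instrumentation_end();
 *
 *		irqentry_exit(regs, state);
 *	}
 */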

/**
 * irqentry_nmi_enter - Handle NMI entry
 * @regs: Pointer to current's pt_regs
 *
 * Similar to irqentry_enter() but taking care of the NMI constraints.
 */
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);

/**
 * irqentry_nmi_exit - Handle return from NMI handling
 * @regs: Pointer to pt_regs (NMI entry regs)
 * @irq_state: Return value from matching call to irqentry_nmi_enter()
 *
 * Last action before returning to the low level assembly code.
 *
 * Counterpart to irqentry_nmi_enter().
 */
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);

#endif