rcu.h
/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Read-Copy Update definitions shared among RCU implementations.
 *
 * Copyright IBM Corporation, 2011
 *
 * Author: Paul E. McKenney <paulmck@linux.ibm.com>
 */

#ifndef __LINUX_RCU_H
#define __LINUX_RCU_H

#include <trace/events/rcu.h>

/* Offset to allow distinguishing irq vs. task-based idle entry/exit. */
#define DYNTICK_IRQ_NONIDLE	((LONG_MAX / 2) + 1)


/*
 * Grace-period counter management.
 */

#define RCU_SEQ_CTR_SHIFT	2
#define RCU_SEQ_STATE_MASK	((1 << RCU_SEQ_CTR_SHIFT) - 1)

extern int sysctl_sched_rt_runtime;

/*
 * Return the counter portion of a sequence number previously returned
 * by rcu_seq_snap() or rcu_seq_current().
 */
static inline unsigned long rcu_seq_ctr(unsigned long s)
{
	return s >> RCU_SEQ_CTR_SHIFT;
}

/*
 * Return the state portion of a sequence number previously returned
 * by rcu_seq_snap() or rcu_seq_current().
 */
static inline int rcu_seq_state(unsigned long s)
{
	return s & RCU_SEQ_STATE_MASK;
}

/*
 * Set the state portion of the pointed-to sequence number.
 * The caller is responsible for preventing conflicting updates.
 */
static inline void rcu_seq_set_state(unsigned long *sp, int newstate)
{
	WARN_ON_ONCE(newstate & ~RCU_SEQ_STATE_MASK);
	WRITE_ONCE(*sp, (*sp & ~RCU_SEQ_STATE_MASK) + newstate);
}

/* Adjust sequence number for start of update-side operation. */
static inline void rcu_seq_start(unsigned long *sp)
{
	WRITE_ONCE(*sp, *sp + 1);
	smp_mb(); /* Ensure update-side operation after counter increment. */
	WARN_ON_ONCE(rcu_seq_state(*sp) != 1);
}

/* Compute the end-of-grace-period value for the specified sequence number. */
static inline unsigned long rcu_seq_endval(unsigned long *sp)
{
	return (*sp | RCU_SEQ_STATE_MASK) + 1;
}

/* Adjust sequence number for end of update-side operation. */
static inline void rcu_seq_end(unsigned long *sp)
{
	smp_mb(); /* Ensure update-side operation before counter increment. */
	WARN_ON_ONCE(!rcu_seq_state(*sp));
	WRITE_ONCE(*sp, rcu_seq_endval(sp));
}

/*
 * rcu_seq_snap - Take a snapshot of the update side's sequence number.
 *
 * This function returns the earliest value of the grace-period sequence number
 * that will indicate that a full grace period has elapsed since the current
 * time.  Once the grace-period sequence number has reached this value, it will
 * be safe to invoke all callbacks that have been registered prior to the
 * current time.  This value is the current grace-period number plus two to the
 * power of the number of low-order bits reserved for state, then rounded up to
 * the next value in which the state bits are all zero.
 */
static inline unsigned long rcu_seq_snap(unsigned long *sp)
{
	unsigned long s;

	s = (READ_ONCE(*sp) + 2 * RCU_SEQ_STATE_MASK + 1) & ~RCU_SEQ_STATE_MASK;
	smp_mb(); /* Above access must not bleed into critical section. */
	return s;
}

/* Return the current value of the update side's sequence number, no ordering. */
static inline unsigned long rcu_seq_current(unsigned long *sp)
{
	return READ_ONCE(*sp);
}
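/*
 * Illustrative sketch, not part of this header: with RCU_SEQ_CTR_SHIFT == 2,
 * a grace-period sequence number packs the counter in the upper bits and a
 * two-bit state in the low-order bits.  A single grace period then moves a
 * hypothetical gp_seq variable through, for example:
 *
 *	unsigned long gp_seq = 0x100;	// counter 0x40, state 0 (idle)
 *
 *	rcu_seq_start(&gp_seq);		// gp_seq == 0x101, state 1 (in progress)
 *	rcu_seq_end(&gp_seq);		// gp_seq == 0x104, counter 0x41, state 0
 *
 * A snapshot taken while that grace period was in progress, that is,
 * rcu_seq_snap(&gp_seq) == 0x108, does not satisfy rcu_seq_done() until
 * one further full grace period has elapsed beyond the in-progress one.
 */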
/*
 * Given a snapshot from rcu_seq_snap(), determine whether or not the
 * corresponding update-side operation has started.
 */
static inline bool rcu_seq_started(unsigned long *sp, unsigned long s)
{
	return ULONG_CMP_LT((s - 1) & ~RCU_SEQ_STATE_MASK, READ_ONCE(*sp));
}

/*
 * Given a snapshot from rcu_seq_snap(), determine whether or not a
 * full update-side operation has occurred.
 */
static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
{
	return ULONG_CMP_GE(READ_ONCE(*sp), s);
}

/*
 * Has a grace period completed since the time the old gp_seq was collected?
 */
static inline bool rcu_seq_completed_gp(unsigned long old, unsigned long new)
{
	return ULONG_CMP_LT(old, new & ~RCU_SEQ_STATE_MASK);
}

/*
 * Has a grace period started since the time the old gp_seq was collected?
 */
static inline bool rcu_seq_new_gp(unsigned long old, unsigned long new)
{
	return ULONG_CMP_LT((old + RCU_SEQ_STATE_MASK) & ~RCU_SEQ_STATE_MASK,
			    new);
}

/*
 * Roughly how many full grace periods have elapsed between the collection
 * of the two specified grace periods?
 */
static inline unsigned long rcu_seq_diff(unsigned long new, unsigned long old)
{
	unsigned long rnd_diff;

	if (old == new)
		return 0;
	/*
	 * Compute the number of grace periods (still shifted up), plus
	 * one if either of new and old is not an exact grace period.
	 */
	rnd_diff = (new & ~RCU_SEQ_STATE_MASK) -
		   ((old + RCU_SEQ_STATE_MASK) & ~RCU_SEQ_STATE_MASK) +
		   ((new & RCU_SEQ_STATE_MASK) || (old & RCU_SEQ_STATE_MASK));
	if (ULONG_CMP_GE(RCU_SEQ_STATE_MASK, rnd_diff))
		return 1; /* Definitely no grace period has elapsed. */
	return ((rnd_diff - RCU_SEQ_STATE_MASK - 1) >> RCU_SEQ_CTR_SHIFT) + 2;
}
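/*
 * Illustrative sketch, not a kernel API: taken together, the helpers above
 * support a poll-based wait for a grace period, along these lines:
 *
 *	unsigned long s = rcu_seq_snap(&gp_seq);  // first GP that suffices
 *
 *	while (!rcu_seq_done(&gp_seq, s))
 *		schedule_timeout_uninterruptible(1);  // or block/wakeup
 *
 * The real grace-period machinery layers memory ordering and wakeups on
 * top of this shape rather than spinning, but the snapshot/done pairing
 * is the core of the polled grace-period interfaces.
 */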
/*
 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
 * by call_rcu() and rcu callback execution, and are therefore not part
 * of the RCU API.  These are in rcupdate.h because they are used by all
 * RCU implementations.
 */

#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
# define STATE_RCU_HEAD_READY	0
# define STATE_RCU_HEAD_QUEUED	1

extern const struct debug_obj_descr rcuhead_debug_descr;

static inline int debug_rcu_head_queue(struct rcu_head *head)
{
	int r1;

	r1 = debug_object_activate(head, &rcuhead_debug_descr);
	debug_object_active_state(head, &rcuhead_debug_descr,
				  STATE_RCU_HEAD_READY,
				  STATE_RCU_HEAD_QUEUED);
	return r1;
}

static inline void debug_rcu_head_unqueue(struct rcu_head *head)
{
	debug_object_active_state(head, &rcuhead_debug_descr,
				  STATE_RCU_HEAD_QUEUED,
				  STATE_RCU_HEAD_READY);
	debug_object_deactivate(head, &rcuhead_debug_descr);
}
#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
static inline int debug_rcu_head_queue(struct rcu_head *head)
{
	return 0;
}

static inline void debug_rcu_head_unqueue(struct rcu_head *head)
{
}
#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */

extern int rcu_cpu_stall_suppress_at_boot;

static inline bool rcu_stall_is_suppressed_at_boot(void)
{
	return rcu_cpu_stall_suppress_at_boot && !rcu_inkernel_boot_has_ended();
}

#ifdef CONFIG_RCU_STALL_COMMON

extern int rcu_cpu_stall_ftrace_dump;
extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout;
extern int rcu_exp_cpu_stall_timeout;
int rcu_jiffies_till_stall_check(void);
int rcu_exp_jiffies_till_stall_check(void);

static inline bool rcu_stall_is_suppressed(void)
{
	return rcu_stall_is_suppressed_at_boot() || rcu_cpu_stall_suppress;
}

#define rcu_ftrace_dump_stall_suppress() \
do { \
	if (!rcu_cpu_stall_suppress) \
		rcu_cpu_stall_suppress = 3; \
} while (0)

#define rcu_ftrace_dump_stall_unsuppress() \
do { \
	if (rcu_cpu_stall_suppress == 3) \
		rcu_cpu_stall_suppress = 0; \
} while (0)

#else /* #ifdef CONFIG_RCU_STALL_COMMON */

static inline bool rcu_stall_is_suppressed(void)
{
	return rcu_stall_is_suppressed_at_boot();
}
#define rcu_ftrace_dump_stall_suppress()
#define rcu_ftrace_dump_stall_unsuppress()
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */

/*
 * Strings used in tracepoints need to be exported via the
 * tracing system such that tools like perf and trace-cmd can
 * translate the string address pointers to actual text.
 */
#define TPS(x)	tracepoint_string(x)

/*
 * Dump the ftrace buffer, but only one time per callsite per boot.
 */
#define rcu_ftrace_dump(oops_dump_mode) \
do { \
	static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
	\
	if (!atomic_read(&___rfd_beenhere) && \
	    !atomic_xchg(&___rfd_beenhere, 1)) { \
		tracing_off(); \
		rcu_ftrace_dump_stall_suppress(); \
		ftrace_dump(oops_dump_mode); \
		rcu_ftrace_dump_stall_unsuppress(); \
	} \
} while (0)

void rcu_early_boot_tests(void);
void rcu_test_sync_prims(void);
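/*
 * Note on the rcu_ftrace_dump() one-shot pattern above (explanatory,
 * describing assumed intent): the cheap atomic_read() avoids the costly
 * atomic_xchg() on every call after the first, and the atomic_xchg()
 * guarantees that exactly one caller per callsite wins even if multiple
 * CPUs race to dump at the same time:
 *
 *	if (!atomic_read(&flag) &&	// cheap early-out for later calls
 *	    !atomic_xchg(&flag, 1))	// exactly one winner per boot
 *		do_dump();		// hypothetical dump operation
 */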
/*
 * This function really isn't for public consumption, but RCU is special in
 * that context switches can allow the state machine to make progress.
 */
extern void resched_cpu(int cpu);

#if defined(CONFIG_SRCU) || !defined(CONFIG_TINY_RCU)

#include <linux/rcu_node_tree.h>

extern int rcu_num_lvls;
extern int num_rcu_lvl[];
extern int rcu_num_nodes;
extern bool rcu_fanout_exact;
extern int rcu_fanout_leaf;

/*
 * Compute the per-level fanout, either using the exact fanout specified
 * or balancing the tree, depending on the rcu_fanout_exact boot parameter.
 */
static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
{
	int i;

	for (i = 0; i < RCU_NUM_LVLS; i++)
		levelspread[i] = INT_MIN;
	if (rcu_fanout_exact) {
		levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
		for (i = rcu_num_lvls - 2; i >= 0; i--)
			levelspread[i] = RCU_FANOUT;
	} else {
		int ccur;
		int cprv;

		cprv = nr_cpu_ids;
		for (i = rcu_num_lvls - 1; i >= 0; i--) {
			ccur = levelcnt[i];
			levelspread[i] = (cprv + ccur - 1) / ccur;
			cprv = ccur;
		}
	}
}

extern void rcu_init_geometry(void);

/* Returns a pointer to the first leaf rcu_node structure. */
#define rcu_first_leaf_node() (rcu_state.level[rcu_num_lvls - 1])

/* Is this rcu_node a leaf? */
#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1)

/* Is this rcu_node the last leaf? */
#define rcu_is_last_leaf_node(rnp) ((rnp) == &rcu_state.node[rcu_num_nodes - 1])

/*
 * Do a full breadth-first scan of the {s,}rcu_node structures for the
 * specified state structure (for SRCU) or the only rcu_state structure
 * (for RCU).
 */
#define srcu_for_each_node_breadth_first(sp, rnp) \
	for ((rnp) = &(sp)->node[0]; \
	     (rnp) < &(sp)->node[rcu_num_nodes]; (rnp)++)
#define rcu_for_each_node_breadth_first(rnp) \
	srcu_for_each_node_breadth_first(&rcu_state, rnp)

/*
 * Scan the leaves of the rcu_node hierarchy for the rcu_state structure.
 * Note that if there is a singleton rcu_node tree with but one rcu_node
 * structure, this loop -will- visit the rcu_node structure.  It is still
 * a leaf node, even if it is also the root node.
 */
#define rcu_for_each_leaf_node(rnp) \
	for ((rnp) = rcu_first_leaf_node(); \
	     (rnp) < &rcu_state.node[rcu_num_nodes]; (rnp)++)

/*
 * Iterate over all possible CPUs in a leaf RCU node.
 */
#define for_each_leaf_node_possible_cpu(rnp, cpu) \
	for (WARN_ON_ONCE(!rcu_is_leaf_node(rnp)), \
	     (cpu) = cpumask_next((rnp)->grplo - 1, cpu_possible_mask); \
	     (cpu) <= rnp->grphi; \
	     (cpu) = cpumask_next((cpu), cpu_possible_mask))

/*
 * Iterate over all CPUs in a leaf RCU node's specified mask.
 */
#define rcu_find_next_bit(rnp, cpu, mask) \
	((rnp)->grplo + find_next_bit(&(mask), BITS_PER_LONG, (cpu)))
#define for_each_leaf_node_cpu_mask(rnp, cpu, mask) \
	for (WARN_ON_ONCE(!rcu_is_leaf_node(rnp)), \
	     (cpu) = rcu_find_next_bit((rnp), 0, (mask)); \
	     (cpu) <= rnp->grphi; \
	     (cpu) = rcu_find_next_bit((rnp), (cpu) + 1 - (rnp->grplo), (mask)))
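/*
 * Worked example for rcu_init_levelspread() above (illustrative numbers,
 * not from this file): with nr_cpu_ids == 96 and a two-level tree having
 * levelcnt = { 1, 6 }, the balanced (!rcu_fanout_exact) branch computes,
 * bottom-up:
 *
 *	leaf level: cprv = 96, ccur = 6 -> levelspread[1] = (96 + 5) / 6 = 16
 *	root level: cprv = 6,  ccur = 1 -> levelspread[0] = (6 + 0) / 1 = 6
 *
 * That is, each of the six leaf rcu_node structures covers 16 CPUs, and
 * the single root rcu_node structure fans out to those six leaves.
 */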
/*
 * Wrappers for the rcu_node::lock acquire and release.
 *
 * Because the rcu_node structures form a tree, tree-traversal locking
 * acquires different locks at each level.  This in turn means that an
 * UNLOCK of one level followed by a LOCK of another level does not imply
 * a full memory barrier, and, most importantly, that transitivity is lost.
 *
 * In order to restore full ordering between tree levels, augment the regular
 * lock acquire functions with smp_mb__after_unlock_lock().
 *
 * Because ->lock of struct rcu_node is a __private field, one should use
 * these wrappers rather than directly calling raw_spin_{lock,unlock}* on
 * ->lock.
 */
#define raw_spin_lock_rcu_node(p) \
do { \
	raw_spin_lock(&ACCESS_PRIVATE(p, lock)); \
	smp_mb__after_unlock_lock(); \
} while (0)

#define raw_spin_unlock_rcu_node(p) \
do { \
	lockdep_assert_irqs_disabled(); \
	raw_spin_unlock(&ACCESS_PRIVATE(p, lock)); \
} while (0)

#define raw_spin_lock_irq_rcu_node(p) \
do { \
	raw_spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \
	smp_mb__after_unlock_lock(); \
} while (0)

#define raw_spin_unlock_irq_rcu_node(p) \
do { \
	lockdep_assert_irqs_disabled(); \
	raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock)); \
} while (0)

#define raw_spin_lock_irqsave_rcu_node(p, flags) \
do { \
	raw_spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
	smp_mb__after_unlock_lock(); \
} while (0)

#define raw_spin_unlock_irqrestore_rcu_node(p, flags) \
do { \
	lockdep_assert_irqs_disabled(); \
	raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags); \
} while (0)

#define raw_spin_trylock_rcu_node(p) \
({ \
	bool ___locked = raw_spin_trylock(&ACCESS_PRIVATE(p, lock)); \
	\
	if (___locked) \
		smp_mb__after_unlock_lock(); \
	___locked; \
})

#define raw_lockdep_assert_held_rcu_node(p) \
	lockdep_assert_held(&ACCESS_PRIVATE(p, lock))

#endif /* #if defined(CONFIG_SRCU) || !defined(CONFIG_TINY_RCU) */
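/*
 * Illustrative sketch of an assumed caller, not from this file: code
 * walking the rcu_node tree acquires and releases each level's ->lock
 * through the wrappers above, for example:
 *
 *	unsigned long flags;
 *
 *	raw_spin_lock_irqsave_rcu_node(rnp, flags);		// leaf level
 *	// ...record a quiescent state...
 *	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 *	raw_spin_lock_irqsave_rcu_node(rnp->parent, flags);	// one level up
 *	// ...propagate it...
 *	raw_spin_unlock_irqrestore_rcu_node(rnp->parent, flags);
 *
 * The smp_mb__after_unlock_lock() in the lock-side wrappers is what makes
 * the UNLOCK at one level plus the LOCK at the next behave as a full
 * memory barrier, restoring the transitivity discussed above.
 */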
#ifdef CONFIG_TINY_RCU
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
static inline bool rcu_gp_is_normal(void) { return true; }
static inline bool rcu_gp_is_expedited(void) { return false; }
static inline void rcu_expedite_gp(void) { }
static inline void rcu_unexpedite_gp(void) { }
static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
#else /* #ifdef CONFIG_TINY_RCU */
bool rcu_gp_is_normal(void);     /* Internal RCU use. */
bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
void rcu_expedite_gp(void);
void rcu_unexpedite_gp(void);
void rcupdate_announce_bootup_oddness(void);
#ifdef CONFIG_TASKS_RCU_GENERIC
void show_rcu_tasks_gp_kthreads(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
static inline void show_rcu_tasks_gp_kthreads(void) {}
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
void rcu_request_urgent_qs_task(struct task_struct *t);
#endif /* #else #ifdef CONFIG_TINY_RCU */

#define RCU_SCHEDULER_INACTIVE	0
#define RCU_SCHEDULER_INIT	1
#define RCU_SCHEDULER_RUNNING	2

enum rcutorture_type {
	RCU_FLAVOR,
	RCU_TASKS_FLAVOR,
	RCU_TASKS_RUDE_FLAVOR,
	RCU_TASKS_TRACING_FLAVOR,
	RCU_TRIVIAL_FLAVOR,
	SRCU_FLAVOR,
	INVALID_RCU_FLAVOR
};

#if defined(CONFIG_TREE_RCU)
void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
			    unsigned long *gp_seq);
void do_trace_rcu_torture_read(const char *rcutorturename,
			       struct rcu_head *rhp,
			       unsigned long secs,
			       unsigned long c_old,
			       unsigned long c);
void rcu_gp_set_torture_wait(int duration);
#else
static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
					  int *flags, unsigned long *gp_seq)
{
	*flags = 0;
	*gp_seq = 0;
}
#ifdef CONFIG_RCU_TRACE
void do_trace_rcu_torture_read(const char *rcutorturename,
			       struct rcu_head *rhp,
			       unsigned long secs,
			       unsigned long c_old,
			       unsigned long c);
#else
#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
	do { } while (0)
#endif
static inline void rcu_gp_set_torture_wait(int duration) { }
#endif

#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask);
#endif

#ifdef CONFIG_TINY_SRCU

static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
					   struct srcu_struct *sp, int *flags,
					   unsigned long *gp_seq)
{
	if (test_type != SRCU_FLAVOR)
		return;
	*flags = 0;
	*gp_seq = sp->srcu_idx;
}

#elif defined(CONFIG_TREE_SRCU)

void srcutorture_get_gp_data(enum rcutorture_type test_type,
			     struct srcu_struct *sp, int *flags,
			     unsigned long *gp_seq);

#endif
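/*
 * Illustrative sketch of assumed rcutorture-side usage, not from this
 * file: the *_get_gp_data() hooks above let rcutorture sample grace-period
 * progress in a flavor-independent way, roughly:
 *
 *	int flags;
 *	unsigned long gp_seq;
 *
 *	rcutorture_get_gp_data(RCU_FLAVOR, &flags, &gp_seq);
 *	pr_info("gp_seq: %lu flags: %#x\n", gp_seq, flags);
 *
 * Builds lacking CONFIG_TREE_RCU or CONFIG_TREE_SRCU stub these out or
 * hand back minimal state, which keeps rcutorture buildable everywhere.
 */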
#ifdef CONFIG_TINY_RCU
static inline bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) { return false; }
static inline unsigned long rcu_get_gp_seq(void) { return 0; }
static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
static inline unsigned long
srcu_batches_completed(struct srcu_struct *sp) { return 0; }
static inline void rcu_force_quiescent_state(void) { }
static inline bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) { return true; }
static inline void show_rcu_gp_kthreads(void) { }
static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
static inline void rcu_fwd_progress_check(unsigned long j) { }
static inline void rcu_gp_slow_register(atomic_t *rgssp) { }
static inline void rcu_gp_slow_unregister(atomic_t *rgssp) { }
#else /* #ifdef CONFIG_TINY_RCU */
bool rcu_dynticks_zero_in_eqs(int cpu, int *vp);
unsigned long rcu_get_gp_seq(void);
unsigned long rcu_exp_batches_completed(void);
unsigned long srcu_batches_completed(struct srcu_struct *sp);
bool rcu_check_boost_fail(unsigned long gp_state, int *cpup);
void show_rcu_gp_kthreads(void);
int rcu_get_gp_kthreads_prio(void);
void rcu_fwd_progress_check(unsigned long j);
void rcu_force_quiescent_state(void);
extern struct workqueue_struct *rcu_gp_wq;
#ifdef CONFIG_RCU_EXP_KTHREAD
extern struct kthread_worker *rcu_exp_gp_kworker;
extern struct kthread_worker *rcu_exp_par_gp_kworker;
#else /* !CONFIG_RCU_EXP_KTHREAD */
extern struct workqueue_struct *rcu_par_gp_wq;
#endif /* CONFIG_RCU_EXP_KTHREAD */
void rcu_gp_slow_register(atomic_t *rgssp);
void rcu_gp_slow_unregister(atomic_t *rgssp);
#endif /* #else #ifdef CONFIG_TINY_RCU */

#ifdef CONFIG_RCU_NOCB_CPU
void rcu_bind_current_to_nocb(void);
#else
static inline void rcu_bind_current_to_nocb(void) { }
#endif

#if !defined(CONFIG_TINY_RCU) && defined(CONFIG_TASKS_RCU)
void show_rcu_tasks_classic_gp_kthread(void);
#else
static inline void show_rcu_tasks_classic_gp_kthread(void) {}
#endif
#if !defined(CONFIG_TINY_RCU) && defined(CONFIG_TASKS_RUDE_RCU)
void show_rcu_tasks_rude_gp_kthread(void);
#else
static inline void show_rcu_tasks_rude_gp_kthread(void) {}
#endif
#if !defined(CONFIG_TINY_RCU) && defined(CONFIG_TASKS_TRACE_RCU)
void show_rcu_tasks_trace_gp_kthread(void);
#else
static inline void show_rcu_tasks_trace_gp_kthread(void) {}
#endif

#endif /* __LINUX_RCU_H */