rtmutex.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
rtmutex.c (47924B)
      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
      4 *
      5 * started by Ingo Molnar and Thomas Gleixner.
      6 *
      7 *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
      8 *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
      9 *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
     10 *  Copyright (C) 2006 Esben Nielsen
     11 * Adaptive Spinlocks:
     12 *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
     13 *				     and Peter Morreale,
     14 * Adaptive Spinlocks simplification:
     15 *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
     16 *
     17 *  See Documentation/locking/rt-mutex-design.rst for details.
     18 */
     19#include <linux/sched.h>
     20#include <linux/sched/debug.h>
     21#include <linux/sched/deadline.h>
     22#include <linux/sched/signal.h>
     23#include <linux/sched/rt.h>
     24#include <linux/sched/wake_q.h>
     25#include <linux/ww_mutex.h>
     26
     27#include <trace/events/lock.h>
     28
     29#include "rtmutex_common.h"
     30
     31#ifndef WW_RT
     32# define build_ww_mutex()	(false)
     33# define ww_container_of(rtm)	NULL
     34
     35static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
     36					struct rt_mutex *lock,
     37					struct ww_acquire_ctx *ww_ctx)
     38{
     39	return 0;
     40}
     41
     42static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
     43					    struct ww_acquire_ctx *ww_ctx)
     44{
     45}
     46
     47static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
     48					  struct ww_acquire_ctx *ww_ctx)
     49{
     50}
     51
     52static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
     53					struct rt_mutex_waiter *waiter,
     54					struct ww_acquire_ctx *ww_ctx)
     55{
     56	return 0;
     57}
     58
     59#else
     60# define build_ww_mutex()	(true)
     61# define ww_container_of(rtm)	container_of(rtm, struct ww_mutex, base)
     62# include "ww_mutex.h"
     63#endif
     64
     65/*
     66 * lock->owner state tracking:
     67 *
     68 * lock->owner holds the task_struct pointer of the owner. Bit 0
     69 * is used to keep track of the "lock has waiters" state.
     70 *
     71 * owner	bit0
     72 * NULL		0	lock is free (fast acquire possible)
     73 * NULL		1	lock is free and has waiters and the top waiter
     74 *				is going to take the lock*
     75 * taskpointer	0	lock is held (fast release possible)
     76 * taskpointer	1	lock is held and has waiters**
     77 *
     78 * The fast atomic compare exchange based acquire and release is only
     79 * possible when bit 0 of lock->owner is 0.
     80 *
     81 * (*) It can also be a transitional state while grabbing the lock
     82 * with ->wait_lock held. To prevent any fast path cmpxchg on the lock,
     83 * we need to set bit 0 before looking at the lock, and the owner may be
     84 * NULL during this short window, hence this can be a transitional state.
     85 *
     86 * (**) There is a small time when bit 0 is set but there are no
     87 * waiters. This can happen when grabbing the lock in the slow path.
     88 * To prevent a cmpxchg of the owner releasing the lock, we need to
     89 * set this bit before looking at the lock.
     90 */
     91
     92static __always_inline void
     93rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
     94{
     95	unsigned long val = (unsigned long)owner;
     96
     97	if (rt_mutex_has_waiters(lock))
     98		val |= RT_MUTEX_HAS_WAITERS;
     99
    100	WRITE_ONCE(lock->owner, (struct task_struct *)val);
    101}
    102
    103static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
    104{
    105	lock->owner = (struct task_struct *)
    106			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
    107}
    108
    109static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
    110{
    111	unsigned long owner, *p = (unsigned long *) &lock->owner;
    112
    113	if (rt_mutex_has_waiters(lock))
    114		return;
    115
    116	/*
    117	 * The rbtree has no waiters enqueued, now make sure that the
    118	 * lock->owner still has the waiters bit set, otherwise the
    119	 * following can happen:
    120	 *
    121	 * CPU 0	CPU 1		CPU2
    122	 * l->owner=T1
    123	 *		rt_mutex_lock(l)
    124	 *		lock(l->lock)
    125	 *		l->owner = T1 | HAS_WAITERS;
    126	 *		enqueue(T2)
    127	 *		boost()
    128	 *		  unlock(l->lock)
    129	 *		block()
    130	 *
    131	 *				rt_mutex_lock(l)
    132	 *				lock(l->lock)
    133	 *				l->owner = T1 | HAS_WAITERS;
    134	 *				enqueue(T3)
    135	 *				boost()
    136	 *				  unlock(l->lock)
    137	 *				block()
    138	 *		signal(->T2)	signal(->T3)
    139	 *		lock(l->lock)
    140	 *		dequeue(T2)
    141	 *		deboost()
    142	 *		  unlock(l->lock)
    143	 *				lock(l->lock)
    144	 *				dequeue(T3)
    145	 *				 ==> wait list is empty
    146	 *				deboost()
    147	 *				 unlock(l->lock)
    148	 *		lock(l->lock)
    149	 *		fixup_rt_mutex_waiters()
    150	 *		  if (wait_list_empty(l) {
    151	 *		    l->owner = owner
    152	 *		    owner = l->owner & ~HAS_WAITERS;
    153	 *		      ==> l->owner = T1
    154	 *		  }
    155	 *				lock(l->lock)
    156	 * rt_mutex_unlock(l)		fixup_rt_mutex_waiters()
    157	 *				  if (wait_list_empty(l) {
    158	 *				    owner = l->owner & ~HAS_WAITERS;
    159	 * cmpxchg(l->owner, T1, NULL)
    160	 *  ===> Success (l->owner = NULL)
    161	 *
    162	 *				    l->owner = owner
    163	 *				      ==> l->owner = T1
    164	 *				  }
    165	 *
    166	 * With the check for the waiter bit in place T3 on CPU2 will not
    167	 * overwrite. All tasks fiddling with the waiters bit are
    168	 * serialized by l->lock, so nothing else can modify the waiters
    169	 * bit. If the bit is set then nothing can change l->owner either
    170	 * so the simple RMW is safe. The cmpxchg() will simply fail if it
    171	 * happens in the middle of the RMW because the waiters bit is
    172	 * still set.
    173	 */
    174	owner = READ_ONCE(*p);
    175	if (owner & RT_MUTEX_HAS_WAITERS)
    176		WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
    177}
    178
    179/*
    180 * We can speed up the acquire/release, if there's no debugging state to be
    181 * set up.
    182 */
    183#ifndef CONFIG_DEBUG_RT_MUTEXES
    184static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
    185						     struct task_struct *old,
    186						     struct task_struct *new)
    187{
    188	return try_cmpxchg_acquire(&lock->owner, &old, new);
    189}
    190
    191static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
    192						     struct task_struct *old,
    193						     struct task_struct *new)
    194{
    195	return try_cmpxchg_release(&lock->owner, &old, new);
    196}
    197
    198/*
    199 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
    200 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
    201 * relaxed semantics suffice.
    202 */
    203static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
    204{
    205	unsigned long owner, *p = (unsigned long *) &lock->owner;
    206
    207	do {
    208		owner = *p;
    209	} while (cmpxchg_relaxed(p, owner,
    210				 owner | RT_MUTEX_HAS_WAITERS) != owner);
    211}
    212
    213/*
    214 * Safe fastpath aware unlock:
    215 * 1) Clear the waiters bit
    216 * 2) Drop lock->wait_lock
    217 * 3) Try to unlock the lock with cmpxchg
    218 */
    219static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
    220						 unsigned long flags)
    221	__releases(lock->wait_lock)
    222{
    223	struct task_struct *owner = rt_mutex_owner(lock);
    224
    225	clear_rt_mutex_waiters(lock);
    226	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
    227	/*
    228	 * If a new waiter comes in between the unlock and the cmpxchg
    229	 * we have two situations:
    230	 *
    231	 * unlock(wait_lock);
    232	 *					lock(wait_lock);
    233	 * cmpxchg(p, owner, 0) == owner
    234	 *					mark_rt_mutex_waiters(lock);
    235	 *					acquire(lock);
    236	 * or:
    237	 *
    238	 * unlock(wait_lock);
    239	 *					lock(wait_lock);
    240	 *					mark_rt_mutex_waiters(lock);
    241	 *
    242	 * cmpxchg(p, owner, 0) != owner
    243	 *					enqueue_waiter();
    244	 *					unlock(wait_lock);
    245	 * lock(wait_lock);
    246	 * wake waiter();
    247	 * unlock(wait_lock);
    248	 *					lock(wait_lock);
    249	 *					acquire(lock);
    250	 */
    251	return rt_mutex_cmpxchg_release(lock, owner, NULL);
    252}
    253
    254#else
    255static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
    256						     struct task_struct *old,
    257						     struct task_struct *new)
    258{
    259	return false;
    260
    261}
    262
    263static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
    264						     struct task_struct *old,
    265						     struct task_struct *new)
    266{
    267	return false;
    268}
    269
    270static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
    271{
    272	lock->owner = (struct task_struct *)
    273			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
    274}
    275
    276/*
    277 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
    278 */
    279static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
    280						 unsigned long flags)
    281	__releases(lock->wait_lock)
    282{
    283	lock->owner = NULL;
    284	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
    285	return true;
    286}
    287#endif
    288
    289static __always_inline int __waiter_prio(struct task_struct *task)
    290{
    291	int prio = task->prio;
    292
    293	if (!rt_prio(prio))
    294		return DEFAULT_PRIO;
    295
    296	return prio;
    297}
    298
    299static __always_inline void
    300waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
    301{
    302	waiter->prio = __waiter_prio(task);
    303	waiter->deadline = task->dl.deadline;
    304}
    305
    306/*
    307 * Only use with rt_mutex_waiter_{less,equal}()
    308 */
    309#define task_to_waiter(p)	\
    310	&(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
    311
    312static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
    313						struct rt_mutex_waiter *right)
    314{
    315	if (left->prio < right->prio)
    316		return 1;
    317
    318	/*
    319	 * If both waiters have dl_prio(), we check the deadlines of the
    320	 * associated tasks.
    321	 * If left waiter has a dl_prio(), and we didn't return 1 above,
    322	 * then right waiter has a dl_prio() too.
    323	 */
    324	if (dl_prio(left->prio))
    325		return dl_time_before(left->deadline, right->deadline);
    326
    327	return 0;
    328}
    329
    330static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
    331						 struct rt_mutex_waiter *right)
    332{
    333	if (left->prio != right->prio)
    334		return 0;
    335
    336	/*
    337	 * If both waiters have dl_prio(), we check the deadlines of the
    338	 * associated tasks.
    339	 * If left waiter has a dl_prio(), and we didn't return 0 above,
    340	 * then right waiter has a dl_prio() too.
    341	 */
    342	if (dl_prio(left->prio))
    343		return left->deadline == right->deadline;
    344
    345	return 1;
    346}
    347
    348static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
    349				  struct rt_mutex_waiter *top_waiter)
    350{
    351	if (rt_mutex_waiter_less(waiter, top_waiter))
    352		return true;
    353
    354#ifdef RT_MUTEX_BUILD_SPINLOCKS
    355	/*
    356	 * Note that RT tasks are excluded from same priority (lateral)
    357	 * steals to prevent the introduction of an unbounded latency.
    358	 */
    359	if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
    360		return false;
    361
    362	return rt_mutex_waiter_equal(waiter, top_waiter);
    363#else
    364	return false;
    365#endif
    366}
    367
    368#define __node_2_waiter(node) \
    369	rb_entry((node), struct rt_mutex_waiter, tree_entry)
    370
    371static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
    372{
    373	struct rt_mutex_waiter *aw = __node_2_waiter(a);
    374	struct rt_mutex_waiter *bw = __node_2_waiter(b);
    375
    376	if (rt_mutex_waiter_less(aw, bw))
    377		return 1;
    378
    379	if (!build_ww_mutex())
    380		return 0;
    381
    382	if (rt_mutex_waiter_less(bw, aw))
    383		return 0;
    384
    385	/* NOTE: relies on waiter->ww_ctx being set before insertion */
    386	if (aw->ww_ctx) {
    387		if (!bw->ww_ctx)
    388			return 1;
    389
    390		return (signed long)(aw->ww_ctx->stamp -
    391				     bw->ww_ctx->stamp) < 0;
    392	}
    393
    394	return 0;
    395}
    396
    397static __always_inline void
    398rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
    399{
    400	rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
    401}
    402
    403static __always_inline void
    404rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
    405{
    406	if (RB_EMPTY_NODE(&waiter->tree_entry))
    407		return;
    408
    409	rb_erase_cached(&waiter->tree_entry, &lock->waiters);
    410	RB_CLEAR_NODE(&waiter->tree_entry);
    411}
    412
    413#define __node_2_pi_waiter(node) \
    414	rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)
    415
    416static __always_inline bool
    417__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
    418{
    419	return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
    420}
    421
    422static __always_inline void
    423rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
    424{
    425	rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
    426}
    427
    428static __always_inline void
    429rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
    430{
    431	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
    432		return;
    433
    434	rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
    435	RB_CLEAR_NODE(&waiter->pi_tree_entry);
    436}
    437
    438static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
    439{
    440	struct task_struct *pi_task = NULL;
    441
    442	lockdep_assert_held(&p->pi_lock);
    443
    444	if (task_has_pi_waiters(p))
    445		pi_task = task_top_pi_waiter(p)->task;
    446
    447	rt_mutex_setprio(p, pi_task);
    448}
    449
    450/* RT mutex specific wake_q wrappers */
    451static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh,
    452						     struct task_struct *task,
    453						     unsigned int wake_state)
    454{
    455	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) {
    456		if (IS_ENABLED(CONFIG_PROVE_LOCKING))
    457			WARN_ON_ONCE(wqh->rtlock_task);
    458		get_task_struct(task);
    459		wqh->rtlock_task = task;
    460	} else {
    461		wake_q_add(&wqh->head, task);
    462	}
    463}
    464
    465static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
    466						struct rt_mutex_waiter *w)
    467{
    468	rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state);
    469}
    470
    471static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
    472{
    473	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
    474		wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
    475		put_task_struct(wqh->rtlock_task);
    476		wqh->rtlock_task = NULL;
    477	}
    478
    479	if (!wake_q_empty(&wqh->head))
    480		wake_up_q(&wqh->head);
    481
    482	/* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
    483	preempt_enable();
    484}
    485
    486/*
    487 * Deadlock detection is conditional:
    488 *
    489 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
    490 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
    491 *
    492 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
    493 * conducted independent of the detect argument.
    494 *
    495 * If the waiter argument is NULL this indicates the deboost path and
    496 * deadlock detection is disabled independent of the detect argument
    497 * and the config settings.
    498 */
    499static __always_inline bool
    500rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
    501			      enum rtmutex_chainwalk chwalk)
    502{
    503	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
    504		return waiter != NULL;
    505	return chwalk == RT_MUTEX_FULL_CHAINWALK;
    506}
    507
    508static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
    509{
    510	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
    511}
    512
    513/*
    514 * Adjust the priority chain. Also used for deadlock detection.
    515 * Decreases task's usage by one - may thus free the task.
    516 *
    517 * @task:	the task owning the mutex (owner) for which a chain walk is
    518 *		probably needed
    519 * @chwalk:	do we have to carry out deadlock detection?
    520 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
    521 *		things for a task that has just got its priority adjusted, and
    522 *		is waiting on a mutex)
    523 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
    524 *		we dropped its pi_lock. Is never dereferenced, only used for
    525 *		comparison to detect lock chain changes.
    526 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
    527 *		its priority to the mutex owner (can be NULL in the case
    528 *		depicted above or if the top waiter is gone away and we are
    529 *		actually deboosting the owner)
    530 * @top_task:	the current top waiter
    531 *
    532 * Returns 0 or -EDEADLK.
    533 *
    534 * Chain walk basics and protection scope
    535 *
    536 * [R] refcount on task
    537 * [P] task->pi_lock held
    538 * [L] rtmutex->wait_lock held
    539 *
    540 * Step	Description				Protected by
    541 *	function arguments:
    542 *	@task					[R]
    543 *	@orig_lock if != NULL			@top_task is blocked on it
    544 *	@next_lock				Unprotected. Cannot be
    545 *						dereferenced. Only used for
    546 *						comparison.
    547 *	@orig_waiter if != NULL			@top_task is blocked on it
    548 *	@top_task				current, or in case of proxy
    549 *						locking protected by calling
    550 *						code
    551 *	again:
    552 *	  loop_sanity_check();
    553 *	retry:
    554 * [1]	  lock(task->pi_lock);			[R] acquire [P]
    555 * [2]	  waiter = task->pi_blocked_on;		[P]
    556 * [3]	  check_exit_conditions_1();		[P]
    557 * [4]	  lock = waiter->lock;			[P]
    558 * [5]	  if (!try_lock(lock->wait_lock)) {	[P] try to acquire [L]
    559 *	    unlock(task->pi_lock);		release [P]
    560 *	    goto retry;
    561 *	  }
    562 * [6]	  check_exit_conditions_2();		[P] + [L]
    563 * [7]	  requeue_lock_waiter(lock, waiter);	[P] + [L]
    564 * [8]	  unlock(task->pi_lock);		release [P]
    565 *	  put_task_struct(task);		release [R]
    566 * [9]	  check_exit_conditions_3();		[L]
    567 * [10]	  task = owner(lock);			[L]
    568 *	  get_task_struct(task);		[L] acquire [R]
    569 *	  lock(task->pi_lock);			[L] acquire [P]
    570 * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
    571 * [12]	  check_exit_conditions_4();		[P] + [L]
    572 * [13]	  unlock(task->pi_lock);		release [P]
    573 *	  unlock(lock->wait_lock);		release [L]
    574 *	  goto again;
    575 */
    576static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
    577					      enum rtmutex_chainwalk chwalk,
    578					      struct rt_mutex_base *orig_lock,
    579					      struct rt_mutex_base *next_lock,
    580					      struct rt_mutex_waiter *orig_waiter,
    581					      struct task_struct *top_task)
    582{
    583	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
    584	struct rt_mutex_waiter *prerequeue_top_waiter;
    585	int ret = 0, depth = 0;
    586	struct rt_mutex_base *lock;
    587	bool detect_deadlock;
    588	bool requeue = true;
    589
    590	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
    591
    592	/*
    593	 * The (de)boosting is a step by step approach with a lot of
    594	 * pitfalls. We want this to be preemptible and we want hold a
    595	 * maximum of two locks per step. So we have to check
    596	 * carefully whether things change under us.
    597	 */
    598 again:
    599	/*
    600	 * We limit the lock chain length for each invocation.
    601	 */
    602	if (++depth > max_lock_depth) {
    603		static int prev_max;
    604
    605		/*
    606		 * Print this only once. If the admin changes the limit,
    607		 * print a new message when reaching the limit again.
    608		 */
    609		if (prev_max != max_lock_depth) {
    610			prev_max = max_lock_depth;
    611			printk(KERN_WARNING "Maximum lock depth %d reached "
    612			       "task: %s (%d)\n", max_lock_depth,
    613			       top_task->comm, task_pid_nr(top_task));
    614		}
    615		put_task_struct(task);
    616
    617		return -EDEADLK;
    618	}
    619
    620	/*
    621	 * We are fully preemptible here and only hold the refcount on
    622	 * @task. So everything can have changed under us since the
    623	 * caller or our own code below (goto retry/again) dropped all
    624	 * locks.
    625	 */
    626 retry:
    627	/*
    628	 * [1] Task cannot go away as we did a get_task() before !
    629	 */
    630	raw_spin_lock_irq(&task->pi_lock);
    631
    632	/*
    633	 * [2] Get the waiter on which @task is blocked on.
    634	 */
    635	waiter = task->pi_blocked_on;
    636
    637	/*
    638	 * [3] check_exit_conditions_1() protected by task->pi_lock.
    639	 */
    640
    641	/*
    642	 * Check whether the end of the boosting chain has been
    643	 * reached or the state of the chain has changed while we
    644	 * dropped the locks.
    645	 */
    646	if (!waiter)
    647		goto out_unlock_pi;
    648
    649	/*
    650	 * Check the orig_waiter state. After we dropped the locks,
    651	 * the previous owner of the lock might have released the lock.
    652	 */
    653	if (orig_waiter && !rt_mutex_owner(orig_lock))
    654		goto out_unlock_pi;
    655
    656	/*
    657	 * We dropped all locks after taking a refcount on @task, so
    658	 * the task might have moved on in the lock chain or even left
    659	 * the chain completely and blocks now on an unrelated lock or
    660	 * on @orig_lock.
    661	 *
    662	 * We stored the lock on which @task was blocked in @next_lock,
    663	 * so we can detect the chain change.
    664	 */
    665	if (next_lock != waiter->lock)
    666		goto out_unlock_pi;
    667
    668	/*
    669	 * There could be 'spurious' loops in the lock graph due to ww_mutex,
    670	 * consider:
    671	 *
    672	 *   P1: A, ww_A, ww_B
    673	 *   P2: ww_B, ww_A
    674	 *   P3: A
    675	 *
    676	 * P3 should not return -EDEADLK because it gets trapped in the cycle
    677	 * created by P1 and P2 (which will resolve -- and runs into
    678	 * max_lock_depth above). Therefore disable detect_deadlock such that
    679	 * the below termination condition can trigger once all relevant tasks
    680	 * are boosted.
    681	 *
    682	 * Even when we start with ww_mutex we can disable deadlock detection,
    683	 * since we would suppress a ww_mutex induced deadlock at [6] anyway.
    684	 * Suppressing it here however is not sufficient since we might still
    685	 * hit [6] due to adjustment driven iteration.
    686	 *
    687	 * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
    688	 * utterly fail to report it; lockdep should.
    689	 */
    690	if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
    691		detect_deadlock = false;
    692
    693	/*
    694	 * Drop out, when the task has no waiters. Note,
    695	 * top_waiter can be NULL, when we are in the deboosting
    696	 * mode!
    697	 */
    698	if (top_waiter) {
    699		if (!task_has_pi_waiters(task))
    700			goto out_unlock_pi;
    701		/*
    702		 * If deadlock detection is off, we stop here if we
    703		 * are not the top pi waiter of the task. If deadlock
    704		 * detection is enabled we continue, but stop the
    705		 * requeueing in the chain walk.
    706		 */
    707		if (top_waiter != task_top_pi_waiter(task)) {
    708			if (!detect_deadlock)
    709				goto out_unlock_pi;
    710			else
    711				requeue = false;
    712		}
    713	}
    714
    715	/*
    716	 * If the waiter priority is the same as the task priority
    717	 * then there is no further priority adjustment necessary.  If
    718	 * deadlock detection is off, we stop the chain walk. If its
    719	 * enabled we continue, but stop the requeueing in the chain
    720	 * walk.
    721	 */
    722	if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
    723		if (!detect_deadlock)
    724			goto out_unlock_pi;
    725		else
    726			requeue = false;
    727	}
    728
    729	/*
    730	 * [4] Get the next lock
    731	 */
    732	lock = waiter->lock;
    733	/*
    734	 * [5] We need to trylock here as we are holding task->pi_lock,
    735	 * which is the reverse lock order versus the other rtmutex
    736	 * operations.
    737	 */
    738	if (!raw_spin_trylock(&lock->wait_lock)) {
    739		raw_spin_unlock_irq(&task->pi_lock);
    740		cpu_relax();
    741		goto retry;
    742	}
    743
    744	/*
    745	 * [6] check_exit_conditions_2() protected by task->pi_lock and
    746	 * lock->wait_lock.
    747	 *
    748	 * Deadlock detection. If the lock is the same as the original
    749	 * lock which caused us to walk the lock chain or if the
    750	 * current lock is owned by the task which initiated the chain
    751	 * walk, we detected a deadlock.
    752	 */
    753	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
    754		ret = -EDEADLK;
    755
    756		/*
    757		 * When the deadlock is due to ww_mutex; also see above. Don't
    758		 * report the deadlock and instead let the ww_mutex wound/die
    759		 * logic pick which of the contending threads gets -EDEADLK.
    760		 *
    761		 * NOTE: assumes the cycle only contains a single ww_class; any
    762		 * other configuration and we fail to report; also, see
    763		 * lockdep.
    764		 */
    765		if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
    766			ret = 0;
    767
    768		raw_spin_unlock(&lock->wait_lock);
    769		goto out_unlock_pi;
    770	}
    771
    772	/*
    773	 * If we just follow the lock chain for deadlock detection, no
    774	 * need to do all the requeue operations. To avoid a truckload
    775	 * of conditionals around the various places below, just do the
    776	 * minimum chain walk checks.
    777	 */
    778	if (!requeue) {
    779		/*
    780		 * No requeue[7] here. Just release @task [8]
    781		 */
    782		raw_spin_unlock(&task->pi_lock);
    783		put_task_struct(task);
    784
    785		/*
    786		 * [9] check_exit_conditions_3 protected by lock->wait_lock.
    787		 * If there is no owner of the lock, end of chain.
    788		 */
    789		if (!rt_mutex_owner(lock)) {
    790			raw_spin_unlock_irq(&lock->wait_lock);
    791			return 0;
    792		}
    793
    794		/* [10] Grab the next task, i.e. owner of @lock */
    795		task = get_task_struct(rt_mutex_owner(lock));
    796		raw_spin_lock(&task->pi_lock);
    797
    798		/*
    799		 * No requeue [11] here. We just do deadlock detection.
    800		 *
    801		 * [12] Store whether owner is blocked
    802		 * itself. Decision is made after dropping the locks
    803		 */
    804		next_lock = task_blocked_on_lock(task);
    805		/*
    806		 * Get the top waiter for the next iteration
    807		 */
    808		top_waiter = rt_mutex_top_waiter(lock);
    809
    810		/* [13] Drop locks */
    811		raw_spin_unlock(&task->pi_lock);
    812		raw_spin_unlock_irq(&lock->wait_lock);
    813
    814		/* If owner is not blocked, end of chain. */
    815		if (!next_lock)
    816			goto out_put_task;
    817		goto again;
    818	}
    819
    820	/*
    821	 * Store the current top waiter before doing the requeue
    822	 * operation on @lock. We need it for the boost/deboost
    823	 * decision below.
    824	 */
    825	prerequeue_top_waiter = rt_mutex_top_waiter(lock);
    826
    827	/* [7] Requeue the waiter in the lock waiter tree. */
    828	rt_mutex_dequeue(lock, waiter);
    829
    830	/*
    831	 * Update the waiter prio fields now that we're dequeued.
    832	 *
    833	 * These values can have changed through either:
    834	 *
    835	 *   sys_sched_set_scheduler() / sys_sched_setattr()
    836	 *
    837	 * or
    838	 *
    839	 *   DL CBS enforcement advancing the effective deadline.
    840	 *
    841	 * Even though pi_waiters also uses these fields, and that tree is only
    842	 * updated in [11], we can do this here, since we hold [L], which
    843	 * serializes all pi_waiters access and rb_erase() does not care about
    844	 * the values of the node being removed.
    845	 */
    846	waiter_update_prio(waiter, task);
    847
    848	rt_mutex_enqueue(lock, waiter);
    849
    850	/* [8] Release the task */
    851	raw_spin_unlock(&task->pi_lock);
    852	put_task_struct(task);
    853
    854	/*
    855	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
    856	 *
    857	 * We must abort the chain walk if there is no lock owner even
    858	 * in the dead lock detection case, as we have nothing to
    859	 * follow here. This is the end of the chain we are walking.
    860	 */
    861	if (!rt_mutex_owner(lock)) {
    862		/*
    863		 * If the requeue [7] above changed the top waiter,
    864		 * then we need to wake the new top waiter up to try
    865		 * to get the lock.
    866		 */
    867		if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
    868			wake_up_state(waiter->task, waiter->wake_state);
    869		raw_spin_unlock_irq(&lock->wait_lock);
    870		return 0;
    871	}
    872
    873	/* [10] Grab the next task, i.e. the owner of @lock */
    874	task = get_task_struct(rt_mutex_owner(lock));
    875	raw_spin_lock(&task->pi_lock);
    876
    877	/* [11] requeue the pi waiters if necessary */
    878	if (waiter == rt_mutex_top_waiter(lock)) {
    879		/*
    880		 * The waiter became the new top (highest priority)
    881		 * waiter on the lock. Replace the previous top waiter
    882		 * in the owner tasks pi waiters tree with this waiter
    883		 * and adjust the priority of the owner.
    884		 */
    885		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
    886		rt_mutex_enqueue_pi(task, waiter);
    887		rt_mutex_adjust_prio(task);
    888
    889	} else if (prerequeue_top_waiter == waiter) {
    890		/*
    891		 * The waiter was the top waiter on the lock, but is
    892		 * no longer the top priority waiter. Replace waiter in
    893		 * the owner tasks pi waiters tree with the new top
    894		 * (highest priority) waiter and adjust the priority
    895		 * of the owner.
    896		 * The new top waiter is stored in @waiter so that
    897		 * @waiter == @top_waiter evaluates to true below and
    898		 * we continue to deboost the rest of the chain.
    899		 */
    900		rt_mutex_dequeue_pi(task, waiter);
    901		waiter = rt_mutex_top_waiter(lock);
    902		rt_mutex_enqueue_pi(task, waiter);
    903		rt_mutex_adjust_prio(task);
    904	} else {
    905		/*
    906		 * Nothing changed. No need to do any priority
    907		 * adjustment.
    908		 */
    909	}
    910
    911	/*
    912	 * [12] check_exit_conditions_4() protected by task->pi_lock
    913	 * and lock->wait_lock. The actual decisions are made after we
    914	 * dropped the locks.
    915	 *
    916	 * Check whether the task which owns the current lock is pi
    917	 * blocked itself. If yes we store a pointer to the lock for
    918	 * the lock chain change detection above. After we dropped
    919	 * task->pi_lock next_lock cannot be dereferenced anymore.
    920	 */
    921	next_lock = task_blocked_on_lock(task);
    922	/*
    923	 * Store the top waiter of @lock for the end of chain walk
    924	 * decision below.
    925	 */
    926	top_waiter = rt_mutex_top_waiter(lock);
    927
    928	/* [13] Drop the locks */
    929	raw_spin_unlock(&task->pi_lock);
    930	raw_spin_unlock_irq(&lock->wait_lock);
    931
    932	/*
    933	 * Make the actual exit decisions [12], based on the stored
    934	 * values.
    935	 *
    936	 * We reached the end of the lock chain. Stop right here. No
    937	 * point to go back just to figure that out.
    938	 */
    939	if (!next_lock)
    940		goto out_put_task;
    941
    942	/*
    943	 * If the current waiter is not the top waiter on the lock,
    944	 * then we can stop the chain walk here if we are not in full
    945	 * deadlock detection mode.
    946	 */
    947	if (!detect_deadlock && waiter != top_waiter)
    948		goto out_put_task;
    949
    950	goto again;
    951
    952 out_unlock_pi:
    953	raw_spin_unlock_irq(&task->pi_lock);
    954 out_put_task:
    955	put_task_struct(task);
    956
    957	return ret;
    958}
    959
    960/*
    961 * Try to take an rt-mutex
    962 *
    963 * Must be called with lock->wait_lock held and interrupts disabled
    964 *
    965 * @lock:   The lock to be acquired.
    966 * @task:   The task which wants to acquire the lock
    967 * @waiter: The waiter that is queued to the lock's wait tree if the
    968 *	    callsite called task_blocked_on_lock(), otherwise NULL
    969 */
    970static int __sched
    971try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
    972		     struct rt_mutex_waiter *waiter)
    973{
    974	lockdep_assert_held(&lock->wait_lock);
    975
    976	/*
    977	 * Before testing whether we can acquire @lock, we set the
    978	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
    979	 * other tasks which try to modify @lock into the slow path
    980	 * and they serialize on @lock->wait_lock.
    981	 *
    982	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
    983	 * as explained at the top of this file if and only if:
    984	 *
    985	 * - There is a lock owner. The caller must fixup the
    986	 *   transient state if it does a trylock or leaves the lock
    987	 *   function due to a signal or timeout.
    988	 *
    989	 * - @task acquires the lock and there are no other
    990	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
    991	 *   the end of this function.
    992	 */
    993	mark_rt_mutex_waiters(lock);
    994
    995	/*
    996	 * If @lock has an owner, give up.
    997	 */
    998	if (rt_mutex_owner(lock))
    999		return 0;
   1000
   1001	/*
   1002	 * If @waiter != NULL, @task has already enqueued the waiter
   1003	 * into @lock waiter tree. If @waiter == NULL then this is a
   1004	 * trylock attempt.
   1005	 */
   1006	if (waiter) {
   1007		struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
   1008
   1009		/*
   1010		 * If waiter is the highest priority waiter of @lock,
   1011		 * or allowed to steal it, take it over.
   1012		 */
   1013		if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
   1014			/*
   1015			 * We can acquire the lock. Remove the waiter from the
   1016			 * lock waiters tree.
   1017			 */
   1018			rt_mutex_dequeue(lock, waiter);
   1019		} else {
   1020			return 0;
   1021		}
   1022	} else {
   1023		/*
   1024		 * If the lock has waiters already we check whether @task is
   1025		 * eligible to take over the lock.
   1026		 *
   1027		 * If there are no other waiters, @task can acquire
   1028		 * the lock.  @task->pi_blocked_on is NULL, so it does
   1029		 * not need to be dequeued.
   1030		 */
   1031		if (rt_mutex_has_waiters(lock)) {
   1032			/* Check whether the trylock can steal it. */
   1033			if (!rt_mutex_steal(task_to_waiter(task),
   1034					    rt_mutex_top_waiter(lock)))
   1035				return 0;
   1036
   1037			/*
   1038			 * The current top waiter stays enqueued. We
   1039			 * don't have to change anything in the lock
   1040			 * waiters order.
   1041			 */
   1042		} else {
   1043			/*
   1044			 * No waiters. Take the lock without the
   1045			 * pi_lock dance.@task->pi_blocked_on is NULL
   1046			 * and we have no waiters to enqueue in @task
   1047			 * pi waiters tree.
   1048			 */
   1049			goto takeit;
   1050		}
   1051	}
   1052
   1053	/*
   1054	 * Clear @task->pi_blocked_on. Requires protection by
   1055	 * @task->pi_lock. Redundant operation for the @waiter == NULL
   1056	 * case, but conditionals are more expensive than a redundant
   1057	 * store.
   1058	 */
   1059	raw_spin_lock(&task->pi_lock);
   1060	task->pi_blocked_on = NULL;
   1061	/*
   1062	 * Finish the lock acquisition. @task is the new owner. If
   1063	 * other waiters exist we have to insert the highest priority
   1064	 * waiter into @task->pi_waiters tree.
   1065	 */
   1066	if (rt_mutex_has_waiters(lock))
   1067		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
   1068	raw_spin_unlock(&task->pi_lock);
   1069
   1070takeit:
   1071	/*
   1072	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
   1073	 * are still waiters or clears it.
   1074	 */
   1075	rt_mutex_set_owner(lock, task);
   1076
   1077	return 1;
   1078}
   1079
   1080/*
   1081 * Task blocks on lock.
   1082 *
   1083 * Prepare waiter and propagate pi chain
   1084 *
   1085 * This must be called with lock->wait_lock held and interrupts disabled
   1086 */
   1087static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
   1088					   struct rt_mutex_waiter *waiter,
   1089					   struct task_struct *task,
   1090					   struct ww_acquire_ctx *ww_ctx,
   1091					   enum rtmutex_chainwalk chwalk)
   1092{
   1093	struct task_struct *owner = rt_mutex_owner(lock);
   1094	struct rt_mutex_waiter *top_waiter = waiter;
   1095	struct rt_mutex_base *next_lock;
   1096	int chain_walk = 0, res;
   1097
   1098	lockdep_assert_held(&lock->wait_lock);
   1099
   1100	/*
   1101	 * Early deadlock detection. We really don't want the task to
   1102	 * enqueue on itself just to untangle the mess later. It's not
   1103	 * only an optimization. We drop the locks, so another waiter
   1104	 * can come in before the chain walk detects the deadlock. So
   1105	 * the other will detect the deadlock and return -EDEADLOCK,
   1106	 * which is wrong, as the other waiter is not in a deadlock
   1107	 * situation.
   1108	 *
   1109	 * Except for ww_mutex, in that case the chain walk must already deal
   1110	 * with spurious cycles, see the comments at [3] and [6].
   1111	 */
   1112	if (owner == task && !(build_ww_mutex() && ww_ctx))
   1113		return -EDEADLK;
   1114
   1115	raw_spin_lock(&task->pi_lock);
   1116	waiter->task = task;
   1117	waiter->lock = lock;
   1118	waiter_update_prio(waiter, task);
   1119
   1120	/* Get the top priority waiter on the lock */
   1121	if (rt_mutex_has_waiters(lock))
   1122		top_waiter = rt_mutex_top_waiter(lock);
   1123	rt_mutex_enqueue(lock, waiter);
   1124
   1125	task->pi_blocked_on = waiter;
   1126
   1127	raw_spin_unlock(&task->pi_lock);
   1128
   1129	if (build_ww_mutex() && ww_ctx) {
   1130		struct rt_mutex *rtm;
   1131
   1132		/* Check whether the waiter should back out immediately */
   1133		rtm = container_of(lock, struct rt_mutex, rtmutex);
   1134		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
   1135		if (res) {
   1136			raw_spin_lock(&task->pi_lock);
   1137			rt_mutex_dequeue(lock, waiter);
   1138			task->pi_blocked_on = NULL;
   1139			raw_spin_unlock(&task->pi_lock);
   1140			return res;
   1141		}
   1142	}
   1143
   1144	if (!owner)
   1145		return 0;
   1146
   1147	raw_spin_lock(&owner->pi_lock);
   1148	if (waiter == rt_mutex_top_waiter(lock)) {
   1149		rt_mutex_dequeue_pi(owner, top_waiter);
   1150		rt_mutex_enqueue_pi(owner, waiter);
   1151
   1152		rt_mutex_adjust_prio(owner);
   1153		if (owner->pi_blocked_on)
   1154			chain_walk = 1;
   1155	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
   1156		chain_walk = 1;
   1157	}
   1158
   1159	/* Store the lock on which owner is blocked or NULL */
   1160	next_lock = task_blocked_on_lock(owner);
   1161
   1162	raw_spin_unlock(&owner->pi_lock);
   1163	/*
   1164	 * Even if full deadlock detection is on, if the owner is not
   1165	 * blocked itself, we can avoid finding this out in the chain
   1166	 * walk.
   1167	 */
   1168	if (!chain_walk || !next_lock)
   1169		return 0;
   1170
   1171	/*
   1172	 * The owner can't disappear while holding a lock,
   1173	 * so the owner struct is protected by wait_lock.
   1174	 * Gets dropped in rt_mutex_adjust_prio_chain()!
   1175	 */
   1176	get_task_struct(owner);
   1177
   1178	raw_spin_unlock_irq(&lock->wait_lock);
   1179
   1180	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
   1181					 next_lock, waiter, task);
   1182
   1183	raw_spin_lock_irq(&lock->wait_lock);
   1184
   1185	return res;
   1186}
   1187
   1188/*
   1189 * Remove the top waiter from the current tasks pi waiter tree and
   1190 * queue it up.
   1191 *
   1192 * Called with lock->wait_lock held and interrupts disabled.
   1193 */
   1194static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
   1195					    struct rt_mutex_base *lock)
   1196{
   1197	struct rt_mutex_waiter *waiter;
   1198
   1199	raw_spin_lock(&current->pi_lock);
   1200
   1201	waiter = rt_mutex_top_waiter(lock);
   1202
   1203	/*
   1204	 * Remove it from current->pi_waiters and deboost.
   1205	 *
   1206	 * We must in fact deboost here in order to ensure we call
   1207	 * rt_mutex_setprio() to update p->pi_top_task before the
   1208	 * task unblocks.
   1209	 */
   1210	rt_mutex_dequeue_pi(current, waiter);
   1211	rt_mutex_adjust_prio(current);
   1212
   1213	/*
   1214	 * As we are waking up the top waiter, and the waiter stays
   1215	 * queued on the lock until it gets the lock, this lock
   1216	 * obviously has waiters. Just set the bit here and this has
   1217	 * the added benefit of forcing all new tasks into the
   1218	 * slow path making sure no task of lower priority than
   1219	 * the top waiter can steal this lock.
   1220	 */
   1221	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
   1222
   1223	/*
   1224	 * We deboosted before waking the top waiter task such that we don't
   1225	 * run two tasks with the 'same' priority (and ensure the
   1226	 * p->pi_top_task pointer points to a blocked task). This however can
   1227	 * lead to priority inversion if we would get preempted after the
   1228	 * deboost but before waking our donor task, hence the preempt_disable()
   1229	 * before unlock.
   1230	 *
   1231	 * Pairs with preempt_enable() in rt_mutex_wake_up_q();
   1232	 */
   1233	preempt_disable();
   1234	rt_mutex_wake_q_add(wqh, waiter);
   1235	raw_spin_unlock(&current->pi_lock);
   1236}
   1237
   1238static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
   1239{
   1240	int ret = try_to_take_rt_mutex(lock, current, NULL);
   1241
   1242	/*
   1243	 * try_to_take_rt_mutex() sets the lock waiters bit
   1244	 * unconditionally. Clean this up.
   1245	 */
   1246	fixup_rt_mutex_waiters(lock);
   1247
   1248	return ret;
   1249}
   1250
   1251/*
   1252 * Slow path try-lock function:
   1253 */
   1254static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
   1255{
   1256	unsigned long flags;
   1257	int ret;
   1258
   1259	/*
   1260	 * If the lock already has an owner we fail to get the lock.
   1261	 * This can be done without taking the @lock->wait_lock as
   1262	 * it is only being read, and this is a trylock anyway.
   1263	 */
   1264	if (rt_mutex_owner(lock))
   1265		return 0;
   1266
   1267	/*
   1268	 * The mutex has currently no owner. Lock the wait lock and try to
   1269	 * acquire the lock. We use irqsave here to support early boot calls.
   1270	 */
   1271	raw_spin_lock_irqsave(&lock->wait_lock, flags);
   1272
   1273	ret = __rt_mutex_slowtrylock(lock);
   1274
   1275	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
   1276
   1277	return ret;
   1278}
   1279
   1280static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
   1281{
   1282	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
   1283		return 1;
   1284
   1285	return rt_mutex_slowtrylock(lock);
   1286}
   1287
   1288/*
   1289 * Slow path to release a rt-mutex.
   1290 */
   1291static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
   1292{
   1293	DEFINE_RT_WAKE_Q(wqh);
   1294	unsigned long flags;
   1295
   1296	/* irqsave required to support early boot calls */
   1297	raw_spin_lock_irqsave(&lock->wait_lock, flags);
   1298
   1299	debug_rt_mutex_unlock(lock);
   1300
   1301	/*
   1302	 * We must be careful here if the fast path is enabled. If we
   1303	 * have no waiters queued we cannot set owner to NULL here
   1304	 * because of:
   1305	 *
   1306	 * foo->lock->owner = NULL;
   1307	 *			rtmutex_lock(foo->lock);   <- fast path
   1308	 *			free = atomic_dec_and_test(foo->refcnt);
   1309	 *			rtmutex_unlock(foo->lock); <- fast path
   1310	 *			if (free)
   1311	 *				kfree(foo);
   1312	 * raw_spin_unlock(foo->lock->wait_lock);
   1313	 *
   1314	 * So for the fastpath enabled kernel:
   1315	 *
   1316	 * Nothing can set the waiters bit as long as we hold
   1317	 * lock->wait_lock. So we do the following sequence:
   1318	 *
   1319	 *	owner = rt_mutex_owner(lock);
   1320	 *	clear_rt_mutex_waiters(lock);
   1321	 *	raw_spin_unlock(&lock->wait_lock);
   1322	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
   1323	 *		return;
   1324	 *	goto retry;
   1325	 *
   1326	 * The fastpath disabled variant is simple as all access to
   1327	 * lock->owner is serialized by lock->wait_lock:
   1328	 *
   1329	 *	lock->owner = NULL;
   1330	 *	raw_spin_unlock(&lock->wait_lock);
   1331	 */
   1332	while (!rt_mutex_has_waiters(lock)) {
   1333		/* Drops lock->wait_lock ! */
   1334		if (unlock_rt_mutex_safe(lock, flags) == true)
   1335			return;
   1336		/* Relock the rtmutex and try again */
   1337		raw_spin_lock_irqsave(&lock->wait_lock, flags);
   1338	}
   1339
   1340	/*
   1341	 * The wakeup next waiter path does not suffer from the above
   1342	 * race. See the comments there.
   1343	 *
   1344	 * Queue the next waiter for wakeup once we release the wait_lock.
   1345	 */
   1346	mark_wakeup_next_waiter(&wqh, lock);
   1347	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
   1348
   1349	rt_mutex_wake_up_q(&wqh);
   1350}
   1351
   1352static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
   1353{
   1354	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
   1355		return;
   1356
   1357	rt_mutex_slowunlock(lock);
   1358}
   1359
   1360#ifdef CONFIG_SMP
   1361static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
   1362				  struct rt_mutex_waiter *waiter,
   1363				  struct task_struct *owner)
   1364{
   1365	bool res = true;
   1366
   1367	rcu_read_lock();
   1368	for (;;) {
   1369		/* If owner changed, trylock again. */
   1370		if (owner != rt_mutex_owner(lock))
   1371			break;
   1372		/*
   1373		 * Ensure that @owner is dereferenced after checking that
   1374		 * the lock owner still matches @owner. If that fails,
   1375		 * @owner might point to freed memory. If it still matches,
   1376		 * the rcu_read_lock() ensures the memory stays valid.
   1377		 */
   1378		barrier();
   1379		/*
   1380		 * Stop spinning when:
   1381		 *  - the lock owner has been scheduled out
   1382		 *  - current is not longer the top waiter
   1383		 *  - current is requested to reschedule (redundant
   1384		 *    for CONFIG_PREEMPT_RCU=y)
   1385		 *  - the VCPU on which owner runs is preempted
   1386		 */
   1387		if (!owner_on_cpu(owner) || need_resched() ||
   1388		    !rt_mutex_waiter_is_top_waiter(lock, waiter)) {
   1389			res = false;
   1390			break;
   1391		}
   1392		cpu_relax();
   1393	}
   1394	rcu_read_unlock();
   1395	return res;
   1396}
   1397#else
   1398static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
   1399				  struct rt_mutex_waiter *waiter,
   1400				  struct task_struct *owner)
   1401{
   1402	return false;
   1403}
   1404#endif
   1405
   1406#ifdef RT_MUTEX_BUILD_MUTEX
   1407/*
   1408 * Functions required for:
   1409 *	- rtmutex, futex on all kernels
   1410 *	- mutex and rwsem substitutions on RT kernels
   1411 */
   1412
   1413/*
   1414 * Remove a waiter from a lock and give up
   1415 *
   1416 * Must be called with lock->wait_lock held and interrupts disabled. It must
   1417 * have just failed to try_to_take_rt_mutex().
   1418 */
   1419static void __sched remove_waiter(struct rt_mutex_base *lock,
   1420				  struct rt_mutex_waiter *waiter)
   1421{
   1422	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
   1423	struct task_struct *owner = rt_mutex_owner(lock);
   1424	struct rt_mutex_base *next_lock;
   1425
   1426	lockdep_assert_held(&lock->wait_lock);
   1427
   1428	raw_spin_lock(&current->pi_lock);
   1429	rt_mutex_dequeue(lock, waiter);
   1430	current->pi_blocked_on = NULL;
   1431	raw_spin_unlock(&current->pi_lock);
   1432
   1433	/*
   1434	 * Only update priority if the waiter was the highest priority
   1435	 * waiter of the lock and there is an owner to update.
   1436	 */
   1437	if (!owner || !is_top_waiter)
   1438		return;
   1439
   1440	raw_spin_lock(&owner->pi_lock);
   1441
   1442	rt_mutex_dequeue_pi(owner, waiter);
   1443
   1444	if (rt_mutex_has_waiters(lock))
   1445		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
   1446
   1447	rt_mutex_adjust_prio(owner);
   1448
   1449	/* Store the lock on which owner is blocked or NULL */
   1450	next_lock = task_blocked_on_lock(owner);
   1451
   1452	raw_spin_unlock(&owner->pi_lock);
   1453
   1454	/*
   1455	 * Don't walk the chain, if the owner task is not blocked
   1456	 * itself.
   1457	 */
   1458	if (!next_lock)
   1459		return;
   1460
   1461	/* gets dropped in rt_mutex_adjust_prio_chain()! */
   1462	get_task_struct(owner);
   1463
   1464	raw_spin_unlock_irq(&lock->wait_lock);
   1465
   1466	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
   1467				   next_lock, NULL, current);
   1468
   1469	raw_spin_lock_irq(&lock->wait_lock);
   1470}
   1471
   1472/**
   1473 * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
   1474 * @lock:		 the rt_mutex to take
   1475 * @ww_ctx:		 WW mutex context pointer
   1476 * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
   1477 *			 or TASK_UNINTERRUPTIBLE)
   1478 * @timeout:		 the pre-initialized and started timer, or NULL for none
   1479 * @waiter:		 the pre-initialized rt_mutex_waiter
   1480 *
   1481 * Must be called with lock->wait_lock held and interrupts disabled
   1482 */
   1483static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
   1484					   struct ww_acquire_ctx *ww_ctx,
   1485					   unsigned int state,
   1486					   struct hrtimer_sleeper *timeout,
   1487					   struct rt_mutex_waiter *waiter)
   1488{
   1489	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
   1490	struct task_struct *owner;
   1491	int ret = 0;
   1492
   1493	for (;;) {
   1494		/* Try to acquire the lock: */
   1495		if (try_to_take_rt_mutex(lock, current, waiter))
   1496			break;
   1497
   1498		if (timeout && !timeout->task) {
   1499			ret = -ETIMEDOUT;
   1500			break;
   1501		}
   1502		if (signal_pending_state(state, current)) {
   1503			ret = -EINTR;
   1504			break;
   1505		}
   1506
   1507		if (build_ww_mutex() && ww_ctx) {
   1508			ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
   1509			if (ret)
   1510				break;
   1511		}
   1512
   1513		if (waiter == rt_mutex_top_waiter(lock))
   1514			owner = rt_mutex_owner(lock);
   1515		else
   1516			owner = NULL;
   1517		raw_spin_unlock_irq(&lock->wait_lock);
   1518
   1519		if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
   1520			schedule();
   1521
   1522		raw_spin_lock_irq(&lock->wait_lock);
   1523		set_current_state(state);
   1524	}
   1525
   1526	__set_current_state(TASK_RUNNING);
   1527	return ret;
   1528}
   1529
   1530static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
   1531					     struct rt_mutex_waiter *w)
   1532{
   1533	/*
   1534	 * If the result is not -EDEADLOCK or the caller requested
   1535	 * deadlock detection, nothing to do here.
   1536	 */
   1537	if (res != -EDEADLOCK || detect_deadlock)
   1538		return;
   1539
   1540	if (build_ww_mutex() && w->ww_ctx)
   1541		return;
   1542
   1543	/*
   1544	 * Yell loudly and stop the task right here.
   1545	 */
   1546	WARN(1, "rtmutex deadlock detected\n");
   1547	while (1) {
   1548		set_current_state(TASK_INTERRUPTIBLE);
   1549		schedule();
   1550	}
   1551}
   1552
   1553/**
   1554 * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
   1555 * @lock:	The rtmutex to block lock
   1556 * @ww_ctx:	WW mutex context pointer
   1557 * @state:	The task state for sleeping
   1558 * @chwalk:	Indicator whether full or partial chainwalk is requested
   1559 * @waiter:	Initializer waiter for blocking
   1560 */
   1561static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
   1562				       struct ww_acquire_ctx *ww_ctx,
   1563				       unsigned int state,
   1564				       enum rtmutex_chainwalk chwalk,
   1565				       struct rt_mutex_waiter *waiter)
   1566{
   1567	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
   1568	struct ww_mutex *ww = ww_container_of(rtm);
   1569	int ret;
   1570
   1571	lockdep_assert_held(&lock->wait_lock);
   1572
   1573	/* Try to acquire the lock again: */
   1574	if (try_to_take_rt_mutex(lock, current, NULL)) {
   1575		if (build_ww_mutex() && ww_ctx) {
   1576			__ww_mutex_check_waiters(rtm, ww_ctx);
   1577			ww_mutex_lock_acquired(ww, ww_ctx);
   1578		}
   1579		return 0;
   1580	}
   1581
   1582	set_current_state(state);
   1583
   1584	trace_contention_begin(lock, LCB_F_RT);
   1585
   1586	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
   1587	if (likely(!ret))
   1588		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
   1589
   1590	if (likely(!ret)) {
   1591		/* acquired the lock */
   1592		if (build_ww_mutex() && ww_ctx) {
   1593			if (!ww_ctx->is_wait_die)
   1594				__ww_mutex_check_waiters(rtm, ww_ctx);
   1595			ww_mutex_lock_acquired(ww, ww_ctx);
   1596		}
   1597	} else {
   1598		__set_current_state(TASK_RUNNING);
   1599		remove_waiter(lock, waiter);
   1600		rt_mutex_handle_deadlock(ret, chwalk, waiter);
   1601	}
   1602
   1603	/*
   1604	 * try_to_take_rt_mutex() sets the waiter bit
   1605	 * unconditionally. We might have to fix that up.
   1606	 */
   1607	fixup_rt_mutex_waiters(lock);
   1608
   1609	trace_contention_end(lock, ret);
   1610
   1611	return ret;
   1612}
   1613
   1614static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
   1615					     struct ww_acquire_ctx *ww_ctx,
   1616					     unsigned int state)
   1617{
   1618	struct rt_mutex_waiter waiter;
   1619	int ret;
   1620
   1621	rt_mutex_init_waiter(&waiter);
   1622	waiter.ww_ctx = ww_ctx;
   1623
   1624	ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
   1625				  &waiter);
   1626
   1627	debug_rt_mutex_free_waiter(&waiter);
   1628	return ret;
   1629}
   1630
   1631/*
   1632 * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
   1633 * @lock:	The rtmutex to block lock
   1634 * @ww_ctx:	WW mutex context pointer
   1635 * @state:	The task state for sleeping
   1636 */
   1637static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
   1638				     struct ww_acquire_ctx *ww_ctx,
   1639				     unsigned int state)
   1640{
   1641	unsigned long flags;
   1642	int ret;
   1643
   1644	/*
   1645	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
   1646	 * be called in early boot if the cmpxchg() fast path is disabled
   1647	 * (debug, no architecture support). In this case we will acquire the
   1648	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
   1649	 * enable interrupts in that early boot case. So we need to use the
   1650	 * irqsave/restore variants.
   1651	 */
   1652	raw_spin_lock_irqsave(&lock->wait_lock, flags);
   1653	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
   1654	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
   1655
   1656	return ret;
   1657}
   1658
   1659static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
   1660					   unsigned int state)
   1661{
   1662	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
   1663		return 0;
   1664
   1665	return rt_mutex_slowlock(lock, NULL, state);
   1666}
   1667#endif /* RT_MUTEX_BUILD_MUTEX */
   1668
   1669#ifdef RT_MUTEX_BUILD_SPINLOCKS
   1670/*
   1671 * Functions required for spin/rw_lock substitution on RT kernels
   1672 */
   1673
   1674/**
   1675 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
   1676 * @lock:	The underlying RT mutex
   1677 */
   1678static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
   1679{
   1680	struct rt_mutex_waiter waiter;
   1681	struct task_struct *owner;
   1682
   1683	lockdep_assert_held(&lock->wait_lock);
   1684
   1685	if (try_to_take_rt_mutex(lock, current, NULL))
   1686		return;
   1687
   1688	rt_mutex_init_rtlock_waiter(&waiter);
   1689
   1690	/* Save current state and set state to TASK_RTLOCK_WAIT */
   1691	current_save_and_set_rtlock_wait_state();
   1692
   1693	trace_contention_begin(lock, LCB_F_RT);
   1694
   1695	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
   1696
   1697	for (;;) {
   1698		/* Try to acquire the lock again */
   1699		if (try_to_take_rt_mutex(lock, current, &waiter))
   1700			break;
   1701
   1702		if (&waiter == rt_mutex_top_waiter(lock))
   1703			owner = rt_mutex_owner(lock);
   1704		else
   1705			owner = NULL;
   1706		raw_spin_unlock_irq(&lock->wait_lock);
   1707
   1708		if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
   1709			schedule_rtlock();
   1710
   1711		raw_spin_lock_irq(&lock->wait_lock);
   1712		set_current_state(TASK_RTLOCK_WAIT);
   1713	}
   1714
   1715	/* Restore the task state */
   1716	current_restore_rtlock_saved_state();
   1717
   1718	/*
   1719	 * try_to_take_rt_mutex() sets the waiter bit unconditionally.
   1720	 * We might have to fix that up:
   1721	 */
   1722	fixup_rt_mutex_waiters(lock);
   1723	debug_rt_mutex_free_waiter(&waiter);
   1724
   1725	trace_contention_end(lock, 0);
   1726}
   1727
   1728static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
   1729{
   1730	unsigned long flags;
   1731
   1732	raw_spin_lock_irqsave(&lock->wait_lock, flags);
   1733	rtlock_slowlock_locked(lock);
   1734	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
   1735}
   1736
   1737#endif /* RT_MUTEX_BUILD_SPINLOCKS */