cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

percpu-rwsem.c (7502B)


// SPDX-License-Identifier: GPL-2.0-only
#include <linux/atomic.h>
#include <linux/percpu.h>
#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/errno.h>
#include <trace/events/lock.h>

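/*
 * Allocate the per-CPU reader count and initialize the writer-side state:
 * the RCU-sync machinery, the writer's rcuwait, the waiter queue and the
 * block flag.
 */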
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
			const char *name, struct lock_class_key *key)
{
	sem->read_count = alloc_percpu(int);
	if (unlikely(!sem->read_count))
		return -ENOMEM;

	rcu_sync_init(&sem->rss);
	rcuwait_init(&sem->writer);
	init_waitqueue_head(&sem->waiters);
	atomic_set(&sem->block, 0);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);

void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
{
	/*
	 * XXX: temporary kludge. The error path in alloc_super()
	 * assumes that percpu_free_rwsem() is safe after kzalloc().
	 */
	if (!sem->read_count)
		return;

	rcu_sync_dtor(&sem->rss);
	free_percpu(sem->read_count);
	sem->read_count = NULL; /* catch use after free bugs */
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);

static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
	this_cpu_inc(*sem->read_count);

	/*
	 * Because preemption is disabled, the decrement happens on
	 * the same CPU as the increment, avoiding the
	 * increment-on-one-CPU-and-decrement-on-another problem.
	 *
	 * If the reader misses the writer's assignment of sem->block, then the
	 * writer is guaranteed to see the reader's increment.
	 *
	 * Conversely, any readers that increment their sem->read_count after
	 * the writer looks are guaranteed to see the sem->block value, which
	 * in turn means that they are guaranteed to immediately decrement
	 * their sem->read_count, so that it doesn't matter that the writer
	 * missed them.
	 */

	smp_mb(); /* A matches D */

	/*
	 * If !sem->block the critical section starts here, matched by the
	 * release in percpu_up_write().
	 */
	if (likely(!atomic_read_acquire(&sem->block)))
		return true;

	this_cpu_dec(*sem->read_count);

	/* Prod writer to re-evaluate readers_active_check() */
	rcuwait_wake_up(&sem->writer);

	return false;
}

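/*
 * Writer side of the trylock: claim writer-writer exclusion by flipping
 * sem->block from 0 to 1; fails if another writer already claimed it.
 */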
static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
{
	if (atomic_read(&sem->block))
		return false;

	return atomic_xchg(&sem->block, 1) == 0;
}

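/*
 * Common trylock helper for the wait/wake paths below: takes the reader or
 * the writer side of the semaphore depending on @reader.
 */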
static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
{
	if (reader) {
		bool ret;

		preempt_disable();
		ret = __percpu_down_read_trylock(sem);
		preempt_enable();

		return ret;
	}
	return __percpu_down_write_trylock(sem);
}

/*
 * The return value of wait_queue_entry::func means:
 *
 *  <0 - error, wakeup is terminated and the error is returned
 *   0 - no wakeup, a next waiter is tried
 *  >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
 *
 * We use EXCLUSIVE for both readers and writers to preserve FIFO order,
 * and play games with the return value to allow waking multiple readers.
 *
 * Specifically, we wake readers until we've woken a single writer, or until a
 * trylock fails.
 */
static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
				      unsigned int mode, int wake_flags,
				      void *key)
{
	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
	struct percpu_rw_semaphore *sem = key;
	struct task_struct *p;

	/* concurrent against percpu_down_write(), can get stolen */
	if (!__percpu_rwsem_trylock(sem, reader))
		return 1;

	p = get_task_struct(wq_entry->private);
	list_del_init(&wq_entry->entry);
	smp_store_release(&wq_entry->private, NULL);

	wake_up_process(p);
	put_task_struct(p);

	return !reader; /* wake (readers until) 1 writer */
}

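/*
 * Queue the current task on sem->waiters and sleep until
 * percpu_rwsem_wake_function() hands over the lock, unless the trylock
 * under the waiters lock succeeds immediately.
 */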
static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
{
	DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
	bool wait;

	spin_lock_irq(&sem->waiters.lock);
	/*
	 * Serialize against the wakeup in percpu_up_write(); if we fail
	 * the trylock, the wakeup must see us on the list.
	 */
	wait = !__percpu_rwsem_trylock(sem, reader);
	if (wait) {
		wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
		__add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
	}
	spin_unlock_irq(&sem->waiters.lock);

	while (wait) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!smp_load_acquire(&wq_entry.private))
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
}

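/*
 * Reader slow path, entered with preemption disabled; preemption is
 * re-enabled around the sleep in percpu_rwsem_wait().
 */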
bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
{
	if (__percpu_down_read_trylock(sem))
		return true;

	if (try)
		return false;

	trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ);
	preempt_enable();
	percpu_rwsem_wait(sem, /* .reader = */ true);
	preempt_disable();
	trace_contention_end(sem, 0);

	return true;
}
EXPORT_SYMBOL_GPL(__percpu_down_read);

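/*
 * Sum a per-CPU variable over all possible CPUs. Here a zero result is only
 * stable because sem->block is set while it is evaluated; see
 * readers_active_check() below.
 */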
#define per_cpu_sum(var)						\
({									\
	typeof(var) __sum = 0;						\
	int cpu;							\
	compiletime_assert_atomic_type(__sum);				\
	for_each_possible_cpu(cpu)					\
		__sum += per_cpu(var, cpu);				\
	__sum;								\
})

/*
 * Return true if the modular sum of the sem->read_count per-CPU variable is
 * zero.  If this sum is zero, then it is stable due to the fact that if any
 * newly arriving readers increment a given counter, they will immediately
 * decrement that same counter.
 *
 * Assumes sem->block is set.
 */
static bool readers_active_check(struct percpu_rw_semaphore *sem)
{
	if (per_cpu_sum(*sem->read_count) != 0)
		return false;

	/*
	 * If we observed the decrement, ensure we see the entire critical
	 * section.
	 */

	smp_mb(); /* C matches B */

	return true;
}

void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
{
	might_sleep();
	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
	trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);

	/* Notify readers to take the slow path. */
	rcu_sync_enter(&sem->rss);

	/*
	 * Try to set sem->block; this provides writer-writer exclusion.
	 * Having sem->block set makes new readers block.
	 */
	if (!__percpu_down_write_trylock(sem))
		percpu_rwsem_wait(sem, /* .reader = */ false);

	/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */

	/*
	 * If they don't see our store of sem->block, then we are guaranteed to
	 * see their sem->read_count increment, and therefore will wait for
	 * them.
	 */

	/* Wait for all active readers to complete. */
	rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE);
	trace_contention_end(sem, 0);
}
EXPORT_SYMBOL_GPL(percpu_down_write);

void percpu_up_write(struct percpu_rw_semaphore *sem)
{
	rwsem_release(&sem->dep_map, _RET_IP_);

	/*
	 * Signal that the writer is done, no fast path yet.
	 *
	 * One reason that we cannot just immediately flip to readers_fast is
	 * that new readers might fail to see the results of this writer's
	 * critical section.
	 *
	 * Therefore we force it through the slow path which guarantees an
	 * acquire and thereby guarantees the critical section's consistency.
	 */
	atomic_set_release(&sem->block, 0);

	/*
	 * Prod any pending reader/writer to make progress.
	 */
	__wake_up(&sem->waiters, TASK_NORMAL, 1, sem);

	/*
	 * Once this completes (at least one RCU-sched grace period hence) the
	 * reader fast path will be available again. Safe to use outside the
	 * exclusive write lock because it's counting.
	 */
	rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);
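
For reference, a minimal usage sketch (editor's addition, not part of percpu-rwsem.c): the writer entry points percpu_down_write()/percpu_up_write() are the ones defined above, while percpu_down_read()/percpu_up_read() and DEFINE_STATIC_PERCPU_RWSEM() come from include/linux/percpu-rwsem.h. The example_sem/example_state names and the surrounding functions are hypothetical.

#include <linux/percpu-rwsem.h>

/* Hypothetical lock and the data it protects. */
static DEFINE_STATIC_PERCPU_RWSEM(example_sem);
static int example_state;

static int example_read(void)
{
	int val;

	percpu_down_read(&example_sem);		/* fast path: per-CPU increment */
	val = example_state;
	percpu_up_read(&example_sem);

	return val;
}

static void example_write(int val)
{
	percpu_down_write(&example_sem);	/* waits for all active readers to drain */
	example_state = val;
	percpu_up_write(&example_sem);
}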