cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

refscale.c (21048B)


// SPDX-License-Identifier: GPL-2.0+
//
// Scalability test comparing RCU vs other mechanisms
// for acquiring references on objects.
//
// Copyright (C) Google, 2020.
//
// Author: Joel Fernandes <joel@joelfernandes.org>

#define pr_fmt(fmt) fmt

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/stat.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/torture.h>
#include <linux/types.h>

#include "rcu.h"

#define SCALE_FLAG "-ref-scale: "

#define SCALEOUT(s, x...) \
	pr_alert("%s" SCALE_FLAG s, scale_type, ## x)

#define VERBOSE_SCALEOUT(s, x...) \
	do { \
		if (verbose) \
			pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \
	} while (0)

static atomic_t verbose_batch_ctr;

#define VERBOSE_SCALEOUT_BATCH(s, x...)							\
do {											\
	if (verbose &&									\
	    (verbose_batched <= 0 ||							\
	     !(atomic_inc_return(&verbose_batch_ctr) % verbose_batched))) {		\
		schedule_timeout_uninterruptible(1);					\
		pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x);			\
	}										\
} while (0)

#define SCALEOUT_ERRSTRING(s, x...) pr_alert("%s" SCALE_FLAG "!!! " s "\n", scale_type, ## x)

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>");

static char *scale_type = "rcu";
module_param(scale_type, charp, 0444);
MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock).");

torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s");

// Wait until there are multiple CPUs before starting test.
torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0,
	      "Holdoff time before test start (s)");
// Number of loops per experiment, all readers execute operations concurrently.
torture_param(long, loops, 10000, "Number of loops per experiment.");
// Number of readers, with -1 defaulting to about 75% of the CPUs.
torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs.");
// Number of runs.
torture_param(int, nruns, 30, "Number of experiments to run.");
// Reader delay in nanoseconds, 0 for no delay.
torture_param(int, readdelay, 0, "Read-side delay in nanoseconds.");

#ifdef MODULE
# define REFSCALE_SHUTDOWN 0
#else
# define REFSCALE_SHUTDOWN 1
#endif

torture_param(bool, shutdown, REFSCALE_SHUTDOWN,
	      "Shutdown at end of scalability tests.");
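
// Example invocation (hypothetical parameter values; assumes this file is
// built as the refscale.ko module, i.e. CONFIG_RCU_REF_SCALE_TEST=m):
//
//	modprobe refscale scale_type=srcu nreaders=4 nruns=10 loops=100000
//
// Results are emitted via pr_alert(), so they appear in the kernel log.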

struct reader_task {
	struct task_struct *task;
	int start_reader;
	wait_queue_head_t wq;
	u64 last_duration_ns;
};

static struct task_struct *shutdown_task;
static wait_queue_head_t shutdown_wq;

static struct task_struct *main_task;
static wait_queue_head_t main_wq;
static int shutdown_start;

static struct reader_task *reader_tasks;

// Number of readers that are part of the current experiment.
static atomic_t nreaders_exp;

// Use to wait for all threads to start.
static atomic_t n_init;
static atomic_t n_started;
static atomic_t n_warmedup;
static atomic_t n_cooleddown;

// Track which experiment is currently running.
static int exp_idx;

// Operations vector for selecting different types of tests.
struct ref_scale_ops {
	void (*init)(void);
	void (*cleanup)(void);
	void (*readsection)(const int nloops);
	void (*delaysection)(const int nloops, const int udl, const int ndl);
	const char *name;
};
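
// Each scale_type corresponds to one of these vectors.  ref_scale_init()
// matches the scale_type module parameter against ->name, and each reader
// then calls ->readsection(loops), or ->delaysection() when readdelay > 0.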

static struct ref_scale_ops *cur_ops;

static void un_delay(const int udl, const int ndl)
{
	if (udl)
		udelay(udl);
	if (ndl)
		ndelay(ndl);
}

static void ref_rcu_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock();
		rcu_read_unlock();
	}
}

static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock();
		un_delay(udl, ndl);
		rcu_read_unlock();
	}
}

static void rcu_sync_scale_init(void)
{
}

static struct ref_scale_ops rcu_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= ref_rcu_read_section,
	.delaysection	= ref_rcu_delay_section,
	.name		= "rcu"
};

// Definitions for SRCU ref scale testing.
DEFINE_STATIC_SRCU(srcu_refctl_scale);
static struct srcu_struct *srcu_ctlp = &srcu_refctl_scale;

static void srcu_ref_scale_read_section(const int nloops)
{
	int i;
	int idx;

	for (i = nloops; i >= 0; i--) {
		idx = srcu_read_lock(srcu_ctlp);
		srcu_read_unlock(srcu_ctlp, idx);
	}
}

static void srcu_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;
	int idx;

	for (i = nloops; i >= 0; i--) {
		idx = srcu_read_lock(srcu_ctlp);
		un_delay(udl, ndl);
		srcu_read_unlock(srcu_ctlp, idx);
	}
}

static struct ref_scale_ops srcu_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= srcu_ref_scale_read_section,
	.delaysection	= srcu_ref_scale_delay_section,
	.name		= "srcu"
};

#ifdef CONFIG_TASKS_RCU

// Definitions for RCU Tasks ref scale testing: Empty read markers.
// These definitions also work for RCU Rude readers.
static void rcu_tasks_ref_scale_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--)
		continue;
}

static void rcu_tasks_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--)
		un_delay(udl, ndl);
}

static struct ref_scale_ops rcu_tasks_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= rcu_tasks_ref_scale_read_section,
	.delaysection	= rcu_tasks_ref_scale_delay_section,
	.name		= "rcu-tasks"
};

#define RCU_TASKS_OPS &rcu_tasks_ops,

#else // #ifdef CONFIG_TASKS_RCU

#define RCU_TASKS_OPS

#endif // #else // #ifdef CONFIG_TASKS_RCU
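
// RCU_TASKS_OPS (and RCU_TRACE_OPS below) expand either to an initializer
// entry or to nothing, so the scale_ops[] table in ref_scale_init() only
// lists the vectors whose RCU flavors are configured into this kernel.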

#ifdef CONFIG_TASKS_TRACE_RCU

// Definitions for RCU Tasks Trace ref scale testing.
static void rcu_trace_ref_scale_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock_trace();
		rcu_read_unlock_trace();
	}
}

static void rcu_trace_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock_trace();
		un_delay(udl, ndl);
		rcu_read_unlock_trace();
	}
}

static struct ref_scale_ops rcu_trace_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= rcu_trace_ref_scale_read_section,
	.delaysection	= rcu_trace_ref_scale_delay_section,
	.name		= "rcu-trace"
};

#define RCU_TRACE_OPS &rcu_trace_ops,

#else // #ifdef CONFIG_TASKS_TRACE_RCU

#define RCU_TRACE_OPS

#endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU

// Definitions for reference count
static atomic_t refcnt;

static void ref_refcnt_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		atomic_inc(&refcnt);
		atomic_dec(&refcnt);
	}
}

static void ref_refcnt_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		atomic_inc(&refcnt);
		un_delay(udl, ndl);
		atomic_dec(&refcnt);
	}
}

static struct ref_scale_ops refcnt_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= ref_refcnt_section,
	.delaysection	= ref_refcnt_delay_section,
	.name		= "refcnt"
};

// Definitions for rwlock
static rwlock_t test_rwlock;

static void ref_rwlock_init(void)
{
	rwlock_init(&test_rwlock);
}

static void ref_rwlock_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		read_lock(&test_rwlock);
		read_unlock(&test_rwlock);
	}
}

static void ref_rwlock_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		read_lock(&test_rwlock);
		un_delay(udl, ndl);
		read_unlock(&test_rwlock);
	}
}

static struct ref_scale_ops rwlock_ops = {
	.init		= ref_rwlock_init,
	.readsection	= ref_rwlock_section,
	.delaysection	= ref_rwlock_delay_section,
	.name		= "rwlock"
};

// Definitions for rwsem
static struct rw_semaphore test_rwsem;

static void ref_rwsem_init(void)
{
	init_rwsem(&test_rwsem);
}

static void ref_rwsem_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		down_read(&test_rwsem);
		up_read(&test_rwsem);
	}
}

static void ref_rwsem_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		down_read(&test_rwsem);
		un_delay(udl, ndl);
		up_read(&test_rwsem);
	}
}

static struct ref_scale_ops rwsem_ops = {
	.init		= ref_rwsem_init,
	.readsection	= ref_rwsem_section,
	.delaysection	= ref_rwsem_delay_section,
	.name		= "rwsem"
};

// Definitions for global spinlock
static DEFINE_SPINLOCK(test_lock);

static void ref_lock_section(const int nloops)
{
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		spin_lock(&test_lock);
		spin_unlock(&test_lock);
	}
	preempt_enable();
}

static void ref_lock_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		spin_lock(&test_lock);
		un_delay(udl, ndl);
		spin_unlock(&test_lock);
	}
	preempt_enable();
}

static struct ref_scale_ops lock_ops = {
	.readsection	= ref_lock_section,
	.delaysection	= ref_lock_delay_section,
	.name		= "lock"
};

// Definitions for global irq-save spinlock

static void ref_lock_irq_section(const int nloops)
{
	unsigned long flags;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		spin_lock_irqsave(&test_lock, flags);
		spin_unlock_irqrestore(&test_lock, flags);
	}
	preempt_enable();
}

static void ref_lock_irq_delay_section(const int nloops, const int udl, const int ndl)
{
	unsigned long flags;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		spin_lock_irqsave(&test_lock, flags);
		un_delay(udl, ndl);
		spin_unlock_irqrestore(&test_lock, flags);
	}
	preempt_enable();
}

static struct ref_scale_ops lock_irq_ops = {
	.readsection	= ref_lock_irq_section,
	.delaysection	= ref_lock_irq_delay_section,
	.name		= "lock-irq"
};

// Definitions for acquire-release.
static DEFINE_PER_CPU(unsigned long, test_acqrel);

static void ref_acqrel_section(const int nloops)
{
	unsigned long x;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		x = smp_load_acquire(this_cpu_ptr(&test_acqrel));
		smp_store_release(this_cpu_ptr(&test_acqrel), x + 1);
	}
	preempt_enable();
}

static void ref_acqrel_delay_section(const int nloops, const int udl, const int ndl)
{
	unsigned long x;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		x = smp_load_acquire(this_cpu_ptr(&test_acqrel));
		un_delay(udl, ndl);
		smp_store_release(this_cpu_ptr(&test_acqrel), x + 1);
	}
	preempt_enable();
}

static struct ref_scale_ops acqrel_ops = {
	.readsection	= ref_acqrel_section,
	.delaysection	= ref_acqrel_delay_section,
	.name		= "acqrel"
};

static volatile u64 stopopts;

static void ref_clock_section(const int nloops)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--)
		x += ktime_get_real_fast_ns();
	preempt_enable();
	stopopts = x;
}

static void ref_clock_delay_section(const int nloops, const int udl, const int ndl)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		x += ktime_get_real_fast_ns();
		un_delay(udl, ndl);
	}
	preempt_enable();
	stopopts = x;
}

static struct ref_scale_ops clock_ops = {
	.readsection	= ref_clock_section,
	.delaysection	= ref_clock_delay_section,
	.name		= "clock"
};

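// Do one pass of read-side operations.  A nonzero readdelay (nanoseconds) is
// split into readdelay / 1000 microseconds of udelay() plus readdelay % 1000
// nanoseconds of ndelay() inside each critical section.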
static void rcu_scale_one_reader(void)
{
	if (readdelay <= 0)
		cur_ops->readsection(loops);
	else
		cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000);
}

// Reader kthread.  Repeatedly does empty RCU read-side
// critical section, minimizing update-side interference.
static int
ref_scale_reader(void *arg)
{
	unsigned long flags;
	long me = (long)arg;
	struct reader_task *rt = &(reader_tasks[me]);
	u64 start;
	s64 duration;

	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: task started", me);
	WARN_ON_ONCE(set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)));
	set_user_nice(current, MAX_NICE);
	atomic_inc(&n_init);
	if (holdoff)
		schedule_timeout_interruptible(holdoff * HZ);
repeat:
	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, raw_smp_processor_id());

	// Wait for signal that this reader can start.
	wait_event(rt->wq, (atomic_read(&nreaders_exp) && smp_load_acquire(&rt->start_reader)) ||
			   torture_must_stop());

	if (torture_must_stop())
		goto end;

	// Make sure that the CPU is affinitized appropriately during testing.
	WARN_ON_ONCE(raw_smp_processor_id() != me);

	WRITE_ONCE(rt->start_reader, 0);
	if (!atomic_dec_return(&n_started))
		while (atomic_read_acquire(&n_started))
			cpu_relax();

	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d started", me, exp_idx);


	// To reduce noise, do an initial cache-warming invocation, check
	// in, and then keep warming until everyone has checked in.
	rcu_scale_one_reader();
	if (!atomic_dec_return(&n_warmedup))
		while (atomic_read_acquire(&n_warmedup))
			rcu_scale_one_reader();
	// Also keep interrupts disabled.  This also has the effect
	// of preventing entries into slow path for rcu_read_unlock().
	local_irq_save(flags);
	start = ktime_get_mono_fast_ns();

	rcu_scale_one_reader();

	duration = ktime_get_mono_fast_ns() - start;
	local_irq_restore(flags);

	rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration;
	// To reduce runtime-skew noise, do maintain-load invocations until
	// everyone is done.
	if (!atomic_dec_return(&n_cooleddown))
		while (atomic_read_acquire(&n_cooleddown))
			rcu_scale_one_reader();

	if (atomic_dec_and_test(&nreaders_exp))
		wake_up(&main_wq);

	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d ended, (readers remaining=%d)",
				me, exp_idx, atomic_read(&nreaders_exp));

	if (!torture_must_stop())
		goto repeat;
end:
	torture_kthread_stopping("ref_scale_reader");
	return 0;
}

static void reset_readers(void)
{
	int i;
	struct reader_task *rt;

	for (i = 0; i < nreaders; i++) {
		rt = &(reader_tasks[i]);

		rt->last_duration_ns = 0;
	}
}

// Print the results of each reader and return the sum of all their durations.
static u64 process_durations(int n)
{
	int i;
	struct reader_task *rt;
	char buf1[64];
	char *buf;
	u64 sum = 0;

	buf = kmalloc(800 + 64, GFP_KERNEL);
	if (!buf)
		return 0;
	buf[0] = 0;
	sprintf(buf, "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)",
		exp_idx);

	for (i = 0; i < n && !torture_must_stop(); i++) {
		rt = &(reader_tasks[i]);
		sprintf(buf1, "%d: %llu\t", i, rt->last_duration_ns);

		if (i % 5 == 0)
			strcat(buf, "\n");
		if (strlen(buf) >= 800) {
			pr_alert("%s", buf);
			buf[0] = 0;
		}
		strcat(buf, buf1);

		sum += rt->last_duration_ns;
	}
	pr_alert("%s\n", buf);

	kfree(buf);
	return sum;
}

// The main_func kthread is the orchestrator.  It runs nruns experiments;
// for each one it orders all of the readers to start, waits for them to
// finish, and collects their per-reader timings before starting the next
// experiment.  Once every experiment has completed, the per-experiment
// averages are printed.
static int main_func(void *arg)
{
	int exp, r;
	char buf1[64];
	char *buf;
	u64 *result_avg;

	set_cpus_allowed_ptr(current, cpumask_of(nreaders % nr_cpu_ids));
	set_user_nice(current, MAX_NICE);

	VERBOSE_SCALEOUT("main_func task started");
	result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL);
	buf = kzalloc(800 + 64, GFP_KERNEL);
	if (!result_avg || !buf) {
		SCALEOUT_ERRSTRING("out of memory");
		goto oom_exit;
	}
	if (holdoff)
		schedule_timeout_interruptible(holdoff * HZ);

	// Wait for all threads to start.
	atomic_inc(&n_init);
	while (atomic_read(&n_init) < nreaders + 1)
		schedule_timeout_uninterruptible(1);

	// Start all readers for each experiment.
	for (exp = 0; exp < nruns && !torture_must_stop(); exp++) {
		if (torture_must_stop())
			goto end;

		reset_readers();
		atomic_set(&nreaders_exp, nreaders);
		atomic_set(&n_started, nreaders);
		atomic_set(&n_warmedup, nreaders);
		atomic_set(&n_cooleddown, nreaders);

		exp_idx = exp;

		for (r = 0; r < nreaders; r++) {
			smp_store_release(&reader_tasks[r].start_reader, 1);
			wake_up(&reader_tasks[r].wq);
		}

		VERBOSE_SCALEOUT("main_func: experiment started, waiting for %d readers",
				nreaders);

		wait_event(main_wq,
			   !atomic_read(&nreaders_exp) || torture_must_stop());

		VERBOSE_SCALEOUT("main_func: experiment ended");

		if (torture_must_stop())
			goto end;

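		// Per-experiment result: average nanoseconds per loop per reader,
		// scaled by 1000 so the report below can print three decimal places.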
		result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops);
	}

	// Print the average of all experiments
	SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");

	pr_alert("Runs\tTime(ns)\n");
	for (exp = 0; exp < nruns; exp++) {
		u64 avg;
		u32 rem;

		avg = div_u64_rem(result_avg[exp], 1000, &rem);
		sprintf(buf1, "%d\t%llu.%03u\n", exp + 1, avg, rem);
		strcat(buf, buf1);
		if (strlen(buf) >= 800) {
			pr_alert("%s", buf);
			buf[0] = 0;
		}
	}

	pr_alert("%s", buf);

oom_exit:
	// This will shut down everything including us.
	if (shutdown) {
		shutdown_start = 1;
		wake_up(&shutdown_wq);
	}

	// Wait for torture to stop us
	while (!torture_must_stop())
		schedule_timeout_uninterruptible(1);

end:
	torture_kthread_stopping("main_func");
	kfree(result_avg);
	kfree(buf);
	return 0;
}

static void
ref_scale_print_module_parms(struct ref_scale_ops *cur_ops, const char *tag)
{
	pr_alert("%s" SCALE_FLAG
		 "--- %s:  verbose=%d shutdown=%d holdoff=%d loops=%ld nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
		 verbose, shutdown, holdoff, loops, nreaders, nruns, readdelay);
}

static void
ref_scale_cleanup(void)
{
	int i;

	if (torture_cleanup_begin())
		return;

	if (!cur_ops) {
		torture_cleanup_end();
		return;
	}

	if (reader_tasks) {
		for (i = 0; i < nreaders; i++)
			torture_stop_kthread("ref_scale_reader",
					     reader_tasks[i].task);
	}
	kfree(reader_tasks);

	torture_stop_kthread("main_task", main_task);
	kfree(main_task);

	// Do scale-type-specific cleanup operations.
	if (cur_ops->cleanup != NULL)
		cur_ops->cleanup();

	torture_cleanup_end();
}

// Shutdown kthread.  Just waits to be awakened, then shuts down system.
static int
ref_scale_shutdown(void *arg)
{
	wait_event(shutdown_wq, shutdown_start);

	smp_mb(); // Wake before output.
	ref_scale_cleanup();
	kernel_power_off();

	return -EINVAL;
}

static int __init
ref_scale_init(void)
{
	long i;
	int firsterr = 0;
	static struct ref_scale_ops *scale_ops[] = {
		&rcu_ops, &srcu_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
		&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
	};

	if (!torture_init_begin(scale_type, verbose))
		return -EBUSY;

	for (i = 0; i < ARRAY_SIZE(scale_ops); i++) {
		cur_ops = scale_ops[i];
		if (strcmp(scale_type, cur_ops->name) == 0)
			break;
	}
	if (i == ARRAY_SIZE(scale_ops)) {
		pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type);
		pr_alert("rcu-scale types:");
		for (i = 0; i < ARRAY_SIZE(scale_ops); i++)
			pr_cont(" %s", scale_ops[i]->name);
		pr_cont("\n");
		firsterr = -EINVAL;
		cur_ops = NULL;
		goto unwind;
	}
	if (cur_ops->init)
		cur_ops->init();

	ref_scale_print_module_parms(cur_ops, "Start of test");

	// Shutdown task
	if (shutdown) {
		init_waitqueue_head(&shutdown_wq);
		firsterr = torture_create_kthread(ref_scale_shutdown, NULL,
						  shutdown_task);
		if (torture_init_error(firsterr))
			goto unwind;
		schedule_timeout_uninterruptible(1);
	}

	// Reader tasks (default to ~75% of online CPUs).
	if (nreaders < 0)
		nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2);
	if (WARN_ONCE(loops <= 0, "%s: loops = %ld, adjusted to 1\n", __func__, loops))
		loops = 1;
	if (WARN_ONCE(nreaders <= 0, "%s: nreaders = %d, adjusted to 1\n", __func__, nreaders))
		nreaders = 1;
	if (WARN_ONCE(nruns <= 0, "%s: nruns = %d, adjusted to 1\n", __func__, nruns))
		nruns = 1;
	reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]),
			       GFP_KERNEL);
	if (!reader_tasks) {
		SCALEOUT_ERRSTRING("out of memory");
		firsterr = -ENOMEM;
		goto unwind;
	}

	VERBOSE_SCALEOUT("Starting %d reader threads", nreaders);

	for (i = 0; i < nreaders; i++) {
		firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
						  reader_tasks[i].task);
		if (torture_init_error(firsterr))
			goto unwind;

		init_waitqueue_head(&(reader_tasks[i].wq));
	}

	// Main Task
	init_waitqueue_head(&main_wq);
	firsterr = torture_create_kthread(main_func, NULL, main_task);
	if (torture_init_error(firsterr))
		goto unwind;

	torture_init_end();
	return 0;

unwind:
	torture_init_end();
	ref_scale_cleanup();
	if (shutdown) {
		WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST));
		kernel_power_off();
	}
	return firsterr;
}

module_init(ref_scale_init);
module_exit(ref_scale_cleanup);