cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

rseq_test.c (8455B)


// SPDX-License-Identifier: GPL-2.0-only
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <syscall.h>
#include <sys/ioctl.h>
#include <sys/sysinfo.h>
#include <asm/barrier.h>
#include <linux/atomic.h>
#include <linux/rseq.h>
#include <linux/unistd.h>

#include "kvm_util.h"
#include "processor.h"
#include "test_util.h"

#define VCPU_ID 0

static __thread volatile struct rseq __rseq = {
	.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
};

/*
 * Use an arbitrary, bogus signature for configuring rseq; this test does not
 * actually enter an rseq critical section.
 */
#define RSEQ_SIG 0xdeadbeef

/*
 * Any bug related to task migration is likely to be timing-dependent; perform
 * a large number of migrations to reduce the odds of a false negative.
 */
#define NR_TASK_MIGRATIONS 100000

static pthread_t migration_thread;
static cpu_set_t possible_mask;
static int min_cpu, max_cpu;
static bool done;

static atomic_t seq_cnt;

static void guest_code(void)
{
	for (;;)
		GUEST_SYNC(0);
}

static void sys_rseq(int flags)
{
	int r;

	r = syscall(__NR_rseq, &__rseq, sizeof(__rseq), flags, RSEQ_SIG);
	TEST_ASSERT(!r, "rseq failed, errno = %d (%s)", errno, strerror(errno));
}
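/*
 * Illustrative sketch (not part of the upstream test) of the rseq(2) ABI used
 * above: the syscall takes a pointer to the struct, its size, flags and a
 * signature, and after a successful registration the kernel keeps rseq.cpu_id
 * current for the calling thread.  Assuming libc has not already registered
 * rseq for the thread (newer glibc versions do), a minimal standalone
 * registration looks roughly like:
 *
 *	static __thread volatile struct rseq my_rseq = {
 *		.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
 *	};
 *
 *	if (syscall(__NR_rseq, &my_rseq, sizeof(my_rseq), 0, RSEQ_SIG))
 *		perror("rseq");
 *	else
 *		printf("cpu_id = %u\n", my_rseq.cpu_id);
 *
 * "my_rseq" is a name invented for this sketch only.
 */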

static int next_cpu(int cpu)
{
	/*
	 * Advance to the next CPU, skipping those that weren't in the original
	 * affinity set.  Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
	 * data storage is considered opaque.  Note, if this task is pinned to
	 * a small set of discontiguous CPUs, e.g. 2 and 1023, this loop will
	 * burn a lot of cycles and the test will take longer than normal to
	 * complete.
	 */
	do {
		cpu++;
		if (cpu > max_cpu) {
			cpu = min_cpu;
			TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
				    "Min CPU = %d must always be usable", cpu);
			break;
		}
	} while (!CPU_ISSET(cpu, &possible_mask));

	return cpu;
}
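/*
 * Worked example (illustrative, not part of the upstream test): with the task
 * pinned to CPUs 2 and 1023, min_cpu = 2 and max_cpu = 1023, so next_cpu()
 * cycles 2 -> 1023 -> 2 -> ..., and each 2 -> 1023 step walks the 1020
 * unusable CPUs in between, which is the "burn a lot of cycles" case called
 * out above.  Assuming the globals are already populated:
 *
 *	int c = min_cpu;
 *	for (int k = 0; k < 4; k++) {
 *		printf("%d ", c);	// prints "2 1023 2 1023"
 *		c = next_cpu(c);
 *	}
 */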

static void *migration_worker(void *ign)
{
	cpu_set_t allowed_mask;
	int r, i, cpu;

	CPU_ZERO(&allowed_mask);

	for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
		CPU_SET(cpu, &allowed_mask);

		/*
		 * Bump the sequence count twice to allow the reader to detect
		 * that a migration may have occurred in between rseq and sched
		 * CPU ID reads.  An odd sequence count indicates a migration
		 * is in progress, while a completely different count indicates
		 * a migration occurred since the count was last read.
		 */
		atomic_inc(&seq_cnt);

		/*
		 * Ensure the odd count is visible while sched_getcpu() isn't
		 * stable, i.e. while changing affinity is in progress.
		 */
		smp_wmb();
		r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
		TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
			    errno, strerror(errno));
		smp_wmb();
		atomic_inc(&seq_cnt);

		CPU_CLR(cpu, &allowed_mask);

		/*
		 * Wait 1-10us before proceeding to the next iteration and more
		 * specifically, before bumping seq_cnt again.  A delay is
		 * needed on three fronts:
		 *
		 *  1. To allow sched_setaffinity() to prompt migration before
		 *     ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME
		 *     (or TIF_NEED_RESCHED, which indirectly leads to handling
		 *     NOTIFY_RESUME) is handled in KVM context.
		 *
		 *     If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters
		 *     the guest, the guest will trigger an IO/MMIO exit all the
		 *     way to userspace and the TIF flags will be handled by
		 *     the generic "exit to userspace" logic, not by KVM.  The
		 *     exit to userspace is necessary to give the test a chance
		 *     to check the rseq CPU ID (see #2).
		 *
		 *     Alternatively, guest_code() could include an instruction
		 *     to trigger an exit that is handled by KVM, but any such
		 *     exit requires architecture specific code.
		 *
		 *  2. To let ioctl(KVM_RUN) make its way back to the test
		 *     before the next round of migration.  The test's check on
		 *     the rseq CPU ID must wait for migration to complete in
		 *     order to avoid false positives, thus any kernel rseq bug
		 *     will be missed if the next migration starts before the
		 *     check completes.
		 *
		 *  3. To ensure the read-side makes efficient forward progress,
		 *     e.g. if sched_getcpu() involves a syscall.  Stalling the
		 *     read-side means the test will spend more time waiting for
		 *     sched_getcpu() to stabilize and less time trying to hit
		 *     the timing-dependent bug.
		 *
		 * Because any bug in this area is likely to be timing-dependent,
		 * run with a range of delays at 1us intervals from 1us to 10us
		 * as a best effort to avoid tuning the test to the point where
		 * it can hit _only_ the original bug and not detect future
		 * regressions.
		 *
		 * The original bug can be reproduced with a delay up to ~500us
		 * on x86-64, but starts to require more iterations to reproduce
		 * as the delay creeps above ~10us, and the average runtime of
		 * each iteration obviously increases as well.  Cap the delay
		 * at 10us to keep test runtime reasonable while minimizing
		 * potential coverage loss.
		 *
		 * The lower bound for reproducing the bug is likely below 1us,
		 * e.g. failures occur on x86-64 with nanosleep(0), but at that
		 * point the overhead of the syscall likely dominates the delay.
		 * Use usleep() for simplicity and to avoid unnecessary kernel
		 * dependencies.
		 */
		usleep((i % 10) + 1);
	}
	done = true;
	return NULL;
}
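/*
 * Illustrative sketch (not part of the upstream test): the two increments
 * above pair with the reader in main() to form a classic seqcount.  The
 * shape, with "data" standing in for the affinity change the reader must
 * observe consistently:
 *
 *	writer:
 *		atomic_inc(&seq_cnt);	// count becomes odd: update in flight
 *		smp_wmb();
 *		<update data>;
 *		smp_wmb();
 *		atomic_inc(&seq_cnt);	// count becomes even: update done
 *
 *	reader:
 *		do {
 *			snapshot = atomic_read(&seq_cnt) & ~1;
 *			smp_rmb();
 *			<read data>;
 *			smp_rmb();
 *		} while (snapshot != atomic_read(&seq_cnt));
 *
 * Clearing bit 0 in the snapshot forces a retry whenever the count is odd,
 * and a changed count forces a retry when an update completed in between.
 */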

static int calc_min_max_cpu(void)
{
	int i, cnt, nproc;

	if (CPU_COUNT(&possible_mask) < 2)
		return -EINVAL;

	/*
	 * CPU_SET doesn't provide a FOR_EACH helper; get the min/max CPU that
	 * this task is affined to in order to reduce the time spent querying
	 * unusable CPUs, e.g. if this task is pinned to a small percentage of
	 * total CPUs.
	 */
	nproc = get_nprocs_conf();
	min_cpu = -1;
	max_cpu = -1;
	cnt = 0;

	for (i = 0; i < nproc; i++) {
		if (!CPU_ISSET(i, &possible_mask))
			continue;
		if (min_cpu == -1)
			min_cpu = i;
		max_cpu = i;
		cnt++;
	}

	return (cnt < 2) ? -EINVAL : 0;
}
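/*
 * Illustrative note (not part of the upstream test): glibc provides
 * CPU_COUNT() and CPU_ISSET() but no iterator, hence the manual scan over
 * [0, get_nprocs_conf()).  For example, running the test pinned with
 * "taskset -c 2,4-6 ./rseq_test" yields min_cpu = 2, max_cpu = 6 and
 * cnt = 4; any mask with fewer than two usable CPUs makes the test skip.
 */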

int main(int argc, char *argv[])
{
	int r, i, snapshot;
	struct kvm_vm *vm;
	u32 cpu, rseq_cpu;

	/* Tell stdout not to buffer its content */
	setbuf(stdout, NULL);

	r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
	TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
		    strerror(errno));

	if (calc_min_max_cpu()) {
		print_skip("Only one usable CPU, task migration not possible");
		exit(KSFT_SKIP);
	}

	sys_rseq(0);

	/*
	 * Create and run a dummy VM that immediately exits to userspace via
	 * GUEST_SYNC, while concurrently migrating the process by setting its
	 * CPU affinity.
	 */
	vm = vm_create_default(VCPU_ID, 0, guest_code);
	ucall_init(vm, NULL);

	pthread_create(&migration_thread, NULL, migration_worker, 0);

	for (i = 0; !done; i++) {
		vcpu_run(vm, VCPU_ID);
		TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
			    "Guest failed?");

		/*
		 * Verify rseq's CPU matches sched's CPU.  Ensure migration
		 * doesn't occur between sched_getcpu() and reading the rseq
		 * cpu_id by rereading both if the sequence count changes, or
		 * if the count is odd (migration in progress).
		 */
		do {
			/*
			 * Drop bit 0 to force a mismatch if the count is odd,
			 * i.e. if a migration is in progress.
			 */
			snapshot = atomic_read(&seq_cnt) & ~1;

			/*
			 * Ensure reading sched_getcpu() and rseq.cpu_id
			 * complete in a single "no migration" window, i.e. are
			 * not reordered across the seq_cnt reads.
			 */
			smp_rmb();
			cpu = sched_getcpu();
			rseq_cpu = READ_ONCE(__rseq.cpu_id);
			smp_rmb();
		} while (snapshot != atomic_read(&seq_cnt));

		TEST_ASSERT(rseq_cpu == cpu,
			    "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu);
	}

	/*
	 * Sanity check that the test was able to enter the guest a reasonable
	 * number of times, e.g. didn't get stalled too often/long waiting for
	 * sched_getcpu() to stabilize.  A 2:1 migration:KVM_RUN ratio is
	 * fairly conservative on x86-64, which can do _more_ KVM_RUNs than
	 * migrations given the 1us+ delay in the migration task.
	 */
	TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
		    "Only performed %d KVM_RUNs, task stalled too much?\n", i);

	pthread_join(migration_thread, NULL);

	kvm_vm_free(vm);

	sys_rseq(RSEQ_FLAG_UNREGISTER);

	return 0;
}