cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

syscall_numbering.c (11538B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2/*
      3 * syscall_numbering.c - test calling the x86-64 kernel with various
      4 * valid and invalid system call numbers.
      5 *
      6 * Copyright (c) 2018 Andrew Lutomirski
      7 */
      8
      9#define _GNU_SOURCE
     10
     11#include <stdlib.h>
     12#include <stdio.h>
     13#include <stdbool.h>
     14#include <errno.h>
     15#include <unistd.h>
     16#include <string.h>
     17#include <fcntl.h>
     18#include <limits.h>
     19#include <signal.h>
     20#include <sysexits.h>
     21
     22#include <sys/ptrace.h>
     23#include <sys/user.h>
     24#include <sys/wait.h>
     25#include <sys/mman.h>
     26
     27#include <linux/ptrace.h>
     28
     29/* Common system call numbers */
     30#define SYS_READ	  0
     31#define SYS_WRITE	  1
     32#define SYS_GETPID	 39
     33/* x64-only system call numbers */
     34#define X64_IOCTL	 16
     35#define X64_READV	 19
     36#define X64_WRITEV	 20
     37/* x32-only system call numbers (without X32_BIT) */
     38#define X32_IOCTL	514
     39#define X32_READV	515
     40#define X32_WRITEV	516
     41
     42#define X32_BIT 0x40000000
     43
     44static int nullfd = -1;		/* File descriptor for /dev/null */
     45static bool with_x32;		/* x32 supported on this kernel? */
     46
     47enum ptrace_pass {
     48	PTP_NOTHING,
     49	PTP_GETREGS,
     50	PTP_WRITEBACK,
     51	PTP_FUZZRET,
     52	PTP_FUZZHIGH,
     53	PTP_INTNUM,
     54	PTP_DONE
     55};
     56
     57static const char * const ptrace_pass_name[] =
     58{
     59	[PTP_NOTHING]	= "just stop, no data read",
     60	[PTP_GETREGS]	= "only getregs",
     61	[PTP_WRITEBACK]	= "getregs, unmodified setregs",
     62	[PTP_FUZZRET]	= "modifying the default return",
     63	[PTP_FUZZHIGH]	= "clobbering the top 32 bits",
     64	[PTP_INTNUM]	= "sign-extending the syscall number",
     65};
     66
     67/*
     68 * Shared memory block between tracer and test
     69 */
     70struct shared {
     71	unsigned int nerr;	/* Total error count */
     72	unsigned int indent;	/* Message indentation level */
     73	enum ptrace_pass ptrace_pass;
     74	bool probing_syscall;	/* In probe_syscall() */
     75};
     76static volatile struct shared *sh;
     77
     78static inline unsigned int offset(void)
     79{
     80	unsigned int level = sh ? sh->indent : 0;
     81
     82	return 8 + level * 4;
     83}
     84
     85#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
     86				  ## __VA_ARGS__)
     87
     88#define run(fmt, ...)  msg(RUN,  fmt, ## __VA_ARGS__)
     89#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__)
     90#define ok(fmt, ...)   msg(OK,   fmt, ## __VA_ARGS__)
     91
     92#define fail(fmt, ...)					\
     93	do {						\
     94		msg(FAIL, fmt, ## __VA_ARGS__);		\
     95		sh->nerr++;				\
     96       } while (0)
     97
     98#define crit(fmt, ...)					\
     99	do {						\
    100		sh->indent = 0;				\
    101		msg(FAIL, fmt, ## __VA_ARGS__);		\
    102		msg(SKIP, "Unable to run test\n");	\
    103		exit(EX_OSERR);				\
    104       } while (0)
    105
    106/* Sentinel for ptrace-modified return value */
    107#define MODIFIED_BY_PTRACE	-9999
    108
    109/*
    110 * Directly invokes the given syscall with nullfd as the first argument
    111 * and the rest zero. Avoids involving glibc wrappers in case they ever
    112 * end up intercepting some system calls for some reason, or modify
    113 * the system call number itself.
    114 */
    115static long long probe_syscall(int msb, int lsb)
    116{
    117	register long long arg1 asm("rdi") = nullfd;
    118	register long long arg2 asm("rsi") = 0;
    119	register long long arg3 asm("rdx") = 0;
    120	register long long arg4 asm("r10") = 0;
    121	register long long arg5 asm("r8")  = 0;
    122	register long long arg6 asm("r9")  = 0;
    123	long long nr = ((long long)msb << 32) | (unsigned int)lsb;
    124	long long ret;
    125
    126	/*
    127	 * We pass in an extra copy of the extended system call number
    128	 * in %rbx, so we can examine it from the ptrace handler without
    129	 * worrying about it being possibly modified. This is to test
    130	 * the validity of struct user regs.orig_rax a.k.a.
    131	 * struct pt_regs.orig_ax.
    132	 */
    133	sh->probing_syscall = true;
    134	asm volatile("syscall"
    135		     : "=a" (ret)
    136		     : "a" (nr), "b" (nr),
    137		       "r" (arg1), "r" (arg2), "r" (arg3),
    138		       "r" (arg4), "r" (arg5), "r" (arg6)
    139		     : "rcx", "r11", "memory", "cc");
    140	sh->probing_syscall = false;
    141
    142	return ret;
    143}
    144
    145static const char *syscall_str(int msb, int start, int end)
    146{
    147	static char buf[64];
    148	const char * const type = (start & X32_BIT) ? "x32" : "x64";
    149	int lsb = start;
    150
    151	/*
    152	 * Improve readability by stripping the x32 bit, but round
    153	 * toward zero so we don't display -1 as -1073741825.
    154	 */
    155	if (lsb < 0)
    156		lsb |= X32_BIT;
    157	else
    158		lsb &= ~X32_BIT;
    159
    160	if (start == end)
    161		snprintf(buf, sizeof buf, "%s syscall %d:%d",
    162			 type, msb, lsb);
    163	else
    164		snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
    165			 type, msb, lsb, lsb + (end-start));
    166
    167	return buf;
    168}
    169
    170static unsigned int _check_for(int msb, int start, int end, long long expect,
    171			       const char *expect_str)
    172{
    173	unsigned int err = 0;
    174
    175	sh->indent++;
    176	if (start != end)
    177		sh->indent++;
    178
    179	for (int nr = start; nr <= end; nr++) {
    180		long long ret = probe_syscall(msb, nr);
    181
    182		if (ret != expect) {
    183			fail("%s returned %lld, but it should have returned %s\n",
    184			       syscall_str(msb, nr, nr),
    185			       ret, expect_str);
    186			err++;
    187		}
    188	}
    189
    190	if (start != end)
    191		sh->indent--;
    192
    193	if (err) {
    194		if (start != end)
    195			fail("%s had %u failure%s\n",
    196			     syscall_str(msb, start, end),
    197			     err, err == 1 ? "s" : "");
    198	} else {
    199		ok("%s returned %s as expected\n",
    200		   syscall_str(msb, start, end), expect_str);
    201	}
    202
    203	sh->indent--;
    204
    205	return err;
    206}
    207
    208#define check_for(msb,start,end,expect) \
    209	_check_for(msb,start,end,expect,#expect)
    210
    211static bool check_zero(int msb, int nr)
    212{
    213	return check_for(msb, nr, nr, 0);
    214}
    215
    216static bool check_enosys(int msb, int nr)
    217{
    218	return check_for(msb, nr, nr, -ENOSYS);
    219}
    220
    221/*
    222 * Anyone diagnosing a failure will want to know whether the kernel
    223 * supports x32. Tell them. This can also be used to conditionalize
    224 * tests based on existence or nonexistence of x32.
    225 */
    226static bool test_x32(void)
    227{
    228	long long ret;
    229	pid_t mypid = getpid();
    230
    231	run("Checking for x32 by calling x32 getpid()\n");
    232	ret = probe_syscall(0, SYS_GETPID | X32_BIT);
    233
    234	sh->indent++;
    235	if (ret == mypid) {
    236		info("x32 is supported\n");
    237		with_x32 = true;
    238	} else if (ret == -ENOSYS) {
    239		info("x32 is not supported\n");
    240		with_x32 = false;
    241	} else {
    242		fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid);
    243		with_x32 = false;
    244	}
    245	sh->indent--;
    246	return with_x32;
    247}
    248
    249static void test_syscalls_common(int msb)
    250{
    251	enum ptrace_pass pass = sh->ptrace_pass;
    252
    253	run("Checking some common syscalls as 64 bit\n");
    254	check_zero(msb, SYS_READ);
    255	check_zero(msb, SYS_WRITE);
    256
    257	run("Checking some 64-bit only syscalls as 64 bit\n");
    258	check_zero(msb, X64_READV);
    259	check_zero(msb, X64_WRITEV);
    260
    261	run("Checking out of range system calls\n");
    262	check_for(msb, -64, -2, -ENOSYS);
    263	if (pass >= PTP_FUZZRET)
    264		check_for(msb, -1, -1, MODIFIED_BY_PTRACE);
    265	else
    266		check_for(msb, -1, -1, -ENOSYS);
    267	check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
    268	check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
    269	check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
    270}
    271
    272static void test_syscalls_with_x32(int msb)
    273{
    274	/*
    275	 * Syscalls 512-547 are "x32" syscalls.  They are
    276	 * intended to be called with the x32 (0x40000000) bit
    277	 * set.  Calling them without the x32 bit set is
    278	 * nonsense and should not work.
    279	 */
    280	run("Checking x32 syscalls as 64 bit\n");
    281	check_for(msb, 512, 547, -ENOSYS);
    282
    283	run("Checking some common syscalls as x32\n");
    284	check_zero(msb, SYS_READ   | X32_BIT);
    285	check_zero(msb, SYS_WRITE  | X32_BIT);
    286
    287	run("Checking some x32 syscalls as x32\n");
    288	check_zero(msb, X32_READV  | X32_BIT);
    289	check_zero(msb, X32_WRITEV | X32_BIT);
    290
    291	run("Checking some 64-bit syscalls as x32\n");
    292	check_enosys(msb, X64_IOCTL  | X32_BIT);
    293	check_enosys(msb, X64_READV  | X32_BIT);
    294	check_enosys(msb, X64_WRITEV | X32_BIT);
    295}
    296
    297static void test_syscalls_without_x32(int msb)
    298{
    299	run("Checking for absence of x32 system calls\n");
    300	check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
    301}
    302
    303static void test_syscall_numbering(void)
    304{
    305	static const int msbs[] = {
    306		0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
    307		INT_MIN, INT_MIN+1
    308	};
    309
    310	sh->indent++;
    311
    312	/*
    313	 * The MSB is supposed to be ignored, so we loop over a few
    314	 * to test that out.
    315	 */
    316	for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
    317		int msb = msbs[i];
    318		run("Checking system calls with msb = %d (0x%x)\n",
    319		    msb, msb);
    320
    321		sh->indent++;
    322
    323		test_syscalls_common(msb);
    324		if (with_x32)
    325			test_syscalls_with_x32(msb);
    326		else
    327			test_syscalls_without_x32(msb);
    328
    329		sh->indent--;
    330	}
    331
    332	sh->indent--;
    333}
    334
    335static void syscall_numbering_tracee(void)
    336{
    337	enum ptrace_pass pass;
    338
    339	if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
    340		crit("Failed to request tracing\n");
    341		return;
    342	}
    343	raise(SIGSTOP);
    344
    345	for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE;
    346	     sh->ptrace_pass = ++pass) {
    347		run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
    348		test_syscall_numbering();
    349	}
    350}
    351
    352static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
    353{
    354	struct user_regs_struct regs;
    355
    356	sh->probing_syscall = false; /* Do this on entry only */
    357
    358	/* For these, don't even getregs */
    359	if (pass == PTP_NOTHING || pass == PTP_DONE)
    360		return;
    361
    362	ptrace(PTRACE_GETREGS, testpid, NULL, &regs);
    363
    364	if (regs.orig_rax != regs.rbx) {
    365		fail("orig_rax %#llx doesn't match syscall number %#llx\n",
    366		     (unsigned long long)regs.orig_rax,
    367		     (unsigned long long)regs.rbx);
    368	}
    369
    370	switch (pass) {
    371	case PTP_GETREGS:
    372		/* Just read, no writeback */
    373		return;
    374	case PTP_WRITEBACK:
    375		/* Write back the same register state verbatim */
    376		break;
    377	case PTP_FUZZRET:
    378		regs.rax = MODIFIED_BY_PTRACE;
    379		break;
    380	case PTP_FUZZHIGH:
    381		regs.rax = MODIFIED_BY_PTRACE;
    382		regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL;
    383		break;
    384	case PTP_INTNUM:
    385		regs.rax = MODIFIED_BY_PTRACE;
    386		regs.orig_rax = (int)regs.orig_rax;
    387		break;
    388	default:
    389		crit("invalid ptrace_pass\n");
    390		break;
    391	}
    392
    393	ptrace(PTRACE_SETREGS, testpid, NULL, &regs);
    394}
    395
    396static void syscall_numbering_tracer(pid_t testpid)
    397{
    398	int wstatus;
    399
    400	do {
    401		pid_t wpid = waitpid(testpid, &wstatus, 0);
    402		if (wpid < 0 && errno != EINTR)
    403			break;
    404		if (wpid != testpid)
    405			continue;
    406		if (!WIFSTOPPED(wstatus))
    407			break;	/* Thread exited? */
    408
    409		if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
    410			mess_with_syscall(testpid, sh->ptrace_pass);
    411	} while (sh->ptrace_pass != PTP_DONE &&
    412		 !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));
    413
    414	ptrace(PTRACE_DETACH, testpid, NULL, NULL);
    415
    416	/* Wait for the child process to terminate */
    417	while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus))
    418		/* wait some more */;
    419}
    420
    421static void test_traced_syscall_numbering(void)
    422{
    423	pid_t testpid;
    424
    425	/* Launch the test thread; this thread continues as the tracer thread */
    426	testpid = fork();
    427
    428	if (testpid < 0) {
    429		crit("Unable to launch tracer process\n");
    430	} else if (testpid == 0) {
    431		syscall_numbering_tracee();
    432		_exit(0);
    433	} else {
    434		syscall_numbering_tracer(testpid);
    435	}
    436}
    437
    438int main(void)
    439{
    440	unsigned int nerr;
    441
    442	/*
    443	 * It is quite likely to get a segfault on a failure, so make
    444	 * sure the message gets out by setting stdout to nonbuffered.
    445	 */
    446	setvbuf(stdout, NULL, _IONBF, 0);
    447
    448	/*
    449	 * Harmless file descriptor to work on...
    450	 */
    451	nullfd = open("/dev/null", O_RDWR);
    452	if (nullfd < 0) {
    453		crit("Unable to open /dev/null: %s\n", strerror(errno));
    454	}
    455
    456	/*
    457	 * Set up a block of shared memory...
    458	 */
    459	sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
    460		  MAP_ANONYMOUS|MAP_SHARED, 0, 0);
    461	if (sh == MAP_FAILED) {
    462		crit("Unable to allocated shared memory block: %s\n",
    463		     strerror(errno));
    464	}
    465
    466	with_x32 = test_x32();
    467
    468	run("Running tests without ptrace...\n");
    469	test_syscall_numbering();
    470
    471	test_traced_syscall_numbering();
    472
    473	nerr = sh->nerr;
    474	if (!nerr) {
    475		ok("All system calls succeeded or failed as expected\n");
    476		return 0;
    477	} else {
    478		fail("A total of %u system call%s had incorrect behavior\n",
    479		     nerr, nerr != 1 ? "s" : "");
    480		return 1;
    481	}
    482}