cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

sigreturn.c (23967B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
      4 * Copyright (c) 2014-2015 Andrew Lutomirski
      5 *
      6 * This is a series of tests that exercises the sigreturn(2) syscall and
      7 * the IRET / SYSRET paths in the kernel.
      8 *
      9 * For now, this focuses on the effects of unusual CS and SS values,
     10 * and it has a bunch of tests to make sure that ESP/RSP is restored
     11 * properly.
     12 *
     13 * The basic idea behind these tests is to raise(SIGUSR1) to create a
     14 * sigcontext frame, plug in the values to be tested, and then return,
     15 * which implicitly invokes sigreturn(2) and programs the user context
     16 * as desired.
     17 *
     18 * For tests for which we expect sigreturn and the subsequent return to
     19 * user mode to succeed, we return to a short trampoline that generates
     20 * SIGTRAP so that the meat of the tests can be ordinary C code in a
     21 * SIGTRAP handler.
     22 *
     23 * The inner workings of each test are documented below.
     24 *
     25 * Do not run this on outdated, unpatched kernels: it risks nasty crashes.
     26 */
     27
     28#define _GNU_SOURCE
     29
     30#include <sys/time.h>
     31#include <time.h>
     32#include <stdlib.h>
     33#include <sys/syscall.h>
     34#include <unistd.h>
     35#include <stdio.h>
     36#include <string.h>
     37#include <inttypes.h>
     38#include <sys/mman.h>
     39#include <sys/signal.h>
     40#include <sys/ucontext.h>
     41#include <asm/ldt.h>
     42#include <err.h>
     43#include <setjmp.h>
     44#include <stddef.h>
     45#include <stdbool.h>
     46#include <sys/ptrace.h>
     47#include <sys/user.h>
     48
     49/* Pull in AR_xyz defines. */
     50typedef unsigned int u32;
     51typedef unsigned short u16;
     52#include "../../../../arch/x86/include/asm/desc_defs.h"
     53
     54/*
     55 * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
     56 * headers.
     57 */
     58#ifdef __x86_64__
     59/*
     60 * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
     61 * kernels that save SS in the sigcontext.  All kernels that set
     62 * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
     63 * regardless of SS (i.e. they implement espfix).
     64 *
     65 * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
     66 * when delivering a signal that came from 64-bit code.
     67 *
     68 * Sigreturn restores SS as follows:
     69 *
     70 * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
     71 *     saved CS is not 64-bit)
     72 *         new SS = saved SS  (will fail IRET and signal if invalid)
     73 * else
     74 *         new SS = a flat 32-bit data segment
     75 */
     76#define UC_SIGCONTEXT_SS       0x2
     77#define UC_STRICT_RESTORE_SS   0x4
     78#endif
     79
     80/*
     81 * In principle, this test can run on Linux emulation layers (e.g.
     82 * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
     83 * entries 0-5 for their own internal purposes, so start our LDT
     84 * allocations above that reservation.  (The tests don't pass on LX
     85 * branded zones, but at least this lets them run.)
     86 */
     87#define LDT_OFFSET 6
     88
     89/* An aligned stack accessible through some of our segments. */
     90static unsigned char stack16[65536] __attribute__((aligned(4096)));
     91
     92/*
     93 * An aligned int3 instruction used as a trampoline.  Some of the tests
      94 * want to fish out their ss values, so this trampoline copies ss to ecx
     95 * before the int3.
     96 */
     97asm (".pushsection .text\n\t"
     98     ".type int3, @function\n\t"
     99     ".align 4096\n\t"
    100     "int3:\n\t"
    101     "mov %ss,%ecx\n\t"
    102     "int3\n\t"
    103     ".size int3, . - int3\n\t"
    104     ".align 4096, 0xcc\n\t"
    105     ".popsection");
    106extern char int3[4096];
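/*
 * The 4096-byte alignment and the 0xcc (int3) fill matter: code16_sel and
 * npcode32_sel below use this page as their segment base with a 4095-byte
 * limit, so the trampoline sits at offset 0 and any stray execution within
 * the segment hits another int3 rather than running off the end.
 */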
    107
    108/*
     109 * At startup, we prepare:
    110 *
    111 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
    112 *   descriptor or out of bounds).
    113 * - code16_sel: A 16-bit LDT code segment pointing to int3.
    114 * - data16_sel: A 16-bit LDT data segment pointing to stack16.
    115 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
    116 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
    117 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
    118 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
    119 *   stack16.
    120 *
    121 * For no particularly good reason, xyz_sel is a selector value with the
    122 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
    123 * descriptor table.  These variables will be zero if their respective
    124 * segments could not be allocated.
    125 */
    126static unsigned short ldt_nonexistent_sel;
    127static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
    128
    129static unsigned short gdt_data16_idx, gdt_npdata32_idx;
    130
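/*
 * An x86 segment selector is (index << 3) | TI | RPL: bits 1:0 are the
 * requested privilege level, bit 2 (TI) selects the LDT when set, and the
 * remaining bits are the descriptor-table index.  GDT3() and LDT3() both
 * build RPL-3 selectors; e.g. LDT3(6) == (6 << 3) | 7 == 0x37.
 */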
    131static unsigned short GDT3(int idx)
    132{
    133	return (idx << 3) | 3;
    134}
    135
    136static unsigned short LDT3(int idx)
    137{
    138	return (idx << 3) | 7;
    139}
    140
    141static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
    142		       int flags)
    143{
    144	struct sigaction sa;
    145	memset(&sa, 0, sizeof(sa));
    146	sa.sa_sigaction = handler;
    147	sa.sa_flags = SA_SIGINFO | flags;
    148	sigemptyset(&sa.sa_mask);
    149	if (sigaction(sig, &sa, 0))
    150		err(1, "sigaction");
    151}
    152
    153static void clearhandler(int sig)
    154{
    155	struct sigaction sa;
    156	memset(&sa, 0, sizeof(sa));
    157	sa.sa_handler = SIG_DFL;
    158	sigemptyset(&sa.sa_mask);
    159	if (sigaction(sig, &sa, 0))
    160		err(1, "sigaction");
    161}
    162
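/*
 * Installs one LDT descriptor via modify_ldt(2) (func 1 = write) and stores
 * the resulting RPL-3 LDT selector in *var, or 0 on failure.
 */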
    163static void add_ldt(const struct user_desc *desc, unsigned short *var,
    164		    const char *name)
    165{
    166	if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
    167		*var = LDT3(desc->entry_number);
    168	} else {
    169		printf("[NOTE]\tFailed to create %s segment\n", name);
    170		*var = 0;
    171	}
    172}
    173
    174static void setup_ldt(void)
    175{
    176	if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
    177		errx(1, "stack16 is too high\n");
    178	if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
    179		errx(1, "int3 is too high\n");
    180
    181	ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
    182
    183	const struct user_desc code16_desc = {
    184		.entry_number    = LDT_OFFSET + 0,
    185		.base_addr       = (unsigned long)int3,
    186		.limit           = 4095,
    187		.seg_32bit       = 0,
    188		.contents        = 2, /* Code, not conforming */
    189		.read_exec_only  = 0,
    190		.limit_in_pages  = 0,
    191		.seg_not_present = 0,
    192		.useable         = 0
    193	};
    194	add_ldt(&code16_desc, &code16_sel, "code16");
    195
    196	const struct user_desc data16_desc = {
    197		.entry_number    = LDT_OFFSET + 1,
    198		.base_addr       = (unsigned long)stack16,
    199		.limit           = 0xffff,
    200		.seg_32bit       = 0,
    201		.contents        = 0, /* Data, grow-up */
    202		.read_exec_only  = 0,
    203		.limit_in_pages  = 0,
    204		.seg_not_present = 0,
    205		.useable         = 0
    206	};
    207	add_ldt(&data16_desc, &data16_sel, "data16");
    208
    209	const struct user_desc npcode32_desc = {
    210		.entry_number    = LDT_OFFSET + 3,
    211		.base_addr       = (unsigned long)int3,
    212		.limit           = 4095,
    213		.seg_32bit       = 1,
    214		.contents        = 2, /* Code, not conforming */
    215		.read_exec_only  = 0,
    216		.limit_in_pages  = 0,
    217		.seg_not_present = 1,
    218		.useable         = 0
    219	};
    220	add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
    221
    222	const struct user_desc npdata32_desc = {
    223		.entry_number    = LDT_OFFSET + 4,
    224		.base_addr       = (unsigned long)stack16,
    225		.limit           = 0xffff,
    226		.seg_32bit       = 1,
    227		.contents        = 0, /* Data, grow-up */
    228		.read_exec_only  = 0,
    229		.limit_in_pages  = 0,
    230		.seg_not_present = 1,
    231		.useable         = 0
    232	};
    233	add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
    234
    235	struct user_desc gdt_data16_desc = {
    236		.entry_number    = -1,
    237		.base_addr       = (unsigned long)stack16,
    238		.limit           = 0xffff,
    239		.seg_32bit       = 0,
    240		.contents        = 0, /* Data, grow-up */
    241		.read_exec_only  = 0,
    242		.limit_in_pages  = 0,
    243		.seg_not_present = 0,
    244		.useable         = 0
    245	};
    246
    247	if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
    248		/*
    249		 * This probably indicates vulnerability to CVE-2014-8133.
    250		 * Merely getting here isn't definitive, though, and we'll
    251		 * diagnose the problem for real later on.
    252		 */
    253		printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
    254		       gdt_data16_desc.entry_number);
    255		gdt_data16_idx = gdt_data16_desc.entry_number;
    256	} else {
    257		printf("[OK]\tset_thread_area refused 16-bit data\n");
    258	}
    259
    260	struct user_desc gdt_npdata32_desc = {
    261		.entry_number    = -1,
    262		.base_addr       = (unsigned long)stack16,
    263		.limit           = 0xffff,
    264		.seg_32bit       = 1,
    265		.contents        = 0, /* Data, grow-up */
    266		.read_exec_only  = 0,
    267		.limit_in_pages  = 0,
    268		.seg_not_present = 1,
    269		.useable         = 0
    270	};
    271
    272	if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
    273		/*
    274		 * As a hardening measure, newer kernels don't allow this.
    275		 */
    276		printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
    277		       gdt_npdata32_desc.entry_number);
    278		gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
    279	} else {
     280		printf("[OK]\tset_thread_area refused not-present data\n");
    281	}
    282}
    283
    284/* State used by our signal handlers. */
    285static gregset_t initial_regs, requested_regs, resulting_regs;
    286
    287/* Instructions for the SIGUSR1 handler. */
    288static volatile unsigned short sig_cs, sig_ss;
    289static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
    290#ifdef __x86_64__
    291static volatile sig_atomic_t sig_corrupt_final_ss;
    292#endif
    293
    294/* Abstractions for some 32-bit vs 64-bit differences. */
    295#ifdef __x86_64__
    296# define REG_IP REG_RIP
    297# define REG_SP REG_RSP
    298# define REG_CX REG_RCX
    299
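/*
 * The x86_64 sigcontext packs cs, gs, fs and (on newer kernels) ss into a
 * single 64-bit word that glibc exposes as the REG_CSGSFS greg; this struct
 * gives named access to those four 16-bit fields.
 */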
    300struct selectors {
    301	unsigned short cs, gs, fs, ss;
    302};
    303
    304static unsigned short *ssptr(ucontext_t *ctx)
    305{
    306	struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
    307	return &sels->ss;
    308}
    309
    310static unsigned short *csptr(ucontext_t *ctx)
    311{
    312	struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
    313	return &sels->cs;
    314}
    315#else
    316# define REG_IP REG_EIP
    317# define REG_SP REG_ESP
    318# define REG_CX REG_ECX
    319
    320static greg_t *ssptr(ucontext_t *ctx)
    321{
    322	return &ctx->uc_mcontext.gregs[REG_SS];
    323}
    324
    325static greg_t *csptr(ucontext_t *ctx)
    326{
    327	return &ctx->uc_mcontext.gregs[REG_CS];
    328}
    329#endif
    330
    331/*
    332 * Checks a given selector for its code bitness or returns -1 if it's not
    333 * a usable code segment selector.
    334 */
    335int cs_bitness(unsigned short cs)
    336{
    337	uint32_t valid = 0, ar;
    338	asm ("lar %[cs], %[ar]\n\t"
    339	     "jnz 1f\n\t"
    340	     "mov $1, %[valid]\n\t"
    341	     "1:"
    342	     : [ar] "=r" (ar), [valid] "+rm" (valid)
    343	     : [cs] "r" (cs));
    344
    345	if (!valid)
    346		return -1;
    347
     348	bool db = (ar & (1 << 22));	/* D/B: 1 = 32-bit default operand size */
     349	bool l = (ar & (1 << 21));	/* L: 1 = 64-bit code segment */
    350
    351	if (!(ar & (1<<11)))
    352	    return -1;	/* Not code. */
    353
    354	if (l && !db)
    355		return 64;
    356	else if (!l && db)
    357		return 32;
    358	else if (!l && !db)
    359		return 16;
    360	else
    361		return -1;	/* Unknown bitness. */
    362}
    363
    364/*
     365 * Checks whether a given selector is usable as SS, i.e. whether it
     366 * refers to a present, writable data segment.
    367 */
    368bool is_valid_ss(unsigned short cs)
    369{
    370	uint32_t valid = 0, ar;
    371	asm ("lar %[cs], %[ar]\n\t"
    372	     "jnz 1f\n\t"
    373	     "mov $1, %[valid]\n\t"
    374	     "1:"
    375	     : [ar] "=r" (ar), [valid] "+rm" (valid)
    376	     : [cs] "r" (cs));
    377
    378	if (!valid)
    379		return false;
    380
    381	if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
    382	    (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
    383		return false;
    384
    385	return (ar & AR_P);
    386}
    387
    388/* Number of errors in the current test case. */
    389static volatile sig_atomic_t nerrs;
    390
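/*
 * Sanity-checks the SS bookkeeping in a delivered signal frame: on x86_64,
 * UC_SIGCONTEXT_SS must be set, UC_STRICT_RESTORE_SS must reflect whether
 * the interrupted code was 64-bit, and a valid saved SS must match the live
 * SS register.  Does nothing on 32-bit builds.
 */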
    391static void validate_signal_ss(int sig, ucontext_t *ctx)
    392{
    393#ifdef __x86_64__
    394	bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);
    395
    396	if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
    397		printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
    398		nerrs++;
    399
    400		/*
    401		 * This happens on Linux 4.1.  The rest will fail, too, so
    402		 * return now to reduce the noise.
    403		 */
    404		return;
    405	}
    406
    407	/* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
    408	if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
    409		printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
    410		       sig);
    411		nerrs++;
    412	}
    413
    414	if (is_valid_ss(*ssptr(ctx))) {
    415		/*
    416		 * DOSEMU was written before 64-bit sigcontext had SS, and
    417		 * it tries to figure out the signal source SS by looking at
    418		 * the physical register.  Make sure that keeps working.
    419		 */
    420		unsigned short hw_ss;
    421		asm ("mov %%ss, %0" : "=rm" (hw_ss));
    422		if (hw_ss != *ssptr(ctx)) {
    423			printf("[FAIL]\tHW SS didn't match saved SS\n");
    424			nerrs++;
    425		}
    426	}
    427#endif
    428}
    429
    430/*
    431 * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
    432 * int3 trampoline.  Sets SP to a large known value so that we can see
    433 * whether the value round-trips back to user mode correctly.
    434 */
    435static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
    436{
    437	ucontext_t *ctx = (ucontext_t*)ctx_void;
    438
    439	validate_signal_ss(sig, ctx);
    440
    441	memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
    442
    443	*csptr(ctx) = sig_cs;
    444	*ssptr(ctx) = sig_ss;
    445
    446	ctx->uc_mcontext.gregs[REG_IP] =
    447		sig_cs == code16_sel ? 0 : (unsigned long)&int3;
    448	ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
    449	ctx->uc_mcontext.gregs[REG_CX] = 0;
    450
    451#ifdef __i386__
    452	/*
    453	 * Make sure the kernel doesn't inadvertently use DS or ES-relative
    454	 * accesses in a region where user DS or ES is loaded.
    455	 *
    456	 * Skip this for 64-bit builds because long mode doesn't care about
    457	 * DS and ES and skipping it increases test coverage a little bit,
    458	 * since 64-bit kernels can still run the 32-bit build.
    459	 */
    460	ctx->uc_mcontext.gregs[REG_DS] = 0;
    461	ctx->uc_mcontext.gregs[REG_ES] = 0;
    462#endif
    463
    464	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
    465	requested_regs[REG_CX] = *ssptr(ctx);	/* The asm code does this. */
    466
    467	return;
    468}
    469
    470/*
    471 * Called after a successful sigreturn (via int3) or from a failed
    472 * sigreturn (directly by kernel).  Restores our state so that the
    473 * original raise(SIGUSR1) returns.
    474 */
    475static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
    476{
    477	ucontext_t *ctx = (ucontext_t*)ctx_void;
    478
    479	validate_signal_ss(sig, ctx);
    480
    481	sig_err = ctx->uc_mcontext.gregs[REG_ERR];
    482	sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
    483
    484	unsigned short ss;
    485	asm ("mov %%ss,%0" : "=r" (ss));
    486
    487	greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
    488	if (asm_ss != sig_ss && sig == SIGTRAP) {
    489		/* Sanity check failure. */
     490		printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, cx = %llx\n",
    491		       ss, *ssptr(ctx), (unsigned long long)asm_ss);
    492		nerrs++;
    493	}
    494
    495	memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
    496	memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
    497
    498#ifdef __x86_64__
    499	if (sig_corrupt_final_ss) {
    500		if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
    501			printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
    502			nerrs++;
    503		} else {
    504			/*
    505			 * DOSEMU transitions from 32-bit to 64-bit mode by
    506			 * adjusting sigcontext, and it requires that this work
    507			 * even if the saved SS is bogus.
    508			 */
    509			printf("\tCorrupting SS on return to 64-bit mode\n");
    510			*ssptr(ctx) = 0;
    511		}
    512	}
    513#endif
    514
    515	sig_trapped = sig;
    516}
    517
    518#ifdef __x86_64__
    519/* Tests recovery if !UC_STRICT_RESTORE_SS */
    520static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
    521{
    522	ucontext_t *ctx = (ucontext_t*)ctx_void;
    523
    524	if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
    525		printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
    526		nerrs++;
    527		return;  /* We can't do the rest. */
    528	}
    529
    530	ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
    531	*ssptr(ctx) = 0;
    532
    533	/* Return.  The kernel should recover without sending another signal. */
    534}
    535
    536static int test_nonstrict_ss(void)
    537{
    538	clearhandler(SIGUSR1);
    539	clearhandler(SIGTRAP);
    540	clearhandler(SIGSEGV);
    541	clearhandler(SIGILL);
    542	sethandler(SIGUSR2, sigusr2, 0);
    543
    544	nerrs = 0;
    545
    546	printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
    547	raise(SIGUSR2);
    548	if (!nerrs)
    549		printf("[OK]\tIt worked\n");
    550
    551	return nerrs;
    552}
    553#endif
    554
    555/* Finds a usable code segment of the requested bitness. */
    556int find_cs(int bitness)
    557{
    558	unsigned short my_cs;
    559
    560	asm ("mov %%cs,%0" :  "=r" (my_cs));
    561
    562	if (cs_bitness(my_cs) == bitness)
    563		return my_cs;
    564	if (cs_bitness(my_cs + (2 << 3)) == bitness)
    565		return my_cs + (2 << 3);
    566	if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
    567	    return my_cs - (2 << 3);
    568	if (cs_bitness(code16_sel) == bitness)
    569		return code16_sel;
    570
    571	printf("[WARN]\tCould not find %d-bit CS\n", bitness);
    572	return -1;
    573}
    574
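/*
 * Round-trips through sigreturn with the requested CS bitness and SS (a
 * 16-bit LDT data segment, the normal flat SS, or a forced selector when
 * force_ss != -1), then compares the registers captured by the int3
 * trampoline against the values that were requested.
 */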
    575static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
    576{
    577	int cs = find_cs(cs_bits);
    578	if (cs == -1) {
    579		printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
    580		       cs_bits, use_16bit_ss ? 16 : 32);
    581		return 0;
    582	}
    583
    584	if (force_ss != -1) {
    585		sig_ss = force_ss;
    586	} else {
    587		if (use_16bit_ss) {
    588			if (!data16_sel) {
    589				printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
    590				       cs_bits);
    591				return 0;
    592			}
    593			sig_ss = data16_sel;
    594		} else {
    595			asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
    596		}
    597	}
    598
    599	sig_cs = cs;
    600
    601	printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
    602	       cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
    603	       (sig_ss & 4) ? "" : ", GDT");
    604
    605	raise(SIGUSR1);
    606
    607	nerrs = 0;
    608
    609	/*
    610	 * Check that each register had an acceptable value when the
    611	 * int3 trampoline was invoked.
    612	 */
    613	for (int i = 0; i < NGREG; i++) {
    614		greg_t req = requested_regs[i], res = resulting_regs[i];
    615
    616		if (i == REG_TRAPNO || i == REG_IP)
    617			continue;	/* don't care */
    618
    619		if (i == REG_SP) {
    620			/*
    621			 * If we were using a 16-bit stack segment, then
    622			 * the kernel is a bit stuck: IRET only restores
    623			 * the low 16 bits of ESP/RSP if SS is 16-bit.
    624			 * The kernel uses a hack to restore bits 31:16,
    625			 * but that hack doesn't help with bits 63:32.
    626			 * On Intel CPUs, bits 63:32 end up zeroed, and, on
    627			 * AMD CPUs, they leak the high bits of the kernel
    628			 * espfix64 stack pointer.  There's very little that
    629			 * the kernel can do about it.
    630			 *
    631			 * Similarly, if we are returning to a 32-bit context,
    632			 * the CPU will often lose the high 32 bits of RSP.
    633			 */
    634
    635			if (res == req)
    636				continue;
    637
    638			if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
    639				printf("[NOTE]\tSP: %llx -> %llx\n",
    640				       (unsigned long long)req,
    641				       (unsigned long long)res);
    642				continue;
    643			}
    644
    645			printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
    646			       (unsigned long long)requested_regs[i],
    647			       (unsigned long long)resulting_regs[i]);
    648			nerrs++;
    649			continue;
    650		}
    651
    652		bool ignore_reg = false;
    653#if __i386__
    654		if (i == REG_UESP)
    655			ignore_reg = true;
    656#else
    657		if (i == REG_CSGSFS) {
    658			struct selectors *req_sels =
    659				(void *)&requested_regs[REG_CSGSFS];
    660			struct selectors *res_sels =
    661				(void *)&resulting_regs[REG_CSGSFS];
    662			if (req_sels->cs != res_sels->cs) {
    663				printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
    664				       req_sels->cs, res_sels->cs);
    665				nerrs++;
    666			}
    667
    668			if (req_sels->ss != res_sels->ss) {
    669				printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
    670				       req_sels->ss, res_sels->ss);
    671				nerrs++;
    672			}
    673
    674			continue;
    675		}
    676#endif
    677
    678		/* Sanity check on the kernel */
    679		if (i == REG_CX && req != res) {
    680			printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
    681			       (unsigned long long)req,
    682			       (unsigned long long)res);
    683			nerrs++;
    684			continue;
    685		}
    686
    687		if (req != res && !ignore_reg) {
    688			printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
    689			       i, (unsigned long long)req,
    690			       (unsigned long long)res);
    691			nerrs++;
    692		}
    693	}
    694
    695	if (nerrs == 0)
    696		printf("[OK]\tall registers okay\n");
    697
    698	return nerrs;
    699}
    700
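/*
 * Asks sigreturn for a CS/SS combination whose return to user mode should
 * fail, and reports success only if the kernel delivers a fault signal
 * instead of returning cleanly.
 */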
    701static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
    702{
    703	int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
    704	if (cs == -1)
    705		return 0;
    706
    707	sig_cs = cs;
    708	sig_ss = ss;
    709
    710	printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
    711	       cs_bits, sig_cs, sig_ss);
    712
    713	sig_trapped = 0;
    714	raise(SIGUSR1);
    715	if (sig_trapped) {
    716		char errdesc[32] = "";
    717		if (sig_err) {
    718			const char *src = (sig_err & 1) ? " EXT" : "";
    719			const char *table;
    720			if ((sig_err & 0x6) == 0x0)
    721				table = "GDT";
    722			else if ((sig_err & 0x6) == 0x4)
    723				table = "LDT";
    724			else if ((sig_err & 0x6) == 0x2)
    725				table = "IDT";
    726			else
    727				table = "???";
    728
    729			sprintf(errdesc, "%s%s index %d, ",
    730				table, src, sig_err >> 3);
    731		}
    732
    733		char trapname[32];
    734		if (sig_trapno == 13)
    735			strcpy(trapname, "GP");
    736		else if (sig_trapno == 11)
    737			strcpy(trapname, "NP");
    738		else if (sig_trapno == 12)
    739			strcpy(trapname, "SS");
    740		else if (sig_trapno == 32)
    741			strcpy(trapname, "IRET");  /* X86_TRAP_IRET */
    742		else
    743			sprintf(trapname, "%d", sig_trapno);
    744
    745		printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
    746		       trapname, (unsigned long)sig_err,
    747		       errdesc, strsignal(sig_trapped));
    748		return 0;
    749	} else {
    750		/*
    751		 * This also implicitly tests UC_STRICT_RESTORE_SS:
     752		 * We check that these signals set UC_STRICT_RESTORE_SS;
     753		 * if UC_STRICT_RESTORE_SS failed to cause strict SS
     754		 * restoration, we would not get SIGSEGV here.
    755		 */
    756		printf("[FAIL]\tDid not get SIGSEGV\n");
    757		return 1;
    758	}
    759}
    760
    761int main()
    762{
    763	int total_nerrs = 0;
    764	unsigned short my_cs, my_ss;
    765
    766	asm volatile ("mov %%cs,%0" : "=r" (my_cs));
    767	asm volatile ("mov %%ss,%0" : "=r" (my_ss));
    768	setup_ldt();
    769
    770	stack_t stack = {
    771		/* Our sigaltstack scratch space. */
    772		.ss_sp = malloc(sizeof(char) * SIGSTKSZ),
    773		.ss_size = SIGSTKSZ,
    774	};
    775	if (sigaltstack(&stack, NULL) != 0)
    776		err(1, "sigaltstack");
    777
    778	sethandler(SIGUSR1, sigusr1, 0);
    779	sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
    780
    781	/* Easy cases: return to a 32-bit SS in each possible CS bitness. */
    782	total_nerrs += test_valid_sigreturn(64, false, -1);
    783	total_nerrs += test_valid_sigreturn(32, false, -1);
    784	total_nerrs += test_valid_sigreturn(16, false, -1);
    785
    786	/*
    787	 * Test easy espfix cases: return to a 16-bit LDT SS in each possible
    788	 * CS bitness.  NB: with a long mode CS, the SS bitness is irrelevant.
    789	 *
    790	 * This catches the original missing-espfix-on-64-bit-kernels issue
    791	 * as well as CVE-2014-8134.
    792	 */
    793	total_nerrs += test_valid_sigreturn(64, true, -1);
    794	total_nerrs += test_valid_sigreturn(32, true, -1);
    795	total_nerrs += test_valid_sigreturn(16, true, -1);
    796
    797	if (gdt_data16_idx) {
    798		/*
    799		 * For performance reasons, Linux skips espfix if SS points
    800		 * to the GDT.  If we were able to allocate a 16-bit SS in
    801		 * the GDT, see if it leaks parts of the kernel stack pointer.
    802		 *
    803		 * This tests for CVE-2014-8133.
    804		 */
    805		total_nerrs += test_valid_sigreturn(64, true,
    806						    GDT3(gdt_data16_idx));
    807		total_nerrs += test_valid_sigreturn(32, true,
    808						    GDT3(gdt_data16_idx));
    809		total_nerrs += test_valid_sigreturn(16, true,
    810						    GDT3(gdt_data16_idx));
    811	}
    812
    813#ifdef __x86_64__
    814	/* Nasty ABI case: check SS corruption handling. */
    815	sig_corrupt_final_ss = 1;
    816	total_nerrs += test_valid_sigreturn(32, false, -1);
    817	total_nerrs += test_valid_sigreturn(32, true, -1);
    818	sig_corrupt_final_ss = 0;
    819#endif
    820
    821	/*
    822	 * We're done testing valid sigreturn cases.  Now we test states
    823	 * for which sigreturn itself will succeed but the subsequent
    824	 * entry to user mode will fail.
    825	 *
    826	 * Depending on the failure mode and the kernel bitness, these
    827	 * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
    828	 */
    829	clearhandler(SIGTRAP);
    830	sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
    831	sethandler(SIGBUS, sigtrap, SA_ONSTACK);
    832	sethandler(SIGILL, sigtrap, SA_ONSTACK);  /* 32-bit kernels do this */
    833
    834	/* Easy failures: invalid SS, resulting in #GP(0) */
    835	test_bad_iret(64, ldt_nonexistent_sel, -1);
    836	test_bad_iret(32, ldt_nonexistent_sel, -1);
    837	test_bad_iret(16, ldt_nonexistent_sel, -1);
    838
    839	/* These fail because SS isn't a data segment, resulting in #GP(SS) */
    840	test_bad_iret(64, my_cs, -1);
    841	test_bad_iret(32, my_cs, -1);
    842	test_bad_iret(16, my_cs, -1);
    843
    844	/* Try to return to a not-present code segment, triggering #NP(SS). */
    845	test_bad_iret(32, my_ss, npcode32_sel);
    846
    847	/*
    848	 * Try to return to a not-present but otherwise valid data segment.
    849	 * This will cause IRET to fail with #SS on the espfix stack.  This
    850	 * exercises CVE-2014-9322.
    851	 *
    852	 * Note that, if espfix is enabled, 64-bit Linux will lose track
    853	 * of the actual cause of failure and report #GP(0) instead.
    854	 * This would be very difficult for Linux to avoid, because
    855	 * espfix64 causes IRET failures to be promoted to #DF, so the
    856	 * original exception frame is never pushed onto the stack.
    857	 */
    858	test_bad_iret(32, npdata32_sel, -1);
    859
    860	/*
    861	 * Try to return to a not-present but otherwise valid data
    862	 * segment without invoking espfix.  Newer kernels don't allow
    863	 * this to happen in the first place.  On older kernels, though,
    864	 * this can trigger CVE-2014-9322.
    865	 */
    866	if (gdt_npdata32_idx)
    867		test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
    868
    869#ifdef __x86_64__
    870	total_nerrs += test_nonstrict_ss();
    871#endif
    872
    873	free(stack.ss_sp);
    874	return total_nerrs ? 1 : 0;
    875}