sigreturn.c (23967B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace 4 * Copyright (c) 2014-2015 Andrew Lutomirski 5 * 6 * This is a series of tests that exercises the sigreturn(2) syscall and 7 * the IRET / SYSRET paths in the kernel. 8 * 9 * For now, this focuses on the effects of unusual CS and SS values, 10 * and it has a bunch of tests to make sure that ESP/RSP is restored 11 * properly. 12 * 13 * The basic idea behind these tests is to raise(SIGUSR1) to create a 14 * sigcontext frame, plug in the values to be tested, and then return, 15 * which implicitly invokes sigreturn(2) and programs the user context 16 * as desired. 17 * 18 * For tests for which we expect sigreturn and the subsequent return to 19 * user mode to succeed, we return to a short trampoline that generates 20 * SIGTRAP so that the meat of the tests can be ordinary C code in a 21 * SIGTRAP handler. 22 * 23 * The inner workings of each test is documented below. 24 * 25 * Do not run on outdated, unpatched kernels at risk of nasty crashes. 26 */ 27 28#define _GNU_SOURCE 29 30#include <sys/time.h> 31#include <time.h> 32#include <stdlib.h> 33#include <sys/syscall.h> 34#include <unistd.h> 35#include <stdio.h> 36#include <string.h> 37#include <inttypes.h> 38#include <sys/mman.h> 39#include <sys/signal.h> 40#include <sys/ucontext.h> 41#include <asm/ldt.h> 42#include <err.h> 43#include <setjmp.h> 44#include <stddef.h> 45#include <stdbool.h> 46#include <sys/ptrace.h> 47#include <sys/user.h> 48 49/* Pull in AR_xyz defines. */ 50typedef unsigned int u32; 51typedef unsigned short u16; 52#include "../../../../arch/x86/include/asm/desc_defs.h" 53 54/* 55 * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc 56 * headers. 57 */ 58#ifdef __x86_64__ 59/* 60 * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on 61 * kernels that save SS in the sigcontext. All kernels that set 62 * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp 63 * regardless of SS (i.e. they implement espfix). 64 * 65 * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS 66 * when delivering a signal that came from 64-bit code. 67 * 68 * Sigreturn restores SS as follows: 69 * 70 * if (saved SS is valid || UC_STRICT_RESTORE_SS is set || 71 * saved CS is not 64-bit) 72 * new SS = saved SS (will fail IRET and signal if invalid) 73 * else 74 * new SS = a flat 32-bit data segment 75 */ 76#define UC_SIGCONTEXT_SS 0x2 77#define UC_STRICT_RESTORE_SS 0x4 78#endif 79 80/* 81 * In principle, this test can run on Linux emulation layers (e.g. 82 * Illumos "LX branded zones"). Solaris-based kernels reserve LDT 83 * entries 0-5 for their own internal purposes, so start our LDT 84 * allocations above that reservation. (The tests don't pass on LX 85 * branded zones, but at least this lets them run.) 86 */ 87#define LDT_OFFSET 6 88 89/* An aligned stack accessible through some of our segments. */ 90static unsigned char stack16[65536] __attribute__((aligned(4096))); 91 92/* 93 * An aligned int3 instruction used as a trampoline. Some of the tests 94 * want to fish out their ss values, so this trampoline copies ss to eax 95 * before the int3. 96 */ 97asm (".pushsection .text\n\t" 98 ".type int3, @function\n\t" 99 ".align 4096\n\t" 100 "int3:\n\t" 101 "mov %ss,%ecx\n\t" 102 "int3\n\t" 103 ".size int3, . - int3\n\t" 104 ".align 4096, 0xcc\n\t" 105 ".popsection"); 106extern char int3[4096]; 107 108/* 109 * At startup, we prepapre: 110 * 111 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero 112 * descriptor or out of bounds). 113 * - code16_sel: A 16-bit LDT code segment pointing to int3. 114 * - data16_sel: A 16-bit LDT data segment pointing to stack16. 115 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3. 116 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16. 117 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16. 118 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to 119 * stack16. 120 * 121 * For no particularly good reason, xyz_sel is a selector value with the 122 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the 123 * descriptor table. These variables will be zero if their respective 124 * segments could not be allocated. 125 */ 126static unsigned short ldt_nonexistent_sel; 127static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel; 128 129static unsigned short gdt_data16_idx, gdt_npdata32_idx; 130 131static unsigned short GDT3(int idx) 132{ 133 return (idx << 3) | 3; 134} 135 136static unsigned short LDT3(int idx) 137{ 138 return (idx << 3) | 7; 139} 140 141static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), 142 int flags) 143{ 144 struct sigaction sa; 145 memset(&sa, 0, sizeof(sa)); 146 sa.sa_sigaction = handler; 147 sa.sa_flags = SA_SIGINFO | flags; 148 sigemptyset(&sa.sa_mask); 149 if (sigaction(sig, &sa, 0)) 150 err(1, "sigaction"); 151} 152 153static void clearhandler(int sig) 154{ 155 struct sigaction sa; 156 memset(&sa, 0, sizeof(sa)); 157 sa.sa_handler = SIG_DFL; 158 sigemptyset(&sa.sa_mask); 159 if (sigaction(sig, &sa, 0)) 160 err(1, "sigaction"); 161} 162 163static void add_ldt(const struct user_desc *desc, unsigned short *var, 164 const char *name) 165{ 166 if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) { 167 *var = LDT3(desc->entry_number); 168 } else { 169 printf("[NOTE]\tFailed to create %s segment\n", name); 170 *var = 0; 171 } 172} 173 174static void setup_ldt(void) 175{ 176 if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16)) 177 errx(1, "stack16 is too high\n"); 178 if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3)) 179 errx(1, "int3 is too high\n"); 180 181 ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2); 182 183 const struct user_desc code16_desc = { 184 .entry_number = LDT_OFFSET + 0, 185 .base_addr = (unsigned long)int3, 186 .limit = 4095, 187 .seg_32bit = 0, 188 .contents = 2, /* Code, not conforming */ 189 .read_exec_only = 0, 190 .limit_in_pages = 0, 191 .seg_not_present = 0, 192 .useable = 0 193 }; 194 add_ldt(&code16_desc, &code16_sel, "code16"); 195 196 const struct user_desc data16_desc = { 197 .entry_number = LDT_OFFSET + 1, 198 .base_addr = (unsigned long)stack16, 199 .limit = 0xffff, 200 .seg_32bit = 0, 201 .contents = 0, /* Data, grow-up */ 202 .read_exec_only = 0, 203 .limit_in_pages = 0, 204 .seg_not_present = 0, 205 .useable = 0 206 }; 207 add_ldt(&data16_desc, &data16_sel, "data16"); 208 209 const struct user_desc npcode32_desc = { 210 .entry_number = LDT_OFFSET + 3, 211 .base_addr = (unsigned long)int3, 212 .limit = 4095, 213 .seg_32bit = 1, 214 .contents = 2, /* Code, not conforming */ 215 .read_exec_only = 0, 216 .limit_in_pages = 0, 217 .seg_not_present = 1, 218 .useable = 0 219 }; 220 add_ldt(&npcode32_desc, &npcode32_sel, "npcode32"); 221 222 const struct user_desc npdata32_desc = { 223 .entry_number = LDT_OFFSET + 4, 224 .base_addr = (unsigned long)stack16, 225 .limit = 0xffff, 226 .seg_32bit = 1, 227 .contents = 0, /* Data, grow-up */ 228 .read_exec_only = 0, 229 .limit_in_pages = 0, 230 .seg_not_present = 1, 231 .useable = 0 232 }; 233 add_ldt(&npdata32_desc, &npdata32_sel, "npdata32"); 234 235 struct user_desc gdt_data16_desc = { 236 .entry_number = -1, 237 .base_addr = (unsigned long)stack16, 238 .limit = 0xffff, 239 .seg_32bit = 0, 240 .contents = 0, /* Data, grow-up */ 241 .read_exec_only = 0, 242 .limit_in_pages = 0, 243 .seg_not_present = 0, 244 .useable = 0 245 }; 246 247 if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) { 248 /* 249 * This probably indicates vulnerability to CVE-2014-8133. 250 * Merely getting here isn't definitive, though, and we'll 251 * diagnose the problem for real later on. 252 */ 253 printf("[WARN]\tset_thread_area allocated data16 at index %d\n", 254 gdt_data16_desc.entry_number); 255 gdt_data16_idx = gdt_data16_desc.entry_number; 256 } else { 257 printf("[OK]\tset_thread_area refused 16-bit data\n"); 258 } 259 260 struct user_desc gdt_npdata32_desc = { 261 .entry_number = -1, 262 .base_addr = (unsigned long)stack16, 263 .limit = 0xffff, 264 .seg_32bit = 1, 265 .contents = 0, /* Data, grow-up */ 266 .read_exec_only = 0, 267 .limit_in_pages = 0, 268 .seg_not_present = 1, 269 .useable = 0 270 }; 271 272 if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) { 273 /* 274 * As a hardening measure, newer kernels don't allow this. 275 */ 276 printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n", 277 gdt_npdata32_desc.entry_number); 278 gdt_npdata32_idx = gdt_npdata32_desc.entry_number; 279 } else { 280 printf("[OK]\tset_thread_area refused 16-bit data\n"); 281 } 282} 283 284/* State used by our signal handlers. */ 285static gregset_t initial_regs, requested_regs, resulting_regs; 286 287/* Instructions for the SIGUSR1 handler. */ 288static volatile unsigned short sig_cs, sig_ss; 289static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; 290#ifdef __x86_64__ 291static volatile sig_atomic_t sig_corrupt_final_ss; 292#endif 293 294/* Abstractions for some 32-bit vs 64-bit differences. */ 295#ifdef __x86_64__ 296# define REG_IP REG_RIP 297# define REG_SP REG_RSP 298# define REG_CX REG_RCX 299 300struct selectors { 301 unsigned short cs, gs, fs, ss; 302}; 303 304static unsigned short *ssptr(ucontext_t *ctx) 305{ 306 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; 307 return &sels->ss; 308} 309 310static unsigned short *csptr(ucontext_t *ctx) 311{ 312 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; 313 return &sels->cs; 314} 315#else 316# define REG_IP REG_EIP 317# define REG_SP REG_ESP 318# define REG_CX REG_ECX 319 320static greg_t *ssptr(ucontext_t *ctx) 321{ 322 return &ctx->uc_mcontext.gregs[REG_SS]; 323} 324 325static greg_t *csptr(ucontext_t *ctx) 326{ 327 return &ctx->uc_mcontext.gregs[REG_CS]; 328} 329#endif 330 331/* 332 * Checks a given selector for its code bitness or returns -1 if it's not 333 * a usable code segment selector. 334 */ 335int cs_bitness(unsigned short cs) 336{ 337 uint32_t valid = 0, ar; 338 asm ("lar %[cs], %[ar]\n\t" 339 "jnz 1f\n\t" 340 "mov $1, %[valid]\n\t" 341 "1:" 342 : [ar] "=r" (ar), [valid] "+rm" (valid) 343 : [cs] "r" (cs)); 344 345 if (!valid) 346 return -1; 347 348 bool db = (ar & (1 << 22)); 349 bool l = (ar & (1 << 21)); 350 351 if (!(ar & (1<<11))) 352 return -1; /* Not code. */ 353 354 if (l && !db) 355 return 64; 356 else if (!l && db) 357 return 32; 358 else if (!l && !db) 359 return 16; 360 else 361 return -1; /* Unknown bitness. */ 362} 363 364/* 365 * Checks a given selector for its code bitness or returns -1 if it's not 366 * a usable code segment selector. 367 */ 368bool is_valid_ss(unsigned short cs) 369{ 370 uint32_t valid = 0, ar; 371 asm ("lar %[cs], %[ar]\n\t" 372 "jnz 1f\n\t" 373 "mov $1, %[valid]\n\t" 374 "1:" 375 : [ar] "=r" (ar), [valid] "+rm" (valid) 376 : [cs] "r" (cs)); 377 378 if (!valid) 379 return false; 380 381 if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA && 382 (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN) 383 return false; 384 385 return (ar & AR_P); 386} 387 388/* Number of errors in the current test case. */ 389static volatile sig_atomic_t nerrs; 390 391static void validate_signal_ss(int sig, ucontext_t *ctx) 392{ 393#ifdef __x86_64__ 394 bool was_64bit = (cs_bitness(*csptr(ctx)) == 64); 395 396 if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) { 397 printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n"); 398 nerrs++; 399 400 /* 401 * This happens on Linux 4.1. The rest will fail, too, so 402 * return now to reduce the noise. 403 */ 404 return; 405 } 406 407 /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */ 408 if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) { 409 printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n", 410 sig); 411 nerrs++; 412 } 413 414 if (is_valid_ss(*ssptr(ctx))) { 415 /* 416 * DOSEMU was written before 64-bit sigcontext had SS, and 417 * it tries to figure out the signal source SS by looking at 418 * the physical register. Make sure that keeps working. 419 */ 420 unsigned short hw_ss; 421 asm ("mov %%ss, %0" : "=rm" (hw_ss)); 422 if (hw_ss != *ssptr(ctx)) { 423 printf("[FAIL]\tHW SS didn't match saved SS\n"); 424 nerrs++; 425 } 426 } 427#endif 428} 429 430/* 431 * SIGUSR1 handler. Sets CS and SS as requested and points IP to the 432 * int3 trampoline. Sets SP to a large known value so that we can see 433 * whether the value round-trips back to user mode correctly. 434 */ 435static void sigusr1(int sig, siginfo_t *info, void *ctx_void) 436{ 437 ucontext_t *ctx = (ucontext_t*)ctx_void; 438 439 validate_signal_ss(sig, ctx); 440 441 memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 442 443 *csptr(ctx) = sig_cs; 444 *ssptr(ctx) = sig_ss; 445 446 ctx->uc_mcontext.gregs[REG_IP] = 447 sig_cs == code16_sel ? 0 : (unsigned long)&int3; 448 ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; 449 ctx->uc_mcontext.gregs[REG_CX] = 0; 450 451#ifdef __i386__ 452 /* 453 * Make sure the kernel doesn't inadvertently use DS or ES-relative 454 * accesses in a region where user DS or ES is loaded. 455 * 456 * Skip this for 64-bit builds because long mode doesn't care about 457 * DS and ES and skipping it increases test coverage a little bit, 458 * since 64-bit kernels can still run the 32-bit build. 459 */ 460 ctx->uc_mcontext.gregs[REG_DS] = 0; 461 ctx->uc_mcontext.gregs[REG_ES] = 0; 462#endif 463 464 memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 465 requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */ 466 467 return; 468} 469 470/* 471 * Called after a successful sigreturn (via int3) or from a failed 472 * sigreturn (directly by kernel). Restores our state so that the 473 * original raise(SIGUSR1) returns. 474 */ 475static void sigtrap(int sig, siginfo_t *info, void *ctx_void) 476{ 477 ucontext_t *ctx = (ucontext_t*)ctx_void; 478 479 validate_signal_ss(sig, ctx); 480 481 sig_err = ctx->uc_mcontext.gregs[REG_ERR]; 482 sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; 483 484 unsigned short ss; 485 asm ("mov %%ss,%0" : "=r" (ss)); 486 487 greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX]; 488 if (asm_ss != sig_ss && sig == SIGTRAP) { 489 /* Sanity check failure. */ 490 printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", 491 ss, *ssptr(ctx), (unsigned long long)asm_ss); 492 nerrs++; 493 } 494 495 memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 496 memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); 497 498#ifdef __x86_64__ 499 if (sig_corrupt_final_ss) { 500 if (ctx->uc_flags & UC_STRICT_RESTORE_SS) { 501 printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n"); 502 nerrs++; 503 } else { 504 /* 505 * DOSEMU transitions from 32-bit to 64-bit mode by 506 * adjusting sigcontext, and it requires that this work 507 * even if the saved SS is bogus. 508 */ 509 printf("\tCorrupting SS on return to 64-bit mode\n"); 510 *ssptr(ctx) = 0; 511 } 512 } 513#endif 514 515 sig_trapped = sig; 516} 517 518#ifdef __x86_64__ 519/* Tests recovery if !UC_STRICT_RESTORE_SS */ 520static void sigusr2(int sig, siginfo_t *info, void *ctx_void) 521{ 522 ucontext_t *ctx = (ucontext_t*)ctx_void; 523 524 if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) { 525 printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n"); 526 nerrs++; 527 return; /* We can't do the rest. */ 528 } 529 530 ctx->uc_flags &= ~UC_STRICT_RESTORE_SS; 531 *ssptr(ctx) = 0; 532 533 /* Return. The kernel should recover without sending another signal. */ 534} 535 536static int test_nonstrict_ss(void) 537{ 538 clearhandler(SIGUSR1); 539 clearhandler(SIGTRAP); 540 clearhandler(SIGSEGV); 541 clearhandler(SIGILL); 542 sethandler(SIGUSR2, sigusr2, 0); 543 544 nerrs = 0; 545 546 printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n"); 547 raise(SIGUSR2); 548 if (!nerrs) 549 printf("[OK]\tIt worked\n"); 550 551 return nerrs; 552} 553#endif 554 555/* Finds a usable code segment of the requested bitness. */ 556int find_cs(int bitness) 557{ 558 unsigned short my_cs; 559 560 asm ("mov %%cs,%0" : "=r" (my_cs)); 561 562 if (cs_bitness(my_cs) == bitness) 563 return my_cs; 564 if (cs_bitness(my_cs + (2 << 3)) == bitness) 565 return my_cs + (2 << 3); 566 if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness) 567 return my_cs - (2 << 3); 568 if (cs_bitness(code16_sel) == bitness) 569 return code16_sel; 570 571 printf("[WARN]\tCould not find %d-bit CS\n", bitness); 572 return -1; 573} 574 575static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) 576{ 577 int cs = find_cs(cs_bits); 578 if (cs == -1) { 579 printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n", 580 cs_bits, use_16bit_ss ? 16 : 32); 581 return 0; 582 } 583 584 if (force_ss != -1) { 585 sig_ss = force_ss; 586 } else { 587 if (use_16bit_ss) { 588 if (!data16_sel) { 589 printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n", 590 cs_bits); 591 return 0; 592 } 593 sig_ss = data16_sel; 594 } else { 595 asm volatile ("mov %%ss,%0" : "=r" (sig_ss)); 596 } 597 } 598 599 sig_cs = cs; 600 601 printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n", 602 cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss, 603 (sig_ss & 4) ? "" : ", GDT"); 604 605 raise(SIGUSR1); 606 607 nerrs = 0; 608 609 /* 610 * Check that each register had an acceptable value when the 611 * int3 trampoline was invoked. 612 */ 613 for (int i = 0; i < NGREG; i++) { 614 greg_t req = requested_regs[i], res = resulting_regs[i]; 615 616 if (i == REG_TRAPNO || i == REG_IP) 617 continue; /* don't care */ 618 619 if (i == REG_SP) { 620 /* 621 * If we were using a 16-bit stack segment, then 622 * the kernel is a bit stuck: IRET only restores 623 * the low 16 bits of ESP/RSP if SS is 16-bit. 624 * The kernel uses a hack to restore bits 31:16, 625 * but that hack doesn't help with bits 63:32. 626 * On Intel CPUs, bits 63:32 end up zeroed, and, on 627 * AMD CPUs, they leak the high bits of the kernel 628 * espfix64 stack pointer. There's very little that 629 * the kernel can do about it. 630 * 631 * Similarly, if we are returning to a 32-bit context, 632 * the CPU will often lose the high 32 bits of RSP. 633 */ 634 635 if (res == req) 636 continue; 637 638 if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) { 639 printf("[NOTE]\tSP: %llx -> %llx\n", 640 (unsigned long long)req, 641 (unsigned long long)res); 642 continue; 643 } 644 645 printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n", 646 (unsigned long long)requested_regs[i], 647 (unsigned long long)resulting_regs[i]); 648 nerrs++; 649 continue; 650 } 651 652 bool ignore_reg = false; 653#if __i386__ 654 if (i == REG_UESP) 655 ignore_reg = true; 656#else 657 if (i == REG_CSGSFS) { 658 struct selectors *req_sels = 659 (void *)&requested_regs[REG_CSGSFS]; 660 struct selectors *res_sels = 661 (void *)&resulting_regs[REG_CSGSFS]; 662 if (req_sels->cs != res_sels->cs) { 663 printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n", 664 req_sels->cs, res_sels->cs); 665 nerrs++; 666 } 667 668 if (req_sels->ss != res_sels->ss) { 669 printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n", 670 req_sels->ss, res_sels->ss); 671 nerrs++; 672 } 673 674 continue; 675 } 676#endif 677 678 /* Sanity check on the kernel */ 679 if (i == REG_CX && req != res) { 680 printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", 681 (unsigned long long)req, 682 (unsigned long long)res); 683 nerrs++; 684 continue; 685 } 686 687 if (req != res && !ignore_reg) { 688 printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", 689 i, (unsigned long long)req, 690 (unsigned long long)res); 691 nerrs++; 692 } 693 } 694 695 if (nerrs == 0) 696 printf("[OK]\tall registers okay\n"); 697 698 return nerrs; 699} 700 701static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) 702{ 703 int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs; 704 if (cs == -1) 705 return 0; 706 707 sig_cs = cs; 708 sig_ss = ss; 709 710 printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n", 711 cs_bits, sig_cs, sig_ss); 712 713 sig_trapped = 0; 714 raise(SIGUSR1); 715 if (sig_trapped) { 716 char errdesc[32] = ""; 717 if (sig_err) { 718 const char *src = (sig_err & 1) ? " EXT" : ""; 719 const char *table; 720 if ((sig_err & 0x6) == 0x0) 721 table = "GDT"; 722 else if ((sig_err & 0x6) == 0x4) 723 table = "LDT"; 724 else if ((sig_err & 0x6) == 0x2) 725 table = "IDT"; 726 else 727 table = "???"; 728 729 sprintf(errdesc, "%s%s index %d, ", 730 table, src, sig_err >> 3); 731 } 732 733 char trapname[32]; 734 if (sig_trapno == 13) 735 strcpy(trapname, "GP"); 736 else if (sig_trapno == 11) 737 strcpy(trapname, "NP"); 738 else if (sig_trapno == 12) 739 strcpy(trapname, "SS"); 740 else if (sig_trapno == 32) 741 strcpy(trapname, "IRET"); /* X86_TRAP_IRET */ 742 else 743 sprintf(trapname, "%d", sig_trapno); 744 745 printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n", 746 trapname, (unsigned long)sig_err, 747 errdesc, strsignal(sig_trapped)); 748 return 0; 749 } else { 750 /* 751 * This also implicitly tests UC_STRICT_RESTORE_SS: 752 * We check that these signals set UC_STRICT_RESTORE_SS and, 753 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior, 754 * then we won't get SIGSEGV. 755 */ 756 printf("[FAIL]\tDid not get SIGSEGV\n"); 757 return 1; 758 } 759} 760 761int main() 762{ 763 int total_nerrs = 0; 764 unsigned short my_cs, my_ss; 765 766 asm volatile ("mov %%cs,%0" : "=r" (my_cs)); 767 asm volatile ("mov %%ss,%0" : "=r" (my_ss)); 768 setup_ldt(); 769 770 stack_t stack = { 771 /* Our sigaltstack scratch space. */ 772 .ss_sp = malloc(sizeof(char) * SIGSTKSZ), 773 .ss_size = SIGSTKSZ, 774 }; 775 if (sigaltstack(&stack, NULL) != 0) 776 err(1, "sigaltstack"); 777 778 sethandler(SIGUSR1, sigusr1, 0); 779 sethandler(SIGTRAP, sigtrap, SA_ONSTACK); 780 781 /* Easy cases: return to a 32-bit SS in each possible CS bitness. */ 782 total_nerrs += test_valid_sigreturn(64, false, -1); 783 total_nerrs += test_valid_sigreturn(32, false, -1); 784 total_nerrs += test_valid_sigreturn(16, false, -1); 785 786 /* 787 * Test easy espfix cases: return to a 16-bit LDT SS in each possible 788 * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant. 789 * 790 * This catches the original missing-espfix-on-64-bit-kernels issue 791 * as well as CVE-2014-8134. 792 */ 793 total_nerrs += test_valid_sigreturn(64, true, -1); 794 total_nerrs += test_valid_sigreturn(32, true, -1); 795 total_nerrs += test_valid_sigreturn(16, true, -1); 796 797 if (gdt_data16_idx) { 798 /* 799 * For performance reasons, Linux skips espfix if SS points 800 * to the GDT. If we were able to allocate a 16-bit SS in 801 * the GDT, see if it leaks parts of the kernel stack pointer. 802 * 803 * This tests for CVE-2014-8133. 804 */ 805 total_nerrs += test_valid_sigreturn(64, true, 806 GDT3(gdt_data16_idx)); 807 total_nerrs += test_valid_sigreturn(32, true, 808 GDT3(gdt_data16_idx)); 809 total_nerrs += test_valid_sigreturn(16, true, 810 GDT3(gdt_data16_idx)); 811 } 812 813#ifdef __x86_64__ 814 /* Nasty ABI case: check SS corruption handling. */ 815 sig_corrupt_final_ss = 1; 816 total_nerrs += test_valid_sigreturn(32, false, -1); 817 total_nerrs += test_valid_sigreturn(32, true, -1); 818 sig_corrupt_final_ss = 0; 819#endif 820 821 /* 822 * We're done testing valid sigreturn cases. Now we test states 823 * for which sigreturn itself will succeed but the subsequent 824 * entry to user mode will fail. 825 * 826 * Depending on the failure mode and the kernel bitness, these 827 * entry failures can generate SIGSEGV, SIGBUS, or SIGILL. 828 */ 829 clearhandler(SIGTRAP); 830 sethandler(SIGSEGV, sigtrap, SA_ONSTACK); 831 sethandler(SIGBUS, sigtrap, SA_ONSTACK); 832 sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */ 833 834 /* Easy failures: invalid SS, resulting in #GP(0) */ 835 test_bad_iret(64, ldt_nonexistent_sel, -1); 836 test_bad_iret(32, ldt_nonexistent_sel, -1); 837 test_bad_iret(16, ldt_nonexistent_sel, -1); 838 839 /* These fail because SS isn't a data segment, resulting in #GP(SS) */ 840 test_bad_iret(64, my_cs, -1); 841 test_bad_iret(32, my_cs, -1); 842 test_bad_iret(16, my_cs, -1); 843 844 /* Try to return to a not-present code segment, triggering #NP(SS). */ 845 test_bad_iret(32, my_ss, npcode32_sel); 846 847 /* 848 * Try to return to a not-present but otherwise valid data segment. 849 * This will cause IRET to fail with #SS on the espfix stack. This 850 * exercises CVE-2014-9322. 851 * 852 * Note that, if espfix is enabled, 64-bit Linux will lose track 853 * of the actual cause of failure and report #GP(0) instead. 854 * This would be very difficult for Linux to avoid, because 855 * espfix64 causes IRET failures to be promoted to #DF, so the 856 * original exception frame is never pushed onto the stack. 857 */ 858 test_bad_iret(32, npdata32_sel, -1); 859 860 /* 861 * Try to return to a not-present but otherwise valid data 862 * segment without invoking espfix. Newer kernels don't allow 863 * this to happen in the first place. On older kernels, though, 864 * this can trigger CVE-2014-9322. 865 */ 866 if (gdt_npdata32_idx) 867 test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); 868 869#ifdef __x86_64__ 870 total_nerrs += test_nonstrict_ss(); 871#endif 872 873 free(stack.ss_sp); 874 return total_nerrs ? 1 : 0; 875}