test_vsyscall.c (13978B)
1/* SPDX-License-Identifier: GPL-2.0 */ 2 3#define _GNU_SOURCE 4 5#include <stdio.h> 6#include <sys/time.h> 7#include <time.h> 8#include <stdlib.h> 9#include <sys/syscall.h> 10#include <unistd.h> 11#include <dlfcn.h> 12#include <string.h> 13#include <inttypes.h> 14#include <signal.h> 15#include <sys/ucontext.h> 16#include <errno.h> 17#include <err.h> 18#include <sched.h> 19#include <stdbool.h> 20#include <setjmp.h> 21#include <sys/uio.h> 22 23#include "helpers.h" 24 25#ifdef __x86_64__ 26# define VSYS(x) (x) 27#else 28# define VSYS(x) 0 29#endif 30 31#ifndef SYS_getcpu 32# ifdef __x86_64__ 33# define SYS_getcpu 309 34# else 35# define SYS_getcpu 318 36# endif 37#endif 38 39/* max length of lines in /proc/self/maps - anything longer is skipped here */ 40#define MAPS_LINE_LEN 128 41 42static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), 43 int flags) 44{ 45 struct sigaction sa; 46 memset(&sa, 0, sizeof(sa)); 47 sa.sa_sigaction = handler; 48 sa.sa_flags = SA_SIGINFO | flags; 49 sigemptyset(&sa.sa_mask); 50 if (sigaction(sig, &sa, 0)) 51 err(1, "sigaction"); 52} 53 54/* vsyscalls and vDSO */ 55bool vsyscall_map_r = false, vsyscall_map_x = false; 56 57typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); 58const gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000); 59gtod_t vdso_gtod; 60 61typedef int (*vgettime_t)(clockid_t, struct timespec *); 62vgettime_t vdso_gettime; 63 64typedef long (*time_func_t)(time_t *t); 65const time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400); 66time_func_t vdso_time; 67 68typedef long (*getcpu_t)(unsigned *, unsigned *, void *); 69const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); 70getcpu_t vdso_getcpu; 71 72static void init_vdso(void) 73{ 74 void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); 75 if (!vdso) 76 vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); 77 if (!vdso) { 78 printf("[WARN]\tfailed to find vDSO\n"); 79 return; 80 } 81 82 vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday"); 83 if (!vdso_gtod) 84 printf("[WARN]\tfailed to find gettimeofday in vDSO\n"); 85 86 vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); 87 if (!vdso_gettime) 88 printf("[WARN]\tfailed to find clock_gettime in vDSO\n"); 89 90 vdso_time = (time_func_t)dlsym(vdso, "__vdso_time"); 91 if (!vdso_time) 92 printf("[WARN]\tfailed to find time in vDSO\n"); 93 94 vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); 95 if (!vdso_getcpu) { 96 /* getcpu() was never wired up in the 32-bit vDSO. */ 97 printf("[%s]\tfailed to find getcpu in vDSO\n", 98 sizeof(long) == 8 ? "WARN" : "NOTE"); 99 } 100} 101 102static int init_vsys(void) 103{ 104#ifdef __x86_64__ 105 int nerrs = 0; 106 FILE *maps; 107 char line[MAPS_LINE_LEN]; 108 bool found = false; 109 110 maps = fopen("/proc/self/maps", "r"); 111 if (!maps) { 112 printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n"); 113 vsyscall_map_r = true; 114 return 0; 115 } 116 117 while (fgets(line, MAPS_LINE_LEN, maps)) { 118 char r, x; 119 void *start, *end; 120 char name[MAPS_LINE_LEN]; 121 122 /* sscanf() is safe here as strlen(name) >= strlen(line) */ 123 if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", 124 &start, &end, &r, &x, name) != 5) 125 continue; 126 127 if (strcmp(name, "[vsyscall]")) 128 continue; 129 130 printf("\tvsyscall map: %s", line); 131 132 if (start != (void *)0xffffffffff600000 || 133 end != (void *)0xffffffffff601000) { 134 printf("[FAIL]\taddress range is nonsense\n"); 135 nerrs++; 136 } 137 138 printf("\tvsyscall permissions are %c-%c\n", r, x); 139 vsyscall_map_r = (r == 'r'); 140 vsyscall_map_x = (x == 'x'); 141 142 found = true; 143 break; 144 } 145 146 fclose(maps); 147 148 if (!found) { 149 printf("\tno vsyscall map in /proc/self/maps\n"); 150 vsyscall_map_r = false; 151 vsyscall_map_x = false; 152 } 153 154 return nerrs; 155#else 156 return 0; 157#endif 158} 159 160/* syscalls */ 161static inline long sys_gtod(struct timeval *tv, struct timezone *tz) 162{ 163 return syscall(SYS_gettimeofday, tv, tz); 164} 165 166static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) 167{ 168 return syscall(SYS_clock_gettime, id, ts); 169} 170 171static inline long sys_time(time_t *t) 172{ 173 return syscall(SYS_time, t); 174} 175 176static inline long sys_getcpu(unsigned * cpu, unsigned * node, 177 void* cache) 178{ 179 return syscall(SYS_getcpu, cpu, node, cache); 180} 181 182static jmp_buf jmpbuf; 183static volatile unsigned long segv_err; 184 185static void sigsegv(int sig, siginfo_t *info, void *ctx_void) 186{ 187 ucontext_t *ctx = (ucontext_t *)ctx_void; 188 189 segv_err = ctx->uc_mcontext.gregs[REG_ERR]; 190 siglongjmp(jmpbuf, 1); 191} 192 193static double tv_diff(const struct timeval *a, const struct timeval *b) 194{ 195 return (double)(a->tv_sec - b->tv_sec) + 196 (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6; 197} 198 199static int check_gtod(const struct timeval *tv_sys1, 200 const struct timeval *tv_sys2, 201 const struct timezone *tz_sys, 202 const char *which, 203 const struct timeval *tv_other, 204 const struct timezone *tz_other) 205{ 206 int nerrs = 0; 207 double d1, d2; 208 209 if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) { 210 printf("[FAIL] %s tz mismatch\n", which); 211 nerrs++; 212 } 213 214 d1 = tv_diff(tv_other, tv_sys1); 215 d2 = tv_diff(tv_sys2, tv_other); 216 printf("\t%s time offsets: %lf %lf\n", which, d1, d2); 217 218 if (d1 < 0 || d2 < 0) { 219 printf("[FAIL]\t%s time was inconsistent with the syscall\n", which); 220 nerrs++; 221 } else { 222 printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which); 223 } 224 225 return nerrs; 226} 227 228static int test_gtod(void) 229{ 230 struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys; 231 struct timezone tz_sys, tz_vdso, tz_vsys; 232 long ret_vdso = -1; 233 long ret_vsys = -1; 234 int nerrs = 0; 235 236 printf("[RUN]\ttest gettimeofday()\n"); 237 238 if (sys_gtod(&tv_sys1, &tz_sys) != 0) 239 err(1, "syscall gettimeofday"); 240 if (vdso_gtod) 241 ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso); 242 if (vsyscall_map_x) 243 ret_vsys = vgtod(&tv_vsys, &tz_vsys); 244 if (sys_gtod(&tv_sys2, &tz_sys) != 0) 245 err(1, "syscall gettimeofday"); 246 247 if (vdso_gtod) { 248 if (ret_vdso == 0) { 249 nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso); 250 } else { 251 printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso); 252 nerrs++; 253 } 254 } 255 256 if (vsyscall_map_x) { 257 if (ret_vsys == 0) { 258 nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys); 259 } else { 260 printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys); 261 nerrs++; 262 } 263 } 264 265 return nerrs; 266} 267 268static int test_time(void) { 269 int nerrs = 0; 270 271 printf("[RUN]\ttest time()\n"); 272 long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0; 273 long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1; 274 t_sys1 = sys_time(&t2_sys1); 275 if (vdso_time) 276 t_vdso = vdso_time(&t2_vdso); 277 if (vsyscall_map_x) 278 t_vsys = vtime(&t2_vsys); 279 t_sys2 = sys_time(&t2_sys2); 280 if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) { 281 printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2); 282 nerrs++; 283 return nerrs; 284 } 285 286 if (vdso_time) { 287 if (t_vdso < 0 || t_vdso != t2_vdso) { 288 printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso); 289 nerrs++; 290 } else if (t_vdso < t_sys1 || t_vdso > t_sys2) { 291 printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2); 292 nerrs++; 293 } else { 294 printf("[OK]\tvDSO time() is okay\n"); 295 } 296 } 297 298 if (vsyscall_map_x) { 299 if (t_vsys < 0 || t_vsys != t2_vsys) { 300 printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys); 301 nerrs++; 302 } else if (t_vsys < t_sys1 || t_vsys > t_sys2) { 303 printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2); 304 nerrs++; 305 } else { 306 printf("[OK]\tvsyscall time() is okay\n"); 307 } 308 } 309 310 return nerrs; 311} 312 313static int test_getcpu(int cpu) 314{ 315 int nerrs = 0; 316 long ret_sys, ret_vdso = -1, ret_vsys = -1; 317 318 printf("[RUN]\tgetcpu() on CPU %d\n", cpu); 319 320 cpu_set_t cpuset; 321 CPU_ZERO(&cpuset); 322 CPU_SET(cpu, &cpuset); 323 if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { 324 printf("[SKIP]\tfailed to force CPU %d\n", cpu); 325 return nerrs; 326 } 327 328 unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys; 329 unsigned node = 0; 330 bool have_node = false; 331 ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); 332 if (vdso_getcpu) 333 ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); 334 if (vsyscall_map_x) 335 ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); 336 337 if (ret_sys == 0) { 338 if (cpu_sys != cpu) { 339 printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu); 340 nerrs++; 341 } 342 343 have_node = true; 344 node = node_sys; 345 } 346 347 if (vdso_getcpu) { 348 if (ret_vdso) { 349 printf("[FAIL]\tvDSO getcpu() failed\n"); 350 nerrs++; 351 } else { 352 if (!have_node) { 353 have_node = true; 354 node = node_vdso; 355 } 356 357 if (cpu_vdso != cpu) { 358 printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu); 359 nerrs++; 360 } else { 361 printf("[OK]\tvDSO reported correct CPU\n"); 362 } 363 364 if (node_vdso != node) { 365 printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node); 366 nerrs++; 367 } else { 368 printf("[OK]\tvDSO reported correct node\n"); 369 } 370 } 371 } 372 373 if (vsyscall_map_x) { 374 if (ret_vsys) { 375 printf("[FAIL]\tvsyscall getcpu() failed\n"); 376 nerrs++; 377 } else { 378 if (!have_node) { 379 have_node = true; 380 node = node_vsys; 381 } 382 383 if (cpu_vsys != cpu) { 384 printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu); 385 nerrs++; 386 } else { 387 printf("[OK]\tvsyscall reported correct CPU\n"); 388 } 389 390 if (node_vsys != node) { 391 printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node); 392 nerrs++; 393 } else { 394 printf("[OK]\tvsyscall reported correct node\n"); 395 } 396 } 397 } 398 399 return nerrs; 400} 401 402static int test_vsys_r(void) 403{ 404#ifdef __x86_64__ 405 printf("[RUN]\tChecking read access to the vsyscall page\n"); 406 bool can_read; 407 if (sigsetjmp(jmpbuf, 1) == 0) { 408 *(volatile int *)0xffffffffff600000; 409 can_read = true; 410 } else { 411 can_read = false; 412 } 413 414 if (can_read && !vsyscall_map_r) { 415 printf("[FAIL]\tWe have read access, but we shouldn't\n"); 416 return 1; 417 } else if (!can_read && vsyscall_map_r) { 418 printf("[FAIL]\tWe don't have read access, but we should\n"); 419 return 1; 420 } else if (can_read) { 421 printf("[OK]\tWe have read access\n"); 422 } else { 423 printf("[OK]\tWe do not have read access: #PF(0x%lx)\n", 424 segv_err); 425 } 426#endif 427 428 return 0; 429} 430 431static int test_vsys_x(void) 432{ 433#ifdef __x86_64__ 434 if (vsyscall_map_x) { 435 /* We already tested this adequately. */ 436 return 0; 437 } 438 439 printf("[RUN]\tMake sure that vsyscalls really page fault\n"); 440 441 bool can_exec; 442 if (sigsetjmp(jmpbuf, 1) == 0) { 443 vgtod(NULL, NULL); 444 can_exec = true; 445 } else { 446 can_exec = false; 447 } 448 449 if (can_exec) { 450 printf("[FAIL]\tExecuting the vsyscall did not page fault\n"); 451 return 1; 452 } else if (segv_err & (1 << 4)) { /* INSTR */ 453 printf("[OK]\tExecuting the vsyscall page failed: #PF(0x%lx)\n", 454 segv_err); 455 } else { 456 printf("[FAIL]\tExecution failed with the wrong error: #PF(0x%lx)\n", 457 segv_err); 458 return 1; 459 } 460#endif 461 462 return 0; 463} 464 465/* 466 * Debuggers expect ptrace() to be able to peek at the vsyscall page. 467 * Use process_vm_readv() as a proxy for ptrace() to test this. We 468 * want it to work in the vsyscall=emulate case and to fail in the 469 * vsyscall=xonly case. 470 * 471 * It's worth noting that this ABI is a bit nutty. write(2) can't 472 * read from the vsyscall page on any kernel version or mode. The 473 * fact that ptrace() ever worked was a nice courtesy of old kernels, 474 * but the code to support it is fairly gross. 475 */ 476static int test_process_vm_readv(void) 477{ 478#ifdef __x86_64__ 479 char buf[4096]; 480 struct iovec local, remote; 481 int ret; 482 483 printf("[RUN]\tprocess_vm_readv() from vsyscall page\n"); 484 485 local.iov_base = buf; 486 local.iov_len = 4096; 487 remote.iov_base = (void *)0xffffffffff600000; 488 remote.iov_len = 4096; 489 ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0); 490 if (ret != 4096) { 491 /* 492 * We expect process_vm_readv() to work if and only if the 493 * vsyscall page is readable. 494 */ 495 printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno); 496 return vsyscall_map_r ? 1 : 0; 497 } 498 499 if (vsyscall_map_r) { 500 if (!memcmp(buf, remote.iov_base, sizeof(buf))) { 501 printf("[OK]\tIt worked and read correct data\n"); 502 } else { 503 printf("[FAIL]\tIt worked but returned incorrect data\n"); 504 return 1; 505 } 506 } else { 507 printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n"); 508 return 1; 509 } 510#endif 511 512 return 0; 513} 514 515#ifdef __x86_64__ 516static volatile sig_atomic_t num_vsyscall_traps; 517 518static void sigtrap(int sig, siginfo_t *info, void *ctx_void) 519{ 520 ucontext_t *ctx = (ucontext_t *)ctx_void; 521 unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP]; 522 523 if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0) 524 num_vsyscall_traps++; 525} 526 527static int test_emulation(void) 528{ 529 time_t tmp; 530 bool is_native; 531 532 if (!vsyscall_map_x) 533 return 0; 534 535 printf("[RUN]\tchecking that vsyscalls are emulated\n"); 536 sethandler(SIGTRAP, sigtrap, 0); 537 set_eflags(get_eflags() | X86_EFLAGS_TF); 538 vtime(&tmp); 539 set_eflags(get_eflags() & ~X86_EFLAGS_TF); 540 541 /* 542 * If vsyscalls are emulated, we expect a single trap in the 543 * vsyscall page -- the call instruction will trap with RIP 544 * pointing to the entry point before emulation takes over. 545 * In native mode, we expect two traps, since whatever code 546 * the vsyscall page contains will be more than just a ret 547 * instruction. 548 */ 549 is_native = (num_vsyscall_traps > 1); 550 551 printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n", 552 (is_native ? "FAIL" : "OK"), 553 (is_native ? "native" : "emulated"), 554 (int)num_vsyscall_traps); 555 556 return is_native; 557} 558#endif 559 560int main(int argc, char **argv) 561{ 562 int nerrs = 0; 563 564 init_vdso(); 565 nerrs += init_vsys(); 566 567 nerrs += test_gtod(); 568 nerrs += test_time(); 569 nerrs += test_getcpu(0); 570 nerrs += test_getcpu(1); 571 572 sethandler(SIGSEGV, sigsegv, 0); 573 nerrs += test_vsys_r(); 574 nerrs += test_vsys_x(); 575 576 nerrs += test_process_vm_readv(); 577 578#ifdef __x86_64__ 579 nerrs += test_emulation(); 580#endif 581 582 return nerrs ? 1 : 0; 583}