khugepaged.c (22413B)
#define _GNU_SOURCE
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>

#include <sys/mman.h>
#include <sys/wait.h>

#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif

/*
 * POSIX allows <limits.h> to leave PATH_MAX undefined; fall back to the
 * usual Linux value so the path buffers below always have a size.
 */
#ifndef PATH_MAX
#define PATH_MAX 4096
#endif

/* Fixed address at which every test mapping is placed. */
#define BASE_ADDR ((void *)(1UL << 30))
static unsigned long hpage_pmd_size;
static unsigned long page_size;
static int hpage_pmd_nr;

#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
#define PID_SMAPS "/proc/self/smaps"

/*
 * Each enum below indexes the NULL-terminated string table that follows
 * it; the enumerators and the strings must stay in the same order.
 */
enum thp_enabled {
	THP_ALWAYS,
	THP_MADVISE,
	THP_NEVER,
};

static const char *thp_enabled_strings[] = {
	"always",
	"madvise",
	"never",
	NULL
};

enum thp_defrag {
	THP_DEFRAG_ALWAYS,
	THP_DEFRAG_DEFER,
	THP_DEFRAG_DEFER_MADVISE,
	THP_DEFRAG_MADVISE,
	THP_DEFRAG_NEVER,
};

static const char *thp_defrag_strings[] = {
	"always",
	"defer",
	"defer+madvise",
	"madvise",
	"never",
	NULL
};

enum shmem_enabled {
	SHMEM_ALWAYS,
	SHMEM_WITHIN_SIZE,
	SHMEM_ADVISE,
	SHMEM_NEVER,
	SHMEM_DENY,
	SHMEM_FORCE,
};

static const char *shmem_enabled_strings[] = {
	"always",
	"within_size",
	"advise",
	"never",
	"deny",
	"force",
	NULL
};

/* Values of the files under THP_SYSFS "khugepaged/". */
struct khugepaged_settings {
	bool defrag;
	unsigned int alloc_sleep_millisecs;
	unsigned int scan_sleep_millisecs;
	unsigned int max_ptes_none;
	unsigned int max_ptes_swap;
	unsigned int max_ptes_shared;
	unsigned long pages_to_scan;
};

/* Complete set of THP knobs that write_settings() writes. */
struct settings {
	enum thp_enabled thp_enabled;
	enum thp_defrag thp_defrag;
	enum shmem_enabled shmem_enabled;
	bool use_zero_page;
	struct khugepaged_settings khugepaged;
};

/*
 * Settings applied by adjust_settings(). The khugepaged fields that are
 * not initialised here start out as zero.
 */
static struct settings default_settings = {
	.thp_enabled = THP_MADVISE,
	.thp_defrag = THP_DEFRAG_ALWAYS,
	.shmem_enabled = SHMEM_NEVER,
	.use_zero_page = 0,
	.khugepaged = {
		.defrag = 1,
		.alloc_sleep_millisecs = 10,
		.scan_sleep_millisecs = 10,
	},
};

/* Snapshot taken by save_settings() and written back by restore_settings(). */
static struct settings saved_settings;
/* When set, restore_settings() exits without touching sysfs. */
static bool skip_settings_restore;

/* Number of failed checks so far; used as the process exit status. */
static int exit_status;

/* Print @msg in green. */
static void success(const char *msg)
{
	printf(" \e[32m%s\e[0m\n", msg);
}

/* Print @msg in red and count one more failure. */
static void fail(const char *msg)
{
	printf(" \e[31m%s\e[0m\n", msg);
	exit_status++;
}

/*
 * Read at most @buflen - 1 bytes from @path into @buf and NUL-terminate.
 *
 * Returns the number of bytes read, or 0 if the file could not be opened,
 * the read failed, or nothing was read. Never returns a negative value.
 */
static int read_file(const char *path, char *buf, size_t buflen)
{
	int fd;
	ssize_t numread;

	fd = open(path, O_RDONLY);
	if (fd == -1)
		return 0;

	numread = read(fd, buf, buflen - 1);
	if (numread < 1) {
		close(fd);
		return 0;
	}

	buf[numread] = '\0';
	close(fd);

	return (unsigned int) numread;
}

/*
 * Write @buflen - 1 bytes of @buf to @path; callers pass strlen() + 1 so
 * the terminating NUL is not written.
 *
 * Returns the number of bytes written, or 0 on any failure.
 */
static int write_file(const char *path, const char *buf, size_t buflen)
{
	int fd;
	ssize_t numwritten;

	fd = open(path, O_WRONLY);
	if (fd == -1)
		return 0;

	numwritten = write(fd, buf, buflen - 1);
	close(fd);
	if (numwritten < 1)
		return 0;

	return (unsigned int) numwritten;
}

/*
 * Read THP_SYSFS @name, extract the token enclosed in "[...]" and return
 * its index in the NULL-terminated table @strings.
 *
 * Exits the process on any read or parse failure.
 */
static int read_string(const char *name, const char *strings[])
{
	char path[PATH_MAX];
	char buf[256];
	char *c;
	int ret;

	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
	if (ret >= PATH_MAX) {
		printf("%s: Pathname is too long\n", __func__);
		exit(EXIT_FAILURE);
	}

	if (!read_file(path, buf, sizeof(buf))) {
		perror(path);
		exit(EXIT_FAILURE);
	}

	c = strchr(buf, '[');
	if (!c) {
		printf("%s: Parse failure\n", __func__);
		exit(EXIT_FAILURE);
	}

	/* Shift the text following '[' to the start of the buffer. */
	c++;
	memmove(buf, c, sizeof(buf) - (c - buf));

	c = strchr(buf, ']');
	if (!c) {
		printf("%s: Parse failure\n", __func__);
		exit(EXIT_FAILURE);
	}
	*c = '\0';

	ret = 0;
	while (strings[ret]) {
		if (!strcmp(strings[ret], buf))
			return ret;
		ret++;
	}

	printf("Failed to parse %s\n", name);
	exit(EXIT_FAILURE);
}

/* Write the string @val to THP_SYSFS @name; exits the process on failure. */
static void write_string(const char *name, const char *val)
{
	char path[PATH_MAX];
	int ret;

	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
	if (ret >= PATH_MAX) {
		printf("%s: Pathname is too long\n", __func__);
		exit(EXIT_FAILURE);
	}

	if (!write_file(path, val, strlen(val) + 1)) {
		perror(path);
		exit(EXIT_FAILURE);
	}
}

/*
 * Read THP_SYSFS @name and parse it as a base-10 unsigned number.
 *
 * Exits the process if the file cannot be read.
 */
static unsigned long read_num(const char *name)
{
	char path[PATH_MAX];
	char buf[21];
	int ret;

	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
	if (ret >= PATH_MAX) {
		printf("%s: Pathname is too long\n", __func__);
		exit(EXIT_FAILURE);
	}

	/*
	 * read_file() reports failure as 0, never as a negative value, and
	 * leaves buf untouched in that case, so it must not be parsed.
	 */
	if (!read_file(path, buf, sizeof(buf))) {
		perror("read_file(read_num)");
		exit(EXIT_FAILURE);
	}

	return strtoul(buf, NULL, 10);
}

/* Write @num in decimal to THP_SYSFS @name; exits the process on failure. */
static void write_num(const char *name, unsigned long num)
{
	char path[PATH_MAX];
	char buf[21];
	int ret;

	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
	if (ret >= PATH_MAX) {
		printf("%s: Pathname is too long\n", __func__);
		exit(EXIT_FAILURE);
	}

	/* 20 digits plus the NUL cover any 64-bit unsigned value. */
	snprintf(buf, sizeof(buf), "%lu", num);
	if (!write_file(path, buf, strlen(buf) + 1)) {
		perror(path);
		exit(EXIT_FAILURE);
	}
}

/* Write every field of @settings to its file under THP_SYSFS. */
static void write_settings(struct settings *settings)
{
	struct khugepaged_settings *khugepaged = &settings->khugepaged;

	write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
	write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
	write_string("shmem_enabled",
			shmem_enabled_strings[settings->shmem_enabled]);
	write_num("use_zero_page", settings->use_zero_page);

	write_num("khugepaged/defrag", khugepaged->defrag);
	write_num("khugepaged/alloc_sleep_millisecs",
			khugepaged->alloc_sleep_millisecs);
	write_num("khugepaged/scan_sleep_millisecs",
			khugepaged->scan_sleep_millisecs);
	write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
	write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
	write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
	write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
}

/*
 * Write saved_settings back and terminate the process; never returns.
 *
 * Doubles as a signal handler: a non-zero @sig ends the process with
 * EXIT_FAILURE, while a direct call with 0 ends it with exit_status.
 * If skip_settings_restore is set, nothing is written and the process
 * exits with exit_status.
 *
 * NOTE(review): printf() and exit() are not async-signal-safe, so using
 * this as a handler is best effort only.
 */
static void restore_settings(int sig)
{
	if (skip_settings_restore)
		goto out;

	printf("Restore THP and khugepaged settings...");
	write_settings(&saved_settings);
	success("OK");
	if (sig)
		exit(EXIT_FAILURE);
out:
	exit(exit_status);
}

/*
 * Snapshot the current sysfs values into saved_settings and install
 * restore_settings() as the handler for SIGTERM, SIGINT, SIGHUP and SIGQUIT.
 */
static void save_settings(void)
{
	printf("Save THP and khugepaged settings...");
	saved_settings = (struct settings) {
		.thp_enabled = read_string("enabled", thp_enabled_strings),
		.thp_defrag = read_string("defrag", thp_defrag_strings),
		.shmem_enabled =
			read_string("shmem_enabled", shmem_enabled_strings),
		.use_zero_page = read_num("use_zero_page"),
	};
	saved_settings.khugepaged = (struct khugepaged_settings) {
		.defrag = read_num("khugepaged/defrag"),
		.alloc_sleep_millisecs =
			read_num("khugepaged/alloc_sleep_millisecs"),
		.scan_sleep_millisecs =
			read_num("khugepaged/scan_sleep_millisecs"),
		.max_ptes_none = read_num("khugepaged/max_ptes_none"),
		.max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
		.max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
		.pages_to_scan = read_num("khugepaged/pages_to_scan"),
	};
	success("OK");

	signal(SIGTERM, restore_settings);
	signal(SIGINT, restore_settings);
	signal(SIGHUP, restore_settings);
	signal(SIGQUIT, restore_settings);
}

/* Apply default_settings to sysfs. */
static void adjust_settings(void)
{
	printf("Adjust settings...");
	write_settings(&default_settings);
	success("OK");
}

#define MAX_LINE_LENGTH 500

/*
 * Read lines from @fp into @buf (at least MAX_LINE_LENGTH bytes) until one
 * starts with @pattern.
 *
 * Returns true with the matching line left in @buf and the stream
 * positioned just after it, or false once the end of the stream is reached.
 */
static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
{
	while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
		if (!strncmp(buf, pattern, strlen(pattern)))
			return true;
	}
	return false;
}

344static bool check_huge(void *addr) 345{ 346 bool thp = false; 347 int ret; 348 FILE *fp; 349 char buffer[MAX_LINE_LENGTH]; 350 char addr_pattern[MAX_LINE_LENGTH]; 351 352 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", 353 (unsigned long) addr); 354 if (ret >= MAX_LINE_LENGTH) { 355 printf("%s: Pattern is too long\n", __func__); 356 exit(EXIT_FAILURE); 357 } 358 359 360 fp = fopen(PID_SMAPS, "r"); 361 if (!fp) { 362 printf("%s: Failed to open file %s\n", __func__, PID_SMAPS); 363 exit(EXIT_FAILURE); 364 } 365 if (!check_for_pattern(fp, addr_pattern, buffer)) 366 goto err_out; 367 368 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB", 369 hpage_pmd_size >> 10); 370 if (ret >= MAX_LINE_LENGTH) { 371 printf("%s: Pattern is too long\n", __func__); 372 exit(EXIT_FAILURE); 373 } 374 /* 375 * Fetch the AnonHugePages: in the same block and check whether it got 376 * the expected number of hugeepages next. 377 */ 378 if (!check_for_pattern(fp, "AnonHugePages:", buffer)) 379 goto err_out; 380 381 if (strncmp(buffer, addr_pattern, strlen(addr_pattern))) 382 goto err_out; 383 384 thp = true; 385err_out: 386 fclose(fp); 387 return thp; 388} 389 390 391static bool check_swap(void *addr, unsigned long size) 392{ 393 bool swap = false; 394 int ret; 395 FILE *fp; 396 char buffer[MAX_LINE_LENGTH]; 397 char addr_pattern[MAX_LINE_LENGTH]; 398 399 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", 400 (unsigned long) addr); 401 if (ret >= MAX_LINE_LENGTH) { 402 printf("%s: Pattern is too long\n", __func__); 403 exit(EXIT_FAILURE); 404 } 405 406 407 fp = fopen(PID_SMAPS, "r"); 408 if (!fp) { 409 printf("%s: Failed to open file %s\n", __func__, PID_SMAPS); 410 exit(EXIT_FAILURE); 411 } 412 if (!check_for_pattern(fp, addr_pattern, buffer)) 413 goto err_out; 414 415 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB", 416 size >> 10); 417 if (ret >= MAX_LINE_LENGTH) { 418 printf("%s: Pattern is too long\n", __func__); 419 exit(EXIT_FAILURE); 
420 } 421 /* 422 * Fetch the Swap: in the same block and check whether it got 423 * the expected number of hugeepages next. 424 */ 425 if (!check_for_pattern(fp, "Swap:", buffer)) 426 goto err_out; 427 428 if (strncmp(buffer, addr_pattern, strlen(addr_pattern))) 429 goto err_out; 430 431 swap = true; 432err_out: 433 fclose(fp); 434 return swap; 435} 436 437static void *alloc_mapping(void) 438{ 439 void *p; 440 441 p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE, 442 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 443 if (p != BASE_ADDR) { 444 printf("Failed to allocate VMA at %p\n", BASE_ADDR); 445 exit(EXIT_FAILURE); 446 } 447 448 return p; 449} 450 451static void fill_memory(int *p, unsigned long start, unsigned long end) 452{ 453 int i; 454 455 for (i = start / page_size; i < end / page_size; i++) 456 p[i * page_size / sizeof(*p)] = i + 0xdead0000; 457} 458 459static void validate_memory(int *p, unsigned long start, unsigned long end) 460{ 461 int i; 462 463 for (i = start / page_size; i < end / page_size; i++) { 464 if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) { 465 printf("Page %d is corrupted: %#x\n", 466 i, p[i * page_size / sizeof(*p)]); 467 exit(EXIT_FAILURE); 468 } 469 } 470} 471 472#define TICK 500000 473static bool wait_for_scan(const char *msg, char *p) 474{ 475 int full_scans; 476 int timeout = 6; /* 3 seconds */ 477 478 /* Sanity check */ 479 if (check_huge(p)) { 480 printf("Unexpected huge page\n"); 481 exit(EXIT_FAILURE); 482 } 483 484 madvise(p, hpage_pmd_size, MADV_HUGEPAGE); 485 486 /* Wait until the second full_scan completed */ 487 full_scans = read_num("khugepaged/full_scans") + 2; 488 489 printf("%s...", msg); 490 while (timeout--) { 491 if (check_huge(p)) 492 break; 493 if (read_num("khugepaged/full_scans") >= full_scans) 494 break; 495 printf("."); 496 usleep(TICK); 497 } 498 499 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); 500 501 return timeout == -1; 502} 503 504static void alloc_at_fault(void) 505{ 506 struct settings settings 
= default_settings; 507 char *p; 508 509 settings.thp_enabled = THP_ALWAYS; 510 write_settings(&settings); 511 512 p = alloc_mapping(); 513 *p = 1; 514 printf("Allocate huge page on fault..."); 515 if (check_huge(p)) 516 success("OK"); 517 else 518 fail("Fail"); 519 520 write_settings(&default_settings); 521 522 madvise(p, page_size, MADV_DONTNEED); 523 printf("Split huge PMD on MADV_DONTNEED..."); 524 if (!check_huge(p)) 525 success("OK"); 526 else 527 fail("Fail"); 528 munmap(p, hpage_pmd_size); 529} 530 531static void collapse_full(void) 532{ 533 void *p; 534 535 p = alloc_mapping(); 536 fill_memory(p, 0, hpage_pmd_size); 537 if (wait_for_scan("Collapse fully populated PTE table", p)) 538 fail("Timeout"); 539 else if (check_huge(p)) 540 success("OK"); 541 else 542 fail("Fail"); 543 validate_memory(p, 0, hpage_pmd_size); 544 munmap(p, hpage_pmd_size); 545} 546 547static void collapse_empty(void) 548{ 549 void *p; 550 551 p = alloc_mapping(); 552 if (wait_for_scan("Do not collapse empty PTE table", p)) 553 fail("Timeout"); 554 else if (check_huge(p)) 555 fail("Fail"); 556 else 557 success("OK"); 558 munmap(p, hpage_pmd_size); 559} 560 561static void collapse_single_pte_entry(void) 562{ 563 void *p; 564 565 p = alloc_mapping(); 566 fill_memory(p, 0, page_size); 567 if (wait_for_scan("Collapse PTE table with single PTE entry present", p)) 568 fail("Timeout"); 569 else if (check_huge(p)) 570 success("OK"); 571 else 572 fail("Fail"); 573 validate_memory(p, 0, page_size); 574 munmap(p, hpage_pmd_size); 575} 576 577static void collapse_max_ptes_none(void) 578{ 579 int max_ptes_none = hpage_pmd_nr / 2; 580 struct settings settings = default_settings; 581 void *p; 582 583 settings.khugepaged.max_ptes_none = max_ptes_none; 584 write_settings(&settings); 585 586 p = alloc_mapping(); 587 588 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); 589 if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p)) 590 fail("Timeout"); 591 else if 
(check_huge(p)) 592 fail("Fail"); 593 else 594 success("OK"); 595 validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); 596 597 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); 598 if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p)) 599 fail("Timeout"); 600 else if (check_huge(p)) 601 success("OK"); 602 else 603 fail("Fail"); 604 validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); 605 606 munmap(p, hpage_pmd_size); 607 write_settings(&default_settings); 608} 609 610static void collapse_swapin_single_pte(void) 611{ 612 void *p; 613 p = alloc_mapping(); 614 fill_memory(p, 0, hpage_pmd_size); 615 616 printf("Swapout one page..."); 617 if (madvise(p, page_size, MADV_PAGEOUT)) { 618 perror("madvise(MADV_PAGEOUT)"); 619 exit(EXIT_FAILURE); 620 } 621 if (check_swap(p, page_size)) { 622 success("OK"); 623 } else { 624 fail("Fail"); 625 goto out; 626 } 627 628 if (wait_for_scan("Collapse with swapping in single PTE entry", p)) 629 fail("Timeout"); 630 else if (check_huge(p)) 631 success("OK"); 632 else 633 fail("Fail"); 634 validate_memory(p, 0, hpage_pmd_size); 635out: 636 munmap(p, hpage_pmd_size); 637} 638 639static void collapse_max_ptes_swap(void) 640{ 641 int max_ptes_swap = read_num("khugepaged/max_ptes_swap"); 642 void *p; 643 644 p = alloc_mapping(); 645 646 fill_memory(p, 0, hpage_pmd_size); 647 printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr); 648 if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) { 649 perror("madvise(MADV_PAGEOUT)"); 650 exit(EXIT_FAILURE); 651 } 652 if (check_swap(p, (max_ptes_swap + 1) * page_size)) { 653 success("OK"); 654 } else { 655 fail("Fail"); 656 goto out; 657 } 658 659 if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p)) 660 fail("Timeout"); 661 else if (check_huge(p)) 662 fail("Fail"); 663 else 664 success("OK"); 665 validate_memory(p, 0, hpage_pmd_size); 666 667 fill_memory(p, 0, hpage_pmd_size); 668 printf("Swapout %d of %d 
pages...", max_ptes_swap, hpage_pmd_nr); 669 if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) { 670 perror("madvise(MADV_PAGEOUT)"); 671 exit(EXIT_FAILURE); 672 } 673 if (check_swap(p, max_ptes_swap * page_size)) { 674 success("OK"); 675 } else { 676 fail("Fail"); 677 goto out; 678 } 679 680 if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p)) 681 fail("Timeout"); 682 else if (check_huge(p)) 683 success("OK"); 684 else 685 fail("Fail"); 686 validate_memory(p, 0, hpage_pmd_size); 687out: 688 munmap(p, hpage_pmd_size); 689} 690 691static void collapse_single_pte_entry_compound(void) 692{ 693 void *p; 694 695 p = alloc_mapping(); 696 697 printf("Allocate huge page..."); 698 madvise(p, hpage_pmd_size, MADV_HUGEPAGE); 699 fill_memory(p, 0, hpage_pmd_size); 700 if (check_huge(p)) 701 success("OK"); 702 else 703 fail("Fail"); 704 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); 705 706 printf("Split huge page leaving single PTE mapping compound page..."); 707 madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED); 708 if (!check_huge(p)) 709 success("OK"); 710 else 711 fail("Fail"); 712 713 if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p)) 714 fail("Timeout"); 715 else if (check_huge(p)) 716 success("OK"); 717 else 718 fail("Fail"); 719 validate_memory(p, 0, page_size); 720 munmap(p, hpage_pmd_size); 721} 722 723static void collapse_full_of_compound(void) 724{ 725 void *p; 726 727 p = alloc_mapping(); 728 729 printf("Allocate huge page..."); 730 madvise(p, hpage_pmd_size, MADV_HUGEPAGE); 731 fill_memory(p, 0, hpage_pmd_size); 732 if (check_huge(p)) 733 success("OK"); 734 else 735 fail("Fail"); 736 737 printf("Split huge page leaving single PTE page table full of compound pages..."); 738 madvise(p, page_size, MADV_NOHUGEPAGE); 739 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); 740 if (!check_huge(p)) 741 success("OK"); 742 else 743 fail("Fail"); 744 745 if (wait_for_scan("Collapse PTE table full of compound 
pages", p)) 746 fail("Timeout"); 747 else if (check_huge(p)) 748 success("OK"); 749 else 750 fail("Fail"); 751 validate_memory(p, 0, hpage_pmd_size); 752 munmap(p, hpage_pmd_size); 753} 754 755static void collapse_compound_extreme(void) 756{ 757 void *p; 758 int i; 759 760 p = alloc_mapping(); 761 for (i = 0; i < hpage_pmd_nr; i++) { 762 printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...", 763 i + 1, hpage_pmd_nr); 764 765 madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE); 766 fill_memory(BASE_ADDR, 0, hpage_pmd_size); 767 if (!check_huge(BASE_ADDR)) { 768 printf("Failed to allocate huge page\n"); 769 exit(EXIT_FAILURE); 770 } 771 madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE); 772 773 p = mremap(BASE_ADDR - i * page_size, 774 i * page_size + hpage_pmd_size, 775 (i + 1) * page_size, 776 MREMAP_MAYMOVE | MREMAP_FIXED, 777 BASE_ADDR + 2 * hpage_pmd_size); 778 if (p == MAP_FAILED) { 779 perror("mremap+unmap"); 780 exit(EXIT_FAILURE); 781 } 782 783 p = mremap(BASE_ADDR + 2 * hpage_pmd_size, 784 (i + 1) * page_size, 785 (i + 1) * page_size + hpage_pmd_size, 786 MREMAP_MAYMOVE | MREMAP_FIXED, 787 BASE_ADDR - (i + 1) * page_size); 788 if (p == MAP_FAILED) { 789 perror("mremap+alloc"); 790 exit(EXIT_FAILURE); 791 } 792 } 793 794 munmap(BASE_ADDR, hpage_pmd_size); 795 fill_memory(p, 0, hpage_pmd_size); 796 if (!check_huge(p)) 797 success("OK"); 798 else 799 fail("Fail"); 800 801 if (wait_for_scan("Collapse PTE table full of different compound pages", p)) 802 fail("Timeout"); 803 else if (check_huge(p)) 804 success("OK"); 805 else 806 fail("Fail"); 807 808 validate_memory(p, 0, hpage_pmd_size); 809 munmap(p, hpage_pmd_size); 810} 811 812static void collapse_fork(void) 813{ 814 int wstatus; 815 void *p; 816 817 p = alloc_mapping(); 818 819 printf("Allocate small page..."); 820 fill_memory(p, 0, page_size); 821 if (!check_huge(p)) 822 success("OK"); 823 else 824 fail("Fail"); 825 826 printf("Share small page over fork()..."); 827 if 
(!fork()) { 828 /* Do not touch settings on child exit */ 829 skip_settings_restore = true; 830 exit_status = 0; 831 832 if (!check_huge(p)) 833 success("OK"); 834 else 835 fail("Fail"); 836 837 fill_memory(p, page_size, 2 * page_size); 838 839 if (wait_for_scan("Collapse PTE table with single page shared with parent process", p)) 840 fail("Timeout"); 841 else if (check_huge(p)) 842 success("OK"); 843 else 844 fail("Fail"); 845 846 validate_memory(p, 0, page_size); 847 munmap(p, hpage_pmd_size); 848 exit(exit_status); 849 } 850 851 wait(&wstatus); 852 exit_status += WEXITSTATUS(wstatus); 853 854 printf("Check if parent still has small page..."); 855 if (!check_huge(p)) 856 success("OK"); 857 else 858 fail("Fail"); 859 validate_memory(p, 0, page_size); 860 munmap(p, hpage_pmd_size); 861} 862 863static void collapse_fork_compound(void) 864{ 865 int wstatus; 866 void *p; 867 868 p = alloc_mapping(); 869 870 printf("Allocate huge page..."); 871 madvise(p, hpage_pmd_size, MADV_HUGEPAGE); 872 fill_memory(p, 0, hpage_pmd_size); 873 if (check_huge(p)) 874 success("OK"); 875 else 876 fail("Fail"); 877 878 printf("Share huge page over fork()..."); 879 if (!fork()) { 880 /* Do not touch settings on child exit */ 881 skip_settings_restore = true; 882 exit_status = 0; 883 884 if (check_huge(p)) 885 success("OK"); 886 else 887 fail("Fail"); 888 889 printf("Split huge page PMD in child process..."); 890 madvise(p, page_size, MADV_NOHUGEPAGE); 891 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); 892 if (!check_huge(p)) 893 success("OK"); 894 else 895 fail("Fail"); 896 fill_memory(p, 0, page_size); 897 898 write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); 899 if (wait_for_scan("Collapse PTE table full of compound pages in child", p)) 900 fail("Timeout"); 901 else if (check_huge(p)) 902 success("OK"); 903 else 904 fail("Fail"); 905 write_num("khugepaged/max_ptes_shared", 906 default_settings.khugepaged.max_ptes_shared); 907 908 validate_memory(p, 0, hpage_pmd_size); 909 
munmap(p, hpage_pmd_size); 910 exit(exit_status); 911 } 912 913 wait(&wstatus); 914 exit_status += WEXITSTATUS(wstatus); 915 916 printf("Check if parent still has huge page..."); 917 if (check_huge(p)) 918 success("OK"); 919 else 920 fail("Fail"); 921 validate_memory(p, 0, hpage_pmd_size); 922 munmap(p, hpage_pmd_size); 923} 924 925static void collapse_max_ptes_shared() 926{ 927 int max_ptes_shared = read_num("khugepaged/max_ptes_shared"); 928 int wstatus; 929 void *p; 930 931 p = alloc_mapping(); 932 933 printf("Allocate huge page..."); 934 madvise(p, hpage_pmd_size, MADV_HUGEPAGE); 935 fill_memory(p, 0, hpage_pmd_size); 936 if (check_huge(p)) 937 success("OK"); 938 else 939 fail("Fail"); 940 941 printf("Share huge page over fork()..."); 942 if (!fork()) { 943 /* Do not touch settings on child exit */ 944 skip_settings_restore = true; 945 exit_status = 0; 946 947 if (check_huge(p)) 948 success("OK"); 949 else 950 fail("Fail"); 951 952 printf("Trigger CoW on page %d of %d...", 953 hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr); 954 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size); 955 if (!check_huge(p)) 956 success("OK"); 957 else 958 fail("Fail"); 959 960 if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p)) 961 fail("Timeout"); 962 else if (!check_huge(p)) 963 success("OK"); 964 else 965 fail("Fail"); 966 967 printf("Trigger CoW on page %d of %d...", 968 hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr); 969 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size); 970 if (!check_huge(p)) 971 success("OK"); 972 else 973 fail("Fail"); 974 975 976 if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p)) 977 fail("Timeout"); 978 else if (check_huge(p)) 979 success("OK"); 980 else 981 fail("Fail"); 982 983 validate_memory(p, 0, hpage_pmd_size); 984 munmap(p, hpage_pmd_size); 985 exit(exit_status); 986 } 987 988 wait(&wstatus); 989 exit_status += WEXITSTATUS(wstatus); 990 991 printf("Check if parent still has huge 
page..."); 992 if (check_huge(p)) 993 success("OK"); 994 else 995 fail("Fail"); 996 validate_memory(p, 0, hpage_pmd_size); 997 munmap(p, hpage_pmd_size); 998} 999 1000int main(void) 1001{ 1002 setbuf(stdout, NULL); 1003 1004 page_size = getpagesize(); 1005 hpage_pmd_size = read_num("hpage_pmd_size"); 1006 hpage_pmd_nr = hpage_pmd_size / page_size; 1007 1008 default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1; 1009 default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8; 1010 default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2; 1011 default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; 1012 1013 save_settings(); 1014 adjust_settings(); 1015 1016 alloc_at_fault(); 1017 collapse_full(); 1018 collapse_empty(); 1019 collapse_single_pte_entry(); 1020 collapse_max_ptes_none(); 1021 collapse_swapin_single_pte(); 1022 collapse_max_ptes_swap(); 1023 collapse_single_pte_entry_compound(); 1024 collapse_full_of_compound(); 1025 collapse_compound_extreme(); 1026 collapse_fork(); 1027 collapse_fork_compound(); 1028 collapse_max_ptes_shared(); 1029 1030 restore_settings(0); 1031}