cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

khugepaged.c (22413B)


      1#define _GNU_SOURCE
      2#include <fcntl.h>
      3#include <limits.h>
      4#include <signal.h>
      5#include <stdio.h>
      6#include <stdlib.h>
      7#include <stdbool.h>
      8#include <string.h>
      9#include <unistd.h>
     10
     11#include <sys/mman.h>
     12#include <sys/wait.h>
     13
     /* Supply MADV_PAGEOUT when the included headers do not define it. */
     #ifndef MADV_PAGEOUT
     #define MADV_PAGEOUT 21
     #endif

     /* Address (1UL << 30) at which alloc_mapping() requests the test VMA. */
     #define BASE_ADDR ((void *)(1UL << 30))

     /* Directory prefix of the THP sysfs files, and the smaps file that is parsed. */
     #define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
     #define PID_SMAPS "/proc/self/smaps"

     /* Geometry values; all three are assigned at the start of main(). */
     static unsigned long hpage_pmd_size;	/* contents of the "hpage_pmd_size" sysfs file */
     static unsigned long page_size;		/* getpagesize() */
     static int hpage_pmd_nr;		/* hpage_pmd_size / page_size */
     25
     /* Values of the "enabled" sysfs file; each one indexes thp_enabled_strings. */
     enum thp_enabled {
     	THP_ALWAYS,
     	THP_MADVISE,
     	THP_NEVER,
     };

     /* Textual form of every enum thp_enabled value, terminated by NULL. */
     static const char *thp_enabled_strings[] = {
     	[THP_ALWAYS]	= "always",
     	[THP_MADVISE]	= "madvise",
     	[THP_NEVER]	= "never",
     	NULL
     };
     38
     /* Values of the "defrag" sysfs file; each one indexes thp_defrag_strings. */
     enum thp_defrag {
     	THP_DEFRAG_ALWAYS,
     	THP_DEFRAG_DEFER,
     	THP_DEFRAG_DEFER_MADVISE,
     	THP_DEFRAG_MADVISE,
     	THP_DEFRAG_NEVER,
     };

     /* Textual form of every enum thp_defrag value, terminated by NULL. */
     static const char *thp_defrag_strings[] = {
     	[THP_DEFRAG_ALWAYS]		= "always",
     	[THP_DEFRAG_DEFER]		= "defer",
     	[THP_DEFRAG_DEFER_MADVISE]	= "defer+madvise",
     	[THP_DEFRAG_MADVISE]		= "madvise",
     	[THP_DEFRAG_NEVER]		= "never",
     	NULL
     };
     55
     /* Values of the "shmem_enabled" sysfs file; each one indexes shmem_enabled_strings. */
     enum shmem_enabled {
     	SHMEM_ALWAYS,
     	SHMEM_WITHIN_SIZE,
     	SHMEM_ADVISE,
     	SHMEM_NEVER,
     	SHMEM_DENY,
     	SHMEM_FORCE,
     };

     /* Textual form of every enum shmem_enabled value, terminated by NULL. */
     static const char *shmem_enabled_strings[] = {
     	[SHMEM_ALWAYS]		= "always",
     	[SHMEM_WITHIN_SIZE]	= "within_size",
     	[SHMEM_ADVISE]		= "advise",
     	[SHMEM_NEVER]		= "never",
     	[SHMEM_DENY]		= "deny",
     	[SHMEM_FORCE]		= "force",
     	NULL
     };
     74
     /*
      * In-memory copy of the files under the "khugepaged/" sysfs directory that
      * write_settings() writes; each member is named after its file.
      */
     struct khugepaged_settings {
     	bool defrag;				/* khugepaged/defrag */
     	unsigned int alloc_sleep_millisecs;	/* khugepaged/alloc_sleep_millisecs */
     	unsigned int scan_sleep_millisecs;	/* khugepaged/scan_sleep_millisecs */
     	unsigned int max_ptes_none;		/* khugepaged/max_ptes_none */
     	unsigned int max_ptes_swap;		/* khugepaged/max_ptes_swap */
     	unsigned int max_ptes_shared;		/* khugepaged/max_ptes_shared */
     	unsigned long pages_to_scan;		/* khugepaged/pages_to_scan */
     };
     84
     85struct settings {
     86	enum thp_enabled thp_enabled;
     87	enum thp_defrag thp_defrag;
     88	enum shmem_enabled shmem_enabled;
     89	bool use_zero_page;
     90	struct khugepaged_settings khugepaged;
     91};
     92
     93static struct settings default_settings = {
     94	.thp_enabled = THP_MADVISE,
     95	.thp_defrag = THP_DEFRAG_ALWAYS,
     96	.shmem_enabled = SHMEM_NEVER,
     97	.use_zero_page = 0,
     98	.khugepaged = {
     99		.defrag = 1,
    100		.alloc_sleep_millisecs = 10,
    101		.scan_sleep_millisecs = 10,
    102	},
    103};
    104
    105static struct settings saved_settings;
    106static bool skip_settings_restore;
    107
    108static int exit_status;
    109
    /* Print msg in green (ANSI colour 32), preceded by a space and ended by a newline. */
    static void success(const char *msg)
    {
    	fprintf(stdout, " \e[32m%s\e[0m\n", msg);
    }
    114
    115static void fail(const char *msg)
    116{
    117	printf(" \e[31m%s\e[0m\n", msg);
    118	exit_status++;
    119}
    120
    /*
     * Read at most buflen - 1 bytes from the beginning of path into buf and
     * NUL-terminate the result.
     *
     * Returns the number of bytes stored (excluding the terminator), or 0 when
     * buf has no room for any data, the file cannot be opened, the read fails,
     * or the file is empty.
     */
    static int read_file(const char *path, char *buf, size_t buflen)
    {
    	int fd;
    	ssize_t numread;

    	/*
    	 * With buflen == 0 the "buflen - 1" below would wrap around to SIZE_MAX
    	 * and let read() overrun buf; with buflen == 1 nothing could be read
    	 * anyway. Both cases are reported as "nothing read".
    	 */
    	if (buflen < 2)
    		return 0;

    	fd = open(path, O_RDONLY);
    	if (fd == -1)
    		return 0;

    	numread = read(fd, buf, buflen - 1);
    	if (numread < 1) {
    		close(fd);
    		return 0;
    	}

    	buf[numread] = '\0';
    	close(fd);

    	return (unsigned int) numread;
    }
    141
    /*
     * Write the first buflen - 1 bytes of buf to the existing file path.
     * Callers pass strlen() + 1 as buflen, so the terminating NUL is not
     * written. The file is opened write-only and is neither created nor
     * truncated.
     *
     * Returns the number of bytes written, or 0 when there is nothing to write,
     * the file cannot be opened, or write() fails or writes nothing.
     */
    static int write_file(const char *path, const char *buf, size_t buflen)
    {
    	int fd;
    	ssize_t numwritten;

    	/*
    	 * With buflen == 0 the "buflen - 1" below would wrap around to SIZE_MAX
    	 * and make write() read far beyond buf; with buflen == 1 there is no
    	 * payload. Both cases are reported as "nothing written".
    	 */
    	if (buflen < 2)
    		return 0;

    	fd = open(path, O_WRONLY);
    	if (fd == -1)
    		return 0;

    	numwritten = write(fd, buf, buflen - 1);
    	close(fd);
    	if (numwritten < 1)
    		return 0;

    	return (unsigned int) numwritten;
    }
    158
    159static int read_string(const char *name, const char *strings[])
    160{
    161	char path[PATH_MAX];
    162	char buf[256];
    163	char *c;
    164	int ret;
    165
    166	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
    167	if (ret >= PATH_MAX) {
    168		printf("%s: Pathname is too long\n", __func__);
    169		exit(EXIT_FAILURE);
    170	}
    171
    172	if (!read_file(path, buf, sizeof(buf))) {
    173		perror(path);
    174		exit(EXIT_FAILURE);
    175	}
    176
    177	c = strchr(buf, '[');
    178	if (!c) {
    179		printf("%s: Parse failure\n", __func__);
    180		exit(EXIT_FAILURE);
    181	}
    182
    183	c++;
    184	memmove(buf, c, sizeof(buf) - (c - buf));
    185
    186	c = strchr(buf, ']');
    187	if (!c) {
    188		printf("%s: Parse failure\n", __func__);
    189		exit(EXIT_FAILURE);
    190	}
    191	*c = '\0';
    192
    193	ret = 0;
    194	while (strings[ret]) {
    195		if (!strcmp(strings[ret], buf))
    196			return ret;
    197		ret++;
    198	}
    199
    200	printf("Failed to parse %s\n", name);
    201	exit(EXIT_FAILURE);
    202}
    203
    204static void write_string(const char *name, const char *val)
    205{
    206	char path[PATH_MAX];
    207	int ret;
    208
    209	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
    210	if (ret >= PATH_MAX) {
    211		printf("%s: Pathname is too long\n", __func__);
    212		exit(EXIT_FAILURE);
    213	}
    214
    215	if (!write_file(path, val, strlen(val) + 1)) {
    216		perror(path);
    217		exit(EXIT_FAILURE);
    218	}
    219}
    220
    221static const unsigned long read_num(const char *name)
    222{
    223	char path[PATH_MAX];
    224	char buf[21];
    225	int ret;
    226
    227	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
    228	if (ret >= PATH_MAX) {
    229		printf("%s: Pathname is too long\n", __func__);
    230		exit(EXIT_FAILURE);
    231	}
    232
    233	ret = read_file(path, buf, sizeof(buf));
    234	if (ret < 0) {
    235		perror("read_file(read_num)");
    236		exit(EXIT_FAILURE);
    237	}
    238
    239	return strtoul(buf, NULL, 10);
    240}
    241
    242static void write_num(const char *name, unsigned long num)
    243{
    244	char path[PATH_MAX];
    245	char buf[21];
    246	int ret;
    247
    248	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
    249	if (ret >= PATH_MAX) {
    250		printf("%s: Pathname is too long\n", __func__);
    251		exit(EXIT_FAILURE);
    252	}
    253
    254	sprintf(buf, "%ld", num);
    255	if (!write_file(path, buf, strlen(buf) + 1)) {
    256		perror(path);
    257		exit(EXIT_FAILURE);
    258	}
    259}
    260
    261static void write_settings(struct settings *settings)
    262{
    263	struct khugepaged_settings *khugepaged = &settings->khugepaged;
    264
    265	write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
    266	write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
    267	write_string("shmem_enabled",
    268			shmem_enabled_strings[settings->shmem_enabled]);
    269	write_num("use_zero_page", settings->use_zero_page);
    270
    271	write_num("khugepaged/defrag", khugepaged->defrag);
    272	write_num("khugepaged/alloc_sleep_millisecs",
    273			khugepaged->alloc_sleep_millisecs);
    274	write_num("khugepaged/scan_sleep_millisecs",
    275			khugepaged->scan_sleep_millisecs);
    276	write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
    277	write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
    278	write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
    279	write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
    280}
    281
    282static void restore_settings(int sig)
    283{
    284	if (skip_settings_restore)
    285		goto out;
    286
    287	printf("Restore THP and khugepaged settings...");
    288	write_settings(&saved_settings);
    289	success("OK");
    290	if (sig)
    291		exit(EXIT_FAILURE);
    292out:
    293	exit(exit_status);
    294}
    295
    296static void save_settings(void)
    297{
    298	printf("Save THP and khugepaged settings...");
    299	saved_settings = (struct settings) {
    300		.thp_enabled = read_string("enabled", thp_enabled_strings),
    301		.thp_defrag = read_string("defrag", thp_defrag_strings),
    302		.shmem_enabled =
    303			read_string("shmem_enabled", shmem_enabled_strings),
    304		.use_zero_page = read_num("use_zero_page"),
    305	};
    306	saved_settings.khugepaged = (struct khugepaged_settings) {
    307		.defrag = read_num("khugepaged/defrag"),
    308		.alloc_sleep_millisecs =
    309			read_num("khugepaged/alloc_sleep_millisecs"),
    310		.scan_sleep_millisecs =
    311			read_num("khugepaged/scan_sleep_millisecs"),
    312		.max_ptes_none = read_num("khugepaged/max_ptes_none"),
    313		.max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
    314		.max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
    315		.pages_to_scan = read_num("khugepaged/pages_to_scan"),
    316	};
    317	success("OK");
    318
    319	signal(SIGTERM, restore_settings);
    320	signal(SIGINT, restore_settings);
    321	signal(SIGHUP, restore_settings);
    322	signal(SIGQUIT, restore_settings);
    323}
    324
    325static void adjust_settings(void)
    326{
    327
    328	printf("Adjust settings...");
    329	write_settings(&default_settings);
    330	success("OK");
    331}
    332
    /* Size of the line buffers handed to check_for_pattern(). */
    #define MAX_LINE_LENGTH 500

    /*
     * Read lines from fp, starting at its current position, until one begins
     * with pattern.
     *
     * buf must provide MAX_LINE_LENGTH bytes. On success the matching line is
     * left in buf and true is returned; false means the end of the stream (or a
     * read error) was reached first.
     */
    static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
    {
    	const size_t prefix_len = strlen(pattern);

    	for (;;) {
    		if (fgets(buf, MAX_LINE_LENGTH, fp) == NULL)
    			return false;
    		if (strncmp(buf, pattern, prefix_len) == 0)
    			return true;
    	}
    }
    343
    344static bool check_huge(void *addr)
    345{
    346	bool thp = false;
    347	int ret;
    348	FILE *fp;
    349	char buffer[MAX_LINE_LENGTH];
    350	char addr_pattern[MAX_LINE_LENGTH];
    351
    352	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
    353		       (unsigned long) addr);
    354	if (ret >= MAX_LINE_LENGTH) {
    355		printf("%s: Pattern is too long\n", __func__);
    356		exit(EXIT_FAILURE);
    357	}
    358
    359
    360	fp = fopen(PID_SMAPS, "r");
    361	if (!fp) {
    362		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
    363		exit(EXIT_FAILURE);
    364	}
    365	if (!check_for_pattern(fp, addr_pattern, buffer))
    366		goto err_out;
    367
    368	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
    369		       hpage_pmd_size >> 10);
    370	if (ret >= MAX_LINE_LENGTH) {
    371		printf("%s: Pattern is too long\n", __func__);
    372		exit(EXIT_FAILURE);
    373	}
    374	/*
    375	 * Fetch the AnonHugePages: in the same block and check whether it got
    376	 * the expected number of hugeepages next.
    377	 */
    378	if (!check_for_pattern(fp, "AnonHugePages:", buffer))
    379		goto err_out;
    380
    381	if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
    382		goto err_out;
    383
    384	thp = true;
    385err_out:
    386	fclose(fp);
    387	return thp;
    388}
    389
    390
    391static bool check_swap(void *addr, unsigned long size)
    392{
    393	bool swap = false;
    394	int ret;
    395	FILE *fp;
    396	char buffer[MAX_LINE_LENGTH];
    397	char addr_pattern[MAX_LINE_LENGTH];
    398
    399	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
    400		       (unsigned long) addr);
    401	if (ret >= MAX_LINE_LENGTH) {
    402		printf("%s: Pattern is too long\n", __func__);
    403		exit(EXIT_FAILURE);
    404	}
    405
    406
    407	fp = fopen(PID_SMAPS, "r");
    408	if (!fp) {
    409		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
    410		exit(EXIT_FAILURE);
    411	}
    412	if (!check_for_pattern(fp, addr_pattern, buffer))
    413		goto err_out;
    414
    415	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
    416		       size >> 10);
    417	if (ret >= MAX_LINE_LENGTH) {
    418		printf("%s: Pattern is too long\n", __func__);
    419		exit(EXIT_FAILURE);
    420	}
    421	/*
    422	 * Fetch the Swap: in the same block and check whether it got
    423	 * the expected number of hugeepages next.
    424	 */
    425	if (!check_for_pattern(fp, "Swap:", buffer))
    426		goto err_out;
    427
    428	if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
    429		goto err_out;
    430
    431	swap = true;
    432err_out:
    433	fclose(fp);
    434	return swap;
    435}
    436
    437static void *alloc_mapping(void)
    438{
    439	void *p;
    440
    441	p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
    442			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    443	if (p != BASE_ADDR) {
    444		printf("Failed to allocate VMA at %p\n", BASE_ADDR);
    445		exit(EXIT_FAILURE);
    446	}
    447
    448	return p;
    449}
    450
    451static void fill_memory(int *p, unsigned long start, unsigned long end)
    452{
    453	int i;
    454
    455	for (i = start / page_size; i < end / page_size; i++)
    456		p[i * page_size / sizeof(*p)] = i + 0xdead0000;
    457}
    458
    459static void validate_memory(int *p, unsigned long start, unsigned long end)
    460{
    461	int i;
    462
    463	for (i = start / page_size; i < end / page_size; i++) {
    464		if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
    465			printf("Page %d is corrupted: %#x\n",
    466					i, p[i * page_size / sizeof(*p)]);
    467			exit(EXIT_FAILURE);
    468		}
    469	}
    470}
    471
    472#define TICK 500000
    473static bool wait_for_scan(const char *msg, char *p)
    474{
    475	int full_scans;
    476	int timeout = 6; /* 3 seconds */
    477
    478	/* Sanity check */
    479	if (check_huge(p)) {
    480		printf("Unexpected huge page\n");
    481		exit(EXIT_FAILURE);
    482	}
    483
    484	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
    485
    486	/* Wait until the second full_scan completed */
    487	full_scans = read_num("khugepaged/full_scans") + 2;
    488
    489	printf("%s...", msg);
    490	while (timeout--) {
    491		if (check_huge(p))
    492			break;
    493		if (read_num("khugepaged/full_scans") >= full_scans)
    494			break;
    495		printf(".");
    496		usleep(TICK);
    497	}
    498
    499	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
    500
    501	return timeout == -1;
    502}
    503
    504static void alloc_at_fault(void)
    505{
    506	struct settings settings = default_settings;
    507	char *p;
    508
    509	settings.thp_enabled = THP_ALWAYS;
    510	write_settings(&settings);
    511
    512	p = alloc_mapping();
    513	*p = 1;
    514	printf("Allocate huge page on fault...");
    515	if (check_huge(p))
    516		success("OK");
    517	else
    518		fail("Fail");
    519
    520	write_settings(&default_settings);
    521
    522	madvise(p, page_size, MADV_DONTNEED);
    523	printf("Split huge PMD on MADV_DONTNEED...");
    524	if (!check_huge(p))
    525		success("OK");
    526	else
    527		fail("Fail");
    528	munmap(p, hpage_pmd_size);
    529}
    530
    531static void collapse_full(void)
    532{
    533	void *p;
    534
    535	p = alloc_mapping();
    536	fill_memory(p, 0, hpage_pmd_size);
    537	if (wait_for_scan("Collapse fully populated PTE table", p))
    538		fail("Timeout");
    539	else if (check_huge(p))
    540		success("OK");
    541	else
    542		fail("Fail");
    543	validate_memory(p, 0, hpage_pmd_size);
    544	munmap(p, hpage_pmd_size);
    545}
    546
    547static void collapse_empty(void)
    548{
    549	void *p;
    550
    551	p = alloc_mapping();
    552	if (wait_for_scan("Do not collapse empty PTE table", p))
    553		fail("Timeout");
    554	else if (check_huge(p))
    555		fail("Fail");
    556	else
    557		success("OK");
    558	munmap(p, hpage_pmd_size);
    559}
    560
    561static void collapse_single_pte_entry(void)
    562{
    563	void *p;
    564
    565	p = alloc_mapping();
    566	fill_memory(p, 0, page_size);
    567	if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
    568		fail("Timeout");
    569	else if (check_huge(p))
    570		success("OK");
    571	else
    572		fail("Fail");
    573	validate_memory(p, 0, page_size);
    574	munmap(p, hpage_pmd_size);
    575}
    576
    577static void collapse_max_ptes_none(void)
    578{
    579	int max_ptes_none = hpage_pmd_nr / 2;
    580	struct settings settings = default_settings;
    581	void *p;
    582
    583	settings.khugepaged.max_ptes_none = max_ptes_none;
    584	write_settings(&settings);
    585
    586	p = alloc_mapping();
    587
    588	fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
    589	if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
    590		fail("Timeout");
    591	else if (check_huge(p))
    592		fail("Fail");
    593	else
    594		success("OK");
    595	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
    596
    597	fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
    598	if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
    599		fail("Timeout");
    600	else if (check_huge(p))
    601		success("OK");
    602	else
    603		fail("Fail");
    604	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
    605
    606	munmap(p, hpage_pmd_size);
    607	write_settings(&default_settings);
    608}
    609
    610static void collapse_swapin_single_pte(void)
    611{
    612	void *p;
    613	p = alloc_mapping();
    614	fill_memory(p, 0, hpage_pmd_size);
    615
    616	printf("Swapout one page...");
    617	if (madvise(p, page_size, MADV_PAGEOUT)) {
    618		perror("madvise(MADV_PAGEOUT)");
    619		exit(EXIT_FAILURE);
    620	}
    621	if (check_swap(p, page_size)) {
    622		success("OK");
    623	} else {
    624		fail("Fail");
    625		goto out;
    626	}
    627
    628	if (wait_for_scan("Collapse with swapping in single PTE entry", p))
    629		fail("Timeout");
    630	else if (check_huge(p))
    631		success("OK");
    632	else
    633		fail("Fail");
    634	validate_memory(p, 0, hpage_pmd_size);
    635out:
    636	munmap(p, hpage_pmd_size);
    637}
    638
    639static void collapse_max_ptes_swap(void)
    640{
    641	int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
    642	void *p;
    643
    644	p = alloc_mapping();
    645
    646	fill_memory(p, 0, hpage_pmd_size);
    647	printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
    648	if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
    649		perror("madvise(MADV_PAGEOUT)");
    650		exit(EXIT_FAILURE);
    651	}
    652	if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
    653		success("OK");
    654	} else {
    655		fail("Fail");
    656		goto out;
    657	}
    658
    659	if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
    660		fail("Timeout");
    661	else if (check_huge(p))
    662		fail("Fail");
    663	else
    664		success("OK");
    665	validate_memory(p, 0, hpage_pmd_size);
    666
    667	fill_memory(p, 0, hpage_pmd_size);
    668	printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
    669	if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
    670		perror("madvise(MADV_PAGEOUT)");
    671		exit(EXIT_FAILURE);
    672	}
    673	if (check_swap(p, max_ptes_swap * page_size)) {
    674		success("OK");
    675	} else {
    676		fail("Fail");
    677		goto out;
    678	}
    679
    680	if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
    681		fail("Timeout");
    682	else if (check_huge(p))
    683		success("OK");
    684	else
    685		fail("Fail");
    686	validate_memory(p, 0, hpage_pmd_size);
    687out:
    688	munmap(p, hpage_pmd_size);
    689}
    690
    691static void collapse_single_pte_entry_compound(void)
    692{
    693	void *p;
    694
    695	p = alloc_mapping();
    696
    697	printf("Allocate huge page...");
    698	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
    699	fill_memory(p, 0, hpage_pmd_size);
    700	if (check_huge(p))
    701		success("OK");
    702	else
    703		fail("Fail");
    704	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
    705
    706	printf("Split huge page leaving single PTE mapping compound page...");
    707	madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
    708	if (!check_huge(p))
    709		success("OK");
    710	else
    711		fail("Fail");
    712
    713	if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
    714		fail("Timeout");
    715	else if (check_huge(p))
    716		success("OK");
    717	else
    718		fail("Fail");
    719	validate_memory(p, 0, page_size);
    720	munmap(p, hpage_pmd_size);
    721}
    722
    723static void collapse_full_of_compound(void)
    724{
    725	void *p;
    726
    727	p = alloc_mapping();
    728
    729	printf("Allocate huge page...");
    730	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
    731	fill_memory(p, 0, hpage_pmd_size);
    732	if (check_huge(p))
    733		success("OK");
    734	else
    735		fail("Fail");
    736
    737	printf("Split huge page leaving single PTE page table full of compound pages...");
    738	madvise(p, page_size, MADV_NOHUGEPAGE);
    739	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
    740	if (!check_huge(p))
    741		success("OK");
    742	else
    743		fail("Fail");
    744
    745	if (wait_for_scan("Collapse PTE table full of compound pages", p))
    746		fail("Timeout");
    747	else if (check_huge(p))
    748		success("OK");
    749	else
    750		fail("Fail");
    751	validate_memory(p, 0, hpage_pmd_size);
    752	munmap(p, hpage_pmd_size);
    753}
    754
    755static void collapse_compound_extreme(void)
    756{
    757	void *p;
    758	int i;
    759
    760	p = alloc_mapping();
    761	for (i = 0; i < hpage_pmd_nr; i++) {
    762		printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
    763				i + 1, hpage_pmd_nr);
    764
    765		madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
    766		fill_memory(BASE_ADDR, 0, hpage_pmd_size);
    767		if (!check_huge(BASE_ADDR)) {
    768			printf("Failed to allocate huge page\n");
    769			exit(EXIT_FAILURE);
    770		}
    771		madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
    772
    773		p = mremap(BASE_ADDR - i * page_size,
    774				i * page_size + hpage_pmd_size,
    775				(i + 1) * page_size,
    776				MREMAP_MAYMOVE | MREMAP_FIXED,
    777				BASE_ADDR + 2 * hpage_pmd_size);
    778		if (p == MAP_FAILED) {
    779			perror("mremap+unmap");
    780			exit(EXIT_FAILURE);
    781		}
    782
    783		p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
    784				(i + 1) * page_size,
    785				(i + 1) * page_size + hpage_pmd_size,
    786				MREMAP_MAYMOVE | MREMAP_FIXED,
    787				BASE_ADDR - (i + 1) * page_size);
    788		if (p == MAP_FAILED) {
    789			perror("mremap+alloc");
    790			exit(EXIT_FAILURE);
    791		}
    792	}
    793
    794	munmap(BASE_ADDR, hpage_pmd_size);
    795	fill_memory(p, 0, hpage_pmd_size);
    796	if (!check_huge(p))
    797		success("OK");
    798	else
    799		fail("Fail");
    800
    801	if (wait_for_scan("Collapse PTE table full of different compound pages", p))
    802		fail("Timeout");
    803	else if (check_huge(p))
    804		success("OK");
    805	else
    806		fail("Fail");
    807
    808	validate_memory(p, 0, hpage_pmd_size);
    809	munmap(p, hpage_pmd_size);
    810}
    811
    812static void collapse_fork(void)
    813{
    814	int wstatus;
    815	void *p;
    816
    817	p = alloc_mapping();
    818
    819	printf("Allocate small page...");
    820	fill_memory(p, 0, page_size);
    821	if (!check_huge(p))
    822		success("OK");
    823	else
    824		fail("Fail");
    825
    826	printf("Share small page over fork()...");
    827	if (!fork()) {
    828		/* Do not touch settings on child exit */
    829		skip_settings_restore = true;
    830		exit_status = 0;
    831
    832		if (!check_huge(p))
    833			success("OK");
    834		else
    835			fail("Fail");
    836
    837		fill_memory(p, page_size, 2 * page_size);
    838
    839		if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
    840			fail("Timeout");
    841		else if (check_huge(p))
    842			success("OK");
    843		else
    844			fail("Fail");
    845
    846		validate_memory(p, 0, page_size);
    847		munmap(p, hpage_pmd_size);
    848		exit(exit_status);
    849	}
    850
    851	wait(&wstatus);
    852	exit_status += WEXITSTATUS(wstatus);
    853
    854	printf("Check if parent still has small page...");
    855	if (!check_huge(p))
    856		success("OK");
    857	else
    858		fail("Fail");
    859	validate_memory(p, 0, page_size);
    860	munmap(p, hpage_pmd_size);
    861}
    862
    863static void collapse_fork_compound(void)
    864{
    865	int wstatus;
    866	void *p;
    867
    868	p = alloc_mapping();
    869
    870	printf("Allocate huge page...");
    871	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
    872	fill_memory(p, 0, hpage_pmd_size);
    873	if (check_huge(p))
    874		success("OK");
    875	else
    876		fail("Fail");
    877
    878	printf("Share huge page over fork()...");
    879	if (!fork()) {
    880		/* Do not touch settings on child exit */
    881		skip_settings_restore = true;
    882		exit_status = 0;
    883
    884		if (check_huge(p))
    885			success("OK");
    886		else
    887			fail("Fail");
    888
    889		printf("Split huge page PMD in child process...");
    890		madvise(p, page_size, MADV_NOHUGEPAGE);
    891		madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
    892		if (!check_huge(p))
    893			success("OK");
    894		else
    895			fail("Fail");
    896		fill_memory(p, 0, page_size);
    897
    898		write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
    899		if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
    900			fail("Timeout");
    901		else if (check_huge(p))
    902			success("OK");
    903		else
    904			fail("Fail");
    905		write_num("khugepaged/max_ptes_shared",
    906				default_settings.khugepaged.max_ptes_shared);
    907
    908		validate_memory(p, 0, hpage_pmd_size);
    909		munmap(p, hpage_pmd_size);
    910		exit(exit_status);
    911	}
    912
    913	wait(&wstatus);
    914	exit_status += WEXITSTATUS(wstatus);
    915
    916	printf("Check if parent still has huge page...");
    917	if (check_huge(p))
    918		success("OK");
    919	else
    920		fail("Fail");
    921	validate_memory(p, 0, hpage_pmd_size);
    922	munmap(p, hpage_pmd_size);
    923}
    924
    925static void collapse_max_ptes_shared()
    926{
    927	int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
    928	int wstatus;
    929	void *p;
    930
    931	p = alloc_mapping();
    932
    933	printf("Allocate huge page...");
    934	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
    935	fill_memory(p, 0, hpage_pmd_size);
    936	if (check_huge(p))
    937		success("OK");
    938	else
    939		fail("Fail");
    940
    941	printf("Share huge page over fork()...");
    942	if (!fork()) {
    943		/* Do not touch settings on child exit */
    944		skip_settings_restore = true;
    945		exit_status = 0;
    946
    947		if (check_huge(p))
    948			success("OK");
    949		else
    950			fail("Fail");
    951
    952		printf("Trigger CoW on page %d of %d...",
    953				hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
    954		fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
    955		if (!check_huge(p))
    956			success("OK");
    957		else
    958			fail("Fail");
    959
    960		if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
    961			fail("Timeout");
    962		else if (!check_huge(p))
    963			success("OK");
    964		else
    965			fail("Fail");
    966
    967		printf("Trigger CoW on page %d of %d...",
    968				hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
    969		fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
    970		if (!check_huge(p))
    971			success("OK");
    972		else
    973			fail("Fail");
    974
    975
    976		if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
    977			fail("Timeout");
    978		else if (check_huge(p))
    979			success("OK");
    980		else
    981			fail("Fail");
    982
    983		validate_memory(p, 0, hpage_pmd_size);
    984		munmap(p, hpage_pmd_size);
    985		exit(exit_status);
    986	}
    987
    988	wait(&wstatus);
    989	exit_status += WEXITSTATUS(wstatus);
    990
    991	printf("Check if parent still has huge page...");
    992	if (check_huge(p))
    993		success("OK");
    994	else
    995		fail("Fail");
    996	validate_memory(p, 0, hpage_pmd_size);
    997	munmap(p, hpage_pmd_size);
    998}
    999
   1000int main(void)
   1001{
   1002	setbuf(stdout, NULL);
   1003
   1004	page_size = getpagesize();
   1005	hpage_pmd_size = read_num("hpage_pmd_size");
   1006	hpage_pmd_nr = hpage_pmd_size / page_size;
   1007
   1008	default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
   1009	default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
   1010	default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
   1011	default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
   1012
   1013	save_settings();
   1014	adjust_settings();
   1015
   1016	alloc_at_fault();
   1017	collapse_full();
   1018	collapse_empty();
   1019	collapse_single_pte_entry();
   1020	collapse_max_ptes_none();
   1021	collapse_swapin_single_pte();
   1022	collapse_max_ptes_swap();
   1023	collapse_single_pte_entry_compound();
   1024	collapse_full_of_compound();
   1025	collapse_compound_extreme();
   1026	collapse_fork();
   1027	collapse_fork_compound();
   1028	collapse_max_ptes_shared();
   1029
   1030	restore_settings(0);
   1031}