cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

test_memcontrol.c (27493B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2#define _GNU_SOURCE
      3
      4#include <linux/limits.h>
      5#include <linux/oom.h>
      6#include <fcntl.h>
      7#include <stdio.h>
      8#include <stdlib.h>
      9#include <string.h>
     10#include <sys/stat.h>
     11#include <sys/types.h>
     12#include <unistd.h>
     13#include <sys/socket.h>
     14#include <sys/wait.h>
     15#include <arpa/inet.h>
     16#include <netinet/in.h>
     17#include <netdb.h>
     18#include <errno.h>
     19#include <sys/mman.h>
     20
     21#include "../kselftest.h"
     22#include "cgroup_util.h"
     23
     24static bool has_localevents;
     25static bool has_recursiveprot;
     26
/*
 * This test creates two nested cgroups with and without enabling
 * the memory controller: with "+memory" in cgroup.subtree_control the
 * child must list "memory" in cgroup.controllers, without it the child
 * must not.
 */
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup_free;

	if (cg_create(parent))
		goto cleanup_free;

	/* Delegating "+memory" makes the controller visible to children */
	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup_parent;

	if (cg_create(child))
		goto cleanup_parent;

	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup_child;

	/* Create two nested cgroups without enabling memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup_free2;

	if (cg_create(parent2))
		goto cleanup_free2;

	if (cg_create(child2))
		goto cleanup_parent2;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup_all;

	/* Without delegation, "memory" must NOT appear in the child */
	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup_all;

	ret = KSFT_PASS;

	/*
	 * The cleanup labels intentionally fall through: each label undoes
	 * one more setup step, ending with the first hierarchy's teardown.
	 */
cleanup_all:
	cg_destroy(child2);
cleanup_parent2:
	cg_destroy(parent2);
cleanup_free2:
	free(parent2);
	free(child2);
cleanup_child:
	cg_destroy(child);
cleanup_parent:
	cg_destroy(parent);
cleanup_free:
	free(parent);
	free(child);

	return ret;
}
     92
     93static int alloc_anon_50M_check(const char *cgroup, void *arg)
     94{
     95	size_t size = MB(50);
     96	char *buf, *ptr;
     97	long anon, current;
     98	int ret = -1;
     99
    100	buf = malloc(size);
    101	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
    102		*ptr = 0;
    103
    104	current = cg_read_long(cgroup, "memory.current");
    105	if (current < size)
    106		goto cleanup;
    107
    108	if (!values_close(size, current, 3))
    109		goto cleanup;
    110
    111	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
    112	if (anon < 0)
    113		goto cleanup;
    114
    115	if (!values_close(anon, current, 3))
    116		goto cleanup;
    117
    118	ret = 0;
    119cleanup:
    120	free(buf);
    121	return ret;
    122}
    123
    124static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
    125{
    126	size_t size = MB(50);
    127	int ret = -1;
    128	long current, file;
    129	int fd;
    130
    131	fd = get_temp_fd();
    132	if (fd < 0)
    133		return -1;
    134
    135	if (alloc_pagecache(fd, size))
    136		goto cleanup;
    137
    138	current = cg_read_long(cgroup, "memory.current");
    139	if (current < size)
    140		goto cleanup;
    141
    142	file = cg_read_key_long(cgroup, "memory.stat", "file ");
    143	if (file < 0)
    144		goto cleanup;
    145
    146	if (!values_close(file, current, 10))
    147		goto cleanup;
    148
    149	ret = 0;
    150
    151cleanup:
    152	close(fd);
    153	return ret;
    154}
    155
    156/*
    157 * This test create a memory cgroup, allocates
    158 * some anonymous memory and some pagecache
    159 * and check memory.current and some memory.stat values.
    160 */
    161static int test_memcg_current(const char *root)
    162{
    163	int ret = KSFT_FAIL;
    164	long current;
    165	char *memcg;
    166
    167	memcg = cg_name(root, "memcg_test");
    168	if (!memcg)
    169		goto cleanup;
    170
    171	if (cg_create(memcg))
    172		goto cleanup;
    173
    174	current = cg_read_long(memcg, "memory.current");
    175	if (current != 0)
    176		goto cleanup;
    177
    178	if (cg_run(memcg, alloc_anon_50M_check, NULL))
    179		goto cleanup;
    180
    181	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
    182		goto cleanup;
    183
    184	ret = KSFT_PASS;
    185
    186cleanup:
    187	cg_destroy(memcg);
    188	free(memcg);
    189
    190	return ret;
    191}
    192
    193static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
    194{
    195	int fd = (long)arg;
    196	int ppid = getppid();
    197
    198	if (alloc_pagecache(fd, MB(50)))
    199		return -1;
    200
    201	while (getppid() == ppid)
    202		sleep(1);
    203
    204	return 0;
    205}
    206
    207static int alloc_anon_noexit(const char *cgroup, void *arg)
    208{
    209	int ppid = getppid();
    210	size_t size = (unsigned long)arg;
    211	char *buf, *ptr;
    212
    213	buf = malloc(size);
    214	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
    215		*ptr = 0;
    216
    217	while (getppid() == ppid)
    218		sleep(1);
    219
    220	free(buf);
    221	return 0;
    222}
    223
    224/*
    225 * Wait until processes are killed asynchronously by the OOM killer
    226 * If we exceed a timeout, fail.
    227 */
    228static int cg_test_proc_killed(const char *cgroup)
    229{
    230	int limit;
    231
    232	for (limit = 10; limit > 0; limit--) {
    233		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
    234			return 0;
    235
    236		usleep(100000);
    237	}
    238	return -1;
    239}
    240
    241/*
    242 * First, this test creates the following hierarchy:
    243 * A       memory.min = 0,    memory.max = 200M
    244 * A/B     memory.min = 50M
    245 * A/B/C   memory.min = 75M,  memory.current = 50M
    246 * A/B/D   memory.min = 25M,  memory.current = 50M
    247 * A/B/E   memory.min = 0,    memory.current = 50M
    248 * A/B/F   memory.min = 500M, memory.current = 0
    249 *
    250 * (or memory.low if we test soft protection)
    251 *
    252 * Usages are pagecache and the test keeps a running
    253 * process in every leaf cgroup.
    254 * Then it creates A/G and creates a significant
    255 * memory pressure in A.
    256 *
    257 * Then it checks actual memory usages and expects that:
    258 * A/B    memory.current ~= 50M
    259 * A/B/C  memory.current ~= 29M
    260 * A/B/D  memory.current ~= 21M
    261 * A/B/E  memory.current ~= 0
    262 * A/B/F  memory.current  = 0
    263 * (for origin of the numbers, see model in memcg_protection.m.)
    264 *
    265 * After that it tries to allocate more than there is
    266 * unprotected memory in A available, and checks that:
    267 * a) memory.min protects pagecache even in this case,
    268 * b) memory.low allows reclaiming page cache with low events.
    269 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	/* One scenario covers both hard (memory.min) and soft (memory.low)
	 * protection; @min selects which attribute is exercised. */
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	/* parent[0] = A, parent[1] = A/B, parent[2] = A/G (see diagram above) */
	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	/* Disable swap so reclaim pressure targets the page cache */
	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		/* The last child (F) gets protection but no usage */
		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	/* Apply the protection values from the diagram above */
	if (cg_write(parent[1],   attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

	/* Wait (bounded) until the background children populated ~150M */
	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	/* Create memory pressure in A by allocating in the sibling A/G */
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(29), 10))
		goto cleanup;

	if (!values_close(c[1], MB(21), 10))
		goto cleanup;

	if (c[3] != 0)
		goto cleanup;

	/* Overcommit: allocate more than the unprotected memory in A */
	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents from allocating anon memory\n");
		goto cleanup;
	}

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	/* memory.low only: check low/oom event distribution across children */
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		/* Protected children (C, D) must have seen low events... */
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		/* ...while unprotected ones (E, F) must not */
		if (i > no_low_events_index && low)
			goto cleanup;

	}

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}
    430
/* Exercise hard protection: memory.min must be honored under pressure. */
static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}
    435
/* Exercise soft protection: memory.low yields to pressure, with low events. */
static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}
    440
    441static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
    442{
    443	size_t size = MB(50);
    444	int ret = -1;
    445	long current, high, max;
    446	int fd;
    447
    448	high = cg_read_long(cgroup, "memory.high");
    449	max = cg_read_long(cgroup, "memory.max");
    450	if (high != MB(30) && max != MB(30))
    451		return -1;
    452
    453	fd = get_temp_fd();
    454	if (fd < 0)
    455		return -1;
    456
    457	if (alloc_pagecache(fd, size))
    458		goto cleanup;
    459
    460	current = cg_read_long(cgroup, "memory.current");
    461	if (!values_close(current, MB(30), 5))
    462		goto cleanup;
    463
    464	ret = 0;
    465
    466cleanup:
    467	close(fd);
    468	return ret;
    469
    470}
    471
    472/*
    473 * This test checks that memory.high limits the amount of
    474 * memory which can be consumed by either anonymous memory
    475 * or pagecache.
    476 */
    477static int test_memcg_high(const char *root)
    478{
    479	int ret = KSFT_FAIL;
    480	char *memcg;
    481	long high;
    482
    483	memcg = cg_name(root, "memcg_test");
    484	if (!memcg)
    485		goto cleanup;
    486
    487	if (cg_create(memcg))
    488		goto cleanup;
    489
    490	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
    491		goto cleanup;
    492
    493	if (cg_write(memcg, "memory.swap.max", "0"))
    494		goto cleanup;
    495
    496	if (cg_write(memcg, "memory.high", "30M"))
    497		goto cleanup;
    498
    499	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
    500		goto cleanup;
    501
    502	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
    503		goto cleanup;
    504
    505	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
    506		goto cleanup;
    507
    508	high = cg_read_key_long(memcg, "memory.events", "high ");
    509	if (high <= 0)
    510		goto cleanup;
    511
    512	ret = KSFT_PASS;
    513
    514cleanup:
    515	cg_destroy(memcg);
    516	free(memcg);
    517
    518	return ret;
    519}
    520
    521static int alloc_anon_mlock(const char *cgroup, void *arg)
    522{
    523	size_t size = (size_t)arg;
    524	void *buf;
    525
    526	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
    527		   0, 0);
    528	if (buf == MAP_FAILED)
    529		return -1;
    530
    531	mlock(buf, size);
    532	munmap(buf, size);
    533	return 0;
    534}
    535
    536/*
    537 * This test checks that memory.high is able to throttle big single shot
    538 * allocation i.e. large allocation within one kernel entry.
    539 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Snapshot the event counters before applying any limits */
	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

	/* Arm a memory.events notification before starting the allocator */
	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	/* 200M mlocked in one go: one big allocation in a single kernel entry */
	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	/* Block until the armed memory.events notification fires */
	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	/* "high" must have fired; "max" must not have been hit */
	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
    596
    597/*
    598 * This test checks that memory.max limits the amount of
    599 * memory which can be consumed by either anonymous memory
    600 * or pagecache.
    601 */
    602static int test_memcg_max(const char *root)
    603{
    604	int ret = KSFT_FAIL;
    605	char *memcg;
    606	long current, max;
    607
    608	memcg = cg_name(root, "memcg_test");
    609	if (!memcg)
    610		goto cleanup;
    611
    612	if (cg_create(memcg))
    613		goto cleanup;
    614
    615	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
    616		goto cleanup;
    617
    618	if (cg_write(memcg, "memory.swap.max", "0"))
    619		goto cleanup;
    620
    621	if (cg_write(memcg, "memory.max", "30M"))
    622		goto cleanup;
    623
    624	/* Should be killed by OOM killer */
    625	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
    626		goto cleanup;
    627
    628	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
    629		goto cleanup;
    630
    631	current = cg_read_long(memcg, "memory.current");
    632	if (current > MB(30) || !current)
    633		goto cleanup;
    634
    635	max = cg_read_key_long(memcg, "memory.events", "max ");
    636	if (max <= 0)
    637		goto cleanup;
    638
    639	ret = KSFT_PASS;
    640
    641cleanup:
    642	cg_destroy(memcg);
    643	free(memcg);
    644
    645	return ret;
    646}
    647
    648/*
    649 * This test checks that memory.reclaim reclaims the given
    650 * amount of memory (from both anon and file, if possible).
    651 */
    652static int test_memcg_reclaim(const char *root)
    653{
    654	int ret = KSFT_FAIL, fd, retries;
    655	char *memcg;
    656	long current, expected_usage, to_reclaim;
    657	char buf[64];
    658
    659	memcg = cg_name(root, "memcg_test");
    660	if (!memcg)
    661		goto cleanup;
    662
    663	if (cg_create(memcg))
    664		goto cleanup;
    665
    666	current = cg_read_long(memcg, "memory.current");
    667	if (current != 0)
    668		goto cleanup;
    669
    670	fd = get_temp_fd();
    671	if (fd < 0)
    672		goto cleanup;
    673
    674	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
    675
    676	/*
    677	 * If swap is enabled, try to reclaim from both anon and file, else try
    678	 * to reclaim from file only.
    679	 */
    680	if (is_swap_enabled()) {
    681		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
    682		expected_usage = MB(100);
    683	} else
    684		expected_usage = MB(50);
    685
    686	/*
    687	 * Wait until current usage reaches the expected usage (or we run out of
    688	 * retries).
    689	 */
    690	retries = 5;
    691	while (!values_close(cg_read_long(memcg, "memory.current"),
    692			    expected_usage, 10)) {
    693		if (retries--) {
    694			sleep(1);
    695			continue;
    696		} else {
    697			fprintf(stderr,
    698				"failed to allocate %ld for memcg reclaim test\n",
    699				expected_usage);
    700			goto cleanup;
    701		}
    702	}
    703
    704	/*
    705	 * Reclaim until current reaches 30M, this makes sure we hit both anon
    706	 * and file if swap is enabled.
    707	 */
    708	retries = 5;
    709	while (true) {
    710		int err;
    711
    712		current = cg_read_long(memcg, "memory.current");
    713		to_reclaim = current - MB(30);
    714
    715		/*
    716		 * We only keep looping if we get EAGAIN, which means we could
    717		 * not reclaim the full amount.
    718		 */
    719		if (to_reclaim <= 0)
    720			goto cleanup;
    721
    722
    723		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
    724		err = cg_write(memcg, "memory.reclaim", buf);
    725		if (!err) {
    726			/*
    727			 * If writing succeeds, then the written amount should have been
    728			 * fully reclaimed (and maybe more).
    729			 */
    730			current = cg_read_long(memcg, "memory.current");
    731			if (!values_close(current, MB(30), 3) && current > MB(30))
    732				goto cleanup;
    733			break;
    734		}
    735
    736		/* The kernel could not reclaim the full amount, try again. */
    737		if (err == -EAGAIN && retries--)
    738			continue;
    739
    740		/* We got an unexpected error or ran out of retries. */
    741		goto cleanup;
    742	}
    743
    744	ret = KSFT_PASS;
    745cleanup:
    746	cg_destroy(memcg);
    747	free(memcg);
    748	close(fd);
    749
    750	return ret;
    751}
    752
    753static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
    754{
    755	long mem_max = (long)arg;
    756	size_t size = MB(50);
    757	char *buf, *ptr;
    758	long mem_current, swap_current;
    759	int ret = -1;
    760
    761	buf = malloc(size);
    762	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
    763		*ptr = 0;
    764
    765	mem_current = cg_read_long(cgroup, "memory.current");
    766	if (!mem_current || !values_close(mem_current, mem_max, 3))
    767		goto cleanup;
    768
    769	swap_current = cg_read_long(cgroup, "memory.swap.current");
    770	if (!swap_current ||
    771	    !values_close(mem_current + swap_current, size, 3))
    772		goto cleanup;
    773
    774	ret = 0;
    775cleanup:
    776	free(buf);
    777	return ret;
    778}
    779
    780/*
    781 * This test checks that memory.swap.max limits the amount of
    782 * anonymous memory which can be swapped out.
    783 */
    784static int test_memcg_swap_max(const char *root)
    785{
    786	int ret = KSFT_FAIL;
    787	char *memcg;
    788	long max;
    789
    790	if (!is_swap_enabled())
    791		return KSFT_SKIP;
    792
    793	memcg = cg_name(root, "memcg_test");
    794	if (!memcg)
    795		goto cleanup;
    796
    797	if (cg_create(memcg))
    798		goto cleanup;
    799
    800	if (cg_read_long(memcg, "memory.swap.current")) {
    801		ret = KSFT_SKIP;
    802		goto cleanup;
    803	}
    804
    805	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
    806		goto cleanup;
    807
    808	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
    809		goto cleanup;
    810
    811	if (cg_write(memcg, "memory.swap.max", "30M"))
    812		goto cleanup;
    813
    814	if (cg_write(memcg, "memory.max", "30M"))
    815		goto cleanup;
    816
    817	/* Should be killed by OOM killer */
    818	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
    819		goto cleanup;
    820
    821	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
    822		goto cleanup;
    823
    824	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
    825		goto cleanup;
    826
    827	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
    828		goto cleanup;
    829
    830	max = cg_read_key_long(memcg, "memory.events", "max ");
    831	if (max <= 0)
    832		goto cleanup;
    833
    834	ret = KSFT_PASS;
    835
    836cleanup:
    837	cg_destroy(memcg);
    838	free(memcg);
    839
    840	return ret;
    841}
    842
    843/*
    844 * This test disables swapping and tries to allocate anonymous memory
    845 * up to OOM. Then it checks for oom and oom_kill events in
    846 * memory.events.
    847 */
    848static int test_memcg_oom_events(const char *root)
    849{
    850	int ret = KSFT_FAIL;
    851	char *memcg;
    852
    853	memcg = cg_name(root, "memcg_test");
    854	if (!memcg)
    855		goto cleanup;
    856
    857	if (cg_create(memcg))
    858		goto cleanup;
    859
    860	if (cg_write(memcg, "memory.max", "30M"))
    861		goto cleanup;
    862
    863	if (cg_write(memcg, "memory.swap.max", "0"))
    864		goto cleanup;
    865
    866	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
    867		goto cleanup;
    868
    869	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
    870		goto cleanup;
    871
    872	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
    873		goto cleanup;
    874
    875	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
    876		goto cleanup;
    877
    878	ret = KSFT_PASS;
    879
    880cleanup:
    881	cg_destroy(memcg);
    882	free(memcg);
    883
    884	return ret;
    885}
    886
/* Parameters handed to the forked tcp_server() child. */
struct tcp_server_args {
	unsigned short port;	/* TCP port the server should bind to */
	int ctl[2];		/* control pipe: child writes its bind status to ctl[1] */
};
    891
/*
 * Forked server half of the socket accounting test: binds an IPv6
 * socket to srv_args->port, reports the bind status over the control
 * pipe, then floods the accepted client until the connection is reset.
 */
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	/* Only the write end of the control pipe is used here */
	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		/* Tell the parent why bind failed (e.g. EADDRINUSE) */
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	/* Signal readiness: a zero status means "accepting connections" */
	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	/* Write 1M chunks until the client resets the connection */
	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}
    948
    949static int tcp_client(const char *cgroup, unsigned short port)
    950{
    951	const char server[] = "localhost";
    952	struct addrinfo *ai;
    953	char servport[6];
    954	int retries = 0x10; /* nice round number */
    955	int sk, ret;
    956
    957	snprintf(servport, sizeof(servport), "%hd", port);
    958	ret = getaddrinfo(server, servport, NULL, &ai);
    959	if (ret)
    960		return ret;
    961
    962	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
    963	if (sk < 0)
    964		goto free_ainfo;
    965
    966	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
    967	if (ret < 0)
    968		goto close_sk;
    969
    970	ret = KSFT_FAIL;
    971	while (retries--) {
    972		uint8_t buf[0x100000];
    973		long current, sock;
    974
    975		if (read(sk, buf, sizeof(buf)) <= 0)
    976			goto close_sk;
    977
    978		current = cg_read_long(cgroup, "memory.current");
    979		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");
    980
    981		if (current < 0 || sock < 0)
    982			goto close_sk;
    983
    984		if (values_close(current, sock, 10)) {
    985			ret = KSFT_PASS;
    986			break;
    987		}
    988	}
    989
    990close_sk:
    991	close(sk);
    992free_ainfo:
    993	freeaddrinfo(ai);
    994	return ret;
    995}
    996
/*
 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between
 * 1000 and 61000. Once it gets a client connection, it starts writing
 * to its socket.
 * The TCP client interleaves reads from the socket with checks whether
 * memory.current and memory.stat.sock are similar.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Retry with a fresh random port when the server hits EADDRINUSE */
	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		close(args.ctl[1]);
		/* The server child reports its bind status over the pipe */
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		/* Reap the failed server before retrying */
		waitpid(pid, NULL, 0);
	}

	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	/* After the connection is gone, all socket memory must be uncharged */
	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
   1069
/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.group.oom set. Then it checks that all
 * processes in the leaf were killed. It also checks that oom_events
 * were propagated to the parent level.
 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long parent_oom_events;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	/* With oom.group set, an OOM kill takes out the whole cgroup */
	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

	/* Background allocators: one in the parent, two in the leaf */
	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	/* This allocation exceeds memory.max, so cg_run() must fail */
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	parent_oom_events = cg_read_key_long(
			parent, "memory.events", "oom_kill ");
	/*
	 * If memory_localevents is not enabled (the default), the parent should
	 * count OOM events in its children groups. Otherwise, it should not
	 * have observed any events.
	 */
	if (has_localevents && parent_oom_events != 0)
		goto cleanup;
	else if (!has_localevents && parent_oom_events <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}
   1142
   1143/*
   1144 * This test disables swapping and tries to allocate anonymous memory
   1145 * up to OOM with memory.group.oom set. Then it checks that all
   1146 * processes in the parent and leaf were killed.
   1147 */
   1148static int test_memcg_oom_group_parent_events(const char *root)
   1149{
   1150	int ret = KSFT_FAIL;
   1151	char *parent, *child;
   1152
   1153	parent = cg_name(root, "memcg_test_0");
   1154	child = cg_name(root, "memcg_test_0/memcg_test_1");
   1155
   1156	if (!parent || !child)
   1157		goto cleanup;
   1158
   1159	if (cg_create(parent))
   1160		goto cleanup;
   1161
   1162	if (cg_create(child))
   1163		goto cleanup;
   1164
   1165	if (cg_write(parent, "memory.max", "80M"))
   1166		goto cleanup;
   1167
   1168	if (cg_write(parent, "memory.swap.max", "0"))
   1169		goto cleanup;
   1170
   1171	if (cg_write(parent, "memory.oom.group", "1"))
   1172		goto cleanup;
   1173
   1174	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
   1175	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
   1176	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
   1177
   1178	if (!cg_run(child, alloc_anon, (void *)MB(100)))
   1179		goto cleanup;
   1180
   1181	if (cg_test_proc_killed(child))
   1182		goto cleanup;
   1183	if (cg_test_proc_killed(parent))
   1184		goto cleanup;
   1185
   1186	ret = KSFT_PASS;
   1187
   1188cleanup:
   1189	if (child)
   1190		cg_destroy(child);
   1191	if (parent)
   1192		cg_destroy(parent);
   1193	free(child);
   1194	free(parent);
   1195
   1196	return ret;
   1197}
   1198
/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes were killed except those set with OOM_SCORE_ADJ_MIN
 */
   1204static int test_memcg_oom_group_score_events(const char *root)
   1205{
   1206	int ret = KSFT_FAIL;
   1207	char *memcg;
   1208	int safe_pid;
   1209
   1210	memcg = cg_name(root, "memcg_test_0");
   1211
   1212	if (!memcg)
   1213		goto cleanup;
   1214
   1215	if (cg_create(memcg))
   1216		goto cleanup;
   1217
   1218	if (cg_write(memcg, "memory.max", "50M"))
   1219		goto cleanup;
   1220
   1221	if (cg_write(memcg, "memory.swap.max", "0"))
   1222		goto cleanup;
   1223
   1224	if (cg_write(memcg, "memory.oom.group", "1"))
   1225		goto cleanup;
   1226
   1227	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
   1228	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
   1229		goto cleanup;
   1230
   1231	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
   1232	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
   1233		goto cleanup;
   1234
   1235	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
   1236		goto cleanup;
   1237
   1238	if (kill(safe_pid, SIGKILL))
   1239		goto cleanup;
   1240
   1241	ret = KSFT_PASS;
   1242
   1243cleanup:
   1244	if (memcg)
   1245		cg_destroy(memcg);
   1246	free(memcg);
   1247
   1248	return ret;
   1249}
   1250
/* Pair a test function with its stringified name for result reporting. */
#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);	/* test entry point; returns a KSFT_* code */
	const char *name;		/* human-readable name printed by main() */
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T
   1272
   1273int main(int argc, char **argv)
   1274{
   1275	char root[PATH_MAX];
   1276	int i, proc_status, ret = EXIT_SUCCESS;
   1277
   1278	if (cg_find_unified_root(root, sizeof(root)))
   1279		ksft_exit_skip("cgroup v2 isn't mounted\n");
   1280
   1281	/*
   1282	 * Check that memory controller is available:
   1283	 * memory is listed in cgroup.controllers
   1284	 */
   1285	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
   1286		ksft_exit_skip("memory controller isn't available\n");
   1287
   1288	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
   1289		if (cg_write(root, "cgroup.subtree_control", "+memory"))
   1290			ksft_exit_skip("Failed to set memory controller\n");
   1291
   1292	proc_status = proc_mount_contains("memory_recursiveprot");
   1293	if (proc_status < 0)
   1294		ksft_exit_skip("Failed to query cgroup mount option\n");
   1295	has_recursiveprot = proc_status;
   1296
   1297	proc_status = proc_mount_contains("memory_localevents");
   1298	if (proc_status < 0)
   1299		ksft_exit_skip("Failed to query cgroup mount option\n");
   1300	has_localevents = proc_status;
   1301
   1302	for (i = 0; i < ARRAY_SIZE(tests); i++) {
   1303		switch (tests[i].fn(root)) {
   1304		case KSFT_PASS:
   1305			ksft_test_result_pass("%s\n", tests[i].name);
   1306			break;
   1307		case KSFT_SKIP:
   1308			ksft_test_result_skip("%s\n", tests[i].name);
   1309			break;
   1310		default:
   1311			ret = EXIT_FAILURE;
   1312			ksft_test_result_fail("%s\n", tests[i].name);
   1313			break;
   1314		}
   1315	}
   1316
   1317	return ret;
   1318}