cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

mount_setattr_test.c (38617B)


      1// SPDX-License-Identifier: GPL-2.0
      2#define _GNU_SOURCE
      3#include <sched.h>
      4#include <stdio.h>
      5#include <errno.h>
      6#include <pthread.h>
      7#include <string.h>
      8#include <sys/stat.h>
      9#include <sys/types.h>
     10#include <sys/mount.h>
     11#include <sys/wait.h>
     12#include <sys/vfs.h>
     13#include <sys/statvfs.h>
     14#include <sys/sysinfo.h>
     15#include <stdlib.h>
     16#include <unistd.h>
     17#include <fcntl.h>
     18#include <grp.h>
     19#include <stdbool.h>
     20#include <stdarg.h>
     21
     22#include "../kselftest_harness.h"
     23
     /*
      * Fallback definitions for constants that the installed libc/kernel headers
      * may not provide.  Every definition is guarded so that a value supplied by
      * a header always wins.
      */

     /* Namespace flags passed to unshare(2). */
     #if !defined(CLONE_NEWNS)
     #define CLONE_NEWNS 0x00020000
     #endif

     #if !defined(CLONE_NEWUSER)
     #define CLONE_NEWUSER 0x10000000
     #endif

     /* mount(2) flags. */
     #if !defined(MS_REC)
     #define MS_REC 16384
     #endif

     #if !defined(MS_RELATIME)
     #define MS_RELATIME (1 << 21)
     #endif

     #if !defined(MS_STRICTATIME)
     #define MS_STRICTATIME (1 << 24)
     #endif

     /* Attribute bits carried in struct mount_attr's attr_set/attr_clr. */
     #if !defined(MOUNT_ATTR_RDONLY)
     #define MOUNT_ATTR_RDONLY 0x00000001
     #endif

     #if !defined(MOUNT_ATTR_NOSUID)
     #define MOUNT_ATTR_NOSUID 0x00000002
     #endif

     #if !defined(MOUNT_ATTR_NOEXEC)
     #define MOUNT_ATTR_NOEXEC 0x00000008
     #endif

     #if !defined(MOUNT_ATTR_NODIRATIME)
     #define MOUNT_ATTR_NODIRATIME 0x00000080
     #endif

     /* Mask covering the access-time mode values defined below. */
     #if !defined(MOUNT_ATTR__ATIME)
     #define MOUNT_ATTR__ATIME 0x00000070
     #endif

     #if !defined(MOUNT_ATTR_RELATIME)
     #define MOUNT_ATTR_RELATIME 0x00000000
     #endif

     #if !defined(MOUNT_ATTR_NOATIME)
     #define MOUNT_ATTR_NOATIME 0x00000010
     #endif

     #if !defined(MOUNT_ATTR_STRICTATIME)
     #define MOUNT_ATTR_STRICTATIME 0x00000020
     #endif

     #if !defined(AT_RECURSIVE)
     #define AT_RECURSIVE 0x8000
     #endif

     #if !defined(MS_SHARED)
     #define MS_SHARED (1 << 20)
     #endif

     /* Upper bound on the number of worker threads a test spawns. */
     #define DEFAULT_THREADS 4

     /* Round-trip a small integer through a void pointer (thread exit values). */
     #define ptr_to_int(ptr) ((int)((intptr_t)(ptr)))
     #define int_to_ptr(val) ((void *)((intptr_t)(val)))
     87
     88#ifndef __NR_mount_setattr
     89	#if defined __alpha__
     90		#define __NR_mount_setattr 552
     91	#elif defined _MIPS_SIM
     92		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
     93			#define __NR_mount_setattr (442 + 4000)
     94		#endif
     95		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
     96			#define __NR_mount_setattr (442 + 6000)
     97		#endif
     98		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
     99			#define __NR_mount_setattr (442 + 5000)
    100		#endif
    101	#elif defined __ia64__
    102		#define __NR_mount_setattr (442 + 1024)
    103	#else
    104		#define __NR_mount_setattr 442
    105	#endif
    106
    107struct mount_attr {
    108	__u64 attr_set;
    109	__u64 attr_clr;
    110	__u64 propagation;
    111	__u64 userns_fd;
    112};
    113#endif
    114
    /*
     * Fallback syscall number for open_tree, selected per architecture, for
     * headers that do not define it.
     */
    #if !defined(__NR_open_tree)
    	#if defined(__alpha__)
    		#define __NR_open_tree 538
    	#elif defined(_MIPS_SIM)
    		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
    			#define __NR_open_tree 4428
    		#endif
    		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
    			#define __NR_open_tree 6428
    		#endif
    		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
    			#define __NR_open_tree 5428
    		#endif
    	#elif defined(__ia64__)
    		#define __NR_open_tree (428 + 1024)
    	#else
    		#define __NR_open_tree 428
    	#endif
    #endif

    /* Additional mount attribute bits that the headers may lack. */
    #if !defined(MOUNT_ATTR_IDMAP)
    #define MOUNT_ATTR_IDMAP 0x00100000
    #endif

    #if !defined(MOUNT_ATTR_NOSYMFOLLOW)
    #define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
    #endif
    142
    /*
     * Invoke the mount_setattr syscall through syscall(2), forwarding all
     * arguments unchanged.  The value returned by syscall() is handed back
     * as an int.
     */
    static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
    				    struct mount_attr *attr, size_t size)
    {
    	long rc;

    	rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

    	return rc;
    }
    148
    /* Flags for open_tree; only defined here when the headers lack them. */
    #if !defined(OPEN_TREE_CLONE)
    #define OPEN_TREE_CLONE 1
    #endif

    #if !defined(OPEN_TREE_CLOEXEC)
    #define OPEN_TREE_CLOEXEC O_CLOEXEC
    #endif

    #if !defined(AT_RECURSIVE)
    #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
    #endif

    /*
     * Invoke the open_tree syscall through syscall(2), forwarding all
     * arguments unchanged.  The value returned by syscall() is handed back
     * as an int.
     */
    static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
    {
    	long rc;

    	rc = syscall(__NR_open_tree, dfd, filename, flags);

    	return rc;
    }
    165
    /*
     * Call write(2) on @fd and repeat the call for as long as it fails with
     * EINTR.  Returns the result of the last write(2) call, i.e. the number
     * of bytes written or a negative value on any other error.
     */
    static ssize_t write_nointr(int fd, const void *buf, size_t count)
    {
    	for (;;) {
    		ssize_t written = write(fd, buf, count);

    		/* Anything other than an interrupted call ends the loop. */
    		if (written >= 0 || errno != EINTR)
    			return written;
    	}
    }
    176
    177static int write_file(const char *path, const void *buf, size_t count)
    178{
    179	int fd;
    180	ssize_t ret;
    181
    182	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
    183	if (fd < 0)
    184		return -1;
    185
    186	ret = write_nointr(fd, buf, count);
    187	close(fd);
    188	if (ret < 0 || (size_t)ret != count)
    189		return -1;
    190
    191	return 0;
    192}
    193
    194static int create_and_enter_userns(void)
    195{
    196	uid_t uid;
    197	gid_t gid;
    198	char map[100];
    199
    200	uid = getuid();
    201	gid = getgid();
    202
    203	if (unshare(CLONE_NEWUSER))
    204		return -1;
    205
    206	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
    207	    errno != ENOENT)
    208		return -1;
    209
    210	snprintf(map, sizeof(map), "0 %d 1", uid);
    211	if (write_file("/proc/self/uid_map", map, strlen(map)))
    212		return -1;
    213
    214
    215	snprintf(map, sizeof(map), "0 %d 1", gid);
    216	if (write_file("/proc/self/gid_map", map, strlen(map)))
    217		return -1;
    218
    219	if (setgid(0))
    220		return -1;
    221
    222	if (setuid(0))
    223		return -1;
    224
    225	return 0;
    226}
    227
    228static int prepare_unpriv_mountns(void)
    229{
    230	if (create_and_enter_userns())
    231		return -1;
    232
    233	if (unshare(CLONE_NEWNS))
    234		return -1;
    235
    236	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
    237		return -1;
    238
    239	return 0;
    240}
    241
    242#ifndef ST_NOSYMFOLLOW
    243#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
    244#endif
    245
    246static int read_mnt_flags(const char *path)
    247{
    248	int ret;
    249	struct statvfs stat;
    250	unsigned int mnt_flags;
    251
    252	ret = statvfs(path, &stat);
    253	if (ret != 0)
    254		return -EINVAL;
    255
    256	if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC |
    257			    ST_NOATIME | ST_NODIRATIME | ST_RELATIME |
    258			    ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW))
    259		return -EINVAL;
    260
    261	mnt_flags = 0;
    262	if (stat.f_flag & ST_RDONLY)
    263		mnt_flags |= MS_RDONLY;
    264	if (stat.f_flag & ST_NOSUID)
    265		mnt_flags |= MS_NOSUID;
    266	if (stat.f_flag & ST_NODEV)
    267		mnt_flags |= MS_NODEV;
    268	if (stat.f_flag & ST_NOEXEC)
    269		mnt_flags |= MS_NOEXEC;
    270	if (stat.f_flag & ST_NOATIME)
    271		mnt_flags |= MS_NOATIME;
    272	if (stat.f_flag & ST_NODIRATIME)
    273		mnt_flags |= MS_NODIRATIME;
    274	if (stat.f_flag & ST_RELATIME)
    275		mnt_flags |= MS_RELATIME;
    276	if (stat.f_flag & ST_SYNCHRONOUS)
    277		mnt_flags |= MS_SYNCHRONOUS;
    278	if (stat.f_flag & ST_MANDLOCK)
    279		mnt_flags |= ST_MANDLOCK;
    280	if (stat.f_flag & ST_NOSYMFOLLOW)
    281		mnt_flags |= ST_NOSYMFOLLOW;
    282
    283	return mnt_flags;
    284}
    285
    /*
     * Skip @nfields space- or tab-terminated fields starting at @src and
     * return a pointer to what follows.  Each delimiter ends exactly one
     * field, so consecutive delimiters count as empty fields.  If the string
     * ends before @nfields fields were skipped, a pointer to the terminating
     * NUL byte is returned.  The result is never NULL.
     */
    static char *get_field(char *src, int nfields)
    {
    	char *cursor = src;
    	int remaining = nfields;

    	while (remaining-- > 0) {
    		/* Advance to the delimiter that ends the current field. */
    		cursor += strcspn(cursor, " \t");
    		if (*cursor == '\0')
    			break;

    		/* Step over the delimiter itself. */
    		cursor++;
    	}

    	return cursor;
    }
    303
    /*
     * Terminate @word in place at its first space or tab.  When the string
     * contains neither, the existing NUL terminator is simply rewritten.
     */
    static void null_endofword(char *word)
    {
    	size_t end = strcspn(word, " \t");

    	word[end] = '\0';
    }
    310
    /*
     * Scan /proc/self/mountinfo for an entry whose mount target equals @path
     * and report whether the field two positions after the target contains
     * "shared:".
     *
     * Per line, the target is what follows the first four whitespace-separated
     * fields.  Returns false when the file cannot be opened or no entry for
     * @path carries the tag.
     *
     * The getline() buffer and the stream are released on every exit path;
     * returning straight out of the loop on a match used to leak both.
     */
    static bool is_shared_mount(const char *path)
    {
    	size_t len = 0;
    	char *line = NULL;
    	bool shared = false;
    	FILE *f = NULL;

    	f = fopen("/proc/self/mountinfo", "re");
    	if (!f)
    		return false;

    	while (getline(&line, &len, f) != -1) {
    		char *opts, *target;

    		target = get_field(line, 4);
    		if (!target)
    			continue;

    		opts = get_field(target, 2);
    		if (!opts)
    			continue;

    		/* Cut the target out of the line only after locating opts. */
    		null_endofword(target);

    		if (strcmp(target, path) != 0)
    			continue;

    		null_endofword(opts);
    		if (strstr(opts, "shared:")) {
    			shared = true;
    			break;
    		}
    	}

    	free(line);
    	fclose(f);

    	return shared;
    }
    347
    348static void *mount_setattr_thread(void *data)
    349{
    350	struct mount_attr attr = {
    351		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
    352		.attr_clr	= 0,
    353		.propagation	= MS_SHARED,
    354	};
    355
    356	if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
    357		pthread_exit(int_to_ptr(-1));
    358
    359	pthread_exit(int_to_ptr(0));
    360}
    361
    362/* Attempt to de-conflict with the selftests tree. */
    363#ifndef SKIP
    364#define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
    365#endif
    366
    367static bool mount_setattr_supported(void)
    368{
    369	int ret;
    370
    371	ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
    372	if (ret < 0 && errno == ENOSYS)
    373		return false;
    374
    375	return true;
    376}
    377
    378FIXTURE(mount_setattr) {
    379};
    380
    381#define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
    382#define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
    383
    384FIXTURE_SETUP(mount_setattr)
    385{
    386	int fd = -EBADF;
    387
    388	if (!mount_setattr_supported())
    389		SKIP(return, "mount_setattr syscall not supported");
    390
    391	ASSERT_EQ(prepare_unpriv_mountns(), 0);
    392
    393	(void)umount2("/mnt", MNT_DETACH);
    394	(void)umount2("/tmp", MNT_DETACH);
    395
    396	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
    397			"size=100000,mode=700"), 0);
    398
    399	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
    400
    401	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
    402			"size=100000,mode=700"), 0);
    403
    404	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
    405
    406	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
    407			"size=100000,mode=700"), 0);
    408
    409	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
    410			"size=100000,mode=700"), 0);
    411
    412	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
    413
    414	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
    415			"size=100000,mode=700"), 0);
    416
    417	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
    418
    419	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
    420
    421	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
    422
    423	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
    424			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
    425
    426	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
    427
    428	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
    429			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
    430
    431	fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
    432	ASSERT_GT(fd, 0);
    433	ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
    434	ASSERT_EQ(close(fd), 0);
    435}
    436
    437FIXTURE_TEARDOWN(mount_setattr)
    438{
    439	if (!mount_setattr_supported())
    440		SKIP(return, "mount_setattr syscall not supported");
    441
    442	(void)umount2("/mnt/A", MNT_DETACH);
    443	(void)umount2("/tmp", MNT_DETACH);
    444}
    445
    446TEST_F(mount_setattr, invalid_attributes)
    447{
    448	struct mount_attr invalid_attr = {
    449		.attr_set = (1U << 31),
    450	};
    451
    452	if (!mount_setattr_supported())
    453		SKIP(return, "mount_setattr syscall not supported");
    454
    455	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
    456				    sizeof(invalid_attr)), 0);
    457
    458	invalid_attr.attr_set	= 0;
    459	invalid_attr.attr_clr	= (1U << 31);
    460	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
    461				    sizeof(invalid_attr)), 0);
    462
    463	invalid_attr.attr_clr		= 0;
    464	invalid_attr.propagation	= (1U << 31);
    465	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
    466				    sizeof(invalid_attr)), 0);
    467
    468	invalid_attr.attr_set		= (1U << 31);
    469	invalid_attr.attr_clr		= (1U << 31);
    470	invalid_attr.propagation	= (1U << 31);
    471	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
    472				    sizeof(invalid_attr)), 0);
    473
    474	ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
    475				    sizeof(invalid_attr)), 0);
    476}
    477
    478TEST_F(mount_setattr, extensibility)
    479{
    480	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
    481	char *s = "dummy";
    482	struct mount_attr invalid_attr = {};
    483	struct mount_attr_large {
    484		struct mount_attr attr1;
    485		struct mount_attr attr2;
    486		struct mount_attr attr3;
    487	} large_attr = {};
    488
    489	if (!mount_setattr_supported())
    490		SKIP(return, "mount_setattr syscall not supported");
    491
    492	old_flags = read_mnt_flags("/mnt/A");
    493	ASSERT_GT(old_flags, 0);
    494
    495	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
    496				    sizeof(invalid_attr)), 0);
    497	ASSERT_EQ(errno, EFAULT);
    498
    499	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
    500				    sizeof(invalid_attr)), 0);
    501	ASSERT_EQ(errno, EINVAL);
    502
    503	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
    504	ASSERT_EQ(errno, EINVAL);
    505
    506	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
    507				    sizeof(invalid_attr) / 2), 0);
    508	ASSERT_EQ(errno, EINVAL);
    509
    510	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
    511				    sizeof(invalid_attr) / 2), 0);
    512	ASSERT_EQ(errno, EINVAL);
    513
    514	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
    515				    (void *)&large_attr, sizeof(large_attr)), 0);
    516
    517	large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
    518	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
    519				    (void *)&large_attr, sizeof(large_attr)), 0);
    520
    521	large_attr.attr3.attr_set = 0;
    522	large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
    523	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
    524				    (void *)&large_attr, sizeof(large_attr)), 0);
    525
    526	expected_flags = old_flags;
    527	expected_flags |= MS_RDONLY;
    528
    529	new_flags = read_mnt_flags("/mnt/A");
    530	ASSERT_EQ(new_flags, expected_flags);
    531
    532	new_flags = read_mnt_flags("/mnt/A/AA");
    533	ASSERT_EQ(new_flags, expected_flags);
    534
    535	new_flags = read_mnt_flags("/mnt/A/AA/B");
    536	ASSERT_EQ(new_flags, expected_flags);
    537
    538	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
    539	ASSERT_EQ(new_flags, expected_flags);
    540}
    541
    542TEST_F(mount_setattr, basic)
    543{
    544	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
    545	struct mount_attr attr = {
    546		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
    547		.attr_clr	= MOUNT_ATTR__ATIME,
    548	};
    549
    550	if (!mount_setattr_supported())
    551		SKIP(return, "mount_setattr syscall not supported");
    552
    553	old_flags = read_mnt_flags("/mnt/A");
    554	ASSERT_GT(old_flags, 0);
    555
    556	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
    557
    558	expected_flags = old_flags;
    559	expected_flags |= MS_RDONLY;
    560	expected_flags |= MS_NOEXEC;
    561	expected_flags &= ~MS_NOATIME;
    562	expected_flags |= MS_RELATIME;
    563
    564	new_flags = read_mnt_flags("/mnt/A");
    565	ASSERT_EQ(new_flags, expected_flags);
    566
    567	new_flags = read_mnt_flags("/mnt/A/AA");
    568	ASSERT_EQ(new_flags, old_flags);
    569
    570	new_flags = read_mnt_flags("/mnt/A/AA/B");
    571	ASSERT_EQ(new_flags, old_flags);
    572
    573	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
    574	ASSERT_EQ(new_flags, old_flags);
    575}
    576
    577TEST_F(mount_setattr, basic_recursive)
    578{
    579	int fd;
    580	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
    581	struct mount_attr attr = {
    582		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
    583		.attr_clr	= MOUNT_ATTR__ATIME,
    584	};
    585
    586	if (!mount_setattr_supported())
    587		SKIP(return, "mount_setattr syscall not supported");
    588
    589	old_flags = read_mnt_flags("/mnt/A");
    590	ASSERT_GT(old_flags, 0);
    591
    592	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    593
    594	expected_flags = old_flags;
    595	expected_flags |= MS_RDONLY;
    596	expected_flags |= MS_NOEXEC;
    597	expected_flags &= ~MS_NOATIME;
    598	expected_flags |= MS_RELATIME;
    599
    600	new_flags = read_mnt_flags("/mnt/A");
    601	ASSERT_EQ(new_flags, expected_flags);
    602
    603	new_flags = read_mnt_flags("/mnt/A/AA");
    604	ASSERT_EQ(new_flags, expected_flags);
    605
    606	new_flags = read_mnt_flags("/mnt/A/AA/B");
    607	ASSERT_EQ(new_flags, expected_flags);
    608
    609	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
    610	ASSERT_EQ(new_flags, expected_flags);
    611
    612	memset(&attr, 0, sizeof(attr));
    613	attr.attr_clr = MOUNT_ATTR_RDONLY;
    614	attr.propagation = MS_SHARED;
    615	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    616
    617	expected_flags &= ~MS_RDONLY;
    618	new_flags = read_mnt_flags("/mnt/A");
    619	ASSERT_EQ(new_flags, expected_flags);
    620
    621	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
    622
    623	new_flags = read_mnt_flags("/mnt/A/AA");
    624	ASSERT_EQ(new_flags, expected_flags);
    625
    626	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
    627
    628	new_flags = read_mnt_flags("/mnt/A/AA/B");
    629	ASSERT_EQ(new_flags, expected_flags);
    630
    631	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
    632
    633	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
    634	ASSERT_EQ(new_flags, expected_flags);
    635
    636	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
    637
    638	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
    639	ASSERT_GE(fd, 0);
    640
    641	/*
    642	 * We're holding a fd open for writing so this needs to fail somewhere
    643	 * in the middle and the mount options need to be unchanged.
    644	 */
    645	attr.attr_set = MOUNT_ATTR_RDONLY;
    646	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    647
    648	new_flags = read_mnt_flags("/mnt/A");
    649	ASSERT_EQ(new_flags, expected_flags);
    650
    651	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
    652
    653	new_flags = read_mnt_flags("/mnt/A/AA");
    654	ASSERT_EQ(new_flags, expected_flags);
    655
    656	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
    657
    658	new_flags = read_mnt_flags("/mnt/A/AA/B");
    659	ASSERT_EQ(new_flags, expected_flags);
    660
    661	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
    662
    663	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
    664	ASSERT_EQ(new_flags, expected_flags);
    665
    666	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
    667
    668	EXPECT_EQ(close(fd), 0);
    669}
    670
    671TEST_F(mount_setattr, mount_has_writers)
    672{
    673	int fd, dfd;
    674	unsigned int old_flags = 0, new_flags = 0;
    675	struct mount_attr attr = {
    676		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
    677		.attr_clr	= MOUNT_ATTR__ATIME,
    678		.propagation	= MS_SHARED,
    679	};
    680
    681	if (!mount_setattr_supported())
    682		SKIP(return, "mount_setattr syscall not supported");
    683
    684	old_flags = read_mnt_flags("/mnt/A");
    685	ASSERT_GT(old_flags, 0);
    686
    687	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
    688	ASSERT_GE(fd, 0);
    689
    690	/*
    691	 * We're holding a fd open to a mount somwhere in the middle so this
    692	 * needs to fail somewhere in the middle. After this the mount options
    693	 * need to be unchanged.
    694	 */
    695	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    696
    697	new_flags = read_mnt_flags("/mnt/A");
    698	ASSERT_EQ(new_flags, old_flags);
    699
    700	ASSERT_EQ(is_shared_mount("/mnt/A"), false);
    701
    702	new_flags = read_mnt_flags("/mnt/A/AA");
    703	ASSERT_EQ(new_flags, old_flags);
    704
    705	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
    706
    707	new_flags = read_mnt_flags("/mnt/A/AA/B");
    708	ASSERT_EQ(new_flags, old_flags);
    709
    710	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
    711
    712	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
    713	ASSERT_EQ(new_flags, old_flags);
    714
    715	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
    716
    717	dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
    718	ASSERT_GE(dfd, 0);
    719	EXPECT_EQ(fsync(dfd), 0);
    720	EXPECT_EQ(close(dfd), 0);
    721
    722	EXPECT_EQ(fsync(fd), 0);
    723	EXPECT_EQ(close(fd), 0);
    724
    725	/* All writers are gone so this should succeed. */
    726	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    727}
    728
    729TEST_F(mount_setattr, mixed_mount_options)
    730{
    731	unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
    732	struct mount_attr attr = {
    733		.attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
    734		.attr_set = MOUNT_ATTR_RELATIME,
    735	};
    736
    737	if (!mount_setattr_supported())
    738		SKIP(return, "mount_setattr syscall not supported");
    739
    740	old_flags1 = read_mnt_flags("/mnt/B");
    741	ASSERT_GT(old_flags1, 0);
    742
    743	old_flags2 = read_mnt_flags("/mnt/B/BB");
    744	ASSERT_GT(old_flags2, 0);
    745
    746	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    747
    748	expected_flags = old_flags2;
    749	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
    750	expected_flags |= MS_RELATIME;
    751
    752	new_flags = read_mnt_flags("/mnt/B");
    753	ASSERT_EQ(new_flags, expected_flags);
    754
    755	expected_flags = old_flags2;
    756	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
    757	expected_flags |= MS_RELATIME;
    758
    759	new_flags = read_mnt_flags("/mnt/B/BB");
    760	ASSERT_EQ(new_flags, expected_flags);
    761}
    762
    763TEST_F(mount_setattr, time_changes)
    764{
    765	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
    766	struct mount_attr attr = {
    767		.attr_set	= MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
    768	};
    769
    770	if (!mount_setattr_supported())
    771		SKIP(return, "mount_setattr syscall not supported");
    772
    773	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    774
    775	attr.attr_set = MOUNT_ATTR_STRICTATIME;
    776	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    777
    778	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
    779	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    780
    781	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
    782	attr.attr_clr = MOUNT_ATTR__ATIME;
    783	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    784
    785	attr.attr_set = 0;
    786	attr.attr_clr = MOUNT_ATTR_STRICTATIME;
    787	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    788
    789	attr.attr_clr = MOUNT_ATTR_NOATIME;
    790	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    791
    792	old_flags = read_mnt_flags("/mnt/A");
    793	ASSERT_GT(old_flags, 0);
    794
    795	attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
    796	attr.attr_clr = MOUNT_ATTR__ATIME;
    797	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    798
    799	expected_flags = old_flags;
    800	expected_flags |= MS_NOATIME;
    801	expected_flags |= MS_NODIRATIME;
    802
    803	new_flags = read_mnt_flags("/mnt/A");
    804	ASSERT_EQ(new_flags, expected_flags);
    805
    806	new_flags = read_mnt_flags("/mnt/A/AA");
    807	ASSERT_EQ(new_flags, expected_flags);
    808
    809	new_flags = read_mnt_flags("/mnt/A/AA/B");
    810	ASSERT_EQ(new_flags, expected_flags);
    811
    812	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
    813	ASSERT_EQ(new_flags, expected_flags);
    814
    815	memset(&attr, 0, sizeof(attr));
    816	attr.attr_set &= ~MOUNT_ATTR_NOATIME;
    817	attr.attr_set |= MOUNT_ATTR_RELATIME;
    818	attr.attr_clr |= MOUNT_ATTR__ATIME;
    819	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    820
    821	expected_flags &= ~MS_NOATIME;
    822	expected_flags |= MS_RELATIME;
    823
    824	new_flags = read_mnt_flags("/mnt/A");
    825	ASSERT_EQ(new_flags, expected_flags);
    826
    827	new_flags = read_mnt_flags("/mnt/A/AA");
    828	ASSERT_EQ(new_flags, expected_flags);
    829
    830	new_flags = read_mnt_flags("/mnt/A/AA/B");
    831	ASSERT_EQ(new_flags, expected_flags);
    832
    833	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
    834	ASSERT_EQ(new_flags, expected_flags);
    835
    836	memset(&attr, 0, sizeof(attr));
    837	attr.attr_set &= ~MOUNT_ATTR_RELATIME;
    838	attr.attr_set |= MOUNT_ATTR_STRICTATIME;
    839	attr.attr_clr |= MOUNT_ATTR__ATIME;
    840	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    841
    842	expected_flags &= ~MS_RELATIME;
    843
    844	new_flags = read_mnt_flags("/mnt/A");
    845	ASSERT_EQ(new_flags, expected_flags);
    846
    847	new_flags = read_mnt_flags("/mnt/A/AA");
    848	ASSERT_EQ(new_flags, expected_flags);
    849
    850	new_flags = read_mnt_flags("/mnt/A/AA/B");
    851	ASSERT_EQ(new_flags, expected_flags);
    852
    853	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
    854	ASSERT_EQ(new_flags, expected_flags);
    855
    856	memset(&attr, 0, sizeof(attr));
    857	attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
    858	attr.attr_set |= MOUNT_ATTR_NOATIME;
    859	attr.attr_clr |= MOUNT_ATTR__ATIME;
    860	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    861
    862	expected_flags |= MS_NOATIME;
    863	new_flags = read_mnt_flags("/mnt/A");
    864	ASSERT_EQ(new_flags, expected_flags);
    865
    866	new_flags = read_mnt_flags("/mnt/A/AA");
    867	ASSERT_EQ(new_flags, expected_flags);
    868
    869	new_flags = read_mnt_flags("/mnt/A/AA/B");
    870	ASSERT_EQ(new_flags, expected_flags);
    871
    872	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
    873	ASSERT_EQ(new_flags, expected_flags);
    874
    875	memset(&attr, 0, sizeof(attr));
    876	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    877
    878	new_flags = read_mnt_flags("/mnt/A");
    879	ASSERT_EQ(new_flags, expected_flags);
    880
    881	new_flags = read_mnt_flags("/mnt/A/AA");
    882	ASSERT_EQ(new_flags, expected_flags);
    883
    884	new_flags = read_mnt_flags("/mnt/A/AA/B");
    885	ASSERT_EQ(new_flags, expected_flags);
    886
    887	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
    888	ASSERT_EQ(new_flags, expected_flags);
    889
    890	memset(&attr, 0, sizeof(attr));
    891	attr.attr_clr = MOUNT_ATTR_NODIRATIME;
    892	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
    893
    894	expected_flags &= ~MS_NODIRATIME;
    895
    896	new_flags = read_mnt_flags("/mnt/A");
    897	ASSERT_EQ(new_flags, expected_flags);
    898
    899	new_flags = read_mnt_flags("/mnt/A/AA");
    900	ASSERT_EQ(new_flags, expected_flags);
    901
    902	new_flags = read_mnt_flags("/mnt/A/AA/B");
    903	ASSERT_EQ(new_flags, expected_flags);
    904
    905	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
    906	ASSERT_EQ(new_flags, expected_flags);
    907}
    908
    909TEST_F(mount_setattr, multi_threaded)
    910{
    911	int i, j, nthreads, ret = 0;
    912	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
    913	pthread_attr_t pattr;
    914	pthread_t threads[DEFAULT_THREADS];
    915
    916	if (!mount_setattr_supported())
    917		SKIP(return, "mount_setattr syscall not supported");
    918
    919	old_flags = read_mnt_flags("/mnt/A");
    920	ASSERT_GT(old_flags, 0);
    921
    922	/* Try to change mount options from multiple threads. */
    923	nthreads = get_nprocs_conf();
    924	if (nthreads > DEFAULT_THREADS)
    925		nthreads = DEFAULT_THREADS;
    926
    927	pthread_attr_init(&pattr);
    928	for (i = 0; i < nthreads; i++)
    929		ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
    930
    931	for (j = 0; j < i; j++) {
    932		void *retptr = NULL;
    933
    934		EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
    935
    936		ret += ptr_to_int(retptr);
    937		EXPECT_EQ(ret, 0);
    938	}
    939	pthread_attr_destroy(&pattr);
    940
    941	ASSERT_EQ(ret, 0);
    942
    943	expected_flags = old_flags;
    944	expected_flags |= MS_RDONLY;
    945	expected_flags |= MS_NOSUID;
    946	new_flags = read_mnt_flags("/mnt/A");
    947	ASSERT_EQ(new_flags, expected_flags);
    948
    949	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
    950
    951	new_flags = read_mnt_flags("/mnt/A/AA");
    952	ASSERT_EQ(new_flags, expected_flags);
    953
    954	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
    955
    956	new_flags = read_mnt_flags("/mnt/A/AA/B");
    957	ASSERT_EQ(new_flags, expected_flags);
    958
    959	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
    960
    961	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
    962	ASSERT_EQ(new_flags, expected_flags);
    963
    964	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
    965}
    966
    967TEST_F(mount_setattr, wrong_user_namespace)
    968{
    969	int ret;
    970	struct mount_attr attr = {
    971		.attr_set = MOUNT_ATTR_RDONLY,
    972	};
    973
    974	if (!mount_setattr_supported())
    975		SKIP(return, "mount_setattr syscall not supported");
    976
    977	EXPECT_EQ(create_and_enter_userns(), 0);
    978	ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
    979	ASSERT_LT(ret, 0);
    980	ASSERT_EQ(errno, EPERM);
    981}
    982
    983TEST_F(mount_setattr, wrong_mount_namespace)
    984{
    985	int fd, ret;
    986	struct mount_attr attr = {
    987		.attr_set = MOUNT_ATTR_RDONLY,
    988	};
    989
    990	if (!mount_setattr_supported())
    991		SKIP(return, "mount_setattr syscall not supported");
    992
    993	fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
    994	ASSERT_GE(fd, 0);
    995
    996	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
    997
    998	ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
    999	ASSERT_LT(ret, 0);
   1000	ASSERT_EQ(errno, EINVAL);
   1001}
   1002
/*
 * Fixture for the idmapped-mount tests. It carries no per-test data; the
 * mount layout the tests rely on is built in the fixture's setup function.
 */
FIXTURE(mount_setattr_idmapped) {
};
   1005
/*
 * Build the mount layout used by the mount_setattr_idmapped tests:
 *  - a stack of tmpfs mounts under /tmp (/tmp, /tmp/B, /tmp/B/BB),
 *  - tmpfs mounts on /mnt and /mnt/A with /tmp bind-mounted recursively on
 *    /mnt/A/AA,
 *  - a ramfs on /mnt/B and a devpts instance on /tmp/B/BB,
 *  - a loop-mounted ext4 image (/mnt/C/ext4.img) on /mnt/D.
 * The order of the steps matters: later mounts are stacked on directories
 * created by earlier steps.
 */
FIXTURE_SETUP(mount_setattr_idmapped)
{
	int img_fd = -EBADF;

	/* Do all mount work in a new mount namespace. */
	ASSERT_EQ(unshare(CLONE_NEWNS), 0);

	/* Recursively mark the whole tree private. */
	ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);

	/* Best effort: detach whatever is mounted on /mnt and /tmp; errors are ignored. */
	(void)umount2("/mnt", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);

	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	/* Create /tmp/B/b owned by 0:0 before a new tmpfs is mounted on /tmp/B. */
	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	/* Same pattern one level down: file first, then a tmpfs on top. */
	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);

	/* Recursive bind mount of the /tmp tree onto /mnt/A/AA. */
	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);

	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);

	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);

	/*
	 * NOTE(review): devpts is mounted on /tmp/B/BB, not on the /mnt/B/BB
	 * directory created just above - confirm this is intended.
	 */
	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);

	/* Create a 2 MiB image file, format it as ext4 and loop-mount it on /mnt/D. */
	ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
	ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
	img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
	ASSERT_GE(img_fd, 0);
	ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
	ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
	ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
	ASSERT_EQ(close(img_fd), 0);
}
   1065
/*
 * Detach the /mnt/A and /tmp mounts again. Return values are deliberately
 * ignored, so a mount that is already gone does not fail the teardown.
 */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	(void)umount2("/mnt/A", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);
}
   1071
   1072/**
   1073 * Validate that negative fd values are rejected.
   1074 */
   1075TEST_F(mount_setattr_idmapped, invalid_fd_negative)
   1076{
   1077	struct mount_attr attr = {
   1078		.attr_set	= MOUNT_ATTR_IDMAP,
   1079		.userns_fd	= -EBADF,
   1080	};
   1081
   1082	if (!mount_setattr_supported())
   1083		SKIP(return, "mount_setattr syscall not supported");
   1084
   1085	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
   1086		TH_LOG("failure: created idmapped mount with negative fd");
   1087	}
   1088}
   1089
   1090/**
   1091 * Validate that excessively large fd values are rejected.
   1092 */
   1093TEST_F(mount_setattr_idmapped, invalid_fd_large)
   1094{
   1095	struct mount_attr attr = {
   1096		.attr_set	= MOUNT_ATTR_IDMAP,
   1097		.userns_fd	= INT64_MAX,
   1098	};
   1099
   1100	if (!mount_setattr_supported())
   1101		SKIP(return, "mount_setattr syscall not supported");
   1102
   1103	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
   1104		TH_LOG("failure: created idmapped mount with too large fd value");
   1105	}
   1106}
   1107
   1108/**
   1109 * Validate that closed fd values are rejected.
   1110 */
   1111TEST_F(mount_setattr_idmapped, invalid_fd_closed)
   1112{
   1113	int fd;
   1114	struct mount_attr attr = {
   1115		.attr_set = MOUNT_ATTR_IDMAP,
   1116	};
   1117
   1118	if (!mount_setattr_supported())
   1119		SKIP(return, "mount_setattr syscall not supported");
   1120
   1121	fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
   1122	ASSERT_GE(fd, 0);
   1123	ASSERT_GE(close(fd), 0);
   1124
   1125	attr.userns_fd = fd;
   1126	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
   1127		TH_LOG("failure: created idmapped mount with closed fd");
   1128	}
   1129}
   1130
   1131/**
   1132 * Validate that the initial user namespace is rejected.
   1133 */
   1134TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
   1135{
   1136	int open_tree_fd = -EBADF;
   1137	struct mount_attr attr = {
   1138		.attr_set = MOUNT_ATTR_IDMAP,
   1139	};
   1140
   1141	if (!mount_setattr_supported())
   1142		SKIP(return, "mount_setattr syscall not supported");
   1143
   1144	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
   1145				     AT_NO_AUTOMOUNT |
   1146				     AT_SYMLINK_NOFOLLOW |
   1147				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
   1148	ASSERT_GE(open_tree_fd, 0);
   1149
   1150	attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
   1151	ASSERT_GE(attr.userns_fd, 0);
   1152	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
   1153	ASSERT_EQ(errno, EPERM);
   1154	ASSERT_EQ(close(attr.userns_fd), 0);
   1155	ASSERT_EQ(close(open_tree_fd), 0);
   1156}
   1157
   1158static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
   1159		   unsigned long range)
   1160{
   1161	char map[100], procfile[256];
   1162
   1163	snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid);
   1164	snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
   1165	if (write_file(procfile, map, strlen(map)))
   1166		return -1;
   1167
   1168
   1169	snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid);
   1170	snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
   1171	if (write_file(procfile, map, strlen(map)))
   1172		return -1;
   1173
   1174	return 0;
   1175}
   1176
   1177#define __STACK_SIZE (8 * 1024 * 1024)
   1178static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
   1179{
   1180	void *stack;
   1181
   1182	stack = malloc(__STACK_SIZE);
   1183	if (!stack)
   1184		return -ENOMEM;
   1185
   1186#ifdef __ia64__
   1187	return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
   1188#else
   1189	return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
   1190#endif
   1191}
   1192
   1193static int get_userns_fd_cb(void *data)
   1194{
   1195	return kill(getpid(), SIGSTOP);
   1196}
   1197
   1198static int wait_for_pid(pid_t pid)
   1199{
   1200	int status, ret;
   1201
   1202again:
   1203	ret = waitpid(pid, &status, 0);
   1204	if (ret == -1) {
   1205		if (errno == EINTR)
   1206			goto again;
   1207
   1208		return -1;
   1209	}
   1210
   1211	if (!WIFEXITED(status))
   1212		return -1;
   1213
   1214	return WEXITSTATUS(status);
   1215}
   1216
   1217static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
   1218{
   1219	int ret;
   1220	pid_t pid;
   1221	char path[256];
   1222
   1223	pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
   1224	if (pid < 0)
   1225		return -errno;
   1226
   1227	ret = map_ids(pid, nsid, hostid, range);
   1228	if (ret < 0)
   1229		return ret;
   1230
   1231	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
   1232	ret = open(path, O_RDONLY | O_CLOEXEC);
   1233	kill(pid, SIGKILL);
   1234	wait_for_pid(pid);
   1235	return ret;
   1236}
   1237
   1238/**
   1239 * Validate that an attached mount in our mount namespace cannot be idmapped.
   1240 * (The kernel enforces that the mount's mount namespace and the caller's mount
   1241 *  namespace match.)
   1242 */
   1243TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
   1244{
   1245	int open_tree_fd = -EBADF;
   1246	struct mount_attr attr = {
   1247		.attr_set = MOUNT_ATTR_IDMAP,
   1248	};
   1249
   1250	if (!mount_setattr_supported())
   1251		SKIP(return, "mount_setattr syscall not supported");
   1252
   1253	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
   1254				     AT_EMPTY_PATH |
   1255				     AT_NO_AUTOMOUNT |
   1256				     AT_SYMLINK_NOFOLLOW |
   1257				     OPEN_TREE_CLOEXEC);
   1258	ASSERT_GE(open_tree_fd, 0);
   1259
   1260	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
   1261	ASSERT_GE(attr.userns_fd, 0);
   1262	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
   1263	ASSERT_EQ(close(attr.userns_fd), 0);
   1264	ASSERT_EQ(close(open_tree_fd), 0);
   1265}
   1266
   1267/**
   1268 * Validate that idmapping a mount is rejected if the mount's mount namespace
   1269 * and our mount namespace don't match.
   1270 * (The kernel enforces that the mount's mount namespace and the caller's mount
   1271 *  namespace match.)
   1272 */
   1273TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
   1274{
   1275	int open_tree_fd = -EBADF;
   1276	struct mount_attr attr = {
   1277		.attr_set = MOUNT_ATTR_IDMAP,
   1278	};
   1279
   1280	if (!mount_setattr_supported())
   1281		SKIP(return, "mount_setattr syscall not supported");
   1282
   1283	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
   1284				     AT_EMPTY_PATH |
   1285				     AT_NO_AUTOMOUNT |
   1286				     AT_SYMLINK_NOFOLLOW |
   1287				     OPEN_TREE_CLOEXEC);
   1288	ASSERT_GE(open_tree_fd, 0);
   1289
   1290	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
   1291
   1292	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
   1293	ASSERT_GE(attr.userns_fd, 0);
   1294	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
   1295				    sizeof(attr)), 0);
   1296	ASSERT_EQ(close(attr.userns_fd), 0);
   1297	ASSERT_EQ(close(open_tree_fd), 0);
   1298}
   1299
   1300/**
   1301 * Validate that an attached mount in our mount namespace can be idmapped.
   1302 */
   1303TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
   1304{
   1305	int open_tree_fd = -EBADF;
   1306	struct mount_attr attr = {
   1307		.attr_set = MOUNT_ATTR_IDMAP,
   1308	};
   1309
   1310	if (!mount_setattr_supported())
   1311		SKIP(return, "mount_setattr syscall not supported");
   1312
   1313	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
   1314				     AT_EMPTY_PATH |
   1315				     AT_NO_AUTOMOUNT |
   1316				     AT_SYMLINK_NOFOLLOW |
   1317				     OPEN_TREE_CLOEXEC |
   1318				     OPEN_TREE_CLONE);
   1319	ASSERT_GE(open_tree_fd, 0);
   1320
   1321	/* Changing mount properties on a detached mount. */
   1322	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
   1323	ASSERT_GE(attr.userns_fd, 0);
   1324	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
   1325				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
   1326	ASSERT_EQ(close(attr.userns_fd), 0);
   1327	ASSERT_EQ(close(open_tree_fd), 0);
   1328}
   1329
   1330/**
   1331 * Validate that a detached mount not in our mount namespace can be idmapped.
   1332 */
   1333TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
   1334{
   1335	int open_tree_fd = -EBADF;
   1336	struct mount_attr attr = {
   1337		.attr_set = MOUNT_ATTR_IDMAP,
   1338	};
   1339
   1340	if (!mount_setattr_supported())
   1341		SKIP(return, "mount_setattr syscall not supported");
   1342
   1343	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
   1344				     AT_EMPTY_PATH |
   1345				     AT_NO_AUTOMOUNT |
   1346				     AT_SYMLINK_NOFOLLOW |
   1347				     OPEN_TREE_CLOEXEC |
   1348				     OPEN_TREE_CLONE);
   1349	ASSERT_GE(open_tree_fd, 0);
   1350
   1351	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
   1352
   1353	/* Changing mount properties on a detached mount. */
   1354	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
   1355	ASSERT_GE(attr.userns_fd, 0);
   1356	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
   1357				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
   1358	ASSERT_EQ(close(attr.userns_fd), 0);
   1359	ASSERT_EQ(close(open_tree_fd), 0);
   1360}
   1361
   1362/**
   1363 * Validate that currently changing the idmapping of an idmapped mount fails.
   1364 */
   1365TEST_F(mount_setattr_idmapped, change_idmapping)
   1366{
   1367	int open_tree_fd = -EBADF;
   1368	struct mount_attr attr = {
   1369		.attr_set = MOUNT_ATTR_IDMAP,
   1370	};
   1371
   1372	if (!mount_setattr_supported())
   1373		SKIP(return, "mount_setattr syscall not supported");
   1374
   1375	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
   1376				     AT_EMPTY_PATH |
   1377				     AT_NO_AUTOMOUNT |
   1378				     AT_SYMLINK_NOFOLLOW |
   1379				     OPEN_TREE_CLOEXEC |
   1380				     OPEN_TREE_CLONE);
   1381	ASSERT_GE(open_tree_fd, 0);
   1382
   1383	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
   1384	ASSERT_GE(attr.userns_fd, 0);
   1385	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
   1386				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
   1387	ASSERT_EQ(close(attr.userns_fd), 0);
   1388
   1389	/* Change idmapping on a detached mount that is already idmapped. */
   1390	attr.userns_fd	= get_userns_fd(0, 20000, 10000);
   1391	ASSERT_GE(attr.userns_fd, 0);
   1392	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
   1393	ASSERT_EQ(close(attr.userns_fd), 0);
   1394	ASSERT_EQ(close(open_tree_fd), 0);
   1395}
   1396
   1397static bool expected_uid_gid(int dfd, const char *path, int flags,
   1398			     uid_t expected_uid, gid_t expected_gid)
   1399{
   1400	int ret;
   1401	struct stat st;
   1402
   1403	ret = fstatat(dfd, path, &st, flags);
   1404	if (ret < 0)
   1405		return false;
   1406
   1407	return st.st_uid == expected_uid && st.st_gid == expected_gid;
   1408}
   1409
   1410TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
   1411{
   1412	int open_tree_fd = -EBADF;
   1413	struct mount_attr attr = {
   1414		.attr_set = MOUNT_ATTR_IDMAP,
   1415	};
   1416
   1417	if (!mount_setattr_supported())
   1418		SKIP(return, "mount_setattr syscall not supported");
   1419
   1420	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
   1421	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
   1422
   1423	open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
   1424				     AT_RECURSIVE |
   1425				     AT_EMPTY_PATH |
   1426				     AT_NO_AUTOMOUNT |
   1427				     AT_SYMLINK_NOFOLLOW |
   1428				     OPEN_TREE_CLOEXEC |
   1429				     OPEN_TREE_CLONE);
   1430	ASSERT_GE(open_tree_fd, 0);
   1431
   1432	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
   1433	ASSERT_GE(attr.userns_fd, 0);
   1434	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
   1435	ASSERT_EQ(close(attr.userns_fd), 0);
   1436	ASSERT_EQ(close(open_tree_fd), 0);
   1437
   1438	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
   1439	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
   1440	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
   1441	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
   1442}
   1443
   1444TEST_F(mount_setattr, mount_attr_nosymfollow)
   1445{
   1446	int fd;
   1447	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
   1448	struct mount_attr attr = {
   1449		.attr_set	= MOUNT_ATTR_NOSYMFOLLOW,
   1450	};
   1451
   1452	if (!mount_setattr_supported())
   1453		SKIP(return, "mount_setattr syscall not supported");
   1454
   1455	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
   1456	ASSERT_GT(fd, 0);
   1457	ASSERT_EQ(close(fd), 0);
   1458
   1459	old_flags = read_mnt_flags("/mnt/A");
   1460	ASSERT_GT(old_flags, 0);
   1461
   1462	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
   1463
   1464	expected_flags = old_flags;
   1465	expected_flags |= ST_NOSYMFOLLOW;
   1466
   1467	new_flags = read_mnt_flags("/mnt/A");
   1468	ASSERT_EQ(new_flags, expected_flags);
   1469
   1470	new_flags = read_mnt_flags("/mnt/A/AA");
   1471	ASSERT_EQ(new_flags, expected_flags);
   1472
   1473	new_flags = read_mnt_flags("/mnt/A/AA/B");
   1474	ASSERT_EQ(new_flags, expected_flags);
   1475
   1476	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
   1477	ASSERT_EQ(new_flags, expected_flags);
   1478
   1479	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
   1480	ASSERT_LT(fd, 0);
   1481	ASSERT_EQ(errno, ELOOP);
   1482
   1483	attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
   1484	attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
   1485
   1486	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
   1487
   1488	expected_flags &= ~ST_NOSYMFOLLOW;
   1489	new_flags = read_mnt_flags("/mnt/A");
   1490	ASSERT_EQ(new_flags, expected_flags);
   1491
   1492	new_flags = read_mnt_flags("/mnt/A/AA");
   1493	ASSERT_EQ(new_flags, expected_flags);
   1494
   1495	new_flags = read_mnt_flags("/mnt/A/AA/B");
   1496	ASSERT_EQ(new_flags, expected_flags);
   1497
   1498	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
   1499	ASSERT_EQ(new_flags, expected_flags);
   1500
   1501	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
   1502	ASSERT_GT(fd, 0);
   1503	ASSERT_EQ(close(fd), 0);
   1504}
   1505
/* Test program entry point; presumably supplied by ../kselftest_harness.h. */
TEST_HARNESS_MAIN