cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

user-trap.c (8127B)


      1#include <signal.h>
      2#include <stdio.h>
      3#include <stdlib.h>
      4#include <unistd.h>
      5#include <errno.h>
      6#include <fcntl.h>
      7#include <string.h>
      8#include <stddef.h>
      9#include <sys/sysmacros.h>
     10#include <sys/types.h>
     11#include <sys/wait.h>
     12#include <sys/socket.h>
     13#include <sys/stat.h>
     14#include <sys/mman.h>
     15#include <sys/syscall.h>
     16#include <sys/user.h>
     17#include <sys/ioctl.h>
     18#include <sys/ptrace.h>
     19#include <sys/mount.h>
     20#include <linux/limits.h>
     21#include <linux/filter.h>
     22#include <linux/seccomp.h>
     23
     24#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
     25
     26static int seccomp(unsigned int op, unsigned int flags, void *args)
     27{
     28	errno = 0;
     29	return syscall(__NR_seccomp, op, flags, args);
     30}
     31
     32static int send_fd(int sock, int fd)
     33{
     34	struct msghdr msg = {};
     35	struct cmsghdr *cmsg;
     36	char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c';
     37	struct iovec io = {
     38		.iov_base = &c,
     39		.iov_len = 1,
     40	};
     41
     42	msg.msg_iov = &io;
     43	msg.msg_iovlen = 1;
     44	msg.msg_control = buf;
     45	msg.msg_controllen = sizeof(buf);
     46	cmsg = CMSG_FIRSTHDR(&msg);
     47	cmsg->cmsg_level = SOL_SOCKET;
     48	cmsg->cmsg_type = SCM_RIGHTS;
     49	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
     50	*((int *)CMSG_DATA(cmsg)) = fd;
     51	msg.msg_controllen = cmsg->cmsg_len;
     52
     53	if (sendmsg(sock, &msg, 0) < 0) {
     54		perror("sendmsg");
     55		return -1;
     56	}
     57
     58	return 0;
     59}
     60
     61static int recv_fd(int sock)
     62{
     63	struct msghdr msg = {};
     64	struct cmsghdr *cmsg;
     65	char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c';
     66	struct iovec io = {
     67		.iov_base = &c,
     68		.iov_len = 1,
     69	};
     70
     71	msg.msg_iov = &io;
     72	msg.msg_iovlen = 1;
     73	msg.msg_control = buf;
     74	msg.msg_controllen = sizeof(buf);
     75
     76	if (recvmsg(sock, &msg, 0) < 0) {
     77		perror("recvmsg");
     78		return -1;
     79	}
     80
     81	cmsg = CMSG_FIRSTHDR(&msg);
     82
     83	return *((int *)CMSG_DATA(cmsg));
     84}
     85
     86static int user_trap_syscall(int nr, unsigned int flags)
     87{
     88	struct sock_filter filter[] = {
     89		BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
     90			offsetof(struct seccomp_data, nr)),
     91		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
     92		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
     93		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
     94	};
     95
     96	struct sock_fprog prog = {
     97		.len = (unsigned short)ARRAY_SIZE(filter),
     98		.filter = filter,
     99	};
    100
    101	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
    102}
    103
    104static int handle_req(struct seccomp_notif *req,
    105		      struct seccomp_notif_resp *resp, int listener)
    106{
    107	char path[PATH_MAX], source[PATH_MAX], target[PATH_MAX];
    108	int ret = -1, mem;
    109
    110	resp->id = req->id;
    111	resp->error = -EPERM;
    112	resp->val = 0;
    113
    114	if (req->data.nr != __NR_mount) {
    115		fprintf(stderr, "huh? trapped something besides mount? %d\n", req->data.nr);
    116		return -1;
    117	}
    118
    119	/* Only allow bind mounts. */
    120	if (!(req->data.args[3] & MS_BIND))
    121		return 0;
    122
    123	/*
    124	 * Ok, let's read the task's memory to see where they wanted their
    125	 * mount to go.
    126	 */
    127	snprintf(path, sizeof(path), "/proc/%d/mem", req->pid);
    128	mem = open(path, O_RDONLY);
    129	if (mem < 0) {
    130		perror("open mem");
    131		return -1;
    132	}
    133
    134	/*
    135	 * Now we avoid a TOCTOU: we referred to a pid by its pid, but since
    136	 * the pid that made the syscall may have died, we need to confirm that
    137	 * the pid is still valid after we open its /proc/pid/mem file. We can
    138	 * ask the listener fd this as follows.
    139	 *
    140	 * Note that this check should occur *after* any task-specific
    141	 * resources are opened, to make sure that the task has not died and
    142	 * we're not wrongly reading someone else's state in order to make
    143	 * decisions.
    144	 */
    145	if (ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req->id) < 0) {
    146		fprintf(stderr, "task died before we could map its memory\n");
    147		goto out;
    148	}
    149
    150	/*
    151	 * Phew, we've got the right /proc/pid/mem. Now we can read it. Note
    152	 * that to avoid another TOCTOU, we should read all of the pointer args
    153	 * before we decide to allow the syscall.
    154	 */
    155	if (lseek(mem, req->data.args[0], SEEK_SET) < 0) {
    156		perror("seek");
    157		goto out;
    158	}
    159
    160	ret = read(mem, source, sizeof(source));
    161	if (ret < 0) {
    162		perror("read");
    163		goto out;
    164	}
    165
    166	if (lseek(mem, req->data.args[1], SEEK_SET) < 0) {
    167		perror("seek");
    168		goto out;
    169	}
    170
    171	ret = read(mem, target, sizeof(target));
    172	if (ret < 0) {
    173		perror("read");
    174		goto out;
    175	}
    176
    177	/*
    178	 * Our policy is to only allow bind mounts inside /tmp. This isn't very
    179	 * interesting, because we could do unprivlieged bind mounts with user
    180	 * namespaces already, but you get the idea.
    181	 */
    182	if (!strncmp(source, "/tmp/", 5) && !strncmp(target, "/tmp/", 5)) {
    183		if (mount(source, target, NULL, req->data.args[3], NULL) < 0) {
    184			ret = -1;
    185			perror("actual mount");
    186			goto out;
    187		}
    188		resp->error = 0;
    189	}
    190
    191	/* Even if we didn't allow it because of policy, generating the
    192	 * response was be a success, because we want to tell the worker EPERM.
    193	 */
    194	ret = 0;
    195
    196out:
    197	close(mem);
    198	return ret;
    199}
    200
    201int main(void)
    202{
    203	int sk_pair[2], ret = 1, status, listener;
    204	pid_t worker = 0 , tracer = 0;
    205
    206	if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair) < 0) {
    207		perror("socketpair");
    208		return 1;
    209	}
    210
    211	worker = fork();
    212	if (worker < 0) {
    213		perror("fork");
    214		goto close_pair;
    215	}
    216
    217	if (worker == 0) {
    218		listener = user_trap_syscall(__NR_mount,
    219					     SECCOMP_FILTER_FLAG_NEW_LISTENER);
    220		if (listener < 0) {
    221			perror("seccomp");
    222			exit(1);
    223		}
    224
    225		/*
    226		 * Drop privileges. We definitely can't mount as uid 1000.
    227		 */
    228		if (setuid(1000) < 0) {
    229			perror("setuid");
    230			exit(1);
    231		}
    232
    233		/*
    234		 * Send the listener to the parent; also serves as
    235		 * synchronization.
    236		 */
    237		if (send_fd(sk_pair[1], listener) < 0)
    238			exit(1);
    239		close(listener);
    240
    241		if (mkdir("/tmp/foo", 0755) < 0) {
    242			perror("mkdir");
    243			exit(1);
    244		}
    245
    246		/*
    247		 * Try a bad mount just for grins.
    248		 */
    249		if (mount("/dev/sda", "/tmp/foo", NULL, 0, NULL) != -1) {
    250			fprintf(stderr, "huh? mounted /dev/sda?\n");
    251			exit(1);
    252		}
    253
    254		if (errno != EPERM) {
    255			perror("bad error from mount");
    256			exit(1);
    257		}
    258
    259		/*
    260		 * Ok, we expect this one to succeed.
    261		 */
    262		if (mount("/tmp/foo", "/tmp/foo", NULL, MS_BIND, NULL) < 0) {
    263			perror("mount");
    264			exit(1);
    265		}
    266
    267		exit(0);
    268	}
    269
    270	/*
    271	 * Get the listener from the child.
    272	 */
    273	listener = recv_fd(sk_pair[0]);
    274	if (listener < 0)
    275		goto out_kill;
    276
    277	/*
    278	 * Fork a task to handle the requests. This isn't strictly necessary,
    279	 * but it makes the particular writing of this sample easier, since we
    280	 * can just wait ofr the tracee to exit and kill the tracer.
    281	 */
    282	tracer = fork();
    283	if (tracer < 0) {
    284		perror("fork");
    285		goto out_kill;
    286	}
    287
    288	if (tracer == 0) {
    289		struct seccomp_notif *req;
    290		struct seccomp_notif_resp *resp;
    291		struct seccomp_notif_sizes sizes;
    292
    293		if (seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes) < 0) {
    294			perror("seccomp(GET_NOTIF_SIZES)");
    295			goto out_close;
    296		}
    297
    298		req = malloc(sizes.seccomp_notif);
    299		if (!req)
    300			goto out_close;
    301
    302		resp = malloc(sizes.seccomp_notif_resp);
    303		if (!resp)
    304			goto out_req;
    305		memset(resp, 0, sizes.seccomp_notif_resp);
    306
    307		while (1) {
    308			memset(req, 0, sizes.seccomp_notif);
    309			if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, req)) {
    310				perror("ioctl recv");
    311				goto out_resp;
    312			}
    313
    314			if (handle_req(req, resp, listener) < 0)
    315				goto out_resp;
    316
    317			/*
    318			 * ENOENT here means that the task may have gotten a
    319			 * signal and restarted the syscall. It's up to the
    320			 * handler to decide what to do in this case, but for
    321			 * the sample code, we just ignore it. Probably
    322			 * something better should happen, like undoing the
    323			 * mount, or keeping track of the args to make sure we
    324			 * don't do it again.
    325			 */
    326			if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, resp) < 0 &&
    327			    errno != ENOENT) {
    328				perror("ioctl send");
    329				goto out_resp;
    330			}
    331		}
    332out_resp:
    333		free(resp);
    334out_req:
    335		free(req);
    336out_close:
    337		close(listener);
    338		exit(1);
    339	}
    340
    341	close(listener);
    342
    343	if (waitpid(worker, &status, 0) != worker) {
    344		perror("waitpid");
    345		goto out_kill;
    346	}
    347
    348	if (umount2("/tmp/foo", MNT_DETACH) < 0 && errno != EINVAL) {
    349		perror("umount2");
    350		goto out_kill;
    351	}
    352
    353	if (remove("/tmp/foo") < 0 && errno != ENOENT) {
    354		perror("remove");
    355		exit(1);
    356	}
    357
    358	if (!WIFEXITED(status) || WEXITSTATUS(status)) {
    359		fprintf(stderr, "worker exited nonzero\n");
    360		goto out_kill;
    361	}
    362
    363	ret = 0;
    364
    365out_kill:
    366	if (tracer > 0)
    367		kill(tracer, SIGKILL);
    368	if (worker > 0)
    369		kill(worker, SIGKILL);
    370
    371close_pair:
    372	close(sk_pair[0]);
    373	close(sk_pair[1]);
    374	return ret;
    375}