cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

io_uring-bench.c (12614B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Simple benchmark program that uses the various features of io_uring
      4 * to provide fast random access to a device/file. It has various
      5 * options that control how we use io_uring, see the OPTIONS section
      6 * below. This uses the raw io_uring interface.
      7 *
      8 * Copyright (C) 2018-2019 Jens Axboe
      9 */
     10#include <stdio.h>
     11#include <errno.h>
     12#include <assert.h>
     13#include <stdlib.h>
     14#include <stddef.h>
     15#include <signal.h>
     16#include <inttypes.h>
     17
     18#include <sys/types.h>
     19#include <sys/stat.h>
     20#include <sys/ioctl.h>
     21#include <sys/syscall.h>
     22#include <sys/resource.h>
     23#include <sys/mman.h>
     24#include <sys/uio.h>
     25#include <linux/fs.h>
     26#include <fcntl.h>
     27#include <unistd.h>
     28#include <string.h>
     29#include <pthread.h>
     30#include <sched.h>
     31
     32#include "liburing.h"
     33#include "barrier.h"
     34
     35#define min(a, b)		((a < b) ? (a) : (b))
     36
/*
 * Userspace view of the submission queue ring. setup_ring() points every
 * member into the IORING_OFF_SQ_RING mapping, at the offsets reported in
 * io_uring_params.sq_off.
 */
struct io_sq_ring {
	unsigned *head;		/* compared against the next tail in prep_more_ios() */
	unsigned *tail;		/* advanced by prep_more_ios() after filling sqes */
	unsigned *ring_mask;	/* copied into sq_ring_mask by setup_ring() */
	unsigned *ring_entries;	/* printed by main() as the sq_ring size */
	unsigned *flags;	/* checked for IORING_SQ_NEED_WAKEUP */
	unsigned *array;	/* slot -> sqe index table, filled by prep_more_ios() */
};
     45
/*
 * Userspace view of the completion queue ring. setup_ring() points every
 * member into the IORING_OFF_CQ_RING mapping, at the offsets reported in
 * io_uring_params.cq_off.
 */
struct io_cq_ring {
	unsigned *head;		/* advanced by reap_events() after consuming cqes */
	unsigned *tail;		/* read by reap_events() to detect new completions */
	unsigned *ring_mask;	/* copied into cq_ring_mask by setup_ring() */
	unsigned *ring_entries;	/* printed by main() as the cq_ring size */
	struct io_uring_cqe *cqes;	/* completion entries, indexed head & mask */
};
     53
/*
 * Entry count passed to io_uring_setup(); also the number of IO buffers
 * allocated in main() and the cap on in-flight IOs in submitter_fn().
 */
#define DEPTH			128

/* Upper bounds on IOs prepared per pass and completions waited for per call */
#define BATCH_SUBMIT		32
#define BATCH_COMPLETE		32

/* Size in bytes of each IO buffer and the expected result of each read */
#define BS			4096

/* Capacity of submitter.files[]; main() stops adding files at this count */
#define MAX_FDS			16
     62
/*
 * Cached copies of *ring_mask for the SQ and CQ rings, set in setup_ring()
 * and used to turn a running tail/head counter into a ring slot index.
 */
static unsigned sq_ring_mask, cq_ring_mask;
     64
/* Per-target state for one file or block device named on the command line. */
struct file {
	unsigned long max_blocks;	/* size in BS units from get_file_size(), then decremented by main() */
	unsigned pending_ios;		/* incremented in init_io(), decremented in reap_events() */
	int real_fd;			/* descriptor returned by open() in main() */
	int fixed_fd;			/* index into the registered fds array */
};
     71
/*
 * All state owned by one submission thread: its ring mappings, IO buffers,
 * target files and the counters that main() samples once per second.
 */
struct submitter {
	pthread_t thread;		/* created in main(), runs submitter_fn() */
	int ring_fd;			/* descriptor returned by io_uring_setup() */
	struct drand48_data rand;	/* seeded in submitter_fn(), used by init_io() */
	struct io_sq_ring sq_ring;
	struct io_uring_sqe *sqes;	/* IORING_OFF_SQES mapping */
	struct iovec iovecs[DEPTH];	/* one BS-sized buffer per ring slot */
	struct io_cq_ring cq_ring;
	int inflight;			/* grows by prepped IOs, shrinks by reaped ones */
	unsigned long reaps;		/* total completions reaped */
	unsigned long done;		/* total IOs reported submitted by io_uring_enter() */
	unsigned long calls;		/* number of io_uring_enter() calls */
	volatile int finish;		/* set by sig_int() or on a reap error */

	__s32 *fds;			/* array handed to IORING_REGISTER_FILES */

	struct file files[MAX_FDS];
	unsigned nr_files;		/* number of valid entries in files[] */
	unsigned cur_file;		/* files[] index init_io() currently issues to */
};
     92
/* The single submitter instance used by main(), sig_int() and file_depths(). */
static struct submitter submitters[1];
/* Set by sig_int() and at the end of submitter_fn(); ends main()'s stats loop. */
static volatile int finish;
     95
/*
 * OPTIONS: Set these to test the various features of io_uring.
 *
 * These are compile-time settings: main() treats every command line
 * argument as a file name and does not parse any option flags.
 */
static int polled = 1;		/* use IO polling (IORING_SETUP_IOPOLL) */
static int fixedbufs = 1;	/* use fixed user buffers (IORING_OP_READ_FIXED) */
static int register_files = 1;	/* use fixed files (IOSQE_FIXED_FILE) */
static int buffered = 0;	/* use buffered IO, not O_DIRECT */
static int sq_thread_poll = 0;	/* use kernel submission/poller thread */
static int sq_thread_cpu = -1;	/* pin above thread to this CPU, -1 = no pinning */
static int do_nop = 0;		/* no-op SQ ring commands, no files needed */
    106
    107static int io_uring_register_buffers(struct submitter *s)
    108{
    109	if (do_nop)
    110		return 0;
    111
    112	return io_uring_register(s->ring_fd, IORING_REGISTER_BUFFERS, s->iovecs,
    113					DEPTH);
    114}
    115
    116static int io_uring_register_files(struct submitter *s)
    117{
    118	unsigned i;
    119
    120	if (do_nop)
    121		return 0;
    122
    123	s->fds = calloc(s->nr_files, sizeof(__s32));
    124	for (i = 0; i < s->nr_files; i++) {
    125		s->fds[i] = s->files[i].real_fd;
    126		s->files[i].fixed_fd = i;
    127	}
    128
    129	return io_uring_register(s->ring_fd, IORING_REGISTER_FILES, s->fds,
    130					s->nr_files);
    131}
    132
    133static int lk_gettid(void)
    134{
    135	return syscall(__NR_gettid);
    136}
    137
    138static unsigned file_depth(struct submitter *s)
    139{
    140	return (DEPTH + s->nr_files - 1) / s->nr_files;
    141}
    142
    143static void init_io(struct submitter *s, unsigned index)
    144{
    145	struct io_uring_sqe *sqe = &s->sqes[index];
    146	unsigned long offset;
    147	struct file *f;
    148	long r;
    149
    150	if (do_nop) {
    151		sqe->opcode = IORING_OP_NOP;
    152		return;
    153	}
    154
    155	if (s->nr_files == 1) {
    156		f = &s->files[0];
    157	} else {
    158		f = &s->files[s->cur_file];
    159		if (f->pending_ios >= file_depth(s)) {
    160			s->cur_file++;
    161			if (s->cur_file == s->nr_files)
    162				s->cur_file = 0;
    163			f = &s->files[s->cur_file];
    164		}
    165	}
    166	f->pending_ios++;
    167
    168	lrand48_r(&s->rand, &r);
    169	offset = (r % (f->max_blocks - 1)) * BS;
    170
    171	if (register_files) {
    172		sqe->flags = IOSQE_FIXED_FILE;
    173		sqe->fd = f->fixed_fd;
    174	} else {
    175		sqe->flags = 0;
    176		sqe->fd = f->real_fd;
    177	}
    178	if (fixedbufs) {
    179		sqe->opcode = IORING_OP_READ_FIXED;
    180		sqe->addr = (unsigned long) s->iovecs[index].iov_base;
    181		sqe->len = BS;
    182		sqe->buf_index = index;
    183	} else {
    184		sqe->opcode = IORING_OP_READV;
    185		sqe->addr = (unsigned long) &s->iovecs[index];
    186		sqe->len = 1;
    187		sqe->buf_index = 0;
    188	}
    189	sqe->ioprio = 0;
    190	sqe->off = offset;
    191	sqe->user_data = (unsigned long) f;
    192}
    193
    194static int prep_more_ios(struct submitter *s, unsigned max_ios)
    195{
    196	struct io_sq_ring *ring = &s->sq_ring;
    197	unsigned index, tail, next_tail, prepped = 0;
    198
    199	next_tail = tail = *ring->tail;
    200	do {
    201		next_tail++;
    202		read_barrier();
    203		if (next_tail == *ring->head)
    204			break;
    205
    206		index = tail & sq_ring_mask;
    207		init_io(s, index);
    208		ring->array[index] = index;
    209		prepped++;
    210		tail = next_tail;
    211	} while (prepped < max_ios);
    212
    213	if (*ring->tail != tail) {
    214		/* order tail store with writes to sqes above */
    215		write_barrier();
    216		*ring->tail = tail;
    217		write_barrier();
    218	}
    219	return prepped;
    220}
    221
    222static int get_file_size(struct file *f)
    223{
    224	struct stat st;
    225
    226	if (fstat(f->real_fd, &st) < 0)
    227		return -1;
    228	if (S_ISBLK(st.st_mode)) {
    229		unsigned long long bytes;
    230
    231		if (ioctl(f->real_fd, BLKGETSIZE64, &bytes) != 0)
    232			return -1;
    233
    234		f->max_blocks = bytes / BS;
    235		return 0;
    236	} else if (S_ISREG(st.st_mode)) {
    237		f->max_blocks = st.st_size / BS;
    238		return 0;
    239	}
    240
    241	return -1;
    242}
    243
    244static int reap_events(struct submitter *s)
    245{
    246	struct io_cq_ring *ring = &s->cq_ring;
    247	struct io_uring_cqe *cqe;
    248	unsigned head, reaped = 0;
    249
    250	head = *ring->head;
    251	do {
    252		struct file *f;
    253
    254		read_barrier();
    255		if (head == *ring->tail)
    256			break;
    257		cqe = &ring->cqes[head & cq_ring_mask];
    258		if (!do_nop) {
    259			f = (struct file *) (uintptr_t) cqe->user_data;
    260			f->pending_ios--;
    261			if (cqe->res != BS) {
    262				printf("io: unexpected ret=%d\n", cqe->res);
    263				if (polled && cqe->res == -EOPNOTSUPP)
    264					printf("Your filesystem doesn't support poll\n");
    265				return -1;
    266			}
    267		}
    268		reaped++;
    269		head++;
    270	} while (1);
    271
    272	s->inflight -= reaped;
    273	*ring->head = head;
    274	write_barrier();
    275	return reaped;
    276}
    277
    278static void *submitter_fn(void *data)
    279{
    280	struct submitter *s = data;
    281	struct io_sq_ring *ring = &s->sq_ring;
    282	int ret, prepped;
    283
    284	printf("submitter=%d\n", lk_gettid());
    285
    286	srand48_r(pthread_self(), &s->rand);
    287
    288	prepped = 0;
    289	do {
    290		int to_wait, to_submit, this_reap, to_prep;
    291
    292		if (!prepped && s->inflight < DEPTH) {
    293			to_prep = min(DEPTH - s->inflight, BATCH_SUBMIT);
    294			prepped = prep_more_ios(s, to_prep);
    295		}
    296		s->inflight += prepped;
    297submit_more:
    298		to_submit = prepped;
    299submit:
    300		if (to_submit && (s->inflight + to_submit <= DEPTH))
    301			to_wait = 0;
    302		else
    303			to_wait = min(s->inflight + to_submit, BATCH_COMPLETE);
    304
    305		/*
    306		 * Only need to call io_uring_enter if we're not using SQ thread
    307		 * poll, or if IORING_SQ_NEED_WAKEUP is set.
    308		 */
    309		if (!sq_thread_poll || (*ring->flags & IORING_SQ_NEED_WAKEUP)) {
    310			unsigned flags = 0;
    311
    312			if (to_wait)
    313				flags = IORING_ENTER_GETEVENTS;
    314			if ((*ring->flags & IORING_SQ_NEED_WAKEUP))
    315				flags |= IORING_ENTER_SQ_WAKEUP;
    316			ret = io_uring_enter(s->ring_fd, to_submit, to_wait,
    317						flags, NULL);
    318			s->calls++;
    319		}
    320
    321		/*
    322		 * For non SQ thread poll, we already got the events we needed
    323		 * through the io_uring_enter() above. For SQ thread poll, we
    324		 * need to loop here until we find enough events.
    325		 */
    326		this_reap = 0;
    327		do {
    328			int r;
    329			r = reap_events(s);
    330			if (r == -1) {
    331				s->finish = 1;
    332				break;
    333			} else if (r > 0)
    334				this_reap += r;
    335		} while (sq_thread_poll && this_reap < to_wait);
    336		s->reaps += this_reap;
    337
    338		if (ret >= 0) {
    339			if (!ret) {
    340				to_submit = 0;
    341				if (s->inflight)
    342					goto submit;
    343				continue;
    344			} else if (ret < to_submit) {
    345				int diff = to_submit - ret;
    346
    347				s->done += ret;
    348				prepped -= diff;
    349				goto submit_more;
    350			}
    351			s->done += ret;
    352			prepped = 0;
    353			continue;
    354		} else if (ret < 0) {
    355			if (errno == EAGAIN) {
    356				if (s->finish)
    357					break;
    358				if (this_reap)
    359					goto submit;
    360				to_submit = 0;
    361				goto submit;
    362			}
    363			printf("io_submit: %s\n", strerror(errno));
    364			break;
    365		}
    366	} while (!s->finish);
    367
    368	finish = 1;
    369	return NULL;
    370}
    371
    372static void sig_int(int sig)
    373{
    374	printf("Exiting on signal %d\n", sig);
    375	submitters[0].finish = 1;
    376	finish = 1;
    377}
    378
    379static void arm_sig_int(void)
    380{
    381	struct sigaction act;
    382
    383	memset(&act, 0, sizeof(act));
    384	act.sa_handler = sig_int;
    385	act.sa_flags = SA_RESTART;
    386	sigaction(SIGINT, &act, NULL);
    387}
    388
    389static int setup_ring(struct submitter *s)
    390{
    391	struct io_sq_ring *sring = &s->sq_ring;
    392	struct io_cq_ring *cring = &s->cq_ring;
    393	struct io_uring_params p;
    394	int ret, fd;
    395	void *ptr;
    396
    397	memset(&p, 0, sizeof(p));
    398
    399	if (polled && !do_nop)
    400		p.flags |= IORING_SETUP_IOPOLL;
    401	if (sq_thread_poll) {
    402		p.flags |= IORING_SETUP_SQPOLL;
    403		if (sq_thread_cpu != -1) {
    404			p.flags |= IORING_SETUP_SQ_AFF;
    405			p.sq_thread_cpu = sq_thread_cpu;
    406		}
    407	}
    408
    409	fd = io_uring_setup(DEPTH, &p);
    410	if (fd < 0) {
    411		perror("io_uring_setup");
    412		return 1;
    413	}
    414	s->ring_fd = fd;
    415
    416	if (fixedbufs) {
    417		ret = io_uring_register_buffers(s);
    418		if (ret < 0) {
    419			perror("io_uring_register_buffers");
    420			return 1;
    421		}
    422	}
    423
    424	if (register_files) {
    425		ret = io_uring_register_files(s);
    426		if (ret < 0) {
    427			perror("io_uring_register_files");
    428			return 1;
    429		}
    430	}
    431
    432	ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(__u32),
    433			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
    434			IORING_OFF_SQ_RING);
    435	printf("sq_ring ptr = 0x%p\n", ptr);
    436	sring->head = ptr + p.sq_off.head;
    437	sring->tail = ptr + p.sq_off.tail;
    438	sring->ring_mask = ptr + p.sq_off.ring_mask;
    439	sring->ring_entries = ptr + p.sq_off.ring_entries;
    440	sring->flags = ptr + p.sq_off.flags;
    441	sring->array = ptr + p.sq_off.array;
    442	sq_ring_mask = *sring->ring_mask;
    443
    444	s->sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
    445			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
    446			IORING_OFF_SQES);
    447	printf("sqes ptr    = 0x%p\n", s->sqes);
    448
    449	ptr = mmap(0, p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe),
    450			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
    451			IORING_OFF_CQ_RING);
    452	printf("cq_ring ptr = 0x%p\n", ptr);
    453	cring->head = ptr + p.cq_off.head;
    454	cring->tail = ptr + p.cq_off.tail;
    455	cring->ring_mask = ptr + p.cq_off.ring_mask;
    456	cring->ring_entries = ptr + p.cq_off.ring_entries;
    457	cring->cqes = ptr + p.cq_off.cqes;
    458	cq_ring_mask = *cring->ring_mask;
    459	return 0;
    460}
    461
    462static void file_depths(char *buf)
    463{
    464	struct submitter *s = &submitters[0];
    465	unsigned i;
    466	char *p;
    467
    468	buf[0] = '\0';
    469	p = buf;
    470	for (i = 0; i < s->nr_files; i++) {
    471		struct file *f = &s->files[i];
    472
    473		if (i + 1 == s->nr_files)
    474			p += sprintf(p, "%d", f->pending_ios);
    475		else
    476			p += sprintf(p, "%d, ", f->pending_ios);
    477	}
    478}
    479
    480int main(int argc, char *argv[])
    481{
    482	struct submitter *s = &submitters[0];
    483	unsigned long done, calls, reap;
    484	int err, i, flags, fd;
    485	char *fdepths;
    486	void *ret;
    487
    488	if (!do_nop && argc < 2) {
    489		printf("%s: filename\n", argv[0]);
    490		return 1;
    491	}
    492
    493	flags = O_RDONLY | O_NOATIME;
    494	if (!buffered)
    495		flags |= O_DIRECT;
    496
    497	i = 1;
    498	while (!do_nop && i < argc) {
    499		struct file *f;
    500
    501		if (s->nr_files == MAX_FDS) {
    502			printf("Max number of files (%d) reached\n", MAX_FDS);
    503			break;
    504		}
    505		fd = open(argv[i], flags);
    506		if (fd < 0) {
    507			perror("open");
    508			return 1;
    509		}
    510
    511		f = &s->files[s->nr_files];
    512		f->real_fd = fd;
    513		if (get_file_size(f)) {
    514			printf("failed getting size of device/file\n");
    515			return 1;
    516		}
    517		if (f->max_blocks <= 1) {
    518			printf("Zero file/device size?\n");
    519			return 1;
    520		}
    521		f->max_blocks--;
    522
    523		printf("Added file %s\n", argv[i]);
    524		s->nr_files++;
    525		i++;
    526	}
    527
    528	if (fixedbufs) {
    529		struct rlimit rlim;
    530
    531		rlim.rlim_cur = RLIM_INFINITY;
    532		rlim.rlim_max = RLIM_INFINITY;
    533		if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) {
    534			perror("setrlimit");
    535			return 1;
    536		}
    537	}
    538
    539	arm_sig_int();
    540
    541	for (i = 0; i < DEPTH; i++) {
    542		void *buf;
    543
    544		if (posix_memalign(&buf, BS, BS)) {
    545			printf("failed alloc\n");
    546			return 1;
    547		}
    548		s->iovecs[i].iov_base = buf;
    549		s->iovecs[i].iov_len = BS;
    550	}
    551
    552	err = setup_ring(s);
    553	if (err) {
    554		printf("ring setup failed: %s, %d\n", strerror(errno), err);
    555		return 1;
    556	}
    557	printf("polled=%d, fixedbufs=%d, buffered=%d", polled, fixedbufs, buffered);
    558	printf(" QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, *s->sq_ring.ring_entries, *s->cq_ring.ring_entries);
    559
    560	pthread_create(&s->thread, NULL, submitter_fn, s);
    561
    562	fdepths = malloc(8 * s->nr_files);
    563	reap = calls = done = 0;
    564	do {
    565		unsigned long this_done = 0;
    566		unsigned long this_reap = 0;
    567		unsigned long this_call = 0;
    568		unsigned long rpc = 0, ipc = 0;
    569
    570		sleep(1);
    571		this_done += s->done;
    572		this_call += s->calls;
    573		this_reap += s->reaps;
    574		if (this_call - calls) {
    575			rpc = (this_done - done) / (this_call - calls);
    576			ipc = (this_reap - reap) / (this_call - calls);
    577		} else
    578			rpc = ipc = -1;
    579		file_depths(fdepths);
    580		printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s)\n",
    581				this_done - done, rpc, ipc, s->inflight,
    582				fdepths);
    583		done = this_done;
    584		calls = this_call;
    585		reap = this_reap;
    586	} while (!finish);
    587
    588	pthread_join(s->thread, &ret);
    589	close(s->ring_fd);
    590	free(fdepths);
    591	return 0;
    592}