cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

bench_ringbufs.c (13441B)


// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <asm/barrier.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/mman.h>
#include <argp.h>
#include <stdlib.h>
#include "bench.h"
#include "ringbuf_bench.skel.h"
#include "perfbuf_bench.skel.h"

static struct {
	bool back2back;
	int batch_cnt;
	bool sampled;
	int sample_rate;
	int ringbuf_sz; /* per-ringbuf, in bytes */
	bool ringbuf_use_output; /* use slower output API */
	int perfbuf_sz; /* per-CPU size, in pages */
} args = {
	.back2back = false,
	.batch_cnt = 500,
	.sampled = false,
	.sample_rate = 500,
	.ringbuf_sz = 512 * 1024,
	.ringbuf_use_output = false,
	.perfbuf_sz = 128,
};

enum {
	ARG_RB_BACK2BACK = 2000,
	ARG_RB_USE_OUTPUT = 2001,
	ARG_RB_BATCH_CNT = 2002,
	ARG_RB_SAMPLED = 2003,
	ARG_RB_SAMPLE_RATE = 2004,
};

static const struct argp_option opts[] = {
	{ "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
	{ "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
	{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
	{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
	{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	switch (key) {
	case ARG_RB_BACK2BACK:
		args.back2back = true;
		break;
	case ARG_RB_USE_OUTPUT:
		args.ringbuf_use_output = true;
		break;
	case ARG_RB_BATCH_CNT:
		args.batch_cnt = strtol(arg, NULL, 10);
		if (args.batch_cnt < 0) {
			fprintf(stderr, "Invalid batch count.\n");
			argp_usage(state);
		}
		break;
	case ARG_RB_SAMPLED:
		args.sampled = true;
		break;
	case ARG_RB_SAMPLE_RATE:
		args.sample_rate = strtol(arg, NULL, 10);
		if (args.sample_rate < 0) {
			fprintf(stderr, "Invalid sample rate.\n");
			argp_usage(state);
		}
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}
	return 0;
}

/* exported into benchmark runner */
const struct argp bench_ringbufs_argp = {
	.options = opts,
	.parser = parse_arg,
};

/* RINGBUF-LIBBPF benchmark */

static struct counter buf_hits;

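/* The BPF-side benchmark programs are attached to the getpgid() syscall
 * path, so each syscall below triggers one batch of batch_cnt record
 * submissions into the buffer under test.
 */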
static inline void bufs_trigger_batch(void)
{
	(void)syscall(__NR_getpgid);
}

static void bufs_validate(void)
{
	if (env.consumer_cnt != 1) {
		fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
		exit(1);
	}

	if (args.back2back && env.producer_cnt > 1) {
		fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
		exit(1);
	}
}

static void *bufs_sample_producer(void *input)
{
	if (args.back2back) {
		/* initial batch to get everything started */
		bufs_trigger_batch();
		return NULL;
	}

	while (true)
		bufs_trigger_batch();
	return NULL;
}

static struct ringbuf_libbpf_ctx {
	struct ringbuf_bench *skel;
	struct ring_buffer *ringbuf;
} ringbuf_libbpf_ctx;

static void ringbuf_libbpf_measure(struct bench_res *res)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;

	res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
	struct ringbuf_bench *skel;

	setup_libbpf();

	skel = ringbuf_bench__open();
	if (!skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	skel->rodata->batch_cnt = args.batch_cnt;
	skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;

	if (args.sampled)
		/* record data + header take 16 bytes */
		skel->rodata->wakeup_data_size = args.sample_rate * 16;

	bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);

	if (ringbuf_bench__load(skel)) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	return skel;
}

static int buf_process_sample(void *ctx, void *data, size_t len)
{
	atomic_inc(&buf_hits.value);
	return 0;
}

static void ringbuf_libbpf_setup(void)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
	struct bpf_link *link;

	ctx->skel = ringbuf_setup_skeleton();
	ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
					buf_process_sample, NULL, NULL);
	if (!ctx->ringbuf) {
		fprintf(stderr, "failed to create ringbuf\n");
		exit(1);
	}

	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program!\n");
		exit(1);
	}
}

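/* In back-to-back mode each fully consumed batch triggers the next one, so
 * the measured rate reflects the complete produce->notify->consume cycle
 * rather than raw consumer throughput.
 */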
static void *ringbuf_libbpf_consumer(void *input)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;

	while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
		if (args.back2back)
			bufs_trigger_batch();
	}
	fprintf(stderr, "ringbuf polling failed!\n");
	return NULL;
}

/* RINGBUF-CUSTOM benchmark */
struct ringbuf_custom {
	__u64 *consumer_pos;
	__u64 *producer_pos;
	__u64 mask;
	void *data;
	int map_fd;
};

static struct ringbuf_custom_ctx {
	struct ringbuf_bench *skel;
	struct ringbuf_custom ringbuf;
	int epoll_fd;
	struct epoll_event event;
} ringbuf_custom_ctx;

static void ringbuf_custom_measure(struct bench_res *res)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;

	res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static void ringbuf_custom_setup(void)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
	const size_t page_size = getpagesize();
	struct bpf_link *link;
	struct ringbuf_custom *r;
	void *tmp;
	int err;

	ctx->skel = ringbuf_setup_skeleton();

	ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
	if (ctx->epoll_fd < 0) {
		fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
		exit(1);
	}

	r = &ctx->ringbuf;
	r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
	r->mask = args.ringbuf_sz - 1;

	/* Map writable consumer page */
	tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   r->map_fd, 0);
	if (tmp == MAP_FAILED) {
		fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
		exit(1);
	}
	r->consumer_pos = tmp;

	/* Map read-only producer page and data pages. */
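	/* The data area is mapped twice back-to-back (hence 2 * ringbuf_sz),
	 * so a record that wraps around the end of the ring can still be
	 * read as one contiguous chunk.
	 */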
	tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
		   r->map_fd, page_size);
	if (tmp == MAP_FAILED) {
		fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
		exit(1);
	}
	r->producer_pos = tmp;
	r->data = tmp + page_size;

	ctx->event.events = EPOLLIN;
	err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
	if (err < 0) {
		fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
		exit(1);
	}

	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program\n");
		exit(1);
	}
}

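/* Each ringbuf record starts with a 32-bit length word: the low 30 bits hold
 * the payload length, bit 31 marks a record that is still being written, and
 * bit 30 one that was discarded.
 */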
#define RINGBUF_BUSY_BIT (1 << 31)
#define RINGBUF_DISCARD_BIT (1 << 30)
#define RINGBUF_META_LEN 8

static inline int roundup_len(__u32 len)
{
	/* clear out top 2 bits */
	len <<= 2;
	len >>= 2;
	/* add length prefix */
	len += RINGBUF_META_LEN;
	/* round up to 8 byte alignment */
	return (len + 7) / 8 * 8;
}

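/* Lock-free consumer: load the producer position with acquire semantics,
 * walk records until an uncommitted (busy) one is hit, then publish the new
 * consumer position with a release store so the kernel can reuse the space.
 */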
static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
{
	unsigned long cons_pos, prod_pos;
	int *len_ptr, len;
	bool got_new_data;

	cons_pos = smp_load_acquire(r->consumer_pos);
	while (true) {
		got_new_data = false;
		prod_pos = smp_load_acquire(r->producer_pos);
		while (cons_pos < prod_pos) {
			len_ptr = r->data + (cons_pos & r->mask);
			len = smp_load_acquire(len_ptr);

			/* sample not committed yet, bail out for now */
			if (len & RINGBUF_BUSY_BIT)
				return;

			got_new_data = true;
			cons_pos += roundup_len(len);

			atomic_inc(&buf_hits.value);
		}
		if (got_new_data)
			smp_store_release(r->consumer_pos, cons_pos);
		else
			break;
	}
}

static void *ringbuf_custom_consumer(void *input)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
	int cnt;

	do {
		if (args.back2back)
			bufs_trigger_batch();
		cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
		if (cnt > 0)
			ringbuf_custom_process_ring(&ctx->ringbuf);
	} while (cnt >= 0);
	fprintf(stderr, "ringbuf polling failed!\n");
	return NULL;
}

/* PERFBUF-LIBBPF benchmark */
static struct perfbuf_libbpf_ctx {
	struct perfbuf_bench *skel;
	struct perf_buffer *perfbuf;
} perfbuf_libbpf_ctx;

static void perfbuf_measure(struct bench_res *res)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;

	res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static struct perfbuf_bench *perfbuf_setup_skeleton(void)
{
	struct perfbuf_bench *skel;

	setup_libbpf();

	skel = perfbuf_bench__open();
	if (!skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	skel->rodata->batch_cnt = args.batch_cnt;

	if (perfbuf_bench__load(skel)) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	return skel;
}

static enum bpf_perf_event_ret
perfbuf_process_sample_raw(void *input_ctx, int cpu,
			   struct perf_event_header *e)
{
	switch (e->type) {
	case PERF_RECORD_SAMPLE:
		atomic_inc(&buf_hits.value);
		break;
	case PERF_RECORD_LOST:
		break;
	default:
		return LIBBPF_PERF_EVENT_ERROR;
	}
	return LIBBPF_PERF_EVENT_CONT;
}

static void perfbuf_libbpf_setup(void)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
	struct perf_event_attr attr;
	struct bpf_link *link;

	ctx->skel = perfbuf_setup_skeleton();

	memset(&attr, 0, sizeof(attr));
	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
	attr.type = PERF_TYPE_SOFTWARE;
	attr.sample_type = PERF_SAMPLE_RAW;
	/* notify only every Nth sample */
	if (args.sampled) {
		attr.sample_period = args.sample_rate;
		attr.wakeup_events = args.sample_rate;
	} else {
		attr.sample_period = 1;
		attr.wakeup_events = 1;
	}

	if (args.sample_rate > args.batch_cnt) {
		fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
			args.sample_rate, args.batch_cnt);
		exit(1);
	}

	ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
					    args.perfbuf_sz, &attr,
					    perfbuf_process_sample_raw, NULL, NULL);
	if (!ctx->perfbuf) {
		fprintf(stderr, "failed to create perfbuf\n");
		exit(1);
	}

	link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program\n");
		exit(1);
	}
}

static void *perfbuf_libbpf_consumer(void *input)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;

	while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
		if (args.back2back)
			bufs_trigger_batch();
	}
	fprintf(stderr, "perfbuf polling failed!\n");
	return NULL;
}

/* PERFBUF-CUSTOM benchmark */

/* copies of internal libbpf definitions */
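/* NOTE: these mirror libbpf's private struct layouts so the custom consumer
 * can reach into perf_buffer internals; they must be kept in sync with the
 * libbpf version being linked against.
 */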
struct perf_cpu_buf {
	struct perf_buffer *pb;
	void *base; /* mmap()'ed memory */
	void *buf; /* for reconstructing segmented data */
	size_t buf_size;
	int fd;
	int cpu;
	int map_key;
};

struct perf_buffer {
	perf_buffer_event_fn event_cb;
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	void *ctx; /* passed into callbacks */

	size_t page_size;
	size_t mmap_size;
	struct perf_cpu_buf **cpu_bufs;
	struct epoll_event *events;
	int cpu_cnt; /* number of allocated CPU buffers */
	int epoll_fd; /* perf event FD */
	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};

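/* Bypasses libbpf's per-record callbacks entirely: reads each CPU's perf
 * mmap region directly, advancing data_tail past every record and counting
 * PERF_RECORD_SAMPLE entries without copying any payload out.
 */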
static void *perfbuf_custom_consumer(void *input)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
	struct perf_buffer *pb = ctx->perfbuf;
	struct perf_cpu_buf *cpu_buf;
	struct perf_event_mmap_page *header;
	size_t mmap_mask = pb->mmap_size - 1;
	struct perf_event_header *ehdr;
	__u64 data_head, data_tail;
	size_t ehdr_size;
	void *base;
	int i, cnt;

	while (true) {
		if (args.back2back)
			bufs_trigger_batch();
		cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
		if (cnt <= 0) {
			fprintf(stderr, "perf epoll failed: %d\n", -errno);
			exit(1);
		}

		for (i = 0; i < cnt; ++i) {
			cpu_buf = pb->events[i].data.ptr;
			header = cpu_buf->base;
			base = ((void *)header) + pb->page_size;

			data_head = ring_buffer_read_head(header);
			data_tail = header->data_tail;
			while (data_head != data_tail) {
				ehdr = base + (data_tail & mmap_mask);
				ehdr_size = ehdr->size;

				if (ehdr->type == PERF_RECORD_SAMPLE)
					atomic_inc(&buf_hits.value);

				data_tail += ehdr_size;
			}
			ring_buffer_write_tail(header, data_tail);
		}
	}
	return NULL;
}

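/* Benchmark descriptors below are picked up by the common benchmark runner
 * (see bench.h), which wires up the validate/setup/producer/consumer and
 * reporting callbacks.
 */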
const struct bench bench_rb_libbpf = {
	.name = "rb-libbpf",
	.validate = bufs_validate,
	.setup = ringbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = ringbuf_libbpf_consumer,
	.measure = ringbuf_libbpf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_rb_custom = {
	.name = "rb-custom",
	.validate = bufs_validate,
	.setup = ringbuf_custom_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = ringbuf_custom_consumer,
	.measure = ringbuf_custom_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_pb_libbpf = {
	.name = "pb-libbpf",
	.validate = bufs_validate,
	.setup = perfbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = perfbuf_libbpf_consumer,
	.measure = perfbuf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_pb_custom = {
	.name = "pb-custom",
	.validate = bufs_validate,
	.setup = perfbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = perfbuf_custom_consumer,
	.measure = perfbuf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};