cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

strobemeta.h (16948B)


      1// SPDX-License-Identifier: GPL-2.0
      2// Copyright (c) 2019 Facebook
      3
      4#include <stdint.h>
      5#include <stddef.h>
      6#include <stdbool.h>
      7#include <linux/bpf.h>
      8#include <linux/ptrace.h>
      9#include <linux/sched.h>
     10#include <linux/types.h>
     11#include <bpf/bpf_helpers.h>
     12
     13typedef uint32_t pid_t;
     14struct task_struct {};
     15
     16#define TASK_COMM_LEN 16
     17#define PERF_MAX_STACK_DEPTH 127
     18
     19#define STROBE_TYPE_INVALID 0
     20#define STROBE_TYPE_INT 1
     21#define STROBE_TYPE_STR 2
     22#define STROBE_TYPE_MAP 3
     23
     24#define STACK_TABLE_EPOCH_SHIFT 20
     25#define STROBE_MAX_STR_LEN 1
     26#define STROBE_MAX_CFGS 32
     27#define STROBE_MAX_PAYLOAD						\
     28	(STROBE_MAX_STRS * STROBE_MAX_STR_LEN +				\
     29	STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
     30
     31struct strobe_value_header {
     32	/*
     33	 * meaning depends on type:
     34	 * 1. int: 0, if value not set, 1 otherwise
     35	 * 2. str: 1 always, whether value is set or not is determined by ptr
     36	 * 3. map: 1 always, pointer points to additional struct with number
     37	 *    of entries (up to STROBE_MAX_MAP_ENTRIES)
     38	 */
     39	uint16_t len;
     40	/*
     41	 * _reserved might be used for some future fields/flags, but we always
     42	 * want to keep strobe_value_header to be 8 bytes, so BPF can read 16
     43	 * bytes in one go and get both header and value
     44	 */
     45	uint8_t _reserved[6];
     46};
     47
     48/*
     49 * strobe_value_generic is used from BPF probe only, but needs to be a union
     50 * of strobe_value_int/strobe_value_str/strobe_value_map
     51 */
     52struct strobe_value_generic {
     53	struct strobe_value_header header;
     54	union {
     55		int64_t val;
     56		void *ptr;
     57	};
     58};
     59
     60struct strobe_value_int {
     61	struct strobe_value_header header;
     62	int64_t value;
     63};
     64
     65struct strobe_value_str {
     66	struct strobe_value_header header;
     67	const char* value;
     68};
     69
     70struct strobe_value_map {
     71	struct strobe_value_header header;
     72	const struct strobe_map_raw* value;
     73};
     74
     75struct strobe_map_entry {
     76	const char* key;
     77	const char* val;
     78};
     79
     80/*
     81 * Map of C-string key/value pairs with fixed maximum capacity. Each map has
     82 * corresponding int64 ID, which application can use (or ignore) in whatever
     83 * way appropriate. Map is "write-only", there is no way to get data out of
     84 * map. Map is intended to be used to provide metadata for profilers and is
     85 * not to be used for internal in-app communication. All methods are
     86 * thread-safe.
     87 */
     88struct strobe_map_raw {
     89	/*
     90	 * general purpose unique ID that's up to application to decide
     91	 * whether and how to use; for request metadata use case id is unique
     92	 * request ID that's used to match metadata with stack traces on
     93	 * Strobelight backend side
     94	 */
     95	int64_t id;
     96	/* number of used entries in map */
     97	int64_t cnt;
     98	/*
     99	 * having volatile doesn't change anything on BPF side, but clang
    100	 * emits warnings for passing `volatile const char *` into
    101	 * bpf_probe_read_user_str that expects just `const char *`
    102	 */
    103	const char* tag;
    104	/*
    105	 * key/value entries, each consisting of 2 pointers to key and value
    106	 * C strings
    107	 */
    108	struct strobe_map_entry entries[STROBE_MAX_MAP_ENTRIES];
    109};
    110
    111/* Following values define supported values of TLS mode */
    112#define TLS_NOT_SET -1
    113#define TLS_LOCAL_EXEC 0
    114#define TLS_IMM_EXEC 1
    115#define TLS_GENERAL_DYN 2
    116
    117/*
    118 * structure that universally represents TLS location (both for static
    119 * executables and shared libraries)
    120 */
    121struct strobe_value_loc {
    122	/*
    123	 * tls_mode defines what TLS mode was used for particular metavariable:
    124	 * - -1 (TLS_NOT_SET) - no metavariable;
    125	 * - 0 (TLS_LOCAL_EXEC) - Local Executable mode;
    126	 * - 1 (TLS_IMM_EXEC) - Immediate Executable mode;
    127	 * - 2 (TLS_GENERAL_DYN) - General Dynamic mode;
    128	 * Local Dynamic mode is not yet supported, because never seen in
    129	 * practice.  Mode defines how offset field is interpreted. See
    130	 * calc_location() in below for details.
    131	 */
    132	int64_t tls_mode;
    133	/*
    134	 * TLS_LOCAL_EXEC: offset from thread pointer (fs:0 for x86-64,
    135	 * tpidr_el0 for aarch64).
    136	 * TLS_IMM_EXEC: absolute address of GOT entry containing offset
    137	 * from thread pointer;
    138	 * TLS_GENERAL_DYN: absolute addres of double GOT entry
    139	 * containing tls_index_t struct;
    140	 */
    141	int64_t offset;
    142};
    143
    144struct strobemeta_cfg {
    145	int64_t req_meta_idx;
    146	struct strobe_value_loc int_locs[STROBE_MAX_INTS];
    147	struct strobe_value_loc str_locs[STROBE_MAX_STRS];
    148	struct strobe_value_loc map_locs[STROBE_MAX_MAPS];
    149};
    150
    151struct strobe_map_descr {
    152	uint64_t id;
    153	int16_t tag_len;
    154	/*
    155	 * cnt <0 - map value isn't set;
    156	 * 0 - map has id set, but no key/value entries
    157	 */
    158	int16_t cnt;
    159	/*
    160	 * both key_lens[i] and val_lens[i] should be >0 for present key/value
    161	 * entry
    162	 */
    163	uint16_t key_lens[STROBE_MAX_MAP_ENTRIES];
    164	uint16_t val_lens[STROBE_MAX_MAP_ENTRIES];
    165};
    166
    167struct strobemeta_payload {
    168	/* req_id has valid request ID, if req_meta_valid == 1 */
    169	int64_t req_id;
    170	uint8_t req_meta_valid;
    171	/*
    172	 * mask has Nth bit set to 1, if Nth metavar was present and
    173	 * successfully read
    174	 */
    175	uint64_t int_vals_set_mask;
    176	int64_t int_vals[STROBE_MAX_INTS];
    177	/* len is >0 for present values */
    178	uint16_t str_lens[STROBE_MAX_STRS];
    179	/* if map_descrs[i].cnt == -1, metavar is not present/set */
    180	struct strobe_map_descr map_descrs[STROBE_MAX_MAPS];
    181	/*
    182	 * payload has compactly packed values of str and map variables in the
    183	 * form: strval1\0strval2\0map1key1\0map1val1\0map2key1\0map2val1\0
    184	 * (and so on); str_lens[i], key_lens[i] and val_lens[i] determines
    185	 * value length
    186	 */
    187	char payload[STROBE_MAX_PAYLOAD];
    188};
    189
    190struct strobelight_bpf_sample {
    191	uint64_t ktime;
    192	char comm[TASK_COMM_LEN];
    193	pid_t pid;
    194	int user_stack_id;
    195	int kernel_stack_id;
    196	int has_meta;
    197	struct strobemeta_payload metadata;
    198	/*
    199	 * makes it possible to pass (<real payload size> + 1) as data size to
    200	 * perf_submit() to avoid perf_submit's paranoia about passing zero as
    201	 * size, as it deduces that <real payload size> might be
    202	 * **theoretically** zero
    203	 */
    204	char dummy_safeguard;
    205};
    206
    207struct {
    208	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    209	__uint(max_entries, 32);
    210	__uint(key_size, sizeof(int));
    211	__uint(value_size, sizeof(int));
    212} samples SEC(".maps");
    213
    214struct {
    215	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
    216	__uint(max_entries, 16);
    217	__uint(key_size, sizeof(uint32_t));
    218	__uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH);
    219} stacks_0 SEC(".maps");
    220
    221struct {
    222	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
    223	__uint(max_entries, 16);
    224	__uint(key_size, sizeof(uint32_t));
    225	__uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH);
    226} stacks_1 SEC(".maps");
    227
    228struct {
    229	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    230	__uint(max_entries, 1);
    231	__type(key, uint32_t);
    232	__type(value, struct strobelight_bpf_sample);
    233} sample_heap SEC(".maps");
    234
    235struct {
    236	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    237	__uint(max_entries, STROBE_MAX_CFGS);
    238	__type(key, pid_t);
    239	__type(value, struct strobemeta_cfg);
    240} strobemeta_cfgs SEC(".maps");
    241
    242/* Type for the dtv.  */
    243/* https://github.com/lattera/glibc/blob/master/nptl/sysdeps/x86_64/tls.h#L34 */
    244typedef union dtv {
    245	size_t counter;
    246	struct {
    247		void* val;
    248		bool is_static;
    249	} pointer;
    250} dtv_t;
    251
    252/* Partial definition for tcbhead_t */
    253/* https://github.com/bminor/glibc/blob/master/sysdeps/x86_64/nptl/tls.h#L42 */
    254struct tcbhead {
    255	void* tcb;
    256	dtv_t* dtv;
    257};
    258
    259/*
    260 * TLS module/offset information for shared library case.
    261 * For x86-64, this is mapped onto two entries in GOT.
    262 * For aarch64, this is pointed to by second GOT entry.
    263 */
    264struct tls_index {
    265	uint64_t module;
    266	uint64_t offset;
    267};
    268
    269#ifdef SUBPROGS
    270__noinline
    271#else
    272__always_inline
    273#endif
    274static void *calc_location(struct strobe_value_loc *loc, void *tls_base)
    275{
    276	/*
    277	 * tls_mode value is:
    278	 * - -1 (TLS_NOT_SET), if no metavar is present;
    279	 * - 0 (TLS_LOCAL_EXEC), if metavar uses Local Executable mode of TLS
    280	 * (offset from fs:0 for x86-64 or tpidr_el0 for aarch64);
    281	 * - 1 (TLS_IMM_EXEC), if metavar uses Immediate Executable mode of TLS;
    282	 * - 2 (TLS_GENERAL_DYN), if metavar uses General Dynamic mode of TLS;
    283	 * This schema allows to use something like:
    284	 * (tls_mode + 1) * (tls_base + offset)
    285	 * to get NULL for "no metavar" location, or correct pointer for local
    286	 * executable mode without doing extra ifs.
    287	 */
    288	if (loc->tls_mode <= TLS_LOCAL_EXEC) {
    289		/* static executable is simple, we just have offset from
    290		 * tls_base */
    291		void *addr = tls_base + loc->offset;
    292		/* multiply by (tls_mode + 1) to get NULL, if we have no
    293		 * metavar in this slot */
    294		return (void *)((loc->tls_mode + 1) * (int64_t)addr);
    295	}
    296	/*
    297	 * Other modes are more complicated, we need to jump through few hoops.
    298	 *
    299	 * For immediate executable mode (currently supported only for aarch64):
    300	 *  - loc->offset is pointing to a GOT entry containing fixed offset
    301	 *  relative to tls_base;
    302	 *
    303	 * For general dynamic mode:
    304	 *  - loc->offset is pointing to a beginning of double GOT entries;
    305	 *  - (for aarch64 only) second entry points to tls_index_t struct;
    306	 *  - (for x86-64 only) two GOT entries are already tls_index_t;
    307	 *  - tls_index_t->module is used to find start of TLS section in
    308	 *  which variable resides;
    309	 *  - tls_index_t->offset provides offset within that TLS section,
    310	 *  pointing to value of variable.
    311	 */
    312	struct tls_index tls_index;
    313	dtv_t *dtv;
    314	void *tls_ptr;
    315
    316	bpf_probe_read_user(&tls_index, sizeof(struct tls_index),
    317			    (void *)loc->offset);
    318	/* valid module index is always positive */
    319	if (tls_index.module > 0) {
    320		/* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
    321		bpf_probe_read_user(&dtv, sizeof(dtv),
    322				    &((struct tcbhead *)tls_base)->dtv);
    323		dtv += tls_index.module;
    324	} else {
    325		dtv = NULL;
    326	}
    327	bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv);
    328	/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
    329	return tls_ptr && tls_ptr != (void *)-1
    330		? tls_ptr + tls_index.offset
    331		: NULL;
    332}
    333
    334#ifdef SUBPROGS
    335__noinline
    336#else
    337__always_inline
    338#endif
    339static void read_int_var(struct strobemeta_cfg *cfg,
    340			 size_t idx, void *tls_base,
    341			 struct strobe_value_generic *value,
    342			 struct strobemeta_payload *data)
    343{
    344	void *location = calc_location(&cfg->int_locs[idx], tls_base);
    345	if (!location)
    346		return;
    347
    348	bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
    349	data->int_vals[idx] = value->val;
    350	if (value->header.len)
    351		data->int_vals_set_mask |= (1 << idx);
    352}
    353
    354static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
    355					     size_t idx, void *tls_base,
    356					     struct strobe_value_generic *value,
    357					     struct strobemeta_payload *data,
    358					     void *payload)
    359{
    360	void *location;
    361	uint64_t len;
    362
    363	data->str_lens[idx] = 0;
    364	location = calc_location(&cfg->str_locs[idx], tls_base);
    365	if (!location)
    366		return 0;
    367
    368	bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
    369	len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
    370	/*
    371	 * if bpf_probe_read_user_str returns error (<0), due to casting to
    372	 * unsinged int, it will become big number, so next check is
    373	 * sufficient to check for errors AND prove to BPF verifier, that
    374	 * bpf_probe_read_user_str won't return anything bigger than
    375	 * STROBE_MAX_STR_LEN
    376	 */
    377	if (len > STROBE_MAX_STR_LEN)
    378		return 0;
    379
    380	data->str_lens[idx] = len;
    381	return len;
    382}
    383
    384static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
    385					  size_t idx, void *tls_base,
    386					  struct strobe_value_generic *value,
    387					  struct strobemeta_payload *data,
    388					  void *payload)
    389{
    390	struct strobe_map_descr* descr = &data->map_descrs[idx];
    391	struct strobe_map_raw map;
    392	void *location;
    393	uint64_t len;
    394	int i;
    395
    396	descr->tag_len = 0; /* presume no tag is set */
    397	descr->cnt = -1; /* presume no value is set */
    398
    399	location = calc_location(&cfg->map_locs[idx], tls_base);
    400	if (!location)
    401		return payload;
    402
    403	bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
    404	if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
    405		return payload;
    406
    407	descr->id = map.id;
    408	descr->cnt = map.cnt;
    409	if (cfg->req_meta_idx == idx) {
    410		data->req_id = map.id;
    411		data->req_meta_valid = 1;
    412	}
    413
    414	len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
    415	if (len <= STROBE_MAX_STR_LEN) {
    416		descr->tag_len = len;
    417		payload += len;
    418	}
    419
    420#ifdef NO_UNROLL
    421#pragma clang loop unroll(disable)
    422#else
    423#pragma unroll
    424#endif
    425	for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) {
    426		if (i >= map.cnt)
    427			break;
    428
    429		descr->key_lens[i] = 0;
    430		len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
    431					      map.entries[i].key);
    432		if (len <= STROBE_MAX_STR_LEN) {
    433			descr->key_lens[i] = len;
    434			payload += len;
    435		}
    436		descr->val_lens[i] = 0;
    437		len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
    438					      map.entries[i].val);
    439		if (len <= STROBE_MAX_STR_LEN) {
    440			descr->val_lens[i] = len;
    441			payload += len;
    442		}
    443	}
    444
    445	return payload;
    446}
    447
    448#ifdef USE_BPF_LOOP
    449enum read_type {
    450	READ_INT_VAR,
    451	READ_MAP_VAR,
    452	READ_STR_VAR,
    453};
    454
    455struct read_var_ctx {
    456	struct strobemeta_payload *data;
    457	void *tls_base;
    458	struct strobemeta_cfg *cfg;
    459	void *payload;
    460	/* value gets mutated */
    461	struct strobe_value_generic *value;
    462	enum read_type type;
    463};
    464
    465static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
    466{
    467	switch (ctx->type) {
    468	case READ_INT_VAR:
    469		if (index >= STROBE_MAX_INTS)
    470			return 1;
    471		read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data);
    472		break;
    473	case READ_MAP_VAR:
    474		if (index >= STROBE_MAX_MAPS)
    475			return 1;
    476		ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
    477					    ctx->value, ctx->data, ctx->payload);
    478		break;
    479	case READ_STR_VAR:
    480		if (index >= STROBE_MAX_STRS)
    481			return 1;
    482		ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
    483					     ctx->value, ctx->data, ctx->payload);
    484		break;
    485	}
    486	return 0;
    487}
    488#endif /* USE_BPF_LOOP */
    489
    490/*
    491 * read_strobe_meta returns NULL, if no metadata was read; otherwise returns
    492 * pointer to *right after* payload ends
    493 */
    494#ifdef SUBPROGS
    495__noinline
    496#else
    497__always_inline
    498#endif
    499static void *read_strobe_meta(struct task_struct *task,
    500			      struct strobemeta_payload *data)
    501{
    502	pid_t pid = bpf_get_current_pid_tgid() >> 32;
    503	struct strobe_value_generic value = {0};
    504	struct strobemeta_cfg *cfg;
    505	void *tls_base, *payload;
    506
    507	cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
    508	if (!cfg)
    509		return NULL;
    510
    511	data->int_vals_set_mask = 0;
    512	data->req_meta_valid = 0;
    513	payload = data->payload;
    514	/*
    515	 * we don't have struct task_struct definition, it should be:
    516	 * tls_base = (void *)task->thread.fsbase;
    517	 */
    518	tls_base = (void *)task;
    519
    520#ifdef USE_BPF_LOOP
    521	struct read_var_ctx ctx = {
    522		.cfg = cfg,
    523		.tls_base = tls_base,
    524		.value = &value,
    525		.data = data,
    526		.payload = payload,
    527	};
    528	int err;
    529
    530	ctx.type = READ_INT_VAR;
    531	err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0);
    532	if (err != STROBE_MAX_INTS)
    533		return NULL;
    534
    535	ctx.type = READ_STR_VAR;
    536	err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0);
    537	if (err != STROBE_MAX_STRS)
    538		return NULL;
    539
    540	ctx.type = READ_MAP_VAR;
    541	err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
    542	if (err != STROBE_MAX_MAPS)
    543		return NULL;
    544#else
    545#ifdef NO_UNROLL
    546#pragma clang loop unroll(disable)
    547#else
    548#pragma unroll
    549#endif /* NO_UNROLL */
    550	for (int i = 0; i < STROBE_MAX_INTS; ++i) {
    551		read_int_var(cfg, i, tls_base, &value, data);
    552	}
    553#ifdef NO_UNROLL
    554#pragma clang loop unroll(disable)
    555#else
    556#pragma unroll
    557#endif /* NO_UNROLL */
    558	for (int i = 0; i < STROBE_MAX_STRS; ++i) {
    559		payload += read_str_var(cfg, i, tls_base, &value, data, payload);
    560	}
    561#ifdef NO_UNROLL
    562#pragma clang loop unroll(disable)
    563#else
    564#pragma unroll
    565#endif /* NO_UNROLL */
    566	for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
    567		payload = read_map_var(cfg, i, tls_base, &value, data, payload);
    568	}
    569#endif /* USE_BPF_LOOP */
    570
    571	/*
    572	 * return pointer right after end of payload, so it's possible to
    573	 * calculate exact amount of useful data that needs to be sent
    574	 */
    575	return payload;
    576}
    577
    578SEC("raw_tracepoint/kfree_skb")
    579int on_event(struct pt_regs *ctx) {
    580	pid_t pid =  bpf_get_current_pid_tgid() >> 32;
    581	struct strobelight_bpf_sample* sample;
    582	struct task_struct *task;
    583	uint32_t zero = 0;
    584	uint64_t ktime_ns;
    585	void *sample_end;
    586
    587	sample = bpf_map_lookup_elem(&sample_heap, &zero);
    588	if (!sample)
    589		return 0; /* this will never happen */
    590
    591	sample->pid = pid;
    592	bpf_get_current_comm(&sample->comm, TASK_COMM_LEN);
    593	ktime_ns = bpf_ktime_get_ns();
    594	sample->ktime = ktime_ns;
    595
    596	task = (struct task_struct *)bpf_get_current_task();
    597	sample_end = read_strobe_meta(task, &sample->metadata);
    598	sample->has_meta = sample_end != NULL;
    599	sample_end = sample_end ? : &sample->metadata;
    600
    601	if ((ktime_ns >> STACK_TABLE_EPOCH_SHIFT) & 1) {
    602		sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_1, 0);
    603		sample->user_stack_id = bpf_get_stackid(ctx, &stacks_1, BPF_F_USER_STACK);
    604	} else {
    605		sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_0, 0);
    606		sample->user_stack_id = bpf_get_stackid(ctx, &stacks_0, BPF_F_USER_STACK);
    607	}
    608
    609	uint64_t sample_size = sample_end - (void *)sample;
    610	/* should always be true */
    611	if (sample_size < sizeof(struct strobelight_bpf_sample))
    612		bpf_perf_event_output(ctx, &samples, 0, sample, 1 + sample_size);
    613	return 0;
    614}
    615
    616char _license[] SEC("license") = "GPL";