cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

profiler.inc.h (30036B)


      1// SPDX-License-Identifier: GPL-2.0
      2/* Copyright (c) 2020 Facebook */
      3#include <vmlinux.h>
      4#include <bpf/bpf_core_read.h>
      5#include <bpf/bpf_helpers.h>
      6#include <bpf/bpf_tracing.h>
      7
      8#include "profiler.h"
      9
     10#ifndef NULL
     11#define NULL 0
     12#endif
     13
     14#define O_WRONLY 00000001
     15#define O_RDWR 00000002
     16#define O_DIRECTORY 00200000
     17#define __O_TMPFILE 020000000
     18#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
     19#define MAX_ERRNO 4095
     20#define S_IFMT 00170000
     21#define S_IFSOCK 0140000
     22#define S_IFLNK 0120000
     23#define S_IFREG 0100000
     24#define S_IFBLK 0060000
     25#define S_IFDIR 0040000
     26#define S_IFCHR 0020000
     27#define S_IFIFO 0010000
     28#define S_ISUID 0004000
     29#define S_ISGID 0002000
     30#define S_ISVTX 0001000
     31#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
     32#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
     33#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
     34#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
     35#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
     36#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
     37#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO
     38
     39#define KILL_DATA_ARRAY_SIZE 8
     40
     41struct var_kill_data_arr_t {
     42	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
     43};
     44
     45union any_profiler_data_t {
     46	struct var_exec_data_t var_exec;
     47	struct var_kill_data_t var_kill;
     48	struct var_sysctl_data_t var_sysctl;
     49	struct var_filemod_data_t var_filemod;
     50	struct var_fork_data_t var_fork;
     51	struct var_kill_data_arr_t var_kill_data_arr;
     52};
     53
     54volatile struct profiler_config_struct bpf_config = {};
     55
     56#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
     57#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
     58#define CGROUP_LOGIN_SESSION_INODE \
     59	(bpf_config.cgroup_login_session_inode)
     60#define KILL_SIGNALS (bpf_config.kill_signals_mask)
     61#define STALE_INFO (bpf_config.stale_info_secs)
     62#define INODE_FILTER (bpf_config.inode_filter)
     63#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
     64#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
     65
     66struct kernfs_iattrs___52 {
     67	struct iattr ia_iattr;
     68};
     69
     70struct kernfs_node___52 {
     71	union /* kernfs_node_id */ {
     72		struct {
     73			u32 ino;
     74			u32 generation;
     75		};
     76		u64 id;
     77	} id;
     78};
     79
     80struct {
     81	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
     82	__uint(max_entries, 1);
     83	__type(key, u32);
     84	__type(value, union any_profiler_data_t);
     85} data_heap SEC(".maps");
     86
     87struct {
     88	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
     89	__uint(key_size, sizeof(int));
     90	__uint(value_size, sizeof(int));
     91} events SEC(".maps");
     92
     93struct {
     94	__uint(type, BPF_MAP_TYPE_HASH);
     95	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
     96	__type(key, u32);
     97	__type(value, struct var_kill_data_arr_t);
     98} var_tpid_to_data SEC(".maps");
     99
    100struct {
    101	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    102	__uint(max_entries, profiler_bpf_max_function_id);
    103	__type(key, u32);
    104	__type(value, struct bpf_func_stats_data);
    105} bpf_func_stats SEC(".maps");
    106
    107struct {
    108	__uint(type, BPF_MAP_TYPE_HASH);
    109	__type(key, u32);
    110	__type(value, bool);
    111	__uint(max_entries, 16);
    112} allowed_devices SEC(".maps");
    113
    114struct {
    115	__uint(type, BPF_MAP_TYPE_HASH);
    116	__type(key, u64);
    117	__type(value, bool);
    118	__uint(max_entries, 1024);
    119} allowed_file_inodes SEC(".maps");
    120
    121struct {
    122	__uint(type, BPF_MAP_TYPE_HASH);
    123	__type(key, u64);
    124	__type(value, bool);
    125	__uint(max_entries, 1024);
    126} allowed_directory_inodes SEC(".maps");
    127
    128struct {
    129	__uint(type, BPF_MAP_TYPE_HASH);
    130	__type(key, u32);
    131	__type(value, bool);
    132	__uint(max_entries, 16);
    133} disallowed_exec_inodes SEC(".maps");
    134
    135#ifndef ARRAY_SIZE
    136#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
    137#endif
    138
    139static INLINE bool IS_ERR(const void* ptr)
    140{
    141	return IS_ERR_VALUE((unsigned long)ptr);
    142}
    143
    144static INLINE u32 get_userspace_pid()
    145{
    146	return bpf_get_current_pid_tgid() >> 32;
    147}
    148
    149static INLINE bool is_init_process(u32 tgid)
    150{
    151	return tgid == 1 || tgid == 0;
    152}
    153
    154static INLINE unsigned long
    155probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
    156{
    157	len = len < max ? len : max;
    158	if (len > 1) {
    159		if (bpf_probe_read(dst, len, src))
    160			return 0;
    161	} else if (len == 1) {
    162		if (bpf_probe_read(dst, 1, src))
    163			return 0;
    164	}
    165	return len;
    166}
    167
    168static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
    169				     int spid)
    170{
    171#ifdef UNROLL
    172#pragma unroll
    173#endif
    174	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
    175		if (arr_struct->array[i].meta.pid == spid)
    176			return i;
    177	return -1;
    178}
    179
    180static INLINE void populate_ancestors(struct task_struct* task,
    181				      struct ancestors_data_t* ancestors_data)
    182{
    183	struct task_struct* parent = task;
    184	u32 num_ancestors, ppid;
    185
    186	ancestors_data->num_ancestors = 0;
    187#ifdef UNROLL
    188#pragma unroll
    189#endif
    190	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
    191		parent = BPF_CORE_READ(parent, real_parent);
    192		if (parent == NULL)
    193			break;
    194		ppid = BPF_CORE_READ(parent, tgid);
    195		if (is_init_process(ppid))
    196			break;
    197		ancestors_data->ancestor_pids[num_ancestors] = ppid;
    198		ancestors_data->ancestor_exec_ids[num_ancestors] =
    199			BPF_CORE_READ(parent, self_exec_id);
    200		ancestors_data->ancestor_start_times[num_ancestors] =
    201			BPF_CORE_READ(parent, start_time);
    202		ancestors_data->num_ancestors = num_ancestors;
    203	}
    204}
    205
    206static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
    207					  struct kernfs_node* cgroup_root_node,
    208					  void* payload,
    209					  int* root_pos)
    210{
    211	void* payload_start = payload;
    212	size_t filepart_length;
    213
    214#ifdef UNROLL
    215#pragma unroll
    216#endif
    217	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
    218		filepart_length =
    219			bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
    220		if (!cgroup_node)
    221			return payload;
    222		if (cgroup_node == cgroup_root_node)
    223			*root_pos = payload - payload_start;
    224		if (filepart_length <= MAX_PATH) {
    225			barrier_var(filepart_length);
    226			payload += filepart_length;
    227		}
    228		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
    229	}
    230	return payload;
    231}
    232
    233static ino_t get_inode_from_kernfs(struct kernfs_node* node)
    234{
    235	struct kernfs_node___52* node52 = (void*)node;
    236
    237	if (bpf_core_field_exists(node52->id.ino)) {
    238		barrier_var(node52);
    239		return BPF_CORE_READ(node52, id.ino);
    240	} else {
    241		barrier_var(node);
    242		return (u64)BPF_CORE_READ(node, id);
    243	}
    244}
    245
    246extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
    247enum cgroup_subsys_id___local {
    248	pids_cgrp_id___local = 123, /* value doesn't matter */
    249};
    250
    251static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
    252					 struct task_struct* task,
    253					 void* payload)
    254{
    255	struct kernfs_node* root_kernfs =
    256		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
    257	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
    258
    259#if __has_builtin(__builtin_preserve_enum_value)
    260	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
    261		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
    262						  pids_cgrp_id___local);
    263#ifdef UNROLL
    264#pragma unroll
    265#endif
    266		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
    267			struct cgroup_subsys_state* subsys =
    268				BPF_CORE_READ(task, cgroups, subsys[i]);
    269			if (subsys != NULL) {
    270				int subsys_id = BPF_CORE_READ(subsys, ss, id);
    271				if (subsys_id == cgrp_id) {
    272					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
    273					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
    274					break;
    275				}
    276			}
    277		}
    278	}
    279#endif
    280
    281	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
    282	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
    283
    284	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
    285		cgroup_data->cgroup_root_mtime =
    286			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
    287		cgroup_data->cgroup_proc_mtime =
    288			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
    289	} else {
    290		struct kernfs_iattrs___52* root_iattr =
    291			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
    292		cgroup_data->cgroup_root_mtime =
    293			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);
    294
    295		struct kernfs_iattrs___52* proc_iattr =
    296			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
    297		cgroup_data->cgroup_proc_mtime =
    298			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
    299	}
    300
    301	cgroup_data->cgroup_root_length = 0;
    302	cgroup_data->cgroup_proc_length = 0;
    303	cgroup_data->cgroup_full_length = 0;
    304
    305	size_t cgroup_root_length =
    306		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
    307	barrier_var(cgroup_root_length);
    308	if (cgroup_root_length <= MAX_PATH) {
    309		barrier_var(cgroup_root_length);
    310		cgroup_data->cgroup_root_length = cgroup_root_length;
    311		payload += cgroup_root_length;
    312	}
    313
    314	size_t cgroup_proc_length =
    315		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
    316	barrier_var(cgroup_proc_length);
    317	if (cgroup_proc_length <= MAX_PATH) {
    318		barrier_var(cgroup_proc_length);
    319		cgroup_data->cgroup_proc_length = cgroup_proc_length;
    320		payload += cgroup_proc_length;
    321	}
    322
    323	if (FETCH_CGROUPS_FROM_BPF) {
    324		cgroup_data->cgroup_full_path_root_pos = -1;
    325		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
    326							      &cgroup_data->cgroup_full_path_root_pos);
    327		cgroup_data->cgroup_full_length = payload_end_pos - payload;
    328		payload = payload_end_pos;
    329	}
    330
    331	return (void*)payload;
    332}
    333
    334static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
    335					  struct task_struct* task,
    336					  u32 pid, void* payload)
    337{
    338	u64 uid_gid = bpf_get_current_uid_gid();
    339
    340	metadata->uid = (u32)uid_gid;
    341	metadata->gid = uid_gid >> 32;
    342	metadata->pid = pid;
    343	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
    344	metadata->start_time = BPF_CORE_READ(task, start_time);
    345	metadata->comm_length = 0;
    346
    347	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
    348	barrier_var(comm_length);
    349	if (comm_length <= TASK_COMM_LEN) {
    350		barrier_var(comm_length);
    351		metadata->comm_length = comm_length;
    352		payload += comm_length;
    353	}
    354
    355	return (void*)payload;
    356}
    357
    358static INLINE struct var_kill_data_t*
    359get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
    360{
    361	int zero = 0;
    362	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
    363
    364	if (kill_data == NULL)
    365		return NULL;
    366	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
    367
    368	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
    369	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
    370	size_t payload_length = payload - (void*)kill_data->payload;
    371	kill_data->payload_length = payload_length;
    372	populate_ancestors(task, &kill_data->ancestors_info);
    373	kill_data->meta.type = KILL_EVENT;
    374	kill_data->kill_target_pid = tpid;
    375	kill_data->kill_sig = sig;
    376	kill_data->kill_count = 1;
    377	kill_data->last_kill_time = bpf_ktime_get_ns();
    378	return kill_data;
    379}
    380
    381static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
    382{
    383	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
    384		return 0;
    385
    386	u32 spid = get_userspace_pid();
    387	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
    388
    389	if (arr_struct == NULL) {
    390		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
    391		int zero = 0;
    392
    393		if (kill_data == NULL)
    394			return 0;
    395		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
    396		if (arr_struct == NULL)
    397			return 0;
    398		bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
    399	} else {
    400		int index = get_var_spid_index(arr_struct, spid);
    401
    402		if (index == -1) {
    403			struct var_kill_data_t* kill_data =
    404				get_var_kill_data(ctx, spid, tpid, sig);
    405			if (kill_data == NULL)
    406				return 0;
    407#ifdef UNROLL
    408#pragma unroll
    409#endif
    410			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
    411				if (arr_struct->array[i].meta.pid == 0) {
    412					bpf_probe_read(&arr_struct->array[i],
    413						       sizeof(arr_struct->array[i]), kill_data);
    414					bpf_map_update_elem(&var_tpid_to_data, &tpid,
    415							    arr_struct, 0);
    416
    417					return 0;
    418				}
    419			return 0;
    420		}
    421
    422		struct var_kill_data_t* kill_data = &arr_struct->array[index];
    423
    424		u64 delta_sec =
    425			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;
    426
    427		if (delta_sec < STALE_INFO) {
    428			kill_data->kill_count++;
    429			kill_data->last_kill_time = bpf_ktime_get_ns();
    430			bpf_probe_read(&arr_struct->array[index],
    431				       sizeof(arr_struct->array[index]),
    432				       kill_data);
    433		} else {
    434			struct var_kill_data_t* kill_data =
    435				get_var_kill_data(ctx, spid, tpid, sig);
    436			if (kill_data == NULL)
    437				return 0;
    438			bpf_probe_read(&arr_struct->array[index],
    439				       sizeof(arr_struct->array[index]),
    440				       kill_data);
    441		}
    442	}
    443	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
    444	return 0;
    445}
    446
    447static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
    448				   enum bpf_function_id func_id)
    449{
    450	int func_id_key = func_id;
    451
    452	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
    453	bpf_stat_ctx->bpf_func_stats_data_val =
    454		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
    455	if (bpf_stat_ctx->bpf_func_stats_data_val)
    456		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
    457}
    458
    459static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
    460{
    461	if (bpf_stat_ctx->bpf_func_stats_data_val)
    462		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
    463			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
    464}
    465
    466static INLINE void
    467bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
    468				    struct var_metadata_t* meta)
    469{
    470	if (bpf_stat_ctx->bpf_func_stats_data_val) {
    471		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
    472		meta->bpf_stats_num_perf_events =
    473			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
    474	}
    475	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
    476	meta->cpu_id = bpf_get_smp_processor_id();
    477}
    478
    479static INLINE size_t
    480read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
    481{
    482	size_t length = 0;
    483	size_t filepart_length;
    484	struct dentry* parent_dentry;
    485
    486#ifdef UNROLL
    487#pragma unroll
    488#endif
    489	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
    490		filepart_length = bpf_probe_read_str(payload, MAX_PATH,
    491						     BPF_CORE_READ(filp_dentry, d_name.name));
    492		barrier_var(filepart_length);
    493		if (filepart_length > MAX_PATH)
    494			break;
    495		barrier_var(filepart_length);
    496		payload += filepart_length;
    497		length += filepart_length;
    498
    499		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
    500		if (filp_dentry == parent_dentry)
    501			break;
    502		filp_dentry = parent_dentry;
    503	}
    504
    505	return length;
    506}
    507
    508static INLINE bool
    509is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
    510{
    511	struct dentry* parent_dentry;
    512#ifdef UNROLL
    513#pragma unroll
    514#endif
    515	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
    516		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
    517		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
    518
    519		if (allowed_dir != NULL)
    520			return true;
    521		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
    522		if (filp_dentry == parent_dentry)
    523			break;
    524		filp_dentry = parent_dentry;
    525	}
    526	return false;
    527}
    528
    529static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
    530						 u32* device_id,
    531						 u64* file_ino)
    532{
    533	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
    534	*device_id = dev_id;
    535	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
    536
    537	if (allowed_device == NULL)
    538		return false;
    539
    540	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
    541	*file_ino = ino;
    542	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
    543
    544	if (allowed_file == NULL)
    545		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
    546			return false;
    547	return true;
    548}
    549
    550SEC("kprobe/proc_sys_write")
    551ssize_t BPF_KPROBE(kprobe__proc_sys_write,
    552		   struct file* filp, const char* buf,
    553		   size_t count, loff_t* ppos)
    554{
    555	struct bpf_func_stats_ctx stats_ctx;
    556	bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
    557
    558	u32 pid = get_userspace_pid();
    559	int zero = 0;
    560	struct var_sysctl_data_t* sysctl_data =
    561		bpf_map_lookup_elem(&data_heap, &zero);
    562	if (!sysctl_data)
    563		goto out;
    564
    565	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
    566	sysctl_data->meta.type = SYSCTL_EVENT;
    567	void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
    568	payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
    569
    570	populate_ancestors(task, &sysctl_data->ancestors_info);
    571
    572	sysctl_data->sysctl_val_length = 0;
    573	sysctl_data->sysctl_path_length = 0;
    574
    575	size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
    576	barrier_var(sysctl_val_length);
    577	if (sysctl_val_length <= CTL_MAXNAME) {
    578		barrier_var(sysctl_val_length);
    579		sysctl_data->sysctl_val_length = sysctl_val_length;
    580		payload += sysctl_val_length;
    581	}
    582
    583	size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
    584						       BPF_CORE_READ(filp, f_path.dentry, d_name.name));
    585	barrier_var(sysctl_path_length);
    586	if (sysctl_path_length <= MAX_PATH) {
    587		barrier_var(sysctl_path_length);
    588		sysctl_data->sysctl_path_length = sysctl_path_length;
    589		payload += sysctl_path_length;
    590	}
    591
    592	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
    593	unsigned long data_len = payload - (void*)sysctl_data;
    594	data_len = data_len > sizeof(struct var_sysctl_data_t)
    595		? sizeof(struct var_sysctl_data_t)
    596		: data_len;
    597	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
    598out:
    599	bpf_stats_exit(&stats_ctx);
    600	return 0;
    601}
    602
    603SEC("tracepoint/syscalls/sys_enter_kill")
    604int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
    605{
    606	struct bpf_func_stats_ctx stats_ctx;
    607
    608	bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
    609	int pid = ctx->args[0];
    610	int sig = ctx->args[1];
    611	int ret = trace_var_sys_kill(ctx, pid, sig);
    612	bpf_stats_exit(&stats_ctx);
    613	return ret;
    614};
    615
    616SEC("raw_tracepoint/sched_process_exit")
    617int raw_tracepoint__sched_process_exit(void* ctx)
    618{
    619	int zero = 0;
    620	struct bpf_func_stats_ctx stats_ctx;
    621	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
    622
    623	u32 tpid = get_userspace_pid();
    624
    625	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
    626	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
    627
    628	if (arr_struct == NULL || kill_data == NULL)
    629		goto out;
    630
    631	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
    632	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
    633
    634#ifdef UNROLL
    635#pragma unroll
    636#endif
    637	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
    638		struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
    639
    640		if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
    641			bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
    642			void* payload = kill_data->payload;
    643			size_t offset = kill_data->payload_length;
    644			if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
    645				return 0;
    646			payload += offset;
    647
    648			kill_data->kill_target_name_length = 0;
    649			kill_data->kill_target_cgroup_proc_length = 0;
    650
    651			size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
    652			barrier_var(comm_length);
    653			if (comm_length <= TASK_COMM_LEN) {
    654				barrier_var(comm_length);
    655				kill_data->kill_target_name_length = comm_length;
    656				payload += comm_length;
    657			}
    658
    659			size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
    660								       BPF_CORE_READ(proc_kernfs, name));
    661			barrier_var(cgroup_proc_length);
    662			if (cgroup_proc_length <= KILL_TARGET_LEN) {
    663				barrier_var(cgroup_proc_length);
    664				kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
    665				payload += cgroup_proc_length;
    666			}
    667
    668			bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
    669			unsigned long data_len = (void*)payload - (void*)kill_data;
    670			data_len = data_len > sizeof(struct var_kill_data_t)
    671				? sizeof(struct var_kill_data_t)
    672				: data_len;
    673			bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
    674		}
    675	}
    676	bpf_map_delete_elem(&var_tpid_to_data, &tpid);
    677out:
    678	bpf_stats_exit(&stats_ctx);
    679	return 0;
    680}
    681
    682SEC("raw_tracepoint/sched_process_exec")
    683int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
    684{
    685	struct bpf_func_stats_ctx stats_ctx;
    686	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
    687
    688	struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
    689	u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
    690
    691	bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
    692	if (should_filter_binprm != NULL)
    693		goto out;
    694
    695	int zero = 0;
    696	struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
    697	if (!proc_exec_data)
    698		goto out;
    699
    700	if (INODE_FILTER && inode != INODE_FILTER)
    701		return 0;
    702
    703	u32 pid = get_userspace_pid();
    704	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
    705
    706	proc_exec_data->meta.type = EXEC_EVENT;
    707	proc_exec_data->bin_path_length = 0;
    708	proc_exec_data->cmdline_length = 0;
    709	proc_exec_data->environment_length = 0;
    710	void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
    711					      proc_exec_data->payload);
    712	payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
    713
    714	struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
    715	proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
    716	proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
    717	proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
    718	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
    719
    720	const char* filename = BPF_CORE_READ(bprm, filename);
    721	size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
    722	barrier_var(bin_path_length);
    723	if (bin_path_length <= MAX_FILENAME_LEN) {
    724		barrier_var(bin_path_length);
    725		proc_exec_data->bin_path_length = bin_path_length;
    726		payload += bin_path_length;
    727	}
    728
    729	void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
    730	void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
    731	unsigned int cmdline_length = probe_read_lim(payload, arg_start,
    732						     arg_end - arg_start, MAX_ARGS_LEN);
    733
    734	if (cmdline_length <= MAX_ARGS_LEN) {
    735		barrier_var(cmdline_length);
    736		proc_exec_data->cmdline_length = cmdline_length;
    737		payload += cmdline_length;
    738	}
    739
    740	if (READ_ENVIRON_FROM_EXEC) {
    741		void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
    742		void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
    743		unsigned long env_len = probe_read_lim(payload, env_start,
    744						       env_end - env_start, MAX_ENVIRON_LEN);
    745		if (cmdline_length <= MAX_ENVIRON_LEN) {
    746			proc_exec_data->environment_length = env_len;
    747			payload += env_len;
    748		}
    749	}
    750
    751	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
    752	unsigned long data_len = payload - (void*)proc_exec_data;
    753	data_len = data_len > sizeof(struct var_exec_data_t)
    754		? sizeof(struct var_exec_data_t)
    755		: data_len;
    756	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
    757out:
    758	bpf_stats_exit(&stats_ctx);
    759	return 0;
    760}
    761
    762SEC("kretprobe/do_filp_open")
    763int kprobe_ret__do_filp_open(struct pt_regs* ctx)
    764{
    765	struct bpf_func_stats_ctx stats_ctx;
    766	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
    767
    768	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
    769
    770	if (filp == NULL || IS_ERR(filp))
    771		goto out;
    772	unsigned int flags = BPF_CORE_READ(filp, f_flags);
    773	if ((flags & (O_RDWR | O_WRONLY)) == 0)
    774		goto out;
    775	if ((flags & O_TMPFILE) > 0)
    776		goto out;
    777	struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
    778	umode_t mode = BPF_CORE_READ(file_inode, i_mode);
    779	if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
    780	    S_ISSOCK(mode))
    781		goto out;
    782
    783	struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
    784	u32 device_id = 0;
    785	u64 file_ino = 0;
    786	if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
    787		goto out;
    788
    789	int zero = 0;
    790	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
    791	if (!filemod_data)
    792		goto out;
    793
    794	u32 pid = get_userspace_pid();
    795	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
    796
    797	filemod_data->meta.type = FILEMOD_EVENT;
    798	filemod_data->fmod_type = FMOD_OPEN;
    799	filemod_data->dst_flags = flags;
    800	filemod_data->src_inode = 0;
    801	filemod_data->dst_inode = file_ino;
    802	filemod_data->src_device_id = 0;
    803	filemod_data->dst_device_id = device_id;
    804	filemod_data->src_filepath_length = 0;
    805	filemod_data->dst_filepath_length = 0;
    806
    807	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
    808					      filemod_data->payload);
    809	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
    810
    811	size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
    812	barrier_var(len);
    813	if (len <= MAX_FILEPATH_LENGTH) {
    814		barrier_var(len);
    815		payload += len;
    816		filemod_data->dst_filepath_length = len;
    817	}
    818	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
    819	unsigned long data_len = payload - (void*)filemod_data;
    820	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
    821	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
    822out:
    823	bpf_stats_exit(&stats_ctx);
    824	return 0;
    825}
    826
    827SEC("kprobe/vfs_link")
    828int BPF_KPROBE(kprobe__vfs_link,
    829	       struct dentry* old_dentry, struct user_namespace *mnt_userns,
    830	       struct inode* dir, struct dentry* new_dentry,
    831	       struct inode** delegated_inode)
    832{
    833	struct bpf_func_stats_ctx stats_ctx;
    834	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
    835
    836	u32 src_device_id = 0;
    837	u64 src_file_ino = 0;
    838	u32 dst_device_id = 0;
    839	u64 dst_file_ino = 0;
    840	if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
    841	    !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
    842		goto out;
    843
    844	int zero = 0;
    845	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
    846	if (!filemod_data)
    847		goto out;
    848
    849	u32 pid = get_userspace_pid();
    850	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
    851
    852	filemod_data->meta.type = FILEMOD_EVENT;
    853	filemod_data->fmod_type = FMOD_LINK;
    854	filemod_data->dst_flags = 0;
    855	filemod_data->src_inode = src_file_ino;
    856	filemod_data->dst_inode = dst_file_ino;
    857	filemod_data->src_device_id = src_device_id;
    858	filemod_data->dst_device_id = dst_device_id;
    859	filemod_data->src_filepath_length = 0;
    860	filemod_data->dst_filepath_length = 0;
    861
    862	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
    863					      filemod_data->payload);
    864	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
    865
    866	size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
    867	barrier_var(len);
    868	if (len <= MAX_FILEPATH_LENGTH) {
    869		barrier_var(len);
    870		payload += len;
    871		filemod_data->src_filepath_length = len;
    872	}
    873
    874	len = read_absolute_file_path_from_dentry(new_dentry, payload);
    875	barrier_var(len);
    876	if (len <= MAX_FILEPATH_LENGTH) {
    877		barrier_var(len);
    878		payload += len;
    879		filemod_data->dst_filepath_length = len;
    880	}
    881
    882	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
    883	unsigned long data_len = payload - (void*)filemod_data;
    884	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
    885	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
    886out:
    887	bpf_stats_exit(&stats_ctx);
    888	return 0;
    889}
    890
    891SEC("kprobe/vfs_symlink")
    892int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
    893	       const char* oldname)
    894{
    895	struct bpf_func_stats_ctx stats_ctx;
    896	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
    897
    898	u32 dst_device_id = 0;
    899	u64 dst_file_ino = 0;
    900	if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
    901		goto out;
    902
    903	int zero = 0;
    904	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
    905	if (!filemod_data)
    906		goto out;
    907
    908	u32 pid = get_userspace_pid();
    909	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
    910
    911	filemod_data->meta.type = FILEMOD_EVENT;
    912	filemod_data->fmod_type = FMOD_SYMLINK;
    913	filemod_data->dst_flags = 0;
    914	filemod_data->src_inode = 0;
    915	filemod_data->dst_inode = dst_file_ino;
    916	filemod_data->src_device_id = 0;
    917	filemod_data->dst_device_id = dst_device_id;
    918	filemod_data->src_filepath_length = 0;
    919	filemod_data->dst_filepath_length = 0;
    920
    921	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
    922					      filemod_data->payload);
    923	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
    924
    925	size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
    926	barrier_var(len);
    927	if (len <= MAX_FILEPATH_LENGTH) {
    928		barrier_var(len);
    929		payload += len;
    930		filemod_data->src_filepath_length = len;
    931	}
    932	len = read_absolute_file_path_from_dentry(dentry, payload);
    933	barrier_var(len);
    934	if (len <= MAX_FILEPATH_LENGTH) {
    935		barrier_var(len);
    936		payload += len;
    937		filemod_data->dst_filepath_length = len;
    938	}
    939	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
    940	unsigned long data_len = payload - (void*)filemod_data;
    941	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
    942	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
    943out:
    944	bpf_stats_exit(&stats_ctx);
    945	return 0;
    946}
    947
    948SEC("raw_tracepoint/sched_process_fork")
    949int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
    950{
    951	struct bpf_func_stats_ctx stats_ctx;
    952	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
    953
    954	int zero = 0;
    955	struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
    956	if (!fork_data)
    957		goto out;
    958
    959	struct task_struct* parent = (struct task_struct*)ctx->args[0];
    960	struct task_struct* child = (struct task_struct*)ctx->args[1];
    961	fork_data->meta.type = FORK_EVENT;
    962
    963	void* payload = populate_var_metadata(&fork_data->meta, child,
    964					      BPF_CORE_READ(child, pid), fork_data->payload);
    965	fork_data->parent_pid = BPF_CORE_READ(parent, pid);
    966	fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
    967	fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
    968	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
    969
    970	unsigned long data_len = payload - (void*)fork_data;
    971	data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
    972	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
    973out:
    974	bpf_stats_exit(&stats_ctx);
    975	return 0;
    976}
    977char _license[] SEC("license") = "GPL";