profiler.inc.h (30036B)
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#include "profiler.h"

#ifndef NULL
#define NULL 0
#endif

#define O_WRONLY 00000001
#define O_RDWR 00000002
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
#define MAX_ERRNO 4095
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000
#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO

#define KILL_DATA_ARRAY_SIZE 8

struct var_kill_data_arr_t {
	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
};

union any_profiler_data_t {
	struct var_exec_data_t var_exec;
	struct var_kill_data_t var_kill;
	struct var_sysctl_data_t var_sysctl;
	struct var_filemod_data_t var_filemod;
	struct var_fork_data_t var_fork;
	struct var_kill_data_arr_t var_kill_data_arr;
};

volatile struct profiler_config_struct bpf_config = {};

#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
#define CGROUP_LOGIN_SESSION_INODE \
	(bpf_config.cgroup_login_session_inode)
#define KILL_SIGNALS (bpf_config.kill_signals_mask)
#define STALE_INFO (bpf_config.stale_info_secs)
#define INODE_FILTER (bpf_config.inode_filter)
#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)

struct kernfs_iattrs___52 {
	struct iattr ia_iattr;
};

struct kernfs_node___52 {
	union /* kernfs_node_id */ {
		struct {
			u32 ino;
			u32 generation;
		};
		u64 id;
	} id;
};

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, union any_profiler_data_t);
} data_heap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
	__type(key, u32);
	__type(value, struct var_kill_data_arr_t);
} var_tpid_to_data SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, profiler_bpf_max_function_id);
	__type(key, u32);
	__type(value, struct bpf_func_stats_data);
} bpf_func_stats SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} allowed_devices SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_file_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_directory_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
#endif
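/*
 * Helpers shared by the probes below: error-pointer checks, bounded probe
 * reads, and routines that populate ancestor, cgroup and metadata records
 * into the per-CPU scratch buffer (data_heap).
 */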
static INLINE bool IS_ERR(const void* ptr)
{
	return IS_ERR_VALUE((unsigned long)ptr);
}

static INLINE u32 get_userspace_pid()
{
	return bpf_get_current_pid_tgid() >> 32;
}

static INLINE bool is_init_process(u32 tgid)
{
	return tgid == 1 || tgid == 0;
}

static INLINE unsigned long
probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
{
	len = len < max ? len : max;
	if (len > 1) {
		if (bpf_probe_read(dst, len, src))
			return 0;
	} else if (len == 1) {
		if (bpf_probe_read(dst, 1, src))
			return 0;
	}
	return len;
}

static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
				     int spid)
{
#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
		if (arr_struct->array[i].meta.pid == spid)
			return i;
	return -1;
}

static INLINE void populate_ancestors(struct task_struct* task,
				      struct ancestors_data_t* ancestors_data)
{
	struct task_struct* parent = task;
	u32 num_ancestors, ppid;

	ancestors_data->num_ancestors = 0;
#ifdef UNROLL
#pragma unroll
#endif
	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
		parent = BPF_CORE_READ(parent, real_parent);
		if (parent == NULL)
			break;
		ppid = BPF_CORE_READ(parent, tgid);
		if (is_init_process(ppid))
			break;
		ancestors_data->ancestor_pids[num_ancestors] = ppid;
		ancestors_data->ancestor_exec_ids[num_ancestors] =
			BPF_CORE_READ(parent, self_exec_id);
		ancestors_data->ancestor_start_times[num_ancestors] =
			BPF_CORE_READ(parent, start_time);
		ancestors_data->num_ancestors = num_ancestors;
	}
}

static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
					  struct kernfs_node* cgroup_root_node,
					  void* payload,
					  int* root_pos)
{
	void* payload_start = payload;
	size_t filepart_length;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
		filepart_length =
			bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
		if (!cgroup_node)
			return payload;
		if (cgroup_node == cgroup_root_node)
			*root_pos = payload - payload_start;
		if (filepart_length <= MAX_PATH) {
			barrier_var(filepart_length);
			payload += filepart_length;
		}
		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
	}
	return payload;
}

static ino_t get_inode_from_kernfs(struct kernfs_node* node)
{
	struct kernfs_node___52* node52 = (void*)node;

	if (bpf_core_field_exists(node52->id.ino)) {
		barrier_var(node52);
		return BPF_CORE_READ(node52, id.ino);
	} else {
		barrier_var(node);
		return (u64)BPF_CORE_READ(node, id);
	}
}

extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
enum cgroup_subsys_id___local {
	pids_cgrp_id___local = 123, /* value doesn't matter */
};
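/*
 * populate_cgroup_info() records the inode, mtime and name of the task's
 * cgroup and of its cgroup root; with ENABLE_CGROUP_V1_RESOLVER it resolves
 * against the cgroup v1 "pids" hierarchy when that controller is present.
 */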
static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
					 struct task_struct* task,
					 void* payload)
{
	struct kernfs_node* root_kernfs =
		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#if __has_builtin(__builtin_preserve_enum_value)
	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
						  pids_cgrp_id___local);
#ifdef UNROLL
#pragma unroll
#endif
		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys_state* subsys =
				BPF_CORE_READ(task, cgroups, subsys[i]);
			if (subsys != NULL) {
				int subsys_id = BPF_CORE_READ(subsys, ss, id);
				if (subsys_id == cgrp_id) {
					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
					break;
				}
			}
		}
	}
#endif

	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);

	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
	} else {
		struct kernfs_iattrs___52* root_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);

		struct kernfs_iattrs___52* proc_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
	}

	cgroup_data->cgroup_root_length = 0;
	cgroup_data->cgroup_proc_length = 0;
	cgroup_data->cgroup_full_length = 0;

	size_t cgroup_root_length =
		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
	barrier_var(cgroup_root_length);
	if (cgroup_root_length <= MAX_PATH) {
		barrier_var(cgroup_root_length);
		cgroup_data->cgroup_root_length = cgroup_root_length;
		payload += cgroup_root_length;
	}

	size_t cgroup_proc_length =
		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
	barrier_var(cgroup_proc_length);
	if (cgroup_proc_length <= MAX_PATH) {
		barrier_var(cgroup_proc_length);
		cgroup_data->cgroup_proc_length = cgroup_proc_length;
		payload += cgroup_proc_length;
	}

	if (FETCH_CGROUPS_FROM_BPF) {
		cgroup_data->cgroup_full_path_root_pos = -1;
		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
							      &cgroup_data->cgroup_full_path_root_pos);
		cgroup_data->cgroup_full_length = payload_end_pos - payload;
		payload = payload_end_pos;
	}

	return (void*)payload;
}

static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
					  struct task_struct* task,
					  u32 pid, void* payload)
{
	u64 uid_gid = bpf_get_current_uid_gid();

	metadata->uid = (u32)uid_gid;
	metadata->gid = uid_gid >> 32;
	metadata->pid = pid;
	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
	metadata->start_time = BPF_CORE_READ(task, start_time);
	metadata->comm_length = 0;

	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
	barrier_var(comm_length);
	if (comm_length <= TASK_COMM_LEN) {
		barrier_var(comm_length);
		metadata->comm_length = comm_length;
		payload += comm_length;
	}

	return (void*)payload;
}
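/*
 * Kill tracking: get_var_kill_data() builds a fresh kill record in the
 * per-CPU heap, while trace_var_sys_kill() aggregates repeated signals to
 * the same target pid in var_tpid_to_data, keeping one slot per sender
 * (up to KILL_DATA_ARRAY_SIZE) and rebuilding entries older than STALE_INFO.
 */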
static INLINE struct var_kill_data_t*
get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
{
	int zero = 0;
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (kill_data == NULL)
		return NULL;
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
	size_t payload_length = payload - (void*)kill_data->payload;
	kill_data->payload_length = payload_length;
	populate_ancestors(task, &kill_data->ancestors_info);
	kill_data->meta.type = KILL_EVENT;
	kill_data->kill_target_pid = tpid;
	kill_data->kill_sig = sig;
	kill_data->kill_count = 1;
	kill_data->last_kill_time = bpf_ktime_get_ns();
	return kill_data;
}

static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
{
	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
		return 0;

	u32 spid = get_userspace_pid();
	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);

	if (arr_struct == NULL) {
		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
		int zero = 0;

		if (kill_data == NULL)
			return 0;
		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
		if (arr_struct == NULL)
			return 0;
		bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
	} else {
		int index = get_var_spid_index(arr_struct, spid);

		if (index == -1) {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
#ifdef UNROLL
#pragma unroll
#endif
			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
				if (arr_struct->array[i].meta.pid == 0) {
					bpf_probe_read(&arr_struct->array[i],
						       sizeof(arr_struct->array[i]), kill_data);
					bpf_map_update_elem(&var_tpid_to_data, &tpid,
							    arr_struct, 0);

					return 0;
				}
			return 0;
		}

		struct var_kill_data_t* kill_data = &arr_struct->array[index];

		u64 delta_sec =
			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;

		if (delta_sec < STALE_INFO) {
			kill_data->kill_count++;
			kill_data->last_kill_time = bpf_ktime_get_ns();
			bpf_probe_read(&arr_struct->array[index],
				       sizeof(arr_struct->array[index]),
				       kill_data);
		} else {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
			bpf_probe_read(&arr_struct->array[index],
				       sizeof(arr_struct->array[index]),
				       kill_data);
		}
	}
	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
	return 0;
}
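/*
 * Self-instrumentation: every probe brackets its body with bpf_stats_enter()
 * and bpf_stats_exit() to accumulate per-function execution counts and
 * elapsed time in the bpf_func_stats map.
 */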
static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
				   enum bpf_function_id func_id)
{
	int func_id_key = func_id;

	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
	bpf_stat_ctx->bpf_func_stats_data_val =
		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
}

static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
}

static INLINE void
bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
				    struct var_metadata_t* meta)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val) {
		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
		meta->bpf_stats_num_perf_events =
			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
	}
	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
	meta->cpu_id = bpf_get_smp_processor_id();
}

static INLINE size_t
read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
{
	size_t length = 0;
	size_t filepart_length;
	struct dentry* parent_dentry;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		filepart_length = bpf_probe_read_str(payload, MAX_PATH,
						     BPF_CORE_READ(filp_dentry, d_name.name));
		barrier_var(filepart_length);
		if (filepart_length > MAX_PATH)
			break;
		barrier_var(filepart_length);
		payload += filepart_length;
		length += filepart_length;

		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}

	return length;
}

static INLINE bool
is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
{
	struct dentry* parent_dentry;
#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);

		if (allowed_dir != NULL)
			return true;
		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}
	return false;
}

static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
						 u32* device_id,
						 u64* file_ino)
{
	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
	*device_id = dev_id;
	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);

	if (allowed_device == NULL)
		return false;

	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
	*file_ino = ino;
	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);

	if (allowed_file == NULL)
		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
			return false;
	return true;
}
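/*
 * kprobe on proc_sys_write: emits a SYSCTL_EVENT carrying the written value
 * (read up to CTL_MAXNAME bytes) and the name of the sysctl file.
 */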
SEC("kprobe/proc_sys_write")
ssize_t BPF_KPROBE(kprobe__proc_sys_write,
		   struct file* filp, const char* buf,
		   size_t count, loff_t* ppos)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);

	u32 pid = get_userspace_pid();
	int zero = 0;
	struct var_sysctl_data_t* sysctl_data =
		bpf_map_lookup_elem(&data_heap, &zero);
	if (!sysctl_data)
		goto out;

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	sysctl_data->meta.type = SYSCTL_EVENT;
	void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
	payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);

	populate_ancestors(task, &sysctl_data->ancestors_info);

	sysctl_data->sysctl_val_length = 0;
	sysctl_data->sysctl_path_length = 0;

	size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
	barrier_var(sysctl_val_length);
	if (sysctl_val_length <= CTL_MAXNAME) {
		barrier_var(sysctl_val_length);
		sysctl_data->sysctl_val_length = sysctl_val_length;
		payload += sysctl_val_length;
	}

	size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
						       BPF_CORE_READ(filp, f_path.dentry, d_name.name));
	barrier_var(sysctl_path_length);
	if (sysctl_path_length <= MAX_PATH) {
		barrier_var(sysctl_path_length);
		sysctl_data->sysctl_path_length = sysctl_path_length;
		payload += sysctl_path_length;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
	unsigned long data_len = payload - (void*)sysctl_data;
	data_len = data_len > sizeof(struct var_sysctl_data_t)
		? sizeof(struct var_sysctl_data_t)
		: data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("tracepoint/syscalls/sys_enter_kill")
int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;

	bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
	int pid = ctx->args[0];
	int sig = ctx->args[1];
	int ret = trace_var_sys_kill(ctx, pid, sig);
	bpf_stats_exit(&stats_ctx);
	return ret;
};

SEC("raw_tracepoint/sched_process_exit")
int raw_tracepoint__sched_process_exit(void* ctx)
{
	int zero = 0;
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);

	u32 tpid = get_userspace_pid();

	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (arr_struct == NULL || kill_data == NULL)
		goto out;

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
		struct var_kill_data_t* past_kill_data = &arr_struct->array[i];

		if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
			bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
			void* payload = kill_data->payload;
			size_t offset = kill_data->payload_length;
			if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
				return 0;
			payload += offset;

			kill_data->kill_target_name_length = 0;
			kill_data->kill_target_cgroup_proc_length = 0;

			size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
			barrier_var(comm_length);
			if (comm_length <= TASK_COMM_LEN) {
				barrier_var(comm_length);
				kill_data->kill_target_name_length = comm_length;
				payload += comm_length;
			}

			size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
								       BPF_CORE_READ(proc_kernfs, name));
			barrier_var(cgroup_proc_length);
			if (cgroup_proc_length <= KILL_TARGET_LEN) {
				barrier_var(cgroup_proc_length);
				kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
				payload += cgroup_proc_length;
			}

			bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
			unsigned long data_len = (void*)payload - (void*)kill_data;
			data_len = data_len > sizeof(struct var_kill_data_t)
				? sizeof(struct var_kill_data_t)
				: data_len;
			bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
		}
	}
	bpf_map_delete_elem(&var_tpid_to_data, &tpid);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
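/*
 * sched_process_exec: emits an EXEC_EVENT with the binary path, the argv
 * region of the new mm and, if READ_ENVIRON_FROM_EXEC is set, the environment.
 */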
SEC("raw_tracepoint/sched_process_exec")
int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);

	struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
	u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);

	bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
	if (should_filter_binprm != NULL)
		goto out;

	int zero = 0;
	struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!proc_exec_data)
		goto out;

	if (INODE_FILTER && inode != INODE_FILTER)
		return 0;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	proc_exec_data->meta.type = EXEC_EVENT;
	proc_exec_data->bin_path_length = 0;
	proc_exec_data->cmdline_length = 0;
	proc_exec_data->environment_length = 0;
	void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
					      proc_exec_data->payload);
	payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);

	struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
	proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
	proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
	proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);

	const char* filename = BPF_CORE_READ(bprm, filename);
	size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
	barrier_var(bin_path_length);
	if (bin_path_length <= MAX_FILENAME_LEN) {
		barrier_var(bin_path_length);
		proc_exec_data->bin_path_length = bin_path_length;
		payload += bin_path_length;
	}

	void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
	void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
	unsigned int cmdline_length = probe_read_lim(payload, arg_start,
						     arg_end - arg_start, MAX_ARGS_LEN);

	if (cmdline_length <= MAX_ARGS_LEN) {
		barrier_var(cmdline_length);
		proc_exec_data->cmdline_length = cmdline_length;
		payload += cmdline_length;
	}

	if (READ_ENVIRON_FROM_EXEC) {
		void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
		void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
		unsigned long env_len = probe_read_lim(payload, env_start,
						       env_end - env_start, MAX_ENVIRON_LEN);
		/* bound the environment length that was just read, not cmdline_length */
		if (env_len <= MAX_ENVIRON_LEN) {
			proc_exec_data->environment_length = env_len;
			payload += env_len;
		}
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
	unsigned long data_len = payload - (void*)proc_exec_data;
	data_len = data_len > sizeof(struct var_exec_data_t)
		? sizeof(struct var_exec_data_t)
		: data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
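/*
 * File-modification probes: the do_filp_open kretprobe (writable opens),
 * vfs_link and vfs_symlink each emit a FILEMOD_EVENT once the dentry passes
 * the allowed-device and allowed-inode filters.
 */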
SEC("kretprobe/do_filp_open")
int kprobe_ret__do_filp_open(struct pt_regs* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);

	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);

	if (filp == NULL || IS_ERR(filp))
		goto out;
	unsigned int flags = BPF_CORE_READ(filp, f_flags);
	if ((flags & (O_RDWR | O_WRONLY)) == 0)
		goto out;
	if ((flags & O_TMPFILE) > 0)
		goto out;
	struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
	umode_t mode = BPF_CORE_READ(file_inode, i_mode);
	if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
	    S_ISSOCK(mode))
		goto out;

	struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
	u32 device_id = 0;
	u64 file_ino = 0;
	if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_OPEN;
	filemod_data->dst_flags = flags;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
SEC("kprobe/vfs_link")
int BPF_KPROBE(kprobe__vfs_link,
	       struct dentry* old_dentry, struct user_namespace *mnt_userns,
	       struct inode* dir, struct dentry* new_dentry,
	       struct inode** delegated_inode)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);

	u32 src_device_id = 0;
	u64 src_file_ino = 0;
	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
	    !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_LINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = src_file_ino;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = src_device_id;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->src_filepath_length = len;
	}

	len = read_absolute_file_path_from_dentry(new_dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->dst_filepath_length = len;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("kprobe/vfs_symlink")
int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
	       const char* oldname)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);

	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_SYMLINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->src_filepath_length = len;
	}
	len = read_absolute_file_path_from_dentry(dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
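/*
 * sched_process_fork: emits a compact FORK_EVENT linking the child to its
 * parent's pid, exec_id and start time.
 */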
SEC("raw_tracepoint/sched_process_fork")
int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);

	int zero = 0;
	struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!fork_data)
		goto out;

	struct task_struct* parent = (struct task_struct*)ctx->args[0];
	struct task_struct* child = (struct task_struct*)ctx->args[1];
	fork_data->meta.type = FORK_EVENT;

	void* payload = populate_var_metadata(&fork_data->meta, child,
					      BPF_CORE_READ(child, pid), fork_data->payload);
	fork_data->parent_pid = BPF_CORE_READ(parent, pid);
	fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
	fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);

	unsigned long data_len = payload - (void*)fork_data;
	data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

char _license[] SEC("license") = "GPL";