pyperf.h (8509B)
1// SPDX-License-Identifier: GPL-2.0 2// Copyright (c) 2019 Facebook 3#include <linux/sched.h> 4#include <linux/ptrace.h> 5#include <stdint.h> 6#include <stddef.h> 7#include <stdbool.h> 8#include <linux/bpf.h> 9#include <bpf/bpf_helpers.h> 10 11#define FUNCTION_NAME_LEN 64 12#define FILE_NAME_LEN 128 13#define TASK_COMM_LEN 16 14 15typedef struct { 16 int PyThreadState_frame; 17 int PyThreadState_thread; 18 int PyFrameObject_back; 19 int PyFrameObject_code; 20 int PyFrameObject_lineno; 21 int PyCodeObject_filename; 22 int PyCodeObject_name; 23 int String_data; 24 int String_size; 25} OffsetConfig; 26 27typedef struct { 28 uintptr_t current_state_addr; 29 uintptr_t tls_key_addr; 30 OffsetConfig offsets; 31 bool use_tls; 32} PidData; 33 34typedef struct { 35 uint32_t success; 36} Stats; 37 38typedef struct { 39 char name[FUNCTION_NAME_LEN]; 40 char file[FILE_NAME_LEN]; 41} Symbol; 42 43typedef struct { 44 uint32_t pid; 45 uint32_t tid; 46 char comm[TASK_COMM_LEN]; 47 int32_t kernel_stack_id; 48 int32_t user_stack_id; 49 bool thread_current; 50 bool pthread_match; 51 bool stack_complete; 52 int16_t stack_len; 53 int32_t stack[STACK_MAX_LEN]; 54 55 int has_meta; 56 int metadata; 57 char dummy_safeguard; 58} Event; 59 60 61typedef int pid_t; 62 63typedef struct { 64 void* f_back; // PyFrameObject.f_back, previous frame 65 void* f_code; // PyFrameObject.f_code, pointer to PyCodeObject 66 void* co_filename; // PyCodeObject.co_filename 67 void* co_name; // PyCodeObject.co_name 68} FrameData; 69 70#ifdef SUBPROGS 71__noinline 72#else 73__always_inline 74#endif 75static void *get_thread_state(void *tls_base, PidData *pidData) 76{ 77 void* thread_state; 78 int key; 79 80 bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr); 81 bpf_probe_read_user(&thread_state, sizeof(thread_state), 82 tls_base + 0x310 + key * 0x10 + 0x08); 83 return thread_state; 84} 85 86static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData, 87 FrameData *frame, Symbol *symbol) 88{ 89 // read data from PyFrameObject 90 bpf_probe_read_user(&frame->f_back, 91 sizeof(frame->f_back), 92 frame_ptr + pidData->offsets.PyFrameObject_back); 93 bpf_probe_read_user(&frame->f_code, 94 sizeof(frame->f_code), 95 frame_ptr + pidData->offsets.PyFrameObject_code); 96 97 // read data from PyCodeObject 98 if (!frame->f_code) 99 return false; 100 bpf_probe_read_user(&frame->co_filename, 101 sizeof(frame->co_filename), 102 frame->f_code + pidData->offsets.PyCodeObject_filename); 103 bpf_probe_read_user(&frame->co_name, 104 sizeof(frame->co_name), 105 frame->f_code + pidData->offsets.PyCodeObject_name); 106 // read actual names into symbol 107 if (frame->co_filename) 108 bpf_probe_read_user_str(&symbol->file, 109 sizeof(symbol->file), 110 frame->co_filename + 111 pidData->offsets.String_data); 112 if (frame->co_name) 113 bpf_probe_read_user_str(&symbol->name, 114 sizeof(symbol->name), 115 frame->co_name + 116 pidData->offsets.String_data); 117 return true; 118} 119 120struct { 121 __uint(type, BPF_MAP_TYPE_HASH); 122 __uint(max_entries, 1); 123 __type(key, int); 124 __type(value, PidData); 125} pidmap SEC(".maps"); 126 127struct { 128 __uint(type, BPF_MAP_TYPE_HASH); 129 __uint(max_entries, 1); 130 __type(key, int); 131 __type(value, Event); 132} eventmap SEC(".maps"); 133 134struct { 135 __uint(type, BPF_MAP_TYPE_HASH); 136 __uint(max_entries, 1); 137 __type(key, Symbol); 138 __type(value, int); 139} symbolmap SEC(".maps"); 140 141struct { 142 __uint(type, BPF_MAP_TYPE_ARRAY); 143 __uint(max_entries, 1); 144 __type(key, int); 145 __type(value, Stats); 146} statsmap SEC(".maps"); 147 148struct { 149 __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 150 __uint(max_entries, 32); 151 __uint(key_size, sizeof(int)); 152 __uint(value_size, sizeof(int)); 153} perfmap SEC(".maps"); 154 155struct { 156 __uint(type, BPF_MAP_TYPE_STACK_TRACE); 157 __uint(max_entries, 1000); 158 __uint(key_size, sizeof(int)); 159 __uint(value_size, sizeof(long long) * 127); 160} stackmap SEC(".maps"); 161 162#ifdef USE_BPF_LOOP 163struct process_frame_ctx { 164 int cur_cpu; 165 int32_t *symbol_counter; 166 void *frame_ptr; 167 FrameData *frame; 168 PidData *pidData; 169 Symbol *sym; 170 Event *event; 171 bool done; 172}; 173 174static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx) 175{ 176 int zero = 0; 177 void *frame_ptr = ctx->frame_ptr; 178 PidData *pidData = ctx->pidData; 179 FrameData *frame = ctx->frame; 180 int32_t *symbol_counter = ctx->symbol_counter; 181 int cur_cpu = ctx->cur_cpu; 182 Event *event = ctx->event; 183 Symbol *sym = ctx->sym; 184 185 if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) { 186 int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; 187 int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym); 188 189 if (!symbol_id) { 190 bpf_map_update_elem(&symbolmap, sym, &zero, 0); 191 symbol_id = bpf_map_lookup_elem(&symbolmap, sym); 192 if (!symbol_id) { 193 ctx->done = true; 194 return 1; 195 } 196 } 197 if (*symbol_id == new_symbol_id) 198 (*symbol_counter)++; 199 200 barrier_var(i); 201 if (i >= STACK_MAX_LEN) 202 return 1; 203 204 event->stack[i] = *symbol_id; 205 206 event->stack_len = i + 1; 207 frame_ptr = frame->f_back; 208 } 209 return 0; 210} 211#endif /* USE_BPF_LOOP */ 212 213#ifdef GLOBAL_FUNC 214__noinline 215#elif defined(SUBPROGS) 216static __noinline 217#else 218static __always_inline 219#endif 220int __on_event(struct bpf_raw_tracepoint_args *ctx) 221{ 222 uint64_t pid_tgid = bpf_get_current_pid_tgid(); 223 pid_t pid = (pid_t)(pid_tgid >> 32); 224 PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid); 225 if (!pidData) 226 return 0; 227 228 int zero = 0; 229 Event* event = bpf_map_lookup_elem(&eventmap, &zero); 230 if (!event) 231 return 0; 232 233 event->pid = pid; 234 235 event->tid = (pid_t)pid_tgid; 236 bpf_get_current_comm(&event->comm, sizeof(event->comm)); 237 238 event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK); 239 event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0); 240 241 void* thread_state_current = (void*)0; 242 bpf_probe_read_user(&thread_state_current, 243 sizeof(thread_state_current), 244 (void*)(long)pidData->current_state_addr); 245 246 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 247 void* tls_base = (void*)task; 248 249 void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData) 250 : thread_state_current; 251 event->thread_current = thread_state == thread_state_current; 252 253 if (pidData->use_tls) { 254 uint64_t pthread_created; 255 uint64_t pthread_self; 256 bpf_probe_read_user(&pthread_self, sizeof(pthread_self), 257 tls_base + 0x10); 258 259 bpf_probe_read_user(&pthread_created, 260 sizeof(pthread_created), 261 thread_state + 262 pidData->offsets.PyThreadState_thread); 263 event->pthread_match = pthread_created == pthread_self; 264 } else { 265 event->pthread_match = 1; 266 } 267 268 if (event->pthread_match || !pidData->use_tls) { 269 void* frame_ptr; 270 FrameData frame; 271 Symbol sym = {}; 272 int cur_cpu = bpf_get_smp_processor_id(); 273 274 bpf_probe_read_user(&frame_ptr, 275 sizeof(frame_ptr), 276 thread_state + 277 pidData->offsets.PyThreadState_frame); 278 279 int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); 280 if (symbol_counter == NULL) 281 return 0; 282#ifdef USE_BPF_LOOP 283 struct process_frame_ctx ctx = { 284 .cur_cpu = cur_cpu, 285 .symbol_counter = symbol_counter, 286 .frame_ptr = frame_ptr, 287 .frame = &frame, 288 .pidData = pidData, 289 .sym = &sym, 290 .event = event, 291 }; 292 293 bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0); 294 if (ctx.done) 295 return 0; 296#else 297#ifdef NO_UNROLL 298#pragma clang loop unroll(disable) 299#else 300#ifdef UNROLL_COUNT 301#pragma clang loop unroll_count(UNROLL_COUNT) 302#else 303#pragma clang loop unroll(full) 304#endif 305#endif /* NO_UNROLL */ 306 /* Unwind python stack */ 307 for (int i = 0; i < STACK_MAX_LEN; ++i) { 308 if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { 309 int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; 310 int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym); 311 if (!symbol_id) { 312 bpf_map_update_elem(&symbolmap, &sym, &zero, 0); 313 symbol_id = bpf_map_lookup_elem(&symbolmap, &sym); 314 if (!symbol_id) 315 return 0; 316 } 317 if (*symbol_id == new_symbol_id) 318 (*symbol_counter)++; 319 event->stack[i] = *symbol_id; 320 event->stack_len = i + 1; 321 frame_ptr = frame.f_back; 322 } 323 } 324#endif /* USE_BPF_LOOP */ 325 event->stack_complete = frame_ptr == NULL; 326 } else { 327 event->stack_complete = 1; 328 } 329 330 Stats* stats = bpf_map_lookup_elem(&statsmap, &zero); 331 if (stats) 332 stats->success++; 333 334 event->has_meta = 0; 335 bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata)); 336 return 0; 337} 338 339SEC("raw_tracepoint/kfree_skb") 340int on_event(struct bpf_raw_tracepoint_args* ctx) 341{ 342 int i, ret = 0; 343 ret |= __on_event(ctx); 344 ret |= __on_event(ctx); 345 ret |= __on_event(ctx); 346 ret |= __on_event(ctx); 347 ret |= __on_event(ctx); 348 return ret; 349} 350 351char _license[] SEC("license") = "GPL";