offwaketime_kern.c (3940B)
/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>
#include <uapi/linux/perf_event.h>
#include <linux/version.h>
#include <linux/sched.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* safely read a field of a kernel structure via bpf_probe_read_kernel() */
#define _(P)								\
	({								\
		typeof(P) val;						\
		bpf_probe_read_kernel(&val, sizeof(val), &(P));		\
		val;							\
	})

#define MINBLOCK_US	1
#define MAX_ENTRIES	10000

/* aggregation key: waker/target task names plus their stack trace ids */
struct key_t {
	char waker[TASK_COMM_LEN];
	char target[TASK_COMM_LEN];
	u32 wret;
	u32 tret;
};

/* accumulated off-CPU time, in microseconds, per key_t */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct key_t);
	__type(value, u64);
	__uint(max_entries, MAX_ENTRIES);
} counts SEC(".maps");

/* pid -> timestamp (ns) taken when the task was switched out */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, u64);
	__uint(max_entries, MAX_ENTRIES);
} start SEC(".maps");

struct wokeby_t {
	char name[TASK_COMM_LEN];
	u32 ret;
};

/* pid of a sleeping task -> name and stack id of the task that woke it */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, struct wokeby_t);
	__uint(max_entries, MAX_ENTRIES);
} wokeby SEC(".maps");

/* shared storage for both waker and target stack traces */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(u32));
	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
	__uint(max_entries, MAX_ENTRIES);
} stackmap SEC(".maps");

#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)

/* record which task (and from which stack) is waking up the target pid */
SEC("kprobe/try_to_wake_up")
int waker(struct pt_regs *ctx)
{
	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
	struct wokeby_t woke;
	u32 pid;

	pid = _(p->pid);

	bpf_get_current_comm(&woke.name, sizeof(woke.name));
	woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);

	bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
	return 0;
}

/* combine the target's stack with the recorded waker info and add delta */
static inline int update_counts(void *ctx, u32 pid, u64 delta)
{
	struct wokeby_t *woke;
	u64 zero = 0, *val;
	struct key_t key;

	__builtin_memset(&key.waker, 0, sizeof(key.waker));
	bpf_get_current_comm(&key.target, sizeof(key.target));
	key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
	key.wret = 0;

	woke = bpf_map_lookup_elem(&wokeby, &pid);
	if (woke) {
		key.wret = woke->ret;
		__builtin_memcpy(&key.waker, woke->name, sizeof(key.waker));
		bpf_map_delete_elem(&wokeby, &pid);
	}

	val = bpf_map_lookup_elem(&counts, &key);
	if (!val) {
		bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
		val = bpf_map_lookup_elem(&counts, &key);
		if (!val)
			return 0;
	}
	(*val) += delta;
	return 0;
}

#if 1
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
	unsigned long long pad;
	char prev_comm[TASK_COMM_LEN];
	int prev_pid;
	int prev_prio;
	long long prev_state;
	char next_comm[TASK_COMM_LEN];
	int next_pid;
	int next_prio;
};
SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
{
	/* record previous thread sleep time */
	u32 pid = ctx->prev_pid;
#else
SEC("kprobe/finish_task_switch")
int oncpu(struct pt_regs *ctx)
{
	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
	/* record previous thread sleep time */
	u32 pid = _(p->pid);
#endif
	u64 delta, ts, *tsp;

	ts = bpf_ktime_get_ns();
	bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);

	/* calculate current thread's delta time */
	pid = bpf_get_current_pid_tgid();
	tsp = bpf_map_lookup_elem(&start, &pid);
	if (!tsp)
		/* missed start or filtered */
		return 0;

	delta = bpf_ktime_get_ns() - *tsp;
	bpf_map_delete_elem(&start, &pid);
	delta = delta / 1000;
	if (delta < MINBLOCK_US)
		return 0;

	return update_counts(ctx, pid, delta);
}

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
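
/*
 * Usage note (editor's sketch, not part of the original sample): this object
 * only fills the maps; a userspace loader must attach the programs and read
 * out "counts".  A minimal, hypothetical libbpf-based consumer could look
 * like the sketch below, assuming the compiled object is named
 * "offwaketime_kern.o" and that struct key_t is mirrored verbatim in
 * userspace; the loader actually shipped alongside this sample may differ.
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <bpf/bpf.h>
 *	#include <bpf/libbpf.h>
 *
 *	#define TASK_COMM_LEN 16
 *
 *	struct key_t {				// must match the kernel-side key_t
 *		char waker[TASK_COMM_LEN];
 *		char target[TASK_COMM_LEN];
 *		__u32 wret;
 *		__u32 tret;
 *	};
 *
 *	int main(void)
 *	{
 *		struct bpf_object *obj = bpf_object__open_file("offwaketime_kern.o", NULL);
 *		struct key_t key, next;
 *		struct bpf_program *prog;
 *		void *prev = NULL;
 *		__u64 usec;
 *		int fd;
 *
 *		if (libbpf_get_error(obj) || bpf_object__load(obj))
 *			return 1;
 *		bpf_object__for_each_program(prog, obj)
 *			bpf_program__attach(prog);	// kprobe and tracepoint auto-attach
 *
 *		sleep(5);				// let some off-CPU time accumulate
 *
 *		fd = bpf_object__find_map_fd_by_name(obj, "counts");
 *		while (bpf_map_get_next_key(fd, prev, &next) == 0) {
 *			if (bpf_map_lookup_elem(fd, &next, &usec) == 0)
 *				printf("%s -> %s: %llu us\n", next.waker, next.target,
 *				       (unsigned long long)usec);
 *			key = next;
 *			prev = &key;
 *		}
 *		return 0;
 *	}
 *
 * The stack ids (wret/tret) index into "stackmap" and can be resolved to
 * kernel symbols by looking them up in that map and matching /proc/kallsyms.
 */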