augmented_raw_syscalls.c (9228B)
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Augment the raw_syscalls tracepoints with the contents of the pointer arguments. 4 * 5 * Test it with: 6 * 7 * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null 8 * 9 * This exactly matches what is marshalled into the raw_syscall:sys_enter 10 * payload expected by the 'perf trace' beautifiers. 11 * 12 * For now it just uses the existing tracepoint augmentation code in 'perf 13 * trace', in the next csets we'll hook up these with the sys_enter/sys_exit 14 * code that will combine entry/exit in a strace like way. 15 */ 16 17#include <unistd.h> 18#include <linux/limits.h> 19#include <linux/socket.h> 20#include <pid_filter.h> 21 22/* bpf-output associated map */ 23bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); 24 25/* 26 * string_args_len: one per syscall arg, 0 means not a string or don't copy it, 27 * PATH_MAX for copying everything, any other value to limit 28 * it a la 'strace -s strsize'. 29 */ 30struct syscall { 31 bool enabled; 32 u16 string_args_len[6]; 33}; 34 35bpf_map(syscalls, ARRAY, int, struct syscall, 512); 36 37/* 38 * What to augment at entry? 39 * 40 * Pointer arg payloads (filenames, etc) passed from userspace to the kernel 41 */ 42bpf_map(syscalls_sys_enter, PROG_ARRAY, u32, u32, 512); 43 44/* 45 * What to augment at exit? 46 * 47 * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace. 48 */ 49bpf_map(syscalls_sys_exit, PROG_ARRAY, u32, u32, 512); 50 51struct syscall_enter_args { 52 unsigned long long common_tp_fields; 53 long syscall_nr; 54 unsigned long args[6]; 55}; 56 57struct syscall_exit_args { 58 unsigned long long common_tp_fields; 59 long syscall_nr; 60 long ret; 61}; 62 63struct augmented_arg { 64 unsigned int size; 65 int err; 66 char value[PATH_MAX]; 67}; 68 69pid_filter(pids_filtered); 70 71struct augmented_args_payload { 72 struct syscall_enter_args args; 73 union { 74 struct { 75 struct augmented_arg arg, arg2; 76 }; 77 struct sockaddr_storage saddr; 78 }; 79}; 80 81// We need more tmp space than the BPF stack can give us 82bpf_map(augmented_args_tmp, PERCPU_ARRAY, int, struct augmented_args_payload, 1); 83 84static inline struct augmented_args_payload *augmented_args_payload(void) 85{ 86 int key = 0; 87 return bpf_map_lookup_elem(&augmented_args_tmp, &key); 88} 89 90static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len) 91{ 92 /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ 93 return perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); 94} 95 96static inline 97unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len) 98{ 99 unsigned int augmented_len = sizeof(*augmented_arg); 100 int string_len = probe_read_str(&augmented_arg->value, arg_len, arg); 101 102 augmented_arg->size = augmented_arg->err = 0; 103 /* 104 * probe_read_str may return < 0, e.g. -EFAULT 105 * So we leave that in the augmented_arg->size that userspace will 106 */ 107 if (string_len > 0) { 108 augmented_len -= sizeof(augmented_arg->value) - string_len; 109 augmented_len &= sizeof(augmented_arg->value) - 1; 110 augmented_arg->size = string_len; 111 } else { 112 /* 113 * So that username notice the error while still being able 114 * to skip this augmented arg record 115 */ 116 augmented_arg->err = string_len; 117 augmented_len = offsetof(struct augmented_arg, value); 118 } 119 120 return augmented_len; 121} 122 123SEC("!raw_syscalls:unaugmented") 124int syscall_unaugmented(struct syscall_enter_args *args) 125{ 126 return 1; 127} 128 129/* 130 * These will be tail_called from SEC("raw_syscalls:sys_enter"), so will find in 131 * augmented_args_tmp what was read by that raw_syscalls:sys_enter and go 132 * on from there, reading the first syscall arg as a string, i.e. open's 133 * filename. 134 */ 135SEC("!syscalls:sys_enter_connect") 136int sys_enter_connect(struct syscall_enter_args *args) 137{ 138 struct augmented_args_payload *augmented_args = augmented_args_payload(); 139 const void *sockaddr_arg = (const void *)args->args[1]; 140 unsigned int socklen = args->args[2]; 141 unsigned int len = sizeof(augmented_args->args); 142 143 if (augmented_args == NULL) 144 return 1; /* Failure: don't filter */ 145 146 if (socklen > sizeof(augmented_args->saddr)) 147 socklen = sizeof(augmented_args->saddr); 148 149 probe_read(&augmented_args->saddr, socklen, sockaddr_arg); 150 151 return augmented__output(args, augmented_args, len + socklen); 152} 153 154SEC("!syscalls:sys_enter_sendto") 155int sys_enter_sendto(struct syscall_enter_args *args) 156{ 157 struct augmented_args_payload *augmented_args = augmented_args_payload(); 158 const void *sockaddr_arg = (const void *)args->args[4]; 159 unsigned int socklen = args->args[5]; 160 unsigned int len = sizeof(augmented_args->args); 161 162 if (augmented_args == NULL) 163 return 1; /* Failure: don't filter */ 164 165 if (socklen > sizeof(augmented_args->saddr)) 166 socklen = sizeof(augmented_args->saddr); 167 168 probe_read(&augmented_args->saddr, socklen, sockaddr_arg); 169 170 return augmented__output(args, augmented_args, len + socklen); 171} 172 173SEC("!syscalls:sys_enter_open") 174int sys_enter_open(struct syscall_enter_args *args) 175{ 176 struct augmented_args_payload *augmented_args = augmented_args_payload(); 177 const void *filename_arg = (const void *)args->args[0]; 178 unsigned int len = sizeof(augmented_args->args); 179 180 if (augmented_args == NULL) 181 return 1; /* Failure: don't filter */ 182 183 len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value)); 184 185 return augmented__output(args, augmented_args, len); 186} 187 188SEC("!syscalls:sys_enter_openat") 189int sys_enter_openat(struct syscall_enter_args *args) 190{ 191 struct augmented_args_payload *augmented_args = augmented_args_payload(); 192 const void *filename_arg = (const void *)args->args[1]; 193 unsigned int len = sizeof(augmented_args->args); 194 195 if (augmented_args == NULL) 196 return 1; /* Failure: don't filter */ 197 198 len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value)); 199 200 return augmented__output(args, augmented_args, len); 201} 202 203SEC("!syscalls:sys_enter_rename") 204int sys_enter_rename(struct syscall_enter_args *args) 205{ 206 struct augmented_args_payload *augmented_args = augmented_args_payload(); 207 const void *oldpath_arg = (const void *)args->args[0], 208 *newpath_arg = (const void *)args->args[1]; 209 unsigned int len = sizeof(augmented_args->args), oldpath_len; 210 211 if (augmented_args == NULL) 212 return 1; /* Failure: don't filter */ 213 214 oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); 215 len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value)); 216 217 return augmented__output(args, augmented_args, len); 218} 219 220SEC("!syscalls:sys_enter_renameat") 221int sys_enter_renameat(struct syscall_enter_args *args) 222{ 223 struct augmented_args_payload *augmented_args = augmented_args_payload(); 224 const void *oldpath_arg = (const void *)args->args[1], 225 *newpath_arg = (const void *)args->args[3]; 226 unsigned int len = sizeof(augmented_args->args), oldpath_len; 227 228 if (augmented_args == NULL) 229 return 1; /* Failure: don't filter */ 230 231 oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); 232 len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value)); 233 234 return augmented__output(args, augmented_args, len); 235} 236 237SEC("raw_syscalls:sys_enter") 238int sys_enter(struct syscall_enter_args *args) 239{ 240 struct augmented_args_payload *augmented_args; 241 /* 242 * We start len, the amount of data that will be in the perf ring 243 * buffer, if this is not filtered out by one of pid_filter__has(), 244 * syscall->enabled, etc, with the non-augmented raw syscall payload, 245 * i.e. sizeof(augmented_args->args). 246 * 247 * We'll add to this as we add augmented syscalls right after that 248 * initial, non-augmented raw_syscalls:sys_enter payload. 249 */ 250 unsigned int len = sizeof(augmented_args->args); 251 struct syscall *syscall; 252 253 if (pid_filter__has(&pids_filtered, getpid())) 254 return 0; 255 256 augmented_args = augmented_args_payload(); 257 if (augmented_args == NULL) 258 return 1; 259 260 probe_read(&augmented_args->args, sizeof(augmented_args->args), args); 261 262 /* 263 * Jump to syscall specific augmenter, even if the default one, 264 * "!raw_syscalls:unaugmented" that will just return 1 to return the 265 * unaugmented tracepoint payload. 266 */ 267 bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr); 268 269 // If not found on the PROG_ARRAY syscalls map, then we're filtering it: 270 return 0; 271} 272 273SEC("raw_syscalls:sys_exit") 274int sys_exit(struct syscall_exit_args *args) 275{ 276 struct syscall_exit_args exit_args; 277 278 if (pid_filter__has(&pids_filtered, getpid())) 279 return 0; 280 281 probe_read(&exit_args, sizeof(exit_args), args); 282 /* 283 * Jump to syscall specific return augmenter, even if the default one, 284 * "!raw_syscalls:unaugmented" that will just return 1 to return the 285 * unaugmented tracepoint payload. 286 */ 287 bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr); 288 /* 289 * If not found on the PROG_ARRAY syscalls map, then we're filtering it: 290 */ 291 return 0; 292} 293 294license(GPL);