stackdepot.c (13927B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Generic stack depot for storing stack traces. 4 * 5 * Some debugging tools need to save stack traces of certain events which can 6 * be later presented to the user. For example, KASAN needs to safe alloc and 7 * free stacks for each object, but storing two stack traces per object 8 * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for 9 * that). 10 * 11 * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc 12 * and free stacks repeat a lot, we save about 100x space. 13 * Stacks are never removed from depot, so we store them contiguously one after 14 * another in a contiguous memory allocation. 15 * 16 * Author: Alexander Potapenko <glider@google.com> 17 * Copyright (C) 2016 Google, Inc. 18 * 19 * Based on code by Dmitry Chernenkov. 20 */ 21 22#include <linux/gfp.h> 23#include <linux/jhash.h> 24#include <linux/kernel.h> 25#include <linux/mm.h> 26#include <linux/mutex.h> 27#include <linux/percpu.h> 28#include <linux/printk.h> 29#include <linux/slab.h> 30#include <linux/stacktrace.h> 31#include <linux/stackdepot.h> 32#include <linux/string.h> 33#include <linux/types.h> 34#include <linux/memblock.h> 35 36#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8) 37 38#define STACK_ALLOC_NULL_PROTECTION_BITS 1 39#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */ 40#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER)) 41#define STACK_ALLOC_ALIGN 4 42#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \ 43 STACK_ALLOC_ALIGN) 44#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \ 45 STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS) 46#define STACK_ALLOC_SLABS_CAP 8192 47#define STACK_ALLOC_MAX_SLABS \ 48 (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \ 49 (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP) 50 51/* The compact structure to store the reference to stacks. */ 52union handle_parts { 53 depot_stack_handle_t handle; 54 struct { 55 u32 slabindex : STACK_ALLOC_INDEX_BITS; 56 u32 offset : STACK_ALLOC_OFFSET_BITS; 57 u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS; 58 }; 59}; 60 61struct stack_record { 62 struct stack_record *next; /* Link in the hashtable */ 63 u32 hash; /* Hash in the hastable */ 64 u32 size; /* Number of frames in the stack */ 65 union handle_parts handle; 66 unsigned long entries[]; /* Variable-sized array of entries. */ 67}; 68 69static bool __stack_depot_want_early_init __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); 70static bool __stack_depot_early_init_passed __initdata; 71 72static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; 73 74static int depot_index; 75static int next_slab_inited; 76static size_t depot_offset; 77static DEFINE_RAW_SPINLOCK(depot_lock); 78 79static bool init_stack_slab(void **prealloc) 80{ 81 if (!*prealloc) 82 return false; 83 /* 84 * This smp_load_acquire() pairs with smp_store_release() to 85 * |next_slab_inited| below and in depot_alloc_stack(). 86 */ 87 if (smp_load_acquire(&next_slab_inited)) 88 return true; 89 if (stack_slabs[depot_index] == NULL) { 90 stack_slabs[depot_index] = *prealloc; 91 *prealloc = NULL; 92 } else { 93 /* If this is the last depot slab, do not touch the next one. */ 94 if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { 95 stack_slabs[depot_index + 1] = *prealloc; 96 *prealloc = NULL; 97 } 98 /* 99 * This smp_store_release pairs with smp_load_acquire() from 100 * |next_slab_inited| above and in stack_depot_save(). 101 */ 102 smp_store_release(&next_slab_inited, 1); 103 } 104 return true; 105} 106 107/* Allocation of a new stack in raw storage */ 108static struct stack_record * 109depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) 110{ 111 struct stack_record *stack; 112 size_t required_size = struct_size(stack, entries, size); 113 114 required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); 115 116 if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) { 117 if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) { 118 WARN_ONCE(1, "Stack depot reached limit capacity"); 119 return NULL; 120 } 121 depot_index++; 122 depot_offset = 0; 123 /* 124 * smp_store_release() here pairs with smp_load_acquire() from 125 * |next_slab_inited| in stack_depot_save() and 126 * init_stack_slab(). 127 */ 128 if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) 129 smp_store_release(&next_slab_inited, 0); 130 } 131 init_stack_slab(prealloc); 132 if (stack_slabs[depot_index] == NULL) 133 return NULL; 134 135 stack = stack_slabs[depot_index] + depot_offset; 136 137 stack->hash = hash; 138 stack->size = size; 139 stack->handle.slabindex = depot_index; 140 stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; 141 stack->handle.valid = 1; 142 memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); 143 depot_offset += required_size; 144 145 return stack; 146} 147 148#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER) 149#define STACK_HASH_MASK (STACK_HASH_SIZE - 1) 150#define STACK_HASH_SEED 0x9747b28c 151 152static bool stack_depot_disable; 153static struct stack_record **stack_table; 154 155static int __init is_stack_depot_disabled(char *str) 156{ 157 int ret; 158 159 ret = kstrtobool(str, &stack_depot_disable); 160 if (!ret && stack_depot_disable) { 161 pr_info("Stack Depot is disabled\n"); 162 stack_table = NULL; 163 } 164 return 0; 165} 166early_param("stack_depot_disable", is_stack_depot_disabled); 167 168void __init stack_depot_want_early_init(void) 169{ 170 /* Too late to request early init now */ 171 WARN_ON(__stack_depot_early_init_passed); 172 173 __stack_depot_want_early_init = true; 174} 175 176int __init stack_depot_early_init(void) 177{ 178 size_t size; 179 180 /* This is supposed to be called only once, from mm_init() */ 181 if (WARN_ON(__stack_depot_early_init_passed)) 182 return 0; 183 184 __stack_depot_early_init_passed = true; 185 186 if (!__stack_depot_want_early_init || stack_depot_disable) 187 return 0; 188 189 size = (STACK_HASH_SIZE * sizeof(struct stack_record *)); 190 pr_info("Stack Depot early init allocating hash table with memblock_alloc, %zu bytes\n", 191 size); 192 stack_table = memblock_alloc(size, SMP_CACHE_BYTES); 193 194 if (!stack_table) { 195 pr_err("Stack Depot hash table allocation failed, disabling\n"); 196 stack_depot_disable = true; 197 return -ENOMEM; 198 } 199 200 return 0; 201} 202 203int stack_depot_init(void) 204{ 205 static DEFINE_MUTEX(stack_depot_init_mutex); 206 int ret = 0; 207 208 mutex_lock(&stack_depot_init_mutex); 209 if (!stack_depot_disable && !stack_table) { 210 pr_info("Stack Depot allocating hash table with kvcalloc\n"); 211 stack_table = kvcalloc(STACK_HASH_SIZE, sizeof(struct stack_record *), GFP_KERNEL); 212 if (!stack_table) { 213 pr_err("Stack Depot hash table allocation failed, disabling\n"); 214 stack_depot_disable = true; 215 ret = -ENOMEM; 216 } 217 } 218 mutex_unlock(&stack_depot_init_mutex); 219 return ret; 220} 221EXPORT_SYMBOL_GPL(stack_depot_init); 222 223/* Calculate hash for a stack */ 224static inline u32 hash_stack(unsigned long *entries, unsigned int size) 225{ 226 return jhash2((u32 *)entries, 227 array_size(size, sizeof(*entries)) / sizeof(u32), 228 STACK_HASH_SEED); 229} 230 231/* Use our own, non-instrumented version of memcmp(). 232 * 233 * We actually don't care about the order, just the equality. 234 */ 235static inline 236int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, 237 unsigned int n) 238{ 239 for ( ; n-- ; u1++, u2++) { 240 if (*u1 != *u2) 241 return 1; 242 } 243 return 0; 244} 245 246/* Find a stack that is equal to the one stored in entries in the hash */ 247static inline struct stack_record *find_stack(struct stack_record *bucket, 248 unsigned long *entries, int size, 249 u32 hash) 250{ 251 struct stack_record *found; 252 253 for (found = bucket; found; found = found->next) { 254 if (found->hash == hash && 255 found->size == size && 256 !stackdepot_memcmp(entries, found->entries, size)) 257 return found; 258 } 259 return NULL; 260} 261 262/** 263 * stack_depot_snprint - print stack entries from a depot into a buffer 264 * 265 * @handle: Stack depot handle which was returned from 266 * stack_depot_save(). 267 * @buf: Pointer to the print buffer 268 * 269 * @size: Size of the print buffer 270 * 271 * @spaces: Number of leading spaces to print 272 * 273 * Return: Number of bytes printed. 274 */ 275int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, 276 int spaces) 277{ 278 unsigned long *entries; 279 unsigned int nr_entries; 280 281 nr_entries = stack_depot_fetch(handle, &entries); 282 return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries, 283 spaces) : 0; 284} 285EXPORT_SYMBOL_GPL(stack_depot_snprint); 286 287/** 288 * stack_depot_print - print stack entries from a depot 289 * 290 * @stack: Stack depot handle which was returned from 291 * stack_depot_save(). 292 * 293 */ 294void stack_depot_print(depot_stack_handle_t stack) 295{ 296 unsigned long *entries; 297 unsigned int nr_entries; 298 299 nr_entries = stack_depot_fetch(stack, &entries); 300 if (nr_entries > 0) 301 stack_trace_print(entries, nr_entries, 0); 302} 303EXPORT_SYMBOL_GPL(stack_depot_print); 304 305/** 306 * stack_depot_fetch - Fetch stack entries from a depot 307 * 308 * @handle: Stack depot handle which was returned from 309 * stack_depot_save(). 310 * @entries: Pointer to store the entries address 311 * 312 * Return: The number of trace entries for this depot. 313 */ 314unsigned int stack_depot_fetch(depot_stack_handle_t handle, 315 unsigned long **entries) 316{ 317 union handle_parts parts = { .handle = handle }; 318 void *slab; 319 size_t offset = parts.offset << STACK_ALLOC_ALIGN; 320 struct stack_record *stack; 321 322 *entries = NULL; 323 if (!handle) 324 return 0; 325 326 if (parts.slabindex > depot_index) { 327 WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n", 328 parts.slabindex, depot_index, handle); 329 return 0; 330 } 331 slab = stack_slabs[parts.slabindex]; 332 if (!slab) 333 return 0; 334 stack = slab + offset; 335 336 *entries = stack->entries; 337 return stack->size; 338} 339EXPORT_SYMBOL_GPL(stack_depot_fetch); 340 341/** 342 * __stack_depot_save - Save a stack trace from an array 343 * 344 * @entries: Pointer to storage array 345 * @nr_entries: Size of the storage array 346 * @alloc_flags: Allocation gfp flags 347 * @can_alloc: Allocate stack slabs (increased chance of failure if false) 348 * 349 * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is 350 * %true, is allowed to replenish the stack slab pool in case no space is left 351 * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids 352 * any allocations and will fail if no space is left to store the stack trace. 353 * 354 * If the stack trace in @entries is from an interrupt, only the portion up to 355 * interrupt entry is saved. 356 * 357 * Context: Any context, but setting @can_alloc to %false is required if 358 * alloc_pages() cannot be used from the current context. Currently 359 * this is the case from contexts where neither %GFP_ATOMIC nor 360 * %GFP_NOWAIT can be used (NMI, raw_spin_lock). 361 * 362 * Return: The handle of the stack struct stored in depot, 0 on failure. 363 */ 364depot_stack_handle_t __stack_depot_save(unsigned long *entries, 365 unsigned int nr_entries, 366 gfp_t alloc_flags, bool can_alloc) 367{ 368 struct stack_record *found = NULL, **bucket; 369 depot_stack_handle_t retval = 0; 370 struct page *page = NULL; 371 void *prealloc = NULL; 372 unsigned long flags; 373 u32 hash; 374 375 /* 376 * If this stack trace is from an interrupt, including anything before 377 * interrupt entry usually leads to unbounded stackdepot growth. 378 * 379 * Because use of filter_irq_stacks() is a requirement to ensure 380 * stackdepot can efficiently deduplicate interrupt stacks, always 381 * filter_irq_stacks() to simplify all callers' use of stackdepot. 382 */ 383 nr_entries = filter_irq_stacks(entries, nr_entries); 384 385 if (unlikely(nr_entries == 0) || stack_depot_disable) 386 goto fast_exit; 387 388 hash = hash_stack(entries, nr_entries); 389 bucket = &stack_table[hash & STACK_HASH_MASK]; 390 391 /* 392 * Fast path: look the stack trace up without locking. 393 * The smp_load_acquire() here pairs with smp_store_release() to 394 * |bucket| below. 395 */ 396 found = find_stack(smp_load_acquire(bucket), entries, 397 nr_entries, hash); 398 if (found) 399 goto exit; 400 401 /* 402 * Check if the current or the next stack slab need to be initialized. 403 * If so, allocate the memory - we won't be able to do that under the 404 * lock. 405 * 406 * The smp_load_acquire() here pairs with smp_store_release() to 407 * |next_slab_inited| in depot_alloc_stack() and init_stack_slab(). 408 */ 409 if (unlikely(can_alloc && !smp_load_acquire(&next_slab_inited))) { 410 /* 411 * Zero out zone modifiers, as we don't have specific zone 412 * requirements. Keep the flags related to allocation in atomic 413 * contexts and I/O. 414 */ 415 alloc_flags &= ~GFP_ZONEMASK; 416 alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); 417 alloc_flags |= __GFP_NOWARN; 418 page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER); 419 if (page) 420 prealloc = page_address(page); 421 } 422 423 raw_spin_lock_irqsave(&depot_lock, flags); 424 425 found = find_stack(*bucket, entries, nr_entries, hash); 426 if (!found) { 427 struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc); 428 429 if (new) { 430 new->next = *bucket; 431 /* 432 * This smp_store_release() pairs with 433 * smp_load_acquire() from |bucket| above. 434 */ 435 smp_store_release(bucket, new); 436 found = new; 437 } 438 } else if (prealloc) { 439 /* 440 * We didn't need to store this stack trace, but let's keep 441 * the preallocated memory for the future. 442 */ 443 WARN_ON(!init_stack_slab(&prealloc)); 444 } 445 446 raw_spin_unlock_irqrestore(&depot_lock, flags); 447exit: 448 if (prealloc) { 449 /* Nobody used this memory, ok to free it. */ 450 free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER); 451 } 452 if (found) 453 retval = found->handle.handle; 454fast_exit: 455 return retval; 456} 457EXPORT_SYMBOL_GPL(__stack_depot_save); 458 459/** 460 * stack_depot_save - Save a stack trace from an array 461 * 462 * @entries: Pointer to storage array 463 * @nr_entries: Size of the storage array 464 * @alloc_flags: Allocation gfp flags 465 * 466 * Context: Contexts where allocations via alloc_pages() are allowed. 467 * See __stack_depot_save() for more details. 468 * 469 * Return: The handle of the stack struct stored in depot, 0 on failure. 470 */ 471depot_stack_handle_t stack_depot_save(unsigned long *entries, 472 unsigned int nr_entries, 473 gfp_t alloc_flags) 474{ 475 return __stack_depot_save(entries, nr_entries, alloc_flags, true); 476} 477EXPORT_SYMBOL_GPL(stack_depot_save);