cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

dwarf.c (29752B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (C) 2009 Matt Fleming <matt@console-pimps.org>
      4 *
      5 * This is an implementation of a DWARF unwinder. Its main purpose is
      6 * for generating stacktrace information. Based on the DWARF 3
      7 * specification from http://www.dwarfstd.org.
      8 *
      9 * TODO:
     10 *	- DWARF64 doesn't work.
     11 *	- Registers with DWARF_VAL_OFFSET rules aren't handled properly.
     12 */
     13
     14/* #define DEBUG */
     15#include <linux/kernel.h>
     16#include <linux/io.h>
     17#include <linux/list.h>
     18#include <linux/mempool.h>
     19#include <linux/mm.h>
     20#include <linux/elf.h>
     21#include <linux/ftrace.h>
     22#include <linux/module.h>
     23#include <linux/slab.h>
     24#include <asm/dwarf.h>
     25#include <asm/unwinder.h>
     26#include <asm/sections.h>
     27#include <asm/unaligned.h>
     28#include <asm/stacktrace.h>
     29
     30/* Reserve enough memory for two stack frames */
     31#define DWARF_FRAME_MIN_REQ	2
     32/* ... with 4 registers per frame. */
     33#define DWARF_REG_MIN_REQ	(DWARF_FRAME_MIN_REQ * 4)
     34
     35static struct kmem_cache *dwarf_frame_cachep;
     36static mempool_t *dwarf_frame_pool;
     37
     38static struct kmem_cache *dwarf_reg_cachep;
     39static mempool_t *dwarf_reg_pool;
     40
     41static struct rb_root cie_root;
     42static DEFINE_SPINLOCK(dwarf_cie_lock);
     43
     44static struct rb_root fde_root;
     45static DEFINE_SPINLOCK(dwarf_fde_lock);
     46
     47static struct dwarf_cie *cached_cie;
     48
     49static unsigned int dwarf_unwinder_ready;
     50
     51/**
     52 *	dwarf_frame_alloc_reg - allocate memory for a DWARF register
     53 *	@frame: the DWARF frame whose list of registers we insert on
     54 *	@reg_num: the register number
     55 *
     56 *	Allocate space for, and initialise, a dwarf reg from
     57 *	dwarf_reg_pool and insert it onto the (unsorted) linked-list of
     58 *	dwarf registers for @frame.
     59 *
     60 *	Return the initialised DWARF reg.
     61 */
     62static struct dwarf_reg *dwarf_frame_alloc_reg(struct dwarf_frame *frame,
     63					       unsigned int reg_num)
     64{
     65	struct dwarf_reg *reg;
     66
     67	reg = mempool_alloc(dwarf_reg_pool, GFP_ATOMIC);
     68	if (!reg) {
     69		printk(KERN_WARNING "Unable to allocate a DWARF register\n");
     70		/*
     71		 * Let's just bomb hard here, we have no way to
     72		 * gracefully recover.
     73		 */
     74		UNWINDER_BUG();
     75	}
     76
     77	reg->number = reg_num;
     78	reg->addr = 0;
     79	reg->flags = 0;
     80
     81	list_add(&reg->link, &frame->reg_list);
     82
     83	return reg;
     84}
     85
     86static void dwarf_frame_free_regs(struct dwarf_frame *frame)
     87{
     88	struct dwarf_reg *reg, *n;
     89
     90	list_for_each_entry_safe(reg, n, &frame->reg_list, link) {
     91		list_del(&reg->link);
     92		mempool_free(reg, dwarf_reg_pool);
     93	}
     94}
     95
     96/**
     97 *	dwarf_frame_reg - return a DWARF register
     98 *	@frame: the DWARF frame to search in for @reg_num
     99 *	@reg_num: the register number to search for
    100 *
    101 *	Lookup and return the dwarf reg @reg_num for this frame. Return
    102 *	NULL if @reg_num is an register invalid number.
    103 */
    104static struct dwarf_reg *dwarf_frame_reg(struct dwarf_frame *frame,
    105					 unsigned int reg_num)
    106{
    107	struct dwarf_reg *reg;
    108
    109	list_for_each_entry(reg, &frame->reg_list, link) {
    110		if (reg->number == reg_num)
    111			return reg;
    112	}
    113
    114	return NULL;
    115}
    116
/**
 *	dwarf_read_addr - read dwarf data
 *	@src: source address of data
 *	@dst: destination address to store the data to
 *
 *	Read 'n' bytes from @src, where 'n' is the size of an address on
 *	the native machine. We return the number of bytes read, which
 *	should always be 'n'. We also have to be careful when reading
 *	from @src and writing to @dst, because they can be arbitrarily
 *	aligned. Return 'n' - the number of bytes read.
 *
 *	NOTE(review): the value is transferred as a u32 but the return
 *	value is sizeof(unsigned long *). These only agree on a 32-bit
 *	target (this is SuperH code, so that holds today) -- confirm
 *	before reusing this helper on a 64-bit machine.
 */
static inline int dwarf_read_addr(unsigned long *src, unsigned long *dst)
{
	/* get/put_unaligned tolerate arbitrarily-aligned src/dst. */
	u32 val = get_unaligned(src);
	put_unaligned(val, dst);
	return sizeof(unsigned long *);
}
    134
    135/**
    136 *	dwarf_read_uleb128 - read unsigned LEB128 data
    137 *	@addr: the address where the ULEB128 data is stored
    138 *	@ret: address to store the result
    139 *
    140 *	Decode an unsigned LEB128 encoded datum. The algorithm is taken
    141 *	from Appendix C of the DWARF 3 spec. For information on the
    142 *	encodings refer to section "7.6 - Variable Length Data". Return
    143 *	the number of bytes read.
    144 */
    145static inline unsigned long dwarf_read_uleb128(char *addr, unsigned int *ret)
    146{
    147	unsigned int result;
    148	unsigned char byte;
    149	int shift, count;
    150
    151	result = 0;
    152	shift = 0;
    153	count = 0;
    154
    155	while (1) {
    156		byte = __raw_readb(addr);
    157		addr++;
    158		count++;
    159
    160		result |= (byte & 0x7f) << shift;
    161		shift += 7;
    162
    163		if (!(byte & 0x80))
    164			break;
    165	}
    166
    167	*ret = result;
    168
    169	return count;
    170}
    171
    172/**
    173 *	dwarf_read_leb128 - read signed LEB128 data
    174 *	@addr: the address of the LEB128 encoded data
    175 *	@ret: address to store the result
    176 *
    177 *	Decode signed LEB128 data. The algorithm is taken from Appendix
    178 *	C of the DWARF 3 spec. Return the number of bytes read.
    179 */
    180static inline unsigned long dwarf_read_leb128(char *addr, int *ret)
    181{
    182	unsigned char byte;
    183	int result, shift;
    184	int num_bits;
    185	int count;
    186
    187	result = 0;
    188	shift = 0;
    189	count = 0;
    190
    191	while (1) {
    192		byte = __raw_readb(addr);
    193		addr++;
    194		result |= (byte & 0x7f) << shift;
    195		shift += 7;
    196		count++;
    197
    198		if (!(byte & 0x80))
    199			break;
    200	}
    201
    202	/* The number of bits in a signed integer. */
    203	num_bits = 8 * sizeof(result);
    204
    205	if ((shift < num_bits) && (byte & 0x40))
    206		result |= (-1 << shift);
    207
    208	*ret = result;
    209
    210	return count;
    211}
    212
/**
 *	dwarf_read_encoded_value - return the decoded value at @addr
 *	@addr: the address of the encoded value
 *	@val: where to write the decoded value
 *	@encoding: the encoding with which we can decode @addr
 *
 *	GCC emits encoded address in the .eh_frame FDE entries. Decode
 *	the value at @addr using @encoding. The decoded value is written
 *	to @val and the number of bytes read is returned.
 *
 *	Only the DW_EH_PE_absptr/DW_EH_PE_pcrel application modes and
 *	the 4-byte sdata4/udata4 formats are supported; any other
 *	encoding trips UNWINDER_BUG().
 */
static int dwarf_read_encoded_value(char *addr, unsigned long *val,
				    char encoding)
{
	unsigned long decoded_addr = 0;
	int count = 0;

	/* Bits 4-6 select how the value is applied. */
	switch (encoding & 0x70) {
	case DW_EH_PE_absptr:
		break;
	case DW_EH_PE_pcrel:
		/* pc-relative: bias the result by the location it was read from. */
		decoded_addr = (unsigned long)addr;
		break;
	default:
		pr_debug("encoding=0x%x\n", (encoding & 0x70));
		UNWINDER_BUG();
	}

	/* A zero format nibble means "native pointer"; treat it as udata4. */
	if ((encoding & 0x07) == 0x00)
		encoding |= DW_EH_PE_udata4;

	/* Bits 0-3 select the storage format of the value itself. */
	switch (encoding & 0x0f) {
	case DW_EH_PE_sdata4:
	case DW_EH_PE_udata4:
		count += 4;
		decoded_addr += get_unaligned((u32 *)addr);
		__raw_writel(decoded_addr, val);
		break;
	default:
		pr_debug("encoding=0x%x\n", encoding);
		UNWINDER_BUG();
	}

	return count;
}
    257
    258/**
    259 *	dwarf_entry_len - return the length of an FDE or CIE
    260 *	@addr: the address of the entry
    261 *	@len: the length of the entry
    262 *
    263 *	Read the initial_length field of the entry and store the size of
    264 *	the entry in @len. We return the number of bytes read. Return a
    265 *	count of 0 on error.
    266 */
    267static inline int dwarf_entry_len(char *addr, unsigned long *len)
    268{
    269	u32 initial_len;
    270	int count;
    271
    272	initial_len = get_unaligned((u32 *)addr);
    273	count = 4;
    274
    275	/*
    276	 * An initial length field value in the range DW_LEN_EXT_LO -
    277	 * DW_LEN_EXT_HI indicates an extension, and should not be
    278	 * interpreted as a length. The only extension that we currently
    279	 * understand is the use of DWARF64 addresses.
    280	 */
    281	if (initial_len >= DW_EXT_LO && initial_len <= DW_EXT_HI) {
    282		/*
    283		 * The 64-bit length field immediately follows the
    284		 * compulsory 32-bit length field.
    285		 */
    286		if (initial_len == DW_EXT_DWARF64) {
    287			*len = get_unaligned((u64 *)addr + 4);
    288			count = 12;
    289		} else {
    290			printk(KERN_WARNING "Unknown DWARF extension\n");
    291			count = 0;
    292		}
    293	} else
    294		*len = initial_len;
    295
    296	return count;
    297}
    298
    299/**
    300 *	dwarf_lookup_cie - locate the cie
    301 *	@cie_ptr: pointer to help with lookup
    302 */
    303static struct dwarf_cie *dwarf_lookup_cie(unsigned long cie_ptr)
    304{
    305	struct rb_node **rb_node = &cie_root.rb_node;
    306	struct dwarf_cie *cie = NULL;
    307	unsigned long flags;
    308
    309	spin_lock_irqsave(&dwarf_cie_lock, flags);
    310
    311	/*
    312	 * We've cached the last CIE we looked up because chances are
    313	 * that the FDE wants this CIE.
    314	 */
    315	if (cached_cie && cached_cie->cie_pointer == cie_ptr) {
    316		cie = cached_cie;
    317		goto out;
    318	}
    319
    320	while (*rb_node) {
    321		struct dwarf_cie *cie_tmp;
    322
    323		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
    324		BUG_ON(!cie_tmp);
    325
    326		if (cie_ptr == cie_tmp->cie_pointer) {
    327			cie = cie_tmp;
    328			cached_cie = cie_tmp;
    329			goto out;
    330		} else {
    331			if (cie_ptr < cie_tmp->cie_pointer)
    332				rb_node = &(*rb_node)->rb_left;
    333			else
    334				rb_node = &(*rb_node)->rb_right;
    335		}
    336	}
    337
    338out:
    339	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
    340	return cie;
    341}
    342
    343/**
    344 *	dwarf_lookup_fde - locate the FDE that covers pc
    345 *	@pc: the program counter
    346 */
    347struct dwarf_fde *dwarf_lookup_fde(unsigned long pc)
    348{
    349	struct rb_node **rb_node = &fde_root.rb_node;
    350	struct dwarf_fde *fde = NULL;
    351	unsigned long flags;
    352
    353	spin_lock_irqsave(&dwarf_fde_lock, flags);
    354
    355	while (*rb_node) {
    356		struct dwarf_fde *fde_tmp;
    357		unsigned long tmp_start, tmp_end;
    358
    359		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);
    360		BUG_ON(!fde_tmp);
    361
    362		tmp_start = fde_tmp->initial_location;
    363		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;
    364
    365		if (pc < tmp_start) {
    366			rb_node = &(*rb_node)->rb_left;
    367		} else {
    368			if (pc < tmp_end) {
    369				fde = fde_tmp;
    370				goto out;
    371			} else
    372				rb_node = &(*rb_node)->rb_right;
    373		}
    374	}
    375
    376out:
    377	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
    378
    379	return fde;
    380}
    381
    382/**
    383 *	dwarf_cfa_execute_insns - execute instructions to calculate a CFA
    384 *	@insn_start: address of the first instruction
    385 *	@insn_end: address of the last instruction
    386 *	@cie: the CIE for this function
    387 *	@fde: the FDE for this function
    388 *	@frame: the instructions calculate the CFA for this frame
    389 *	@pc: the program counter of the address we're interested in
    390 *
    391 *	Execute the Call Frame instruction sequence starting at
    392 *	@insn_start and ending at @insn_end. The instructions describe
    393 *	how to calculate the Canonical Frame Address of a stackframe.
    394 *	Store the results in @frame.
    395 */
    396static int dwarf_cfa_execute_insns(unsigned char *insn_start,
    397				   unsigned char *insn_end,
    398				   struct dwarf_cie *cie,
    399				   struct dwarf_fde *fde,
    400				   struct dwarf_frame *frame,
    401				   unsigned long pc)
    402{
    403	unsigned char insn;
    404	unsigned char *current_insn;
    405	unsigned int count, delta, reg, expr_len, offset;
    406	struct dwarf_reg *regp;
    407
    408	current_insn = insn_start;
    409
    410	while (current_insn < insn_end && frame->pc <= pc) {
    411		insn = __raw_readb(current_insn++);
    412
    413		/*
    414		 * Firstly, handle the opcodes that embed their operands
    415		 * in the instructions.
    416		 */
    417		switch (DW_CFA_opcode(insn)) {
    418		case DW_CFA_advance_loc:
    419			delta = DW_CFA_operand(insn);
    420			delta *= cie->code_alignment_factor;
    421			frame->pc += delta;
    422			continue;
    423			/* NOTREACHED */
    424		case DW_CFA_offset:
    425			reg = DW_CFA_operand(insn);
    426			count = dwarf_read_uleb128(current_insn, &offset);
    427			current_insn += count;
    428			offset *= cie->data_alignment_factor;
    429			regp = dwarf_frame_alloc_reg(frame, reg);
    430			regp->addr = offset;
    431			regp->flags |= DWARF_REG_OFFSET;
    432			continue;
    433			/* NOTREACHED */
    434		case DW_CFA_restore:
    435			reg = DW_CFA_operand(insn);
    436			continue;
    437			/* NOTREACHED */
    438		}
    439
    440		/*
    441		 * Secondly, handle the opcodes that don't embed their
    442		 * operands in the instruction.
    443		 */
    444		switch (insn) {
    445		case DW_CFA_nop:
    446			continue;
    447		case DW_CFA_advance_loc1:
    448			delta = *current_insn++;
    449			frame->pc += delta * cie->code_alignment_factor;
    450			break;
    451		case DW_CFA_advance_loc2:
    452			delta = get_unaligned((u16 *)current_insn);
    453			current_insn += 2;
    454			frame->pc += delta * cie->code_alignment_factor;
    455			break;
    456		case DW_CFA_advance_loc4:
    457			delta = get_unaligned((u32 *)current_insn);
    458			current_insn += 4;
    459			frame->pc += delta * cie->code_alignment_factor;
    460			break;
    461		case DW_CFA_offset_extended:
    462			count = dwarf_read_uleb128(current_insn, &reg);
    463			current_insn += count;
    464			count = dwarf_read_uleb128(current_insn, &offset);
    465			current_insn += count;
    466			offset *= cie->data_alignment_factor;
    467			break;
    468		case DW_CFA_restore_extended:
    469			count = dwarf_read_uleb128(current_insn, &reg);
    470			current_insn += count;
    471			break;
    472		case DW_CFA_undefined:
    473			count = dwarf_read_uleb128(current_insn, &reg);
    474			current_insn += count;
    475			regp = dwarf_frame_alloc_reg(frame, reg);
    476			regp->flags |= DWARF_UNDEFINED;
    477			break;
    478		case DW_CFA_def_cfa:
    479			count = dwarf_read_uleb128(current_insn,
    480						   &frame->cfa_register);
    481			current_insn += count;
    482			count = dwarf_read_uleb128(current_insn,
    483						   &frame->cfa_offset);
    484			current_insn += count;
    485
    486			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
    487			break;
    488		case DW_CFA_def_cfa_register:
    489			count = dwarf_read_uleb128(current_insn,
    490						   &frame->cfa_register);
    491			current_insn += count;
    492			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
    493			break;
    494		case DW_CFA_def_cfa_offset:
    495			count = dwarf_read_uleb128(current_insn, &offset);
    496			current_insn += count;
    497			frame->cfa_offset = offset;
    498			break;
    499		case DW_CFA_def_cfa_expression:
    500			count = dwarf_read_uleb128(current_insn, &expr_len);
    501			current_insn += count;
    502
    503			frame->cfa_expr = current_insn;
    504			frame->cfa_expr_len = expr_len;
    505			current_insn += expr_len;
    506
    507			frame->flags |= DWARF_FRAME_CFA_REG_EXP;
    508			break;
    509		case DW_CFA_offset_extended_sf:
    510			count = dwarf_read_uleb128(current_insn, &reg);
    511			current_insn += count;
    512			count = dwarf_read_leb128(current_insn, &offset);
    513			current_insn += count;
    514			offset *= cie->data_alignment_factor;
    515			regp = dwarf_frame_alloc_reg(frame, reg);
    516			regp->flags |= DWARF_REG_OFFSET;
    517			regp->addr = offset;
    518			break;
    519		case DW_CFA_val_offset:
    520			count = dwarf_read_uleb128(current_insn, &reg);
    521			current_insn += count;
    522			count = dwarf_read_leb128(current_insn, &offset);
    523			offset *= cie->data_alignment_factor;
    524			regp = dwarf_frame_alloc_reg(frame, reg);
    525			regp->flags |= DWARF_VAL_OFFSET;
    526			regp->addr = offset;
    527			break;
    528		case DW_CFA_GNU_args_size:
    529			count = dwarf_read_uleb128(current_insn, &offset);
    530			current_insn += count;
    531			break;
    532		case DW_CFA_GNU_negative_offset_extended:
    533			count = dwarf_read_uleb128(current_insn, &reg);
    534			current_insn += count;
    535			count = dwarf_read_uleb128(current_insn, &offset);
    536			offset *= cie->data_alignment_factor;
    537
    538			regp = dwarf_frame_alloc_reg(frame, reg);
    539			regp->flags |= DWARF_REG_OFFSET;
    540			regp->addr = -offset;
    541			break;
    542		default:
    543			pr_debug("unhandled DWARF instruction 0x%x\n", insn);
    544			UNWINDER_BUG();
    545			break;
    546		}
    547	}
    548
    549	return 0;
    550}
    551
/**
 *	dwarf_free_frame - free the memory allocated for @frame
 *	@frame: the frame to free
 *
 *	Release the frame's register list back to dwarf_reg_pool first,
 *	then return the frame itself to dwarf_frame_pool; the order
 *	matters because the register list lives inside @frame.
 */
void dwarf_free_frame(struct dwarf_frame *frame)
{
	dwarf_frame_free_regs(frame);
	mempool_free(frame, dwarf_frame_pool);
}
    561
    562extern void ret_from_irq(void);
    563
/**
 *	dwarf_unwind_stack - unwind the stack
 *
 *	@pc: address of the function to unwind
 *	@prev: struct dwarf_frame of the previous stackframe on the callstack
 *
 *	Return a struct dwarf_frame representing the most recent frame
 *	on the callstack. Each of the lower (older) stack frames are
 *	linked via the "prev" member.
 *
 *	Returns NULL when the unwinder is not yet initialised, when no
 *	FDE covers @pc, or when the end of the callstack is reached.
 *	The caller owns the returned frame and must release it with
 *	dwarf_free_frame().
 */
struct dwarf_frame *dwarf_unwind_stack(unsigned long pc,
				       struct dwarf_frame *prev)
{
	struct dwarf_frame *frame;
	struct dwarf_cie *cie;
	struct dwarf_fde *fde;
	struct dwarf_reg *reg;
	unsigned long addr;

	/*
	 * If we've been called in to before initialization has
	 * completed, bail out immediately.
	 */
	if (!dwarf_unwinder_ready)
		return NULL;

	/*
	 * If we're starting at the top of the stack we need get the
	 * contents of a physical register to get the CFA in order to
	 * begin the virtual unwinding of the stack.
	 *
	 * NOTE: the return address is guaranteed to be setup by the
	 * time this function makes its first function call.
	 */
	if (!pc || !prev)
		pc = _THIS_IP_;

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	/*
	 * If our stack has been patched by the function graph tracer
	 * then we might see the address of return_to_handler() where we
	 * expected to find the real return address.
	 */
	if (pc == (unsigned long)&return_to_handler) {
		struct ftrace_ret_stack *ret_stack;

		ret_stack = ftrace_graph_get_ret_stack(current, 0);
		if (ret_stack)
			pc = ret_stack->ret;
		/*
		 * We currently have no way of tracking how many
		 * return_to_handler()'s we've seen. If there is more
		 * than one patched return address on our stack,
		 * complain loudly.
		 */
		WARN_ON(ftrace_graph_get_ret_stack(current, 1));
	}
#endif

	/* GFP_ATOMIC: we may be unwinding from atomic/oops context. */
	frame = mempool_alloc(dwarf_frame_pool, GFP_ATOMIC);
	if (!frame) {
		printk(KERN_ERR "Unable to allocate a dwarf frame\n");
		UNWINDER_BUG();
	}

	INIT_LIST_HEAD(&frame->reg_list);
	frame->flags = 0;
	frame->prev = prev;
	frame->return_addr = 0;

	fde = dwarf_lookup_fde(pc);
	if (!fde) {
		/*
		 * This is our normal exit path. There are two reasons
		 * why we might exit here,
		 *
		 *	a) pc has no asscociated DWARF frame info and so
		 *	we don't know how to unwind this frame. This is
		 *	usually the case when we're trying to unwind a
		 *	frame that was called from some assembly code
		 *	that has no DWARF info, e.g. syscalls.
		 *
		 *	b) the DEBUG info for pc is bogus. There's
		 *	really no way to distinguish this case from the
		 *	case above, which sucks because we could print a
		 *	warning here.
		 */
		goto bail;
	}

	cie = dwarf_lookup_cie(fde->cie_pointer);

	frame->pc = fde->initial_location;

	/* CIE initial instructions */
	dwarf_cfa_execute_insns(cie->initial_instructions,
				cie->instructions_end, cie, fde,
				frame, pc);

	/* FDE instructions */
	dwarf_cfa_execute_insns(fde->instructions, fde->end, cie,
				fde, frame, pc);

	/* Calculate the CFA */
	switch (frame->flags) {
	case DWARF_FRAME_CFA_REG_OFFSET:
		if (prev) {
			/* CFA register's value is saved relative to the previous CFA. */
			reg = dwarf_frame_reg(prev, frame->cfa_register);
			UNWINDER_BUG_ON(!reg);
			UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);

			addr = prev->cfa + reg->addr;
			frame->cfa = __raw_readl(addr);

		} else {
			/*
			 * Again, we're starting from the top of the
			 * stack. We need to physically read
			 * the contents of a register in order to get
			 * the Canonical Frame Address for this
			 * function.
			 */
			frame->cfa = dwarf_read_arch_reg(frame->cfa_register);
		}

		frame->cfa += frame->cfa_offset;
		break;
	default:
		/* DWARF_FRAME_CFA_REG_EXP (expressions) is not implemented. */
		UNWINDER_BUG();
	}

	reg = dwarf_frame_reg(frame, DWARF_ARCH_RA_REG);

	/*
	 * If we haven't seen the return address register or the return
	 * address column is undefined then we must assume that this is
	 * the end of the callstack.
	 */
	if (!reg || reg->flags == DWARF_UNDEFINED)
		goto bail;

	UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);

	addr = frame->cfa + reg->addr;
	frame->return_addr = __raw_readl(addr);

	/*
	 * Ah, the joys of unwinding through interrupts.
	 *
	 * Interrupts are tricky - the DWARF info needs to be _really_
	 * accurate and unfortunately I'm seeing a lot of bogus DWARF
	 * info. For example, I've seen interrupts occur in epilogues
	 * just after the frame pointer (r14) had been restored. The
	 * problem was that the DWARF info claimed that the CFA could be
	 * reached by using the value of the frame pointer before it was
	 * restored.
	 *
	 * So until the compiler can be trusted to produce reliable
	 * DWARF info when it really matters, let's stop unwinding once
	 * we've calculated the function that was interrupted.
	 */
	if (prev && prev->pc == (unsigned long)ret_from_irq)
		frame->return_addr = 0;

	return frame;

bail:
	dwarf_free_frame(frame);
	return NULL;
}
    734
/**
 *	dwarf_parse_cie - parse a single CIE out of a .eh_frame section
 *	@entry: address of the start of the CIE record (used as its key)
 *	@p: address of the CIE body (just past the CIE id field)
 *	@len: length of the CIE as read from its initial-length field
 *	@end: one past the last byte of the CIE
 *	@mod: owning module, or NULL for the kernel's own .eh_frame
 *
 *	Allocate and populate a struct dwarf_cie from the on-disk
 *	record, then insert it into the cie_root rbtree (keyed by
 *	cie_pointer) and, for modules, onto the module's cie_list.
 *	Returns 0 on success or -ENOMEM.
 */
static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
			   unsigned char *end, struct module *mod)
{
	struct rb_node **rb_node = &cie_root.rb_node;
	struct rb_node *parent = *rb_node;
	struct dwarf_cie *cie;
	unsigned long flags;
	int count;

	cie = kzalloc(sizeof(*cie), GFP_KERNEL);
	if (!cie)
		return -ENOMEM;

	cie->length = len;

	/*
	 * Record the offset into the .eh_frame section
	 * for this CIE. It allows this CIE to be
	 * quickly and easily looked up from the
	 * corresponding FDE.
	 */
	cie->cie_pointer = (unsigned long)entry;

	/* Only CIE version 1 (the GCC .eh_frame version) is supported. */
	cie->version = *(char *)p++;
	UNWINDER_BUG_ON(cie->version != 1);

	/* NUL-terminated augmentation string, e.g. "zR". */
	cie->augmentation = p;
	p += strlen(cie->augmentation) + 1;

	count = dwarf_read_uleb128(p, &cie->code_alignment_factor);
	p += count;

	count = dwarf_read_leb128(p, &cie->data_alignment_factor);
	p += count;

	/*
	 * Which column in the rule table contains the
	 * return address?
	 */
	if (cie->version == 1) {
		cie->return_address_reg = __raw_readb(p);
		p++;
	} else {
		/* Dead branch today: version != 1 already hit UNWINDER_BUG_ON. */
		count = dwarf_read_uleb128(p, &cie->return_address_reg);
		p += count;
	}

	if (cie->augmentation[0] == 'z') {
		unsigned int length, count;
		cie->flags |= DWARF_CIE_Z_AUGMENTATION;

		/* 'z': a ULEB128 length of the augmentation data follows. */
		count = dwarf_read_uleb128(p, &length);
		p += count;

		UNWINDER_BUG_ON((unsigned char *)p > end);

		/* Provisional value; used as fallback by the loop below. */
		cie->initial_instructions = p + length;
		cie->augmentation++;
	}

	while (*cie->augmentation) {
		/*
		 * "L" indicates a byte showing how the
		 * LSDA pointer is encoded. Skip it.
		 */
		if (*cie->augmentation == 'L') {
			p++;
			cie->augmentation++;
		} else if (*cie->augmentation == 'R') {
			/*
			 * "R" indicates a byte showing
			 * how FDE addresses are
			 * encoded.
			 */
			cie->encoding = *(char *)p++;
			cie->augmentation++;
		} else if (*cie->augmentation == 'P') {
			/*
			 * "P" indicates a personality
			 * routine in the CIE
			 * augmentation. Not supported.
			 */
			UNWINDER_BUG();
		} else if (*cie->augmentation == 'S') {
			/* "S": signal frame. Not supported. */
			UNWINDER_BUG();
		} else {
			/*
			 * Unknown augmentation. Assume
			 * 'z' augmentation.
			 */
			p = cie->initial_instructions;
			UNWINDER_BUG_ON(!p);
			break;
		}
	}

	cie->initial_instructions = p;
	cie->instructions_end = end;

	/* Add to list */
	spin_lock_irqsave(&dwarf_cie_lock, flags);

	while (*rb_node) {
		struct dwarf_cie *cie_tmp;

		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);

		parent = *rb_node;

		/*
		 * NOTE(review): ">=" sends duplicate keys to the right,
		 * which keeps the descent making progress but renders
		 * the WARN_ON branch below unreachable.
		 */
		if (cie->cie_pointer < cie_tmp->cie_pointer)
			rb_node = &parent->rb_left;
		else if (cie->cie_pointer >= cie_tmp->cie_pointer)
			rb_node = &parent->rb_right;
		else
			WARN_ON(1);
	}

	rb_link_node(&cie->node, parent, rb_node);
	rb_insert_color(&cie->node, &cie_root);

#ifdef CONFIG_MODULES
	if (mod != NULL)
		list_add_tail(&cie->link, &mod->arch.cie_list);
#endif

	spin_unlock_irqrestore(&dwarf_cie_lock, flags);

	return 0;
}
    864
/**
 *	dwarf_parse_fde - parse a single FDE out of a .eh_frame section
 *	@entry: address of the start of the FDE record
 *	@entry_type: the CIE-pointer field value read from the record
 *	@start: address of the FDE body (just past the CIE-pointer field)
 *	@len: length of the FDE as read from its initial-length field
 *	@end: one past the last byte of the FDE
 *	@mod: owning module, or NULL for the kernel's own .eh_frame
 *
 *	Allocate and populate a struct dwarf_fde, resolve its CIE, and
 *	insert it into the fde_root rbtree keyed by the pc interval
 *	[initial_location, initial_location + address_range). Returns
 *	0 on success or -ENOMEM.
 */
static int dwarf_parse_fde(void *entry, u32 entry_type,
			   void *start, unsigned long len,
			   unsigned char *end, struct module *mod)
{
	struct rb_node **rb_node = &fde_root.rb_node;
	struct rb_node *parent = *rb_node;
	struct dwarf_fde *fde;
	struct dwarf_cie *cie;
	unsigned long flags;
	int count;
	void *p = start;

	fde = kzalloc(sizeof(*fde), GFP_KERNEL);
	if (!fde)
		return -ENOMEM;

	fde->length = len;

	/*
	 * In a .eh_frame section the CIE pointer is the
	 * delta between the address within the FDE
	 * (here reconstructed as the address of the CIE-pointer field
	 * itself, p - 4, minus the delta value @entry_type).
	 */
	fde->cie_pointer = (unsigned long)(p - entry_type - 4);

	/* NOTE(review): a failed lookup returns NULL and is dereferenced below. */
	cie = dwarf_lookup_cie(fde->cie_pointer);
	fde->cie = cie;

	/* FDE addresses use the CIE's 'R' encoding when one was given. */
	if (cie->encoding)
		count = dwarf_read_encoded_value(p, &fde->initial_location,
						 cie->encoding);
	else
		count = dwarf_read_addr(p, &fde->initial_location);

	p += count;

	if (cie->encoding)
		count = dwarf_read_encoded_value(p, &fde->address_range,
						 cie->encoding & 0x0f);
	else
		count = dwarf_read_addr(p, &fde->address_range);

	p += count;

	/* 'z' augmentation: skip the ULEB128-prefixed augmentation data. */
	if (fde->cie->flags & DWARF_CIE_Z_AUGMENTATION) {
		unsigned int length;
		count = dwarf_read_uleb128(p, &length);
		p += count + length;
	}

	/* Call frame instructions. */
	fde->instructions = p;
	fde->end = end;

	/* Add to list. */
	spin_lock_irqsave(&dwarf_fde_lock, flags);

	while (*rb_node) {
		struct dwarf_fde *fde_tmp;
		unsigned long tmp_start, tmp_end;
		unsigned long start, end;

		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);

		start = fde->initial_location;
		end = fde->initial_location + fde->address_range;

		tmp_start = fde_tmp->initial_location;
		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;

		parent = *rb_node;

		/* Overlapping pc ranges fall through to WARN_ON without advancing. */
		if (start < tmp_start)
			rb_node = &parent->rb_left;
		else if (start >= tmp_end)
			rb_node = &parent->rb_right;
		else
			WARN_ON(1);
	}

	rb_link_node(&fde->node, parent, rb_node);
	rb_insert_color(&fde->node, &fde_root);

#ifdef CONFIG_MODULES
	if (mod != NULL)
		list_add_tail(&fde->link, &mod->arch.fde_list);
#endif

	spin_unlock_irqrestore(&dwarf_fde_lock, flags);

	return 0;
}
    956
    957static void dwarf_unwinder_dump(struct task_struct *task,
    958				struct pt_regs *regs,
    959				unsigned long *sp,
    960				const struct stacktrace_ops *ops,
    961				void *data)
    962{
    963	struct dwarf_frame *frame, *_frame;
    964	unsigned long return_addr;
    965
    966	_frame = NULL;
    967	return_addr = 0;
    968
    969	while (1) {
    970		frame = dwarf_unwind_stack(return_addr, _frame);
    971
    972		if (_frame)
    973			dwarf_free_frame(_frame);
    974
    975		_frame = frame;
    976
    977		if (!frame || !frame->return_addr)
    978			break;
    979
    980		return_addr = frame->return_addr;
    981		ops->address(data, return_addr, 1);
    982	}
    983
    984	if (frame)
    985		dwarf_free_frame(frame);
    986}
    987
/*
 * Unwinder instance registered with the unwinder core (registration
 * happens in init code outside this chunk). Rating semantics are
 * defined by asm/unwinder.h -- presumably higher is preferred; confirm
 * against the unwinder core before changing.
 */
static struct unwinder dwarf_unwinder = {
	.name = "dwarf-unwinder",
	.dump = dwarf_unwinder_dump,
	.rating = 150,
};
    993
    994static void __init dwarf_unwinder_cleanup(void)
    995{
    996	struct dwarf_fde *fde, *next_fde;
    997	struct dwarf_cie *cie, *next_cie;
    998
    999	/*
   1000	 * Deallocate all the memory allocated for the DWARF unwinder.
   1001	 * Traverse all the FDE/CIE lists and remove and free all the
   1002	 * memory associated with those data structures.
   1003	 */
   1004	rbtree_postorder_for_each_entry_safe(fde, next_fde, &fde_root, node)
   1005		kfree(fde);
   1006
   1007	rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node)
   1008		kfree(cie);
   1009
   1010	mempool_destroy(dwarf_reg_pool);
   1011	mempool_destroy(dwarf_frame_pool);
   1012	kmem_cache_destroy(dwarf_reg_cachep);
   1013	kmem_cache_destroy(dwarf_frame_cachep);
   1014}
   1015
   1016/**
   1017 *	dwarf_parse_section - parse DWARF section
   1018 *	@eh_frame_start: start address of the .eh_frame section
   1019 *	@eh_frame_end: end address of the .eh_frame section
   1020 *	@mod: the kernel module containing the .eh_frame section
   1021 *
   1022 *	Parse the information in a .eh_frame section.
   1023 */
   1024static int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end,
   1025			       struct module *mod)
   1026{
   1027	u32 entry_type;
   1028	void *p, *entry;
   1029	int count, err = 0;
   1030	unsigned long len = 0;
   1031	unsigned int c_entries, f_entries;
   1032	unsigned char *end;
   1033
   1034	c_entries = 0;
   1035	f_entries = 0;
   1036	entry = eh_frame_start;
   1037
   1038	while ((char *)entry < eh_frame_end) {
   1039		p = entry;
   1040
   1041		count = dwarf_entry_len(p, &len);
   1042		if (count == 0) {
   1043			/*
   1044			 * We read a bogus length field value. There is
   1045			 * nothing we can do here apart from disabling
   1046			 * the DWARF unwinder. We can't even skip this
   1047			 * entry and move to the next one because 'len'
   1048			 * tells us where our next entry is.
   1049			 */
   1050			err = -EINVAL;
   1051			goto out;
   1052		} else
   1053			p += count;
   1054
   1055		/* initial length does not include itself */
   1056		end = p + len;
   1057
   1058		entry_type = get_unaligned((u32 *)p);
   1059		p += 4;
   1060
   1061		if (entry_type == DW_EH_FRAME_CIE) {
   1062			err = dwarf_parse_cie(entry, p, len, end, mod);
   1063			if (err < 0)
   1064				goto out;
   1065			else
   1066				c_entries++;
   1067		} else {
   1068			err = dwarf_parse_fde(entry, entry_type, p, len,
   1069					      end, mod);
   1070			if (err < 0)
   1071				goto out;
   1072			else
   1073				f_entries++;
   1074		}
   1075
   1076		entry = (char *)entry + len + 4;
   1077	}
   1078
   1079	printk(KERN_INFO "DWARF unwinder initialised: read %u CIEs, %u FDEs\n",
   1080	       c_entries, f_entries);
   1081
   1082	return 0;
   1083
   1084out:
   1085	return err;
   1086}
   1087
   1088#ifdef CONFIG_MODULES
   1089int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
   1090			  struct module *me)
   1091{
   1092	unsigned int i, err;
   1093	unsigned long start, end;
   1094	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
   1095
   1096	start = end = 0;
   1097
   1098	for (i = 1; i < hdr->e_shnum; i++) {
   1099		/* Alloc bit cleared means "ignore it." */
   1100		if ((sechdrs[i].sh_flags & SHF_ALLOC)
   1101		    && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) {
   1102			start = sechdrs[i].sh_addr;
   1103			end = start + sechdrs[i].sh_size;
   1104			break;
   1105		}
   1106	}
   1107
   1108	/* Did we find the .eh_frame section? */
   1109	if (i != hdr->e_shnum) {
   1110		INIT_LIST_HEAD(&me->arch.cie_list);
   1111		INIT_LIST_HEAD(&me->arch.fde_list);
   1112		err = dwarf_parse_section((char *)start, (char *)end, me);
   1113		if (err) {
   1114			printk(KERN_WARNING "%s: failed to parse DWARF info\n",
   1115			       me->name);
   1116			return err;
   1117		}
   1118	}
   1119
   1120	return 0;
   1121}
   1122
   1123/**
   1124 *	module_dwarf_cleanup - remove FDE/CIEs associated with @mod
   1125 *	@mod: the module that is being unloaded
   1126 *
   1127 *	Remove any FDEs and CIEs from the global lists that came from
   1128 *	@mod's .eh_frame section because @mod is being unloaded.
   1129 */
   1130void module_dwarf_cleanup(struct module *mod)
   1131{
   1132	struct dwarf_fde *fde, *ftmp;
   1133	struct dwarf_cie *cie, *ctmp;
   1134	unsigned long flags;
   1135
   1136	spin_lock_irqsave(&dwarf_cie_lock, flags);
   1137
   1138	list_for_each_entry_safe(cie, ctmp, &mod->arch.cie_list, link) {
   1139		list_del(&cie->link);
   1140		rb_erase(&cie->node, &cie_root);
   1141		kfree(cie);
   1142	}
   1143
   1144	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
   1145
   1146	spin_lock_irqsave(&dwarf_fde_lock, flags);
   1147
   1148	list_for_each_entry_safe(fde, ftmp, &mod->arch.fde_list, link) {
   1149		list_del(&fde->link);
   1150		rb_erase(&fde->node, &fde_root);
   1151		kfree(fde);
   1152	}
   1153
   1154	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
   1155}
   1156#endif /* CONFIG_MODULES */
   1157
   1158/**
   1159 *	dwarf_unwinder_init - initialise the dwarf unwinder
   1160 *
   1161 *	Build the data structures describing the .dwarf_frame section to
   1162 *	make it easier to lookup CIE and FDE entries. Because the
   1163 *	.eh_frame section is packed as tightly as possible it is not
   1164 *	easy to lookup the FDE for a given PC, so we build a list of FDE
   1165 *	and CIE entries that make it easier.
   1166 */
   1167static int __init dwarf_unwinder_init(void)
   1168{
   1169	int err = -ENOMEM;
   1170
   1171	dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
   1172			sizeof(struct dwarf_frame), 0,
   1173			SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL);
   1174
   1175	dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
   1176			sizeof(struct dwarf_reg), 0,
   1177			SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL);
   1178
   1179	dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ,
   1180						    dwarf_frame_cachep);
   1181	if (!dwarf_frame_pool)
   1182		goto out;
   1183
   1184	dwarf_reg_pool = mempool_create_slab_pool(DWARF_REG_MIN_REQ,
   1185						  dwarf_reg_cachep);
   1186	if (!dwarf_reg_pool)
   1187		goto out;
   1188
   1189	err = dwarf_parse_section(__start_eh_frame, __stop_eh_frame, NULL);
   1190	if (err)
   1191		goto out;
   1192
   1193	err = unwinder_register(&dwarf_unwinder);
   1194	if (err)
   1195		goto out;
   1196
   1197	dwarf_unwinder_ready = 1;
   1198
   1199	return 0;
   1200
   1201out:
   1202	printk(KERN_ERR "Failed to initialise DWARF unwinder: %d\n", err);
   1203	dwarf_unwinder_cleanup();
   1204	return err;
   1205}
   1206early_initcall(dwarf_unwinder_init);