cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

boot.S (5837B)


      1/*
      2 * Minimal AArch64 system boot code.
      3 *
      4 * Copyright Linaro Ltd 2019
      5 *
      6 * Loosely based on the newlib/libgloss setup stubs. Using semihosting
      7 * for serial output and exit functions.
      8 */
      9
     10/*
     11 * Semihosting interface on ARM AArch64
     12 * See "Semihosting for AArch32 and AArch64 Relase 2.0" by ARM
     13 * w0 - semihosting call number
     14 * x1 - semihosting parameter
     15 */
     16#define semihosting_call hlt 0xf000
     17#define SYS_WRITEC	0x03	/* character to debug channel */
     18#define SYS_WRITE0	0x04	/* string to debug channel */
     19#define SYS_EXIT	0x18
     20
     21	.align	12
     22
     23	.macro	ventry	label
     24	.align	7
     25	b	\label
     26	.endm
     27
     28vector_table:
     29	/* Current EL with SP0.	 */
     30	ventry	curr_sp0_sync		/* Synchronous	*/
     31	ventry	curr_sp0_irq		/* Irq/vIRQ  */
     32	ventry	curr_sp0_fiq		/* Fiq/vFIQ  */
     33	ventry	curr_sp0_serror		/* SError/VSError  */
     34
     35	/* Current EL with SPx.	 */
     36	ventry	curr_spx_sync		/* Synchronous	*/
     37	ventry	curr_spx_irq		/* IRQ/vIRQ  */
     38	ventry	curr_spx_fiq		/* FIQ/vFIQ  */
     39	ventry	curr_spx_serror		/* SError/VSError  */
     40
     41	/* Lower EL using AArch64.  */
     42	ventry	lower_a64_sync		/* Synchronous	*/
     43	ventry	lower_a64_irq		/* IRQ/vIRQ  */
     44	ventry	lower_a64_fiq		/* FIQ/vFIQ  */
     45	ventry	lower_a64_serror	/* SError/VSError  */
     46
     47	/* Lower EL using AArch32.  */
     48	ventry	lower_a32_sync		/* Synchronous	*/
     49	ventry	lower_a32_irq		/* IRQ/vIRQ  */
     50	ventry	lower_a32_fiq		/* FIQ/vFIQ  */
     51	ventry	lower_a32_serror	/* SError/VSError  */
     52
     53	.text
     54	.align 4
     55
     56	/* Common vector handling for now */
     57curr_sp0_sync:
     58curr_sp0_irq:
     59curr_sp0_fiq:
     60curr_sp0_serror:
     61curr_spx_sync:
     62curr_spx_irq:
     63curr_spx_fiq:
     64curr_spx_serror:
     65lower_a64_sync:
     66lower_a64_irq:
     67lower_a64_fiq:
     68lower_a64_serror:
     69lower_a32_sync:
     70lower_a32_irq:
     71lower_a32_fiq:
     72lower_a32_serror:
     73	mov	x0, SYS_WRITE0
     74	adr	x1, .error
     75	semihosting_call
     76	mov	x0, SYS_EXIT
     77	mov	x1, 1
     78	semihosting_call
     79	/* never returns */
     80
     81	.section .rodata
     82.error:
     83	.string "Terminated by exception.\n"
     84
     85	.text
     86	.align 4
     87	.global __start
     88__start:
     89	/* Installs a table of exception vectors to catch and handle all
     90	   exceptions by terminating the process with a diagnostic.  */
     91	adr	x0, vector_table
     92	msr	vbar_el1, x0
     93
     94	/* Page table setup (identity mapping). */
     95	adrp	x0, ttb
     96	add	x0, x0, :lo12:ttb
     97	msr	ttbr0_el1, x0
     98
     99	/*
    100	 * Setup a flat address mapping page-tables. Stage one simply
    101	 * maps RAM to the first Gb. The stage2 tables have two 2mb
    102	 * translation block entries covering a series of adjacent
    103	 * 4k pages.
    104	*/
    105
    106	/* Stage 1 entry: indexed by IA[38:30] */
    107	adr	x1, .				/* phys address */
    108	bic	x1, x1, #(1 << 30) - 1		/* 1GB alignment*/
    109	add	x2, x0, x1, lsr #(30 - 3)	/* offset in l1 page table */
    110
    111	/* point to stage 2 table [47:12] */
    112	adrp	x0, ttb_stage2
    113	orr 	x1, x0, #3 			/* ptr to stage 2 */
    114	str	x1, [x2]
    115
    116	/* Stage 2 entries: indexed by IA[29:21] */
    117	ldr	x5, =(((1 << 9) - 1) << 21)
    118
    119	/* First block: .text/RO/execute enabled */
    120	adr	x1, .				/* phys address */
    121	bic	x1, x1, #(1 << 21) - 1		/* 2mb block alignment	*/
    122	and	x4, x1, x5			/* IA[29:21] */
    123	add	x2, x0, x4, lsr #(21 - 3)	/* offset in l2 page table */
    124	ldr	x3, =0x401			/* attr(AF, block) */
    125	orr	x1, x1, x3
    126	str	x1, [x2]			/* 1st 2mb (.text & rodata) */
    127
    128	/* Second block: .data/RW/no execute */
    129	adrp	x1, .data
    130	add	x1, x1, :lo12:.data
    131	bic	x1, x1, #(1 << 21) - 1		/* 2mb block alignment */
    132	and	x4, x1, x5			/* IA[29:21] */
    133	add	x2, x0, x4, lsr #(21 - 3)	/* offset in l2 page table */
    134	ldr	x3, =(3 << 53) | 0x401		/* attr(AF, NX, block) */
    135	orr	x1, x1, x3
    136	str	x1, [x2]			/* 2nd 2mb (.data & .bss)*/
    137
    138	/* Setup/enable the MMU.  */
    139
    140	/*
    141	 * TCR_EL1 - Translation Control Registers
    142	 *
    143	 * IPS[34:32] = 40-bit PA, 1TB
    144	 * TG0[14:15] = b00 => 4kb granuale
    145	 * ORGN0[11:10] = Outer: Normal, WB Read-Alloc No Write-Alloc Cacheable
    146	 * IRGN0[9:8] = Inner: Normal, WB Read-Alloc No Write-Alloc Cacheable
    147	 * T0SZ[5:0]  = 2^(64 - 25)
    148	 *
    149	 * The size of T0SZ controls what the initial lookup level. It
    150	 * would be nice to start at level 2 but unfortunatly for a
    151	 * flat-mapping on the virt machine we need to handle IA's
    152	 * with at least 1gb range to see RAM. So we start with a
    153	 * level 1 lookup.
    154	 */
    155	ldr	x0, = (2 << 32) | 25 | (3 << 10) | (3 << 8)
    156	msr	tcr_el1, x0
    157
    158	mov	x0, #0xee			/* Inner/outer cacheable WB */
    159	msr	mair_el1, x0
    160	isb
    161
    162	/*
    163	 * SCTLR_EL1 - System Control Register
    164	 *
    165	 * WXN[19] = 0 = no effect, Write does not imply XN (execute never)
    166	 * I[12] = Instruction cachability control
    167	 * SA[3] = SP alignment check
    168	 * C[2] = Data cachability control
    169	 * M[0] = 1, enable stage 1 address translation for EL0/1
    170	 */
    171	mrs	x0, sctlr_el1
    172	ldr	x1, =0x100d			/* bits I(12) SA(3) C(2) M(0) */
    173	bic	x0, x0, #(1 << 1)		/* clear bit A(1) */
    174	bic	x0, x0, #(1 << 19)		/* clear WXN */
    175	orr	x0, x0, x1			/* set bits */
    176
    177	dsb	sy
    178	msr	sctlr_el1, x0
    179	isb
    180
    181	/*
    182	 * Enable FP registers. The standard C pre-amble will be
    183	 * saving these and A-profile compilers will use AdvSIMD
    184	 * registers unless we tell it not to.
    185	*/
    186	mrs	x0, cpacr_el1
    187	orr	x0, x0, #(3 << 20)
    188	msr	cpacr_el1, x0
    189
    190	/* Setup some stack space and enter the test code.
    191	 * Assume everthing except the return value is garbage when we
    192	 * return, we won't need it.
    193	 */
    194	adrp	x0, stack_end
    195	add	x0, x0, :lo12:stack_end
    196	mov	sp, x0
    197	bl	main
    198
    199	/* pass return value to sys exit */
    200_exit:
    201	mov    x1, x0
    202	ldr    x0, =0x20026 /* ADP_Stopped_ApplicationExit */
    203	stp    x0, x1, [sp, #-16]!
    204	mov    x1, sp
    205	mov    x0, SYS_EXIT
    206	semihosting_call
    207	/* never returns */
    208
    209	/*
    210	 * Helper Functions
    211	*/
    212
    213	/* Output a single character to serial port */
    214	.global __sys_outc
    215__sys_outc:
    216	stp x0, x1, [sp, #-16]!
    217	/* pass address of c on stack */
    218	mov x1, sp
    219	mov x0, SYS_WRITEC
    220	semihosting_call
    221	ldp x0, x1, [sp], #16
    222	ret
    223
    224	.data
    225	.align	12
    226
    227	/* Translation table
    228	 * @4k granuale: 9 bit lookup, 512 entries
    229	*/
    230ttb:
    231	.space	4096, 0
    232
    233	.align	12
    234ttb_stage2:
    235	.space	4096, 0
    236
    237	.align	12
    238stack:
    239	.space 65536, 0
    240stack_end: