cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

head.S (39093B)


      1/* SPDX-License-Identifier: GPL-2.0-only */
      2/*
      3 *  linux/arch/arm/boot/compressed/head.S
      4 *
      5 *  Copyright (C) 1996-2002 Russell King
      6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
      7 */
      8#include <linux/linkage.h>
      9#include <asm/assembler.h>
     10#include <asm/v7m.h>
     11
     12#include "efi-header.S"
     13
     14#ifdef __ARMEB__
     15#define OF_DT_MAGIC 0xd00dfeed
     16#else
     17#define OF_DT_MAGIC 0xedfe0dd0
     18#endif
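/*
 * Byte-order note: the FDT header magic is 0xd00dfeed stored big-endian,
 * so a little-endian load of that word reads back as 0xedfe0dd0; the two
 * definitions above are the same magic as seen from each endianness.
 */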
     19
     20 AR_CLASS(	.arch	armv7-a	)
     21 M_CLASS(	.arch	armv7-m	)
     22
     23/*
     24 * Debugging stuff
     25 *
     26 * Note that these macros must not contain any code which is not
     27 * 100% relocatable.  Any attempt to do so will result in a crash.
     28 * Please select one of the following when turning on debugging.
     29 */
     30#ifdef DEBUG
     31
     32#if defined(CONFIG_DEBUG_ICEDCC)
     33
     34#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
     35		.macro	loadsp, rb, tmp1, tmp2
     36		.endm
     37		.macro	writeb, ch, rb, tmp
     38		mcr	p14, 0, \ch, c0, c5, 0
     39		.endm
     40#elif defined(CONFIG_CPU_XSCALE)
     41		.macro	loadsp, rb, tmp1, tmp2
     42		.endm
     43		.macro	writeb, ch, rb, tmp
     44		mcr	p14, 0, \ch, c8, c0, 0
     45		.endm
     46#else
     47		.macro	loadsp, rb, tmp1, tmp2
     48		.endm
     49		.macro	writeb, ch, rb, tmp
     50		mcr	p14, 0, \ch, c1, c0, 0
     51		.endm
     52#endif
     53
     54#else
     55
     56#include CONFIG_DEBUG_LL_INCLUDE
     57
     58		.macro	writeb,	ch, rb, tmp
     59#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
     60		waituartcts \tmp, \rb
     61#endif
     62		waituarttxrdy \tmp, \rb
     63		senduart \ch, \rb
     64		busyuart \tmp, \rb
     65		.endm
     66
     67#if defined(CONFIG_ARCH_SA1100)
     68		.macro	loadsp, rb, tmp1, tmp2
     69		mov	\rb, #0x80000000	@ physical base address
     70#ifdef CONFIG_DEBUG_LL_SER3
     71		add	\rb, \rb, #0x00050000	@ Ser3
     72#else
     73		add	\rb, \rb, #0x00010000	@ Ser1
     74#endif
     75		.endm
     76#else
     77		.macro	loadsp,	rb, tmp1, tmp2
     78		addruart \rb, \tmp1, \tmp2
     79		.endm
     80#endif
     81#endif
     82#endif
     83
     84		.macro	kputc,val
     85		mov	r0, \val
     86		bl	putc
     87		.endm
     88
     89		.macro	kphex,val,len
     90		mov	r0, \val
     91		mov	r1, #\len
     92		bl	phex
     93		.endm
     94
     95		/*
     96		 * Debug kernel copy by printing the memory addresses involved
     97		 */
     98		.macro dbgkc, begin, end, cbegin, cend
     99#ifdef DEBUG
    100		kputc   #'C'
    101		kputc   #':'
    102		kputc   #'0'
    103		kputc   #'x'
    104		kphex   \begin, 8	/* Start of compressed kernel */
    105		kputc	#'-'
    106		kputc	#'0'
    107		kputc	#'x'
    108		kphex	\end, 8		/* End of compressed kernel */
    109		kputc	#'-'
    110		kputc	#'>'
    111		kputc   #'0'
    112		kputc   #'x'
    113		kphex   \cbegin, 8	/* Start of kernel copy */
    114		kputc	#'-'
    115		kputc	#'0'
    116		kputc	#'x'
    117		kphex	\cend, 8	/* End of kernel copy */
    118		kputc	#'\n'
    119#endif
    120		.endm
    121
    122		/*
    123		 * Debug print of the final appended DTB location
    124		 */
    125		.macro dbgadtb, begin, size
    126#ifdef DEBUG
    127		kputc   #'D'
    128		kputc   #'T'
    129		kputc   #'B'
    130		kputc   #':'
    131		kputc   #'0'
    132		kputc   #'x'
    133		kphex   \begin, 8	/* Start of appended DTB */
    134		kputc	#' '
    135		kputc	#'('
    136		kputc	#'0'
    137		kputc	#'x'
    138		kphex	\size, 8	/* Size of appended DTB */
    139		kputc	#')'
    140		kputc	#'\n'
    141#endif
    142		.endm
    143
    144		.macro	enable_cp15_barriers, reg
    145		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
    146		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
    147		bne	.L_\@
    148		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
    149		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
    150 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
    151 THUMB(		isb						)
    152.L_\@:
    153		.endm
    154
    155		/*
    156		 * The kernel build system appends the size of the
    157		 * decompressed kernel at the end of the compressed data
    158		 * in little-endian form.
    159		 */
    160		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
    161		adr	\res, .Linflated_image_size_offset
    162		ldr	\tmp1, [\res]
    163		add	\tmp1, \tmp1, \res	@ address of inflated image size
    164
    165		ldrb	\res, [\tmp1]		@ get_unaligned_le32
    166		ldrb	\tmp2, [\tmp1, #1]
    167		orr	\res, \res, \tmp2, lsl #8
    168		ldrb	\tmp2, [\tmp1, #2]
    169		ldrb	\tmp1, [\tmp1, #3]
    170		orr	\res, \res, \tmp2, lsl #16
    171		orr	\res, \res, \tmp1, lsl #24
    172		.endm
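		/*
		 * Roughly equivalent C, as an illustrative sketch (the
		 * kernel's usual helper for this is get_unaligned_le32()):
		 *
		 *	(p points at the last four bytes of the payload)
		 *	u32 size = p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
		 *
		 * Byte loads are used because that word is not guaranteed
		 * to be 4-byte aligned; .Linflated_image_size_offset below
		 * records where to find it relative to the code.
		 */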
    173
    174		.macro	be32tocpu, val, tmp
    175#ifndef __ARMEB__
    176		/* convert to little endian */
    177		rev_l	\val, \tmp
    178#endif
    179		.endm
    180
    181		.section ".start", "ax"
    182/*
    183 * sort out different calling conventions
    184 */
    185		.align
    186		/*
    187		 * Always enter in ARM state for CPUs that support the ARM ISA.
    188		 * As of today (2014) that's exactly the members of the A and R
    189		 * classes.
    190		 */
    191 AR_CLASS(	.arm	)
    192start:
    193		.type	start,#function
    194		/*
    195		 * These 7 nops along with the 1 nop immediately below for
    196		 * !THUMB2 form 8 nops that make the compressed kernel bootable
    197		 * on legacy ARM systems that were assuming the kernel in a.out
    198		 * binary format. The boot loaders on these systems would
    199		 * jump 32 bytes into the image to skip the a.out header.
     200		 * With these 8 nops filling exactly 32 bytes, things still
    201		 * work as expected on these legacy systems. Thumb2 mode keeps
    202		 * 7 of the nops as it turns out that some boot loaders
     203		 * were patching the initial instructions of the kernel, i.e.
    204		 * had started to exploit this "patch area".
    205		 */
    206		__initial_nops
    207		.rept	5
    208		__nop
    209		.endr
    210#ifndef CONFIG_THUMB2_KERNEL
    211		__nop
    212#else
    213 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
    214  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
    215		.thumb
    216#endif
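		/*
		 * Size check: __initial_nops (see efi-header.S) occupies two
		 * instruction slots, the .rept above adds five, and the
		 * !THUMB2 case adds one more, i.e. 8 slots * 4 bytes = the
		 * 32 bytes of a.out header padding described above.
		 */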
    217		W(b)	1f
    218
    219		.word	_magic_sig	@ Magic numbers to help the loader
    220		.word	_magic_start	@ absolute load/run zImage address
    221		.word	_magic_end	@ zImage end address
    222		.word	0x04030201	@ endianness flag
    223		.word	0x45454545	@ another magic number to indicate
    224		.word	_magic_table	@ additional data table
    225
    226		__EFI_HEADER
    2271:
    228 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
    229 AR_CLASS(	mrs	r9, cpsr	)
    230#ifdef CONFIG_ARM_VIRT_EXT
    231		bl	__hyp_stub_install	@ get into SVC mode, reversibly
    232#endif
    233		mov	r7, r1			@ save architecture ID
    234		mov	r8, r2			@ save atags pointer
    235
    236#ifndef CONFIG_CPU_V7M
    237		/*
    238		 * Booting from Angel - need to enter SVC mode and disable
    239		 * FIQs/IRQs (numeric definitions from angel arm.h source).
    240		 * We only do this if we were in user mode on entry.
    241		 */
    242		mrs	r2, cpsr		@ get current mode
    243		tst	r2, #3			@ not user?
    244		bne	not_angel
    245		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
    246 ARM(		swi	0x123456	)	@ angel_SWI_ARM
    247 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
    248not_angel:
    249		safe_svcmode_maskall r0
    250		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
    251						@ SPSR
    252#endif
    253		/*
    254		 * Note that some cache flushing and other stuff may
    255		 * be needed here - is there an Angel SWI call for this?
    256		 */
    257
    258		/*
    259		 * some architecture specific code can be inserted
    260		 * by the linker here, but it should preserve r7, r8, and r9.
    261		 */
    262
    263		.text
    264
    265#ifdef CONFIG_AUTO_ZRELADDR
    266		/*
    267		 * Find the start of physical memory.  As we are executing
    268		 * without the MMU on, we are in the physical address space.
    269		 * We just need to get rid of any offset by aligning the
    270		 * address.
    271		 *
    272		 * This alignment is a balance between the requirements of
    273		 * different platforms - we have chosen 128MB to allow
    274		 * platforms which align the start of their physical memory
    275		 * to 128MB to use this feature, while allowing the zImage
    276		 * to be placed within the first 128MB of memory on other
    277		 * platforms.  Increasing the alignment means we place
    278		 * stricter alignment requirements on the start of physical
    279		 * memory, but relaxing it means that we break people who
    280		 * are already placing their zImage in (eg) the top 64MB
    281		 * of this range.
    282		 */
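		/*
		 * Illustrative C for the two instructions below:
		 *
		 *	phys_start = pc & ~(SZ_128M - 1);
		 *
		 * i.e. masking with 0xf8000000 simply rounds the current PC
		 * down to a 128MB boundary to guess the base of physical RAM.
		 */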
    283		mov	r0, pc
    284		and	r0, r0, #0xf8000000
    285#ifdef CONFIG_USE_OF
    286		adr	r1, LC1
    287#ifdef CONFIG_ARM_APPENDED_DTB
    288		/*
    289		 * Look for an appended DTB.  If found, we cannot use it to
    290		 * validate the calculated start of physical memory, as its
    291		 * memory nodes may need to be augmented by ATAGS stored at
    292		 * an offset from the same start of physical memory.
    293		 */
    294		ldr	r2, [r1, #4]	@ get &_edata
    295		add	r2, r2, r1	@ relocate it
    296		ldr	r2, [r2]	@ get DTB signature
    297		ldr	r3, =OF_DT_MAGIC
    298		cmp	r2, r3		@ do we have a DTB there?
    299		beq	1f		@ if yes, skip validation
    300#endif /* CONFIG_ARM_APPENDED_DTB */
    301
    302		/*
    303		 * Make sure we have some stack before calling C code.
    304		 * No GOT fixup has occurred yet, but none of the code we're
    305		 * about to call uses any global variables.
    306		 */
    307		ldr	sp, [r1]	@ get stack location
    308		add	sp, sp, r1	@ apply relocation
    309
    310		/* Validate calculated start against passed DTB */
    311		mov	r1, r8
    312		bl	fdt_check_mem_start
    3131:
    314#endif /* CONFIG_USE_OF */
    315		/* Determine final kernel image address. */
    316		add	r4, r0, #TEXT_OFFSET
    317#else
    318		ldr	r4, =zreladdr
    319#endif
    320
    321		/*
     322		 * Set up a page table only if it won't overwrite ourselves.
     323		 * That means r4 < pc || r4 - 16k page directory > &_end.
     324		 * Given that r4 > &_end is highly infrequent, we add a rough
     325		 * additional 1MB of room for a possible appended DTB.
    326		 */
    327		mov	r0, pc
    328		cmp	r0, r4
    329		ldrcc	r0, .Lheadroom
    330		addcc	r0, r0, pc
    331		cmpcc	r4, r0
    332		orrcc	r4, r4, #1		@ remember we skipped cache_on
    333		blcs	cache_on
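		/*
		 * Note the flag convention above: if cache_on was skipped,
		 * bit 0 of r4 is set; it is tested (and cleared) again after
		 * relocation, near not_relocated, and also lets the cache
		 * flush routines skip the D-cache clean when the caches were
		 * never enabled.
		 */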
    334
    335restart:	adr	r0, LC1
    336		ldr	sp, [r0]
    337		ldr	r6, [r0, #4]
    338		add	sp, sp, r0
    339		add	r6, r6, r0
    340
    341		get_inflated_image_size	r9, r10, lr
    342
    343#ifndef CONFIG_ZBOOT_ROM
    344		/* malloc space is above the relocated stack (64k max) */
    345		add	r10, sp, #MALLOC_SIZE
    346#else
    347		/*
     348		 * With ZBOOT_ROM the bss/stack is non-relocatable,
    349		 * but someone could still run this code from RAM,
    350		 * in which case our reference is _edata.
    351		 */
    352		mov	r10, r6
    353#endif
    354
    355		mov	r5, #0			@ init dtb size to 0
    356#ifdef CONFIG_ARM_APPENDED_DTB
    357/*
    358 *   r4  = final kernel address (possibly with LSB set)
    359 *   r5  = appended dtb size (still unknown)
    360 *   r6  = _edata
    361 *   r7  = architecture ID
    362 *   r8  = atags/device tree pointer
    363 *   r9  = size of decompressed image
     364 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
    365 *   sp  = stack pointer
    366 *
    367 * if there are device trees (dtb) appended to zImage, advance r10 so that the
    368 * dtb data will get relocated along with the kernel if necessary.
    369 */
    370
    371		ldr	lr, [r6, #0]
    372		ldr	r1, =OF_DT_MAGIC
    373		cmp	lr, r1
    374		bne	dtb_check_done		@ not found
    375
    376#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
    377		/*
    378		 * OK... Let's do some funky business here.
    379		 * If we do have a DTB appended to zImage, and we do have
     380		 * an ATAG list around, we want the latter to be translated
     381		 * and folded into the former here. No GOT fixup has occurred
     382		 * yet, but none of the code we're about to call uses any
     383		 * global variables.
     384		 */
    385
    386		/* Get the initial DTB size */
    387		ldr	r5, [r6, #4]
    388		be32tocpu r5, r1
    389		dbgadtb	r6, r5
    390		/* 50% DTB growth should be good enough */
    391		add	r5, r5, r5, lsr #1
    392		/* preserve 64-bit alignment */
    393		add	r5, r5, #7
    394		bic	r5, r5, #7
    395		/* clamp to 32KB min and 1MB max */
    396		cmp	r5, #(1 << 15)
    397		movlo	r5, #(1 << 15)
    398		cmp	r5, #(1 << 20)
    399		movhi	r5, #(1 << 20)
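		/*
		 * In C, the work-space estimate above is roughly
		 * (illustrative only):
		 *
		 *	space = ALIGN(dtb_size + dtb_size / 2, 8);
		 *	space = clamp(space, SZ_32K, SZ_1M);
		 */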
    400		/* temporarily relocate the stack past the DTB work space */
    401		add	sp, sp, r5
    402
    403		mov	r0, r8
    404		mov	r1, r6
    405		mov	r2, r5
    406		bl	atags_to_fdt
    407
    408		/*
    409		 * If returned value is 1, there is no ATAG at the location
    410		 * pointed by r8.  Try the typical 0x100 offset from start
    411		 * of RAM and hope for the best.
    412		 */
    413		cmp	r0, #1
    414		sub	r0, r4, #TEXT_OFFSET
    415		bic	r0, r0, #1
    416		add	r0, r0, #0x100
    417		mov	r1, r6
    418		mov	r2, r5
    419		bleq	atags_to_fdt
    420
    421		sub	sp, sp, r5
    422#endif
    423
    424		mov	r8, r6			@ use the appended device tree
    425
    426		/*
    427		 * Make sure that the DTB doesn't end up in the final
    428		 * kernel's .bss area. To do so, we adjust the decompressed
    429		 * kernel size to compensate if that .bss size is larger
    430		 * than the relocated code.
    431		 */
    432		ldr	r5, =_kernel_bss_size
    433		adr	r1, wont_overwrite
    434		sub	r1, r6, r1
    435		subs	r1, r5, r1
    436		addhi	r9, r9, r1
    437
    438		/* Get the current DTB size */
    439		ldr	r5, [r6, #4]
    440		be32tocpu r5, r1
    441
    442		/* preserve 64-bit alignment */
    443		add	r5, r5, #7
    444		bic	r5, r5, #7
    445
    446		/* relocate some pointers past the appended dtb */
    447		add	r6, r6, r5
    448		add	r10, r10, r5
    449		add	sp, sp, r5
    450dtb_check_done:
    451#endif
    452
    453/*
    454 * Check to see if we will overwrite ourselves.
    455 *   r4  = final kernel address (possibly with LSB set)
    456 *   r9  = size of decompressed image
     457 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
    458 * We basically want:
    459 *   r4 - 16k page directory >= r10 -> OK
    460 *   r4 + image length <= address of wont_overwrite -> OK
    461 * Note: the possible LSB in r4 is harmless here.
    462 */
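/*
 * The same test, roughly, in C (using the register names above):
 *
 *	if (r4 >= r10 + 16384 ||
 *	    r4 + r9 <= (unsigned long)&wont_overwrite)
 *		goto wont_overwrite;
 *
 * otherwise we fall through and relocate ourselves first.
 */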
    463		add	r10, r10, #16384
    464		cmp	r4, r10
    465		bhs	wont_overwrite
    466		add	r10, r4, r9
    467		adr	r9, wont_overwrite
    468		cmp	r10, r9
    469		bls	wont_overwrite
    470
    471/*
    472 * Relocate ourselves past the end of the decompressed kernel.
    473 *   r6  = _edata
    474 *   r10 = end of the decompressed kernel
    475 * Because we always copy ahead, we need to do it from the end and go
    476 * backward in case the source and destination overlap.
    477 */
    478		/*
    479		 * Bump to the next 256-byte boundary with the size of
    480		 * the relocation code added. This avoids overwriting
     481		 * ourselves when the offset is small.
    482		 */
    483		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
    484		bic	r10, r10, #255
    485
    486		/* Get start of code we want to copy and align it down. */
    487		adr	r5, restart
    488		bic	r5, r5, #31
    489
    490/* Relocate the hyp vector base if necessary */
    491#ifdef CONFIG_ARM_VIRT_EXT
    492		mrs	r0, spsr
    493		and	r0, r0, #MODE_MASK
    494		cmp	r0, #HYP_MODE
    495		bne	1f
    496
    497		/*
    498		 * Compute the address of the hyp vectors after relocation.
    499		 * Call __hyp_set_vectors with the new address so that we
    500		 * can HVC again after the copy.
    501		 */
    502		adr_l	r0, __hyp_stub_vectors
    503		sub	r0, r0, r5
    504		add	r0, r0, r10
    505		bl	__hyp_set_vectors
    5061:
    507#endif
    508
    509		sub	r9, r6, r5		@ size to copy
    510		add	r9, r9, #31		@ rounded up to a multiple
    511		bic	r9, r9, #31		@ ... of 32 bytes
    512		add	r6, r9, r5
    513		add	r9, r9, r10
    514
    515#ifdef DEBUG
    516		sub     r10, r6, r5
    517		sub     r10, r9, r10
    518		/*
    519		 * We are about to copy the kernel to a new memory area.
    520		 * The boundaries of the new memory area can be found in
    521		 * r10 and r9, whilst r5 and r6 contain the boundaries
    522		 * of the memory we are going to copy.
    523		 * Calling dbgkc will help with the printing of this
    524		 * information.
    525		 */
    526		dbgkc	r5, r6, r10, r9
    527#endif
    528
    5291:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
    530		cmp	r6, r5
    531		stmdb	r9!, {r0 - r3, r10 - r12, lr}
    532		bhi	1b
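		/*
		 * The loop above moves eight registers (32 bytes) per
		 * iteration and walks downwards from _edata, so an
		 * overlapping destination higher in memory is never
		 * written before it has been read.
		 */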
    533
    534		/* Preserve offset to relocated code. */
    535		sub	r6, r9, r6
    536
    537		mov	r0, r9			@ start of relocated zImage
    538		add	r1, sp, r6		@ end of relocated zImage
    539		bl	cache_clean_flush
    540
    541		badr	r0, restart
    542		add	r0, r0, r6
    543		mov	pc, r0
    544
    545wont_overwrite:
    546		adr	r0, LC0
    547		ldmia	r0, {r1, r2, r3, r11, r12}
    548		sub	r0, r0, r1		@ calculate the delta offset
    549
    550/*
    551 * If delta is zero, we are running at the address we were linked at.
    552 *   r0  = delta
    553 *   r2  = BSS start
    554 *   r3  = BSS end
    555 *   r4  = kernel execution address (possibly with LSB set)
    556 *   r5  = appended dtb size (0 if not present)
    557 *   r7  = architecture ID
    558 *   r8  = atags pointer
    559 *   r11 = GOT start
    560 *   r12 = GOT end
    561 *   sp  = stack pointer
    562 */
    563		orrs	r1, r0, r5
    564		beq	not_relocated
    565
    566		add	r11, r11, r0
    567		add	r12, r12, r0
    568
    569#ifndef CONFIG_ZBOOT_ROM
    570		/*
     571		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM=n),
    572		 * we need to fix up pointers into the BSS region.
    573		 * Note that the stack pointer has already been fixed up.
    574		 */
    575		add	r2, r2, r0
    576		add	r3, r3, r0
    577
    578		/*
    579		 * Relocate all entries in the GOT table.
    580		 * Bump bss entries to _edata + dtb size
    581		 */
    5821:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
    583		add	r1, r1, r0		@ This fixes up C references
    584		cmp	r1, r2			@ if entry >= bss_start &&
    585		cmphs	r3, r1			@       bss_end > entry
    586		addhi	r1, r1, r5		@    entry += dtb size
    587		str	r1, [r11], #4		@ next entry
    588		cmp	r11, r12
    589		blo	1b
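		/*
		 * Illustrative C for the GOT fixup above (names follow the
		 * register comments earlier):
		 *
		 *	for (u32 *p = got_start; p < got_end; p++) {
		 *		u32 v = *p + delta;
		 *		if (v >= bss_start && v < bss_end)
		 *			v += dtb_size;
		 *		*p = v;
		 *	}
		 */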
    590
    591		/* bump our bss pointers too */
    592		add	r2, r2, r5
    593		add	r3, r3, r5
    594
    595#else
    596
    597		/*
    598		 * Relocate entries in the GOT table.  We only relocate
    599		 * the entries that are outside the (relocated) BSS region.
    600		 */
    6011:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
    602		cmp	r1, r2			@ entry < bss_start ||
    603		cmphs	r3, r1			@ _end < entry
    604		addlo	r1, r1, r0		@ table.  This fixes up the
    605		str	r1, [r11], #4		@ C references.
    606		cmp	r11, r12
    607		blo	1b
    608#endif
    609
    610not_relocated:	mov	r0, #0
    6111:		str	r0, [r2], #4		@ clear bss
    612		str	r0, [r2], #4
    613		str	r0, [r2], #4
    614		str	r0, [r2], #4
    615		cmp	r2, r3
    616		blo	1b
    617
    618		/*
    619		 * Did we skip the cache setup earlier?
    620		 * That is indicated by the LSB in r4.
    621		 * Do it now if so.
    622		 */
    623		tst	r4, #1
    624		bic	r4, r4, #1
    625		blne	cache_on
    626
    627/*
     628 * The C runtime environment should now be set up sufficiently.
    629 * Set up some pointers, and start decompressing.
    630 *   r4  = kernel execution address
    631 *   r7  = architecture ID
    632 *   r8  = atags pointer
    633 */
    634		mov	r0, r4
    635		mov	r1, sp			@ malloc space above stack
    636		add	r2, sp, #MALLOC_SIZE	@ 64k max
    637		mov	r3, r7
    638		bl	decompress_kernel
    639
    640		get_inflated_image_size	r1, r2, r3
    641
    642		mov	r0, r4			@ start of inflated image
    643		add	r1, r1, r0		@ end of inflated image
    644		bl	cache_clean_flush
    645		bl	cache_off
    646
    647#ifdef CONFIG_ARM_VIRT_EXT
    648		mrs	r0, spsr		@ Get saved CPU boot mode
    649		and	r0, r0, #MODE_MASK
    650		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
    651		bne	__enter_kernel		@ boot kernel directly
    652
    653		adr_l	r0, __hyp_reentry_vectors
    654		bl	__hyp_set_vectors
    655		__HVC(0)			@ otherwise bounce to hyp mode
    656
    657		b	.			@ should never be reached
    658#else
    659		b	__enter_kernel
    660#endif
    661
    662		.align	2
    663		.type	LC0, #object
    664LC0:		.word	LC0			@ r1
    665		.word	__bss_start		@ r2
    666		.word	_end			@ r3
    667		.word	_got_start		@ r11
    668		.word	_got_end		@ ip
    669		.size	LC0, . - LC0
    670
    671		.type	LC1, #object
    672LC1:		.word	.L_user_stack_end - LC1	@ sp
    673		.word	_edata - LC1		@ r6
    674		.size	LC1, . - LC1
    675
    676.Lheadroom:
    677		.word	_end - restart + 16384 + 1024*1024
    678
    679.Linflated_image_size_offset:
    680		.long	(input_data_end - 4) - .
    681
    682#ifdef CONFIG_ARCH_RPC
    683		.globl	params
    684params:		ldr	r0, =0x10000100		@ params_phys for RPC
    685		mov	pc, lr
    686		.ltorg
    687		.align
    688#endif
    689
    690/*
    691 * dcache_line_size - get the minimum D-cache line size from the CTR register
    692 * on ARMv7.
    693 */
    694		.macro	dcache_line_size, reg, tmp
    695#ifdef CONFIG_CPU_V7M
    696		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
    697		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
    698		ldr	\tmp, [\tmp]
    699#else
    700		mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
    701#endif
    702		lsr	\tmp, \tmp, #16
    703		and	\tmp, \tmp, #0xf		@ cache line size encoding
    704		mov	\reg, #4			@ bytes per word
    705		mov	\reg, \reg, lsl \tmp		@ actual cache line size
    706		.endm
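/*
 * CTR bits [19:16] (DminLine) hold log2 of the smallest D-cache line
 * in words, so the macro's result is 4 << DminLine bytes.
 */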
    707
    708/*
     709 * Turn on the cache.  We need to set up some page tables so that we
     710 * can have both the I and D caches on.
     711 *
     712 * We place the page tables 16k down from the kernel execution address,
     713 * and we hope that nothing else is using that memory; if it is in
     714 * use, we will go pop!
    715 *
    716 * On entry,
    717 *  r4 = kernel execution address
    718 *  r7 = architecture number
    719 *  r8 = atags pointer
    720 * On exit,
    721 *  r0, r1, r2, r3, r9, r10, r12 corrupted
    722 * This routine must preserve:
    723 *  r4, r7, r8
    724 */
    725		.align	5
    726cache_on:	mov	r3, #8			@ cache_on function
    727		b	call_cache_fn
    728
    729/*
     730 * Initialize the highest priority protection region, PR7,
     731 * to cover the whole 32-bit address space as cacheable and bufferable.
    732 */
    733__armv4_mpu_cache_on:
    734		mov	r0, #0x3f		@ 4G, the whole
    735		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
    736		mcr 	p15, 0, r0, c6, c7, 1
    737
    738		mov	r0, #0x80		@ PR7
    739		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
    740		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
    741		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
    742
    743		mov	r0, #0xc000
    744		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
    745		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
    746
    747		mov	r0, #0
    748		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
    749		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
    750		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
    751		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
    752						@ ...I .... ..D. WC.M
    753		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
    754		orr	r0, r0, #0x1000		@ ...1 .... .... ....
    755
    756		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
    757
    758		mov	r0, #0
    759		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
    760		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
    761		mov	pc, lr
    762
    763__armv3_mpu_cache_on:
    764		mov	r0, #0x3f		@ 4G, the whole
    765		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
    766
    767		mov	r0, #0x80		@ PR7
    768		mcr	p15, 0, r0, c2, c0, 0	@ cache on
    769		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
    770
    771		mov	r0, #0xc000
    772		mcr	p15, 0, r0, c5, c0, 0	@ access permission
    773
    774		mov	r0, #0
    775		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
    776		/*
    777		 * ?? ARMv3 MMU does not allow reading the control register,
    778		 * does this really work on ARMv3 MPU?
    779		 */
    780		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
    781						@ .... .... .... WC.M
    782		orr	r0, r0, #0x000d		@ .... .... .... 11.1
    783		/* ?? this overwrites the value constructed above? */
    784		mov	r0, #0
    785		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
    786
    787		/* ?? invalidate for the second time? */
    788		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
    789		mov	pc, lr
    790
    791#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
    792#define CB_BITS 0x08
    793#else
    794#define CB_BITS 0x0c
    795#endif
    796
    797__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
    798		bic	r3, r3, #0xff		@ Align the pointer
    799		bic	r3, r3, #0x3f00
    800/*
    801 * Initialise the page tables, turning on the cacheable and bufferable
    802 * bits for the RAM area only.
    803 */
    804		mov	r0, r3
    805		mov	r9, r0, lsr #18
    806		mov	r9, r9, lsl #18		@ start of RAM
    807		add	r10, r9, #0x10000000	@ a reasonable RAM size
    808		mov	r1, #0x12		@ XN|U + section mapping
    809		orr	r1, r1, #3 << 10	@ AP=11
    810		add	r2, r3, #16384
    8111:		cmp	r1, r9			@ if virt > start of RAM
    812		cmphs	r10, r1			@   && end of RAM > virt
    813		bic	r1, r1, #0x1c		@ clear XN|U + C + B
    814		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
    815		orrhs	r1, r1, r6		@ set RAM section settings
    816		str	r1, [r0], #4		@ 1:1 mapping
    817		add	r1, r1, #1048576
    818		teq	r0, r2
    819		bne	1b
    820/*
    821 * If ever we are running from Flash, then we surely want the cache
    822 * to be enabled also for our execution instance...  We map 2MB of it
    823 * so there is no map overlap problem for up to 1 MB compressed kernel.
    824 * If the execution is in RAM then we would only be duplicating the above.
    825 */
    826		orr	r1, r6, #0x04		@ ensure B is set for this
    827		orr	r1, r1, #3 << 10
    828		mov	r2, pc
    829		mov	r2, r2, lsr #20
    830		orr	r1, r1, r2, lsl #20
    831		add	r0, r3, r2, lsl #2
    832		str	r1, [r0], #4
    833		add	r1, r1, #1048576
    834		str	r1, [r0]
    835		mov	pc, lr
    836ENDPROC(__setup_mmu)
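/*
 * What __setup_mmu builds, in short: a 16KB level 1 table just below the
 * kernel's execution address, holding 4096 one-MB section entries that
 * map the whole 4GB address space 1:1.  Only the assumed 256MB of RAM
 * (from the 256KB-rounded start in r9) and the 2MB covering the
 * currently executing code get the cache/buffer attributes from r6;
 * everything else is mapped with XN set and C/B clear.
 */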
    837
    838@ Enable unaligned access on v6, to allow better code generation
    839@ for the decompressor C code:
    840__armv6_mmu_cache_on:
    841		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
    842		bic	r0, r0, #2		@ A (no unaligned access fault)
    843		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
    844		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
    845		b	__armv4_mmu_cache_on
    846
    847__arm926ejs_mmu_cache_on:
    848#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
    849		mov	r0, #4			@ put dcache in WT mode
    850		mcr	p15, 7, r0, c15, c0, 0
    851#endif
    852
    853__armv4_mmu_cache_on:
    854		mov	r12, lr
    855#ifdef CONFIG_MMU
    856		mov	r6, #CB_BITS | 0x12	@ U
    857		bl	__setup_mmu
    858		mov	r0, #0
    859		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
    860		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
    861		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
    862		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
    863		orr	r0, r0, #0x0030
    864 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
    865		bl	__common_mmu_cache_on
    866		mov	r0, #0
    867		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
    868#endif
    869		mov	pc, r12
    870
    871__armv7_mmu_cache_on:
    872		enable_cp15_barriers	r11
    873		mov	r12, lr
    874#ifdef CONFIG_MMU
    875		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
    876		tst	r11, #0xf		@ VMSA
    877		movne	r6, #CB_BITS | 0x02	@ !XN
    878		blne	__setup_mmu
    879		mov	r0, #0
    880		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
    881		tst	r11, #0xf		@ VMSA
    882		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
    883#endif
    884		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
    885		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
    886		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
    887		orr	r0, r0, #0x003c		@ write buffer
    888		bic	r0, r0, #2		@ A (no unaligned access fault)
    889		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
    890						@ (needed for ARM1176)
    891#ifdef CONFIG_MMU
    892 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
    893		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
    894		orrne	r0, r0, #1		@ MMU enabled
    895		movne	r1, #0xfffffffd		@ domain 0 = client
    896		bic     r6, r6, #1 << 31        @ 32-bit translation system
    897		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
    898		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
    899		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
    900		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
    901#endif
    902		mcr	p15, 0, r0, c7, c5, 4	@ ISB
    903		mcr	p15, 0, r0, c1, c0, 0	@ load control register
    904		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
    905		mov	r0, #0
    906		mcr	p15, 0, r0, c7, c5, 4	@ ISB
    907		mov	pc, r12
    908
    909__fa526_cache_on:
    910		mov	r12, lr
    911		mov	r6, #CB_BITS | 0x12	@ U
    912		bl	__setup_mmu
    913		mov	r0, #0
    914		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
    915		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
    916		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
    917		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
    918		orr	r0, r0, #0x1000		@ I-cache enable
    919		bl	__common_mmu_cache_on
    920		mov	r0, #0
    921		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
    922		mov	pc, r12
    923
    924__common_mmu_cache_on:
    925#ifndef CONFIG_THUMB2_KERNEL
    926#ifndef DEBUG
    927		orr	r0, r0, #0x000d		@ Write buffer, mmu
    928#endif
    929		mov	r1, #-1
    930		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
    931		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
    932		b	1f
    933		.align	5			@ cache line aligned
    9341:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
    935		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
    936		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
    937#endif
    938
    939#define PROC_ENTRY_SIZE (4*5)
    940
    941/*
    942 * Here follow the relocatable cache support functions for the
    943 * various processors.  This is a generic hook for locating an
    944 * entry and jumping to an instruction at the specified offset
    945 * from the start of the block.  Please note this is all position
    946 * independent code.
    947 *
    948 *  r1  = corrupted
    949 *  r2  = corrupted
    950 *  r3  = block offset
    951 *  r9  = corrupted
    952 *  r12 = corrupted
    953 */
    954
    955call_cache_fn:	adr	r12, proc_types
    956#ifdef CONFIG_CPU_CP15
    957		mrc	p15, 0, r9, c0, c0	@ get processor ID
    958#elif defined(CONFIG_CPU_V7M)
    959		/*
    960		 * On v7-M the processor id is located in the V7M_SCB_CPUID
    961		 * register, but as cache handling is IMPLEMENTATION DEFINED on
     962		 * v7-M (if existent at all) we just return early here.
    963		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
    964		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
    965		 * use cp15 registers that are not implemented on v7-M.
    966		 */
    967		bx	lr
    968#else
    969		ldr	r9, =CONFIG_PROCESSOR_ID
    970#endif
    9711:		ldr	r1, [r12, #0]		@ get value
    972		ldr	r2, [r12, #4]		@ get mask
    973		eor	r1, r1, r9		@ (real ^ match)
    974		tst	r1, r2			@       & mask
    975 ARM(		addeq	pc, r12, r3		) @ call cache function
    976 THUMB(		addeq	r12, r3			)
    977 THUMB(		moveq	pc, r12			) @ call cache function
    978		add	r12, r12, #PROC_ENTRY_SIZE
    979		b	1b
    980
    981/*
    982 * Table for cache operations.  This is basically:
    983 *   - CPU ID match
    984 *   - CPU ID mask
    985 *   - 'cache on' method instruction
    986 *   - 'cache off' method instruction
    987 *   - 'cache flush' method instruction
    988 *
    989 * We match an entry using: ((real_id ^ match) & mask) == 0
    990 *
    991 * Writethrough caches generally only need 'on' and 'off'
    992 * methods.  Writeback caches _must_ have the flush method
    993 * defined.
    994 */
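/*
 * Entry layout, illustratively in C (each entry is PROC_ENTRY_SIZE =
 * 20 bytes: match word, mask word, then one instruction slot each for
 * 'on', 'off' and 'flush', which is why callers set r3 to 8, 12 or 16):
 *
 *	for (u32 *e = (u32 *)proc_types; ; e += 5)
 *		if (((cpuid ^ e[0]) & e[1]) == 0)
 *			goto *((void *)e + method_offset);
 *
 * The final all-zeroes entry matches any CPU, so the scan always
 * terminates.
 */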
    995		.align	2
    996		.type	proc_types,#object
    997proc_types:
    998		.word	0x41000000		@ old ARM ID
    999		.word	0xff00f000
   1000		mov	pc, lr
   1001 THUMB(		nop				)
   1002		mov	pc, lr
   1003 THUMB(		nop				)
   1004		mov	pc, lr
   1005 THUMB(		nop				)
   1006
   1007		.word	0x41007000		@ ARM7/710
   1008		.word	0xfff8fe00
   1009		mov	pc, lr
   1010 THUMB(		nop				)
   1011		mov	pc, lr
   1012 THUMB(		nop				)
   1013		mov	pc, lr
   1014 THUMB(		nop				)
   1015
   1016		.word	0x41807200		@ ARM720T (writethrough)
   1017		.word	0xffffff00
   1018		W(b)	__armv4_mmu_cache_on
   1019		W(b)	__armv4_mmu_cache_off
   1020		mov	pc, lr
   1021 THUMB(		nop				)
   1022
   1023		.word	0x41007400		@ ARM74x
   1024		.word	0xff00ff00
   1025		W(b)	__armv3_mpu_cache_on
   1026		W(b)	__armv3_mpu_cache_off
   1027		W(b)	__armv3_mpu_cache_flush
   1028		
   1029		.word	0x41009400		@ ARM94x
   1030		.word	0xff00ff00
   1031		W(b)	__armv4_mpu_cache_on
   1032		W(b)	__armv4_mpu_cache_off
   1033		W(b)	__armv4_mpu_cache_flush
   1034
   1035		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
   1036		.word	0xff0ffff0
   1037		W(b)	__arm926ejs_mmu_cache_on
   1038		W(b)	__armv4_mmu_cache_off
   1039		W(b)	__armv5tej_mmu_cache_flush
   1040
   1041		.word	0x00007000		@ ARM7 IDs
   1042		.word	0x0000f000
   1043		mov	pc, lr
   1044 THUMB(		nop				)
   1045		mov	pc, lr
   1046 THUMB(		nop				)
   1047		mov	pc, lr
   1048 THUMB(		nop				)
   1049
   1050		@ Everything from here on will be the new ID system.
   1051
   1052		.word	0x4401a100		@ sa110 / sa1100
   1053		.word	0xffffffe0
   1054		W(b)	__armv4_mmu_cache_on
   1055		W(b)	__armv4_mmu_cache_off
   1056		W(b)	__armv4_mmu_cache_flush
   1057
   1058		.word	0x6901b110		@ sa1110
   1059		.word	0xfffffff0
   1060		W(b)	__armv4_mmu_cache_on
   1061		W(b)	__armv4_mmu_cache_off
   1062		W(b)	__armv4_mmu_cache_flush
   1063
   1064		.word	0x56056900
   1065		.word	0xffffff00		@ PXA9xx
   1066		W(b)	__armv4_mmu_cache_on
   1067		W(b)	__armv4_mmu_cache_off
   1068		W(b)	__armv4_mmu_cache_flush
   1069
   1070		.word	0x56158000		@ PXA168
   1071		.word	0xfffff000
   1072		W(b)	__armv4_mmu_cache_on
   1073		W(b)	__armv4_mmu_cache_off
   1074		W(b)	__armv5tej_mmu_cache_flush
   1075
   1076		.word	0x56050000		@ Feroceon
   1077		.word	0xff0f0000
   1078		W(b)	__armv4_mmu_cache_on
   1079		W(b)	__armv4_mmu_cache_off
   1080		W(b)	__armv5tej_mmu_cache_flush
   1081
   1082#ifdef CONFIG_CPU_FEROCEON_OLD_ID
   1083		/* this conflicts with the standard ARMv5TE entry */
   1084		.long	0x41009260		@ Old Feroceon
   1085		.long	0xff00fff0
   1086		b	__armv4_mmu_cache_on
   1087		b	__armv4_mmu_cache_off
   1088		b	__armv5tej_mmu_cache_flush
   1089#endif
   1090
   1091		.word	0x66015261		@ FA526
   1092		.word	0xff01fff1
   1093		W(b)	__fa526_cache_on
   1094		W(b)	__armv4_mmu_cache_off
   1095		W(b)	__fa526_cache_flush
   1096
   1097		@ These match on the architecture ID
   1098
   1099		.word	0x00020000		@ ARMv4T
   1100		.word	0x000f0000
   1101		W(b)	__armv4_mmu_cache_on
   1102		W(b)	__armv4_mmu_cache_off
   1103		W(b)	__armv4_mmu_cache_flush
   1104
   1105		.word	0x00050000		@ ARMv5TE
   1106		.word	0x000f0000
   1107		W(b)	__armv4_mmu_cache_on
   1108		W(b)	__armv4_mmu_cache_off
   1109		W(b)	__armv4_mmu_cache_flush
   1110
   1111		.word	0x00060000		@ ARMv5TEJ
   1112		.word	0x000f0000
   1113		W(b)	__armv4_mmu_cache_on
   1114		W(b)	__armv4_mmu_cache_off
   1115		W(b)	__armv5tej_mmu_cache_flush
   1116
   1117		.word	0x0007b000		@ ARMv6
   1118		.word	0x000ff000
   1119		W(b)	__armv6_mmu_cache_on
   1120		W(b)	__armv4_mmu_cache_off
   1121		W(b)	__armv6_mmu_cache_flush
   1122
   1123		.word	0x000f0000		@ new CPU Id
   1124		.word	0x000f0000
   1125		W(b)	__armv7_mmu_cache_on
   1126		W(b)	__armv7_mmu_cache_off
   1127		W(b)	__armv7_mmu_cache_flush
   1128
   1129		.word	0			@ unrecognised type
   1130		.word	0
   1131		mov	pc, lr
   1132 THUMB(		nop				)
   1133		mov	pc, lr
   1134 THUMB(		nop				)
   1135		mov	pc, lr
   1136 THUMB(		nop				)
   1137
   1138		.size	proc_types, . - proc_types
   1139
   1140		/*
   1141		 * If you get a "non-constant expression in ".if" statement"
   1142		 * error from the assembler on this line, check that you have
   1143		 * not accidentally written a "b" instruction where you should
   1144		 * have written W(b).
   1145		 */
   1146		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
   1147		.error "The size of one or more proc_types entries is wrong."
   1148		.endif
   1149
   1150/*
   1151 * Turn off the Cache and MMU.  ARMv3 does not support
   1152 * reading the control register, but ARMv4 does.
   1153 *
   1154 * On exit,
   1155 *  r0, r1, r2, r3, r9, r12 corrupted
   1156 * This routine must preserve:
   1157 *  r4, r7, r8
   1158 */
   1159		.align	5
   1160cache_off:	mov	r3, #12			@ cache_off function
   1161		b	call_cache_fn
   1162
   1163__armv4_mpu_cache_off:
   1164		mrc	p15, 0, r0, c1, c0
   1165		bic	r0, r0, #0x000d
   1166		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
   1167		mov	r0, #0
   1168		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
   1169		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
   1170		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
   1171		mov	pc, lr
   1172
   1173__armv3_mpu_cache_off:
   1174		mrc	p15, 0, r0, c1, c0
   1175		bic	r0, r0, #0x000d
   1176		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
   1177		mov	r0, #0
   1178		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
   1179		mov	pc, lr
   1180
   1181__armv4_mmu_cache_off:
   1182#ifdef CONFIG_MMU
   1183		mrc	p15, 0, r0, c1, c0
   1184		bic	r0, r0, #0x000d
   1185		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
   1186		mov	r0, #0
   1187		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
   1188		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
   1189#endif
   1190		mov	pc, lr
   1191
   1192__armv7_mmu_cache_off:
   1193		mrc	p15, 0, r0, c1, c0
   1194#ifdef CONFIG_MMU
   1195		bic	r0, r0, #0x0005
   1196#else
   1197		bic	r0, r0, #0x0004
   1198#endif
   1199		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
   1200		mov	r0, #0
   1201#ifdef CONFIG_MMU
   1202		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
   1203#endif
   1204		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
   1205		mcr	p15, 0, r0, c7, c10, 4	@ DSB
   1206		mcr	p15, 0, r0, c7, c5, 4	@ ISB
   1207		mov	pc, lr
   1208
   1209/*
   1210 * Clean and flush the cache to maintain consistency.
   1211 *
   1212 * On entry,
   1213 *  r0 = start address
   1214 *  r1 = end address (exclusive)
   1215 * On exit,
   1216 *  r1, r2, r3, r9, r10, r11, r12 corrupted
   1217 * This routine must preserve:
   1218 *  r4, r6, r7, r8
   1219 */
   1220		.align	5
   1221cache_clean_flush:
   1222		mov	r3, #16
   1223		mov	r11, r1
   1224		b	call_cache_fn
   1225
   1226__armv4_mpu_cache_flush:
   1227		tst	r4, #1
   1228		movne	pc, lr
   1229		mov	r2, #1
   1230		mov	r3, #0
   1231		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
   1232		mov	r1, #7 << 5		@ 8 segments
   12331:		orr	r3, r1, #63 << 26	@ 64 entries
   12342:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
   1235		subs	r3, r3, #1 << 26
   1236		bcs	2b			@ entries 63 to 0
   1237		subs 	r1, r1, #1 << 5
   1238		bcs	1b			@ segments 7 to 0
   1239
   1240		teq	r2, #0
   1241		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
   1242		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
   1243		mov	pc, lr
   1244		
   1245__fa526_cache_flush:
   1246		tst	r4, #1
   1247		movne	pc, lr
   1248		mov	r1, #0
   1249		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
   1250		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
   1251		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
   1252		mov	pc, lr
   1253
   1254__armv6_mmu_cache_flush:
   1255		mov	r1, #0
   1256		tst	r4, #1
   1257		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
   1258		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
   1259		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
   1260		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
   1261		mov	pc, lr
   1262
   1263__armv7_mmu_cache_flush:
   1264		enable_cp15_barriers	r10
   1265		tst	r4, #1
   1266		bne	iflush
   1267		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
   1268		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
   1269		mov	r10, #0
   1270		beq	hierarchical
   1271		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
   1272		b	iflush
   1273hierarchical:
   1274		dcache_line_size r1, r2		@ r1 := dcache min line size
   1275		sub	r2, r1, #1		@ r2 := line size mask
   1276		bic	r0, r0, r2		@ round down start to line size
   1277		sub	r11, r11, #1		@ end address is exclusive
   1278		bic	r11, r11, r2		@ round down end to line size
   12790:		cmp	r0, r11			@ finished?
   1280		bgt	iflush
   1281		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
   1282		add	r0, r0, r1
   1283		b	0b
   1284iflush:
   1285		mcr	p15, 0, r10, c7, c10, 4	@ DSB
   1286		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
   1287		mcr	p15, 0, r10, c7, c10, 4	@ DSB
   1288		mcr	p15, 0, r10, c7, c5, 4	@ ISB
   1289		mov	pc, lr
   1290
   1291__armv5tej_mmu_cache_flush:
   1292		tst	r4, #1
   1293		movne	pc, lr
   12941:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
   1295		bne	1b
   1296		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
   1297		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
   1298		mov	pc, lr
   1299
   1300__armv4_mmu_cache_flush:
   1301		tst	r4, #1
   1302		movne	pc, lr
   1303		mov	r2, #64*1024		@ default: 32K dcache size (*2)
   1304		mov	r11, #32		@ default: 32 byte line size
   1305		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
   1306		teq	r3, r9			@ cache ID register present?
   1307		beq	no_cache_id
   1308		mov	r1, r3, lsr #18
   1309		and	r1, r1, #7
   1310		mov	r2, #1024
   1311		mov	r2, r2, lsl r1		@ base dcache size *2
   1312		tst	r3, #1 << 14		@ test M bit
   1313		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
   1314		mov	r3, r3, lsr #12
   1315		and	r3, r3, #3
   1316		mov	r11, #8
   1317		mov	r11, r11, lsl r3	@ cache line size in bytes
   1318no_cache_id:
   1319		mov	r1, pc
   1320		bic	r1, r1, #63		@ align to longest cache line
   1321		add	r2, r1, r2
   13221:
   1323 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
   1324 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
   1325 THUMB(		add     r1, r1, r11		)
   1326		teq	r1, r2
   1327		bne	1b
   1328
   1329		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
   1330		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
   1331		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
   1332		mov	pc, lr
   1333
   1334__armv3_mmu_cache_flush:
   1335__armv3_mpu_cache_flush:
   1336		tst	r4, #1
   1337		movne	pc, lr
   1338		mov	r1, #0
   1339		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
   1340		mov	pc, lr
   1341
   1342/*
   1343 * Various debugging routines for printing hex characters and
   1344 * memory, which again must be relocatable.
   1345 */
   1346#ifdef DEBUG
   1347		.align	2
   1348		.type	phexbuf,#object
   1349phexbuf:	.space	12
   1350		.size	phexbuf, . - phexbuf
   1351
   1352@ phex corrupts {r0, r1, r2, r3}
   1353phex:		adr	r3, phexbuf
   1354		mov	r2, #0
   1355		strb	r2, [r3, r1]
   13561:		subs	r1, r1, #1
   1357		movmi	r0, r3
   1358		bmi	puts
   1359		and	r2, r0, #15
   1360		mov	r0, r0, lsr #4
   1361		cmp	r2, #10
   1362		addge	r2, r2, #7
   1363		add	r2, r2, #'0'
   1364		strb	r2, [r3, r1]
   1365		b	1b
   1366
   1367@ puts corrupts {r0, r1, r2, r3}
   1368puts:		loadsp	r3, r2, r1
   13691:		ldrb	r2, [r0], #1
   1370		teq	r2, #0
   1371		moveq	pc, lr
   13722:		writeb	r2, r3, r1
   1373		mov	r1, #0x00020000
   13743:		subs	r1, r1, #1
   1375		bne	3b
   1376		teq	r2, #'\n'
   1377		moveq	r2, #'\r'
   1378		beq	2b
   1379		teq	r0, #0
   1380		bne	1b
   1381		mov	pc, lr
   1382@ putc corrupts {r0, r1, r2, r3}
   1383putc:
   1384		mov	r2, r0
   1385		loadsp	r3, r1, r0
   1386		mov	r0, #0
   1387		b	2b
   1388
   1389@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
   1390memdump:	mov	r12, r0
   1391		mov	r10, lr
   1392		mov	r11, #0
   13932:		mov	r0, r11, lsl #2
   1394		add	r0, r0, r12
   1395		mov	r1, #8
   1396		bl	phex
   1397		mov	r0, #':'
   1398		bl	putc
   13991:		mov	r0, #' '
   1400		bl	putc
   1401		ldr	r0, [r12, r11, lsl #2]
   1402		mov	r1, #8
   1403		bl	phex
   1404		and	r0, r11, #7
   1405		teq	r0, #3
   1406		moveq	r0, #' '
   1407		bleq	putc
   1408		and	r0, r11, #7
   1409		add	r11, r11, #1
   1410		teq	r0, #7
   1411		bne	1b
   1412		mov	r0, #'\n'
   1413		bl	putc
   1414		cmp	r11, #64
   1415		blt	2b
   1416		mov	pc, r10
   1417#endif
   1418
   1419		.ltorg
   1420
   1421#ifdef CONFIG_ARM_VIRT_EXT
   1422.align 5
   1423__hyp_reentry_vectors:
   1424		W(b)	.			@ reset
   1425		W(b)	.			@ undef
   1426#ifdef CONFIG_EFI_STUB
   1427		W(b)	__enter_kernel_from_hyp	@ hvc from HYP
   1428#else
   1429		W(b)	.			@ svc
   1430#endif
   1431		W(b)	.			@ pabort
   1432		W(b)	.			@ dabort
   1433		W(b)	__enter_kernel		@ hyp
   1434		W(b)	.			@ irq
   1435		W(b)	.			@ fiq
   1436#endif /* CONFIG_ARM_VIRT_EXT */
   1437
   1438__enter_kernel:
   1439		mov	r0, #0			@ must be 0
   1440		mov	r1, r7			@ restore architecture number
   1441		mov	r2, r8			@ restore atags pointer
   1442 ARM(		mov	pc, r4		)	@ call kernel
   1443 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
   1444 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
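		/*
		 * The register state set up above is the documented ARM
		 * Linux boot interface: r0 = 0, r1 = machine type number,
		 * r2 = physical address of the ATAG list or DTB, entered
		 * with the MMU off and the caches clean and disabled.
		 */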
   1445
   1446reloc_code_end:
   1447
   1448#ifdef CONFIG_EFI_STUB
   1449__enter_kernel_from_hyp:
   1450		mrc	p15, 4, r0, c1, c0, 0	@ read HSCTLR
   1451		bic	r0, r0, #0x5		@ disable MMU and caches
   1452		mcr	p15, 4, r0, c1, c0, 0	@ write HSCTLR
   1453		isb
   1454		b	__enter_kernel
   1455
   1456ENTRY(efi_enter_kernel)
   1457		mov	r4, r0			@ preserve image base
   1458		mov	r8, r1			@ preserve DT pointer
   1459
   1460		adr_l	r0, call_cache_fn
   1461		adr	r1, 0f			@ clean the region of code we
   1462		bl	cache_clean_flush	@ may run with the MMU off
   1463
   1464#ifdef CONFIG_ARM_VIRT_EXT
   1465		@
   1466		@ The EFI spec does not support booting on ARM in HYP mode,
   1467		@ since it mandates that the MMU and caches are on, with all
   1468		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
   1469		@
   1470		@ While the EDK2 reference implementation adheres to this,
   1471		@ U-Boot might decide to enter the EFI stub in HYP mode
   1472		@ anyway, with the MMU and caches either on or off.
   1473		@
   1474		mrs	r0, cpsr		@ get the current mode
   1475		msr	spsr_cxsf, r0		@ record boot mode
   1476		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
   1477		cmp	r0, #HYP_MODE
   1478		bne	.Lefi_svc
   1479
   1480		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
   1481		tst	r1, #0x1		@ MMU enabled at HYP?
   1482		beq	1f
   1483
   1484		@
   1485		@ When running in HYP mode with the caches on, we're better
   1486		@ off just carrying on using the cached 1:1 mapping that the
   1487		@ firmware provided. Set up the HYP vectors so HVC instructions
   1488		@ issued from HYP mode take us to the correct handler code. We
   1489		@ will disable the MMU before jumping to the kernel proper.
   1490		@
   1491 ARM(		bic	r1, r1, #(1 << 30)	) @ clear HSCTLR.TE
   1492 THUMB(		orr	r1, r1, #(1 << 30)	) @ set HSCTLR.TE
   1493		mcr	p15, 4, r1, c1, c0, 0
   1494		adr	r0, __hyp_reentry_vectors
   1495		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
   1496		isb
   1497		b	.Lefi_hyp
   1498
   1499		@
   1500		@ When running in HYP mode with the caches off, we need to drop
   1501		@ into SVC mode now, and let the decompressor set up its cached
   1502		@ 1:1 mapping as usual.
   1503		@
   15041:		mov	r9, r4			@ preserve image base
   1505		bl	__hyp_stub_install	@ install HYP stub vectors
   1506		safe_svcmode_maskall	r1	@ drop to SVC mode
   1507		msr	spsr_cxsf, r0		@ record boot mode
   1508		orr	r4, r9, #1		@ restore image base and set LSB
   1509		b	.Lefi_hyp
   1510.Lefi_svc:
   1511#endif
   1512		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
   1513		tst	r0, #0x1		@ MMU enabled?
   1514		orreq	r4, r4, #1		@ set LSB if not
   1515
   1516.Lefi_hyp:
   1517		mov	r0, r8			@ DT start
   1518		add	r1, r8, r2		@ DT end
   1519		bl	cache_clean_flush
   1520
   1521		adr	r0, 0f			@ switch to our stack
   1522		ldr	sp, [r0]
   1523		add	sp, sp, r0
   1524
   1525		mov	r5, #0			@ appended DTB size
   1526		mov	r7, #0xFFFFFFFF		@ machine ID
   1527		b	wont_overwrite
   1528ENDPROC(efi_enter_kernel)
   15290:		.long	.L_user_stack_end - .
   1530#endif
   1531
   1532		.align
   1533		.section ".stack", "aw", %nobits
   1534.L_user_stack:	.space	4096
   1535.L_user_stack_end: