cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

misc_32.S (7450B)


/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains miscellaneous low-level functions.
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
 * and Paul Mackerras.
 *
 */

#include <linux/sys.h>
#include <asm/unistd.h>
#include <asm/errno.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/cache.h>
#include <asm/cputable.h>
#include <asm/mmu.h>
#include <asm/ppc_asm.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/processor.h>
#include <asm/bug.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/feature-fixups.h>

	.text

/*
 * This returns the high 64 bits of the product of two 64-bit numbers.
 */
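/*
 * A rough C sketch of what the routine below computes, built from the
 * same 32-bit partial products the assembly uses (the 64-bit operands
 * arrive as register pairs: high words in r3/r5, low words in r4/r6).
 * Function and variable names here are illustrative only.
 *
 *	u64 mulhdu_sketch(u64 a, u64 b)
 *	{
 *		u32 ah = a >> 32, al = a, bh = b >> 32, bl = b;
 *		u64 lo   = (u64)al * bl;
 *		u64 mid  = (u64)ah * bl + (lo >> 32);
 *		u64 mid2 = (u64)al * bh + (u32)mid;
 *
 *		return (u64)ah * bh + (mid >> 32) + (mid2 >> 32);
 *	}
 */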
_GLOBAL(mulhdu)
	cmpwi	r6,0
	cmpwi	cr1,r3,0
	mr	r10,r4
	mulhwu	r4,r4,r5
	beq	1f
	mulhwu	r0,r10,r6
	mullw	r7,r10,r5
	addc	r7,r0,r7
	addze	r4,r4
1:	beqlr	cr1		/* all done if high part of A is 0 */
	mullw	r9,r3,r5
	mulhwu	r10,r3,r5
	beq	2f
	mullw	r0,r3,r6
	mulhwu	r8,r3,r6
	addc	r7,r0,r7
	adde	r4,r4,r8
	addze	r10,r10
2:	addc	r4,r4,r9
	addze	r3,r10
	blr

/*
 * reloc_got2 runs through the .got2 section adding an offset
 * to each entry.
 */
_GLOBAL(reloc_got2)
	mflr	r11
	lis	r7,__got2_start@ha
	addi	r7,r7,__got2_start@l
	lis	r8,__got2_end@ha
	addi	r8,r8,__got2_end@l
	subf	r8,r7,r8
	srwi.	r8,r8,2
	beqlr
	mtctr	r8
	bcl	20,31,$+4
1:	mflr	r0
	lis	r4,1b@ha
	addi	r4,r4,1b@l
	subf	r0,r4,r0
	add	r7,r0,r7
2:	lwz	r0,0(r7)
	add	r0,r0,r3
	stw	r0,0(r7)
	addi	r7,r7,4
	bdnz	2b
	mtlr	r11
	blr

/*
 * call_setup_cpu - call the setup_cpu function for this cpu
 * r3 = data offset, r24 = cpu number
 *
 * Setup function is called with:
 *   r3 = data offset
 *   r4 = ptr to CPU spec (relocated)
 */
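/*
 * Illustrative C sketch of the lookup below (it assumes the cpu_setup
 * member that the CPU_SPEC_SETUP asm-offset resolves to; the names
 * here are not kernel API).  Every pointer, including cur_cpu_spec
 * itself and the setup function, is adjusted by the data offset
 * because this may run before the kernel has been relocated.
 *
 *	void call_setup_cpu_sketch(unsigned long offset)
 *	{
 *		struct cpu_spec *spec;
 *		void (*setup)(unsigned long, struct cpu_spec *);
 *
 *		spec = *(struct cpu_spec **)((void *)&cur_cpu_spec + offset);
 *		spec = (void *)spec + offset;
 *		setup = spec->cpu_setup;
 *		if (setup)
 *			((typeof(setup))((void *)setup + offset))(offset, spec);
 *	}
 */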
_GLOBAL(call_setup_cpu)
	addis	r4,r3,cur_cpu_spec@ha
	addi	r4,r4,cur_cpu_spec@l
	lwz	r4,0(r4)
	add	r4,r4,r3
	lwz	r5,CPU_SPEC_SETUP(r4)
	cmpwi	0,r5,0
	add	r5,r5,r3
	beqlr
	mtctr	r5
	bctr

#if defined(CONFIG_CPU_FREQ_PMAC) && defined(CONFIG_PPC_BOOK3S_32)

/* This is called by via-pmu.c to switch the PLL selection
 * on the 750FX CPU. This function should really be moved
 * elsewhere (as should most of the cpufreq code in via-pmu).
 */
_GLOBAL(low_choose_750fx_pll)
	/* Clear MSR:EE */
	mfmsr	r7
	rlwinm	r0,r7,0,17,15
	mtmsr	r0

	/* If switching to PLL1, disable HID0:BTIC */
	cmplwi	cr0,r3,0
	beq	1f
	mfspr	r5,SPRN_HID0
	rlwinm	r5,r5,0,27,25
	sync
	mtspr	SPRN_HID0,r5
	isync
	sync

1:
	/* Calc new HID1 value */
	mfspr	r4,SPRN_HID1	/* Read the current HID1 value */
	rlwinm	r5,r3,16,15,15	/* Build a HID1:PS bit from the parameter */
	rlwinm	r4,r4,0,16,14	/* Clear HID1:PS from the value read (rlwimi could merge these steps) */
	or	r4,r4,r5
	mtspr	SPRN_HID1,r4

#ifdef CONFIG_SMP
	/* Store new HID1 image */
	lwz	r6,TASK_CPU(r2)
	slwi	r6,r6,2
#else
	li	r6, 0
#endif
	addis	r6,r6,nap_save_hid1@ha
	stw	r4,nap_save_hid1@l(r6)

	/* If switching to PLL0, enable HID0:BTIC */
	cmplwi	cr0,r3,0
	bne	1f
	mfspr	r5,SPRN_HID0
	ori	r5,r5,HID0_BTIC
	sync
	mtspr	SPRN_HID0,r5
	isync
	sync

1:
	/* Return */
	mtmsr	r7
	blr

_GLOBAL(low_choose_7447a_dfs)
	/* Clear MSR:EE */
	mfmsr	r7
	rlwinm	r0,r7,0,17,15
	mtmsr	r0

	/* Calc new HID1 value */
	mfspr	r4,SPRN_HID1
	insrwi	r4,r3,1,9	/* insert parameter into bit 9 */
	sync
	mtspr	SPRN_HID1,r4
	sync
	isync

	/* Return */
	mtmsr	r7
	blr

#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */

#ifdef CONFIG_40x

/*
 * Do an IO access in real mode
 */
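/*
 * The pattern below, sketched in C for illustration (assuming the
 * usual mfmsr()/mtmsr()/isync() helpers; the assembly additionally
 * brackets each MSR write with sync): data relocation (MSR_DR) is
 * turned off so the load or store goes to the physical address, then
 * the original MSR is restored.
 *
 *	u8 real_readb_sketch(volatile u8 *paddr)
 *	{
 *		unsigned long msr = mfmsr();
 *		u8 val;
 *
 *		mtmsr(msr & ~MSR_DR);
 *		isync();
 *		val = *paddr;
 *		mtmsr(msr);
 *		isync();
 *		return val;
 *	}
 */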
_GLOBAL(real_readb)
	mfmsr	r7
	rlwinm	r0,r7,0,~MSR_DR
	sync
	mtmsr	r0
	sync
	isync
	lbz	r3,0(r3)
	sync
	mtmsr	r7
	sync
	isync
	blr
_ASM_NOKPROBE_SYMBOL(real_readb)

/*
 * Do an IO access in real mode
 */
_GLOBAL(real_writeb)
	mfmsr	r7
	rlwinm	r0,r7,0,~MSR_DR
	sync
	mtmsr	r0
	sync
	isync
	stb	r3,0(r4)
	sync
	mtmsr	r7
	sync
	isync
	blr
_ASM_NOKPROBE_SYMBOL(real_writeb)

#endif /* CONFIG_40x */

/*
 * Copy a whole page.  We use the dcbz instruction on the destination
 * to reduce memory traffic (it eliminates the unnecessary reads of
 * the destination into cache).  This requires that the destination
 * is cacheable.
 */
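/*
 * For illustration only, a C sketch of the structure below (the
 * function name and the dcbz inline asm are assumptions, not kernel
 * API): for each cache line of the destination, dcbz zeroes the line
 * in cache without fetching it from memory, then the line is filled
 * from the source (the COPY_16_BYTES macro does this 16 bytes at a
 * time while dcbt prefetches the source ahead of use).
 *
 *	void copy_page_sketch(void *to, const void *from)
 *	{
 *		int i;
 *
 *		for (i = 0; i < PAGE_SIZE; i += L1_CACHE_BYTES) {
 *			asm volatile("dcbz 0,%0" : : "r"(to + i) : "memory");
 *			memcpy(to + i, from + i, L1_CACHE_BYTES);
 *		}
 *	}
 */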
#define COPY_16_BYTES		\
	lwz	r6,4(r4);	\
	lwz	r7,8(r4);	\
	lwz	r8,12(r4);	\
	lwzu	r9,16(r4);	\
	stw	r6,4(r3);	\
	stw	r7,8(r3);	\
	stw	r8,12(r3);	\
	stwu	r9,16(r3)

_GLOBAL(copy_page)
	rlwinm	r5, r3, 0, L1_CACHE_BYTES - 1
	addi	r3,r3,-4

0:	twnei	r5, 0	/* WARN if r3 is not cache aligned */
	EMIT_WARN_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING

	addi	r4,r4,-4

	li	r5,4

#if MAX_COPY_PREFETCH > 1
	li	r0,MAX_COPY_PREFETCH
	li	r11,4
	mtctr	r0
11:	dcbt	r11,r4
	addi	r11,r11,L1_CACHE_BYTES
	bdnz	11b
#else /* MAX_COPY_PREFETCH == 1 */
	dcbt	r5,r4
	li	r11,L1_CACHE_BYTES+4
#endif /* MAX_COPY_PREFETCH */
	li	r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH
	crclr	4*cr0+eq
2:
	mtctr	r0
1:
	dcbt	r11,r4
	dcbz	r5,r3
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	1b
	beqlr
	crnot	4*cr0+eq,4*cr0+eq
	li	r0,MAX_COPY_PREFETCH
	li	r11,4
	b	2b
EXPORT_SYMBOL(copy_page)

/*
 * Extended precision shifts.
 *
 * Updated to be valid for shift counts from 0 to 63 inclusive.
 * -- Gabriel
 *
 * R3/R4 hold the 64-bit value (high word in R3, low word in R4)
 * R5    holds the shift count
 * result in R3/R4
 *
 *  ashrdi3: arithmetic right shift (sign propagation)
 *  lshrdi3: logical right shift
 *  ashldi3: left shift
 */
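/*
 * Illustrative C equivalent of __ashrdi3 below, working word by word
 * with the register convention above (names are illustrative only).
 * The assembly needs none of the explicit branches because slw/srw
 * yield 0, and sraw yields all copies of the sign bit, for shift
 * amounts in the range 32..63.
 *
 *	u64 ashrdi3_sketch(u64 val, unsigned int count)
 *	{
 *		u32 msw = val >> 32, lsw = val;
 *
 *		if (count == 0)
 *			return val;
 *		if (count < 32) {
 *			lsw = (lsw >> count) | (msw << (32 - count));
 *			msw = (u32)((s32)msw >> count);
 *		} else {
 *			lsw = (u32)((s32)msw >> (count - 32));
 *			msw = (u32)((s32)msw >> 31);
 *		}
 *		return ((u64)msw << 32) | lsw;
 *	}
 */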
_GLOBAL(__ashrdi3)
	subfic	r6,r5,32
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	rlwinm	r8,r7,0,32	# t3 = (count < 32) ? 32 : 0
	sraw	r7,r3,r7	# t2 = MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	slw	r7,r7,r8	# t2 = (count < 32) ? 0 : t2
	sraw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr
EXPORT_SYMBOL(__ashrdi3)

_GLOBAL(__ashldi3)
	subfic	r6,r5,32
	slw	r3,r3,r5	# MSW = count > 31 ? 0 : MSW << count
	addi	r7,r5,32	# could be xori, or addi with -32
	srw	r6,r4,r6	# t1 = count > 31 ? 0 : LSW >> (32-count)
	slw	r7,r4,r7	# t2 = count < 32 ? 0 : LSW << (count-32)
	or	r3,r3,r6	# MSW |= t1
	slw	r4,r4,r5	# LSW = LSW << count
	or	r3,r3,r7	# MSW |= t2
	blr
EXPORT_SYMBOL(__ashldi3)

_GLOBAL(__lshrdi3)
	subfic	r6,r5,32
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	srw	r7,r3,r7	# t2 = count < 32 ? 0 : MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	srw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr
EXPORT_SYMBOL(__lshrdi3)

/*
 * 64-bit comparison: __cmpdi2(s64 a, s64 b)
 * Returns 0 if a < b, 1 if a == b, 2 if a > b.
 */
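/*
 * Illustrative C equivalent (name is illustrative only); __ucmpdi2
 * further below is identical except the comparison is unsigned:
 *
 *	int cmpdi2_sketch(s64 a, s64 b)
 *	{
 *		if (a < b)
 *			return 0;
 *		return (a == b) ? 1 : 2;
 *	}
 */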
_GLOBAL(__cmpdi2)
	cmpw	r3,r5
	li	r3,1
	bne	1f
	cmplw	r4,r6
	beqlr
1:	li	r3,0
	bltlr
	li	r3,2
	blr
EXPORT_SYMBOL(__cmpdi2)

/*
 * 64-bit comparison: __ucmpdi2(u64 a, u64 b)
 * Returns 0 if a < b, 1 if a == b, 2 if a > b.
 */
_GLOBAL(__ucmpdi2)
	cmplw	r3,r5
	li	r3,1
	bne	1f
	cmplw	r4,r6
	beqlr
1:	li	r3,0
	bltlr
	li	r3,2
	blr
EXPORT_SYMBOL(__ucmpdi2)

_GLOBAL(__bswapdi2)
	rotlwi  r9,r4,8
	rotlwi  r10,r3,8
	rlwimi  r9,r4,24,0,7
	rlwimi  r10,r3,24,0,7
	rlwimi  r9,r4,24,16,23
	rlwimi  r10,r3,24,16,23
	mr      r3,r9
	mr      r4,r10
	blr
EXPORT_SYMBOL(__bswapdi2)

#ifdef CONFIG_SMP
_GLOBAL(start_secondary_resume)
	/* Reset stack */
	rlwinm	r1, r1, 0, 0, 31 - THREAD_SHIFT
	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
	li	r3,0
	stw	r3,0(r1)		/* Zero the stack frame pointer */
	bl	start_secondary
	b	.
#endif /* CONFIG_SMP */