cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vector.S (7110B)


/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
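/*
 * r3 holds the address of the thread_vr_state to load.  lvx uses indexed
 * addressing (EA = RA + RB), so with r4 = VRSTATE_VSCR the first lvx
 * fetches the saved VSCR image through v0.  v0 is only a scratch here:
 * REST_32VRS reloads all of v0-v31 afterwards, reusing r4 for offsets.
 */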
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
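/*
 * Mirror of load_vr_state: r3 is the destination thread_vr_state.  All
 * 32 VRs are written out first so that v0 can then be reused as a
 * scratch to capture the VSCR (mfvscr) and store it at VRSTATE_VSCR.
 */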
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
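/*
 * Overall flow below: first set MSR_VEC in the kernel's own MSR (via
 * oris/...@h, since the bit sits in the upper half of the low 32 MSR
 * bits) so the VMX accesses here do not fault, make sure VRSAVE is
 * non-zero for glibc, then set MSR_VEC in the interrupted context's
 * saved MSR so the task keeps AltiVec enabled after the return.
 */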
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	ori	r5,r5,MSR_RI
#endif
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	addi	r5,r2,THREAD
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S_64
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
#endif
#endif
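	/*
	 * Common tail for 32- and 64-bit: flag the thread as having VMX
	 * state loaded and used, then reload VSCR and v0-v31 from the
	 * thread's vr_state (r5 points at the thread_struct, r6 at its
	 * vr_state).
	 */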
	li	r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr
_ASM_NOKPROBE_SYMBOL(load_up_altivec)

/*
 * save_altivec(tsk)
 * Save the vector registers to the task's thread_struct
 */
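/*
 * If the thread has a vr_save_area pointer set, the registers are saved
 * there; otherwise they go into the thread's own vr_state.  As in
 * store_vr_state, the VRs are written out before v0 is reused to carry
 * the VSCR.
 */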
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
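/*
 * VSX has no register file of its own - the VSRs overlay the FPRs and
 * the VRs - so loading VSX state means loading the FP and VMX state.
 * The beql+ branches below call load_up_fpu/load_up_altivec only when
 * the corresponding bit in r12 (the interrupted MSR) is still clear,
 * i.e. that state has not been loaded yet.
 */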
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	li	r5,MSR_RI
	mtmsrd	r5,1
#endif

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
	b	fast_interrupt_return_srr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
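/*
 * Each v*fp routine below emulates an operation on a 128-bit VMX vector
 * of four 32-bit floats one lane at a time using scalar FP instructions:
 * the loop runs CTR=4 times, stepping the byte offset by 4.  r3 points
 * at the destination vector and r4 (plus r5/r6 where present) at the
 * sources.  Each routine saves LR in r12 around the fpenable/fpdisable
 * pair.
 */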
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif
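/*
 * LDCONST(fr, name) loads one of the constants above into an FPR: on
 * 32-bit they are single-precision values in .data reached via lis/@ha
 * plus lfs/@l, on 64-bit they are doubles placed in the TOC and loaded
 * with a single lfd ...@toc(r2).
 */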

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
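/*
 * Protocol: callers do "mflr r12; bl fpenable".  fpenable allocates a
 * 64-byte stack frame, enables MSR_FP (keeping the original MSR in r10),
 * saves fr0/fr1/fr31 in the frame, stashes the original FPSCR in fr31
 * and clears FPSCR.  The matching "b fpdisable" restores FPSCR, the
 * saved FPRs and the MSR, pops the frame, and returns to the original
 * caller through the LR value held in r12.
 */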
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
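/*
 * Per lane: fr0 is loaded from r4, fr1 from r5, fr2 from r6, and
 * fmadds fr0,fr0,fr2,fr1 computes (fr0 * fr2) + fr1, i.e. the r5 operand
 * is the addend.  fr2 is an extra scratch, so it is saved to and restored
 * from the frame set up by fpenable (offset 32 is the first slot fpenable
 * itself does not use).
 */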
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
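/*
 * Newton-Raphson step used below: to refine r towards 1/sqrt(s), take
 * f(r) = 1/r^2 - s, whose positive root is the desired value, giving
 *	r' = r - f(r)/f'(r) = r + 0.5 * r * (1 - s * r * r),
 * which is exactly the fmuls/fnmsubs/fmadds sequence in the loop.  Each
 * iteration roughly doubles the number of correct bits in the frsqrte
 * estimate.
 */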
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable