cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

lib1funcs.S (8247B)


      1/*
      2 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
      3 *
      4 * Author: Nicolas Pitre <nico@fluxnic.net>
      5 *   - contributed to gcc-3.4 on Sep 30, 2003
      6 *   - adapted for the Linux kernel on Oct 2, 2003
      7 */
      8
      9/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
     10
     11This file is free software; you can redistribute it and/or modify it
     12under the terms of the GNU General Public License as published by the
     13Free Software Foundation; either version 2, or (at your option) any
     14later version.
     15
     16In addition to the permissions in the GNU General Public License, the
     17Free Software Foundation gives you unlimited permission to link the
     18compiled version of this file into combinations with other programs,
     19and to distribute those combinations without any restriction coming
     20from the use of this file.  (The General Public License restrictions
     21do apply in other respects; for example, they cover modification of
     22the file, and distribution when not linked into a combine
     23executable.)
     24
     25This file is distributed in the hope that it will be useful, but
     26WITHOUT ANY WARRANTY; without even the implied warranty of
     27MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     28General Public License for more details.
     29
     30You should have received a copy of the GNU General Public License
     31along with this program; see the file COPYING.  If not, write to
     32the Free Software Foundation, 59 Temple Place - Suite 330,
     33Boston, MA 02111-1307, USA.  */
     34
     35
     36#include <linux/linkage.h>
     37#include <asm/assembler.h>
     38#include <asm/unwind.h>
     39
     40.macro ARM_DIV_BODY dividend, divisor, result, curbit
     41
       	@ Shift-and-subtract core of the 32-bit divide routines.
       	@   dividend: numerator, overwritten while the loop runs
       	@   divisor:  denominator, overwritten (shifted) by the macro
       	@   result:   receives the quotient bits
       	@   curbit:   scratch, tracks the quotient bit being tested
       	@ The condition flags are clobbered.  The callers in this file
       	@ only expand the macro after checking that dividend > divisor
       	@ and that the divisor is neither 0, 1 nor a power of two.
     42#if __LINUX_ARM_ARCH__ >= 5
     43
     44	clz	\curbit, \divisor		@ leading zeros of the divisor
     45	clz	\result, \dividend		@ leading zeros of the dividend
     46	sub	\result, \curbit, \result	@ shift that lines up the top set bits
     47	mov	\curbit, #1
     48	mov	\divisor, \divisor, lsl \result	@ divisor moved up to the dividend
     49	mov	\curbit, \curbit, lsl \result	@ quotient bit for that position
     50	mov	\result, #0			@ quotient starts out empty
     51	
     52#else
     53
     54	@ Initially shift the divisor left 3 bits if possible,
     55	@ set curbit accordingly.  This allows for curbit to be located
     56	@ at the left end of each 4-bit nibble in the division loop
     57	@ to save one loop in most cases.
     58	tst	\divisor, #0xe0000000
     59	moveq	\divisor, \divisor, lsl #3
     60	moveq	\curbit, #8
     61	movne	\curbit, #1
     62
     63	@ Unless the divisor is very big, shift it up in multiples of
     64	@ four bits, since this is the amount of unwinding in the main
     65	@ division loop.  Continue shifting until the divisor is
     66	@ larger than the dividend.
     671:	cmp	\divisor, #0x10000000
     68	cmplo	\divisor, \dividend
     69	movlo	\divisor, \divisor, lsl #4
     70	movlo	\curbit, \curbit, lsl #4
     71	blo	1b
     72
     73	@ For very big divisors, we must shift it a bit at a time, or
     74	@ we will be in danger of overflowing.
     751:	cmp	\divisor, #0x80000000
     76	cmplo	\divisor, \dividend
     77	movlo	\divisor, \divisor, lsl #1
     78	movlo	\curbit, \curbit, lsl #1
     79	blo	1b
     80
     81	mov	\result, #0			@ quotient starts out empty
     82
     83#endif
     84
     85	@ Division loop
       	@ Each pass tries the divisor at four adjacent bit positions
       	@ (divisor, divisor >> 1, >> 2, >> 3).  Whenever the dividend is
       	@ at least as large, the shifted divisor is subtracted and the
       	@ matching bit of curbit is set in the quotient.
     861:	cmp	\dividend, \divisor
     87	subhs	\dividend, \dividend, \divisor
     88	orrhs	\result,   \result,   \curbit
     89	cmp	\dividend, \divisor,  lsr #1
     90	subhs	\dividend, \dividend, \divisor, lsr #1
     91	orrhs	\result,   \result,   \curbit,  lsr #1
     92	cmp	\dividend, \divisor,  lsr #2
     93	subhs	\dividend, \dividend, \divisor, lsr #2
     94	orrhs	\result,   \result,   \curbit,  lsr #2
     95	cmp	\dividend, \divisor,  lsr #3
     96	subhs	\dividend, \dividend, \divisor, lsr #3
     97	orrhs	\result,   \result,   \curbit,  lsr #3
     98	cmp	\dividend, #0			@ Early termination?
     99	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
    100	movne	\divisor,  \divisor, lsr #4	@ keep the divisor in step with curbit
    101	bne	1b				@ loop while dividend != 0 and curbit != 0
    102
    103.endm
    104
    105
    106.macro ARM_DIV2_ORDER divisor, order
    107
       	@ Compute order = log2(divisor) so that a division can be done
       	@ as a right shift.  The callers in this file only expand the
       	@ macro after testing that the divisor is a power of two.
       	@ The pre-ARMv5 path also overwrites divisor and the flags.
    108#if __LINUX_ARM_ARCH__ >= 5
    109
    110	clz	\order, \divisor
    111	rsb	\order, \order, #31		@ 31 - clz = index of the top set bit
    112
    113#else
    114
       	@ Binary search: drop 16, then 8, then 4 low bits from the
       	@ divisor whenever it is large enough, counting them in order.
    115	cmp	\divisor, #(1 << 16)
    116	movhs	\divisor, \divisor, lsr #16
    117	movhs	\order, #16
    118	movlo	\order, #0
    119
    120	cmp	\divisor, #(1 << 8)
    121	movhs	\divisor, \divisor, lsr #8
    122	addhs	\order, \order, #8
    123
    124	cmp	\divisor, #(1 << 4)
    125	movhs	\divisor, \divisor, lsr #4
    126	addhs	\order, \order, #4
    127
       	@ The divisor is now below 16.  The last step is only exact
       	@ for the power-of-two values 1, 2, 4 and 8.
    128	cmp	\divisor, #(1 << 2)
    129	addhi	\order, \order, #3		@ above 4 (i.e. 8): three more bits
    130	addls	\order, \order, \divisor, lsr #1	@ 1, 2 or 4: add 0, 1 or 2
    131
    132#endif
    133
    134.endm
    135
    136
    137.macro ARM_MOD_BODY dividend, divisor, order, spare
    138
       	@ Shift-and-subtract core of the 32-bit modulo routines.
       	@   dividend: numerator on entry, reduced in place to the remainder
       	@   divisor:  denominator, overwritten (shifted) by the macro
       	@   order:    scratch, counts the bit positions the divisor was
       	@             shifted up by
       	@   spare:    scratch, only written on the ARMv5+ path
       	@ The condition flags are clobbered.
    139#if __LINUX_ARM_ARCH__ >= 5
    140
    141	clz	\order, \divisor
    142	clz	\spare, \dividend
    143	sub	\order, \order, \spare		@ bit positions between the top set bits
    144	mov	\divisor, \divisor, lsl \order	@ divisor moved up to the dividend
    145
    146#else
    147
    148	mov	\order, #0
    149
    150	@ Unless the divisor is very big, shift it up in multiples of
    151	@ four bits, since this is the amount of unwinding in the main
    152	@ division loop.  Continue shifting until the divisor is
    153	@ larger than the dividend.
    1541:	cmp	\divisor, #0x10000000
    155	cmplo	\divisor, \dividend
    156	movlo	\divisor, \divisor, lsl #4
    157	addlo	\order, \order, #4
    158	blo	1b
    159
    160	@ For very big divisors, we must shift it a bit at a time, or
    161	@ we will be in danger of overflowing.
    1621:	cmp	\divisor, #0x80000000
    163	cmplo	\divisor, \dividend
    164	movlo	\divisor, \divisor, lsl #1
    165	addlo	\order, \order, #1
    166	blo	1b
    167
    168#endif
    169
    170	@ Perform all needed subtractions to keep only the remainder.
    171	@ Do comparisons in batch of 4 first.
    172	subs	\order, \order, #3		@ yes, 3 is intended here
    173	blt	2f				@ order was below 3: skip the batched loop
    174
    1751:	cmp	\dividend, \divisor
    176	subhs	\dividend, \dividend, \divisor
    177	cmp	\dividend, \divisor,  lsr #1
    178	subhs	\dividend, \dividend, \divisor, lsr #1
    179	cmp	\dividend, \divisor,  lsr #2
    180	subhs	\dividend, \dividend, \divisor, lsr #2
    181	cmp	\dividend, \divisor,  lsr #3
    182	subhs	\dividend, \dividend, \divisor, lsr #3
    183	cmp	\dividend, #1			@ signed compare feeding the ge tests below
    184	mov	\divisor, \divisor, lsr #4	@ flags untouched: plain mov
    185	subsge	\order, \order, #4		@ only when dividend >= 1
    186	bge	1b				@ another batch while order stays >= 0
    187
       	@ Leave when (order & 3) == 0 or when the dividend is zero.
    188	tst	\order, #3
    189	teqne	\dividend, #0
    190	beq	5f
    191
    192	@ Either 1, 2 or 3 comparison/subtractions are left.
    1932:	cmn	\order, #2			@ compare order with -2
    194	blt	4f				@ order < -2: one step left
    195	beq	3f				@ order == -2: two steps left
    196	cmp	\dividend, \divisor		@ otherwise: three steps left
    197	subhs	\dividend, \dividend, \divisor
    198	mov	\divisor,  \divisor,  lsr #1
    1993:	cmp	\dividend, \divisor
    200	subhs	\dividend, \dividend, \divisor
    201	mov	\divisor,  \divisor,  lsr #1
    2024:	cmp	\dividend, \divisor
    203	subhs	\dividend, \dividend, \divisor
    2045:
    205.endm
    206
    207
    208#ifdef CONFIG_ARM_PATCH_IDIV
       	@ .align 3 requests a 2^3 = 8 byte boundary for the entry point.
       	@ NOTE(review): presumably required by the code that patches
       	@ this routine when CONFIG_ARM_PATCH_IDIV is set - confirm.
    209	.align	3
    210#endif
    211
    212ENTRY(__udivsi3)
    213ENTRY(__aeabi_uidiv)
    214UNWIND(.fnstart)
    215
       	@ Unsigned 32-bit division.
       	@   In:  r0 = dividend, r1 = divisor
       	@   Out: r0 = quotient
       	@ r1, r2, r3 and the condition flags may be overwritten.
       	@ A zero divisor is handed to Ldiv0.
    216	subs	r2, r1, #1			@ r2 = divisor - 1
    217	reteq	lr				@ divisor == 1: r0 already is the quotient
    218	bcc	Ldiv0				@ borrow: divisor was 0
    219	cmp	r0, r1
    220	bls	11f				@ dividend <= divisor: quotient is 0 or 1
    221	tst	r1, r2				@ divisor & (divisor - 1)
    222	beq	12f				@ zero: divisor is a power of 2
    223
    224	ARM_DIV_BODY r0, r1, r2, r3
    225
    226	mov	r0, r2				@ quotient was built in r2
    227	ret	lr
    228
       	@ Flags still come from the cmp r0, r1 above.
    22911:	moveq	r0, #1				@ dividend == divisor
    230	movne	r0, #0				@ dividend < divisor
    231	ret	lr
    232
    23312:	ARM_DIV2_ORDER r1, r2
    234
    235	mov	r0, r0, lsr r2			@ dividend >> log2(divisor)
    236	ret	lr
    237
    238UNWIND(.fnend)
    239ENDPROC(__udivsi3)
    240ENDPROC(__aeabi_uidiv)
    241
    242ENTRY(__umodsi3)
    243UNWIND(.fnstart)
    244
       	@ Unsigned 32-bit modulo.
       	@   In:  r0 = dividend, r1 = divisor
       	@   Out: r0 = remainder
       	@ r1, r2, r3 and the condition flags may be overwritten.
       	@ A zero divisor is handed to Ldiv0.
    245	subs	r2, r1, #1			@ compare divisor with 1
    246	bcc	Ldiv0				@ borrow: divisor was 0
    247	cmpne	r0, r1				@ compare dividend with divisor
    248	moveq   r0, #0				@ divisor == 1 or dividend == divisor
    249	tsthi	r1, r2				@ see if divisor is power of 2
    250	andeq	r0, r0, r2			@ power of 2: dividend & (divisor - 1)
    251	retls	lr				@ falls through only if dividend > divisor
       						@ and divisor is not a power of 2
    252
    253	ARM_MOD_BODY r0, r1, r2, r3
    254
    255	ret	lr
    256
    257UNWIND(.fnend)
    258ENDPROC(__umodsi3)
    259
    260#ifdef CONFIG_ARM_PATCH_IDIV
       	@ .align 3 requests a 2^3 = 8 byte boundary for the entry point.
       	@ NOTE(review): presumably required by the code that patches
       	@ this routine when CONFIG_ARM_PATCH_IDIV is set - confirm.
    261	.align 3
    262#endif
    263
    264ENTRY(__divsi3)
    265ENTRY(__aeabi_idiv)
    266UNWIND(.fnstart)
    267
       	@ Signed 32-bit division.
       	@   In:  r0 = dividend, r1 = divisor
       	@   Out: r0 = quotient
       	@ The magnitudes are divided as unsigned values and the sign is
       	@ applied at the end.  r1, r2, r3, ip and the condition flags
       	@ may be overwritten.  A zero divisor is handed to Ldiv0.
    268	cmp	r1, #0
    269	eor	ip, r0, r1			@ save the sign of the result.
    270	beq	Ldiv0
    271	rsbmi	r1, r1, #0			@ loops below use unsigned.
    272	subs	r2, r1, #1			@ division by 1 or -1 ?
    273	beq	10f
    274	movs	r3, r0
    275	rsbmi	r3, r0, #0			@ positive dividend value
    276	cmp	r3, r1
    277	bls	11f				@ magnitude of dividend <= that of divisor
    278	tst	r1, r2				@ divisor is power of 2 ?
    279	beq	12f
    280
    281	ARM_DIV_BODY r3, r1, r0, r2
    282
    283	cmp	ip, #0
    284	rsbmi	r0, r0, #0			@ negate when the operand signs differed
    285	ret	lr
    286
       	@ Divisor was 1 or -1.  ip ^ r0 gives back the original divisor,
       	@ so the result of teq is negative exactly for a divisor of -1.
    28710:	teq	ip, r0				@ same sign ?
    288	rsbmi	r0, r0, #0
    289	ret	lr
    290
       	@ Flags still come from the cmp r3, r1 above.
    29111:	movlo	r0, #0				@ smaller magnitude: quotient is 0
    292	moveq	r0, ip, asr #31			@ equal magnitudes: 0 or -1 from the sign
    293	orreq	r0, r0, #1			@ ... which becomes +1 or -1
    294	ret	lr
    295
    29612:	ARM_DIV2_ORDER r1, r2
    297
    298	cmp	ip, #0
    299	mov	r0, r3, lsr r2			@ magnitude of dividend >> log2 of divisor
    300	rsbmi	r0, r0, #0			@ negate when the operand signs differed
    301	ret	lr
    302
    303UNWIND(.fnend)
    304ENDPROC(__divsi3)
    305ENDPROC(__aeabi_idiv)
    306
    307ENTRY(__modsi3)
    308UNWIND(.fnstart)
    309
       	@ Signed 32-bit modulo.
       	@   In:  r0 = dividend, r1 = divisor
       	@   Out: r0 = remainder, carrying the sign of the dividend
       	@ The magnitudes are reduced as unsigned values.  r1, r2, r3, ip
       	@ and the condition flags may be overwritten.  A zero divisor is
       	@ handed to Ldiv0.
    310	cmp	r1, #0
    311	beq	Ldiv0
    312	rsbmi	r1, r1, #0			@ loops below use unsigned.
    313	movs	ip, r0				@ preserve sign of dividend
    314	rsbmi	r0, r0, #0			@ if negative make positive
    315	subs	r2, r1, #1			@ compare divisor with 1
    316	cmpne	r0, r1				@ compare dividend with divisor
    317	moveq	r0, #0				@ divisor is 1 or magnitudes are equal
    318	tsthi	r1, r2				@ see if divisor is power of 2
    319	andeq	r0, r0, r2			@ power of 2: mask with divisor - 1
    320	bls	10f				@ all the short cases are finished
    321
    322	ARM_MOD_BODY r0, r1, r2, r3
    323
    32410:	cmp	ip, #0
    325	rsbmi	r0, r0, #0			@ dividend was negative: negate remainder
    326	ret	lr
    327
    328UNWIND(.fnend)
    329ENDPROC(__modsi3)
    330
    331#ifdef CONFIG_AEABI
    332
    333ENTRY(__aeabi_uidivmod)
    334UNWIND(.fnstart)
    335UNWIND(.save {r0, r1, ip, lr}	)
    336
       	@ Unsigned 32-bit division returning quotient and remainder.
       	@   In:  r0 = dividend, r1 = divisor
       	@   Out: r0 = quotient, r1 = remainder
       	@ The quotient comes from __aeabi_uidiv; the remainder is then
       	@ rebuilt as dividend - quotient * divisor.  r2 and r3 are
       	@ overwritten, ip and lr are restored from the stack.
    337	stmfd	sp!, {r0, r1, ip, lr}
    338	bl	__aeabi_uidiv
    339	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = saved dividend, r2 = saved divisor
    340	mul	r3, r0, r2			@ quotient * divisor
    341	sub	r1, r1, r3			@ remainder
    342	ret	lr
    343
    344UNWIND(.fnend)
    345ENDPROC(__aeabi_uidivmod)
    346
    347ENTRY(__aeabi_idivmod)
    348UNWIND(.fnstart)
    349UNWIND(.save {r0, r1, ip, lr}	)
       	@ Signed 32-bit division returning quotient and remainder.
       	@   In:  r0 = dividend, r1 = divisor
       	@   Out: r0 = quotient, r1 = remainder
       	@ The quotient comes from __aeabi_idiv; the remainder is then
       	@ rebuilt as dividend - quotient * divisor.  r2 and r3 are
       	@ overwritten, ip and lr are restored from the stack.
    350	stmfd	sp!, {r0, r1, ip, lr}
    351	bl	__aeabi_idiv
    352	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = saved dividend, r2 = saved divisor
    353	mul	r3, r0, r2			@ quotient * divisor
    354	sub	r1, r1, r3			@ remainder
    355	ret	lr
    356
    357UNWIND(.fnend)
    358ENDPROC(__aeabi_idivmod)
    359
    360#endif
    361
    362Ldiv0:
       	@ Shared division-by-zero exit, reached by branch from the divide
       	@ and modulo routines in this file.  It calls __div0 (defined
       	@ outside this file) and, if that returns, hands 0 back to the
       	@ original caller in r0.
    363UNWIND(.fnstart)
    364UNWIND(.pad #4)
    365UNWIND(.save {lr})
    366	str	lr, [sp, #-8]!			@ push lr, moving sp down by 8 bytes
       						@ NOTE(review): 8 rather than 4 presumably
       						@ keeps sp 8-byte aligned - confirm
    367	bl	__div0
    368	mov	r0, #0			@ About as wrong as it could be.
    369	ldr	pc, [sp], #8			@ return: load saved lr into pc, sp += 8
    370UNWIND(.fnend)
    371ENDPROC(Ldiv0)