cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

divide.S (4406B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2/*
      3 * arch/alpha/lib/divide.S
      4 *
      5 * (C) 1995 Linus Torvalds
      6 *
      7 * Alpha division..
      8 */
      9
     10/*
     11 * The alpha chip doesn't provide hardware division, so we have to do it
     12 * by hand.  The compiler expects the functions
     13 *
     14 *	__divqu: 64-bit unsigned long divide
     15 *	__remqu: 64-bit unsigned long remainder
     16 *	__divq/__remq: signed 64-bit
     17 *	__divlu/__remlu: unsigned 32-bit
     18 *	__divl/__reml: signed 32-bit
     19 *
     20 * These are not normal C functions: instead of the normal
     21 * calling sequence, these expect their arguments in registers
     22 * $24 and $25, and return the result in $27. Register $28 may
     23 * be clobbered (assembly temporary), anything else must be saved. 
     24 *
     25 * In short: painful.
     26 *
     27 * This is a rather simple bit-at-a-time algorithm: it's very good
     28 * at dividing random 64-bit numbers, but the more usual case where
     29 * the divisor is small is handled better by the DEC algorithm
     30 * using lookup tables. This uses much less memory, though, and is
     31 * nicer on the cache.. Besides, I don't know the copyright status
     32 * of the DEC code.
     33 */
     34
     35/*
     36 * My temporaries:
     37 *	$0 - current bit
     38 *	$1 - shifted divisor
     39 *	$2 - modulus/quotient
     40 *
     41 *	$23 - return address
     42 *	$24 - dividend
     43 *	$25 - divisor
     44 *
     45 *	$27 - quotient/modulus
     46 *	$28 - compare status
     47 */
     48
     49#include <asm/export.h>
     50#define halt .long 0	/* emits a zero longword; not referenced in the code visible here */
     51
     52/*
     53 * Select function type and registers
     54 */
/*
 * Register aliases shared by both builds. All of these except compare
 * ($28) are spilled to the stack frame by ufunction before use.
 */
     55#define mask	$0	/* single-bit mask for the quotient bit being tried */
     56#define divisor	$1	/* working copy of the divisor, shifted left then right */
     57#define compare $28	/* result of cmpult/cmpule */
     58#define tmp1	$3	/* modulus - divisor candidate; negated result in sfunction */
     59#define tmp2	$4	/* quotient + mask candidate (DIV build only) */
     60
/*
 * DIV selects the divide build, otherwise the remainder build is produced.
 * quotient/modulus are swapped between $27 and $2 so that the value the
 * caller wants always ends up in $27.
 */
     61#ifdef DIV
     62#define DIV_ONLY(x,y...) x,##y	/* emit the statement */
     63#define MOD_ONLY(x,y...)	/* drop the statement */
     64#define func(x) __div##x	/* entry points are named __div<suffix> */
     65#define modulus $2
     66#define quotient $27	/* returned to the caller */
     67#define GETSIGN(x) xor $24,$25,x	/* x < 0 when the operand signs differ */
     68#define STACK 48	/* frame bytes; slots 0..32 are used (five 8-byte saves) */
     69#else
     70#define DIV_ONLY(x,y...)	/* drop the statement */
     71#define MOD_ONLY(x,y...) x,##y	/* emit the statement */
     72#define func(x) __rem##x	/* entry points are named __rem<suffix> */
     73#define modulus $27	/* returned to the caller */
     74#define quotient $2
     75#define GETSIGN(x) bis $24,$24,x	/* x = dividend: result takes the dividend's sign */
     76#define STACK 32	/* frame bytes; slots 0..24 are used (four 8-byte saves) */
     77#endif
     78
     79/*
     80 * For 32-bit operations, we need to extend to 64-bit
     81 */
/*
 * INTSIZE selects the 32-bit entry points. The names are built with func(),
 * so the unsigned/signed pair is __divlu/__divl (or __remlu/__reml) when
 * INTSIZE is defined and __divqu/__divq (or __remqu/__remq) otherwise.
 */
     82#ifdef INTSIZE
     83#define ufunction func(lu)
     84#define sfunction func(l)
     85#define LONGIFY(x) zapnot x,15,x	/* keep the low four bytes: zero-extend 32 -> 64 */
     86#define SLONGIFY(x) addl x,0,x	/* 32-bit add of 0: sign-extend 32 -> 64 */
     87#else
     88#define ufunction func(qu)
     89#define sfunction func(q)
     90#define LONGIFY(x)	/* 64-bit operands need no extension */
     91#define SLONGIFY(x)	/* 64-bit operands need no extension */
     92#endif
     93
     94.set noat	/* $28, the assembler temporary, is used explicitly below */
     95.align	3
/*
 * ufunction -- unsigned divide (DIV build) or remainder (otherwise).
 *
 * In:   $24 = dividend, $25 = divisor, $23 = return address
 * Out:  $27 = quotient (DIV build) or remainder (otherwise)
 * Uses: $28 as scratch. $0-$3 (and $4 in the DIV build) are spilled to
 *       the STACK-byte frame and reloaded before returning.
 *
 * A zero divisor is not trapped: both loops are skipped, so the DIV build
 * returns 0 and the remainder build returns the (LONGIFY'd) dividend.
 *
 * Local label 7 is also an entry point: sfunction branches to it after
 * allocating a frame of the same size, skipping the subq below.
 */
     96.globl	ufunction
     97.ent	ufunction
     98ufunction:
     99	subq	$30,STACK,$30	/* allocate the save area */
    100	.frame	$30,STACK,$23
    101	.prologue 0
    102
/* Register saves are interleaved with the operand setup. */
    1037:	stq	$1, 0($30)
    104	bis	$25,$25,divisor	/* divisor = $25 */
    105	stq	$2, 8($30)
    106	bis	$24,$24,modulus	/* modulus starts out as the dividend */
    107	stq	$0,16($30)
    108	bis	$31,$31,quotient	/* quotient = 0 */
    109	LONGIFY(divisor)
    110	stq	tmp1,24($30)
    111	LONGIFY(modulus)
    112	bis	$31,1,mask	/* mask = 1 */
    113	DIV_ONLY(stq tmp2,32($30))
    114	beq	divisor, 9f			/* div by zero */
    115
/*
 * Phase 1: shift divisor and mask left together. The comparison is taken
 * before each shift, so the loop runs at least once and stops after the
 * first shift whose pre-shift divisor was not below modulus.
 */
    116#ifdef INTSIZE
    117	/*
    118	 * shift divisor left, using 3-bit shifts for
    119	 * 32-bit divides as we can't overflow. Three-bit
    120	 * shifts will result in looping three times less
    121	 * here, but can result in two loops more later.
    122	 * Thus using a large shift isn't worth it (and
    123	 * s8add pairs better than a sll..)
    124	 */
    1251:	cmpult	divisor,modulus,compare	/* compare = (divisor < modulus), unsigned */
    126	s8addq	divisor,$31,divisor	/* divisor = divisor*8 + 0 */
    127	s8addq	mask,$31,mask	/* mask = mask*8 + 0 */
    128	bne	compare,1b
    129#else
    1301:	cmpult	divisor,modulus,compare	/* compare = (divisor < modulus), unsigned */
    131	blt     divisor, 2f	/* top bit already set: doubling would overflow */
    132	addq	divisor,divisor,divisor	/* divisor <<= 1 */
    133	addq	mask,mask,mask	/* mask <<= 1 */
    134	bne	compare,1b
    135	unop	/* no-op; presumably padding for alignment -- TODO confirm */
    136#endif
    137
/*
 * Phase 2: walk mask back down one bit per iteration. Whenever the
 * shifted divisor fits into modulus, subtract it and (DIV build) set the
 * matching quotient bit. tmp2 is formed from mask before mask is shifted;
 * the loop ends once mask has been shifted down to zero.
 */
    138	/* ok, start to go right again.. */
    1392:	DIV_ONLY(addq quotient,mask,tmp2)	/* tmp2 = quotient + mask */
    140	srl	mask,1,mask
    141	cmpule	divisor,modulus,compare	/* compare = (divisor <= modulus), unsigned */
    142	subq	modulus,divisor,tmp1	/* tmp1 = modulus - divisor */
    143	DIV_ONLY(cmovne compare,tmp2,quotient)	/* take the new quotient if it fit */
    144	srl	divisor,1,divisor
    145	cmovne	compare,tmp1,modulus	/* take the reduced modulus if it fit */
    146	bne	mask,2b
    147
/* Common exit: reload the spilled registers, release the frame, return. */
    1489:	ldq	$1, 0($30)
    149	ldq	$2, 8($30)
    150	ldq	$0,16($30)
    151	ldq	tmp1,24($30)
    152	DIV_ONLY(ldq tmp2,32($30))
    153	addq	$30,STACK,$30
    154	ret	$31,($23),1	/* return through $23 */
    155	.end	ufunction
    156EXPORT_SYMBOL(ufunction)
    157
    158/*
    159 * Uhh.. Ugly signed division. I'd rather not have it at all, but
    160 * it's needed in some circumstances. There are different ways to
    161 * handle this, really. This does:
    162 * 	-a / b = a / -b = -(a / b)
    163 *	-a % b = -(a % b)
    164 *	a % -b = a % b
    165 * which is probably not the best solution, but at least should
    166 * have the property that (x/y)*y + (x%y) = x.
    167 */
    168.align 3
/*
 * sfunction -- signed divide (DIV build) or remainder (otherwise).
 *
 * In:   $24 = dividend, $25 = divisor, $23 = return address
 * Out:  $27 = result; $24, $25, $23 and tmp1 are reloaded before returning
 * Uses: $28 as scratch.
 *
 * Fast path: when neither operand is negative, branch to local label 7
 * inside ufunction. The frame allocated here has the same STACK size, so
 * ufunction's epilogue releases it and returns straight to the caller.
 *
 * Slow path: save the operands, take their absolute values, call
 * ufunction (which allocates its own frame), then negate the result when
 * GETSIGN yields a negative value.
 *
 * NOTE(review): the abs() steps test the full 64-bit sign of $24/$25,
 * while the fast-path test applies SLONGIFY first; this presumably relies
 * on 32-bit operands arriving sign-extended -- confirm against callers.
 */
    169.globl	sfunction
    170.ent	sfunction
    171sfunction:
    172	subq	$30,STACK,$30	/* allocate the frame (same size as ufunction's) */
    173	.frame	$30,STACK,$23
    174	.prologue 0
    175	bis	$24,$25,$28	/* $28 = dividend | divisor: negative if either is */
    176	SLONGIFY($28)
    177	bge	$28,7b	/* both non-negative: continue in ufunction */
    178	stq	$24,0($30)	/* keep the original dividend */
    179	subq	$31,$24,$28	/* $28 = -dividend */
    180	stq	$25,8($30)	/* keep the original divisor */
    181	cmovlt	$24,$28,$24	/* abs($24) */
    182	stq	$23,16($30)	/* $23 is overwritten by the bsr below */
    183	subq	$31,$25,$28	/* $28 = -divisor */
    184	stq	tmp1,24($30)
    185	cmovlt	$25,$28,$25	/* abs($25) */
    186	unop	/* no-op; presumably padding for alignment -- TODO confirm */
    187	bsr	$23,ufunction	/* unsigned operation on the magnitudes, result in $27 */
    188	ldq	$24,0($30)	/* restore the original operands */
    189	ldq	$25,8($30)
    190	GETSIGN($28)	/* DIV: $24 ^ $25; remainder: copy of $24 */
    191	subq	$31,$27,tmp1	/* tmp1 = -result */
    192	SLONGIFY($28)
    193	ldq	$23,16($30)	/* restore the caller's return address */
    194	cmovlt	$28,tmp1,$27	/* negate the result when the sign is negative */
    195	ldq	tmp1,24($30)
    196	addq	$30,STACK,$30	/* release the frame */
    197	ret	$31,($23),1	/* return through $23 */
    198	.end	sfunction
    199EXPORT_SYMBOL(sfunction)