cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

div64.S (3912B)


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:	Nicolas Pitre
 *  Created:	Oct 5, 2003
 *  Copyright:	Monta Vista Software, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif
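
/*
 * A 64-bit value spans an even/odd register pair here; on big-endian
 * the most significant word sits in the lower-numbered register, which
 * is why the xh/xl and yh/yl names are swapped under __ARMEB__.
 */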

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: The calling convention is totally non-standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 * 	xh-xl	= dividend (clobbered)
 * 	r4	= divisor (preserved)
 *
 * Output values:
 * 	yh-yl	= result
 * 	xh	= remainder
 *
 * Clobbered regs: xl, ip
 */
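
/*
 * Usage sketch: the do_div() wrapper mentioned above roughly behaves
 * like
 *
 *	u64 n = 0x700000000ULL;
 *	u32 rem = do_div(n, 5);		now rem == 2, n == 0x166666666
 *
 * i.e. the dividend arrives in xh-xl, the divisor in r4, the quotient
 * comes back in yh-yl and the remainder in xh.
 */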

ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4
	beq	8f			@ divisor is power of 2
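
	@ Why these tests work: subtracting 1 leaves LS set only for a
	@ divisor of 0 (borrow) or 1 (zero result), and "tst ip, r4" is the
	@ usual (x & (x - 1)) == 0 power-of-two check, e.g. 8 & 7 = 0 but
	@ 12 & 11 = 8.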

	@ See if we need to handle upper 32-bit result.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip
	mov	ip, #1
	mov	ip, ip, lsl yl
	mov	yl, r4, lsl yl

#else

	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif
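
	@ Example: with xh = 0x40000000 and r4 = 5 both variants end up
	@ with yl = 5 << 28 = 0x50000000 and ip = 1 << 28, i.e. the
	@ divisor's top bit aligned with the dividend's top bit.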

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
2:	cmp	xh, yl
	orrcs	yh, yh, ip
	subscs	xh, xh, yl
	movsne	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b
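
	@ Example: dividing xh = 7 by r4 = 5 (alignment shift of 0, so
	@ yl = 5, ip = 1), the first pass sets the quotient bit (yh = 1)
	@ and leaves xh = 2, then ip shifts to zero and the loop exits
	@ with that partial remainder carried into the lower half below.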

	@ See if we need to handle lower 32-bit result.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000
4:	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	beq	6f
	cmpcc	xh, r4
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr
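
	@ Example: continuing with xh = 2, xl = 0 and r4 = 5, the loop
	@ above works through all 32 bit positions and computes
	@ 0x200000000 / 5, leaving yl = 0x66666666 and the remainder
	@ xh = 2.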

	@ The top part of remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare it with
	@ the divisor at this point since the divisor cannot be smaller
	@ than 3 here (divisors of 1 and 2 were handled by the fast paths).
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31

#else

	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	ret	lr
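
	@ Example: for r4 = 16 both variants compute ip = 4, the 64-bit
	@ dividend is shifted right by 4 into yh-yl, and the lsl/lsr pair
	@ leaves xh = xl & 0xf as the remainder.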

	@ eq -> division by 1: obvious enough...
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(__do_div64)