div64.S - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
div64.S (3131B)
      1/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Divide a 64-bit unsigned number by a 32-bit unsigned number.
 * This routine assumes that the top 32 bits of the dividend are
 * non-zero to start with.
 * On entry, r3 points to the dividend, which gets overwritten with
 * the 64-bit quotient, and r4 contains the divisor.
 * On exit, r3 contains the remainder.
 *
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
     12#include "ppc_asm.h"
     13
     14	.globl __div64_32
     15__div64_32:
     16	lwz	r5,0(r3)	# get the dividend into r5/r6
     17	lwz	r6,4(r3)
     18	cmplw	r5,r4
     19	li	r7,0
     20	li	r8,0
     21	blt	1f
     22	divwu	r7,r5,r4	# if dividend.hi >= divisor,
     23	mullw	r0,r7,r4	# quotient.hi = dividend.hi / divisor
     24	subf.	r5,r0,r5	# dividend.hi %= divisor
     25	beq	3f
     261:	mr	r11,r5		# here dividend.hi != 0
     27	andis.	r0,r5,0xc000
     28	bne	2f
     29	cntlzw	r0,r5		# we are shifting the dividend right
     30	li	r10,-1		# to make it < 2^32, and shifting
     31	srw	r10,r10,r0	# the divisor right the same amount,
     32	addc	r9,r4,r10	# rounding up (so the estimate cannot
     33	andc	r11,r6,r10	# ever be too large, only too small)
     34	andc	r9,r9,r10
     35	addze	r9,r9
     36	or	r11,r5,r11
     37	rotlw	r9,r9,r0
     38	rotlw	r11,r11,r0
     39	divwu	r11,r11,r9	# then we divide the shifted quantities
     402:	mullw	r10,r11,r4	# to get an estimate of the quotient,
     41	mulhwu	r9,r11,r4	# multiply the estimate by the divisor,
     42	subfc	r6,r10,r6	# take the product from the divisor,
     43	add	r8,r8,r11	# and add the estimate to the accumulated
     44	subfe.	r5,r9,r5	# quotient
     45	bne	1b
     463:	cmplw	r6,r4
     47	blt	4f
     48	divwu	r0,r6,r4	# perform the remaining 32-bit division
     49	mullw	r10,r0,r4	# and get the remainder
     50	add	r8,r8,r0
     51	subf	r6,r10,r6
     524:	stw	r7,0(r3)	# return the quotient in *r3
     53	stw	r8,4(r3)
     54	mr	r3,r6		# return the remainder in r3
     55	blr
     56
/*
 * Extended precision shifts.
 *
 * Updated to be valid for shift counts from 0 to 63 inclusive.
 * -- Gabriel
 *
 * R3/R4 hold the 64-bit value (R3 = most significant word,
 *       R4 = least significant word)
 * R5    holds the shift count
 * The result is returned in R3/R4
 *
 *  __ashrdi3: arithmetic right shift (sign propagation)
 *  __lshrdi3: logical right shift
 *  __ashldi3: left shift
 */
     71	.globl __ashrdi3
     72__ashrdi3:
     73	subfic	r6,r5,32
     74	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
     75	addi	r7,r5,32	# could be xori, or addi with -32
     76	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
     77	rlwinm	r8,r7,0,32	# t3 = (count < 32) ? 32 : 0
     78	sraw	r7,r3,r7	# t2 = MSW >> (count-32)
     79	or	r4,r4,r6	# LSW |= t1
     80	slw	r7,r7,r8	# t2 = (count < 32) ? 0 : t2
     81	sraw	r3,r3,r5	# MSW = MSW >> count
     82	or	r4,r4,r7	# LSW |= t2
     83	blr
     84
     85	.globl __ashldi3
     86__ashldi3:
     87	subfic	r6,r5,32
     88	slw	r3,r3,r5	# MSW = count > 31 ? 0 : MSW << count
     89	addi	r7,r5,32	# could be xori, or addi with -32
     90	srw	r6,r4,r6	# t1 = count > 31 ? 0 : LSW >> (32-count)
     91	slw	r7,r4,r7	# t2 = count < 32 ? 0 : LSW << (count-32)
     92	or	r3,r3,r6	# MSW |= t1
     93	slw	r4,r4,r5	# LSW = LSW << count
     94	or	r3,r3,r7	# MSW |= t2
     95	blr
     96
     97	.globl __lshrdi3
     98__lshrdi3:
     99	subfic	r6,r5,32
    100	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
    101	addi	r7,r5,32	# could be xori, or addi with -32
    102	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
    103	srw	r7,r3,r7	# t2 = count < 32 ? 0 : MSW >> (count-32)
    104	or	r4,r4,r6	# LSW |= t1
    105	srw	r3,r3,r5	# MSW = MSW >> count
    106	or	r4,r4,r7	# LSW |= t2
    107	blr