cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

udivsi3_i4i-Os.S (2315B)


      1/* SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0
      2 *
      3 * Copyright (C) 2006 Free Software Foundation, Inc.
      4 */
      5
      6/* Moderately Space-optimized libgcc routines for the Renesas SH /
      7   STMicroelectronics ST40 CPUs.
      8   Contributed by J"orn Rennecke joern.rennecke@st.com.  */
      9
     10/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
     11   sh4-200 run times:
     12   udiv small divisor: 55 cycles
     13   udiv large divisor: 52 cycles
     14   sdiv small divisor, positive result: 59 cycles
     15   sdiv large divisor, positive result: 56 cycles
     16   sdiv small divisor, negative result: 65 cycles (*)
     17   sdiv large divisor, negative result: 62 cycles (*)
     18   (*): r2 is restored in the rts delay slot and has a lingering latency
     19        of two more cycles.  */
     20	.balign 4	/* start the routines on a 4-byte boundary */
     21	.global	__udivsi3_i4i	/* export the unsigned divide entry point */
     22	.global	__udivsi3_i4
     23	.set	__udivsi3_i4, __udivsi3_i4i	/* __udivsi3_i4 is an alias for the same code */
     24	.type	__udivsi3_i4i, @function
     25	.type	__sdivsi3_i4i, @function	/* the signed entry point is defined further down in this file */
     26__udivsi3_i4i:	/* unsigned 32-bit divide: dividend in r4, divisor in r5, result left in r0; r4/r5 are saved and restored via the stack, r1 and pr are overwritten */
     27	sts pr,r1	/* keep the return address in r1: the bsr calls below overwrite pr */
     28	mov.l r4,@-r15	/* push the dividend; it is popped again just before returning */
     29	extu.w r5,r0	/* r0 = low 16 bits of the divisor */
     30	cmp/eq r5,r0	/* T = 1 when the divisor fits in 16 bits */
     31	swap.w r4,r0	/* r0 = dividend with its 16-bit halves exchanged */
     32	shlr16 r4	/* r4 = upper half of the dividend, moved down to the low 16 bits */
     33	bf/s large_divisor	/* T = 0: divisor is wider than 16 bits, take the other path */
     34	div0u	/* delay slot, runs on both paths: reset the division state for unsigned div1 stepping */
     35	mov.l r5,@-r15	/* push the divisor before it is modified */
     36	shll16 r5	/* move the 16-bit divisor into the upper half of r5 */
     37sdiv_small_divisor:	/* also reached from __sdivsi3_i4i with the same r0/r4/r5/stack layout and a continuation address in r1 */
     38	div1 r5,r4	/* first of 16 div1 steps on the upper half of the dividend */
     39	bsr div6	/* six more steps in the helper */
     40	div1 r5,r4	/* delay slot: executes before div6 is entered */
     41	div1 r5,r4
     42	bsr div6
     43	div1 r5,r4	/* delay slot; after this call 2 * (1 + 1 + 6) = 16 steps are done */
     44	xtrct r4,r0	/* r0 = (low half of r4) << 16 | (r0 >> 16) */
     45	xtrct r0,r4	/* r4 = (low half of r0) << 16 | (r4 >> 16) */
     46	bsr div7	/* seven steps in the helper */
     47	swap.w r4,r4	/* delay slot: exchange the halves of r4 before the second round of steps */
     48	div1 r5,r4
     49	bsr div7
     50	div1 r5,r4	/* delay slot; the second round is 7 + 1 + 1 + 7 = 16 steps */
     51	xtrct r4,r0	/* r0 = (low half of r4) << 16 | (r0 >> 16): merge in the bits from the second round */
     52	mov.l @r15+,r5	/* restore the saved divisor */
     53	swap.w r0,r0	/* exchange the two 16-bit halves of the result */
     54	mov.l @r15+,r4	/* restore the saved dividend */
     55	jmp @r1	/* leave through r1 (the saved return address on the unsigned path) */
     56	rotcl r0	/* delay slot: rotate the final T bit into bit 0 of the result */
     57div7:	/* helper: seven div1 steps (one here, then falls through into div6) */
     58	div1 r5,r4
     59div6:	/* helper: six div1 steps, counting the one in the rts delay slot */
     60	            div1 r5,r4; div1 r5,r4; div1 r5,r4
     61	div1 r5,r4; div1 r5,r4; rts;        div1 r5,r4	/* the last div1 is the rts delay slot and still executes */
     62
     63divx3:	/* helper: three rotcl/div1 pairs, called with bsr from the large-divisor code */
     64	rotcl r0	/* rotate r0 left through T: old T enters bit 0, bit 31 becomes the new T */
     65	div1 r5,r4	/* one division step on r4 using the T bit just produced */
     66	rotcl r0
     67	div1 r5,r4
     68	rotcl r0
     69	rts
     70	div1 r5,r4	/* rts delay slot: third div1 executes before control returns */
     71
     72large_divisor:	/* unsigned path taken when the divisor does not fit in 16 bits */
     73	mov.l r5,@-r15	/* push the divisor so the common exit can pop r5 and r4 */
     74sdiv_large_divisor:	/* also reached from __sdivsi3_i4i with the same register/stack layout */
     75	xor r4,r0	/* r0 was the half-swapped dividend and r4 its upper half, so this leaves (lower half << 16) in r0 */
     76	.rept 4
     77	rotcl r0	/* each of the 4 copies performs 1 + 3 rotcl/div1 pairs, 16 pairs overall */
     78	bsr divx3
     79	div1 r5,r4	/* delay slot: executes before divx3 is entered */
     80	.endr
     81	mov.l @r15+,r5	/* restore the saved divisor */
     82	mov.l @r15+,r4	/* restore the saved dividend */
     83	jmp @r1	/* leave through r1 (the saved return address on the unsigned path) */
     84	rotcl r0	/* delay slot: rotate the final T bit into bit 0 of the result */
     85
     86	.global	__sdivsi3_i4i	/* export the signed divide entry point */
     87	.global __sdivsi3_i4
     88	.global __sdivsi3
     89	.set	__sdivsi3_i4, __sdivsi3_i4i	/* both extra names are aliases for __sdivsi3_i4i */
     90	.set	__sdivsi3, __sdivsi3_i4i
     91__sdivsi3_i4i:	/* signed 32-bit divide: dividend in r4, divisor in r5, result left in r0; negative operands are negated, the shared unsigned code does the division, and the result is negated when the signs differ */
     92	mov.l r4,@-r15	/* push the original dividend */
     93	cmp/pz r5	/* T = 1 when the divisor is >= 0 */
     94	mov.l r5,@-r15	/* push the original divisor */
     95	bt/s pos_divisor	/* divisor >= 0 */
     96	cmp/pz r4	/* delay slot, runs on both paths: T = 1 when the dividend is >= 0 */
     97	neg r5,r5	/* divisor was negative: continue with its negation */
     98	extu.w r5,r0	/* r0 = low 16 bits of the negated divisor */
     99	bt/s neg_result	/* dividend >= 0 and divisor < 0: the result has to be negated */
    100	cmp/eq r5,r0	/* delay slot: T = 1 when the negated divisor fits in 16 bits */
    101	neg r4,r4	/* both operands were negative: negate the dividend, no final negation needed */
    102pos_result:	/* signs agree: the shared code returns straight to the caller */
    103	swap.w r4,r0	/* r0 = dividend with its 16-bit halves exchanged */
    104	bra sdiv_check_divisor
    105	sts pr,r1	/* delay slot: r1 = return address, used by the shared code's jmp @r1 */
    106pos_divisor:
    107	extu.w r5,r0	/* r0 = low 16 bits of the divisor */
    108	bt/s pos_result	/* dividend >= 0 as well */
    109	cmp/eq r5,r0	/* delay slot: T = 1 when the divisor fits in 16 bits */
    110	neg r4,r4	/* dividend < 0 and divisor >= 0: negate the dividend, result is negated afterwards */
    111neg_result:	/* signs differ: route the shared code's exit through negate_result */
    112	mova negate_result,r0	/* r0 = address of negate_result (PC-relative) */
    113	;
    114	mov r0,r1	/* the shared code's jmp @r1 will now land on negate_result */
    115	swap.w r4,r0	/* r0 = dividend with its 16-bit halves exchanged */
    116	lds r2,macl	/* park the incoming r2 in macl so r2 can hold the return address */
    117	sts pr,r2	/* r2 = return address to the caller */
    118sdiv_check_divisor:
    119	shlr16 r4	/* r4 = upper half of the dividend, moved down to the low 16 bits */
    120	bf/s sdiv_large_divisor	/* T is still the cmp/eq result: 0 means the divisor needs more than 16 bits */
    121	div0u	/* delay slot, runs on both paths: reset the division state for unsigned div1 stepping */
    122	bra sdiv_small_divisor
    123	shll16 r5	/* delay slot: move the 16-bit divisor into the upper half of r5 */
    124	.balign 4	/* keep negate_result 4-byte aligned for the mova above */
    125negate_result:	/* exit used when the result must be negated */
    126	neg r0,r0	/* negate the result produced by the shared code */
    127	jmp @r2	/* return to the caller through the address kept in r2 */
    128	sts macl,r2	/* delay slot: restore r2 from macl */