wm_shrx.S (6312B)
/* SPDX-License-Identifier: GPL-2.0 */
	.file	"wm_shrx.S"
/*---------------------------------------------------------------------------+
 |  wm_shrx.S                                                                |
 |                                                                           |
 |  64 bit right shift functions                                             |
 |                                                                           |
 | Copyright (C) 1992,1995                                                   |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
 |                                                                           |
 | Call from C as:                                                           |
 |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            |
 | and                                                                       |
 |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           |
 |                                                                           |
 +---------------------------------------------------------------------------*/

#include "fpu_emu.h"

.text
/*---------------------------------------------------------------------------+
 |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            |
 |                                                                           |
 |   Extended right shift; the quickest path is the one for small shifts.    |
 |   arg1 points at a 64 bit value: low word at offset 0, high word at       |
 |   offset 4.  arg2 is the number of bits to shift that value right by.     |
 |   The value forms the top of a 96 bit quantity whose low 32 bits are      |
 |   an extension word kept in eax, which starts out as zero:                |
 |                [  64 bit arg ][ eax ]                                     |
 |            shift right  --------->                                        |
 |   The shifted 64 bits are written back through arg1 and the extension     |
 |   word is the value returned in eax.                                      |
 +---------------------------------------------------------------------------*/

SYM_FUNC_START(FPU_shrx)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	PARAM2,%ecx		/* ecx = shift count */
	movl	PARAM1,%esi		/* esi -> the 64 bit value */
	cmpl	$32,%ecx		/* shrd copes with counts 0..31 only */
	jae	L_shrx_32_or_more

/* Shift by [0..31] bits: all three words keep part of their contents */
	pushl	%ebx			/* scratch on this path only */
	xorl	%eax,%eax		/* extension starts empty */
	movl	(%esi),%ebx		/* lsl */
	movl	4(%esi),%edx		/* msl */
	shrdl	%cl,%ebx,%eax		/* bits leaving lsl enter the extension */
	shrdl	%cl,%edx,%ebx		/* bits leaving msl enter lsl */
	shrl	%cl,%edx
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

L_shrx_32_or_more:
	cmpl	$64,%ecx
	jae	L_shrx_64_or_more

/* Shift by [32..63] bits: a whole-word move followed by a shift of 0..31.
   The old lsl ends up in the extension, the old msl in the lsl. */
	subb	$32,%cl			/* what is left after the word move */
	movl	(%esi),%eax		/* lsl */
	movl	4(%esi),%edx		/* msl */
	shrdl	%cl,%edx,%eax		/* extension = lsl topped up from msl */
	shrl	%cl,%edx
	movl	%edx,(%esi)		/* new lsl */
	movl	$0,4(%esi)		/* new msl has nothing left in it */
	popl	%esi
	leave
	RET

L_shrx_64_or_more:
	cmpl	$96,%ecx
	jae	L_shrx_96_or_more

/* Shift by [64..95] bits: only the old msl can reach the extension */
	subb	$64,%cl			/* what is left after a two word move */
	movl	4(%esi),%eax		/* msl */
	shrl	%cl,%eax
	xorl	%edx,%edx
	movl	%edx,(%esi)		/* both words of the arg are now zero */
	movl	%edx,4(%esi)
	popl	%esi
	leave
	RET

L_shrx_96_or_more:
/* Shift by [96..inf) bits: nothing survives, not even in the extension */
	xorl	%eax,%eax
	movl	%eax,(%esi)
	movl	%eax,4(%esi)
	popl	%esi
	leave
	RET
SYM_FUNC_END(FPU_shrx)


/*---------------------------------------------------------------------------+
 |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           |
 |                                                                           |
 |   Extended shift right function (optimized for small floating point       |
 |   integers).                                                              |
 |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
 |   right by the number of bits specified by the second arg (arg2).         |
 |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
 |                [  64 bit arg ][ eax ]                                     |
 |            shift right  --------->                                        |
 |   The eax register is initialized to 0 before the shifting.               |
 |   The lower 8 bits of eax are lost and replaced by a flag which is        |
 |   set (to 0x01) if any bit, apart from the first one, is set in the       |
 |   part which has been shifted out of the arg.                             |
 |   Results returned in the 64 bit arg and eax.                             |
+---------------------------------------------------------------------------*/
/* Register roles in FPU_shrxs: esi points at the 64 bit arg, ecx holds the
   shift count and eax becomes the extension word that is returned.  ebx is
   scratch; it is pushed on entry and popped again on every exit path.
   The count is sorted into one of four ranges: [0..31], [32..63],
   [64..95] and [96..inf). */
SYM_FUNC_START(FPU_shrxs)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%ebx
	movl	PARAM2,%ecx		/* ecx = shift count */
	movl	PARAM1,%esi		/* esi -> the 64 bit value */
	cmpl	$64,%ecx		/* two or more whole words to move ? */
	jae	Ls_shift_64_or_more

	cmpl	$32,%ecx		/* shrd copes with counts 0..31 only */
	jb	Ls_shift_0_to_31

/* Shift by [32..63] bits.
   This range is reached by falling straight through, on the assumption
   that small integers are what is asked for most often. */
	subb	$32,%cl			/* what is left after the word move */
	xorl	%ebx,%ebx		/* collects the bits that drop out of lsl */
	movl	(%esi),%eax		/* lsl */
	movl	4(%esi),%edx		/* msl */
	shrdl	%cl,%eax,%ebx
	shrdl	%cl,%edx,%eax		/* eax = extension word */
	shrl	%cl,%edx
	testl	%ebx,%ebx		/* anything below the extension ? */
	setne	%bl
	testl	$0x7fffffff,%eax	/* anything in the extension under bit 31 ? */
	setne	%bh
	testw	%bx,%bx			/* merge the two answers */
	setne	%al			/* flag takes over the low byte of eax */
	movl	%edx,(%esi)		/* new lsl */
	movl	$0,4(%esi)		/* new msl has nothing left in it */
	popl	%ebx
	popl	%esi
	leave
	RET

/* Shift by [0..31] bits */
Ls_shift_0_to_31:
	xorl	%eax,%eax		/* extension starts empty */
	movl	(%esi),%ebx		/* lsl */
	movl	4(%esi),%edx		/* msl */
	shrdl	%cl,%ebx,%eax		/* bits leaving lsl enter the extension */
	shrdl	%cl,%edx,%ebx		/* bits leaving msl enter lsl */
	shrl	%cl,%edx
	testl	$0x7fffffff,%eax	/* every shifted out bit is in eax here */
	setne	%al			/* flag takes over the low byte of eax */
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

/* Shift by [64..95] bits */
Ls_shift_64_or_more:
	cmpl	$96,%ecx
	jae	Ls_shift_96_or_more

	subb	$64,%cl			/* what is left after a two word move */
	xorl	%edx,%edx		/* collects the bits that drop out of lsl */
	movl	(%esi),%ebx		/* lsl */
	movl	4(%esi),%eax		/* msl */
	shrdl	%cl,%ebx,%edx
	shrdl	%cl,%eax,%ebx		/* ebx = the word just below the extension */
	shrl	%cl,%eax		/* eax = extension word */
	orl	%ebx,%edx		/* anything in the two words below it ? */
	setne	%bl
	testl	$0x7fffffff,%eax	/* anything in the extension under bit 31 ? */
	setne	%bh
	testw	%bx,%bx			/* merge the two answers */
	setne	%al			/* flag takes over the low byte of eax */
	xorl	%edx,%edx
	movl	%edx,(%esi)		/* both words of the arg are now zero */
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

Ls_shift_96_or_more:
/* Shift by [96..inf) bits: the extension is zero too, so eax carries
   nothing but the flag, which is set if the arg held any set bit. */
	xorl	%eax,%eax
	movl	(%esi),%ebx
	orl	4(%esi),%ebx		/* was any bit of the 64 bit arg set ? */
	setne	%al
	xorl	%ebx,%ebx
	movl	%ebx,(%esi)		/* both words of the arg are now zero */
	movl	%ebx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET
SYM_FUNC_END(FPU_shrxs)