udivsi3_i4i.S (9131B)
/* SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0

   Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
   2004, 2005, 2006
   Free Software Foundation, Inc.
*/

!! libgcc routines for the Renesas / SuperH SH CPUs.
!! Contributed by Steve Chamberlain.
!! sac@cygnus.com

!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
!! recoded in assembly by Toshiyasu Morita
!! tm@netcom.com

/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
   ELF local label prefixes by J"orn Rennecke
   amylaar@cygnus.com  */

/* This code used shld, thus is not suitable for SH1 / SH2.  */

/* Signed / unsigned division without use of FPU, optimized for SH4.
   Uses a lookup table for divisors in the range -128 .. +128, and
   div1 with case distinction for larger divisors in three more ranges.
   The code is lumped together with the table to allow the use of mova.  */

/*
 * Overview of the paths below (d = divisor magnitude, n = dividend
 * magnitude):
 *
 *   d <= 128                 multiply by a table-driven inverse
 *                            (dmulu.l + addc + rotcr + shld)
 *   128 < d < 0x10000        24 div1 steps (25 on the unsigned entry)
 *   d >= 0x10000, d > n>>8   8 div1 steps  (the "r8" labels)
 *   d >= 0x10000 otherwise   16 div1 steps (the "ge64k" labels)
 *
 * The div1 paths that need more than 8 steps push one extra zero word and
 * park finished quotient bytes in it with mov.b; the last 8 bits stay in
 * the low byte of the working register and are merged in at the end.
 *
 * Byte offsets into that scratch word, chosen by endianness:
 *   L_LSB     byte holding bits  7..0  (defined, not referenced here)
 *   L_LSWMSB  byte holding bits 15..8
 *   L_MSWLSB  byte holding bits 23..16
 */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define L_LSB 0
#define L_LSWMSB 1
#define L_MSWLSB 2
#else
#define L_LSB 3
#define L_LSWMSB 2
#define L_MSWLSB 1
#endif

/*
 * __udivsi3_i4i (alias __udivsi3_i4): unsigned 32-bit division.
 *
 *   In:   r4 = dividend, r5 = divisor
 *   Out:  r0 = quotient
 *
 * r4 and r5 are pushed and popped again on every exit path.  r1 and the
 * T bit are scratch; the div1 paths also change Q and M, and the table
 * path executes dmulu.l, which writes MACH/MACL.
 */
	.balign	4
	.global	__udivsi3_i4i
	.global	__udivsi3_i4
	.set	__udivsi3_i4, __udivsi3_i4i
	.type	__udivsi3_i4i, @function
__udivsi3_i4i:
	mov.w	c128_w, r1		/* r1 = 128 */
	div0u
	mov	r4, r0
	shlr8	r0			/* r0 = dividend >> 8 */
	cmp/hi	r1, r5			/* T = divisor > 128 (unsigned) */
	extu.w	r5, r1			/* r1 = divisor & 0xffff */
	bf	udiv_le128		/* divisor <= 128: table path */
	cmp/eq	r5, r1			/* T = divisor fits in 16 bits */
	bf	udiv_ge64k		/* divisor >= 0x10000 */

	/* 128 < divisor < 0x10000 */
	shlr	r0			/* r0 = dividend >> 9, T = bit 8 */
	mov	r5, r1			/* keep the unshifted divisor */
	shll16	r5
	mov.l	r4, @-r15		/* save dividend */
	div1	r5, r0
	mov.l	r1, @-r15		/* save original divisor */
	div1	r5, r0
	div1	r5, r0
	bra	udiv_25
	div1	r5, r0			/* (delay slot) */

/*
 * Divisor magnitude <= 128.
 *   div_le128     signed entry, positive result; r4/r5 already pushed
 *   udiv_le128    unsigned entry; pushes r4/r5 itself
 */
div_le128:
	mova	div_table_ix, r0
	bra	div_le128_2
	mov.b	@(r0,r5), r1		/* (delay slot) r1 = ix[divisor] */
udiv_le128:
	mov.l	r4, @-r15
	mova	div_table_ix, r0
	mov.b	@(r0,r5), r1		/* r1 = ix[divisor] */
	mov.l	r5, @-r15
div_le128_2:
	mova	div_table_inv, r0
	mov.l	@(r0,r1), r1		/* r1 = inverse word at inv + ix */
	mov	r5, r0
	tst	#0xfe, r0		/* T = divisor is 0 or 1 */
	mova	div_table_clz, r0
	dmulu.l	r1, r4			/* MACH:MACL = inverse * dividend */
	mov.b	@(r0,r5), r1		/* r1 = shift count for shld */
	bt/s	div_by_1
	mov	r4, r0			/* (delay slot) r0 = dividend */
	mov.l	@r15+, r5
	sts	mach, r0
	/* No clrt needed: T is 0 here because the bt/s above fell through. */
	addc	r4, r0			/* r0 = MACH + dividend, T = carry */
	mov.l	@r15+, r4
	rotcr	r0			/* shift right one, carry into bit 31 */
	rts
	shld	r1, r0			/* (delay slot) apply table shift */

/* Divisor magnitude 0 or 1 on the table path: result is +/- dividend. */
div_by_1_neg:
	neg	r4, r0
div_by_1:
	mov.l	@r15+, r5
	rts
	mov.l	@r15+, r4		/* (delay slot) */

/*
 * Divisor magnitude >= 0x10000.
 *   div_ge64k     signed entry, positive result; on entry
 *                 T = divisor > (dividend >> 8), r4/r5 already pushed
 *   udiv_ge64k    unsigned entry
 */
div_ge64k:
	bt/s	div_r8
	div0u				/* (delay slot) */
	shll8	r5
	bra	div_ge64k_2
	div1	r5, r0			/* (delay slot) */
udiv_ge64k:
	cmp/hi	r0, r5			/* T = divisor > (dividend >> 8) */
	mov	r5, r1			/* keep the unshifted divisor */
	bt	udiv_r8
	shll8	r5
	mov.l	r4, @-r15		/* save dividend */
	div1	r5, r0
	mov.l	r1, @-r15		/* save original divisor */
div_ge64k_2:
	div1	r5, r0
	mov.l	zero_l, r1		/* r1 = 0 */
	.rept	4
	div1	r5, r0
	.endr
	mov.l	r1, @-r15		/* push zeroed scratch word */
	div1	r5, r0
	mov.w	m256_w, r1		/* r1 = 0xffffff00 (sign-extended) */
	div1	r5, r0
	mov.b	r0, @(L_LSWMSB,r15)	/* park low byte of r0 */
	/* r0 = (r0 & 0xffffff00) | (r4 & 0xff) */
	xor	r4, r0
	and	r1, r0
	bra	div_ge64k_end
	xor	r4, r0			/* (delay slot) */

/*
 * Divisor >= 0x10000 and divisor > (dividend >> 8): 8 div1 steps.
 * r1 carries the div1 working value; r0 is rotated alongside it with
 * rotcl, which passes T between the two registers.
 */
div_r8:
	shll16	r4
	bra	div_r8_2
	shll8	r4			/* (delay slot) r4 = dividend << 24 */
udiv_r8:
	mov.l	r4, @-r15
	shll16	r4
	clrt
	shll8	r4			/* r4 = dividend << 24 */
	mov.l	r5, @-r15
div_r8_2:
	rotcl	r4
	mov	r0, r1			/* r1 = dividend >> 8 */
	div1	r5, r1
	mov	r4, r0
	rotcl	r0
	mov	r5, r4			/* copy divisor: r5 is restored early */
	div1	r5, r1
	.rept	5
	rotcl	r0
	div1	r5, r1
	.endr
	rotcl	r0
	mov.l	@r15+, r5
	div1	r4, r1
	mov.l	@r15+, r4
	rts
	rotcl	r0			/* (delay slot) */

/*
 * __sdivsi3_i4i (aliases __sdivsi3_i4, __sdivsi3): signed 32-bit division.
 *
 *   In:   r4 = dividend, r5 = divisor
 *   Out:  r0 = quotient
 *
 * The operands are made non-negative with neg, divided as unsigned values,
 * and the quotient is negated on the *_neg paths.  r4 and r5 are pushed at
 * entry and popped again before every return.
 */
	.global	__sdivsi3_i4i
	.global	__sdivsi3_i4
	.global	__sdivsi3
	.set	__sdivsi3_i4, __sdivsi3_i4i
	.set	__sdivsi3, __sdivsi3_i4i
	.type	__sdivsi3_i4i, @function
	/* This is link-compatible with a __sdivsi3 call,
	   but we effectively clobber only r1.  */
__sdivsi3_i4i:
	mov.l	r4, @-r15		/* save dividend */
	cmp/pz	r5			/* T = divisor >= 0 */
	mov.w	c128_w, r1		/* r1 = 128 */
	bt/s	pos_divisor
	cmp/pz	r4			/* (delay slot) T = dividend >= 0 */
	/* divisor < 0 */
	mov.l	r5, @-r15		/* save divisor */
	neg	r5, r5
	bt/s	neg_result		/* dividend >= 0, divisor < 0 */
	cmp/hi	r1, r5			/* (delay slot) T = divisor > 128 */
	neg	r4, r4			/* both negative: result positive */
pos_result:
	extu.w	r5, r0			/* r0 = divisor & 0xffff */
	bf	div_le128		/* divisor <= 128: table path */
	cmp/eq	r5, r0			/* T = divisor fits in 16 bits */
	mov	r4, r0
	shlr8	r0			/* r0 = dividend >> 8 */
	bf/s	div_ge64k
	cmp/hi	r0, r5			/* (delay slot) T = divisor > r0 */
	div0u
	shll16	r5
	div1	r5, r0
	div1	r5, r0
	div1	r5, r0
/* Shared tail for 128 < divisor < 0x10000; also reached from the
   unsigned entry.  */
udiv_25:
	mov.l	zero_l, r1		/* r1 = 0 */
	div1	r5, r0
	div1	r5, r0
	mov.l	r1, @-r15		/* push zeroed scratch word */
	.rept	3
	div1	r5, r0
	.endr
	mov.b	r0, @(L_MSWLSB,r15)	/* park low byte of r0 */
	/* r0 = (r0 & 0xffff0000) | (r4 & 0xffff) */
	xtrct	r4, r0
	swap.w	r0, r0
	.rept	8
	div1	r5, r0
	.endr
	mov.b	r0, @(L_LSWMSB,r15)	/* park low byte of r0 */
div_ge64k_end:
	.rept	8
	div1	r5, r0
	.endr
	mov.l	@r15+, r4	/* zero-extension and swap using LS unit.  */
	extu.b	r0, r0
	mov.l	@r15+, r5		/* restore divisor */
	or	r4, r0			/* merge parked bytes with low byte */
	mov.l	@r15+, r4		/* restore dividend */
	rts
	rotcl	r0			/* (delay slot) shift in final T */

/*
 * Table path with negated result.  Entered from neg_result with
 * r0 = divisor magnitude (<= 128).
 */
div_le128_neg:
	tst	#0xfe, r0		/* T = divisor is 0 or 1 */
	mova	div_table_ix, r0
	mov.b	@(r0,r5), r1		/* r1 = ix[divisor] */
	mova	div_table_inv, r0
	bt/s	div_by_1_neg
	mov.l	@(r0,r1), r1		/* (delay slot) r1 = inverse word */
	mova	div_table_clz, r0
	dmulu.l	r1, r4			/* MACH:MACL = inverse * dividend */
	mov.b	@(r0,r5), r1		/* r1 = shift count for shld */
	mov.l	@r15+, r5
	sts	mach, r0
	/* No clrt needed: T is 0 here because the bt/s above fell through. */
	addc	r4, r0			/* r0 = MACH + dividend, T = carry */
	mov.l	@r15+, r4
	rotcr	r0
	shld	r1, r0
	rts
	neg	r0, r0			/* (delay slot) negate quotient */

pos_divisor:
	mov.l	r5, @-r15		/* save divisor */
	bt/s	pos_result		/* dividend >= 0 as well */
	cmp/hi	r1, r5			/* (delay slot) T = divisor > 128 */
	neg	r4, r4			/* dividend < 0: result negative */
neg_result:
	extu.w	r5, r0			/* r0 = divisor & 0xffff */
	bf	div_le128_neg		/* divisor <= 128: table path */
	cmp/eq	r5, r0			/* T = divisor fits in 16 bits */
	mov	r4, r0
	shlr8	r0			/* r0 = dividend >> 8 */
	bf/s	div_ge64k_neg
	cmp/hi	r0, r5			/* (delay slot) T = divisor > r0 */
	div0u
	mov.l	zero_l, r1		/* r1 = 0 */
	shll16	r5
	div1	r5, r0
	mov.l	r1, @-r15		/* push zeroed scratch word */
	.rept	7
	div1	r5, r0
	.endr
	mov.b	r0, @(L_MSWLSB,r15)	/* park low byte of r0 */
	/* r0 = (r0 & 0xffff0000) | (r4 & 0xffff) */
	xtrct	r4, r0
	swap.w	r0, r0
	.rept	8
	div1	r5, r0
	.endr
	mov.b	r0, @(L_LSWMSB,r15)	/* park low byte of r0 */
div_ge64k_neg_end:
	.rept	8
	div1	r5, r0
	.endr
	mov.l	@r15+, r4	/* zero-extension and swap using LS unit.  */
	extu.b	r0, r1
	mov.l	@r15+, r5		/* restore divisor */
	or	r4, r1			/* merge parked bytes with low byte */
div_r8_neg_end:
	mov.l	@r15+, r4		/* restore dividend */
	rotcl	r1			/* shift in final T */
	rts
	neg	r1, r0			/* (delay slot) negate quotient */

/* Divisor magnitude >= 0x10000, negated result.
   On entry T = divisor > (dividend >> 8).  */
div_ge64k_neg:
	bt/s	div_r8_neg
	div0u				/* (delay slot) */
	shll8	r5
	mov.l	zero_l, r1		/* r1 = 0 */
	.rept	6
	div1	r5, r0
	.endr
	mov.l	r1, @-r15		/* push zeroed scratch word */
	div1	r5, r0
	mov.w	m256_w, r1		/* r1 = 0xffffff00 (sign-extended) */
	div1	r5, r0
	mov.b	r0, @(L_LSWMSB,r15)	/* park low byte of r0 */
	/* r0 = (r0 & 0xffffff00) | (r4 & 0xff) */
	xor	r4, r0
	and	r1, r0
	bra	div_ge64k_neg_end
	xor	r4, r0			/* (delay slot) */

/* 16-bit literal for the mov.w loads above; it sits behind an
   unconditional bra, so it is never executed.  */
c128_w:
	.word	128

/* 8 div1 steps with negated result; the quotient is collected in r1. */
div_r8_neg:
	clrt
	shll16	r4
	mov	r4, r1
	shll8	r1			/* r1 = dividend << 24 */
	mov	r5, r4			/* copy divisor: r5 is restored early */
	.rept	7
	rotcl	r1
	div1	r5, r0
	.endr
	mov.l	@r15+, r5
	rotcl	r1
	bra	div_r8_neg_end
	div1	r4, r0			/* (delay slot) */

/* 16-bit literal; mov.w sign-extends it to 0xffffff00.  Also placed
   behind an unconditional bra.  */
m256_w:
	.word	0xff00
/* This table has been generated by divtab-sh4.c.  */
/* Shift counts handed to shld, indexed by divisor 0 .. 127.  The entry
   for divisor 128 is the first byte of div_table_ix, which follows
   immediately.  */
	.balign	4
div_table_clz:
	.byte	0			/* divisor 0 */
	.byte	1			/* divisor 1 */
	.byte	0			/* divisor 2 */
	.byte	-1, -1			/* divisors 3 .. 4 */
	.byte	-2, -2, -2, -2		/* divisors 5 .. 8 */
	.rept	8			/* divisors 9 .. 16 */
	.byte	-3
	.endr
	.rept	16			/* divisors 17 .. 32 */
	.byte	-4
	.endr
	.rept	32			/* divisors 33 .. 64 */
	.byte	-5
	.endr
	.rept	63			/* divisors 65 .. 127 */
	.byte	-6
	.endr
/* Lookup table translating positive divisor to index into table of
   normalized inverse.  N.B. the '0' entry is also the last entry of the
   previous table, and causes an unaligned access for division by zero.  */
/* Each entry is a signed byte offset relative to div_table_inv.  */
div_table_ix:
	.byte	-6			/* divisor 0, see note above */
	.byte	-128			/* divisor 1 */
	.byte	-128, 0			/* divisors 2 .. 3 */
	.byte	-128, -64, 0, 64	/* divisors 4 .. 7 */
	/* divisors 8 .. 15 */
	.byte	-128, -96, -64, -32, 0, 32, 64, 96
	/* divisors 16 .. 31 */
	.byte	-128, -112, -96, -80, -64, -48, -32, -16
	.byte	0, 16, 32, 48, 64, 80, 96, 112
	/* divisors 32 .. 63 */
	.byte	-128, -120, -112, -104, -96, -88, -80, -72
	.byte	-64, -56, -48, -40, -32, -24, -16, -8
	.byte	0, 8, 16, 24, 32, 40, 48, 56
	.byte	64, 72, 80, 88, 96, 104, 112, 120
	/* divisors 64 .. 127 */
	.byte	-128, -124, -120, -116, -112, -108, -104, -100
	.byte	-96, -92, -88, -84, -80, -76, -72, -68
	.byte	-64, -60, -56, -52, -48, -44, -40, -36
	.byte	-32, -28, -24, -20, -16, -12, -8, -4
	.byte	0, 4, 8, 12, 16, 20, 24, 28
	.byte	32, 36, 40, 44, 48, 52, 56, 60
	.byte	64, 68, 72, 76, 80, 84, 88, 92
	.byte	96, 100, 104, 108, 112, 116, 120, 124
	/* divisor 128 */
	.byte	-128
/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
/* zero_l is both the literal 0 loaded by the div1 paths and the first
   table entry (offset -128 from div_table_inv).  */
	.balign	4
zero_l:
	/* 1/64 .. 1/95 */
	.long	0x00000000, 0xF81F81F9, 0xF07C1F08, 0xE9131AC0
	.long	0xE1E1E1E2, 0xDAE6076C, 0xD41D41D5, 0xCD856891
	.long	0xC71C71C8, 0xC0E07039, 0xBACF914D, 0xB4E81B4F
	.long	0xAF286BCB, 0xA98EF607, 0xA41A41A5, 0x9EC8E952
	.long	0x9999999A, 0x948B0FCE, 0x8F9C18FA, 0x8ACB90F7
	.long	0x86186187, 0x81818182, 0x7D05F418, 0x78A4C818
	.long	0x745D1746, 0x702E05C1, 0x6C16C16D, 0x68168169
	.long	0x642C8591, 0x60581606, 0x5C9882BA, 0x58ED2309
div_table_inv:
	/* 1/96 .. 1/127 */
	.long	0x55555556, 0x51D07EAF, 0x4E5E0A73, 0x4AFD6A06
	.long	0x47AE147B, 0x446F8657, 0x41414142, 0x3E22CBCF
	.long	0x3B13B13C, 0x38138139, 0x3521CFB3, 0x323E34A3
	.long	0x2F684BDB, 0x2C9FB4D9, 0x29E4129F, 0x27350B89
	.long	0x24924925, 0x21FB7813, 0x1F7047DD, 0x1CF06ADB
	.long	0x1A7B9612, 0x18118119, 0x15B1E5F8, 0x135C8114
	.long	0x11111112, 0x0ECF56BF, 0x0C9714FC, 0x0A6810A7
	.long	0x08421085, 0x0624DD30, 0x04104105, 0x02040811
	/* maximum error: 0.987342 scaled: 0.921875*/