cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

longlong.h (39795B)


      1/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
      2 * Note: I added some stuff for use with gnupg
      3 *
      4 * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
      5 *	2000, 2001, 2002, 2003 Free Software Foundation, Inc.
      6 *
      7 * This file is free software; you can redistribute it and/or modify
      8 * it under the terms of the GNU Library General Public License as published by
      9 * the Free Software Foundation; either version 2 of the License, or (at your
     10 * option) any later version.
     11 *
     12 * This file is distributed in the hope that it will be useful, but
     13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     14 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
     15 * License for more details.
     16 *
     17 * You should have received a copy of the GNU Library General Public License
     18 * along with this file; see the file COPYING.LIB.  If not, write to
     19 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
     20 * MA 02111-1307, USA. */
     21
     22#include <linux/count_zeros.h>
     23
     24/* You have to define the following before including this file:
     25 *
     26 * UWtype -- An unsigned type, default type for operations (typically a "word")
     27 * UHWtype -- An unsigned type, at least half the size of UWtype.
 * UDWtype -- An unsigned type, at least twice as large as UWtype
     29 * W_TYPE_SIZE -- size in bits of UWtype
     30 *
     31 * SItype, USItype -- Signed and unsigned 32 bit types.
     32 * DItype, UDItype -- Signed and unsigned 64 bit types.
     33 *
     34 * On a 32 bit machine UWtype should typically be USItype;
     35 * on a 64 bit machine, UWtype should typically be UDItype.
     36*/
     37
     38#define __BITS4 (W_TYPE_SIZE / 4)
     39#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
     40#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
     41#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
     42
     43/* This is used to make sure no undesirable sharing between different libraries
     44	that use this file takes place.  */
     45#ifndef __MPN
     46#define __MPN(x) __##x
     47#endif
     48
     49/* Define auxiliary asm macros.
     50 *
     51 * 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
     52 * UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
     53 * word product in HIGH_PROD and LOW_PROD.
     54 *
     55 * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
     56 * UDWtype product.  This is just a variant of umul_ppmm.
     57
     58 * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
     59 * denominator) divides a UDWtype, composed by the UWtype integers
     60 * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
     61 * in QUOTIENT and the remainder in REMAINDER.	HIGH_NUMERATOR must be less
     62 * than DENOMINATOR for correct operation.  If, in addition, the most
     63 * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
     64 * UDIV_NEEDS_NORMALIZATION is defined to 1.
     65 * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
     66 * denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
     67 * is rounded towards 0.
     68 *
     69 * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
     70 * msb to the first non-zero bit in the UWtype X.  This is the number of
     71 * steps X needs to be shifted left to set the msb.  Undefined for X == 0,
     72 * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
     73 *
     74 * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
     75 * from the least significant end.
     76 *
 * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
 * high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
 * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
 * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
 * (i.e. carry out) is not stored anywhere, and is lost.
     82 *
 * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
 * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
 * composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
 * LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
 * and LOW_DIFFERENCE.  Overflow (i.e. borrow out) is not stored anywhere,
 * and is lost.
     89 *
     90 * If any of these macros are left undefined for a particular CPU,
     91 * C macros are used.  */
     92
     93/* The CPUs come in alphabetical order below.
     94 *
     95 * Please add support for more CPUs here, or improve the current support
     96 * for the CPUs below!	*/
     97
     98#if defined(__GNUC__) && !defined(NO_ASM)
     99
    100/* We sometimes need to clobber "cc" with gcc2, but that would not be
    101	understood by gcc1.	Use cpp to avoid major code duplication.  */
    102#if __GNUC__ < 2
    103#define __CLOBBER_CC
    104#define __AND_CLOBBER_CC
    105#else /* __GNUC__ >= 2 */
    106#define __CLOBBER_CC : "cc"
    107#define __AND_CLOBBER_CC , "cc"
    108#endif /* __GNUC__ < 2 */
    109
    110/***************************************
    111	**************  A29K  *****************
    112	***************************************/
    113#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32
    114#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    115	__asm__ ("add %1,%4,%5\n" \
    116		"addc %0,%2,%3" \
    117	: "=r" ((USItype)(sh)), \
    118		"=&r" ((USItype)(sl)) \
    119	: "%r" ((USItype)(ah)), \
    120		"rI" ((USItype)(bh)), \
    121		"%r" ((USItype)(al)), \
    122		"rI" ((USItype)(bl)))
    123#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    124	__asm__ ("sub %1,%4,%5\n" \
    125		"subc %0,%2,%3" \
    126	: "=r" ((USItype)(sh)), \
    127		"=&r" ((USItype)(sl)) \
    128	: "r" ((USItype)(ah)), \
    129		"rI" ((USItype)(bh)), \
    130		"r" ((USItype)(al)), \
    131		"rI" ((USItype)(bl)))
    132#define umul_ppmm(xh, xl, m0, m1) \
    133do { \
    134		USItype __m0 = (m0), __m1 = (m1); \
    135		__asm__ ("multiplu %0,%1,%2" \
    136		: "=r" ((USItype)(xl)) \
    137		: "r" (__m0), \
    138			"r" (__m1)); \
    139		__asm__ ("multmu %0,%1,%2" \
    140		: "=r" ((USItype)(xh)) \
    141		: "r" (__m0), \
    142			"r" (__m1)); \
    143} while (0)
    144#define udiv_qrnnd(q, r, n1, n0, d) \
    145	__asm__ ("dividu %0,%3,%4" \
    146	: "=r" ((USItype)(q)), \
    147		"=q" ((USItype)(r)) \
    148	: "1" ((USItype)(n1)), \
    149		"r" ((USItype)(n0)), \
    150		"r" ((USItype)(d)))
    151#endif /* __a29k__ */
    152
    153#if defined(__alpha) && W_TYPE_SIZE == 64
    154#define umul_ppmm(ph, pl, m0, m1)			\
    155do {							\
    156	UDItype __m0 = (m0), __m1 = (m1);		\
    157	(ph) = __builtin_alpha_umulh(__m0, __m1);	\
    158	(pl) = __m0 * __m1;                             \
    159} while (0)
    160#define UMUL_TIME 46
    161#ifndef LONGLONG_STANDALONE
    162#define udiv_qrnnd(q, r, n1, n0, d) \
    163do { UDItype __r; \
    164	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
    165	(r) = __r; \
    166} while (0)
    167extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype);
    168#define UDIV_TIME 220
    169#endif /* LONGLONG_STANDALONE */
    170#endif /* __alpha */
    171
    172/***************************************
    173	**************  ARM  ******************
    174	***************************************/
    175#if defined(__arm__) && W_TYPE_SIZE == 32
    176#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    177	__asm__ ("adds %1, %4, %5\n" \
    178		"adc  %0, %2, %3" \
    179	: "=r" (sh), \
    180		"=&r" (sl) \
    181	: "%r" ((USItype)(ah)), \
    182		"rI" ((USItype)(bh)), \
    183		"%r" ((USItype)(al)), \
    184		"rI" ((USItype)(bl)))
    185#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    186	__asm__ ("subs %1, %4, %5\n" \
    187		"sbc  %0, %2, %3" \
    188	: "=r" (sh), \
    189		"=&r" (sl) \
    190	: "r" ((USItype)(ah)), \
    191		"rI" ((USItype)(bh)), \
    192		"r" ((USItype)(al)), \
    193		"rI" ((USItype)(bl)))
    194#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__
    195#define umul_ppmm(xh, xl, a, b) \
    196	__asm__ ("@ Inlined umul_ppmm\n" \
    197		"mov	%|r0, %2, lsr #16		@ AAAA\n" \
    198		"mov	%|r2, %3, lsr #16		@ BBBB\n" \
    199		"bic	%|r1, %2, %|r0, lsl #16		@ aaaa\n" \
    200		"bic	%0, %3, %|r2, lsl #16		@ bbbb\n" \
    201		"mul	%1, %|r1, %|r2			@ aaaa * BBBB\n" \
    202		"mul	%|r2, %|r0, %|r2		@ AAAA * BBBB\n" \
    203		"mul	%|r1, %0, %|r1			@ aaaa * bbbb\n" \
    204		"mul	%0, %|r0, %0			@ AAAA * bbbb\n" \
    205		"adds	%|r0, %1, %0			@ central sum\n" \
    206		"addcs	%|r2, %|r2, #65536\n" \
    207		"adds	%1, %|r1, %|r0, lsl #16\n" \
    208		"adc	%0, %|r2, %|r0, lsr #16" \
    209	: "=&r" (xh), \
    210		"=r" (xl) \
    211	: "r" ((USItype)(a)), \
    212		"r" ((USItype)(b)) \
    213	: "r0", "r1", "r2")
    214#else
    215#define umul_ppmm(xh, xl, a, b) \
    216	__asm__ ("@ Inlined umul_ppmm\n" \
    217		"umull %1, %0, %2, %3" \
    218	: "=&r" (xh), \
    219		"=&r" (xl) \
    220	: "r" ((USItype)(a)), \
    221		"r" ((USItype)(b)) \
    222	: "r0", "r1")
    223#endif
    224#define UMUL_TIME 20
    225#define UDIV_TIME 100
    226#endif /* __arm__ */
    227
    228/***************************************
    229	**************  CLIPPER  **************
    230	***************************************/
    231#if defined(__clipper__) && W_TYPE_SIZE == 32
    232#define umul_ppmm(w1, w0, u, v) \
    233	({union {UDItype __ll; \
    234		struct {USItype __l, __h; } __i; \
    235	} __xx; \
    236	__asm__ ("mulwux %2,%0" \
    237	: "=r" (__xx.__ll) \
    238	: "%0" ((USItype)(u)), \
    239		"r" ((USItype)(v))); \
    240	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
    241#define smul_ppmm(w1, w0, u, v) \
    242	({union {DItype __ll; \
    243		struct {SItype __l, __h; } __i; \
    244	} __xx; \
    245	__asm__ ("mulwx %2,%0" \
    246	: "=r" (__xx.__ll) \
    247	: "%0" ((SItype)(u)), \
    248		"r" ((SItype)(v))); \
    249	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
    250#define __umulsidi3(u, v) \
    251	({UDItype __w; \
    252	__asm__ ("mulwux %2,%0" \
    253	: "=r" (__w) \
    254	: "%0" ((USItype)(u)), \
    255		"r" ((USItype)(v))); \
    256	__w; })
    257#endif /* __clipper__ */
    258
    259/***************************************
    260	**************  GMICRO  ***************
    261	***************************************/
    262#if defined(__gmicro__) && W_TYPE_SIZE == 32
    263#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    264	__asm__ ("add.w %5,%1\n" \
    265		"addx %3,%0" \
    266	: "=g" ((USItype)(sh)), \
    267		"=&g" ((USItype)(sl)) \
    268	: "%0" ((USItype)(ah)), \
    269		"g" ((USItype)(bh)), \
    270		"%1" ((USItype)(al)), \
    271		"g" ((USItype)(bl)))
    272#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    273	__asm__ ("sub.w %5,%1\n" \
    274		"subx %3,%0" \
    275	: "=g" ((USItype)(sh)), \
    276		"=&g" ((USItype)(sl)) \
    277	: "0" ((USItype)(ah)), \
    278		"g" ((USItype)(bh)), \
    279		"1" ((USItype)(al)), \
    280		"g" ((USItype)(bl)))
    281#define umul_ppmm(ph, pl, m0, m1) \
    282	__asm__ ("mulx %3,%0,%1" \
    283	: "=g" ((USItype)(ph)), \
    284		"=r" ((USItype)(pl)) \
    285	: "%0" ((USItype)(m0)), \
    286		"g" ((USItype)(m1)))
    287#define udiv_qrnnd(q, r, nh, nl, d) \
    288	__asm__ ("divx %4,%0,%1" \
    289	: "=g" ((USItype)(q)), \
    290		"=r" ((USItype)(r)) \
    291	: "1" ((USItype)(nh)), \
    292		"0" ((USItype)(nl)), \
    293		"g" ((USItype)(d)))
    294#endif
    295
    296/***************************************
    297	**************  HPPA  *****************
    298	***************************************/
    299#if defined(__hppa) && W_TYPE_SIZE == 32
    300#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    301	__asm__ ("add %4,%5,%1\n" \
    302		   "addc %2,%3,%0" \
    303	: "=r" ((USItype)(sh)), \
    304	     "=&r" ((USItype)(sl)) \
    305	: "%rM" ((USItype)(ah)), \
    306	     "rM" ((USItype)(bh)), \
    307	     "%rM" ((USItype)(al)), \
    308	     "rM" ((USItype)(bl)))
    309#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    310	__asm__ ("sub %4,%5,%1\n" \
    311	   "subb %2,%3,%0" \
    312	: "=r" ((USItype)(sh)), \
    313	     "=&r" ((USItype)(sl)) \
    314	: "rM" ((USItype)(ah)), \
    315	     "rM" ((USItype)(bh)), \
    316	     "rM" ((USItype)(al)), \
    317	     "rM" ((USItype)(bl)))
    318#if 0 && defined(_PA_RISC1_1)
    319/* xmpyu uses floating point register which is not allowed in Linux kernel. */
    320#define umul_ppmm(wh, wl, u, v) \
    321do { \
    322	union {UDItype __ll; \
    323	struct {USItype __h, __l; } __i; \
    324	} __xx; \
    325	__asm__ ("xmpyu %1,%2,%0" \
    326	: "=*f" (__xx.__ll) \
    327	: "*f" ((USItype)(u)), \
    328	       "*f" ((USItype)(v))); \
    329	(wh) = __xx.__i.__h; \
    330	(wl) = __xx.__i.__l; \
    331} while (0)
    332#define UMUL_TIME 8
    333#define UDIV_TIME 60
    334#else
    335#define UMUL_TIME 40
    336#define UDIV_TIME 80
    337#endif
    338#if 0 /* #ifndef LONGLONG_STANDALONE */
    339#define udiv_qrnnd(q, r, n1, n0, d) \
    340do { USItype __r; \
    341	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
    342	(r) = __r; \
    343} while (0)
    344extern USItype __udiv_qrnnd();
    345#endif /* LONGLONG_STANDALONE */
    346#endif /* hppa */
    347
    348/***************************************
    349	**************  I370  *****************
    350	***************************************/
    351#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32
    352#define umul_ppmm(xh, xl, m0, m1) \
    353do { \
    354	union {UDItype __ll; \
    355	   struct {USItype __h, __l; } __i; \
    356	} __xx; \
    357	USItype __m0 = (m0), __m1 = (m1); \
    358	__asm__ ("mr %0,%3" \
    359	: "=r" (__xx.__i.__h), \
    360	       "=r" (__xx.__i.__l) \
    361	: "%1" (__m0), \
    362	       "r" (__m1)); \
    363	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
    364	(xh) += ((((SItype) __m0 >> 31) & __m1) \
    365	     + (((SItype) __m1 >> 31) & __m0)); \
    366} while (0)
    367#define smul_ppmm(xh, xl, m0, m1) \
    368do { \
    369	union {DItype __ll; \
    370	   struct {USItype __h, __l; } __i; \
    371	} __xx; \
    372	__asm__ ("mr %0,%3" \
    373	: "=r" (__xx.__i.__h), \
    374	       "=r" (__xx.__i.__l) \
    375	: "%1" (m0), \
    376	       "r" (m1)); \
    377	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
    378} while (0)
    379#define sdiv_qrnnd(q, r, n1, n0, d) \
    380do { \
    381	union {DItype __ll; \
    382	   struct {USItype __h, __l; } __i; \
    383	} __xx; \
    384	__xx.__i.__h = n1; __xx.__i.__l = n0; \
    385	__asm__ ("dr %0,%2" \
    386	: "=r" (__xx.__ll) \
    387	: "0" (__xx.__ll), "r" (d)); \
    388	(q) = __xx.__i.__l; (r) = __xx.__i.__h; \
    389} while (0)
    390#endif
    391
    392/***************************************
    393	**************  I386  *****************
    394	***************************************/
    395#undef __i386__
    396#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32
    397#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    398	__asm__ ("addl %5,%1\n" \
    399	   "adcl %3,%0" \
    400	: "=r" (sh), \
    401	     "=&r" (sl) \
    402	: "%0" ((USItype)(ah)), \
    403	     "g" ((USItype)(bh)), \
    404	     "%1" ((USItype)(al)), \
    405	     "g" ((USItype)(bl)))
    406#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    407	__asm__ ("subl %5,%1\n" \
    408	   "sbbl %3,%0" \
    409	: "=r" (sh), \
    410	     "=&r" (sl) \
    411	: "0" ((USItype)(ah)), \
    412	     "g" ((USItype)(bh)), \
    413	     "1" ((USItype)(al)), \
    414	     "g" ((USItype)(bl)))
    415#define umul_ppmm(w1, w0, u, v) \
    416	__asm__ ("mull %3" \
    417	: "=a" (w0), \
    418	     "=d" (w1) \
    419	: "%0" ((USItype)(u)), \
    420	     "rm" ((USItype)(v)))
    421#define udiv_qrnnd(q, r, n1, n0, d) \
    422	__asm__ ("divl %4" \
    423	: "=a" (q), \
    424	     "=d" (r) \
    425	: "0" ((USItype)(n0)), \
    426	     "1" ((USItype)(n1)), \
    427	     "rm" ((USItype)(d)))
    428#ifndef UMUL_TIME
    429#define UMUL_TIME 40
    430#endif
    431#ifndef UDIV_TIME
    432#define UDIV_TIME 40
    433#endif
    434#endif /* 80x86 */
    435
    436/***************************************
    437	**************  I860  *****************
    438	***************************************/
    439#if defined(__i860__) && W_TYPE_SIZE == 32
    440#define rshift_rhlc(r, h, l, c) \
    441	__asm__ ("shr %3,r0,r0\n" \
    442	"shrd %1,%2,%0" \
    443	   "=r" (r) : "r" (h), "r" (l), "rn" (c))
    444#endif /* i860 */
    445
    446/***************************************
    447	**************  I960  *****************
    448	***************************************/
    449#if defined(__i960__) && W_TYPE_SIZE == 32
    450#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    451	__asm__ ("cmpo 1,0\n" \
    452	"addc %5,%4,%1\n" \
    453	"addc %3,%2,%0" \
    454	: "=r" ((USItype)(sh)), \
    455	     "=&r" ((USItype)(sl)) \
    456	: "%dI" ((USItype)(ah)), \
    457	     "dI" ((USItype)(bh)), \
    458	     "%dI" ((USItype)(al)), \
    459	     "dI" ((USItype)(bl)))
    460#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    461	__asm__ ("cmpo 0,0\n" \
    462	"subc %5,%4,%1\n" \
    463	"subc %3,%2,%0" \
    464	: "=r" ((USItype)(sh)), \
    465	     "=&r" ((USItype)(sl)) \
    466	: "dI" ((USItype)(ah)), \
    467	     "dI" ((USItype)(bh)), \
    468	     "dI" ((USItype)(al)), \
    469	     "dI" ((USItype)(bl)))
    470#define umul_ppmm(w1, w0, u, v) \
    471	({union {UDItype __ll; \
    472	   struct {USItype __l, __h; } __i; \
    473	} __xx; \
    474	__asm__ ("emul        %2,%1,%0" \
    475	: "=d" (__xx.__ll) \
    476	: "%dI" ((USItype)(u)), \
    477	     "dI" ((USItype)(v))); \
    478	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
    479#define __umulsidi3(u, v) \
    480	({UDItype __w; \
    481	__asm__ ("emul      %2,%1,%0" \
    482	: "=d" (__w) \
    483	: "%dI" ((USItype)(u)), \
    484	       "dI" ((USItype)(v))); \
    485	__w; })
    486#define udiv_qrnnd(q, r, nh, nl, d) \
    487do { \
    488	union {UDItype __ll; \
    489	   struct {USItype __l, __h; } __i; \
    490	} __nn; \
    491	__nn.__i.__h = (nh); __nn.__i.__l = (nl); \
    492	__asm__ ("ediv %d,%n,%0" \
    493	: "=d" (__rq.__ll) \
    494	: "dI" (__nn.__ll), \
    495	     "dI" ((USItype)(d))); \
    496	(r) = __rq.__i.__l; (q) = __rq.__i.__h; \
    497} while (0)
    498#if defined(__i960mx)		/* what is the proper symbol to test??? */
    499#define rshift_rhlc(r, h, l, c) \
    500do { \
    501	union {UDItype __ll; \
    502	   struct {USItype __l, __h; } __i; \
    503	} __nn; \
    504	__nn.__i.__h = (h); __nn.__i.__l = (l); \
    505	__asm__ ("shre %2,%1,%0" \
    506	: "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
    507}
    508#endif /* i960mx */
    509#endif /* i960 */
    510
    511/***************************************
    512	**************  68000	****************
    513	***************************************/
    514#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
    515#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    516	__asm__ ("add%.l %5,%1\n" \
    517	   "addx%.l %3,%0" \
    518	: "=d" ((USItype)(sh)), \
    519	     "=&d" ((USItype)(sl)) \
    520	: "%0" ((USItype)(ah)), \
    521	     "d" ((USItype)(bh)), \
    522	     "%1" ((USItype)(al)), \
    523	     "g" ((USItype)(bl)))
    524#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    525	__asm__ ("sub%.l %5,%1\n" \
    526	   "subx%.l %3,%0" \
    527	: "=d" ((USItype)(sh)), \
    528	     "=&d" ((USItype)(sl)) \
    529	: "0" ((USItype)(ah)), \
    530	     "d" ((USItype)(bh)), \
    531	     "1" ((USItype)(al)), \
    532	     "g" ((USItype)(bl)))
    533#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020))
    534#define umul_ppmm(w1, w0, u, v) \
    535	__asm__ ("mulu%.l %3,%1:%0" \
    536	: "=d" ((USItype)(w0)), \
    537	     "=d" ((USItype)(w1)) \
    538	: "%0" ((USItype)(u)), \
    539	     "dmi" ((USItype)(v)))
    540#define UMUL_TIME 45
    541#define udiv_qrnnd(q, r, n1, n0, d) \
    542	__asm__ ("divu%.l %4,%1:%0" \
    543	: "=d" ((USItype)(q)), \
    544	     "=d" ((USItype)(r)) \
    545	: "0" ((USItype)(n0)), \
    546	     "1" ((USItype)(n1)), \
    547	     "dmi" ((USItype)(d)))
    548#define UDIV_TIME 90
    549#define sdiv_qrnnd(q, r, n1, n0, d) \
    550	__asm__ ("divs%.l %4,%1:%0" \
    551	: "=d" ((USItype)(q)), \
    552	     "=d" ((USItype)(r)) \
    553	: "0" ((USItype)(n0)), \
    554	     "1" ((USItype)(n1)), \
    555	     "dmi" ((USItype)(d)))
    556#else /* not mc68020 */
    557#define umul_ppmm(xh, xl, a, b) \
    558do { USItype __umul_tmp1, __umul_tmp2; \
    559	__asm__ ("| Inlined umul_ppmm\n" \
    560	"move%.l %5,%3\n" \
    561	"move%.l %2,%0\n" \
    562	"move%.w %3,%1\n" \
    563	"swap	%3\n" \
    564	"swap	%0\n" \
    565	"mulu	%2,%1\n" \
    566	"mulu	%3,%0\n" \
    567	"mulu	%2,%3\n" \
    568	"swap	%2\n" \
    569	"mulu	%5,%2\n" \
    570	"add%.l	%3,%2\n" \
    571	"jcc	1f\n" \
    572	"add%.l	%#0x10000,%0\n" \
    573	"1:	move%.l %2,%3\n" \
    574	"clr%.w	%2\n" \
    575	"swap	%2\n" \
    576	"swap	%3\n" \
    577	"clr%.w	%3\n" \
    578	"add%.l	%3,%1\n" \
    579	"addx%.l %2,%0\n" \
    580	"| End inlined umul_ppmm" \
    581	: "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
    582		"=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
    583	: "%2" ((USItype)(a)), "d" ((USItype)(b))); \
    584} while (0)
    585#define UMUL_TIME 100
    586#define UDIV_TIME 400
    587#endif /* not mc68020 */
    588#endif /* mc68000 */
    589
    590/***************************************
    591	**************  88000	****************
    592	***************************************/
    593#if defined(__m88000__) && W_TYPE_SIZE == 32
    594#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    595	__asm__ ("addu.co %1,%r4,%r5\n" \
    596	   "addu.ci %0,%r2,%r3" \
    597	: "=r" ((USItype)(sh)), \
    598	     "=&r" ((USItype)(sl)) \
    599	: "%rJ" ((USItype)(ah)), \
    600	     "rJ" ((USItype)(bh)), \
    601	     "%rJ" ((USItype)(al)), \
    602	     "rJ" ((USItype)(bl)))
    603#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    604	__asm__ ("subu.co %1,%r4,%r5\n" \
    605	   "subu.ci %0,%r2,%r3" \
    606	: "=r" ((USItype)(sh)), \
    607	     "=&r" ((USItype)(sl)) \
    608	: "rJ" ((USItype)(ah)), \
    609	     "rJ" ((USItype)(bh)), \
    610	     "rJ" ((USItype)(al)), \
    611	     "rJ" ((USItype)(bl)))
    612#if defined(__m88110__)
    613#define umul_ppmm(wh, wl, u, v) \
    614do { \
    615	union {UDItype __ll; \
    616	   struct {USItype __h, __l; } __i; \
    617	} __x; \
    618	__asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
    619	(wh) = __x.__i.__h; \
    620	(wl) = __x.__i.__l; \
    621} while (0)
    622#define udiv_qrnnd(q, r, n1, n0, d) \
    623	({union {UDItype __ll; \
    624	   struct {USItype __h, __l; } __i; \
    625	} __x, __q; \
    626	__x.__i.__h = (n1); __x.__i.__l = (n0); \
    627	__asm__ ("divu.d %0,%1,%2" \
    628	: "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
    629	(r) = (n0) - __q.__l * (d); (q) = __q.__l; })
    630#define UMUL_TIME 5
    631#define UDIV_TIME 25
    632#else
    633#define UMUL_TIME 17
    634#define UDIV_TIME 150
    635#endif /* __m88110__ */
    636#endif /* __m88000__ */
    637
    638/***************************************
    639	**************  MIPS  *****************
    640	***************************************/
    641#if defined(__mips__) && W_TYPE_SIZE == 32
    642#define umul_ppmm(w1, w0, u, v)			\
    643do {						\
    644	UDItype __ll = (UDItype)(u) * (v);	\
    645	w1 = __ll >> 32;			\
    646	w0 = __ll;				\
    647} while (0)
    648#define UMUL_TIME 10
    649#define UDIV_TIME 100
    650#endif /* __mips__ */
    651
    652/***************************************
    653	**************  MIPS/64  **************
    654	***************************************/
    655#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
    656#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 && defined(CONFIG_CC_IS_GCC)
    657/*
    658 * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C
    659 * code below, so we special case MIPS64r6 until the compiler can do better.
    660 */
    661#define umul_ppmm(w1, w0, u, v)						\
    662do {									\
    663	__asm__ ("dmulu %0,%1,%2"					\
    664		 : "=d" ((UDItype)(w0))					\
    665		 : "d" ((UDItype)(u)),					\
    666		   "d" ((UDItype)(v)));					\
    667	__asm__ ("dmuhu %0,%1,%2"					\
    668		 : "=d" ((UDItype)(w1))					\
    669		 : "d" ((UDItype)(u)),					\
    670		   "d" ((UDItype)(v)));					\
    671} while (0)
    672#else
    673#define umul_ppmm(w1, w0, u, v) \
    674do {									\
    675	typedef unsigned int __ll_UTItype __attribute__((mode(TI)));	\
    676	__ll_UTItype __ll = (__ll_UTItype)(u) * (v);			\
    677	w1 = __ll >> 64;						\
    678	w0 = __ll;							\
    679} while (0)
    680#endif
    681#define UMUL_TIME 20
    682#define UDIV_TIME 140
    683#endif /* __mips__ */
    684
    685/***************************************
    686	**************  32000	****************
    687	***************************************/
    688#if defined(__ns32000__) && W_TYPE_SIZE == 32
    689#define umul_ppmm(w1, w0, u, v) \
    690	({union {UDItype __ll; \
    691	   struct {USItype __l, __h; } __i; \
    692	} __xx; \
    693	__asm__ ("meid %2,%0" \
    694	: "=g" (__xx.__ll) \
    695	: "%0" ((USItype)(u)), \
    696	     "g" ((USItype)(v))); \
    697	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
    698#define __umulsidi3(u, v) \
    699	({UDItype __w; \
    700	__asm__ ("meid %2,%0" \
    701	: "=g" (__w) \
    702	: "%0" ((USItype)(u)), \
    703	       "g" ((USItype)(v))); \
    704	__w; })
    705#define udiv_qrnnd(q, r, n1, n0, d) \
    706	({union {UDItype __ll; \
    707	   struct {USItype __l, __h; } __i; \
    708	} __xx; \
    709	__xx.__i.__h = (n1); __xx.__i.__l = (n0); \
    710	__asm__ ("deid %2,%0" \
    711	: "=g" (__xx.__ll) \
    712	: "0" (__xx.__ll), \
    713	     "g" ((USItype)(d))); \
    714	(r) = __xx.__i.__l; (q) = __xx.__i.__h; })
    715#endif /* __ns32000__ */
    716
    717/***************************************
    718	**************  PPC  ******************
    719	***************************************/
    720#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32
    721#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    722do { \
    723	if (__builtin_constant_p(bh) && (bh) == 0) \
    724		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
    725		: "=r" (sh), \
    726		"=&r" (sl) \
    727		: "%r" ((USItype)(ah)), \
    728		"%r" ((USItype)(al)), \
    729		"rI" ((USItype)(bl))); \
    730	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
    731		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
    732		: "=r" (sh), \
    733		"=&r" (sl) \
    734		: "%r" ((USItype)(ah)), \
    735		"%r" ((USItype)(al)), \
    736		"rI" ((USItype)(bl))); \
    737	else \
    738		__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
    739		: "=r" (sh), \
    740		"=&r" (sl) \
    741		: "%r" ((USItype)(ah)), \
    742		"r" ((USItype)(bh)), \
    743		"%r" ((USItype)(al)), \
    744		"rI" ((USItype)(bl))); \
    745} while (0)
    746#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    747do { \
    748	if (__builtin_constant_p(ah) && (ah) == 0) \
    749		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
    750		: "=r" (sh), \
    751		"=&r" (sl) \
    752		: "r" ((USItype)(bh)), \
    753		"rI" ((USItype)(al)), \
    754		"r" ((USItype)(bl))); \
    755	else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \
    756		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
    757		: "=r" (sh), \
    758		"=&r" (sl) \
    759		: "r" ((USItype)(bh)), \
    760		"rI" ((USItype)(al)), \
    761		"r" ((USItype)(bl))); \
    762	else if (__builtin_constant_p(bh) && (bh) == 0) \
    763		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
    764		: "=r" (sh), \
    765		"=&r" (sl) \
    766		: "r" ((USItype)(ah)), \
    767		"rI" ((USItype)(al)), \
    768		"r" ((USItype)(bl))); \
    769	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
    770		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
    771		: "=r" (sh), \
    772		"=&r" (sl) \
    773		: "r" ((USItype)(ah)), \
    774		"rI" ((USItype)(al)), \
    775		"r" ((USItype)(bl))); \
    776	else \
    777		__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
    778		: "=r" (sh), \
    779		"=&r" (sl) \
    780		: "r" ((USItype)(ah)), \
    781		"r" ((USItype)(bh)), \
    782		"rI" ((USItype)(al)), \
    783		"r" ((USItype)(bl))); \
    784} while (0)
    785#if defined(_ARCH_PPC)
    786#define umul_ppmm(ph, pl, m0, m1) \
    787do { \
    788	USItype __m0 = (m0), __m1 = (m1); \
    789	__asm__ ("mulhwu %0,%1,%2" \
    790	: "=r" (ph) \
    791	: "%r" (__m0), \
    792	"r" (__m1)); \
    793	(pl) = __m0 * __m1; \
    794} while (0)
    795#define UMUL_TIME 15
    796#define smul_ppmm(ph, pl, m0, m1) \
    797do { \
    798	SItype __m0 = (m0), __m1 = (m1); \
    799	__asm__ ("mulhw %0,%1,%2" \
    800	: "=r" ((SItype) ph) \
    801	: "%r" (__m0), \
    802	"r" (__m1)); \
    803	(pl) = __m0 * __m1; \
    804} while (0)
    805#define SMUL_TIME 14
    806#define UDIV_TIME 120
    807#else
    808#define umul_ppmm(xh, xl, m0, m1) \
    809do { \
    810	USItype __m0 = (m0), __m1 = (m1); \
    811	__asm__ ("mul %0,%2,%3" \
    812	: "=r" ((USItype)(xh)), \
    813	"=q" ((USItype)(xl)) \
    814	: "r" (__m0), \
    815	"r" (__m1)); \
    816	(xh) += ((((SItype) __m0 >> 31) & __m1) \
    817	+ (((SItype) __m1 >> 31) & __m0)); \
    818} while (0)
    819#define UMUL_TIME 8
    820#define smul_ppmm(xh, xl, m0, m1) \
    821	__asm__ ("mul %0,%2,%3" \
    822	: "=r" ((SItype)(xh)), \
    823	"=q" ((SItype)(xl)) \
    824	: "r" (m0), \
    825	"r" (m1))
    826#define SMUL_TIME 4
    827#define sdiv_qrnnd(q, r, nh, nl, d) \
    828	__asm__ ("div %0,%2,%4" \
    829	: "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
    830	: "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
    831#define UDIV_TIME 100
    832#endif
    833#endif /* Power architecture variants.  */
    834
    835/***************************************
    836	**************  PYR  ******************
    837	***************************************/
    838#if defined(__pyr__) && W_TYPE_SIZE == 32
    839#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    840	__asm__ ("addw        %5,%1\n" \
    841	"addwc	%3,%0" \
    842	: "=r" ((USItype)(sh)), \
    843	"=&r" ((USItype)(sl)) \
    844	: "%0" ((USItype)(ah)), \
    845	"g" ((USItype)(bh)), \
    846	"%1" ((USItype)(al)), \
    847	"g" ((USItype)(bl)))
    848#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    849	__asm__ ("subw        %5,%1\n" \
    850	"subwb	%3,%0" \
    851	: "=r" ((USItype)(sh)), \
    852	"=&r" ((USItype)(sl)) \
    853	: "0" ((USItype)(ah)), \
    854	"g" ((USItype)(bh)), \
    855	"1" ((USItype)(al)), \
    856	"g" ((USItype)(bl)))
    857	/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
/*
 * umul_ppmm(w1, w0, u, v) -- Pyramid variant using "uemul".
 *
 * The asm works on a 64-bit temporary (__xx.__ll): u is first moved into
 * its %R0 half, then "uemul" combines it with v.  The temporary is then
 * split through the union, whose struct lists __h before __l: __h goes to
 * w1 and __l goes to w0.  Implemented as a GNU statement expression.
 */
#define umul_ppmm(w1, w0, u, v) \
	({union {UDItype __ll; \
	struct {USItype __h, __l; } __i; \
	} __xx; \
	__asm__ ("movw %1,%R0\n" \
	"uemul %2,%0" \
	: "=&r" (__xx.__ll) \
	: "g" ((USItype) (u)), \
	"g" ((USItype)(v))); \
	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
    868#endif /* __pyr__ */
    869
    870/***************************************
    871	**************  RT/ROMP  **************
    872	***************************************/
    873#if defined(__ibm032__) /* RT/ROMP */	&& W_TYPE_SIZE == 32
/*
 * add_ssaaaa(sh, sl, ah, al, bh, bl) -- RT/ROMP two-word addition.
 *
 * Same contract as the generic C fallback later in this file:
 * (sh, sl) = (ah, al) + (bh, bl).  ah/al are preloaded into the outputs
 * via the matching constraints "%0"/"%1"; "a" adds bl into sl, then "ae"
 * adds bh into sh.  bh and bl must be in registers ("r").
 * NOTE(review): "ae" is presumably add-extended (with carry) -- confirm.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("a %1,%5\n" \
	"ae %0,%3" \
	: "=r" ((USItype)(sh)), \
	"=&r" ((USItype)(sl)) \
	: "%0" ((USItype)(ah)), \
	"r" ((USItype)(bh)), \
	"%1" ((USItype)(al)), \
	"r" ((USItype)(bl)))
/*
 * sub_ddmmss(sh, sl, ah, al, bh, bl) -- RT/ROMP two-word subtraction.
 *
 * Same contract as the generic C fallback later in this file:
 * (sh, sl) = (ah, al) - (bh, bl).  ah/al are preloaded into the outputs
 * via the matching constraints "0"/"1"; "s" subtracts bl from sl, then
 * "se" subtracts bh from sh.
 * NOTE(review): "se" is presumably subtract-extended (with borrow) --
 * confirm.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("s %1,%5\n" \
	"se %0,%3" \
	: "=r" ((USItype)(sh)), \
	"=&r" ((USItype)(sl)) \
	: "0" ((USItype)(ah)), \
	"r" ((USItype)(bh)), \
	"1" ((USItype)(al)), \
	"r" ((USItype)(bl)))
/*
 * umul_ppmm(ph, pl, m0, m1) -- RT/ROMP variant built from multiply steps.
 *
 * Sequence: clear r2 ("s r2,r2"), move __m0 into r10 with "mts", issue
 * sixteen "m r2,%3" steps against __m1, then copy r2 into ph ("cas") and
 * read r10 back into pl ("mfs").  r2 is declared clobbered.  Operands are
 * copied into locals first, so each is evaluated once.
 * After the asm, __m1 is added to ph when bit 31 of __m0 is set and __m0
 * when bit 31 of __m1 is set.
 * NOTE(review): that adjustment looks like the usual signed-to-unsigned
 * high-word correction, i.e. the multiply steps are presumably signed --
 * confirm against the ISA manual.
 */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ( \
	"s       r2,r2\n" \
	"mts	r10,%2\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"m	r2,%3\n" \
	"cas	%0,r2,r0\n" \
	"mfs	r10,%1" \
	: "=r" ((USItype)(ph)), \
	"=r" ((USItype)(pl)) \
	: "%r" (__m0), \
	"r" (__m1) \
	: "r2"); \
	(ph) += ((((SItype) __m0 >> 31) & __m1) \
	+ (((SItype) __m1 >> 31) & __m0)); \
} while (0)
/* Cost estimates for multiply and divide on RT/ROMP (units are not stated in this part of the file). */
#define UMUL_TIME 20
#define UDIV_TIME 200
    926#endif /* RT/ROMP */
    927
    928/***************************************
    929	**************  SH2  ******************
    930	***************************************/
    931#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \
    932	&& W_TYPE_SIZE == 32
/*
 * umul_ppmm(w1, w0, u, v) -- SH variant using "dmulu.l".
 *
 * "dmulu.l" multiplies u by v; the result is then read back with two
 * "sts" instructions: macl into w0 and mach into w1.  Both macl and mach
 * are declared clobbered.
 */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ( \
	"dmulu.l %2,%3\n" \
	"sts	macl,%1\n" \
	"sts	mach,%0" \
	: "=r" ((USItype)(w1)), \
	"=r" ((USItype)(w0)) \
	: "r" ((USItype)(u)), \
	"r" ((USItype)(v)) \
	: "macl", "mach")
/* Cost estimate for this umul_ppmm (units are not stated in this part of the file). */
#define UMUL_TIME 5
    944#endif
    945
    946/***************************************
    947	**************  SPARC	****************
    948	***************************************/
    949#if defined(__sparc__) && W_TYPE_SIZE == 32
/*
 * add_ssaaaa(sh, sl, ah, al, bh, bl) -- SPARC two-word addition.
 *
 * Same contract as the generic C fallback later in this file:
 * (sh, sl) = (ah, al) + (bh, bl).  "addcc" forms sl from al and bl, then
 * "addx" forms sh from ah and bh.  ah/al use the "rJ" constraint (printed
 * with the %r modifier) and bh/bl use "rI", so they may be registers or
 * constants accepted by those constraints.  __CLOBBER_CC is defined
 * outside this part of the file; presumably it supplies the
 * condition-code clobber.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addcc %r4,%5,%1\n" \
	"addx %r2,%3,%0" \
	: "=r" ((USItype)(sh)), \
	"=&r" ((USItype)(sl)) \
	: "%rJ" ((USItype)(ah)), \
	"rI" ((USItype)(bh)), \
	"%rJ" ((USItype)(al)), \
	"rI" ((USItype)(bl)) \
	__CLOBBER_CC)
/*
 * sub_ddmmss(sh, sl, ah, al, bh, bl) -- SPARC two-word subtraction.
 *
 * Same contract as the generic C fallback later in this file:
 * (sh, sl) = (ah, al) - (bh, bl).  "subcc" forms sl from al and bl, then
 * "subx" forms sh from ah and bh.  Unlike the addition, the minuend
 * operands are not marked commutative.  __CLOBBER_CC is defined outside
 * this part of the file; presumably it supplies the condition-code
 * clobber.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subcc %r4,%5,%1\n" \
	"subx %r2,%3,%0" \
	: "=r" ((USItype)(sh)), \
	"=&r" ((USItype)(sl)) \
	: "rJ" ((USItype)(ah)), \
	"rI" ((USItype)(bh)), \
	"rJ" ((USItype)(al)), \
	"rI" ((USItype)(bl)) \
	__CLOBBER_CC)
    970#if defined(__sparc_v8__)
    971/* Don't match immediate range because, 1) it is not often useful,
    972	2) the 'I' flag thinks of the range as a 13 bit signed interval,
    973	while we want to match a 13 bit interval, sign extended to 32 bits,
    974	but INTERPRETED AS UNSIGNED.  */
/*
 * umul_ppmm(w1, w0, u, v) -- SPARC v8 variant using the hardware "umul".
 *
 * "umul" writes its register result to w0; w1 is then read from the %y
 * register with "rd".  Both inputs are constrained to registers only.
 */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
	: "=r" ((USItype)(w1)), \
	"=r" ((USItype)(w0)) \
	: "r" ((USItype)(u)), \
	"r" ((USItype)(v)))
/* Cost estimate for this umul_ppmm (units are not stated in this part of the file). */
#define UMUL_TIME 5
    982#ifndef SUPERSPARC		/* SuperSPARC's udiv only handles 53 bit dividends */
/*
 * udiv_qrnnd(q, r, n1, n0, d) -- SPARC v8 variant using the hardware
 * "udiv".
 *
 * n1 is moved into %y, three nops follow, then "udiv" divides using n0 and
 * d and leaves the quotient in the local __q.  The remainder is not
 * produced by the asm; it is recomputed in C as n0 - __q * d.
 * Note that n0 and d each appear twice in the expansion (once as asm
 * inputs, once in the remainder expression), so arguments with side
 * effects are evaluated twice.
 */
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
	USItype __q; \
	__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
	: "=r" ((USItype)(__q)) \
	: "r" ((USItype)(n1)), \
	"r" ((USItype)(n0)), \
	"r" ((USItype)(d))); \
	(r) = (n0) - __q * (d); \
	(q) = __q; \
} while (0)
/* Cost estimate for this udiv_qrnnd (units are not stated in this part of the file). */
#define UDIV_TIME 25
    995#endif /* SUPERSPARC */
    996#else /* ! __sparc_v8__ */
    997#if defined(__sparclite__)
    998/* This has hardware multiply but not divide.  It also has two additional
    999	instructions scan (ffs from high bit) and divscc.  */
/*
 * umul_ppmm(w1, w0, u, v) -- sparclite variant; textually the same asm as
 * the SPARC v8 version: "umul" writes w0 and w1 is read from %y.
 */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
	: "=r" ((USItype)(w1)), \
	"=r" ((USItype)(w0)) \
	: "r" ((USItype)(u)), \
	"r" ((USItype)(v)))
/* Cost estimate for this umul_ppmm (units are not stated in this part of the file). */
#define UMUL_TIME 5
/*
 * udiv_qrnnd(q, r, n1, n0, d) -- sparclite variant built from "divscc"
 * divide steps.
 *
 * n1 is written to %y, "tst %g0" sets the condition codes, and then 32
 * "divscc" steps are issued: the first takes n0 as its source, the
 * following ones iterate on %g1, and the last one writes the quotient to
 * q.  The remainder is read from %y into r; the annulled branch
 * "bl,a 1f" executes the delay-slot "add %1,%4,%1" (r += d) only when it
 * is taken.  %g1 is clobbered; __AND_CLOBBER_CC is defined outside this
 * part of the file.
 */
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("! Inlined udiv_qrnnd\n" \
	"wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n" \
	"tst	%%g0\n" \
	"divscc	%3,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%%g1\n" \
	"divscc	%%g1,%4,%0\n" \
	"rd	%%y,%1\n" \
	"bl,a 1f\n" \
	"add	%1,%4,%1\n" \
	"1:	! End of inline udiv_qrnnd" \
	: "=r" ((USItype)(q)), \
	"=r" ((USItype)(r)) \
	: "r" ((USItype)(n1)), \
	"r" ((USItype)(n0)), \
	"rI" ((USItype)(d)) \
	: "%g1" __AND_CLOBBER_CC)
/* Cost estimate for this udiv_qrnnd (units are not stated in this part of the file). */
#define UDIV_TIME 37
   1054#endif /* __sparclite__ */
   1055#endif /* __sparc_v8__ */
   1056	/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
   1057#ifndef umul_ppmm
/*
 * umul_ppmm(w1, w0, u, v) -- SPARC v7 fallback built from "mulscc"
 * multiply steps (used when no earlier variant defined umul_ppmm).
 *
 * u is written to %y.  %g2 is set to (v >> 31, arithmetic) & u, i.e. u
 * when v has its top bit set and 0 otherwise.  %g1 is cleared together
 * with the condition codes, then 32 "mulscc %g1,v,%g1" steps plus one
 * final step with a 0 operand are issued.  w1 = %g1 + %g2 and w0 is read
 * from %y.  %g1 and %g2 are clobbered.
 * NOTE(review): the %g2 term looks like the unsigned correction for a
 * signed multiply-step sequence -- confirm against the SPARC manual.
 */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("! Inlined umul_ppmm\n" \
	"wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n" \
	"sra	%3,31,%%g2	! Don't move this insn\n" \
	"and	%2,%%g2,%%g2	! Don't move this insn\n" \
	"andcc	%%g0,0,%%g1	! Don't move this insn\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,%3,%%g1\n" \
	"mulscc	%%g1,0,%%g1\n" \
	"add	%%g1,%%g2,%0\n" \
	"rd	%%y,%1" \
	: "=r" ((USItype)(w1)), \
	"=r" ((USItype)(w0)) \
	: "%rI" ((USItype)(u)), \
	"r" ((USItype)(v)) \
	: "%g1", "%g2" __AND_CLOBBER_CC)
#define UMUL_TIME 39		/* 39 instructions */
   1105/* It's quite necessary to add this much assembler for the sparc.
   1106   The default udiv_qrnnd (in C) is more than 10 times slower!  */
/*
 * udiv_qrnnd(q, r, n1, n0, d) -- SPARC v7 shift-and-subtract division.
 *
 * Operand binding: q (%0) starts out holding n0 and r (%1) starts out
 * holding n1 (matching constraints "0" and "1"); d is %2.  %g1 is loaded
 * with 32 and decremented once per iteration as the loop counter.  Each
 * iteration shifts a bit through %0 while updating %1; the final
 * "xnor %0,0,%0" complements %0 to give the quotient.  Both outputs are
 * early-clobber, and %g1 and the condition codes are clobbered.
 */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n\t"					\
	   "mov	32,%%g1\n\t"						\
	   "subcc	%1,%2,%%g0\n\t"					\
	   "1:	bcs	5f\n\t"						\
	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\
	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\
	   "addx	%1,%1,%1	! so this can't give carry\n\t"	\
	   "subcc	%%g1,1,%%g1\n\t"				\
	   "2:	bne	1b\n\t"						\
	   "subcc	%1,%2,%%g0\n\t"					\
	   "bcs	3f\n\t"							\
	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\
	   "b		3f\n\t"						\
	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\
	   "4:	sub	%1,%2,%1\n\t"					\
	   "5:	addxcc	%1,%1,%1\n\t"					\
	   "bcc	2b\n\t"							\
	   "subcc	%%g1,1,%%g1\n\t"				\
	   "! Got carry from n.  Subtract next step to cancel this carry.\n\t" \
	   "bne	4b\n\t"							\
	   "addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb\n\t" \
	   "sub	%1,%2,%1\n\t"						\
	   "3:	xnor	%0,0,%0\n\t"					\
	   "! End of inline udiv_qrnnd\n"				\
	   : "=&r" ((USItype)(q)),					\
	     "=&r" ((USItype)(r))					\
	   : "r" ((USItype)(d)),					\
	     "1" ((USItype)(n1)),					\
	     "0" ((USItype)(n0)) : "%g1", "cc")
#define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
   1138#endif
   1139#endif /* __sparc__ */
   1140
   1141/***************************************
   1142	**************  VAX  ******************
   1143	***************************************/
   1144#if defined(__vax__) && W_TYPE_SIZE == 32
/*
 * add_ssaaaa(sh, sl, ah, al, bh, bl) -- VAX two-word addition.
 *
 * Same contract as the generic C fallback later in this file:
 * (sh, sl) = (ah, al) + (bh, bl).  ah/al are preloaded into the outputs
 * via the matching constraints "%0"/"%1"; "addl2" adds bl into sl and
 * "adwc" adds bh into sh.  All operands use the general "g" constraint.
 * NOTE(review): "adwc" is presumably add-with-carry -- confirm.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addl2 %5,%1\n" \
	"adwc %3,%0" \
	: "=g" ((USItype)(sh)), \
	"=&g" ((USItype)(sl)) \
	: "%0" ((USItype)(ah)), \
	"g" ((USItype)(bh)), \
	"%1" ((USItype)(al)), \
	"g" ((USItype)(bl)))
/*
 * sub_ddmmss(sh, sl, ah, al, bh, bl) -- VAX two-word subtraction.
 *
 * Same contract as the generic C fallback later in this file:
 * (sh, sl) = (ah, al) - (bh, bl).  ah/al are preloaded into the outputs
 * via the matching constraints "0"/"1"; "subl2" subtracts bl from sl and
 * "sbwc" subtracts bh from sh.
 * NOTE(review): "sbwc" is presumably subtract-with-carry -- confirm.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subl2 %5,%1\n" \
	"sbwc %3,%0" \
	: "=g" ((USItype)(sh)), \
	"=&g" ((USItype)(sl)) \
	: "0" ((USItype)(ah)), \
	"g" ((USItype)(bh)), \
	"1" ((USItype)(al)), \
	"g" ((USItype)(bl)))
/*
 * umul_ppmm(xh, xl, m0, m1) -- VAX variant using "emul".
 *
 * "emul" writes a 64-bit result into __xx.__ll (with a literal 0 as its
 * third operand).  The union's struct lists __l before __h; the two halves
 * are copied to xl and xh.  Operands are copied into locals first, so each
 * is evaluated once.  Afterwards __m1 is added to xh when bit 31 of __m0
 * is set and __m0 when bit 31 of __m1 is set.
 * NOTE(review): that adjustment looks like the usual signed-to-unsigned
 * high-word correction, i.e. "emul" is presumably a signed multiply --
 * confirm against the VAX architecture manual.
 */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	union {UDItype __ll; \
	struct {USItype __l, __h; } __i; \
	} __xx; \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("emul %1,%2,$0,%0" \
	: "=g" (__xx.__ll) \
	: "g" (__m0), \
	"g" (__m1)); \
	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
	(xh) += ((((SItype) __m0 >> 31) & __m1) \
	+ (((SItype) __m1 >> 31) & __m0)); \
} while (0)
/*
 * sdiv_qrnnd(q, r, n1, n0, d) -- VAX signed division using "ediv".
 *
 * The 64-bit dividend is assembled through the union: n1 is stored in the
 * __h half and n0 in the __l half.  "ediv" then takes d and the combined
 * dividend as inputs and writes q and r.  Note that n1 and n0 are used
 * without parentheses in the assignments, and q, r and d are passed to the
 * asm without casts.
 */
#define sdiv_qrnnd(q, r, n1, n0, d) \
do { \
	union {DItype __ll; \
	struct {SItype __l, __h; } __i; \
	} __xx; \
	__xx.__i.__h = n1; __xx.__i.__l = n0; \
	__asm__ ("ediv %3,%2,%0,%1" \
	: "=g" (q), "=g" (r) \
	: "g" (__xx.__ll), "g" (d)); \
} while (0)
   1187#endif /* __vax__ */
   1188
   1189/***************************************
   1190	**************  Z8000	****************
   1191	***************************************/
   1192#if defined(__z8000__) && W_TYPE_SIZE == 16
/*
 * add_ssaaaa(sh, sl, ah, al, bh, bl) -- Z8000 two-word addition (this
 * section is only compiled when W_TYPE_SIZE == 16).
 *
 * Same contract as the generic C fallback later in this file:
 * (sh, sl) = (ah, al) + (bh, bl).  ah/al are preloaded into the outputs
 * via the matching constraints "%0"/"%1"; "add" adds bl into sl and "adc"
 * adds bh into sh.  Operands are accessed as unsigned int and printed
 * with the %H modifier.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("add %H1,%H5\n\tadc  %H0,%H3" \
	: "=r" ((unsigned int)(sh)), \
	"=&r" ((unsigned int)(sl)) \
	: "%0" ((unsigned int)(ah)), \
	"r" ((unsigned int)(bh)), \
	"%1" ((unsigned int)(al)), \
	"rQR" ((unsigned int)(bl)))
/*
 * sub_ddmmss(sh, sl, ah, al, bh, bl) -- Z8000 two-word subtraction (this
 * section is only compiled when W_TYPE_SIZE == 16).
 *
 * Same contract as the generic C fallback later in this file:
 * (sh, sl) = (ah, al) - (bh, bl).  ah/al are preloaded into the outputs
 * via the matching constraints "0"/"1"; "sub" subtracts bl from sl and
 * "sbc" subtracts bh from sh.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3" \
	: "=r" ((unsigned int)(sh)), \
	"=&r" ((unsigned int)(sl)) \
	: "0" ((unsigned int)(ah)), \
	"r" ((unsigned int)(bh)), \
	"1" ((unsigned int)(al)), \
	"rQR" ((unsigned int)(bl)))
/*
 * umul_ppmm(xh, xl, m0, m1) -- Z8000 variant using "mult".
 *
 * The two halves of the result are the union members __h and __l (the
 * struct lists __h first); __m0 is preloaded into the __l output through
 * the matching constraint "%1".  The halves are copied to xh and xl, and
 * then __m1 is added to xh when bit 15 of __m0 is set and __m0 when bit 15
 * of __m1 is set.
 * NOTE(review): that adjustment looks like the usual signed-to-unsigned
 * high-word correction, i.e. "mult" is presumably a signed multiply --
 * confirm against the Z8000 manual.
 */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	union {long int __ll; \
	struct {unsigned int __h, __l; } __i; \
	} __xx; \
	unsigned int __m0 = (m0), __m1 = (m1); \
	__asm__ ("mult      %S0,%H3" \
	: "=r" (__xx.__i.__h), \
	"=r" (__xx.__i.__l) \
	: "%1" (__m0), \
	"rQR" (__m1)); \
	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
	(xh) += ((((signed int) __m0 >> 15) & __m1) \
	+ (((signed int) __m1 >> 15) & __m0)); \
} while (0)
   1224#endif /* __z8000__ */
   1225
   1226#endif /* __GNUC__ */
   1227
   1228/***************************************
   1229	***********  Generic Versions	********
   1230	***************************************/
   1231#if !defined(umul_ppmm) && defined(__umulsidi3)
   1232#define umul_ppmm(ph, pl, m0, m1) \
   1233{ \
   1234	UDWtype __ll = __umulsidi3(m0, m1); \
   1235	ph = (UWtype) (__ll >> W_TYPE_SIZE); \
   1236	pl = (UWtype) __ll; \
   1237}
   1238#endif
   1239
   1240#if !defined(__umulsidi3)
   1241#define __umulsidi3(u, v) \
   1242	({UWtype __hi, __lo; \
   1243	umul_ppmm(__hi, __lo, u, v); \
   1244	((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
   1245#endif
   1246
   1247	/* If this machine has no inline assembler, use C macros.  */
   1248
   1249#if !defined(add_ssaaaa)
   1250#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   1251do { \
   1252	UWtype __x; \
   1253	__x = (al) + (bl); \
   1254	(sh) = (ah) + (bh) + (__x < (al)); \
   1255	(sl) = __x; \
   1256} while (0)
   1257#endif
   1258
   1259#if !defined(sub_ddmmss)
   1260#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   1261do { \
   1262	UWtype __x; \
   1263	__x = (al) - (bl); \
   1264	(sh) = (ah) - (bh) - (__x > (al)); \
   1265	(sl) = __x; \
   1266} while (0)
   1267#endif
   1268
   1269#if !defined(umul_ppmm)
   1270#define umul_ppmm(w1, w0, u, v) \
   1271do { \
   1272	UWtype __x0, __x1, __x2, __x3; \
   1273	UHWtype __ul, __vl, __uh, __vh; \
   1274	UWtype __u = (u), __v = (v); \
   1275	\
   1276	__ul = __ll_lowpart(__u); \
   1277	__uh = __ll_highpart(__u); \
   1278	__vl = __ll_lowpart(__v); \
   1279	__vh = __ll_highpart(__v); \
   1280	\
   1281	__x0 = (UWtype) __ul * __vl; \
   1282	__x1 = (UWtype) __ul * __vh; \
   1283	__x2 = (UWtype) __uh * __vl; \
   1284	__x3 = (UWtype) __uh * __vh; \
   1285	\
   1286	__x1 += __ll_highpart(__x0);/* this can't give carry */ \
   1287	__x1 += __x2;		/* but this indeed can */ \
   1288	if (__x1 < __x2)		/* did we get it? */ \
   1289	__x3 += __ll_B;		/* yes, add it in the proper pos. */ \
   1290	\
   1291	(w1) = __x3 + __ll_highpart(__x1); \
   1292	(w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \
   1293} while (0)
   1294#endif
   1295
   1296#if !defined(umul_ppmm)
   1297#define smul_ppmm(w1, w0, u, v) \
   1298do { \
   1299	UWtype __w1; \
   1300	UWtype __m0 = (u), __m1 = (v); \
   1301	umul_ppmm(__w1, w0, __m0, __m1); \
   1302	(w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
   1303	- (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
   1304} while (0)
   1305#endif
   1306
   1307	/* Define this unconditionally, so it can be used for debugging.  */
   1308#define __udiv_qrnnd_c(q, r, n1, n0, d) \
   1309do { \
   1310	UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
   1311	__d1 = __ll_highpart(d); \
   1312	__d0 = __ll_lowpart(d); \
   1313	\
   1314	__r1 = (n1) % __d1; \
   1315	__q1 = (n1) / __d1; \
   1316	__m = (UWtype) __q1 * __d0; \
   1317	__r1 = __r1 * __ll_B | __ll_highpart(n0); \
   1318	if (__r1 < __m) { \
   1319		__q1--, __r1 += (d); \
   1320		if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
   1321		if (__r1 < __m) \
   1322			__q1--, __r1 += (d); \
   1323	} \
   1324	__r1 -= __m; \
   1325	\
   1326	__r0 = __r1 % __d1; \
   1327	__q0 = __r1 / __d1; \
   1328	__m = (UWtype) __q0 * __d0; \
   1329	__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
   1330	if (__r0 < __m) { \
   1331		__q0--, __r0 += (d); \
   1332		if (__r0 >= (d)) \
   1333			if (__r0 < __m) \
   1334				__q0--, __r0 += (d); \
   1335	} \
   1336	__r0 -= __m; \
   1337	\
   1338	(q) = (UWtype) __q1 * __ll_B | __q0; \
   1339	(r) = __r0; \
   1340} while (0)
   1341
   1342/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   1343	__udiv_w_sdiv (defined in libgcc or elsewhere).  */
   1344#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
   1345#define udiv_qrnnd(q, r, nh, nl, d) \
   1346do { \
   1347	UWtype __r; \
   1348	(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
   1349	(r) = __r; \
   1350} while (0)
   1351#endif
   1352
   1353	/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
/*
 * Last-resort selection: when nothing above supplied udiv_qrnnd, alias it
 * to the portable __udiv_qrnnd_c and set UDIV_NEEDS_NORMALIZATION to 1.
 */
#if !defined(udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

/*
 * Make sure UDIV_NEEDS_NORMALIZATION always has a value: default it to 0
 * when no variant of udiv_qrnnd asked for normalization.
 */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif
   1361#endif