cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

fmpyfadd.c (79585B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
      4 *
      5 * Floating-point emulation code
      6 *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
      7 */
      8/*
      9 * BEGIN_DESC
     10 *
     11 *  File:
     12 *	@(#)	pa/spmath/fmpyfadd.c		$Revision: 1.1 $
     13 *
     14 *  Purpose:
     15 *	Double Floating-point Multiply Fused Add
     16 *	Double Floating-point Multiply Negate Fused Add
     17 *	Single Floating-point Multiply Fused Add
     18 *	Single Floating-point Multiply Negate Fused Add
     19 *
     20 *  External Interfaces:
     21 *	dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
     22 *	dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
     23 *	sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
     24 *	sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
     25 *
     26 *  Internal Interfaces:
     27 *
     28 *  Theory:
     29 *	<<please update with a overview of the operation of this file>>
     30 *
     31 * END_DESC
     32*/
     33
     34
     35#include "float.h"
     36#include "sgl_float.h"
     37#include "dbl_float.h"
     38
     39
     40/*
     41 *  Double Floating-point Multiply Fused Add
     42 */
     43
     44int
     45dbl_fmpyfadd(
     46	    dbl_floating_point *src1ptr,
     47	    dbl_floating_point *src2ptr,
     48	    dbl_floating_point *src3ptr,
     49	    unsigned int *status,
     50	    dbl_floating_point *dstptr)
     51{
     52	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
     53	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
     54	unsigned int rightp1, rightp2, rightp3, rightp4;
     55	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
     56	register int mpy_exponent, add_exponent, count;
     57	boolean inexact = FALSE, is_tiny = FALSE;
     58
     59	unsigned int signlessleft1, signlessright1, save;
     60	register int result_exponent, diff_exponent;
     61	int sign_save, jumpsize;
     62	
     63	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
     64	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
     65	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
     66
     67	/* 
     68	 * set sign bit of result of multiply
     69	 */
     70	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 
     71		Dbl_setnegativezerop1(resultp1); 
     72	else Dbl_setzerop1(resultp1);
     73
     74	/*
     75	 * Generate multiply exponent 
     76	 */
     77	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
     78
     79	/*
     80	 * check first operand for NaN's or infinity
     81	 */
     82	if (Dbl_isinfinity_exponent(opnd1p1)) {
     83		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
     84			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
     85			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
     86				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
     87					/* 
     88					 * invalid since operands are infinity 
     89					 * and zero 
     90					 */
     91					if (Is_invalidtrap_enabled())
     92						return(OPC_2E_INVALIDEXCEPTION);
     93					Set_invalidflag();
     94					Dbl_makequietnan(resultp1,resultp2);
     95					Dbl_copytoptr(resultp1,resultp2,dstptr);
     96					return(NOEXCEPTION);
     97				}
     98				/*
     99				 * Check third operand for infinity with a
    100				 *  sign opposite of the multiply result
    101				 */
    102				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
    103				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
    104					/* 
    105					 * invalid since attempting a magnitude
    106					 * subtraction of infinities
    107					 */
    108					if (Is_invalidtrap_enabled())
    109						return(OPC_2E_INVALIDEXCEPTION);
    110					Set_invalidflag();
    111					Dbl_makequietnan(resultp1,resultp2);
    112					Dbl_copytoptr(resultp1,resultp2,dstptr);
    113					return(NOEXCEPTION);
    114				}
    115
    116				/*
    117			 	 * return infinity
    118			 	 */
    119				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
    120				Dbl_copytoptr(resultp1,resultp2,dstptr);
    121				return(NOEXCEPTION);
    122			}
    123		}
    124		else {
    125			/*
    126		 	 * is NaN; signaling or quiet?
    127		 	 */
    128			if (Dbl_isone_signaling(opnd1p1)) {
    129				/* trap if INVALIDTRAP enabled */
    130				if (Is_invalidtrap_enabled()) 
    131			    		return(OPC_2E_INVALIDEXCEPTION);
    132				/* make NaN quiet */
    133				Set_invalidflag();
    134				Dbl_set_quiet(opnd1p1);
    135			}
    136			/* 
    137			 * is second operand a signaling NaN? 
    138			 */
    139			else if (Dbl_is_signalingnan(opnd2p1)) {
    140				/* trap if INVALIDTRAP enabled */
    141				if (Is_invalidtrap_enabled())
    142			    		return(OPC_2E_INVALIDEXCEPTION);
    143				/* make NaN quiet */
    144				Set_invalidflag();
    145				Dbl_set_quiet(opnd2p1);
    146				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
    147				return(NOEXCEPTION);
    148			}
    149			/* 
    150			 * is third operand a signaling NaN? 
    151			 */
    152			else if (Dbl_is_signalingnan(opnd3p1)) {
    153				/* trap if INVALIDTRAP enabled */
    154				if (Is_invalidtrap_enabled())
    155			    		return(OPC_2E_INVALIDEXCEPTION);
    156				/* make NaN quiet */
    157				Set_invalidflag();
    158				Dbl_set_quiet(opnd3p1);
    159				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    160				return(NOEXCEPTION);
    161			}
    162			/*
    163		 	 * return quiet NaN
    164		 	 */
    165			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
    166			return(NOEXCEPTION);
    167		}
    168	}
    169
    170	/*
    171	 * check second operand for NaN's or infinity
    172	 */
    173	if (Dbl_isinfinity_exponent(opnd2p1)) {
    174		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
    175			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
    176				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
    177					/* 
    178					 * invalid since multiply operands are
    179					 * zero & infinity
    180					 */
    181					if (Is_invalidtrap_enabled())
    182						return(OPC_2E_INVALIDEXCEPTION);
    183					Set_invalidflag();
    184					Dbl_makequietnan(opnd2p1,opnd2p2);
    185					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
    186					return(NOEXCEPTION);
    187				}
    188
    189				/*
    190				 * Check third operand for infinity with a
    191				 *  sign opposite of the multiply result
    192				 */
    193				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
    194				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
    195					/* 
    196					 * invalid since attempting a magnitude
    197					 * subtraction of infinities
    198					 */
    199					if (Is_invalidtrap_enabled())
    200				       		return(OPC_2E_INVALIDEXCEPTION);
    201				       	Set_invalidflag();
    202				       	Dbl_makequietnan(resultp1,resultp2);
    203					Dbl_copytoptr(resultp1,resultp2,dstptr);
    204					return(NOEXCEPTION);
    205				}
    206
    207				/*
    208				 * return infinity
    209				 */
    210				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
    211				Dbl_copytoptr(resultp1,resultp2,dstptr);
    212				return(NOEXCEPTION);
    213			}
    214		}
    215		else {
    216			/*
    217			 * is NaN; signaling or quiet?
    218			 */
    219			if (Dbl_isone_signaling(opnd2p1)) {
    220				/* trap if INVALIDTRAP enabled */
    221				if (Is_invalidtrap_enabled())
    222					return(OPC_2E_INVALIDEXCEPTION);
    223				/* make NaN quiet */
    224				Set_invalidflag();
    225				Dbl_set_quiet(opnd2p1);
    226			}
    227			/* 
    228			 * is third operand a signaling NaN? 
    229			 */
    230			else if (Dbl_is_signalingnan(opnd3p1)) {
    231			       	/* trap if INVALIDTRAP enabled */
    232			       	if (Is_invalidtrap_enabled())
    233				   		return(OPC_2E_INVALIDEXCEPTION);
    234			       	/* make NaN quiet */
    235			       	Set_invalidflag();
    236			       	Dbl_set_quiet(opnd3p1);
    237				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    238		       		return(NOEXCEPTION);
    239			}
    240			/*
    241			 * return quiet NaN
    242			 */
    243			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
    244			return(NOEXCEPTION);
    245		}
    246	}
    247
    248	/*
    249	 * check third operand for NaN's or infinity
    250	 */
    251	if (Dbl_isinfinity_exponent(opnd3p1)) {
    252		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
    253			/* return infinity */
    254			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    255			return(NOEXCEPTION);
    256		} else {
    257			/*
    258			 * is NaN; signaling or quiet?
    259			 */
    260			if (Dbl_isone_signaling(opnd3p1)) {
    261				/* trap if INVALIDTRAP enabled */
    262				if (Is_invalidtrap_enabled())
    263					return(OPC_2E_INVALIDEXCEPTION);
    264				/* make NaN quiet */
    265				Set_invalidflag();
    266				Dbl_set_quiet(opnd3p1);
    267			}
    268			/*
    269			 * return quiet NaN
    270 			 */
    271			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    272			return(NOEXCEPTION);
    273		}
    274    	}
    275
    276	/*
    277	 * Generate multiply mantissa
    278	 */
    279	if (Dbl_isnotzero_exponent(opnd1p1)) {
    280		/* set hidden bit */
    281		Dbl_clear_signexponent_set_hidden(opnd1p1);
    282	}
    283	else {
    284		/* check for zero */
    285		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
    286			/*
    287			 * Perform the add opnd3 with zero here.
    288			 */
    289			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
    290				if (Is_rounding_mode(ROUNDMINUS)) {
    291					Dbl_or_signs(opnd3p1,resultp1);
    292				} else {
    293					Dbl_and_signs(opnd3p1,resultp1);
    294				}
    295			}
    296			/*
    297			 * Now let's check for trapped underflow case.
    298			 */
    299			else if (Dbl_iszero_exponent(opnd3p1) &&
    300			         Is_underflowtrap_enabled()) {
    301                    		/* need to normalize results mantissa */
    302                    		sign_save = Dbl_signextendedsign(opnd3p1);
    303				result_exponent = 0;
    304                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
    305                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
    306                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
    307                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
    308							unfl);
    309                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    310                    		/* inexact = FALSE */
    311                    		return(OPC_2E_UNDERFLOWEXCEPTION);
    312			}
    313			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    314			return(NOEXCEPTION);
    315		}
    316		/* is denormalized, adjust exponent */
    317		Dbl_clear_signexponent(opnd1p1);
    318		Dbl_leftshiftby1(opnd1p1,opnd1p2);
    319		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
    320	}
    321	/* opnd2 needs to have hidden bit set with msb in hidden bit */
    322	if (Dbl_isnotzero_exponent(opnd2p1)) {
    323		Dbl_clear_signexponent_set_hidden(opnd2p1);
    324	}
    325	else {
    326		/* check for zero */
    327		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
    328			/*
    329			 * Perform the add opnd3 with zero here.
    330			 */
    331			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
    332				if (Is_rounding_mode(ROUNDMINUS)) {
    333					Dbl_or_signs(opnd3p1,resultp1);
    334				} else {
    335					Dbl_and_signs(opnd3p1,resultp1);
    336				}
    337			}
    338			/*
    339			 * Now let's check for trapped underflow case.
    340			 */
    341			else if (Dbl_iszero_exponent(opnd3p1) &&
    342			    Is_underflowtrap_enabled()) {
    343                    		/* need to normalize results mantissa */
    344                    		sign_save = Dbl_signextendedsign(opnd3p1);
    345				result_exponent = 0;
    346                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
    347                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
    348                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
    349                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
    350							unfl);
    351                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    352                    		/* inexact = FALSE */
    353				return(OPC_2E_UNDERFLOWEXCEPTION);
    354			}
    355			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    356			return(NOEXCEPTION);
    357		}
    358		/* is denormalized; want to normalize */
    359		Dbl_clear_signexponent(opnd2p1);
    360		Dbl_leftshiftby1(opnd2p1,opnd2p2);
    361		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
    362	}
    363
    364	/* Multiply the first two source mantissas together */
    365
    366	/* 
    367	 * The intermediate result will be kept in tmpres,
    368	 * which needs enough room for 106 bits of mantissa,
    369	 * so lets call it a Double extended.
    370	 */
    371	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
    372
    373	/* 
    374	 * Four bits at a time are inspected in each loop, and a 
    375	 * simple shift and add multiply algorithm is used. 
    376	 */ 
    377	for (count = DBL_P-1; count >= 0; count -= 4) {
    378		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
    379		if (Dbit28p2(opnd1p2)) {
    380	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
    381			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 
    382			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
    383		}
    384		if (Dbit29p2(opnd1p2)) {
    385			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
    386			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
    387		}
    388		if (Dbit30p2(opnd1p2)) {
    389			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
    390			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
    391		}
    392		if (Dbit31p2(opnd1p2)) {
    393			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
    394			 opnd2p1, opnd2p2, 0, 0);
    395		}
    396		Dbl_rightshiftby4(opnd1p1,opnd1p2);
    397	}
    398	if (Is_dexthiddenoverflow(tmpresp1)) {
    399		/* result mantissa >= 2 (mantissa overflow) */
    400		mpy_exponent++;
    401		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
    402	}
    403
    404	/*
    405	 * Restore the sign of the mpy result which was saved in resultp1.
    406	 * The exponent will continue to be kept in mpy_exponent.
    407	 */
    408	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
    409
    410	/* 
    411	 * No rounding is required, since the result of the multiply
    412	 * is exact in the extended format.
    413	 */
    414
    415	/*
    416	 * Now we are ready to perform the add portion of the operation.
    417	 *
    418	 * The exponents need to be kept as integers for now, since the
    419	 * multiply result might not fit into the exponent field.  We
    420	 * can't overflow or underflow because of this yet, since the
    421	 * add could bring the final result back into range.
    422	 */
    423	add_exponent = Dbl_exponent(opnd3p1);
    424
    425	/*
    426	 * Check for denormalized or zero add operand.
    427	 */
    428	if (add_exponent == 0) {
    429		/* check for zero */
    430		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
    431			/* right is zero */
    432			/* Left can't be zero and must be result.
    433			 *
    434			 * The final result is now in tmpres and mpy_exponent,
    435			 * and needs to be rounded and squeezed back into
    436			 * double precision format from double extended.
    437			 */
    438			result_exponent = mpy_exponent;
    439			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
    440				resultp1,resultp2,resultp3,resultp4);
    441			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
    442			goto round;
    443		}
    444
    445		/* 
    446		 * Neither are zeroes.  
    447		 * Adjust exponent and normalize add operand.
    448		 */
    449		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
    450		Dbl_clear_signexponent(opnd3p1);
    451		Dbl_leftshiftby1(opnd3p1,opnd3p2);
    452		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
    453		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
    454	} else {
    455		Dbl_clear_exponent_set_hidden(opnd3p1);
    456	}
    457	/*
    458	 * Copy opnd3 to the double extended variable called right.
    459	 */
    460	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
    461
    462	/*
    463	 * A zero "save" helps discover equal operands (for later),
    464	 * and is used in swapping operands (if needed).
    465	 */
    466	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
    467
    468	/*
    469	 * Compare magnitude of operands.
    470	 */
    471	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
    472	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
    473	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
    474	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
    475		/*
    476		 * Set the left operand to the larger one by XOR swap.
    477		 * First finish the first word "save".
    478		 */
    479		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
    480		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
    481		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
    482			rightp2,rightp3,rightp4);
    483		/* also setup exponents used in rest of routine */
    484		diff_exponent = add_exponent - mpy_exponent;
    485		result_exponent = add_exponent;
    486	} else {
    487		/* also setup exponents used in rest of routine */
    488		diff_exponent = mpy_exponent - add_exponent;
    489		result_exponent = mpy_exponent;
    490	}
    491	/* Invariant: left is not smaller than right. */
    492
    493	/*
    494	 * Special case alignment of operands that would force alignment
    495	 * beyond the extent of the extension.  A further optimization
    496	 * could special case this but only reduces the path length for
    497	 * this infrequent case.
    498	 */
    499	if (diff_exponent > DBLEXT_THRESHOLD) {
    500		diff_exponent = DBLEXT_THRESHOLD;
    501	}
    502
    503	/* Align right operand by shifting it to the right */
    504	Dblext_clear_sign(rightp1);
    505	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
    506		/*shifted by*/diff_exponent);
    507	
    508	/* Treat sum and difference of the operands separately. */
    509	if ((int)save < 0) {
    510		/*
    511		 * Difference of the two operands.  Overflow can occur if the
    512		 * multiply overflowed.  A borrow can occur out of the hidden
    513		 * bit and force a post normalization phase.
    514		 */
    515		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
    516			rightp1,rightp2,rightp3,rightp4,
    517			resultp1,resultp2,resultp3,resultp4);
    518		sign_save = Dbl_signextendedsign(resultp1);
    519		if (Dbl_iszero_hidden(resultp1)) {
    520			/* Handle normalization */
    521		/* A straightforward algorithm would now shift the
    522		 * result and extension left until the hidden bit
    523		 * becomes one.  Not all of the extension bits need
    524		 * participate in the shift.  Only the two most 
    525		 * significant bits (round and guard) are needed.
    526		 * If only a single shift is needed then the guard
    527		 * bit becomes a significant low order bit and the
    528		 * extension must participate in the rounding.
    529		 * If more than a single shift is needed, then all
    530		 * bits to the right of the guard bit are zeros, 
    531		 * and the guard bit may or may not be zero. */
    532			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
    533				resultp4);
    534
    535			/* Need to check for a zero result.  The sign and
    536			 * exponent fields have already been zeroed.  The more
    537			 * efficient test of the full object can be used.
    538			 */
    539			 if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
    540				/* Must have been "x-x" or "x+(-x)". */
    541				if (Is_rounding_mode(ROUNDMINUS))
    542					Dbl_setone_sign(resultp1);
    543				Dbl_copytoptr(resultp1,resultp2,dstptr);
    544				return(NOEXCEPTION);
    545			}
    546			result_exponent--;
    547
    548			/* Look to see if normalization is finished. */
    549			if (Dbl_isone_hidden(resultp1)) {
    550				/* No further normalization is needed */
    551				goto round;
    552			}
    553
    554			/* Discover first one bit to determine shift amount.
    555			 * Use a modified binary search.  We have already
    556			 * shifted the result one position right and still
    557			 * not found a one so the remainder of the extension
    558			 * must be zero and simplifies rounding. */
    559			/* Scan bytes */
    560			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
    561				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
    562				result_exponent -= 8;
    563			}
    564			/* Now narrow it down to the nibble */
    565			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
    566				/* The lower nibble contains the
    567				 * normalizing one */
    568				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
    569				result_exponent -= 4;
    570			}
    571			/* Select case where first bit is set (already
    572			 * normalized) otherwise select the proper shift. */
    573			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
    574			if (jumpsize <= 7) switch(jumpsize) {
    575			case 1:
    576				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
    577					resultp4);
    578				result_exponent -= 3;
    579				break;
    580			case 2:
    581			case 3:
    582				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
    583					resultp4);
    584				result_exponent -= 2;
    585				break;
    586			case 4:
    587			case 5:
    588			case 6:
    589			case 7:
    590				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
    591					resultp4);
    592				result_exponent -= 1;
    593				break;
    594			}
    595		} /* end if (hidden...)... */
    596	/* Fall through and round */
    597	} /* end if (save < 0)... */
    598	else {
    599		/* Add magnitudes */
    600		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
    601			rightp1,rightp2,rightp3,rightp4,
    602			/*to*/resultp1,resultp2,resultp3,resultp4);
    603		sign_save = Dbl_signextendedsign(resultp1);
    604		if (Dbl_isone_hiddenoverflow(resultp1)) {
    605	    		/* Prenormalization required. */
    606	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
    607				resultp4);
    608	    		result_exponent++;
    609		} /* end if hiddenoverflow... */
    610	} /* end else ...add magnitudes... */
    611
    612	/* Round the result.  If the extension and lower two words are
    613	 * all zeros, then the result is exact.  Otherwise round in the
    614	 * correct direction.  Underflow is possible. If a postnormalization
    615	 * is necessary, then the mantissa is all zeros so no shift is needed.
    616	 */
    617  round:
    618	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
    619		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
    620			result_exponent,is_tiny);
    621	}
    622	Dbl_set_sign(resultp1,/*using*/sign_save);
    623	if (Dblext_isnotzero_mantissap3(resultp3) || 
    624	    Dblext_isnotzero_mantissap4(resultp4)) {
    625		inexact = TRUE;
    626		switch(Rounding_mode()) {
    627		case ROUNDNEAREST: /* The default. */
    628			if (Dblext_isone_highp3(resultp3)) {
    629				/* at least 1/2 ulp */
    630				if (Dblext_isnotzero_low31p3(resultp3) ||
    631				    Dblext_isnotzero_mantissap4(resultp4) ||
    632				    Dblext_isone_lowp2(resultp2)) {
    633					/* either exactly half way and odd or
    634					 * more than 1/2ulp */
    635					Dbl_increment(resultp1,resultp2);
    636				}
    637			}
    638	    		break;
    639
    640		case ROUNDPLUS:
    641	    		if (Dbl_iszero_sign(resultp1)) {
    642				/* Round up positive results */
    643				Dbl_increment(resultp1,resultp2);
    644			}
    645			break;
    646	    
    647		case ROUNDMINUS:
    648	    		if (Dbl_isone_sign(resultp1)) {
    649				/* Round down negative results */
    650				Dbl_increment(resultp1,resultp2);
    651			}
    652	    
    653		case ROUNDZERO:;
    654			/* truncate is simple */
    655		} /* end switch... */
    656		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
    657	}
    658	if (result_exponent >= DBL_INFINITY_EXPONENT) {
    659                /* trap if OVERFLOWTRAP enabled */
    660                if (Is_overflowtrap_enabled()) {
    661                        /*
    662                         * Adjust bias of result
    663                         */
    664                        Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
    665                        Dbl_copytoptr(resultp1,resultp2,dstptr);
    666                        if (inexact)
    667                            if (Is_inexacttrap_enabled())
    668                                return (OPC_2E_OVERFLOWEXCEPTION |
    669					OPC_2E_INEXACTEXCEPTION);
    670                            else Set_inexactflag();
    671                        return (OPC_2E_OVERFLOWEXCEPTION);
    672                }
    673                inexact = TRUE;
    674                Set_overflowflag();
    675                /* set result to infinity or largest number */
    676                Dbl_setoverflow(resultp1,resultp2);
    677
    678	} else if (result_exponent <= 0) {	/* underflow case */
    679		if (Is_underflowtrap_enabled()) {
    680                        /*
    681                         * Adjust bias of result
    682                         */
    683                	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
    684			Dbl_copytoptr(resultp1,resultp2,dstptr);
    685                        if (inexact)
    686                            if (Is_inexacttrap_enabled())
    687                                return (OPC_2E_UNDERFLOWEXCEPTION |
    688					OPC_2E_INEXACTEXCEPTION);
    689                            else Set_inexactflag();
    690	    		return(OPC_2E_UNDERFLOWEXCEPTION);
    691		}
    692		else if (inexact && is_tiny) Set_underflowflag();
    693	}
    694	else Dbl_set_exponent(resultp1,result_exponent);
    695	Dbl_copytoptr(resultp1,resultp2,dstptr);
    696	if (inexact) 
    697		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
    698		else Set_inexactflag();
    699    	return(NOEXCEPTION);
    700}
    701
    702/*
    703 *  Double Floating-point Multiply Negate Fused Add
    704 */
    705
    706dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
    707
    708dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
    709unsigned int *status;
    710{
    711	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
    712	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
    713	unsigned int rightp1, rightp2, rightp3, rightp4;
    714	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
    715	register int mpy_exponent, add_exponent, count;
    716	boolean inexact = FALSE, is_tiny = FALSE;
    717
    718	unsigned int signlessleft1, signlessright1, save;
    719	register int result_exponent, diff_exponent;
    720	int sign_save, jumpsize;
    721	
    722	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
    723	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
    724	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
    725
    726	/* 
    727	 * set sign bit of result of multiply
    728	 */
    729	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 
    730		Dbl_setzerop1(resultp1);
    731	else
    732		Dbl_setnegativezerop1(resultp1); 
    733
    734	/*
    735	 * Generate multiply exponent 
    736	 */
    737	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
    738
    739	/*
    740	 * check first operand for NaN's or infinity
    741	 */
    742	if (Dbl_isinfinity_exponent(opnd1p1)) {
    743		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
    744			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
    745			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
    746				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
    747					/* 
    748					 * invalid since operands are infinity 
    749					 * and zero 
    750					 */
    751					if (Is_invalidtrap_enabled())
    752						return(OPC_2E_INVALIDEXCEPTION);
    753					Set_invalidflag();
    754					Dbl_makequietnan(resultp1,resultp2);
    755					Dbl_copytoptr(resultp1,resultp2,dstptr);
    756					return(NOEXCEPTION);
    757				}
    758				/*
    759				 * Check third operand for infinity with a
    760				 *  sign opposite of the multiply result
    761				 */
    762				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
    763				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
    764					/* 
    765					 * invalid since attempting a magnitude
    766					 * subtraction of infinities
    767					 */
    768					if (Is_invalidtrap_enabled())
    769						return(OPC_2E_INVALIDEXCEPTION);
    770					Set_invalidflag();
    771					Dbl_makequietnan(resultp1,resultp2);
    772					Dbl_copytoptr(resultp1,resultp2,dstptr);
    773					return(NOEXCEPTION);
    774				}
    775
    776				/*
    777			 	 * return infinity
    778			 	 */
    779				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
    780				Dbl_copytoptr(resultp1,resultp2,dstptr);
    781				return(NOEXCEPTION);
    782			}
    783		}
    784		else {
    785			/*
    786		 	 * is NaN; signaling or quiet?
    787		 	 */
    788			if (Dbl_isone_signaling(opnd1p1)) {
    789				/* trap if INVALIDTRAP enabled */
    790				if (Is_invalidtrap_enabled()) 
    791			    		return(OPC_2E_INVALIDEXCEPTION);
    792				/* make NaN quiet */
    793				Set_invalidflag();
    794				Dbl_set_quiet(opnd1p1);
    795			}
    796			/* 
    797			 * is second operand a signaling NaN? 
    798			 */
    799			else if (Dbl_is_signalingnan(opnd2p1)) {
    800				/* trap if INVALIDTRAP enabled */
    801				if (Is_invalidtrap_enabled())
    802			    		return(OPC_2E_INVALIDEXCEPTION);
    803				/* make NaN quiet */
    804				Set_invalidflag();
    805				Dbl_set_quiet(opnd2p1);
    806				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
    807				return(NOEXCEPTION);
    808			}
    809			/* 
    810			 * is third operand a signaling NaN? 
    811			 */
    812			else if (Dbl_is_signalingnan(opnd3p1)) {
    813				/* trap if INVALIDTRAP enabled */
    814				if (Is_invalidtrap_enabled())
    815			    		return(OPC_2E_INVALIDEXCEPTION);
    816				/* make NaN quiet */
    817				Set_invalidflag();
    818				Dbl_set_quiet(opnd3p1);
    819				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    820				return(NOEXCEPTION);
    821			}
    822			/*
    823		 	 * return quiet NaN
    824		 	 */
    825			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
    826			return(NOEXCEPTION);
    827		}
    828	}
    829
    830	/*
    831	 * check second operand for NaN's or infinity
    832	 */
    833	if (Dbl_isinfinity_exponent(opnd2p1)) {
    834		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
    835			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
    836				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
    837					/* 
    838					 * invalid since multiply operands are
    839					 * zero & infinity
    840					 */
    841					if (Is_invalidtrap_enabled())
    842						return(OPC_2E_INVALIDEXCEPTION);
    843					Set_invalidflag();
    844					Dbl_makequietnan(opnd2p1,opnd2p2);
    845					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
    846					return(NOEXCEPTION);
    847				}
    848
    849				/*
    850				 * Check third operand for infinity with a
    851				 *  sign opposite of the multiply result
    852				 */
    853				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
    854				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
    855					/* 
    856					 * invalid since attempting a magnitude
    857					 * subtraction of infinities
    858					 */
    859					if (Is_invalidtrap_enabled())
    860				       		return(OPC_2E_INVALIDEXCEPTION);
    861				       	Set_invalidflag();
    862				       	Dbl_makequietnan(resultp1,resultp2);
    863					Dbl_copytoptr(resultp1,resultp2,dstptr);
    864					return(NOEXCEPTION);
    865				}
    866
    867				/*
    868				 * return infinity
    869				 */
    870				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
    871				Dbl_copytoptr(resultp1,resultp2,dstptr);
    872				return(NOEXCEPTION);
    873			}
    874		}
    875		else {
    876			/*
    877			 * is NaN; signaling or quiet?
    878			 */
    879			if (Dbl_isone_signaling(opnd2p1)) {
    880				/* trap if INVALIDTRAP enabled */
    881				if (Is_invalidtrap_enabled())
    882					return(OPC_2E_INVALIDEXCEPTION);
    883				/* make NaN quiet */
    884				Set_invalidflag();
    885				Dbl_set_quiet(opnd2p1);
    886			}
    887			/* 
    888			 * is third operand a signaling NaN? 
    889			 */
    890			else if (Dbl_is_signalingnan(opnd3p1)) {
    891			       	/* trap if INVALIDTRAP enabled */
    892			       	if (Is_invalidtrap_enabled())
    893				   		return(OPC_2E_INVALIDEXCEPTION);
    894			       	/* make NaN quiet */
    895			       	Set_invalidflag();
    896			       	Dbl_set_quiet(opnd3p1);
    897				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    898		       		return(NOEXCEPTION);
    899			}
    900			/*
    901			 * return quiet NaN
    902			 */
    903			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
    904			return(NOEXCEPTION);
    905		}
    906	}
    907
    908	/*
    909	 * check third operand for NaN's or infinity
    910	 */
    911	if (Dbl_isinfinity_exponent(opnd3p1)) {
    912		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
    913			/* return infinity */
    914			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    915			return(NOEXCEPTION);
    916		} else {
    917			/*
    918			 * is NaN; signaling or quiet?
    919			 */
    920			if (Dbl_isone_signaling(opnd3p1)) {
    921				/* trap if INVALIDTRAP enabled */
    922				if (Is_invalidtrap_enabled())
    923					return(OPC_2E_INVALIDEXCEPTION);
    924				/* make NaN quiet */
    925				Set_invalidflag();
    926				Dbl_set_quiet(opnd3p1);
    927			}
    928			/*
    929			 * return quiet NaN
    930 			 */
    931			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    932			return(NOEXCEPTION);
    933		}
    934    	}
    935
    936	/*
    937	 * Generate multiply mantissa
    938	 */
    939	if (Dbl_isnotzero_exponent(opnd1p1)) {
    940		/* set hidden bit */
    941		Dbl_clear_signexponent_set_hidden(opnd1p1);
    942	}
    943	else {
    944		/* check for zero */
    945		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
    946			/*
    947			 * Perform the add opnd3 with zero here.
    948			 */
    949			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
    950				if (Is_rounding_mode(ROUNDMINUS)) {
    951					Dbl_or_signs(opnd3p1,resultp1);
    952				} else {
    953					Dbl_and_signs(opnd3p1,resultp1);
    954				}
    955			}
    956			/*
    957			 * Now let's check for trapped underflow case.
    958			 */
    959			else if (Dbl_iszero_exponent(opnd3p1) &&
    960			         Is_underflowtrap_enabled()) {
    961                    		/* need to normalize results mantissa */
    962                    		sign_save = Dbl_signextendedsign(opnd3p1);
    963				result_exponent = 0;
    964                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
    965                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
    966                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
    967                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
    968							unfl);
    969                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    970                    		/* inexact = FALSE */
    971                    		return(OPC_2E_UNDERFLOWEXCEPTION);
    972			}
    973			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
    974			return(NOEXCEPTION);
    975		}
    976		/* is denormalized, adjust exponent */
    977		Dbl_clear_signexponent(opnd1p1);
    978		Dbl_leftshiftby1(opnd1p1,opnd1p2);
    979		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
    980	}
    981	/* opnd2 needs to have hidden bit set with msb in hidden bit */
    982	if (Dbl_isnotzero_exponent(opnd2p1)) {
    983		Dbl_clear_signexponent_set_hidden(opnd2p1);
    984	}
    985	else {
    986		/* check for zero */
    987		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
    988			/*
    989			 * Perform the add opnd3 with zero here.
    990			 */
    991			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
    992				if (Is_rounding_mode(ROUNDMINUS)) {
    993					Dbl_or_signs(opnd3p1,resultp1);
    994				} else {
    995					Dbl_and_signs(opnd3p1,resultp1);
    996				}
    997			}
    998			/*
    999			 * Now let's check for trapped underflow case.
   1000			 */
   1001			else if (Dbl_iszero_exponent(opnd3p1) &&
   1002			    Is_underflowtrap_enabled()) {
   1003                    		/* need to normalize results mantissa */
   1004                    		sign_save = Dbl_signextendedsign(opnd3p1);
   1005				result_exponent = 0;
   1006                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
   1007                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
   1008                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
   1009                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
   1010							unfl);
   1011                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
   1012                    		/* inexact = FALSE */
   1013                    		return(OPC_2E_UNDERFLOWEXCEPTION);
   1014			}
   1015			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
   1016			return(NOEXCEPTION);
   1017		}
   1018		/* is denormalized; want to normalize */
   1019		Dbl_clear_signexponent(opnd2p1);
   1020		Dbl_leftshiftby1(opnd2p1,opnd2p2);
   1021		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
   1022	}
   1023
   1024	/* Multiply the first two source mantissas together */
   1025
   1026	/* 
   1027	 * The intermediate result will be kept in tmpres,
   1028	 * which needs enough room for 106 bits of mantissa,
   1029	 * so lets call it a Double extended.
   1030	 */
   1031	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
   1032
   1033	/* 
   1034	 * Four bits at a time are inspected in each loop, and a 
   1035	 * simple shift and add multiply algorithm is used. 
   1036	 */ 
   1037	for (count = DBL_P-1; count >= 0; count -= 4) {
   1038		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
   1039		if (Dbit28p2(opnd1p2)) {
   1040	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
   1041			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 
   1042			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
   1043		}
   1044		if (Dbit29p2(opnd1p2)) {
   1045			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
   1046			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
   1047		}
   1048		if (Dbit30p2(opnd1p2)) {
   1049			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
   1050			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
   1051		}
   1052		if (Dbit31p2(opnd1p2)) {
   1053			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
   1054			 opnd2p1, opnd2p2, 0, 0);
   1055		}
   1056		Dbl_rightshiftby4(opnd1p1,opnd1p2);
   1057	}
   1058	if (Is_dexthiddenoverflow(tmpresp1)) {
   1059		/* result mantissa >= 2 (mantissa overflow) */
   1060		mpy_exponent++;
   1061		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
   1062	}
   1063
   1064	/*
   1065	 * Restore the sign of the mpy result which was saved in resultp1.
   1066	 * The exponent will continue to be kept in mpy_exponent.
   1067	 */
   1068	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
   1069
   1070	/* 
   1071	 * No rounding is required, since the result of the multiply
   1072	 * is exact in the extended format.
   1073	 */
   1074
   1075	/*
   1076	 * Now we are ready to perform the add portion of the operation.
   1077	 *
   1078	 * The exponents need to be kept as integers for now, since the
   1079	 * multiply result might not fit into the exponent field.  We
   1080	 * can't overflow or underflow because of this yet, since the
   1081	 * add could bring the final result back into range.
   1082	 */
   1083	add_exponent = Dbl_exponent(opnd3p1);
   1084
   1085	/*
   1086	 * Check for denormalized or zero add operand.
   1087	 */
   1088	if (add_exponent == 0) {
   1089		/* check for zero */
   1090		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
   1091			/* right is zero */
   1092			/* Left can't be zero and must be result.
   1093			 *
   1094			 * The final result is now in tmpres and mpy_exponent,
   1095			 * and needs to be rounded and squeezed back into
   1096			 * double precision format from double extended.
   1097			 */
   1098			result_exponent = mpy_exponent;
   1099			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
   1100				resultp1,resultp2,resultp3,resultp4);
   1101			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
   1102			goto round;
   1103		}
   1104
   1105		/* 
   1106		 * Neither are zeroes.  
   1107		 * Adjust exponent and normalize add operand.
   1108		 */
   1109		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
   1110		Dbl_clear_signexponent(opnd3p1);
   1111		Dbl_leftshiftby1(opnd3p1,opnd3p2);
   1112		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
   1113		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
   1114	} else {
   1115		Dbl_clear_exponent_set_hidden(opnd3p1);
   1116	}
   1117	/*
   1118	 * Copy opnd3 to the double extended variable called right.
   1119	 */
   1120	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
   1121
   1122	/*
   1123	 * A zero "save" helps discover equal operands (for later),
   1124	 * and is used in swapping operands (if needed).
   1125	 */
   1126	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
   1127
   1128	/*
   1129	 * Compare magnitude of operands.
   1130	 */
   1131	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
   1132	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
   1133	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
   1134	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
   1135		/*
   1136		 * Set the left operand to the larger one by XOR swap.
   1137		 * First finish the first word "save".
   1138		 */
   1139		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
   1140		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
   1141		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
   1142			rightp2,rightp3,rightp4);
   1143		/* also setup exponents used in rest of routine */
   1144		diff_exponent = add_exponent - mpy_exponent;
   1145		result_exponent = add_exponent;
   1146	} else {
   1147		/* also setup exponents used in rest of routine */
   1148		diff_exponent = mpy_exponent - add_exponent;
   1149		result_exponent = mpy_exponent;
   1150	}
   1151	/* Invariant: left is not smaller than right. */
   1152
   1153	/*
   1154	 * Special case alignment of operands that would force alignment
   1155	 * beyond the extent of the extension.  A further optimization
   1156	 * could special case this but only reduces the path length for
   1157	 * this infrequent case.
   1158	 */
   1159	if (diff_exponent > DBLEXT_THRESHOLD) {
   1160		diff_exponent = DBLEXT_THRESHOLD;
   1161	}
   1162
   1163	/* Align right operand by shifting it to the right */
   1164	Dblext_clear_sign(rightp1);
   1165	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
   1166		/*shifted by*/diff_exponent);
   1167	
   1168	/* Treat sum and difference of the operands separately. */
   1169	if ((int)save < 0) {
   1170		/*
   1171		 * Difference of the two operands.  Overflow can occur if the
   1172		 * multiply overflowed.  A borrow can occur out of the hidden
   1173		 * bit and force a post normalization phase.
   1174		 */
   1175		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
   1176			rightp1,rightp2,rightp3,rightp4,
   1177			resultp1,resultp2,resultp3,resultp4);
   1178		sign_save = Dbl_signextendedsign(resultp1);
   1179		if (Dbl_iszero_hidden(resultp1)) {
   1180			/* Handle normalization */
   1181		/* A straightforward algorithm would now shift the
   1182		 * result and extension left until the hidden bit
   1183		 * becomes one.  Not all of the extension bits need
   1184		 * participate in the shift.  Only the two most 
   1185		 * significant bits (round and guard) are needed.
   1186		 * If only a single shift is needed then the guard
   1187		 * bit becomes a significant low order bit and the
   1188		 * extension must participate in the rounding.
   1189		 * If more than a single shift is needed, then all
   1190		 * bits to the right of the guard bit are zeros, 
   1191		 * and the guard bit may or may not be zero. */
   1192			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
   1193				resultp4);
   1194
   1195			/* Need to check for a zero result.  The sign and
   1196			 * exponent fields have already been zeroed.  The more
   1197			 * efficient test of the full object can be used.
   1198			 */
   1199			 if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
   1200				/* Must have been "x-x" or "x+(-x)". */
   1201				if (Is_rounding_mode(ROUNDMINUS))
   1202					Dbl_setone_sign(resultp1);
   1203				Dbl_copytoptr(resultp1,resultp2,dstptr);
   1204				return(NOEXCEPTION);
   1205			}
   1206			result_exponent--;
   1207
   1208			/* Look to see if normalization is finished. */
   1209			if (Dbl_isone_hidden(resultp1)) {
   1210				/* No further normalization is needed */
   1211				goto round;
   1212			}
   1213
   1214			/* Discover first one bit to determine shift amount.
   1215			 * Use a modified binary search.  We have already
   1216			 * shifted the result one position right and still
   1217			 * not found a one so the remainder of the extension
   1218			 * must be zero and simplifies rounding. */
   1219			/* Scan bytes */
   1220			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
   1221				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
   1222				result_exponent -= 8;
   1223			}
   1224			/* Now narrow it down to the nibble */
   1225			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
   1226				/* The lower nibble contains the
   1227				 * normalizing one */
   1228				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
   1229				result_exponent -= 4;
   1230			}
   1231			/* Select case where first bit is set (already
   1232			 * normalized) otherwise select the proper shift. */
   1233			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
   1234			if (jumpsize <= 7) switch(jumpsize) {
   1235			case 1:
   1236				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
   1237					resultp4);
   1238				result_exponent -= 3;
   1239				break;
   1240			case 2:
   1241			case 3:
   1242				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
   1243					resultp4);
   1244				result_exponent -= 2;
   1245				break;
   1246			case 4:
   1247			case 5:
   1248			case 6:
   1249			case 7:
   1250				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
   1251					resultp4);
   1252				result_exponent -= 1;
   1253				break;
   1254			}
   1255		} /* end if (hidden...)... */
   1256	/* Fall through and round */
   1257	} /* end if (save < 0)... */
   1258	else {
   1259		/* Add magnitudes */
   1260		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
   1261			rightp1,rightp2,rightp3,rightp4,
   1262			/*to*/resultp1,resultp2,resultp3,resultp4);
   1263		sign_save = Dbl_signextendedsign(resultp1);
   1264		if (Dbl_isone_hiddenoverflow(resultp1)) {
   1265	    		/* Prenormalization required. */
   1266	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
   1267				resultp4);
   1268	    		result_exponent++;
   1269		} /* end if hiddenoverflow... */
   1270	} /* end else ...add magnitudes... */
   1271
   1272	/* Round the result.  If the extension and lower two words are
   1273	 * all zeros, then the result is exact.  Otherwise round in the
   1274	 * correct direction.  Underflow is possible. If a postnormalization
   1275	 * is necessary, then the mantissa is all zeros so no shift is needed.
   1276	 */
   1277  round:
   1278	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
   1279		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
   1280			result_exponent,is_tiny);
   1281	}
   1282	Dbl_set_sign(resultp1,/*using*/sign_save);
   1283	if (Dblext_isnotzero_mantissap3(resultp3) || 
   1284	    Dblext_isnotzero_mantissap4(resultp4)) {
   1285		inexact = TRUE;
   1286		switch(Rounding_mode()) {
   1287		case ROUNDNEAREST: /* The default. */
   1288			if (Dblext_isone_highp3(resultp3)) {
   1289				/* at least 1/2 ulp */
   1290				if (Dblext_isnotzero_low31p3(resultp3) ||
   1291				    Dblext_isnotzero_mantissap4(resultp4) ||
   1292				    Dblext_isone_lowp2(resultp2)) {
   1293					/* either exactly half way and odd or
   1294					 * more than 1/2ulp */
   1295					Dbl_increment(resultp1,resultp2);
   1296				}
   1297			}
   1298	    		break;
   1299
   1300		case ROUNDPLUS:
   1301	    		if (Dbl_iszero_sign(resultp1)) {
   1302				/* Round up positive results */
   1303				Dbl_increment(resultp1,resultp2);
   1304			}
   1305			break;
   1306	    
   1307		case ROUNDMINUS:
   1308	    		if (Dbl_isone_sign(resultp1)) {
   1309				/* Round down negative results */
   1310				Dbl_increment(resultp1,resultp2);
   1311			}
   1312	    
   1313		case ROUNDZERO:;
   1314			/* truncate is simple */
   1315		} /* end switch... */
   1316		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
   1317	}
   1318	if (result_exponent >= DBL_INFINITY_EXPONENT) {
   1319		/* Overflow */
   1320		if (Is_overflowtrap_enabled()) {
   1321                        /*
   1322                         * Adjust bias of result
   1323                         */
   1324                        Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
   1325                        Dbl_copytoptr(resultp1,resultp2,dstptr);
   1326                        if (inexact)
   1327                            if (Is_inexacttrap_enabled())
   1328                                return (OPC_2E_OVERFLOWEXCEPTION |
   1329					OPC_2E_INEXACTEXCEPTION);
   1330                            else Set_inexactflag();
   1331                        return (OPC_2E_OVERFLOWEXCEPTION);
   1332		}
   1333		inexact = TRUE;
   1334		Set_overflowflag();
   1335		Dbl_setoverflow(resultp1,resultp2);
   1336	} else if (result_exponent <= 0) {	/* underflow case */
   1337		if (Is_underflowtrap_enabled()) {
   1338                        /*
   1339                         * Adjust bias of result
   1340                         */
   1341                	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
   1342			Dbl_copytoptr(resultp1,resultp2,dstptr);
   1343                        if (inexact)
   1344                            if (Is_inexacttrap_enabled())
   1345                                return (OPC_2E_UNDERFLOWEXCEPTION |
   1346					OPC_2E_INEXACTEXCEPTION);
   1347                            else Set_inexactflag();
   1348	    		return(OPC_2E_UNDERFLOWEXCEPTION);
   1349		}
   1350		else if (inexact && is_tiny) Set_underflowflag();
   1351	}
   1352	else Dbl_set_exponent(resultp1,result_exponent);
   1353	Dbl_copytoptr(resultp1,resultp2,dstptr);
   1354	if (inexact) 
   1355		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
   1356		else Set_inexactflag();
   1357    	return(NOEXCEPTION);
   1358}
   1359
   1360/*
   1361 *  Single Floating-point Multiply Fused Add
   1362 */
   1363
   1364sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
   1365
   1366sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
   1367unsigned int *status;
   1368{
   1369	unsigned int opnd1, opnd2, opnd3;
   1370	register unsigned int tmpresp1, tmpresp2;
   1371	unsigned int rightp1, rightp2;
   1372	unsigned int resultp1, resultp2 = 0;
   1373	register int mpy_exponent, add_exponent, count;
   1374	boolean inexact = FALSE, is_tiny = FALSE;
   1375
   1376	unsigned int signlessleft1, signlessright1, save;
   1377	register int result_exponent, diff_exponent;
   1378	int sign_save, jumpsize;
   1379	
   1380	Sgl_copyfromptr(src1ptr,opnd1);
   1381	Sgl_copyfromptr(src2ptr,opnd2);
   1382	Sgl_copyfromptr(src3ptr,opnd3);
   1383
   1384	/* 
   1385	 * set sign bit of result of multiply
   1386	 */
   1387	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 
   1388		Sgl_setnegativezero(resultp1); 
   1389	else Sgl_setzero(resultp1);
   1390
   1391	/*
   1392	 * Generate multiply exponent 
   1393	 */
   1394	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
   1395
   1396	/*
   1397	 * check first operand for NaN's or infinity
   1398	 */
   1399	if (Sgl_isinfinity_exponent(opnd1)) {
   1400		if (Sgl_iszero_mantissa(opnd1)) {
   1401			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
   1402				if (Sgl_iszero_exponentmantissa(opnd2)) {
   1403					/* 
   1404					 * invalid since operands are infinity 
   1405					 * and zero 
   1406					 */
   1407					if (Is_invalidtrap_enabled())
   1408						return(OPC_2E_INVALIDEXCEPTION);
   1409					Set_invalidflag();
   1410					Sgl_makequietnan(resultp1);
   1411					Sgl_copytoptr(resultp1,dstptr);
   1412					return(NOEXCEPTION);
   1413				}
   1414				/*
   1415				 * Check third operand for infinity with a
   1416				 *  sign opposite of the multiply result
   1417				 */
   1418				if (Sgl_isinfinity(opnd3) &&
   1419				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
   1420					/* 
   1421					 * invalid since attempting a magnitude
   1422					 * subtraction of infinities
   1423					 */
   1424					if (Is_invalidtrap_enabled())
   1425						return(OPC_2E_INVALIDEXCEPTION);
   1426					Set_invalidflag();
   1427					Sgl_makequietnan(resultp1);
   1428					Sgl_copytoptr(resultp1,dstptr);
   1429					return(NOEXCEPTION);
   1430				}
   1431
   1432				/*
   1433			 	 * return infinity
   1434			 	 */
   1435				Sgl_setinfinity_exponentmantissa(resultp1);
   1436				Sgl_copytoptr(resultp1,dstptr);
   1437				return(NOEXCEPTION);
   1438			}
   1439		}
   1440		else {
   1441			/*
   1442		 	 * is NaN; signaling or quiet?
   1443		 	 */
   1444			if (Sgl_isone_signaling(opnd1)) {
   1445				/* trap if INVALIDTRAP enabled */
   1446				if (Is_invalidtrap_enabled()) 
   1447			    		return(OPC_2E_INVALIDEXCEPTION);
   1448				/* make NaN quiet */
   1449				Set_invalidflag();
   1450				Sgl_set_quiet(opnd1);
   1451			}
   1452			/* 
   1453			 * is second operand a signaling NaN? 
   1454			 */
   1455			else if (Sgl_is_signalingnan(opnd2)) {
   1456				/* trap if INVALIDTRAP enabled */
   1457				if (Is_invalidtrap_enabled())
   1458			    		return(OPC_2E_INVALIDEXCEPTION);
   1459				/* make NaN quiet */
   1460				Set_invalidflag();
   1461				Sgl_set_quiet(opnd2);
   1462				Sgl_copytoptr(opnd2,dstptr);
   1463				return(NOEXCEPTION);
   1464			}
   1465			/* 
   1466			 * is third operand a signaling NaN? 
   1467			 */
   1468			else if (Sgl_is_signalingnan(opnd3)) {
   1469				/* trap if INVALIDTRAP enabled */
   1470				if (Is_invalidtrap_enabled())
   1471			    		return(OPC_2E_INVALIDEXCEPTION);
   1472				/* make NaN quiet */
   1473				Set_invalidflag();
   1474				Sgl_set_quiet(opnd3);
   1475				Sgl_copytoptr(opnd3,dstptr);
   1476				return(NOEXCEPTION);
   1477			}
   1478			/*
   1479		 	 * return quiet NaN
   1480		 	 */
   1481			Sgl_copytoptr(opnd1,dstptr);
   1482			return(NOEXCEPTION);
   1483		}
   1484	}
   1485
   1486	/*
   1487	 * check second operand for NaN's or infinity
   1488	 */
   1489	if (Sgl_isinfinity_exponent(opnd2)) {
   1490		if (Sgl_iszero_mantissa(opnd2)) {
   1491			if (Sgl_isnotnan(opnd3)) {
   1492				if (Sgl_iszero_exponentmantissa(opnd1)) {
   1493					/* 
   1494					 * invalid since multiply operands are
   1495					 * zero & infinity
   1496					 */
   1497					if (Is_invalidtrap_enabled())
   1498						return(OPC_2E_INVALIDEXCEPTION);
   1499					Set_invalidflag();
   1500					Sgl_makequietnan(opnd2);
   1501					Sgl_copytoptr(opnd2,dstptr);
   1502					return(NOEXCEPTION);
   1503				}
   1504
   1505				/*
   1506				 * Check third operand for infinity with a
   1507				 *  sign opposite of the multiply result
   1508				 */
   1509				if (Sgl_isinfinity(opnd3) &&
   1510				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
   1511					/* 
   1512					 * invalid since attempting a magnitude
   1513					 * subtraction of infinities
   1514					 */
   1515					if (Is_invalidtrap_enabled())
   1516				       		return(OPC_2E_INVALIDEXCEPTION);
   1517				       	Set_invalidflag();
   1518				       	Sgl_makequietnan(resultp1);
   1519					Sgl_copytoptr(resultp1,dstptr);
   1520					return(NOEXCEPTION);
   1521				}
   1522
   1523				/*
   1524				 * return infinity
   1525				 */
   1526				Sgl_setinfinity_exponentmantissa(resultp1);
   1527				Sgl_copytoptr(resultp1,dstptr);
   1528				return(NOEXCEPTION);
   1529			}
   1530		}
   1531		else {
   1532			/*
   1533			 * is NaN; signaling or quiet?
   1534			 */
   1535			if (Sgl_isone_signaling(opnd2)) {
   1536				/* trap if INVALIDTRAP enabled */
   1537				if (Is_invalidtrap_enabled())
   1538					return(OPC_2E_INVALIDEXCEPTION);
   1539				/* make NaN quiet */
   1540				Set_invalidflag();
   1541				Sgl_set_quiet(opnd2);
   1542			}
   1543			/* 
   1544			 * is third operand a signaling NaN? 
   1545			 */
   1546			else if (Sgl_is_signalingnan(opnd3)) {
   1547			       	/* trap if INVALIDTRAP enabled */
   1548			       	if (Is_invalidtrap_enabled())
   1549				   		return(OPC_2E_INVALIDEXCEPTION);
   1550			       	/* make NaN quiet */
   1551			       	Set_invalidflag();
   1552			       	Sgl_set_quiet(opnd3);
   1553				Sgl_copytoptr(opnd3,dstptr);
   1554		       		return(NOEXCEPTION);
   1555			}
   1556			/*
   1557			 * return quiet NaN
   1558			 */
   1559			Sgl_copytoptr(opnd2,dstptr);
   1560			return(NOEXCEPTION);
   1561		}
   1562	}
   1563
   1564	/*
   1565	 * check third operand for NaN's or infinity
   1566	 */
   1567	if (Sgl_isinfinity_exponent(opnd3)) {
   1568		if (Sgl_iszero_mantissa(opnd3)) {
   1569			/* return infinity */
   1570			Sgl_copytoptr(opnd3,dstptr);
   1571			return(NOEXCEPTION);
   1572		} else {
   1573			/*
   1574			 * is NaN; signaling or quiet?
   1575			 */
   1576			if (Sgl_isone_signaling(opnd3)) {
   1577				/* trap if INVALIDTRAP enabled */
   1578				if (Is_invalidtrap_enabled())
   1579					return(OPC_2E_INVALIDEXCEPTION);
   1580				/* make NaN quiet */
   1581				Set_invalidflag();
   1582				Sgl_set_quiet(opnd3);
   1583			}
   1584			/*
   1585			 * return quiet NaN
   1586 			 */
   1587			Sgl_copytoptr(opnd3,dstptr);
   1588			return(NOEXCEPTION);
   1589		}
   1590    	}
   1591
   1592	/*
   1593	 * Generate multiply mantissa
   1594	 */
   1595	if (Sgl_isnotzero_exponent(opnd1)) {
   1596		/* set hidden bit */
   1597		Sgl_clear_signexponent_set_hidden(opnd1);
   1598	}
   1599	else {
   1600		/* check for zero */
   1601		if (Sgl_iszero_mantissa(opnd1)) {
   1602			/*
   1603			 * Perform the add opnd3 with zero here.
   1604			 */
   1605			if (Sgl_iszero_exponentmantissa(opnd3)) {
   1606				if (Is_rounding_mode(ROUNDMINUS)) {
   1607					Sgl_or_signs(opnd3,resultp1);
   1608				} else {
   1609					Sgl_and_signs(opnd3,resultp1);
   1610				}
   1611			}
   1612			/*
   1613			 * Now let's check for trapped underflow case.
   1614			 */
   1615			else if (Sgl_iszero_exponent(opnd3) &&
   1616			         Is_underflowtrap_enabled()) {
   1617                    		/* need to normalize results mantissa */
   1618                    		sign_save = Sgl_signextendedsign(opnd3);
   1619				result_exponent = 0;
   1620                    		Sgl_leftshiftby1(opnd3);
   1621                    		Sgl_normalize(opnd3,result_exponent);
   1622                    		Sgl_set_sign(opnd3,/*using*/sign_save);
   1623                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
   1624							unfl);
   1625                    		Sgl_copytoptr(opnd3,dstptr);
   1626                    		/* inexact = FALSE */
   1627                    		return(OPC_2E_UNDERFLOWEXCEPTION);
   1628			}
   1629			Sgl_copytoptr(opnd3,dstptr);
   1630			return(NOEXCEPTION);
   1631		}
   1632		/* is denormalized, adjust exponent */
   1633		Sgl_clear_signexponent(opnd1);
   1634		Sgl_leftshiftby1(opnd1);
   1635		Sgl_normalize(opnd1,mpy_exponent);
   1636	}
   1637	/* opnd2 needs to have hidden bit set with msb in hidden bit */
   1638	if (Sgl_isnotzero_exponent(opnd2)) {
   1639		Sgl_clear_signexponent_set_hidden(opnd2);
   1640	}
   1641	else {
   1642		/* check for zero */
   1643		if (Sgl_iszero_mantissa(opnd2)) {
   1644			/*
   1645			 * Perform the add opnd3 with zero here.
   1646			 */
   1647			if (Sgl_iszero_exponentmantissa(opnd3)) {
   1648				if (Is_rounding_mode(ROUNDMINUS)) {
   1649					Sgl_or_signs(opnd3,resultp1);
   1650				} else {
   1651					Sgl_and_signs(opnd3,resultp1);
   1652				}
   1653			}
   1654			/*
   1655			 * Now let's check for trapped underflow case.
   1656			 */
   1657			else if (Sgl_iszero_exponent(opnd3) &&
   1658			    Is_underflowtrap_enabled()) {
   1659                    		/* need to normalize results mantissa */
   1660                    		sign_save = Sgl_signextendedsign(opnd3);
   1661				result_exponent = 0;
   1662                    		Sgl_leftshiftby1(opnd3);
   1663                    		Sgl_normalize(opnd3,result_exponent);
   1664                    		Sgl_set_sign(opnd3,/*using*/sign_save);
   1665                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
   1666							unfl);
   1667                    		Sgl_copytoptr(opnd3,dstptr);
   1668                    		/* inexact = FALSE */
   1669                    		return(OPC_2E_UNDERFLOWEXCEPTION);
   1670			}
   1671			Sgl_copytoptr(opnd3,dstptr);
   1672			return(NOEXCEPTION);
   1673		}
   1674		/* is denormalized; want to normalize */
   1675		Sgl_clear_signexponent(opnd2);
   1676		Sgl_leftshiftby1(opnd2);
   1677		Sgl_normalize(opnd2,mpy_exponent);
   1678	}
   1679
   1680	/* Multiply the first two source mantissas together */
   1681
   1682	/* 
   1683	 * The intermediate result will be kept in tmpres,
   1684	 * which needs enough room for 106 bits of mantissa,
   1685	 * so lets call it a Double extended.
   1686	 */
   1687	Sglext_setzero(tmpresp1,tmpresp2);
   1688
   1689	/* 
   1690	 * Four bits at a time are inspected in each loop, and a 
   1691	 * simple shift and add multiply algorithm is used. 
   1692	 */ 
   1693	for (count = SGL_P-1; count >= 0; count -= 4) {
   1694		Sglext_rightshiftby4(tmpresp1,tmpresp2);
   1695		if (Sbit28(opnd1)) {
   1696	 		/* Twoword_add should be an ADD followed by 2 ADDC's */
   1697			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
   1698		}
   1699		if (Sbit29(opnd1)) {
   1700			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
   1701		}
   1702		if (Sbit30(opnd1)) {
   1703			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
   1704		}
   1705		if (Sbit31(opnd1)) {
   1706			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
   1707		}
   1708		Sgl_rightshiftby4(opnd1);
   1709	}
   1710	if (Is_sexthiddenoverflow(tmpresp1)) {
   1711		/* result mantissa >= 2 (mantissa overflow) */
   1712		mpy_exponent++;
   1713		Sglext_rightshiftby4(tmpresp1,tmpresp2);
   1714	} else {
   1715		Sglext_rightshiftby3(tmpresp1,tmpresp2);
   1716	}
   1717
   1718	/*
   1719	 * Restore the sign of the mpy result which was saved in resultp1.
   1720	 * The exponent will continue to be kept in mpy_exponent.
   1721	 */
   1722	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
   1723
   1724	/* 
   1725	 * No rounding is required, since the result of the multiply
   1726	 * is exact in the extended format.
   1727	 */
   1728
   1729	/*
   1730	 * Now we are ready to perform the add portion of the operation.
   1731	 *
   1732	 * The exponents need to be kept as integers for now, since the
   1733	 * multiply result might not fit into the exponent field.  We
   1734	 * can't overflow or underflow because of this yet, since the
   1735	 * add could bring the final result back into range.
   1736	 */
   1737	add_exponent = Sgl_exponent(opnd3);
   1738
   1739	/*
   1740	 * Check for denormalized or zero add operand.
   1741	 */
   1742	if (add_exponent == 0) {
   1743		/* check for zero */
   1744		if (Sgl_iszero_mantissa(opnd3)) {
   1745			/* right is zero */
   1746			/* Left can't be zero and must be result.
   1747			 *
   1748			 * The final result is now in tmpres and mpy_exponent,
   1749			 * and needs to be rounded and squeezed back into
   1750			 * double precision format from double extended.
   1751			 */
   1752			result_exponent = mpy_exponent;
   1753			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
   1754			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
   1755			goto round;
   1756		}
   1757
   1758		/* 
   1759		 * Neither are zeroes.  
   1760		 * Adjust exponent and normalize add operand.
   1761		 */
   1762		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
   1763		Sgl_clear_signexponent(opnd3);
   1764		Sgl_leftshiftby1(opnd3);
   1765		Sgl_normalize(opnd3,add_exponent);
   1766		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
   1767	} else {
   1768		Sgl_clear_exponent_set_hidden(opnd3);
   1769	}
   1770	/*
   1771	 * Copy opnd3 to the double extended variable called right.
   1772	 */
   1773	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
   1774
   1775	/*
   1776	 * A zero "save" helps discover equal operands (for later),
   1777	 * and is used in swapping operands (if needed).
   1778	 */
   1779	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
   1780
   1781	/*
   1782	 * Compare magnitude of operands.
   1783	 */
   1784	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
   1785	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
   1786	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
   1787	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
   1788		/*
   1789		 * Set the left operand to the larger one by XOR swap.
   1790		 * First finish the first word "save".
   1791		 */
   1792		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
   1793		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
   1794		Sglext_swap_lower(tmpresp2,rightp2);
   1795		/* also setup exponents used in rest of routine */
   1796		diff_exponent = add_exponent - mpy_exponent;
   1797		result_exponent = add_exponent;
   1798	} else {
   1799		/* also setup exponents used in rest of routine */
   1800		diff_exponent = mpy_exponent - add_exponent;
   1801		result_exponent = mpy_exponent;
   1802	}
   1803	/* Invariant: left is not smaller than right. */
   1804
   1805	/*
   1806	 * Special case alignment of operands that would force alignment
   1807	 * beyond the extent of the extension.  A further optimization
   1808	 * could special case this but only reduces the path length for
   1809	 * this infrequent case.
   1810	 */
   1811	if (diff_exponent > SGLEXT_THRESHOLD) {
   1812		diff_exponent = SGLEXT_THRESHOLD;
   1813	}
   1814
   1815	/* Align right operand by shifting it to the right */
   1816	Sglext_clear_sign(rightp1);
   1817	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
   1818	
   1819	/* Treat sum and difference of the operands separately. */
   1820	if ((int)save < 0) {
   1821		/*
   1822		 * Difference of the two operands.  Overflow can occur if the
   1823		 * multiply overflowed.  A borrow can occur out of the hidden
   1824		 * bit and force a post normalization phase.
   1825		 */
   1826		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
   1827			resultp1,resultp2);
   1828		sign_save = Sgl_signextendedsign(resultp1);
   1829		if (Sgl_iszero_hidden(resultp1)) {
   1830			/* Handle normalization */
   1831		/* A straightforward algorithm would now shift the
   1832		 * result and extension left until the hidden bit
   1833		 * becomes one.  Not all of the extension bits need
   1834		 * participate in the shift.  Only the two most 
   1835		 * significant bits (round and guard) are needed.
   1836		 * If only a single shift is needed then the guard
   1837		 * bit becomes a significant low order bit and the
   1838		 * extension must participate in the rounding.
   1839		 * If more than a single shift is needed, then all
   1840		 * bits to the right of the guard bit are zeros, 
   1841		 * and the guard bit may or may not be zero. */
   1842			Sglext_leftshiftby1(resultp1,resultp2);
   1843
   1844			/* Need to check for a zero result.  The sign and
   1845			 * exponent fields have already been zeroed.  The more
   1846			 * efficient test of the full object can be used.
   1847			 */
   1848			 if (Sglext_iszero(resultp1,resultp2)) {
   1849				/* Must have been "x-x" or "x+(-x)". */
   1850				if (Is_rounding_mode(ROUNDMINUS))
   1851					Sgl_setone_sign(resultp1);
   1852				Sgl_copytoptr(resultp1,dstptr);
   1853				return(NOEXCEPTION);
   1854			}
   1855			result_exponent--;
   1856
   1857			/* Look to see if normalization is finished. */
   1858			if (Sgl_isone_hidden(resultp1)) {
   1859				/* No further normalization is needed */
   1860				goto round;
   1861			}
   1862
   1863			/* Discover first one bit to determine shift amount.
   1864			 * Use a modified binary search.  We have already
   1865			 * shifted the result one position right and still
   1866			 * not found a one so the remainder of the extension
   1867			 * must be zero and simplifies rounding. */
   1868			/* Scan bytes */
   1869			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
   1870				Sglext_leftshiftby8(resultp1,resultp2);
   1871				result_exponent -= 8;
   1872			}
   1873			/* Now narrow it down to the nibble */
   1874			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
   1875				/* The lower nibble contains the
   1876				 * normalizing one */
   1877				Sglext_leftshiftby4(resultp1,resultp2);
   1878				result_exponent -= 4;
   1879			}
   1880			/* Select case where first bit is set (already
   1881			 * normalized) otherwise select the proper shift. */
   1882			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
   1883			if (jumpsize <= 7) switch(jumpsize) {
   1884			case 1:
   1885				Sglext_leftshiftby3(resultp1,resultp2);
   1886				result_exponent -= 3;
   1887				break;
   1888			case 2:
   1889			case 3:
   1890				Sglext_leftshiftby2(resultp1,resultp2);
   1891				result_exponent -= 2;
   1892				break;
   1893			case 4:
   1894			case 5:
   1895			case 6:
   1896			case 7:
   1897				Sglext_leftshiftby1(resultp1,resultp2);
   1898				result_exponent -= 1;
   1899				break;
   1900			}
   1901		} /* end if (hidden...)... */
   1902	/* Fall through and round */
   1903	} /* end if (save < 0)... */
   1904	else {
   1905		/* Add magnitudes */
   1906		Sglext_addition(tmpresp1,tmpresp2,
   1907			rightp1,rightp2, /*to*/resultp1,resultp2);
   1908		sign_save = Sgl_signextendedsign(resultp1);
   1909		if (Sgl_isone_hiddenoverflow(resultp1)) {
   1910	    		/* Prenormalization required. */
   1911	    		Sglext_arithrightshiftby1(resultp1,resultp2);
   1912	    		result_exponent++;
   1913		} /* end if hiddenoverflow... */
   1914	} /* end else ...add magnitudes... */
   1915
   1916	/* Round the result.  If the extension and lower two words are
   1917	 * all zeros, then the result is exact.  Otherwise round in the
   1918	 * correct direction.  Underflow is possible. If a postnormalization
   1919	 * is necessary, then the mantissa is all zeros so no shift is needed.
   1920	 */
   1921  round:
   1922	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
   1923		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
   1924	}
   1925	Sgl_set_sign(resultp1,/*using*/sign_save);
   1926	if (Sglext_isnotzero_mantissap2(resultp2)) {
   1927		inexact = TRUE;
   1928		switch(Rounding_mode()) {
   1929		case ROUNDNEAREST: /* The default. */
   1930			if (Sglext_isone_highp2(resultp2)) {
   1931				/* at least 1/2 ulp */
   1932				if (Sglext_isnotzero_low31p2(resultp2) ||
   1933				    Sglext_isone_lowp1(resultp1)) {
   1934					/* either exactly half way and odd or
   1935					 * more than 1/2ulp */
   1936					Sgl_increment(resultp1);
   1937				}
   1938			}
   1939	    		break;
   1940
   1941		case ROUNDPLUS:
   1942	    		if (Sgl_iszero_sign(resultp1)) {
   1943				/* Round up positive results */
   1944				Sgl_increment(resultp1);
   1945			}
   1946			break;
   1947	    
   1948		case ROUNDMINUS:
   1949	    		if (Sgl_isone_sign(resultp1)) {
   1950				/* Round down negative results */
   1951				Sgl_increment(resultp1);
   1952			}
   1953	    
   1954		case ROUNDZERO:;
   1955			/* truncate is simple */
   1956		} /* end switch... */
   1957		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
   1958	}
   1959	if (result_exponent >= SGL_INFINITY_EXPONENT) {
   1960		/* Overflow */
   1961		if (Is_overflowtrap_enabled()) {
   1962                        /*
   1963                         * Adjust bias of result
   1964                         */
   1965                        Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
   1966                        Sgl_copytoptr(resultp1,dstptr);
   1967                        if (inexact)
   1968                            if (Is_inexacttrap_enabled())
   1969                                return (OPC_2E_OVERFLOWEXCEPTION |
   1970					OPC_2E_INEXACTEXCEPTION);
   1971                            else Set_inexactflag();
   1972                        return (OPC_2E_OVERFLOWEXCEPTION);
   1973		}
   1974		inexact = TRUE;
   1975		Set_overflowflag();
   1976		Sgl_setoverflow(resultp1);
   1977	} else if (result_exponent <= 0) {	/* underflow case */
   1978		if (Is_underflowtrap_enabled()) {
   1979                        /*
   1980                         * Adjust bias of result
   1981                         */
   1982                	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
   1983			Sgl_copytoptr(resultp1,dstptr);
   1984                        if (inexact)
   1985                            if (Is_inexacttrap_enabled())
   1986                                return (OPC_2E_UNDERFLOWEXCEPTION |
   1987					OPC_2E_INEXACTEXCEPTION);
   1988                            else Set_inexactflag();
   1989	    		return(OPC_2E_UNDERFLOWEXCEPTION);
   1990		}
   1991		else if (inexact && is_tiny) Set_underflowflag();
   1992	}
   1993	else Sgl_set_exponent(resultp1,result_exponent);
   1994	Sgl_copytoptr(resultp1,dstptr);
   1995	if (inexact) 
   1996		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
   1997		else Set_inexactflag();
   1998    	return(NOEXCEPTION);
   1999}
   2000
   2001/*
   2002 *  Single Floating-point Multiply Negate Fused Add
   2003 */
   2004
   2005sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
   2006
   2007sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
   2008unsigned int *status;
   2009{
   2010	unsigned int opnd1, opnd2, opnd3;
   2011	register unsigned int tmpresp1, tmpresp2;
   2012	unsigned int rightp1, rightp2;
   2013	unsigned int resultp1, resultp2 = 0;
   2014	register int mpy_exponent, add_exponent, count;
   2015	boolean inexact = FALSE, is_tiny = FALSE;
   2016
   2017	unsigned int signlessleft1, signlessright1, save;
   2018	register int result_exponent, diff_exponent;
   2019	int sign_save, jumpsize;
   2020	
   2021	Sgl_copyfromptr(src1ptr,opnd1);
   2022	Sgl_copyfromptr(src2ptr,opnd2);
   2023	Sgl_copyfromptr(src3ptr,opnd3);
   2024
   2025	/* 
   2026	 * set sign bit of result of multiply
   2027	 */
   2028	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 
   2029		Sgl_setzero(resultp1);
   2030	else 
   2031		Sgl_setnegativezero(resultp1); 
   2032
   2033	/*
   2034	 * Generate multiply exponent 
   2035	 */
   2036	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
   2037
   2038	/*
   2039	 * check first operand for NaN's or infinity
   2040	 */
   2041	if (Sgl_isinfinity_exponent(opnd1)) {
   2042		if (Sgl_iszero_mantissa(opnd1)) {
   2043			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
   2044				if (Sgl_iszero_exponentmantissa(opnd2)) {
   2045					/* 
   2046					 * invalid since operands are infinity 
   2047					 * and zero 
   2048					 */
   2049					if (Is_invalidtrap_enabled())
   2050						return(OPC_2E_INVALIDEXCEPTION);
   2051					Set_invalidflag();
   2052					Sgl_makequietnan(resultp1);
   2053					Sgl_copytoptr(resultp1,dstptr);
   2054					return(NOEXCEPTION);
   2055				}
   2056				/*
   2057				 * Check third operand for infinity with a
   2058				 *  sign opposite of the multiply result
   2059				 */
   2060				if (Sgl_isinfinity(opnd3) &&
   2061				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
   2062					/* 
   2063					 * invalid since attempting a magnitude
   2064					 * subtraction of infinities
   2065					 */
   2066					if (Is_invalidtrap_enabled())
   2067						return(OPC_2E_INVALIDEXCEPTION);
   2068					Set_invalidflag();
   2069					Sgl_makequietnan(resultp1);
   2070					Sgl_copytoptr(resultp1,dstptr);
   2071					return(NOEXCEPTION);
   2072				}
   2073
   2074				/*
   2075			 	 * return infinity
   2076			 	 */
   2077				Sgl_setinfinity_exponentmantissa(resultp1);
   2078				Sgl_copytoptr(resultp1,dstptr);
   2079				return(NOEXCEPTION);
   2080			}
   2081		}
   2082		else {
   2083			/*
   2084		 	 * is NaN; signaling or quiet?
   2085		 	 */
   2086			if (Sgl_isone_signaling(opnd1)) {
   2087				/* trap if INVALIDTRAP enabled */
   2088				if (Is_invalidtrap_enabled()) 
   2089			    		return(OPC_2E_INVALIDEXCEPTION);
   2090				/* make NaN quiet */
   2091				Set_invalidflag();
   2092				Sgl_set_quiet(opnd1);
   2093			}
   2094			/* 
   2095			 * is second operand a signaling NaN? 
   2096			 */
   2097			else if (Sgl_is_signalingnan(opnd2)) {
   2098				/* trap if INVALIDTRAP enabled */
   2099				if (Is_invalidtrap_enabled())
   2100			    		return(OPC_2E_INVALIDEXCEPTION);
   2101				/* make NaN quiet */
   2102				Set_invalidflag();
   2103				Sgl_set_quiet(opnd2);
   2104				Sgl_copytoptr(opnd2,dstptr);
   2105				return(NOEXCEPTION);
   2106			}
   2107			/* 
   2108			 * is third operand a signaling NaN? 
   2109			 */
   2110			else if (Sgl_is_signalingnan(opnd3)) {
   2111				/* trap if INVALIDTRAP enabled */
   2112				if (Is_invalidtrap_enabled())
   2113			    		return(OPC_2E_INVALIDEXCEPTION);
   2114				/* make NaN quiet */
   2115				Set_invalidflag();
   2116				Sgl_set_quiet(opnd3);
   2117				Sgl_copytoptr(opnd3,dstptr);
   2118				return(NOEXCEPTION);
   2119			}
   2120			/*
   2121		 	 * return quiet NaN
   2122		 	 */
   2123			Sgl_copytoptr(opnd1,dstptr);
   2124			return(NOEXCEPTION);
   2125		}
   2126	}
   2127
   2128	/*
   2129	 * check second operand for NaN's or infinity
   2130	 */
   2131	if (Sgl_isinfinity_exponent(opnd2)) {
   2132		if (Sgl_iszero_mantissa(opnd2)) {
   2133			if (Sgl_isnotnan(opnd3)) {
   2134				if (Sgl_iszero_exponentmantissa(opnd1)) {
   2135					/* 
   2136					 * invalid since multiply operands are
   2137					 * zero & infinity
   2138					 */
   2139					if (Is_invalidtrap_enabled())
   2140						return(OPC_2E_INVALIDEXCEPTION);
   2141					Set_invalidflag();
   2142					Sgl_makequietnan(opnd2);
   2143					Sgl_copytoptr(opnd2,dstptr);
   2144					return(NOEXCEPTION);
   2145				}
   2146
   2147				/*
   2148				 * Check third operand for infinity with a
   2149				 *  sign opposite of the multiply result
   2150				 */
   2151				if (Sgl_isinfinity(opnd3) &&
   2152				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
   2153					/* 
   2154					 * invalid since attempting a magnitude
   2155					 * subtraction of infinities
   2156					 */
   2157					if (Is_invalidtrap_enabled())
   2158				       		return(OPC_2E_INVALIDEXCEPTION);
   2159				       	Set_invalidflag();
   2160				       	Sgl_makequietnan(resultp1);
   2161					Sgl_copytoptr(resultp1,dstptr);
   2162					return(NOEXCEPTION);
   2163				}
   2164
   2165				/*
   2166				 * return infinity
   2167				 */
   2168				Sgl_setinfinity_exponentmantissa(resultp1);
   2169				Sgl_copytoptr(resultp1,dstptr);
   2170				return(NOEXCEPTION);
   2171			}
   2172		}
   2173		else {
   2174			/*
   2175			 * is NaN; signaling or quiet?
   2176			 */
   2177			if (Sgl_isone_signaling(opnd2)) {
   2178				/* trap if INVALIDTRAP enabled */
   2179				if (Is_invalidtrap_enabled())
   2180					return(OPC_2E_INVALIDEXCEPTION);
   2181				/* make NaN quiet */
   2182				Set_invalidflag();
   2183				Sgl_set_quiet(opnd2);
   2184			}
   2185			/* 
   2186			 * is third operand a signaling NaN? 
   2187			 */
   2188			else if (Sgl_is_signalingnan(opnd3)) {
   2189			       	/* trap if INVALIDTRAP enabled */
   2190			       	if (Is_invalidtrap_enabled())
   2191				   		return(OPC_2E_INVALIDEXCEPTION);
   2192			       	/* make NaN quiet */
   2193			       	Set_invalidflag();
   2194			       	Sgl_set_quiet(opnd3);
   2195				Sgl_copytoptr(opnd3,dstptr);
   2196		       		return(NOEXCEPTION);
   2197			}
   2198			/*
   2199			 * return quiet NaN
   2200			 */
   2201			Sgl_copytoptr(opnd2,dstptr);
   2202			return(NOEXCEPTION);
   2203		}
   2204	}
   2205
   2206	/*
   2207	 * check third operand for NaN's or infinity
   2208	 */
   2209	if (Sgl_isinfinity_exponent(opnd3)) {
   2210		if (Sgl_iszero_mantissa(opnd3)) {
   2211			/* return infinity */
   2212			Sgl_copytoptr(opnd3,dstptr);
   2213			return(NOEXCEPTION);
   2214		} else {
   2215			/*
   2216			 * is NaN; signaling or quiet?
   2217			 */
   2218			if (Sgl_isone_signaling(opnd3)) {
   2219				/* trap if INVALIDTRAP enabled */
   2220				if (Is_invalidtrap_enabled())
   2221					return(OPC_2E_INVALIDEXCEPTION);
   2222				/* make NaN quiet */
   2223				Set_invalidflag();
   2224				Sgl_set_quiet(opnd3);
   2225			}
   2226			/*
   2227			 * return quiet NaN
   2228 			 */
   2229			Sgl_copytoptr(opnd3,dstptr);
   2230			return(NOEXCEPTION);
   2231		}
   2232    	}
   2233
   2234	/*
   2235	 * Generate multiply mantissa
   2236	 */
   2237	if (Sgl_isnotzero_exponent(opnd1)) {
   2238		/* set hidden bit */
   2239		Sgl_clear_signexponent_set_hidden(opnd1);
   2240	}
   2241	else {
   2242		/* check for zero */
   2243		if (Sgl_iszero_mantissa(opnd1)) {
   2244			/*
   2245			 * Perform the add opnd3 with zero here.
   2246			 */
   2247			if (Sgl_iszero_exponentmantissa(opnd3)) {
   2248				if (Is_rounding_mode(ROUNDMINUS)) {
   2249					Sgl_or_signs(opnd3,resultp1);
   2250				} else {
   2251					Sgl_and_signs(opnd3,resultp1);
   2252				}
   2253			}
   2254			/*
   2255			 * Now let's check for trapped underflow case.
   2256			 */
   2257			else if (Sgl_iszero_exponent(opnd3) &&
   2258			         Is_underflowtrap_enabled()) {
   2259                    		/* need to normalize results mantissa */
   2260                    		sign_save = Sgl_signextendedsign(opnd3);
   2261				result_exponent = 0;
   2262                    		Sgl_leftshiftby1(opnd3);
   2263                    		Sgl_normalize(opnd3,result_exponent);
   2264                    		Sgl_set_sign(opnd3,/*using*/sign_save);
   2265                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
   2266							unfl);
   2267                    		Sgl_copytoptr(opnd3,dstptr);
   2268                    		/* inexact = FALSE */
   2269                    		return(OPC_2E_UNDERFLOWEXCEPTION);
   2270			}
   2271			Sgl_copytoptr(opnd3,dstptr);
   2272			return(NOEXCEPTION);
   2273		}
   2274		/* is denormalized, adjust exponent */
   2275		Sgl_clear_signexponent(opnd1);
   2276		Sgl_leftshiftby1(opnd1);
   2277		Sgl_normalize(opnd1,mpy_exponent);
   2278	}
   2279	/* opnd2 needs to have hidden bit set with msb in hidden bit */
   2280	if (Sgl_isnotzero_exponent(opnd2)) {
   2281		Sgl_clear_signexponent_set_hidden(opnd2);
   2282	}
   2283	else {
   2284		/* check for zero */
   2285		if (Sgl_iszero_mantissa(opnd2)) {
   2286			/*
   2287			 * Perform the add opnd3 with zero here.
   2288			 */
   2289			if (Sgl_iszero_exponentmantissa(opnd3)) {
   2290				if (Is_rounding_mode(ROUNDMINUS)) {
   2291					Sgl_or_signs(opnd3,resultp1);
   2292				} else {
   2293					Sgl_and_signs(opnd3,resultp1);
   2294				}
   2295			}
   2296			/*
   2297			 * Now let's check for trapped underflow case.
   2298			 */
   2299			else if (Sgl_iszero_exponent(opnd3) &&
   2300			    Is_underflowtrap_enabled()) {
   2301                    		/* need to normalize results mantissa */
   2302                    		sign_save = Sgl_signextendedsign(opnd3);
   2303				result_exponent = 0;
   2304                    		Sgl_leftshiftby1(opnd3);
   2305                    		Sgl_normalize(opnd3,result_exponent);
   2306                    		Sgl_set_sign(opnd3,/*using*/sign_save);
   2307                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
   2308							unfl);
   2309                    		Sgl_copytoptr(opnd3,dstptr);
   2310                    		/* inexact = FALSE */
   2311                    		return(OPC_2E_UNDERFLOWEXCEPTION);
   2312			}
   2313			Sgl_copytoptr(opnd3,dstptr);
   2314			return(NOEXCEPTION);
   2315		}
   2316		/* is denormalized; want to normalize */
   2317		Sgl_clear_signexponent(opnd2);
   2318		Sgl_leftshiftby1(opnd2);
   2319		Sgl_normalize(opnd2,mpy_exponent);
   2320	}
   2321
   2322	/* Multiply the first two source mantissas together */
   2323
   2324	/* 
   2325	 * The intermediate result will be kept in tmpres,
   2326	 * which needs enough room for 106 bits of mantissa,
   2327	 * so lets call it a Double extended.
   2328	 */
   2329	Sglext_setzero(tmpresp1,tmpresp2);
   2330
   2331	/* 
   2332	 * Four bits at a time are inspected in each loop, and a 
   2333	 * simple shift and add multiply algorithm is used. 
   2334	 */ 
   2335	for (count = SGL_P-1; count >= 0; count -= 4) {
   2336		Sglext_rightshiftby4(tmpresp1,tmpresp2);
   2337		if (Sbit28(opnd1)) {
   2338	 		/* Twoword_add should be an ADD followed by 2 ADDC's */
   2339			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
   2340		}
   2341		if (Sbit29(opnd1)) {
   2342			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
   2343		}
   2344		if (Sbit30(opnd1)) {
   2345			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
   2346		}
   2347		if (Sbit31(opnd1)) {
   2348			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
   2349		}
   2350		Sgl_rightshiftby4(opnd1);
   2351	}
   2352	if (Is_sexthiddenoverflow(tmpresp1)) {
   2353		/* result mantissa >= 2 (mantissa overflow) */
   2354		mpy_exponent++;
   2355		Sglext_rightshiftby4(tmpresp1,tmpresp2);
   2356	} else {
   2357		Sglext_rightshiftby3(tmpresp1,tmpresp2);
   2358	}
   2359
   2360	/*
   2361	 * Restore the sign of the mpy result which was saved in resultp1.
   2362	 * The exponent will continue to be kept in mpy_exponent.
   2363	 */
   2364	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
   2365
   2366	/* 
   2367	 * No rounding is required, since the result of the multiply
   2368	 * is exact in the extended format.
   2369	 */
   2370
   2371	/*
   2372	 * Now we are ready to perform the add portion of the operation.
   2373	 *
   2374	 * The exponents need to be kept as integers for now, since the
   2375	 * multiply result might not fit into the exponent field.  We
   2376	 * can't overflow or underflow because of this yet, since the
   2377	 * add could bring the final result back into range.
   2378	 */
   2379	add_exponent = Sgl_exponent(opnd3);
   2380
   2381	/*
   2382	 * Check for denormalized or zero add operand.
   2383	 */
   2384	if (add_exponent == 0) {
   2385		/* check for zero */
   2386		if (Sgl_iszero_mantissa(opnd3)) {
   2387			/* right is zero */
   2388			/* Left can't be zero and must be result.
   2389			 *
   2390			 * The final result is now in tmpres and mpy_exponent,
   2391			 * and needs to be rounded and squeezed back into
   2392			 * double precision format from double extended.
   2393			 */
   2394			result_exponent = mpy_exponent;
   2395			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
   2396			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
   2397			goto round;
   2398		}
   2399
   2400		/* 
   2401		 * Neither are zeroes.  
   2402		 * Adjust exponent and normalize add operand.
   2403		 */
   2404		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
   2405		Sgl_clear_signexponent(opnd3);
   2406		Sgl_leftshiftby1(opnd3);
   2407		Sgl_normalize(opnd3,add_exponent);
   2408		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
   2409	} else {
   2410		Sgl_clear_exponent_set_hidden(opnd3);
   2411	}
   2412	/*
   2413	 * Copy opnd3 to the double extended variable called right.
   2414	 */
   2415	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
   2416
   2417	/*
   2418	 * A zero "save" helps discover equal operands (for later),
   2419	 * and is used in swapping operands (if needed).
   2420	 */
   2421	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
   2422
   2423	/*
   2424	 * Compare magnitude of operands.
   2425	 */
   2426	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
   2427	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
   2428	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
   2429	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
   2430		/*
   2431		 * Set the left operand to the larger one by XOR swap.
   2432		 * First finish the first word "save".
   2433		 */
   2434		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
   2435		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
   2436		Sglext_swap_lower(tmpresp2,rightp2);
   2437		/* also setup exponents used in rest of routine */
   2438		diff_exponent = add_exponent - mpy_exponent;
   2439		result_exponent = add_exponent;
   2440	} else {
   2441		/* also setup exponents used in rest of routine */
   2442		diff_exponent = mpy_exponent - add_exponent;
   2443		result_exponent = mpy_exponent;
   2444	}
   2445	/* Invariant: left is not smaller than right. */
   2446
   2447	/*
   2448	 * Special case alignment of operands that would force alignment
   2449	 * beyond the extent of the extension.  A further optimization
   2450	 * could special case this but only reduces the path length for
   2451	 * this infrequent case.
   2452	 */
   2453	if (diff_exponent > SGLEXT_THRESHOLD) {
   2454		diff_exponent = SGLEXT_THRESHOLD;
   2455	}
   2456
   2457	/* Align right operand by shifting it to the right */
   2458	Sglext_clear_sign(rightp1);
   2459	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
   2460	
   2461	/* Treat sum and difference of the operands separately. */
   2462	if ((int)save < 0) {
   2463		/*
   2464		 * Difference of the two operands.  Overflow can occur if the
   2465		 * multiply overflowed.  A borrow can occur out of the hidden
   2466		 * bit and force a post normalization phase.
   2467		 */
   2468		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
   2469			resultp1,resultp2);
   2470		sign_save = Sgl_signextendedsign(resultp1);
   2471		if (Sgl_iszero_hidden(resultp1)) {
   2472			/* Handle normalization */
   2473		/* A straightforward algorithm would now shift the
   2474		 * result and extension left until the hidden bit
   2475		 * becomes one.  Not all of the extension bits need
   2476		 * participate in the shift.  Only the two most 
   2477		 * significant bits (round and guard) are needed.
   2478		 * If only a single shift is needed then the guard
   2479		 * bit becomes a significant low order bit and the
   2480		 * extension must participate in the rounding.
   2481		 * If more than a single shift is needed, then all
   2482		 * bits to the right of the guard bit are zeros, 
   2483		 * and the guard bit may or may not be zero. */
   2484			Sglext_leftshiftby1(resultp1,resultp2);
   2485
   2486			/* Need to check for a zero result.  The sign and
   2487			 * exponent fields have already been zeroed.  The more
   2488			 * efficient test of the full object can be used.
   2489			 */
   2490			 if (Sglext_iszero(resultp1,resultp2)) {
   2491				/* Must have been "x-x" or "x+(-x)". */
   2492				if (Is_rounding_mode(ROUNDMINUS))
   2493					Sgl_setone_sign(resultp1);
   2494				Sgl_copytoptr(resultp1,dstptr);
   2495				return(NOEXCEPTION);
   2496			}
   2497			result_exponent--;
   2498
   2499			/* Look to see if normalization is finished. */
   2500			if (Sgl_isone_hidden(resultp1)) {
   2501				/* No further normalization is needed */
   2502				goto round;
   2503			}
   2504
   2505			/* Discover first one bit to determine shift amount.
   2506			 * Use a modified binary search.  We have already
   2507			 * shifted the result one position right and still
   2508			 * not found a one so the remainder of the extension
   2509			 * must be zero and simplifies rounding. */
   2510			/* Scan bytes */
   2511			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
   2512				Sglext_leftshiftby8(resultp1,resultp2);
   2513				result_exponent -= 8;
   2514			}
   2515			/* Now narrow it down to the nibble */
   2516			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
   2517				/* The lower nibble contains the
   2518				 * normalizing one */
   2519				Sglext_leftshiftby4(resultp1,resultp2);
   2520				result_exponent -= 4;
   2521			}
   2522			/* Select case where first bit is set (already
   2523			 * normalized) otherwise select the proper shift. */
   2524			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
   2525			if (jumpsize <= 7) switch(jumpsize) {
   2526			case 1:
   2527				Sglext_leftshiftby3(resultp1,resultp2);
   2528				result_exponent -= 3;
   2529				break;
   2530			case 2:
   2531			case 3:
   2532				Sglext_leftshiftby2(resultp1,resultp2);
   2533				result_exponent -= 2;
   2534				break;
   2535			case 4:
   2536			case 5:
   2537			case 6:
   2538			case 7:
   2539				Sglext_leftshiftby1(resultp1,resultp2);
   2540				result_exponent -= 1;
   2541				break;
   2542			}
   2543		} /* end if (hidden...)... */
   2544	/* Fall through and round */
   2545	} /* end if (save < 0)... */
   2546	else {
   2547		/* Add magnitudes */
   2548		Sglext_addition(tmpresp1,tmpresp2,
   2549			rightp1,rightp2, /*to*/resultp1,resultp2);
   2550		sign_save = Sgl_signextendedsign(resultp1);
   2551		if (Sgl_isone_hiddenoverflow(resultp1)) {
   2552	    		/* Prenormalization required. */
   2553	    		Sglext_arithrightshiftby1(resultp1,resultp2);
   2554	    		result_exponent++;
   2555		} /* end if hiddenoverflow... */
   2556	} /* end else ...add magnitudes... */
   2557
   2558	/* Round the result.  If the extension and lower two words are
   2559	 * all zeros, then the result is exact.  Otherwise round in the
   2560	 * correct direction.  Underflow is possible. If a postnormalization
   2561	 * is necessary, then the mantissa is all zeros so no shift is needed.
   2562	 */
   2563  round:
   2564	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
   2565		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
   2566	}
   2567	Sgl_set_sign(resultp1,/*using*/sign_save);
   2568	if (Sglext_isnotzero_mantissap2(resultp2)) {
   2569		inexact = TRUE;
   2570		switch(Rounding_mode()) {
   2571		case ROUNDNEAREST: /* The default. */
   2572			if (Sglext_isone_highp2(resultp2)) {
   2573				/* at least 1/2 ulp */
   2574				if (Sglext_isnotzero_low31p2(resultp2) ||
   2575				    Sglext_isone_lowp1(resultp1)) {
   2576					/* either exactly half way and odd or
   2577					 * more than 1/2ulp */
   2578					Sgl_increment(resultp1);
   2579				}
   2580			}
   2581	    		break;
   2582
   2583		case ROUNDPLUS:
   2584	    		if (Sgl_iszero_sign(resultp1)) {
   2585				/* Round up positive results */
   2586				Sgl_increment(resultp1);
   2587			}
   2588			break;
   2589	    
   2590		case ROUNDMINUS:
   2591	    		if (Sgl_isone_sign(resultp1)) {
   2592				/* Round down negative results */
   2593				Sgl_increment(resultp1);
   2594			}
   2595	    
   2596		case ROUNDZERO:;
   2597			/* truncate is simple */
   2598		} /* end switch... */
   2599		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
   2600	}
   2601	if (result_exponent >= SGL_INFINITY_EXPONENT) {
   2602		/* Overflow */
   2603		if (Is_overflowtrap_enabled()) {
   2604                        /*
   2605                         * Adjust bias of result
   2606                         */
   2607                        Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
   2608                        Sgl_copytoptr(resultp1,dstptr);
   2609                        if (inexact)
   2610                            if (Is_inexacttrap_enabled())
   2611                                return (OPC_2E_OVERFLOWEXCEPTION |
   2612					OPC_2E_INEXACTEXCEPTION);
   2613                            else Set_inexactflag();
   2614                        return (OPC_2E_OVERFLOWEXCEPTION);
   2615		}
   2616		inexact = TRUE;
   2617		Set_overflowflag();
   2618		Sgl_setoverflow(resultp1);
   2619	} else if (result_exponent <= 0) {	/* underflow case */
   2620		if (Is_underflowtrap_enabled()) {
   2621                        /*
   2622                         * Adjust bias of result
   2623                         */
   2624                	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
   2625			Sgl_copytoptr(resultp1,dstptr);
   2626                        if (inexact)
   2627                            if (Is_inexacttrap_enabled())
   2628                                return (OPC_2E_UNDERFLOWEXCEPTION |
   2629					OPC_2E_INEXACTEXCEPTION);
   2630                            else Set_inexactflag();
   2631	    		return(OPC_2E_UNDERFLOWEXCEPTION);
   2632		}
   2633		else if (inexact && is_tiny) Set_underflowflag();
   2634	}
   2635	else Sgl_set_exponent(resultp1,result_exponent);
   2636	Sgl_copytoptr(resultp1,dstptr);
   2637	if (inexact) 
   2638		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
   2639		else Set_inexactflag();
   2640    	return(NOEXCEPTION);
   2641}
   2642