sp_maddf.c (6564B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * IEEE754 floating point arithmetic 4 * single precision: MADDF.f (Fused Multiply Add) 5 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) 6 * 7 * MIPS floating point support 8 * Copyright (C) 2015 Imagination Technologies, Ltd. 9 * Author: Markos Chandras <markos.chandras@imgtec.com> 10 */ 11 12#include "ieee754sp.h" 13 14 15static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, 16 union ieee754sp y, enum maddf_flags flags) 17{ 18 int re; 19 int rs; 20 unsigned int rm; 21 u64 rm64; 22 u64 zm64; 23 int s; 24 25 COMPXSP; 26 COMPYSP; 27 COMPZSP; 28 29 EXPLODEXSP; 30 EXPLODEYSP; 31 EXPLODEZSP; 32 33 FLUSHXSP; 34 FLUSHYSP; 35 FLUSHZSP; 36 37 ieee754_clearcx(); 38 39 rs = xs ^ ys; 40 if (flags & MADDF_NEGATE_PRODUCT) 41 rs ^= 1; 42 if (flags & MADDF_NEGATE_ADDITION) 43 zs ^= 1; 44 45 /* 46 * Handle the cases when at least one of x, y or z is a NaN. 47 * Order of precedence is sNaN, qNaN and z, x, y. 48 */ 49 if (zc == IEEE754_CLASS_SNAN) 50 return ieee754sp_nanxcpt(z); 51 if (xc == IEEE754_CLASS_SNAN) 52 return ieee754sp_nanxcpt(x); 53 if (yc == IEEE754_CLASS_SNAN) 54 return ieee754sp_nanxcpt(y); 55 if (zc == IEEE754_CLASS_QNAN) 56 return z; 57 if (xc == IEEE754_CLASS_QNAN) 58 return x; 59 if (yc == IEEE754_CLASS_QNAN) 60 return y; 61 62 if (zc == IEEE754_CLASS_DNORM) 63 SPDNORMZ; 64 /* ZERO z cases are handled separately below */ 65 66 switch (CLPAIR(xc, yc)) { 67 68 69 /* 70 * Infinity handling 71 */ 72 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 73 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 74 ieee754_setcx(IEEE754_INVALID_OPERATION); 75 return ieee754sp_indef(); 76 77 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 78 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 79 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 80 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 81 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 82 if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { 83 /* 84 * Cases of addition of infinities with opposite signs 85 * or subtraction of infinities with same signs. 86 */ 87 ieee754_setcx(IEEE754_INVALID_OPERATION); 88 return ieee754sp_indef(); 89 } 90 /* 91 * z is here either not an infinity, or an infinity having the 92 * same sign as product (x*y). The result must be an infinity, 93 * and its sign is determined only by the sign of product (x*y). 94 */ 95 return ieee754sp_inf(rs); 96 97 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 98 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 99 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 100 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 101 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 102 if (zc == IEEE754_CLASS_INF) 103 return ieee754sp_inf(zs); 104 if (zc == IEEE754_CLASS_ZERO) { 105 /* Handle cases +0 + (-0) and similar ones. */ 106 if (zs == rs) 107 /* 108 * Cases of addition of zeros of equal signs 109 * or subtraction of zeroes of opposite signs. 110 * The sign of the resulting zero is in any 111 * such case determined only by the sign of z. 112 */ 113 return z; 114 115 return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); 116 } 117 /* x*y is here 0, and z is not 0, so just return z */ 118 return z; 119 120 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 121 SPDNORMX; 122 fallthrough; 123 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 124 if (zc == IEEE754_CLASS_INF) 125 return ieee754sp_inf(zs); 126 SPDNORMY; 127 break; 128 129 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 130 if (zc == IEEE754_CLASS_INF) 131 return ieee754sp_inf(zs); 132 SPDNORMX; 133 break; 134 135 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 136 if (zc == IEEE754_CLASS_INF) 137 return ieee754sp_inf(zs); 138 /* continue to real computations */ 139 } 140 141 /* Finally get to do some computation */ 142 143 /* 144 * Do the multiplication bit first 145 * 146 * rm = xm * ym, re = xe + ye basically 147 * 148 * At this point xm and ym should have been normalized. 149 */ 150 151 /* rm = xm * ym, re = xe+ye basically */ 152 assert(xm & SP_HIDDEN_BIT); 153 assert(ym & SP_HIDDEN_BIT); 154 155 re = xe + ye; 156 157 /* Multiple 24 bit xm and ym to give 48 bit results */ 158 rm64 = (uint64_t)xm * ym; 159 160 /* Shunt to top of word */ 161 rm64 = rm64 << 16; 162 163 /* Put explicit bit at bit 62 if necessary */ 164 if ((int64_t) rm64 < 0) { 165 rm64 = rm64 >> 1; 166 re++; 167 } 168 169 assert(rm64 & (1 << 62)); 170 171 if (zc == IEEE754_CLASS_ZERO) { 172 /* 173 * Move explicit bit from bit 62 to bit 26 since the 174 * ieee754sp_format code expects the mantissa to be 175 * 27 bits wide (24 + 3 rounding bits). 176 */ 177 rm = XSPSRS64(rm64, (62 - 26)); 178 return ieee754sp_format(rs, re, rm); 179 } 180 181 /* Move explicit bit from bit 23 to bit 62 */ 182 zm64 = (uint64_t)zm << (62 - 23); 183 assert(zm64 & (1 << 62)); 184 185 /* Make the exponents the same */ 186 if (ze > re) { 187 /* 188 * Have to shift r fraction right to align. 189 */ 190 s = ze - re; 191 rm64 = XSPSRS64(rm64, s); 192 re += s; 193 } else if (re > ze) { 194 /* 195 * Have to shift z fraction right to align. 196 */ 197 s = re - ze; 198 zm64 = XSPSRS64(zm64, s); 199 ze += s; 200 } 201 assert(ze == re); 202 assert(ze <= SP_EMAX); 203 204 /* Do the addition */ 205 if (zs == rs) { 206 /* 207 * Generate 64 bit result by adding two 63 bit numbers 208 * leaving result in zm64, zs and ze. 209 */ 210 zm64 = zm64 + rm64; 211 if ((int64_t)zm64 < 0) { /* carry out */ 212 zm64 = XSPSRS1(zm64); 213 ze++; 214 } 215 } else { 216 if (zm64 >= rm64) { 217 zm64 = zm64 - rm64; 218 } else { 219 zm64 = rm64 - zm64; 220 zs = rs; 221 } 222 if (zm64 == 0) 223 return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); 224 225 /* 226 * Put explicit bit at bit 62 if necessary. 227 */ 228 while ((zm64 >> 62) == 0) { 229 zm64 <<= 1; 230 ze--; 231 } 232 } 233 234 /* 235 * Move explicit bit from bit 62 to bit 26 since the 236 * ieee754sp_format code expects the mantissa to be 237 * 27 bits wide (24 + 3 rounding bits). 238 */ 239 zm = XSPSRS64(zm64, (62 - 26)); 240 241 return ieee754sp_format(zs, ze, zm); 242} 243 244union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, 245 union ieee754sp y) 246{ 247 return _sp_maddf(z, x, y, 0); 248} 249 250union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, 251 union ieee754sp y) 252{ 253 return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 254} 255 256union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x, 257 union ieee754sp y) 258{ 259 return _sp_maddf(z, x, y, 0); 260} 261 262union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x, 263 union ieee754sp y) 264{ 265 return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION); 266} 267 268union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x, 269 union ieee754sp y) 270{ 271 return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); 272} 273 274union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x, 275 union ieee754sp y) 276{ 277 return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 278}