dp_maddf.c (8011B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * IEEE754 floating point arithmetic 4 * double precision: MADDF.f (Fused Multiply Add) 5 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) 6 * 7 * MIPS floating point support 8 * Copyright (C) 2015 Imagination Technologies, Ltd. 9 * Author: Markos Chandras <markos.chandras@imgtec.com> 10 */ 11 12#include "ieee754dp.h" 13 14 15/* 128 bits shift right logical with rounding. */ 16static void srl128(u64 *hptr, u64 *lptr, int count) 17{ 18 u64 low; 19 20 if (count >= 128) { 21 *lptr = *hptr != 0 || *lptr != 0; 22 *hptr = 0; 23 } else if (count >= 64) { 24 if (count == 64) { 25 *lptr = *hptr | (*lptr != 0); 26 } else { 27 low = *lptr; 28 *lptr = *hptr >> (count - 64); 29 *lptr |= (*hptr << (128 - count)) != 0 || low != 0; 30 } 31 *hptr = 0; 32 } else { 33 low = *lptr; 34 *lptr = low >> count | *hptr << (64 - count); 35 *lptr |= (low << (64 - count)) != 0; 36 *hptr = *hptr >> count; 37 } 38} 39 40static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, 41 union ieee754dp y, enum maddf_flags flags) 42{ 43 int re; 44 int rs; 45 unsigned int lxm; 46 unsigned int hxm; 47 unsigned int lym; 48 unsigned int hym; 49 u64 lrm; 50 u64 hrm; 51 u64 lzm; 52 u64 hzm; 53 u64 t; 54 u64 at; 55 int s; 56 57 COMPXDP; 58 COMPYDP; 59 COMPZDP; 60 61 EXPLODEXDP; 62 EXPLODEYDP; 63 EXPLODEZDP; 64 65 FLUSHXDP; 66 FLUSHYDP; 67 FLUSHZDP; 68 69 ieee754_clearcx(); 70 71 rs = xs ^ ys; 72 if (flags & MADDF_NEGATE_PRODUCT) 73 rs ^= 1; 74 if (flags & MADDF_NEGATE_ADDITION) 75 zs ^= 1; 76 77 /* 78 * Handle the cases when at least one of x, y or z is a NaN. 79 * Order of precedence is sNaN, qNaN and z, x, y. 80 */ 81 if (zc == IEEE754_CLASS_SNAN) 82 return ieee754dp_nanxcpt(z); 83 if (xc == IEEE754_CLASS_SNAN) 84 return ieee754dp_nanxcpt(x); 85 if (yc == IEEE754_CLASS_SNAN) 86 return ieee754dp_nanxcpt(y); 87 if (zc == IEEE754_CLASS_QNAN) 88 return z; 89 if (xc == IEEE754_CLASS_QNAN) 90 return x; 91 if (yc == IEEE754_CLASS_QNAN) 92 return y; 93 94 if (zc == IEEE754_CLASS_DNORM) 95 DPDNORMZ; 96 /* ZERO z cases are handled separately below */ 97 98 switch (CLPAIR(xc, yc)) { 99 100 /* 101 * Infinity handling 102 */ 103 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 104 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 105 ieee754_setcx(IEEE754_INVALID_OPERATION); 106 return ieee754dp_indef(); 107 108 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 109 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 110 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 111 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 112 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 113 if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { 114 /* 115 * Cases of addition of infinities with opposite signs 116 * or subtraction of infinities with same signs. 117 */ 118 ieee754_setcx(IEEE754_INVALID_OPERATION); 119 return ieee754dp_indef(); 120 } 121 /* 122 * z is here either not an infinity, or an infinity having the 123 * same sign as product (x*y). The result must be an infinity, 124 * and its sign is determined only by the sign of product (x*y). 125 */ 126 return ieee754dp_inf(rs); 127 128 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 129 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 130 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 131 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 132 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 133 if (zc == IEEE754_CLASS_INF) 134 return ieee754dp_inf(zs); 135 if (zc == IEEE754_CLASS_ZERO) { 136 /* Handle cases +0 + (-0) and similar ones. */ 137 if (zs == rs) 138 /* 139 * Cases of addition of zeros of equal signs 140 * or subtraction of zeroes of opposite signs. 141 * The sign of the resulting zero is in any 142 * such case determined only by the sign of z. 143 */ 144 return z; 145 146 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 147 } 148 /* x*y is here 0, and z is not 0, so just return z */ 149 return z; 150 151 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 152 DPDNORMX; 153 fallthrough; 154 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 155 if (zc == IEEE754_CLASS_INF) 156 return ieee754dp_inf(zs); 157 DPDNORMY; 158 break; 159 160 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 161 if (zc == IEEE754_CLASS_INF) 162 return ieee754dp_inf(zs); 163 DPDNORMX; 164 break; 165 166 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 167 if (zc == IEEE754_CLASS_INF) 168 return ieee754dp_inf(zs); 169 /* continue to real computations */ 170 } 171 172 /* Finally get to do some computation */ 173 174 /* 175 * Do the multiplication bit first 176 * 177 * rm = xm * ym, re = xe + ye basically 178 * 179 * At this point xm and ym should have been normalized. 180 */ 181 assert(xm & DP_HIDDEN_BIT); 182 assert(ym & DP_HIDDEN_BIT); 183 184 re = xe + ye; 185 186 /* shunt to top of word */ 187 xm <<= 64 - (DP_FBITS + 1); 188 ym <<= 64 - (DP_FBITS + 1); 189 190 /* 191 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm. 192 */ 193 194 lxm = xm; 195 hxm = xm >> 32; 196 lym = ym; 197 hym = ym >> 32; 198 199 lrm = DPXMULT(lxm, lym); 200 hrm = DPXMULT(hxm, hym); 201 202 t = DPXMULT(lxm, hym); 203 204 at = lrm + (t << 32); 205 hrm += at < lrm; 206 lrm = at; 207 208 hrm = hrm + (t >> 32); 209 210 t = DPXMULT(hxm, lym); 211 212 at = lrm + (t << 32); 213 hrm += at < lrm; 214 lrm = at; 215 216 hrm = hrm + (t >> 32); 217 218 /* Put explicit bit at bit 126 if necessary */ 219 if ((int64_t)hrm < 0) { 220 lrm = (hrm << 63) | (lrm >> 1); 221 hrm = hrm >> 1; 222 re++; 223 } 224 225 assert(hrm & (1 << 62)); 226 227 if (zc == IEEE754_CLASS_ZERO) { 228 /* 229 * Move explicit bit from bit 126 to bit 55 since the 230 * ieee754dp_format code expects the mantissa to be 231 * 56 bits wide (53 + 3 rounding bits). 232 */ 233 srl128(&hrm, &lrm, (126 - 55)); 234 return ieee754dp_format(rs, re, lrm); 235 } 236 237 /* Move explicit bit from bit 52 to bit 126 */ 238 lzm = 0; 239 hzm = zm << 10; 240 assert(hzm & (1 << 62)); 241 242 /* Make the exponents the same */ 243 if (ze > re) { 244 /* 245 * Have to shift y fraction right to align. 246 */ 247 s = ze - re; 248 srl128(&hrm, &lrm, s); 249 re += s; 250 } else if (re > ze) { 251 /* 252 * Have to shift x fraction right to align. 253 */ 254 s = re - ze; 255 srl128(&hzm, &lzm, s); 256 ze += s; 257 } 258 assert(ze == re); 259 assert(ze <= DP_EMAX); 260 261 /* Do the addition */ 262 if (zs == rs) { 263 /* 264 * Generate 128 bit result by adding two 127 bit numbers 265 * leaving result in hzm:lzm, zs and ze. 266 */ 267 hzm = hzm + hrm + (lzm > (lzm + lrm)); 268 lzm = lzm + lrm; 269 if ((int64_t)hzm < 0) { /* carry out */ 270 srl128(&hzm, &lzm, 1); 271 ze++; 272 } 273 } else { 274 if (hzm > hrm || (hzm == hrm && lzm >= lrm)) { 275 hzm = hzm - hrm - (lzm < lrm); 276 lzm = lzm - lrm; 277 } else { 278 hzm = hrm - hzm - (lrm < lzm); 279 lzm = lrm - lzm; 280 zs = rs; 281 } 282 if (lzm == 0 && hzm == 0) 283 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 284 285 /* 286 * Put explicit bit at bit 126 if necessary. 287 */ 288 if (hzm == 0) { 289 /* left shift by 63 or 64 bits */ 290 if ((int64_t)lzm < 0) { 291 /* MSB of lzm is the explicit bit */ 292 hzm = lzm >> 1; 293 lzm = lzm << 63; 294 ze -= 63; 295 } else { 296 hzm = lzm; 297 lzm = 0; 298 ze -= 64; 299 } 300 } 301 302 t = 0; 303 while ((hzm >> (62 - t)) == 0) 304 t++; 305 306 assert(t <= 62); 307 if (t) { 308 hzm = hzm << t | lzm >> (64 - t); 309 lzm = lzm << t; 310 ze -= t; 311 } 312 } 313 314 /* 315 * Move explicit bit from bit 126 to bit 55 since the 316 * ieee754dp_format code expects the mantissa to be 317 * 56 bits wide (53 + 3 rounding bits). 318 */ 319 srl128(&hzm, &lzm, (126 - 55)); 320 321 return ieee754dp_format(zs, ze, lzm); 322} 323 324union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, 325 union ieee754dp y) 326{ 327 return _dp_maddf(z, x, y, 0); 328} 329 330union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, 331 union ieee754dp y) 332{ 333 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 334} 335 336union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x, 337 union ieee754dp y) 338{ 339 return _dp_maddf(z, x, y, 0); 340} 341 342union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x, 343 union ieee754dp y) 344{ 345 return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION); 346} 347 348union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x, 349 union ieee754dp y) 350{ 351 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); 352} 353 354union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x, 355 union ieee754dp y) 356{ 357 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 358}