sp_maddf.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
sp_maddf.c (6564B)
      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * IEEE754 floating point arithmetic
      4 * single precision: MADDF.f (Fused Multiply Add)
      5 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
      6 *
      7 * MIPS floating point support
      8 * Copyright (C) 2015 Imagination Technologies, Ltd.
      9 * Author: Markos Chandras <markos.chandras@imgtec.com>
     10 */
     11
     12#include "ieee754sp.h"
     13
     14
     15static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
     16				 union ieee754sp y, enum maddf_flags flags)
     17{
     18	int re;
     19	int rs;
     20	unsigned int rm;
     21	u64 rm64;
     22	u64 zm64;
     23	int s;
     24
     25	COMPXSP;
     26	COMPYSP;
     27	COMPZSP;
     28
     29	EXPLODEXSP;
     30	EXPLODEYSP;
     31	EXPLODEZSP;
     32
     33	FLUSHXSP;
     34	FLUSHYSP;
     35	FLUSHZSP;
     36
     37	ieee754_clearcx();
     38
     39	rs = xs ^ ys;
     40	if (flags & MADDF_NEGATE_PRODUCT)
     41		rs ^= 1;
     42	if (flags & MADDF_NEGATE_ADDITION)
     43		zs ^= 1;
     44
     45	/*
     46	 * Handle the cases when at least one of x, y or z is a NaN.
     47	 * Order of precedence is sNaN, qNaN and z, x, y.
     48	 */
     49	if (zc == IEEE754_CLASS_SNAN)
     50		return ieee754sp_nanxcpt(z);
     51	if (xc == IEEE754_CLASS_SNAN)
     52		return ieee754sp_nanxcpt(x);
     53	if (yc == IEEE754_CLASS_SNAN)
     54		return ieee754sp_nanxcpt(y);
     55	if (zc == IEEE754_CLASS_QNAN)
     56		return z;
     57	if (xc == IEEE754_CLASS_QNAN)
     58		return x;
     59	if (yc == IEEE754_CLASS_QNAN)
     60		return y;
     61
     62	if (zc == IEEE754_CLASS_DNORM)
     63		SPDNORMZ;
     64	/* ZERO z cases are handled separately below */
     65
     66	switch (CLPAIR(xc, yc)) {
     67
     68
     69	/*
     70	 * Infinity handling
     71	 */
     72	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
     73	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
     74		ieee754_setcx(IEEE754_INVALID_OPERATION);
     75		return ieee754sp_indef();
     76
     77	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
     78	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
     79	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
     80	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
     81	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
     82		if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
     83			/*
     84			 * Cases of addition of infinities with opposite signs
     85			 * or subtraction of infinities with same signs.
     86			 */
     87			ieee754_setcx(IEEE754_INVALID_OPERATION);
     88			return ieee754sp_indef();
     89		}
     90		/*
     91		 * z is here either not an infinity, or an infinity having the
     92		 * same sign as product (x*y). The result must be an infinity,
     93		 * and its sign is determined only by the sign of product (x*y).
     94		 */
     95		return ieee754sp_inf(rs);
     96
     97	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
     98	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
     99	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
    100	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
    101	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
    102		if (zc == IEEE754_CLASS_INF)
    103			return ieee754sp_inf(zs);
    104		if (zc == IEEE754_CLASS_ZERO) {
    105			/* Handle cases +0 + (-0) and similar ones. */
    106			if (zs == rs)
    107				/*
    108				 * Cases of addition of zeros of equal signs
    109				 * or subtraction of zeroes of opposite signs.
    110				 * The sign of the resulting zero is in any
    111				 * such case determined only by the sign of z.
    112				 */
    113				return z;
    114
    115			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
    116		}
    117		/* x*y is here 0, and z is not 0, so just return z */
    118		return z;
    119
    120	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
    121		SPDNORMX;
    122		fallthrough;
    123	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
    124		if (zc == IEEE754_CLASS_INF)
    125			return ieee754sp_inf(zs);
    126		SPDNORMY;
    127		break;
    128
    129	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
    130		if (zc == IEEE754_CLASS_INF)
    131			return ieee754sp_inf(zs);
    132		SPDNORMX;
    133		break;
    134
    135	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
    136		if (zc == IEEE754_CLASS_INF)
    137			return ieee754sp_inf(zs);
    138		/* continue to real computations */
    139	}
    140
    141	/* Finally get to do some computation */
    142
    143	/*
    144	 * Do the multiplication bit first
    145	 *
    146	 * rm = xm * ym, re = xe + ye basically
    147	 *
    148	 * At this point xm and ym should have been normalized.
    149	 */
    150
    151	/* rm = xm * ym, re = xe+ye basically */
    152	assert(xm & SP_HIDDEN_BIT);
    153	assert(ym & SP_HIDDEN_BIT);
    154
    155	re = xe + ye;
    156
    157	/* Multiple 24 bit xm and ym to give 48 bit results */
    158	rm64 = (uint64_t)xm * ym;
    159
    160	/* Shunt to top of word */
    161	rm64 = rm64 << 16;
    162
    163	/* Put explicit bit at bit 62 if necessary */
    164	if ((int64_t) rm64 < 0) {
    165		rm64 = rm64 >> 1;
    166		re++;
    167	}
    168
    169	assert(rm64 & (1 << 62));
    170
    171	if (zc == IEEE754_CLASS_ZERO) {
    172		/*
    173		 * Move explicit bit from bit 62 to bit 26 since the
    174		 * ieee754sp_format code expects the mantissa to be
    175		 * 27 bits wide (24 + 3 rounding bits).
    176		 */
    177		rm = XSPSRS64(rm64, (62 - 26));
    178		return ieee754sp_format(rs, re, rm);
    179	}
    180
    181	/* Move explicit bit from bit 23 to bit 62 */
    182	zm64 = (uint64_t)zm << (62 - 23);
    183	assert(zm64 & (1 << 62));
    184
    185	/* Make the exponents the same */
    186	if (ze > re) {
    187		/*
    188		 * Have to shift r fraction right to align.
    189		 */
    190		s = ze - re;
    191		rm64 = XSPSRS64(rm64, s);
    192		re += s;
    193	} else if (re > ze) {
    194		/*
    195		 * Have to shift z fraction right to align.
    196		 */
    197		s = re - ze;
    198		zm64 = XSPSRS64(zm64, s);
    199		ze += s;
    200	}
    201	assert(ze == re);
    202	assert(ze <= SP_EMAX);
    203
    204	/* Do the addition */
    205	if (zs == rs) {
    206		/*
    207		 * Generate 64 bit result by adding two 63 bit numbers
    208		 * leaving result in zm64, zs and ze.
    209		 */
    210		zm64 = zm64 + rm64;
    211		if ((int64_t)zm64 < 0) {	/* carry out */
    212			zm64 = XSPSRS1(zm64);
    213			ze++;
    214		}
    215	} else {
    216		if (zm64 >= rm64) {
    217			zm64 = zm64 - rm64;
    218		} else {
    219			zm64 = rm64 - zm64;
    220			zs = rs;
    221		}
    222		if (zm64 == 0)
    223			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
    224
    225		/*
    226		 * Put explicit bit at bit 62 if necessary.
    227		 */
    228		while ((zm64 >> 62) == 0) {
    229			zm64 <<= 1;
    230			ze--;
    231		}
    232	}
    233
    234	/*
    235	 * Move explicit bit from bit 62 to bit 26 since the
    236	 * ieee754sp_format code expects the mantissa to be
    237	 * 27 bits wide (24 + 3 rounding bits).
    238	 */
    239	zm = XSPSRS64(zm64, (62 - 26));
    240
    241	return ieee754sp_format(zs, ze, zm);
    242}
    243
    244union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
    245				union ieee754sp y)
    246{
    247	return _sp_maddf(z, x, y, 0);
    248}
    249
    250union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
    251				union ieee754sp y)
    252{
    253	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
    254}
    255
    256union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x,
    257				union ieee754sp y)
    258{
    259	return _sp_maddf(z, x, y, 0);
    260}
    261
    262union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x,
    263				union ieee754sp y)
    264{
    265	return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
    266}
    267
    268union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x,
    269				union ieee754sp y)
    270{
    271	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
    272}
    273
    274union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x,
    275				union ieee754sp y)
    276{
    277	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
    278}