fp-bench.c - cachepc-qemu - Fork of AMDESE/qemu with changes for cachepc side-channel attack

	cachepc-qemu Fork of AMDESE/qemu with changes for cachepc side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-qemu
	Log \| Files \| Refs \| Submodules \| LICENSE \| sfeed.txt
fp-bench.c (19961B)
      1/*
      2 * fp-bench.c - A collection of simple floating point microbenchmarks.
      3 *
      4 * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
      5 *
      6 * License: GNU GPL, version 2 or later.
      7 *   See the COPYING file in the top-level directory.
      8 */
      9#ifndef HW_POISON_H
     10#error Must define HW_POISON_H to work around TARGET_* poisoning
     11#endif
     12
     13#include "qemu/osdep.h"
     14#include <math.h>
     15#include <fenv.h>
     16#include "qemu/timer.h"
     17#include "qemu/int128.h"
     18#include "fpu/softfloat.h"
     19
     20/* amortize the computation of random inputs */
     21#define OPS_PER_ITER     50000
     22
     23#define MAX_OPERANDS 3
     24
     25#define SEED_A 0xdeadfacedeadface
     26#define SEED_B 0xbadc0feebadc0fee
     27#define SEED_C 0xbeefdeadbeefdead
     28
     29enum op {
     30    OP_ADD,
     31    OP_SUB,
     32    OP_MUL,
     33    OP_DIV,
     34    OP_FMA,
     35    OP_SQRT,
     36    OP_CMP,
     37    OP_MAX_NR,
     38};
     39
     40static const char * const op_names[] = {
     41    [OP_ADD] = "add",
     42    [OP_SUB] = "sub",
     43    [OP_MUL] = "mul",
     44    [OP_DIV] = "div",
     45    [OP_FMA] = "mulAdd",
     46    [OP_SQRT] = "sqrt",
     47    [OP_CMP] = "cmp",
     48    [OP_MAX_NR] = NULL,
     49};
     50
     51enum precision {
     52    PREC_SINGLE,
     53    PREC_DOUBLE,
     54    PREC_QUAD,
     55    PREC_FLOAT32,
     56    PREC_FLOAT64,
     57    PREC_FLOAT128,
     58    PREC_MAX_NR,
     59};
     60
     61enum rounding {
     62    ROUND_EVEN,
     63    ROUND_ZERO,
     64    ROUND_DOWN,
     65    ROUND_UP,
     66    ROUND_TIEAWAY,
     67    N_ROUND_MODES,
     68};
     69
     70static const char * const round_names[] = {
     71    [ROUND_EVEN] = "even",
     72    [ROUND_ZERO] = "zero",
     73    [ROUND_DOWN] = "down",
     74    [ROUND_UP] = "up",
     75    [ROUND_TIEAWAY] = "tieaway",
     76};
     77
     78enum tester {
     79    TESTER_SOFT,
     80    TESTER_HOST,
     81    TESTER_MAX_NR,
     82};
     83
     84static const char * const tester_names[] = {
     85    [TESTER_SOFT] = "soft",
     86    [TESTER_HOST] = "host",
     87    [TESTER_MAX_NR] = NULL,
     88};
     89
     90union fp {
     91    float f;
     92    double d;
     93    float32 f32;
     94    float64 f64;
     95    float128 f128;
     96    uint64_t u64;
     97};
     98
     99struct op_state;
    100
    101typedef float (*float_func_t)(const struct op_state *s);
    102typedef double (*double_func_t)(const struct op_state *s);
    103
    104union fp_func {
    105    float_func_t float_func;
    106    double_func_t double_func;
    107};
    108
    109typedef void (*bench_func_t)(void);
    110
    111struct op_desc {
    112    const char * const name;
    113};
    114
    115#define DEFAULT_DURATION_SECS 1
    116
    117static uint64_t random_ops[MAX_OPERANDS] = {
    118    SEED_A, SEED_B, SEED_C,
    119};
    120
    121static float128 random_quad_ops[MAX_OPERANDS] = {
    122    {SEED_A, SEED_B}, {SEED_B, SEED_C}, {SEED_C, SEED_A},
    123};
    124static float_status soft_status;
    125static enum precision precision;
    126static enum op operation;
    127static enum tester tester;
    128static uint64_t n_completed_ops;
    129static unsigned int duration = DEFAULT_DURATION_SECS;
    130static int64_t ns_elapsed;
    131/* disable optimizations with volatile */
    132static volatile union fp res;
    133
    134/*
    135 * From: https://en.wikipedia.org/wiki/Xorshift
    136 * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
    137 * guaranteed to be >= INT_MAX).
    138 */
    139static uint64_t xorshift64star(uint64_t x)
    140{
    141    x ^= x >> 12; /* a */
    142    x ^= x << 25; /* b */
    143    x ^= x >> 27; /* c */
    144    return x * UINT64_C(2685821657736338717);
    145}
    146
    147static void update_random_ops(int n_ops, enum precision prec)
    148{
    149    int i;
    150
    151    for (i = 0; i < n_ops; i++) {
    152
    153        switch (prec) {
    154        case PREC_SINGLE:
    155        case PREC_FLOAT32:
    156        {
    157            uint64_t r = random_ops[i];
    158            do {
    159                r = xorshift64star(r);
    160            } while (!float32_is_normal(r));
    161            random_ops[i] = r;
    162            break;
    163        }
    164        case PREC_DOUBLE:
    165        case PREC_FLOAT64:
    166        {
    167            uint64_t r = random_ops[i];
    168            do {
    169                r = xorshift64star(r);
    170            } while (!float64_is_normal(r));
    171            random_ops[i] = r;
    172            break;
    173        }
    174        case PREC_QUAD:
    175        case PREC_FLOAT128:
    176        {
    177            float128 r = random_quad_ops[i];
    178            uint64_t hi = r.high;
    179            uint64_t lo = r.low;
    180            do {
    181                hi = xorshift64star(hi);
    182                lo = xorshift64star(lo);
    183                r = make_float128(hi, lo);
    184            } while (!float128_is_normal(r));
    185            random_quad_ops[i] = r;
    186            break;
    187        }
    188        default:
    189            g_assert_not_reached();
    190        }
    191    }
    192}
    193
    194static void fill_random(union fp *ops, int n_ops, enum precision prec,
    195                        bool no_neg)
    196{
    197    int i;
    198
    199    for (i = 0; i < n_ops; i++) {
    200        switch (prec) {
    201        case PREC_SINGLE:
    202        case PREC_FLOAT32:
    203            ops[i].f32 = make_float32(random_ops[i]);
    204            if (no_neg && float32_is_neg(ops[i].f32)) {
    205                ops[i].f32 = float32_chs(ops[i].f32);
    206            }
    207            break;
    208        case PREC_DOUBLE:
    209        case PREC_FLOAT64:
    210            ops[i].f64 = make_float64(random_ops[i]);
    211            if (no_neg && float64_is_neg(ops[i].f64)) {
    212                ops[i].f64 = float64_chs(ops[i].f64);
    213            }
    214            break;
    215        case PREC_QUAD:
    216        case PREC_FLOAT128:
    217            ops[i].f128 = random_quad_ops[i];
    218            if (no_neg && float128_is_neg(ops[i].f128)) {
    219                ops[i].f128 = float128_chs(ops[i].f128);
    220            }
    221            break;
    222        default:
    223            g_assert_not_reached();
    224        }
    225    }
    226}
    227
    228/*
    229 * The main benchmark function. Instead of (ab)using macros, we rely
    230 * on the compiler to unfold this at compile-time.
    231 */
    232static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
    233{
    234    int64_t tf = get_clock() + duration * 1000000000LL;
    235
    236    while (get_clock() < tf) {
    237        union fp ops[MAX_OPERANDS];
    238        int64_t t0;
    239        int i;
    240
    241        update_random_ops(n_ops, prec);
    242        switch (prec) {
    243        case PREC_SINGLE:
    244            fill_random(ops, n_ops, prec, no_neg);
    245            t0 = get_clock();
    246            for (i = 0; i < OPS_PER_ITER; i++) {
    247                float a = ops[0].f;
    248                float b = ops[1].f;
    249                float c = ops[2].f;
    250
    251                switch (op) {
    252                case OP_ADD:
    253                    res.f = a + b;
    254                    break;
    255                case OP_SUB:
    256                    res.f = a - b;
    257                    break;
    258                case OP_MUL:
    259                    res.f = a * b;
    260                    break;
    261                case OP_DIV:
    262                    res.f = a / b;
    263                    break;
    264                case OP_FMA:
    265                    res.f = fmaf(a, b, c);
    266                    break;
    267                case OP_SQRT:
    268                    res.f = sqrtf(a);
    269                    break;
    270                case OP_CMP:
    271                    res.u64 = isgreater(a, b);
    272                    break;
    273                default:
    274                    g_assert_not_reached();
    275                }
    276            }
    277            break;
    278        case PREC_DOUBLE:
    279            fill_random(ops, n_ops, prec, no_neg);
    280            t0 = get_clock();
    281            for (i = 0; i < OPS_PER_ITER; i++) {
    282                double a = ops[0].d;
    283                double b = ops[1].d;
    284                double c = ops[2].d;
    285
    286                switch (op) {
    287                case OP_ADD:
    288                    res.d = a + b;
    289                    break;
    290                case OP_SUB:
    291                    res.d = a - b;
    292                    break;
    293                case OP_MUL:
    294                    res.d = a * b;
    295                    break;
    296                case OP_DIV:
    297                    res.d = a / b;
    298                    break;
    299                case OP_FMA:
    300                    res.d = fma(a, b, c);
    301                    break;
    302                case OP_SQRT:
    303                    res.d = sqrt(a);
    304                    break;
    305                case OP_CMP:
    306                    res.u64 = isgreater(a, b);
    307                    break;
    308                default:
    309                    g_assert_not_reached();
    310                }
    311            }
    312            break;
    313        case PREC_FLOAT32:
    314            fill_random(ops, n_ops, prec, no_neg);
    315            t0 = get_clock();
    316            for (i = 0; i < OPS_PER_ITER; i++) {
    317                float32 a = ops[0].f32;
    318                float32 b = ops[1].f32;
    319                float32 c = ops[2].f32;
    320
    321                switch (op) {
    322                case OP_ADD:
    323                    res.f32 = float32_add(a, b, &soft_status);
    324                    break;
    325                case OP_SUB:
    326                    res.f32 = float32_sub(a, b, &soft_status);
    327                    break;
    328                case OP_MUL:
    329                    res.f = float32_mul(a, b, &soft_status);
    330                    break;
    331                case OP_DIV:
    332                    res.f32 = float32_div(a, b, &soft_status);
    333                    break;
    334                case OP_FMA:
    335                    res.f32 = float32_muladd(a, b, c, 0, &soft_status);
    336                    break;
    337                case OP_SQRT:
    338                    res.f32 = float32_sqrt(a, &soft_status);
    339                    break;
    340                case OP_CMP:
    341                    res.u64 = float32_compare_quiet(a, b, &soft_status);
    342                    break;
    343                default:
    344                    g_assert_not_reached();
    345                }
    346            }
    347            break;
    348        case PREC_FLOAT64:
    349            fill_random(ops, n_ops, prec, no_neg);
    350            t0 = get_clock();
    351            for (i = 0; i < OPS_PER_ITER; i++) {
    352                float64 a = ops[0].f64;
    353                float64 b = ops[1].f64;
    354                float64 c = ops[2].f64;
    355
    356                switch (op) {
    357                case OP_ADD:
    358                    res.f64 = float64_add(a, b, &soft_status);
    359                    break;
    360                case OP_SUB:
    361                    res.f64 = float64_sub(a, b, &soft_status);
    362                    break;
    363                case OP_MUL:
    364                    res.f = float64_mul(a, b, &soft_status);
    365                    break;
    366                case OP_DIV:
    367                    res.f64 = float64_div(a, b, &soft_status);
    368                    break;
    369                case OP_FMA:
    370                    res.f64 = float64_muladd(a, b, c, 0, &soft_status);
    371                    break;
    372                case OP_SQRT:
    373                    res.f64 = float64_sqrt(a, &soft_status);
    374                    break;
    375                case OP_CMP:
    376                    res.u64 = float64_compare_quiet(a, b, &soft_status);
    377                    break;
    378                default:
    379                    g_assert_not_reached();
    380                }
    381            }
    382            break;
    383        case PREC_FLOAT128:
    384            fill_random(ops, n_ops, prec, no_neg);
    385            t0 = get_clock();
    386            for (i = 0; i < OPS_PER_ITER; i++) {
    387                float128 a = ops[0].f128;
    388                float128 b = ops[1].f128;
    389                float128 c = ops[2].f128;
    390
    391                switch (op) {
    392                case OP_ADD:
    393                    res.f128 = float128_add(a, b, &soft_status);
    394                    break;
    395                case OP_SUB:
    396                    res.f128 = float128_sub(a, b, &soft_status);
    397                    break;
    398                case OP_MUL:
    399                    res.f128 = float128_mul(a, b, &soft_status);
    400                    break;
    401                case OP_DIV:
    402                    res.f128 = float128_div(a, b, &soft_status);
    403                    break;
    404                case OP_FMA:
    405                    res.f128 = float128_muladd(a, b, c, 0, &soft_status);
    406                    break;
    407                case OP_SQRT:
    408                    res.f128 = float128_sqrt(a, &soft_status);
    409                    break;
    410                case OP_CMP:
    411                    res.u64 = float128_compare_quiet(a, b, &soft_status);
    412                    break;
    413                default:
    414                    g_assert_not_reached();
    415                }
    416            }
    417            break;
    418        default:
    419            g_assert_not_reached();
    420        }
    421        ns_elapsed += get_clock() - t0;
    422        n_completed_ops += OPS_PER_ITER;
    423    }
    424}
    425
    426#define GEN_BENCH(name, type, prec, op, n_ops)          \
    427    static void __attribute__((flatten)) name(void)     \
    428    {                                                   \
    429        bench(prec, op, n_ops, false);                  \
    430    }
    431
    432#define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops)   \
    433    static void __attribute__((flatten)) name(void)     \
    434    {                                                   \
    435        bench(prec, op, n_ops, true);                   \
    436    }
    437
    438#define GEN_BENCH_ALL_TYPES(opname, op, n_ops)                          \
    439    GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
    440    GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
    441    GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
    442    GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops) \
    443    GEN_BENCH(bench_ ## opname ## _float128, float128, PREC_FLOAT128, op, n_ops)
    444
    445GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
    446GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
    447GEN_BENCH_ALL_TYPES(mul, OP_MUL, 2)
    448GEN_BENCH_ALL_TYPES(div, OP_DIV, 2)
    449GEN_BENCH_ALL_TYPES(fma, OP_FMA, 3)
    450GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2)
    451#undef GEN_BENCH_ALL_TYPES
    452
    453#define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n)                         \
    454    GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
    455    GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
    456    GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
    457    GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n) \
    458    GEN_BENCH_NO_NEG(bench_ ## name ## _float128, float128, PREC_FLOAT128, op, n)
    459
    460GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
    461#undef GEN_BENCH_ALL_TYPES_NO_NEG
    462
    463#undef GEN_BENCH_NO_NEG
    464#undef GEN_BENCH
    465
    466#define GEN_BENCH_FUNCS(opname, op)                             \
    467    [op] = {                                                    \
    468        [PREC_SINGLE]    = bench_ ## opname ## _float,          \
    469        [PREC_DOUBLE]    = bench_ ## opname ## _double,         \
    470        [PREC_FLOAT32]   = bench_ ## opname ## _float32,        \
    471        [PREC_FLOAT64]   = bench_ ## opname ## _float64,        \
    472        [PREC_FLOAT128]   = bench_ ## opname ## _float128,      \
    473    }
    474
    475static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
    476    GEN_BENCH_FUNCS(add, OP_ADD),
    477    GEN_BENCH_FUNCS(sub, OP_SUB),
    478    GEN_BENCH_FUNCS(mul, OP_MUL),
    479    GEN_BENCH_FUNCS(div, OP_DIV),
    480    GEN_BENCH_FUNCS(fma, OP_FMA),
    481    GEN_BENCH_FUNCS(sqrt, OP_SQRT),
    482    GEN_BENCH_FUNCS(cmp, OP_CMP),
    483};
    484
    485#undef GEN_BENCH_FUNCS
    486
    487static void run_bench(void)
    488{
    489    bench_func_t f;
    490
    491    f = bench_funcs[operation][precision];
    492    g_assert(f);
    493    f();
    494}
    495
    496/* @arr must be NULL-terminated */
    497static int find_name(const char * const *arr, const char *name)
    498{
    499    int i;
    500
    501    for (i = 0; arr[i] != NULL; i++) {
    502        if (strcmp(name, arr[i]) == 0) {
    503            return i;
    504        }
    505    }
    506    return -1;
    507}
    508
    509static void usage_complete(int argc, char *argv[])
    510{
    511    gchar *op_list = g_strjoinv(", ", (gchar **)op_names);
    512    gchar *tester_list = g_strjoinv(", ", (gchar **)tester_names);
    513
    514    fprintf(stderr, "Usage: %s [options]\n", argv[0]);
    515    fprintf(stderr, "options:\n");
    516    fprintf(stderr, " -d = duration, in seconds. Default: %d\n",
    517            DEFAULT_DURATION_SECS);
    518    fprintf(stderr, " -h = show this help message.\n");
    519    fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
    520            op_list, op_names[0]);
    521    fprintf(stderr, " -p = floating point precision (single, double, quad[soft only]). "
    522            "Default: single\n");
    523    fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
    524            "Default: even\n");
    525    fprintf(stderr, " -t = tester (%s). Default: %s\n",
    526            tester_list, tester_names[0]);
    527    fprintf(stderr, " -z = flush inputs to zero (soft tester only). "
    528            "Default: disabled\n");
    529    fprintf(stderr, " -Z = flush output to zero (soft tester only). "
    530            "Default: disabled\n");
    531
    532    g_free(tester_list);
    533    g_free(op_list);
    534}
    535
    536static int round_name_to_mode(const char *name)
    537{
    538    int i;
    539
    540    for (i = 0; i < N_ROUND_MODES; i++) {
    541        if (!strcmp(round_names[i], name)) {
    542            return i;
    543        }
    544    }
    545    return -1;
    546}
    547
    548static void QEMU_NORETURN die_host_rounding(enum rounding rounding)
    549{
    550    fprintf(stderr, "fatal: '%s' rounding not supported on this host\n",
    551            round_names[rounding]);
    552    exit(EXIT_FAILURE);
    553}
    554
    555static void set_host_precision(enum rounding rounding)
    556{
    557    int rhost;
    558
    559    switch (rounding) {
    560    case ROUND_EVEN:
    561        rhost = FE_TONEAREST;
    562        break;
    563    case ROUND_ZERO:
    564        rhost = FE_TOWARDZERO;
    565        break;
    566    case ROUND_DOWN:
    567        rhost = FE_DOWNWARD;
    568        break;
    569    case ROUND_UP:
    570        rhost = FE_UPWARD;
    571        break;
    572    case ROUND_TIEAWAY:
    573        die_host_rounding(rounding);
    574        return;
    575    default:
    576        g_assert_not_reached();
    577    }
    578
    579    if (fesetround(rhost)) {
    580        die_host_rounding(rounding);
    581    }
    582}
    583
    584static void set_soft_precision(enum rounding rounding)
    585{
    586    signed char mode;
    587
    588    switch (rounding) {
    589    case ROUND_EVEN:
    590        mode = float_round_nearest_even;
    591        break;
    592    case ROUND_ZERO:
    593        mode = float_round_to_zero;
    594        break;
    595    case ROUND_DOWN:
    596        mode = float_round_down;
    597        break;
    598    case ROUND_UP:
    599        mode = float_round_up;
    600        break;
    601    case ROUND_TIEAWAY:
    602        mode = float_round_ties_away;
    603        break;
    604    default:
    605        g_assert_not_reached();
    606    }
    607    soft_status.float_rounding_mode = mode;
    608}
    609
    610static void parse_args(int argc, char *argv[])
    611{
    612    int c;
    613    int val;
    614    int rounding = ROUND_EVEN;
    615
    616    for (;;) {
    617        c = getopt(argc, argv, "d:ho:p:r:t:zZ");
    618        if (c < 0) {
    619            break;
    620        }
    621        switch (c) {
    622        case 'd':
    623            duration = atoi(optarg);
    624            break;
    625        case 'h':
    626            usage_complete(argc, argv);
    627            exit(EXIT_SUCCESS);
    628        case 'o':
    629            val = find_name(op_names, optarg);
    630            if (val < 0) {
    631                fprintf(stderr, "Unsupported op '%s'\n", optarg);
    632                exit(EXIT_FAILURE);
    633            }
    634            operation = val;
    635            break;
    636        case 'p':
    637            if (!strcmp(optarg, "single")) {
    638                precision = PREC_SINGLE;
    639            } else if (!strcmp(optarg, "double")) {
    640                precision = PREC_DOUBLE;
    641            } else if (!strcmp(optarg, "quad")) {
    642                precision = PREC_QUAD;
    643            } else {
    644                fprintf(stderr, "Unsupported precision '%s'\n", optarg);
    645                exit(EXIT_FAILURE);
    646            }
    647            break;
    648        case 'r':
    649            rounding = round_name_to_mode(optarg);
    650            if (rounding < 0) {
    651                fprintf(stderr, "fatal: invalid rounding mode '%s'\n", optarg);
    652                exit(EXIT_FAILURE);
    653            }
    654            break;
    655        case 't':
    656            val = find_name(tester_names, optarg);
    657            if (val < 0) {
    658                fprintf(stderr, "Unsupported tester '%s'\n", optarg);
    659                exit(EXIT_FAILURE);
    660            }
    661            tester = val;
    662            break;
    663        case 'z':
    664            soft_status.flush_inputs_to_zero = 1;
    665            break;
    666        case 'Z':
    667            soft_status.flush_to_zero = 1;
    668            break;
    669        }
    670    }
    671
    672    /* set precision and rounding mode based on the tester */
    673    switch (tester) {
    674    case TESTER_HOST:
    675        set_host_precision(rounding);
    676        break;
    677    case TESTER_SOFT:
    678        set_soft_precision(rounding);
    679        switch (precision) {
    680        case PREC_SINGLE:
    681            precision = PREC_FLOAT32;
    682            break;
    683        case PREC_DOUBLE:
    684            precision = PREC_FLOAT64;
    685            break;
    686        case PREC_QUAD:
    687            precision = PREC_FLOAT128;
    688            break;
    689        default:
    690            g_assert_not_reached();
    691        }
    692        break;
    693    default:
    694        g_assert_not_reached();
    695    }
    696}
    697
    698static void pr_stats(void)
    699{
    700    printf("%.2f MFlops\n", (double)n_completed_ops / ns_elapsed * 1e3);
    701}
    702
    703int main(int argc, char *argv[])
    704{
    705    parse_args(argc, argv);
    706    run_bench();
    707    pr_stats();
    708    return 0;
    709}