cachepc-qemu

Fork of AMDESE/qemu with changes for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

optimize.c (52095B)


      1/*
      2 * Optimizations for Tiny Code Generator for QEMU
      3 *
      4 * Copyright (c) 2010 Samsung Electronics.
      5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
      6 *
      7 * Permission is hereby granted, free of charge, to any person obtaining a copy
      8 * of this software and associated documentation files (the "Software"), to deal
      9 * in the Software without restriction, including without limitation the rights
     10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     11 * copies of the Software, and to permit persons to whom the Software is
     12 * furnished to do so, subject to the following conditions:
     13 *
     14 * The above copyright notice and this permission notice shall be included in
     15 * all copies or substantial portions of the Software.
     16 *
     17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     23 * THE SOFTWARE.
     24 */
     25
     26#include "qemu/osdep.h"
     27#include "tcg/tcg-op.h"
     28#include "tcg-internal.h"
     29
/*
 * Expand into one case label per integer width of opcode X, e.g.
 * CASE_OP_32_64(add) -> "case INDEX_op_add_i32: case INDEX_op_add_i64:".
 */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

/* As CASE_OP_32_64, but also covering the vector variant of the opcode. */
#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)
     38
/* Per-temporary optimizer state, hung off TCGTemp.state_ptr. */
typedef struct TempOptInfo {
    bool is_const;        /* true iff the temp is known to hold VAL */
    TCGTemp *prev_copy;   /* circular doubly-linked list of known copies */
    TCGTemp *next_copy;   /* (a singleton list means "no known copies") */
    uint64_t val;         /* constant value; valid only when is_const */
    uint64_t mask;        /* bits that may be nonzero (known-zero info) */
} TempOptInfo;
     46
     47static inline TempOptInfo *ts_info(TCGTemp *ts)
     48{
     49    return ts->state_ptr;
     50}
     51
     52static inline TempOptInfo *arg_info(TCGArg arg)
     53{
     54    return ts_info(arg_temp(arg));
     55}
     56
     57static inline bool ts_is_const(TCGTemp *ts)
     58{
     59    return ts_info(ts)->is_const;
     60}
     61
     62static inline bool arg_is_const(TCGArg arg)
     63{
     64    return ts_is_const(arg_temp(arg));
     65}
     66
     67static inline bool ts_is_copy(TCGTemp *ts)
     68{
     69    return ts_info(ts)->next_copy != ts;
     70}
     71
     72/* Reset TEMP's state, possibly removing the temp for the list of copies.  */
     73static void reset_ts(TCGTemp *ts)
     74{
     75    TempOptInfo *ti = ts_info(ts);
     76    TempOptInfo *pi = ts_info(ti->prev_copy);
     77    TempOptInfo *ni = ts_info(ti->next_copy);
     78
     79    ni->prev_copy = ti->prev_copy;
     80    pi->next_copy = ti->next_copy;
     81    ti->next_copy = ts;
     82    ti->prev_copy = ts;
     83    ti->is_const = false;
     84    ti->mask = -1;
     85}
     86
     87static void reset_temp(TCGArg arg)
     88{
     89    reset_ts(arg_temp(arg));
     90}
     91
     92/* Initialize and activate a temporary.  */
     93static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
     94{
     95    size_t idx = temp_idx(ts);
     96    TempOptInfo *ti;
     97
     98    if (test_bit(idx, temps_used->l)) {
     99        return;
    100    }
    101    set_bit(idx, temps_used->l);
    102
    103    ti = ts->state_ptr;
    104    if (ti == NULL) {
    105        ti = tcg_malloc(sizeof(TempOptInfo));
    106        ts->state_ptr = ti;
    107    }
    108
    109    ti->next_copy = ts;
    110    ti->prev_copy = ts;
    111    if (ts->kind == TEMP_CONST) {
    112        ti->is_const = true;
    113        ti->val = ts->val;
    114        ti->mask = ts->val;
    115        if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
    116            /* High bits of a 32-bit quantity are garbage.  */
    117            ti->mask |= ~0xffffffffull;
    118        }
    119    } else {
    120        ti->is_const = false;
    121        ti->mask = -1;
    122    }
    123}
    124
    125static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
    126{
    127    init_ts_info(temps_used, arg_temp(arg));
    128}
    129
    130static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
    131{
    132    TCGTemp *i, *g, *l;
    133
    134    /* If this is already readonly, we can't do better. */
    135    if (temp_readonly(ts)) {
    136        return ts;
    137    }
    138
    139    g = l = NULL;
    140    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
    141        if (temp_readonly(i)) {
    142            return i;
    143        } else if (i->kind > ts->kind) {
    144            if (i->kind == TEMP_GLOBAL) {
    145                g = i;
    146            } else if (i->kind == TEMP_LOCAL) {
    147                l = i;
    148            }
    149        }
    150    }
    151
    152    /* If we didn't find a better representation, return the same temp. */
    153    return g ? g : l ? l : ts;
    154}
    155
    156static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
    157{
    158    TCGTemp *i;
    159
    160    if (ts1 == ts2) {
    161        return true;
    162    }
    163
    164    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
    165        return false;
    166    }
    167
    168    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
    169        if (i == ts2) {
    170            return true;
    171        }
    172    }
    173
    174    return false;
    175}
    176
    177static bool args_are_copies(TCGArg arg1, TCGArg arg2)
    178{
    179    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
    180}
    181
/*
 * Rewrite OP in place as "mov dst, src" of the appropriate width, then
 * record dst as a copy of src (propagating constness and mask info) so
 * that later uses of dst can be substituted.  If dst and src are
 * already known copies, the op is dead and is removed instead.
 */
static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    TempOptInfo *di;
    TempOptInfo *si;
    uint64_t mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    /* Drop everything previously known about dst before re-linking it. */
    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    /* Choose the mov opcode matching the class/width of the original op. */
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    /* Copy/const tracking is only valid when the types match exactly. */
    if (src_ts->type == dst_ts->type) {
        TempOptInfo *ni = ts_info(si->next_copy);

        /* Insert dst into src's circular copy list, right after src. */
        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}
    231
    232static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
    233                             TCGOp *op, TCGArg dst, uint64_t val)
    234{
    235    const TCGOpDef *def = &tcg_op_defs[op->opc];
    236    TCGType type;
    237    TCGTemp *tv;
    238
    239    if (def->flags & TCG_OPF_VECTOR) {
    240        type = TCGOP_VECL(op) + TCG_TYPE_V64;
    241    } else if (def->flags & TCG_OPF_64BIT) {
    242        type = TCG_TYPE_I64;
    243    } else {
    244        type = TCG_TYPE_I32;
    245    }
    246
    247    /* Convert movi to mov with constant temp. */
    248    tv = tcg_constant_internal(type, val);
    249    init_ts_info(temps_used, tv);
    250    tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
    251}
    252
/*
 * Evaluate opcode OP on the constant operands X and Y.  For unary ops
 * only X is meaningful; Y is reused as an extra immediate where the
 * opcode needs one (shift/rotate count, bswap flags, or the value that
 * clz/ctz produce when X is zero).  Results of 32-bit opcodes are NOT
 * truncated here; the caller, do_constant_folding(), does that.
 */
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    /* Shift/rotate counts are taken modulo the operand width, which
       also keeps the C shift expressions free of undefined behavior. */
    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    /* For clz/ctz, Y supplies the result when X is zero. */
    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    /* For bswap, Y carries the flags; TCG_BSWAP_OS requests that the
       swapped result be sign-extended. */
    CASE_OP_32_64(bswap16):
        x = bswap16(x);
        return y & TCG_BSWAP_OS ? (int16_t)x : x;

    CASE_OP_32_64(bswap32):
        x = bswap32(x);
        return y & TCG_BSWAP_OS ? (int32_t)x : x;

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    /* High half of the widening multiply. */
    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        /* The caller only folds ops it knows; anything else is a bug. */
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}
    417
    418static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
    419{
    420    const TCGOpDef *def = &tcg_op_defs[op];
    421    uint64_t res = do_constant_folding_2(op, x, y);
    422    if (!(def->flags & TCG_OPF_64BIT)) {
    423        res = (int32_t)res;
    424    }
    425    return res;
    426}
    427
    428static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
    429{
    430    switch (c) {
    431    case TCG_COND_EQ:
    432        return x == y;
    433    case TCG_COND_NE:
    434        return x != y;
    435    case TCG_COND_LT:
    436        return (int32_t)x < (int32_t)y;
    437    case TCG_COND_GE:
    438        return (int32_t)x >= (int32_t)y;
    439    case TCG_COND_LE:
    440        return (int32_t)x <= (int32_t)y;
    441    case TCG_COND_GT:
    442        return (int32_t)x > (int32_t)y;
    443    case TCG_COND_LTU:
    444        return x < y;
    445    case TCG_COND_GEU:
    446        return x >= y;
    447    case TCG_COND_LEU:
    448        return x <= y;
    449    case TCG_COND_GTU:
    450        return x > y;
    451    default:
    452        tcg_abort();
    453    }
    454}
    455
    456static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
    457{
    458    switch (c) {
    459    case TCG_COND_EQ:
    460        return x == y;
    461    case TCG_COND_NE:
    462        return x != y;
    463    case TCG_COND_LT:
    464        return (int64_t)x < (int64_t)y;
    465    case TCG_COND_GE:
    466        return (int64_t)x >= (int64_t)y;
    467    case TCG_COND_LE:
    468        return (int64_t)x <= (int64_t)y;
    469    case TCG_COND_GT:
    470        return (int64_t)x > (int64_t)y;
    471    case TCG_COND_LTU:
    472        return x < y;
    473    case TCG_COND_GEU:
    474        return x >= y;
    475    case TCG_COND_LEU:
    476        return x <= y;
    477    case TCG_COND_GTU:
    478        return x > y;
    479    default:
    480        tcg_abort();
    481    }
    482}
    483
    484static bool do_constant_folding_cond_eq(TCGCond c)
    485{
    486    switch (c) {
    487    case TCG_COND_GT:
    488    case TCG_COND_LTU:
    489    case TCG_COND_LT:
    490    case TCG_COND_GTU:
    491    case TCG_COND_NE:
    492        return 0;
    493    case TCG_COND_GE:
    494    case TCG_COND_GEU:
    495    case TCG_COND_LE:
    496    case TCG_COND_LEU:
    497    case TCG_COND_EQ:
    498        return 1;
    499    default:
    500        tcg_abort();
    501    }
    502}
    503
    504/* Return 2 if the condition can't be simplified, and the result
    505   of the condition (0 or 1) if it can */
    506static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
    507                                       TCGArg y, TCGCond c)
    508{
    509    uint64_t xv = arg_info(x)->val;
    510    uint64_t yv = arg_info(y)->val;
    511
    512    if (arg_is_const(x) && arg_is_const(y)) {
    513        const TCGOpDef *def = &tcg_op_defs[op];
    514        tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
    515        if (def->flags & TCG_OPF_64BIT) {
    516            return do_constant_folding_cond_64(xv, yv, c);
    517        } else {
    518            return do_constant_folding_cond_32(xv, yv, c);
    519        }
    520    } else if (args_are_copies(x, y)) {
    521        return do_constant_folding_cond_eq(c);
    522    } else if (arg_is_const(y) && yv == 0) {
    523        switch (c) {
    524        case TCG_COND_LTU:
    525            return 0;
    526        case TCG_COND_GEU:
    527            return 1;
    528        default:
    529            return 2;
    530        }
    531    }
    532    return 2;
    533}
    534
    535/* Return 2 if the condition can't be simplified, and the result
    536   of the condition (0 or 1) if it can */
    537static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
    538{
    539    TCGArg al = p1[0], ah = p1[1];
    540    TCGArg bl = p2[0], bh = p2[1];
    541
    542    if (arg_is_const(bl) && arg_is_const(bh)) {
    543        tcg_target_ulong blv = arg_info(bl)->val;
    544        tcg_target_ulong bhv = arg_info(bh)->val;
    545        uint64_t b = deposit64(blv, 32, 32, bhv);
    546
    547        if (arg_is_const(al) && arg_is_const(ah)) {
    548            tcg_target_ulong alv = arg_info(al)->val;
    549            tcg_target_ulong ahv = arg_info(ah)->val;
    550            uint64_t a = deposit64(alv, 32, 32, ahv);
    551            return do_constant_folding_cond_64(a, b, c);
    552        }
    553        if (b == 0) {
    554            switch (c) {
    555            case TCG_COND_LTU:
    556                return 0;
    557            case TCG_COND_GEU:
    558                return 1;
    559            default:
    560                break;
    561            }
    562        }
    563    }
    564    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
    565        return do_constant_folding_cond_eq(c);
    566    }
    567    return 2;
    568}
    569
    570static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
    571{
    572    TCGArg a1 = *p1, a2 = *p2;
    573    int sum = 0;
    574    sum += arg_is_const(a1);
    575    sum -= arg_is_const(a2);
    576
    577    /* Prefer the constant in second argument, and then the form
    578       op a, a, b, which is better handled on non-RISC hosts. */
    579    if (sum > 0 || (sum == 0 && dest == a2)) {
    580        *p1 = a2;
    581        *p2 = a1;
    582        return true;
    583    }
    584    return false;
    585}
    586
    587static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
    588{
    589    int sum = 0;
    590    sum += arg_is_const(p1[0]);
    591    sum += arg_is_const(p1[1]);
    592    sum -= arg_is_const(p2[0]);
    593    sum -= arg_is_const(p2[1]);
    594    if (sum > 0) {
    595        TCGArg t;
    596        t = p1[0], p1[0] = p2[0], p2[0] = t;
    597        t = p1[1], p1[1] = p2[1], p2[1] = t;
    598        return true;
    599    }
    600    return false;
    601}
    602
    603/* Propagate constants and copies, fold constant expressions. */
    604void tcg_optimize(TCGContext *s)
    605{
    606    int nb_temps, nb_globals, i;
    607    TCGOp *op, *op_next, *prev_mb = NULL;
    608    TCGTempSet temps_used;
    609
    610    /* Array VALS has an element for each temp.
    611       If this temp holds a constant then its value is kept in VALS' element.
    612       If this temp is a copy of other ones then the other copies are
    613       available through the doubly linked circular list. */
    614
    615    nb_temps = s->nb_temps;
    616    nb_globals = s->nb_globals;
    617
    618    memset(&temps_used, 0, sizeof(temps_used));
    619    for (i = 0; i < nb_temps; ++i) {
    620        s->temps[i].state_ptr = NULL;
    621    }
    622
    623    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
    624        uint64_t mask, partmask, affected, tmp;
    625        int nb_oargs, nb_iargs;
    626        TCGOpcode opc = op->opc;
    627        const TCGOpDef *def = &tcg_op_defs[opc];
    628
    629        /* Count the arguments, and initialize the temps that are
    630           going to be used */
    631        if (opc == INDEX_op_call) {
    632            nb_oargs = TCGOP_CALLO(op);
    633            nb_iargs = TCGOP_CALLI(op);
    634            for (i = 0; i < nb_oargs + nb_iargs; i++) {
    635                TCGTemp *ts = arg_temp(op->args[i]);
    636                if (ts) {
    637                    init_ts_info(&temps_used, ts);
    638                }
    639            }
    640        } else {
    641            nb_oargs = def->nb_oargs;
    642            nb_iargs = def->nb_iargs;
    643            for (i = 0; i < nb_oargs + nb_iargs; i++) {
    644                init_arg_info(&temps_used, op->args[i]);
    645            }
    646        }
    647
    648        /* Do copy propagation */
    649        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
    650            TCGTemp *ts = arg_temp(op->args[i]);
    651            if (ts && ts_is_copy(ts)) {
    652                op->args[i] = temp_arg(find_better_copy(s, ts));
    653            }
    654        }
    655
    656        /* For commutative operations make constant second argument */
    657        switch (opc) {
    658        CASE_OP_32_64_VEC(add):
    659        CASE_OP_32_64_VEC(mul):
    660        CASE_OP_32_64_VEC(and):
    661        CASE_OP_32_64_VEC(or):
    662        CASE_OP_32_64_VEC(xor):
    663        CASE_OP_32_64(eqv):
    664        CASE_OP_32_64(nand):
    665        CASE_OP_32_64(nor):
    666        CASE_OP_32_64(muluh):
    667        CASE_OP_32_64(mulsh):
    668            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    669            break;
    670        CASE_OP_32_64(brcond):
    671            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
    672                op->args[2] = tcg_swap_cond(op->args[2]);
    673            }
    674            break;
    675        CASE_OP_32_64(setcond):
    676            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
    677                op->args[3] = tcg_swap_cond(op->args[3]);
    678            }
    679            break;
    680        CASE_OP_32_64(movcond):
    681            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
    682                op->args[5] = tcg_swap_cond(op->args[5]);
    683            }
    684            /* For movcond, we canonicalize the "false" input reg to match
    685               the destination reg so that the tcg backend can implement
    686               a "move if true" operation.  */
    687            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
    688                op->args[5] = tcg_invert_cond(op->args[5]);
    689            }
    690            break;
    691        CASE_OP_32_64(add2):
    692            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
    693            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
    694            break;
    695        CASE_OP_32_64(mulu2):
    696        CASE_OP_32_64(muls2):
    697            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
    698            break;
    699        case INDEX_op_brcond2_i32:
    700            if (swap_commutative2(&op->args[0], &op->args[2])) {
    701                op->args[4] = tcg_swap_cond(op->args[4]);
    702            }
    703            break;
    704        case INDEX_op_setcond2_i32:
    705            if (swap_commutative2(&op->args[1], &op->args[3])) {
    706                op->args[5] = tcg_swap_cond(op->args[5]);
    707            }
    708            break;
    709        default:
    710            break;
    711        }
    712
    713        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
    714           and "sub r, 0, a => neg r, a" case.  */
    715        switch (opc) {
    716        CASE_OP_32_64(shl):
    717        CASE_OP_32_64(shr):
    718        CASE_OP_32_64(sar):
    719        CASE_OP_32_64(rotl):
    720        CASE_OP_32_64(rotr):
    721            if (arg_is_const(op->args[1])
    722                && arg_info(op->args[1])->val == 0) {
    723                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
    724                continue;
    725            }
    726            break;
    727        CASE_OP_32_64_VEC(sub):
    728            {
    729                TCGOpcode neg_op;
    730                bool have_neg;
    731
    732                if (arg_is_const(op->args[2])) {
    733                    /* Proceed with possible constant folding. */
    734                    break;
    735                }
    736                if (opc == INDEX_op_sub_i32) {
    737                    neg_op = INDEX_op_neg_i32;
    738                    have_neg = TCG_TARGET_HAS_neg_i32;
    739                } else if (opc == INDEX_op_sub_i64) {
    740                    neg_op = INDEX_op_neg_i64;
    741                    have_neg = TCG_TARGET_HAS_neg_i64;
    742                } else if (TCG_TARGET_HAS_neg_vec) {
    743                    TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
    744                    unsigned vece = TCGOP_VECE(op);
    745                    neg_op = INDEX_op_neg_vec;
    746                    have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
    747                } else {
    748                    break;
    749                }
    750                if (!have_neg) {
    751                    break;
    752                }
    753                if (arg_is_const(op->args[1])
    754                    && arg_info(op->args[1])->val == 0) {
    755                    op->opc = neg_op;
    756                    reset_temp(op->args[0]);
    757                    op->args[1] = op->args[2];
    758                    continue;
    759                }
    760            }
    761            break;
    762        CASE_OP_32_64_VEC(xor):
    763        CASE_OP_32_64(nand):
    764            if (!arg_is_const(op->args[1])
    765                && arg_is_const(op->args[2])
    766                && arg_info(op->args[2])->val == -1) {
    767                i = 1;
    768                goto try_not;
    769            }
    770            break;
    771        CASE_OP_32_64(nor):
    772            if (!arg_is_const(op->args[1])
    773                && arg_is_const(op->args[2])
    774                && arg_info(op->args[2])->val == 0) {
    775                i = 1;
    776                goto try_not;
    777            }
    778            break;
    779        CASE_OP_32_64_VEC(andc):
    780            if (!arg_is_const(op->args[2])
    781                && arg_is_const(op->args[1])
    782                && arg_info(op->args[1])->val == -1) {
    783                i = 2;
    784                goto try_not;
    785            }
    786            break;
    787        CASE_OP_32_64_VEC(orc):
    788        CASE_OP_32_64(eqv):
    789            if (!arg_is_const(op->args[2])
    790                && arg_is_const(op->args[1])
    791                && arg_info(op->args[1])->val == 0) {
    792                i = 2;
    793                goto try_not;
    794            }
    795            break;
    796        try_not:
    797            {
    798                TCGOpcode not_op;
    799                bool have_not;
    800
    801                if (def->flags & TCG_OPF_VECTOR) {
    802                    not_op = INDEX_op_not_vec;
    803                    have_not = TCG_TARGET_HAS_not_vec;
    804                } else if (def->flags & TCG_OPF_64BIT) {
    805                    not_op = INDEX_op_not_i64;
    806                    have_not = TCG_TARGET_HAS_not_i64;
    807                } else {
    808                    not_op = INDEX_op_not_i32;
    809                    have_not = TCG_TARGET_HAS_not_i32;
    810                }
    811                if (!have_not) {
    812                    break;
    813                }
    814                op->opc = not_op;
    815                reset_temp(op->args[0]);
    816                op->args[1] = op->args[i];
    817                continue;
    818            }
    819        default:
    820            break;
    821        }
    822
    823        /* Simplify expression for "op r, a, const => mov r, a" cases */
    824        switch (opc) {
    825        CASE_OP_32_64_VEC(add):
    826        CASE_OP_32_64_VEC(sub):
    827        CASE_OP_32_64_VEC(or):
    828        CASE_OP_32_64_VEC(xor):
    829        CASE_OP_32_64_VEC(andc):
    830        CASE_OP_32_64(shl):
    831        CASE_OP_32_64(shr):
    832        CASE_OP_32_64(sar):
    833        CASE_OP_32_64(rotl):
    834        CASE_OP_32_64(rotr):
    835            if (!arg_is_const(op->args[1])
    836                && arg_is_const(op->args[2])
    837                && arg_info(op->args[2])->val == 0) {
    838                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
    839                continue;
    840            }
    841            break;
    842        CASE_OP_32_64_VEC(and):
    843        CASE_OP_32_64_VEC(orc):
    844        CASE_OP_32_64(eqv):
    845            if (!arg_is_const(op->args[1])
    846                && arg_is_const(op->args[2])
    847                && arg_info(op->args[2])->val == -1) {
    848                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
    849                continue;
    850            }
    851            break;
    852        default:
    853            break;
    854        }
    855
    856        /* Simplify using known-zero bits. Currently only ops with a single
    857           output argument is supported. */
    858        mask = -1;
    859        affected = -1;
    860        switch (opc) {
    861        CASE_OP_32_64(ext8s):
    862            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
    863                break;
    864            }
    865            QEMU_FALLTHROUGH;
    866        CASE_OP_32_64(ext8u):
    867            mask = 0xff;
    868            goto and_const;
    869        CASE_OP_32_64(ext16s):
    870            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
    871                break;
    872            }
    873            QEMU_FALLTHROUGH;
    874        CASE_OP_32_64(ext16u):
    875            mask = 0xffff;
    876            goto and_const;
    877        case INDEX_op_ext32s_i64:
    878            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
    879                break;
    880            }
    881            QEMU_FALLTHROUGH;
    882        case INDEX_op_ext32u_i64:
    883            mask = 0xffffffffU;
    884            goto and_const;
    885
    886        CASE_OP_32_64(and):
    887            mask = arg_info(op->args[2])->mask;
    888            if (arg_is_const(op->args[2])) {
    889        and_const:
    890                affected = arg_info(op->args[1])->mask & ~mask;
    891            }
    892            mask = arg_info(op->args[1])->mask & mask;
    893            break;
    894
    895        case INDEX_op_ext_i32_i64:
    896            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
    897                break;
    898            }
    899            QEMU_FALLTHROUGH;
    900        case INDEX_op_extu_i32_i64:
    901            /* We do not compute affected as it is a size changing op.  */
    902            mask = (uint32_t)arg_info(op->args[1])->mask;
    903            break;
    904
    905        CASE_OP_32_64(andc):
    906            /* Known-zeros does not imply known-ones.  Therefore unless
    907               op->args[2] is constant, we can't infer anything from it.  */
    908            if (arg_is_const(op->args[2])) {
    909                mask = ~arg_info(op->args[2])->mask;
    910                goto and_const;
    911            }
    912            /* But we certainly know nothing outside args[1] may be set. */
    913            mask = arg_info(op->args[1])->mask;
    914            break;
    915
    916        case INDEX_op_sar_i32:
    917            if (arg_is_const(op->args[2])) {
    918                tmp = arg_info(op->args[2])->val & 31;
    919                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
    920            }
    921            break;
    922        case INDEX_op_sar_i64:
    923            if (arg_is_const(op->args[2])) {
    924                tmp = arg_info(op->args[2])->val & 63;
    925                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
    926            }
    927            break;
    928
    929        case INDEX_op_shr_i32:
    930            if (arg_is_const(op->args[2])) {
    931                tmp = arg_info(op->args[2])->val & 31;
    932                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
    933            }
    934            break;
    935        case INDEX_op_shr_i64:
    936            if (arg_is_const(op->args[2])) {
    937                tmp = arg_info(op->args[2])->val & 63;
    938                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
    939            }
    940            break;
    941
    942        case INDEX_op_extrl_i64_i32:
    943            mask = (uint32_t)arg_info(op->args[1])->mask;
    944            break;
    945        case INDEX_op_extrh_i64_i32:
    946            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
    947            break;
    948
    949        CASE_OP_32_64(shl):
    950            if (arg_is_const(op->args[2])) {
    951                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
    952                mask = arg_info(op->args[1])->mask << tmp;
    953            }
    954            break;
    955
    956        CASE_OP_32_64(neg):
    957            /* Set to 1 all bits to the left of the rightmost.  */
    958            mask = -(arg_info(op->args[1])->mask
    959                     & -arg_info(op->args[1])->mask);
    960            break;
    961
    962        CASE_OP_32_64(deposit):
    963            mask = deposit64(arg_info(op->args[1])->mask,
    964                             op->args[3], op->args[4],
    965                             arg_info(op->args[2])->mask);
    966            break;
    967
    968        CASE_OP_32_64(extract):
    969            mask = extract64(arg_info(op->args[1])->mask,
    970                             op->args[2], op->args[3]);
    971            if (op->args[2] == 0) {
    972                affected = arg_info(op->args[1])->mask & ~mask;
    973            }
    974            break;
    975        CASE_OP_32_64(sextract):
    976            mask = sextract64(arg_info(op->args[1])->mask,
    977                              op->args[2], op->args[3]);
    978            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
    979                affected = arg_info(op->args[1])->mask & ~mask;
    980            }
    981            break;
    982
    983        CASE_OP_32_64(or):
    984        CASE_OP_32_64(xor):
    985            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
    986            break;
    987
    988        case INDEX_op_clz_i32:
    989        case INDEX_op_ctz_i32:
    990            mask = arg_info(op->args[2])->mask | 31;
    991            break;
    992
    993        case INDEX_op_clz_i64:
    994        case INDEX_op_ctz_i64:
    995            mask = arg_info(op->args[2])->mask | 63;
    996            break;
    997
    998        case INDEX_op_ctpop_i32:
    999            mask = 32 | 31;
   1000            break;
   1001        case INDEX_op_ctpop_i64:
   1002            mask = 64 | 63;
   1003            break;
   1004
   1005        CASE_OP_32_64(setcond):
   1006        case INDEX_op_setcond2_i32:
   1007            mask = 1;
   1008            break;
   1009
   1010        CASE_OP_32_64(movcond):
   1011            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
   1012            break;
   1013
   1014        CASE_OP_32_64(ld8u):
   1015            mask = 0xff;
   1016            break;
   1017        CASE_OP_32_64(ld16u):
   1018            mask = 0xffff;
   1019            break;
   1020        case INDEX_op_ld32u_i64:
   1021            mask = 0xffffffffu;
   1022            break;
   1023
   1024        CASE_OP_32_64(qemu_ld):
   1025            {
   1026                MemOpIdx oi = op->args[nb_oargs + nb_iargs];
   1027                MemOp mop = get_memop(oi);
   1028                if (!(mop & MO_SIGN)) {
   1029                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
   1030                }
   1031            }
   1032            break;
   1033
   1034        CASE_OP_32_64(bswap16):
   1035            mask = arg_info(op->args[1])->mask;
   1036            if (mask <= 0xffff) {
   1037                op->args[2] |= TCG_BSWAP_IZ;
   1038            }
   1039            mask = bswap16(mask);
   1040            switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
   1041            case TCG_BSWAP_OZ:
   1042                break;
   1043            case TCG_BSWAP_OS:
   1044                mask = (int16_t)mask;
   1045                break;
   1046            default: /* undefined high bits */
   1047                mask |= MAKE_64BIT_MASK(16, 48);
   1048                break;
   1049            }
   1050            break;
   1051
   1052        case INDEX_op_bswap32_i64:
   1053            mask = arg_info(op->args[1])->mask;
   1054            if (mask <= 0xffffffffu) {
   1055                op->args[2] |= TCG_BSWAP_IZ;
   1056            }
   1057            mask = bswap32(mask);
   1058            switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
   1059            case TCG_BSWAP_OZ:
   1060                break;
   1061            case TCG_BSWAP_OS:
   1062                mask = (int32_t)mask;
   1063                break;
   1064            default: /* undefined high bits */
   1065                mask |= MAKE_64BIT_MASK(32, 32);
   1066                break;
   1067            }
   1068            break;
   1069
   1070        default:
   1071            break;
   1072        }
   1073
   1074        /* 32-bit ops generate 32-bit results.  For the result is zero test
   1075           below, we can ignore high bits, but for further optimizations we
   1076           need to record that the high bits contain garbage.  */
   1077        partmask = mask;
   1078        if (!(def->flags & TCG_OPF_64BIT)) {
                   /* High half is garbage: may-be-set for "mask", but
                      irrelevant for the zero test ("partmask") and the
                      no-op test ("affected").  */
   1079            mask |= ~(tcg_target_ulong)0xffffffffu;
   1080            partmask &= 0xffffffffu;
   1081            affected &= 0xffffffffu;
   1082        }
   1083
               /* No bit of the result can possibly be set: fold to movi 0.  */
   1084        if (partmask == 0) {
   1085            tcg_debug_assert(nb_oargs == 1);
   1086            tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
   1087            continue;
   1088        }
               /* The op cannot change any possibly-set input bit: fold to mov.  */
   1089        if (affected == 0) {
   1090            tcg_debug_assert(nb_oargs == 1);
   1091            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
   1092            continue;
   1093        }
   1094
   1095        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
   1096        switch (opc) {
   1097        CASE_OP_32_64_VEC(and):
   1098        CASE_OP_32_64_VEC(mul):
   1099        CASE_OP_32_64(muluh):
   1100        CASE_OP_32_64(mulsh):
               /* x & 0 == 0; x * 0 == 0 (including both high-part multiplies).  */
   1101            if (arg_is_const(op->args[2])
   1102                && arg_info(op->args[2])->val == 0) {
   1103                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
   1104                continue;
   1105            }
   1106            break;
   1107        default:
   1108            break;
   1109        }
   1110
   1111        /* Simplify expression for "op r, a, a => mov r, a" cases */
   1112        switch (opc) {
   1113        CASE_OP_32_64_VEC(or):
   1114        CASE_OP_32_64_VEC(and):
               /* x | x == x; x & x == x.  */
   1115            if (args_are_copies(op->args[1], op->args[2])) {
   1116                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
   1117                continue;
   1118            }
   1119            break;
   1120        default:
   1121            break;
   1122        }
   1123
   1124        /* Simplify expression for "op r, a, a => movi r, 0" cases */
   1125        switch (opc) {
   1126        CASE_OP_32_64_VEC(andc):
   1127        CASE_OP_32_64_VEC(sub):
   1128        CASE_OP_32_64_VEC(xor):
               /* x & ~x == 0; x - x == 0; x ^ x == 0.  */
   1129            if (args_are_copies(op->args[1], op->args[2])) {
   1130                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
   1131                continue;
   1132            }
   1133            break;
   1134        default:
   1135            break;
   1136        }
   1137
   1138        /* Propagate constants through copy operations and do constant
   1139           folding.  Constants will be substituted to arguments by register
   1140           allocator where needed and possible.  Also detect copies. */
   1141        switch (opc) {
   1142        CASE_OP_32_64_VEC(mov):
   1143            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
   1144            break;
   1145
   1146        case INDEX_op_dup_vec:
   1147            if (arg_is_const(op->args[1])) {
   1148                tmp = arg_info(op->args[1])->val;
   1149                tmp = dup_const(TCGOP_VECE(op), tmp);
   1150                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
   1151                break;
   1152            }
   1153            goto do_default;
   1154
   1155        case INDEX_op_dup2_vec:
   1156            assert(TCG_TARGET_REG_BITS == 32);
   1157            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
   1158                tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
   1159                                 deposit64(arg_info(op->args[1])->val, 32, 32,
   1160                                           arg_info(op->args[2])->val));
   1161                break;
   1162            } else if (args_are_copies(op->args[1], op->args[2])) {
   1163                op->opc = INDEX_op_dup_vec;
   1164                TCGOP_VECE(op) = MO_32;
   1165                nb_iargs = 1;
   1166            }
   1167            goto do_default;
   1168
   1169        CASE_OP_32_64(not):
   1170        CASE_OP_32_64(neg):
   1171        CASE_OP_32_64(ext8s):
   1172        CASE_OP_32_64(ext8u):
   1173        CASE_OP_32_64(ext16s):
   1174        CASE_OP_32_64(ext16u):
   1175        CASE_OP_32_64(ctpop):
   1176        case INDEX_op_ext32s_i64:
   1177        case INDEX_op_ext32u_i64:
   1178        case INDEX_op_ext_i32_i64:
   1179        case INDEX_op_extu_i32_i64:
   1180        case INDEX_op_extrl_i64_i32:
   1181        case INDEX_op_extrh_i64_i32:
   1182            if (arg_is_const(op->args[1])) {
   1183                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
   1184                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
   1185                break;
   1186            }
   1187            goto do_default;
   1188
   1189        CASE_OP_32_64(bswap16):
   1190        CASE_OP_32_64(bswap32):
   1191        case INDEX_op_bswap64_i64:
   1192            if (arg_is_const(op->args[1])) {
   1193                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
   1194                                          op->args[2]);
   1195                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
   1196                break;
   1197            }
   1198            goto do_default;
   1199
   1200        CASE_OP_32_64(add):
   1201        CASE_OP_32_64(sub):
   1202        CASE_OP_32_64(mul):
   1203        CASE_OP_32_64(or):
   1204        CASE_OP_32_64(and):
   1205        CASE_OP_32_64(xor):
   1206        CASE_OP_32_64(shl):
   1207        CASE_OP_32_64(shr):
   1208        CASE_OP_32_64(sar):
   1209        CASE_OP_32_64(rotl):
   1210        CASE_OP_32_64(rotr):
   1211        CASE_OP_32_64(andc):
   1212        CASE_OP_32_64(orc):
   1213        CASE_OP_32_64(eqv):
   1214        CASE_OP_32_64(nand):
   1215        CASE_OP_32_64(nor):
   1216        CASE_OP_32_64(muluh):
   1217        CASE_OP_32_64(mulsh):
   1218        CASE_OP_32_64(div):
   1219        CASE_OP_32_64(divu):
   1220        CASE_OP_32_64(rem):
   1221        CASE_OP_32_64(remu):
   1222            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
   1223                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
   1224                                          arg_info(op->args[2])->val);
   1225                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
   1226                break;
   1227            }
   1228            goto do_default;
   1229
   1230        CASE_OP_32_64(clz):
   1231        CASE_OP_32_64(ctz):
   1232            if (arg_is_const(op->args[1])) {
   1233                TCGArg v = arg_info(op->args[1])->val;
   1234                if (v != 0) {
   1235                    tmp = do_constant_folding(opc, v, 0);
   1236                    tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
   1237                } else {
   1238                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
   1239                }
   1240                break;
   1241            }
   1242            goto do_default;
   1243
   1244        CASE_OP_32_64(deposit):
   1245            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
   1246                tmp = deposit64(arg_info(op->args[1])->val,
   1247                                op->args[3], op->args[4],
   1248                                arg_info(op->args[2])->val);
   1249                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
   1250                break;
   1251            }
   1252            goto do_default;
   1253
   1254        CASE_OP_32_64(extract):
   1255            if (arg_is_const(op->args[1])) {
   1256                tmp = extract64(arg_info(op->args[1])->val,
   1257                                op->args[2], op->args[3]);
   1258                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
   1259                break;
   1260            }
   1261            goto do_default;
   1262
   1263        CASE_OP_32_64(sextract):
   1264            if (arg_is_const(op->args[1])) {
   1265                tmp = sextract64(arg_info(op->args[1])->val,
   1266                                 op->args[2], op->args[3]);
   1267                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
   1268                break;
   1269            }
   1270            goto do_default;
   1271
   1272        CASE_OP_32_64(extract2):
   1273            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
   1274                uint64_t v1 = arg_info(op->args[1])->val;
   1275                uint64_t v2 = arg_info(op->args[2])->val;
   1276                int shr = op->args[3];
   1277
   1278                if (opc == INDEX_op_extract2_i64) {
   1279                    tmp = (v1 >> shr) | (v2 << (64 - shr));
   1280                } else {
   1281                    tmp = (int32_t)(((uint32_t)v1 >> shr) |
   1282                                    ((uint32_t)v2 << (32 - shr)));
   1283                }
   1284                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
   1285                break;
   1286            }
   1287            goto do_default;
   1288
   1289        CASE_OP_32_64(setcond):
   1290            tmp = do_constant_folding_cond(opc, op->args[1],
   1291                                           op->args[2], op->args[3]);
   1292            if (tmp != 2) {
   1293                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
   1294                break;
   1295            }
   1296            goto do_default;
   1297
   1298        CASE_OP_32_64(brcond):
   1299            tmp = do_constant_folding_cond(opc, op->args[0],
   1300                                           op->args[1], op->args[2]);
   1301            if (tmp != 2) {
   1302                if (tmp) {
   1303                    memset(&temps_used, 0, sizeof(temps_used));
   1304                    op->opc = INDEX_op_br;
   1305                    op->args[0] = op->args[3];
   1306                } else {
   1307                    tcg_op_remove(s, op);
   1308                }
   1309                break;
   1310            }
   1311            goto do_default;
   1312
   1313        CASE_OP_32_64(movcond):
   1314            tmp = do_constant_folding_cond(opc, op->args[1],
   1315                                           op->args[2], op->args[5]);
   1316            if (tmp != 2) {
   1317                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
   1318                break;
   1319            }
   1320            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
   1321                uint64_t tv = arg_info(op->args[3])->val;
   1322                uint64_t fv = arg_info(op->args[4])->val;
   1323                TCGCond cond = op->args[5];
   1324
   1325                if (fv == 1 && tv == 0) {
   1326                    cond = tcg_invert_cond(cond);
   1327                } else if (!(tv == 1 && fv == 0)) {
   1328                    goto do_default;
   1329                }
   1330                op->args[3] = cond;
   1331                op->opc = opc = (opc == INDEX_op_movcond_i32
   1332                                 ? INDEX_op_setcond_i32
   1333                                 : INDEX_op_setcond_i64);
   1334                nb_iargs = 2;
   1335            }
   1336            goto do_default;
   1337
   1338        case INDEX_op_add2_i32:
   1339        case INDEX_op_sub2_i32:
   1340            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
   1341                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
   1342                uint32_t al = arg_info(op->args[2])->val;
   1343                uint32_t ah = arg_info(op->args[3])->val;
   1344                uint32_t bl = arg_info(op->args[4])->val;
   1345                uint32_t bh = arg_info(op->args[5])->val;
   1346                uint64_t a = ((uint64_t)ah << 32) | al;
   1347                uint64_t b = ((uint64_t)bh << 32) | bl;
   1348                TCGArg rl, rh;
   1349                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
   1350
   1351                if (opc == INDEX_op_add2_i32) {
   1352                    a += b;
   1353                } else {
   1354                    a -= b;
   1355                }
   1356
   1357                rl = op->args[0];
   1358                rh = op->args[1];
   1359                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
   1360                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
   1361                break;
   1362            }
   1363            goto do_default;
   1364
   1365        case INDEX_op_mulu2_i32:
   1366            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
   1367                uint32_t a = arg_info(op->args[2])->val;
   1368                uint32_t b = arg_info(op->args[3])->val;
   1369                uint64_t r = (uint64_t)a * b;
   1370                TCGArg rl, rh;
   1371                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
   1372
   1373                rl = op->args[0];
   1374                rh = op->args[1];
   1375                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
   1376                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
   1377                break;
   1378            }
   1379            goto do_default;
   1380
   1381        case INDEX_op_brcond2_i32:
   1382            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
   1383                                            op->args[4]);
   1384            if (tmp != 2) {
   1385                if (tmp) {
   1386            do_brcond_true:
   1387                    memset(&temps_used, 0, sizeof(temps_used));
   1388                    op->opc = INDEX_op_br;
   1389                    op->args[0] = op->args[5];
   1390                } else {
   1391            do_brcond_false:
   1392                    tcg_op_remove(s, op);
   1393                }
   1394            } else if ((op->args[4] == TCG_COND_LT
   1395                        || op->args[4] == TCG_COND_GE)
   1396                       && arg_is_const(op->args[2])
   1397                       && arg_info(op->args[2])->val == 0
   1398                       && arg_is_const(op->args[3])
   1399                       && arg_info(op->args[3])->val == 0) {
   1400                /* Simplify LT/GE comparisons vs zero to a single compare
   1401                   vs the high word of the input.  */
   1402            do_brcond_high:
   1403                memset(&temps_used, 0, sizeof(temps_used));
   1404                op->opc = INDEX_op_brcond_i32;
   1405                op->args[0] = op->args[1];
   1406                op->args[1] = op->args[3];
   1407                op->args[2] = op->args[4];
   1408                op->args[3] = op->args[5];
   1409            } else if (op->args[4] == TCG_COND_EQ) {
   1410                /* Simplify EQ comparisons where one of the pairs
   1411                   can be simplified.  */
   1412                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
   1413                                               op->args[0], op->args[2],
   1414                                               TCG_COND_EQ);
   1415                if (tmp == 0) {
   1416                    goto do_brcond_false;
   1417                } else if (tmp == 1) {
   1418                    goto do_brcond_high;
   1419                }
   1420                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
   1421                                               op->args[1], op->args[3],
   1422                                               TCG_COND_EQ);
   1423                if (tmp == 0) {
   1424                    goto do_brcond_false;
   1425                } else if (tmp != 1) {
   1426                    goto do_default;
   1427                }
   1428            do_brcond_low:
   1429                memset(&temps_used, 0, sizeof(temps_used));
   1430                op->opc = INDEX_op_brcond_i32;
   1431                op->args[1] = op->args[2];
   1432                op->args[2] = op->args[4];
   1433                op->args[3] = op->args[5];
   1434            } else if (op->args[4] == TCG_COND_NE) {
   1435                /* Simplify NE comparisons where one of the pairs
   1436                   can be simplified.  */
   1437                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
   1438                                               op->args[0], op->args[2],
   1439                                               TCG_COND_NE);
   1440                if (tmp == 0) {
   1441                    goto do_brcond_high;
   1442                } else if (tmp == 1) {
   1443                    goto do_brcond_true;
   1444                }
   1445                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
   1446                                               op->args[1], op->args[3],
   1447                                               TCG_COND_NE);
   1448                if (tmp == 0) {
   1449                    goto do_brcond_low;
   1450                } else if (tmp == 1) {
   1451                    goto do_brcond_true;
   1452                }
   1453                goto do_default;
   1454            } else {
   1455                goto do_default;
   1456            }
   1457            break;
   1458
   1459        case INDEX_op_setcond2_i32:
   1460            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
   1461                                            op->args[5]);
   1462            if (tmp != 2) {
   1463            do_setcond_const:
   1464                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
   1465            } else if ((op->args[5] == TCG_COND_LT
   1466                        || op->args[5] == TCG_COND_GE)
   1467                       && arg_is_const(op->args[3])
   1468                       && arg_info(op->args[3])->val == 0
   1469                       && arg_is_const(op->args[4])
   1470                       && arg_info(op->args[4])->val == 0) {
   1471                /* Simplify LT/GE comparisons vs zero to a single compare
   1472                   vs the high word of the input.  */
   1473            do_setcond_high:
   1474                reset_temp(op->args[0]);
   1475                arg_info(op->args[0])->mask = 1;
   1476                op->opc = INDEX_op_setcond_i32;
   1477                op->args[1] = op->args[2];
   1478                op->args[2] = op->args[4];
   1479                op->args[3] = op->args[5];
   1480            } else if (op->args[5] == TCG_COND_EQ) {
   1481                /* Simplify EQ comparisons where one of the pairs
   1482                   can be simplified.  */
   1483                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
   1484                                               op->args[1], op->args[3],
   1485                                               TCG_COND_EQ);
   1486                if (tmp == 0) {
   1487                    goto do_setcond_const;
   1488                } else if (tmp == 1) {
   1489                    goto do_setcond_high;
   1490                }
   1491                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
   1492                                               op->args[2], op->args[4],
   1493                                               TCG_COND_EQ);
   1494                if (tmp == 0) {
   1495                    goto do_setcond_high;
   1496                } else if (tmp != 1) {
   1497                    goto do_default;
   1498                }
   1499            do_setcond_low:
   1500                reset_temp(op->args[0]);
   1501                arg_info(op->args[0])->mask = 1;
   1502                op->opc = INDEX_op_setcond_i32;
   1503                op->args[2] = op->args[3];
   1504                op->args[3] = op->args[5];
   1505            } else if (op->args[5] == TCG_COND_NE) {
   1506                /* Simplify NE comparisons where one of the pairs
   1507                   can be simplified.  */
   1508                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
   1509                                               op->args[1], op->args[3],
   1510                                               TCG_COND_NE);
   1511                if (tmp == 0) {
   1512                    goto do_setcond_high;
   1513                } else if (tmp == 1) {
   1514                    goto do_setcond_const;
   1515                }
   1516                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
   1517                                               op->args[2], op->args[4],
   1518                                               TCG_COND_NE);
   1519                if (tmp == 0) {
   1520                    goto do_setcond_low;
   1521                } else if (tmp == 1) {
   1522                    goto do_setcond_const;
   1523                }
   1524                goto do_default;
   1525            } else {
   1526                goto do_default;
   1527            }
   1528            break;
   1529
   1530        case INDEX_op_call:
   1531            if (!(tcg_call_flags(op)
   1532                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
   1533                for (i = 0; i < nb_globals; i++) {
   1534                    if (test_bit(i, temps_used.l)) {
   1535                        reset_ts(&s->temps[i]);
   1536                    }
   1537                }
   1538            }
   1539            goto do_reset_output;
   1540
   1541        default:
   1542        do_default:
   1543            /* Default case: we know nothing about operation (or were unable
   1544               to compute the operation result) so no propagation is done.
   1545               We trash everything if the operation is the end of a basic
   1546               block, otherwise we only trash the output args.  "mask" is
   1547               the non-zero bits mask for the first output arg.  */
   1548            if (def->flags & TCG_OPF_BB_END) {
   1549                memset(&temps_used, 0, sizeof(temps_used));
   1550            } else {
   1551        do_reset_output:
   1552                for (i = 0; i < nb_oargs; i++) {
   1553                    reset_temp(op->args[i]);
   1554                    /* Save the corresponding known-zero bits mask for the
   1555                       first output argument (only one supported so far). */
   1556                    if (i == 0) {
   1557                        arg_info(op->args[i])->mask = mask;
   1558                    }
   1559                }
   1560            }
   1561            break;
   1562        }
   1563
   1564        /* Eliminate duplicate and redundant fence instructions.  */
               /* "prev_mb" tracks the most recent INDEX_op_mb with no
                  intervening guest-memory access or end of basic block;
                  while it is live, a later mb can be merged into it.  */
   1565        if (prev_mb) {
   1566            switch (opc) {
   1567            case INDEX_op_mb:
   1568                /* Merge two barriers of the same type into one,
   1569                 * or a weaker barrier into a stronger one,
   1570                 * or two weaker barriers into a stronger one.
   1571                 *   mb X; mb Y => mb X|Y
   1572                 *   mb; strl => mb; st
   1573                 *   ldaq; mb => ld; mb
   1574                 *   ldaq; strl => ld; mb; st
   1575                 * Other combinations are also merged into a strong
   1576                 * barrier.  This is stricter than specified but for
   1577                 * the purposes of TCG is better than not optimizing.
   1578                 */
   1579                prev_mb->args[0] |= op->args[0];
   1580                tcg_op_remove(s, op);
   1581                break;
   1582
   1583            default:
   1584                /* Opcodes that end the block stop the optimization.  */
   1585                if ((def->flags & TCG_OPF_BB_END) == 0) {
   1586                    break;
   1587                }
   1588                /* fallthru */
   1589            case INDEX_op_qemu_ld_i32:
   1590            case INDEX_op_qemu_ld_i64:
   1591            case INDEX_op_qemu_st_i32:
   1592            case INDEX_op_qemu_st8_i32:
   1593            case INDEX_op_qemu_st_i64:
   1594            case INDEX_op_call:
   1595                /* Opcodes that touch guest memory stop the optimization.  */
   1596                prev_mb = NULL;
   1597                break;
   1598            }
   1599        } else if (opc == INDEX_op_mb) {
   1600            prev_mb = op;
   1601        }
   1602    }
   1603}