cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

tcg-op-vec.c (24244B)


/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2018 Linaro, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-mo.h"

/* Reduce the number of ifdefs below.  This assumes that all uses of
   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
   the compiler can eliminate.  */
#if TCG_TARGET_REG_BITS == 64
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW  TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif

/*
 * Vector optional opcode tracking.
 * Except for the basic logical operations (and, or, xor), and
 * data movement (mov, ld, st, dupi), many vector opcodes are
 * optional and may not be supported on the host.  Thank Intel
 * for the irregularity in their instruction set.
 *
 * The gvec expanders allow custom vector operations to be composed,
 * generally via the .fniv callback in the GVecGen* structures.  At
 * the same time, in deciding whether to use this hook we need to
 * know if the host supports the required operations.  This is
 * presented as an array of opcodes, terminated by 0.  Each opcode
 * is assumed to be expanded with the given VECE.
 *
 * For debugging, we want to validate this array.  Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
 * will validate that their opcode is present in the list.
 */
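/*
 * For example (an illustrative sketch; the names here are hypothetical):
 * a front end expanding a saturating add through .fniv would declare the
 * optional opcodes it relies on as
 *
 *     static const TCGOpcode vecop_list_ssadd[] = { INDEX_op_ssadd_vec, 0 };
 *
 * and install the list via the .opt_opc member of its GVecGen structure.
 */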
#ifdef CONFIG_DEBUG_TCG
void tcg_assert_listed_vecop(TCGOpcode op)
{
    const TCGOpcode *p = tcg_ctx->vecop_list;
    if (p) {
        for (; *p; ++p) {
            if (*p == op) {
                return;
            }
        }
        g_assert_not_reached();
    }
}
#endif

bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                             TCGType type, unsigned vece)
{
    if (list == NULL) {
        return true;
    }

    for (; *list; ++list) {
        TCGOpcode opc = *list;

#ifdef CONFIG_DEBUG_TCG
        switch (opc) {
        case INDEX_op_and_vec:
        case INDEX_op_or_vec:
        case INDEX_op_xor_vec:
        case INDEX_op_mov_vec:
        case INDEX_op_dup_vec:
        case INDEX_op_dup2_vec:
        case INDEX_op_ld_vec:
        case INDEX_op_st_vec:
        case INDEX_op_bitsel_vec:
            /* These opcodes are mandatory and should not be listed.  */
            g_assert_not_reached();
        case INDEX_op_not_vec:
            /* These opcodes have generic expansions using the above.  */
            g_assert_not_reached();
        default:
            break;
        }
#endif

        if (tcg_can_emit_vec_op(opc, type, vece)) {
            continue;
        }

        /*
         * The opcode list is created by front ends based on what they
         * actually invoke.  We must mirror the logic in the routines
         * below for generic expansions using other opcodes.
         */
        switch (opc) {
        case INDEX_op_neg_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_abs_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
                continue;
            }
            break;
        case INDEX_op_usadd_vec:
            if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) ||
                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_ussub_vec:
            if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) ||
                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_cmpsel_vec:
        case INDEX_op_smin_vec:
        case INDEX_op_smax_vec:
        case INDEX_op_umin_vec:
        case INDEX_op_umax_vec:
            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        default:
            break;
        }
        return false;
    }
    return true;
}
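
/*
 * Illustrative use (a sketch, not a call site in this file): a caller
 * can ask whether a 128-bit expansion is viable before choosing the
 * vector path, e.g.
 *
 *     static const TCGOpcode list[] = { INDEX_op_neg_vec, 0 };
 *     if (tcg_can_emit_vecop_list(list, TCG_TYPE_V128, MO_32)) { ... }
 *
 * This returns true even when neg_vec itself is unsupported, provided
 * sub_vec is available, mirroring the fallback in tcg_gen_neg_vec below.
 */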

void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
}

void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
}

void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
}

static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
    op->args[4] = d;
    op->args[5] = e;
}

static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
}

static void vec_gen_op3(TCGOpcode opc, unsigned vece,
                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
}

void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
{
    if (r != a) {
        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
    }
}

TCGv_vec tcg_const_zeros_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    tcg_gen_dupi_vec(MO_64, ret, 0);
    return ret;
}

TCGv_vec tcg_const_ones_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    tcg_gen_dupi_vec(MO_64, ret, -1);
    return ret;
}

TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_zeros_vec(t->base_type);
}

TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_ones_vec(t->base_type);
}

void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a));
}

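/*
 * Duplicate a 64-bit scalar into a vector.  On a 32-bit host the 64-bit
 * value lives in two 32-bit halves, so a MO_64 dup must be emitted as
 * dup2_vec taking both halves; narrower element sizes only need the low
 * half, since the duplicated element already fits in 32 bits.
 */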
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    if (TCG_TARGET_REG_BITS == 64) {
        TCGArg ai = tcgv_i64_arg(a);
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    } else if (vece == MO_64) {
        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    } else {
        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    }
}

void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg ai = tcgv_i32_arg(a);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}

void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
                         tcg_target_long ofs)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
}

static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(opc, type, 0, ri, bi, o);
}

void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}

void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}

void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    tcg_debug_assert(low_type >= TCG_TYPE_V64);
    tcg_debug_assert(low_type <= type);
    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
}

void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}

void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}

void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}

void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_andc_vec) {
        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_and_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_orc_vec) {
        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_or_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nand_vec when a backend supports it. */
    tcg_gen_and_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nor_vec when a backend supports it. */
    tcg_gen_or_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_eqv_vec when a backend supports it. */
    tcg_gen_xor_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

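/*
 * Note: tcg_can_emit_vec_op returns a tri-state: > 0 if the host
 * supports the opcode directly, < 0 if the backend can expand it via
 * tcg_expand_vec_op, and 0 if it is unsupported.  do_op2 and do_op3
 * below map that onto emit, expand, or failure respectively.
 */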
static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_2(opc, type, vece, ri, ai);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
        TCGv_vec t = tcg_const_ones_vec_matching(r);
        tcg_gen_xor_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_neg_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
        TCGv_vec t = tcg_const_zeros_vec_matching(r);
        tcg_gen_sub_vec(vece, r, t, a);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

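/*
 * Two fallback strategies for abs, in preference order:
 *   1. abs(a) = smax(a, -a), when smax is available.
 *   2. abs(a) = (a ^ t) - t, where t is -1 for negative lanes and 0
 *      otherwise, obtained either by shifting the sign bit across the
 *      element arithmetically or by comparing against zero.
 */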
void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_abs_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
        TCGType type = tcgv_vec_temp(r)->base_type;
        TCGv_vec t = tcg_temp_new_vec(type);

        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
            tcg_gen_neg_vec(vece, t, a);
            tcg_gen_smax_vec(vece, r, a, t);
        } else {
            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
            } else {
                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a,
                                tcg_constant_vec(type, vece, 0));
            }
            tcg_gen_xor_vec(vece, r, a, t);
            tcg_gen_sub_vec(vece, r, r, t);
        }

        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

static void do_shifti(TCGOpcode opc, unsigned vece,
                      TCGv_vec r, TCGv_vec a, int64_t i)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(i >= 0 && i < (8 << vece));
    tcg_assert_listed_vecop(opc);

    if (i == 0) {
        tcg_gen_mov_vec(r, a);
        return;
    }

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, i);
    } else {
        /* We leave the choice of expansion via scalar or vector shift
           to the target.  Often, but not always, dupi can feed a vector
           shift more easily than a scalar.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}

void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}

void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}

void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
}

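/*
 * Rotate right by a constant is rotate left by the complementary count:
 * rotr(a, i) == rotl(a, (bits - i) & (bits - 1)), and -i & (bits - 1)
 * computes exactly that (e.g. bits = 32, i = 8 yields rotl by 24).
 */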
void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    int bits = 8 << vece;
    tcg_debug_assert(i >= 0 && i < bits);
    do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
}

void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
    if (can > 0) {
        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    } else {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
        tcg_swap_vecop_list(hold_list);
    }
}

static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
                   TCGv_vec b, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, bi);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
                          TCGv_vec b, TCGOpcode opc)
{
    bool ok = do_op3(vece, r, a, b, opc);
    tcg_debug_assert(ok);
}

void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
}

void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
}

void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
}

void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
}

void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        TCGv_vec t = tcg_temp_new_vec_matching(r);

        /* usadd(a, b) = min(a, ~b) + b */
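        /*
         * If a + b would overflow then a > ~b, min yields ~b, and the
         * sum saturates at ~b + b = all-ones; otherwise min yields a
         * and the result is the exact sum a + b.
         */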
        tcg_gen_not_vec(vece, t, b);
        tcg_gen_umin_vec(vece, t, t, a);
        tcg_gen_add_vec(vece, r, t, b);

        tcg_temp_free_vec(t);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
}

void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        TCGv_vec t = tcg_temp_new_vec_matching(r);

        /* ussub(a, b) = max(a, b) - b */
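        /*
         * If a < b the true difference would underflow; max yields b
         * and the result saturates at b - b = 0.  Otherwise max yields
         * a and the result is the exact difference a - b.
         */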
        tcg_gen_umax_vec(vece, t, a, b);
        tcg_gen_sub_vec(vece, r, t, b);

        tcg_temp_free_vec(t);
        tcg_swap_vecop_list(hold_list);
    }
}

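/*
 * min/max fall back to a compare-and-select: e.g. smin(a, b) is
 * (a < b ? a : b), expressed as cmpsel_vec with TCG_COND_LT.
 */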
static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
{
    if (!do_op3(vece, r, a, b, opc)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
}

void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
}

void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
}

void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
}

void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
}

void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
}

void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}

void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
}

void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
}

static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_i32 s, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *st = tcgv_i32_temp(s);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg si = temp_arg(st);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, si);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, si);
        tcg_swap_vecop_list(hold_list);
    } else {
        g_assert_not_reached();
    }
}

void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shls_vec);
}

void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
}

void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_sars_vec);
}

void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
{
    do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
}

void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
                        TCGv_vec b, TCGv_vec c)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGType type = rt->base_type;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);

    if (TCG_TARGET_HAS_bitsel_vec) {
        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
    } else {
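        /* Fallback: r = (a & b) | (c & ~a), selecting from b where a
           is set and from c where a is clear, bit by bit.  */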
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_and_vec(MO_8, t, a, b);
        tcg_gen_andc_vec(MO_8, r, c, a);
        tcg_gen_or_vec(MO_8, r, r, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGTemp *dt = tcgv_vec_temp(d);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ci = temp_arg(ct);
    TCGArg di = temp_arg(dt);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);
    tcg_debug_assert(dt->base_type >= type);

    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
    hold_list = tcg_swap_vecop_list(NULL);
    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);

    if (can > 0) {
        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    } else if (can < 0) {
        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
                          ri, ai, bi, ci, di, cond);
    } else {
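        /* Fallback: materialize the comparison mask with cmp_vec, then
           use it to bit-select between the two sources.  */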
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_cmp_vec(cond, vece, t, a, b);
        tcg_gen_bitsel_vec(vece, r, t, c, d);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}