translate-mve.c (76335B)
/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_no_predication(DisasContext *s)
{
    /*
     * Return true if we are executing the entire MVE instruction
     * with no predication or partial-execution, and so we can safely
     * use an inline TCG vector implementation.
     */
    return s->eci == 0 && s->mve_no_pred;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * signextended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
        { NULL, NULL, F(vldrh_sg_sw), NULL },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, F(vldrh_sg_os_sw), NULL },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL }
    }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
        { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
        { NULL, NULL, F(vldrw_sg_uw), NULL },
        { NULL, NULL, NULL, F(vldrd_sg_ud) }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
        { NULL, NULL, F(vldrw_sg_os_uw), NULL },
        { NULL, NULL, NULL, F(vldrd_sg_os_ud) }
    }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
        { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
        { NULL, NULL, F(vstrw_sg_uw), NULL },
        { NULL, NULL, NULL, F(vstrd_sg_ud) }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
        { NULL, NULL, F(vstrw_sg_os_uw), NULL },
        { NULL, NULL, NULL, F(vstrd_sg_os_ud) }
    }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F

static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(cpu_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    } else {
        tcg_temp_free_i32(rn);
    }
    mve_update_and_store_eci(s);
    return true;
}

/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

#undef F

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rt = load_reg(s, a->rt);
    if (mve_no_predication(s)) {
        tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
    } else {
        qd = mve_qreg_ptr(a->qd);
        tcg_gen_dup_i32(a->size, rt, rt);
        gen_helper_mve_vdup(cpu_env, qd, rt);
        tcg_temp_free_ptr(qd);
    }
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
                       GVecGen2Fn vecfn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    return do_1op_vec(s, a, fn, NULL);
}

#define DO_1OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)

static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
                          enum arm_fprounding rmode, bool u)
{
    /*
     * Handle VCVT fp to int with specified rounding mode.
     * This is a 1op fn but we must pass the rounding mode as
     * an immediate to the helper.
     */
    TCGv_ptr qd, qm;
    static MVEGenVCVTRmodeFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
        { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
        { NULL, NULL },
    };
    MVEGenVCVTRmodeFn *fn = fns[a->size][u];

    if (!dc_isar_feature(aa32_mve_fp, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VCVT_RMODE(INSN, RMODE, U)                           \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        return do_vcvt_rmode(s, a, RMODE, U);                   \
    }                                                           \

DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)

#define DO_VCVT_SH(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_1op(s, a, gen_helper_mve_##FN);               \
    }                                                           \

DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
DO_VCVT_SH(VCVTT_HS, vcvtt_hs)

#define DO_VRINT(INSN, RMODE)                                           \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_h(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_s(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)

static bool trans_VRINTX(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vrintx_h,
        gen_helper_mve_vrintx_s,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
                       GVecGen3Fn *vecfn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
              mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qn = mve_qreg_ptr(a->qn);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qn, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qn);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
{
    return do_2op_vec(s, a, fn, NULL);
}

#define DO_LOGIC(INSN, HELPER, VECFN)                           \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op_vec(s, a, HELPER, VECFN);                 \
    }

DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)

static bool trans_VPSEL(DisasContext *s, arg_2op *a)
{
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    return do_2op(s, a, gen_helper_mve_vpsel);
}

#define DO_2OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)

DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}


#define DO_2OP_FP_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn,
                    GVecGen2iFn *vecfn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    if (vecfn && mve_no_predication(s)) {
        vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd),
              imm, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        fn(cpu_env, qd, tcg_constant_i64(imm));
        tcg_temp_free_ptr(qd);
    }
    mve_update_eci(s);
    return true;
}

static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c);
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;
    GVecGen2iFn *vecfn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
            vecfn = tcg_gen_gvec_andi;
        } else {
            fn = gen_helper_mve_vorri;
            vecfn = tcg_gen_gvec_ori;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
        vecfn = gen_gvec_vmovi;
    }
    return do_1imm(s, a, fn, vecfn);
}

static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                          bool negateshift, GVecGen2iFn vecfn)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm),
              shift, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm, tcg_constant_i32(shift));
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    return do_2shift_vec(s, a, fn, negateshift, NULL);
}

#define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN)                     \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN);   \
    }

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)        \
    DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)

static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_sari() does not do.
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        shift--;
    }
    tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz);
}

static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_shri() does not do.
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
    } else {
        tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz);
    }
}

DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s)
DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri)
DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli)

#define DO_2SHIFT_FP(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2shift(s, a, gen_helper_mve_##FN, false);     \
    }

DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift_scalar(s, a, fns[a->size]);            \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN); \
    }

/*
 * For the VSHLL vector helpers, the vece is the size of the input
 * (ie MO_8 or MO_16); the helpers want to work in the output size.
 * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
 */
static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
    tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
}

static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    tcg_gen_gvec_andi(ovece, dofs, aofs,
                      ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
    tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
}

static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 and Rm == 15
         * are UNPREDICTABLE
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
gen_helper_mve_##FN##_scalarh, \ 2067 gen_helper_mve_##FN##_scalarw, \ 2068 NULL, \ 2069 }; \ 2070 return do_vcmp_scalar(s, a, fns[a->size]); \ 2071 } 2072 2073DO_VCMP(VCMPEQ, vcmpeq) 2074DO_VCMP(VCMPNE, vcmpne) 2075DO_VCMP(VCMPCS, vcmpcs) 2076DO_VCMP(VCMPHI, vcmphi) 2077DO_VCMP(VCMPGE, vcmpge) 2078DO_VCMP(VCMPLT, vcmplt) 2079DO_VCMP(VCMPGT, vcmpgt) 2080DO_VCMP(VCMPLE, vcmple) 2081 2082#define DO_VCMP_FP(INSN, FN) \ 2083 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \ 2084 { \ 2085 static MVEGenCmpFn * const fns[] = { \ 2086 NULL, \ 2087 gen_helper_mve_##FN##h, \ 2088 gen_helper_mve_##FN##s, \ 2089 NULL, \ 2090 }; \ 2091 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 2092 return false; \ 2093 } \ 2094 return do_vcmp(s, a, fns[a->size]); \ 2095 } \ 2096 static bool trans_##INSN##_scalar(DisasContext *s, \ 2097 arg_vcmp_scalar *a) \ 2098 { \ 2099 static MVEGenScalarCmpFn * const fns[] = { \ 2100 NULL, \ 2101 gen_helper_mve_##FN##_scalarh, \ 2102 gen_helper_mve_##FN##_scalars, \ 2103 NULL, \ 2104 }; \ 2105 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 2106 return false; \ 2107 } \ 2108 return do_vcmp_scalar(s, a, fns[a->size]); \ 2109 } 2110 2111DO_VCMP_FP(VCMPEQ_fp, vfcmpeq) 2112DO_VCMP_FP(VCMPNE_fp, vfcmpne) 2113DO_VCMP_FP(VCMPGE_fp, vfcmpge) 2114DO_VCMP_FP(VCMPLT_fp, vfcmplt) 2115DO_VCMP_FP(VCMPGT_fp, vfcmpgt) 2116DO_VCMP_FP(VCMPLE_fp, vfcmple) 2117 2118static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn) 2119{ 2120 /* 2121 * MIN/MAX operations across a vector: compute the min or 2122 * max of the initial value in a general purpose register 2123 * and all the elements in the vector, and store it back 2124 * into the general purpose register. 2125 */ 2126 TCGv_ptr qm; 2127 TCGv_i32 rda; 2128 2129 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) || 2130 !fn || a->rda == 13 || a->rda == 15) { 2131 /* Rda cases are UNPREDICTABLE */ 2132 return false; 2133 } 2134 if (!mve_eci_check(s) || !vfp_access_check(s)) { 2135 return true; 2136 } 2137 2138 qm = mve_qreg_ptr(a->qm); 2139 rda = load_reg(s, a->rda); 2140 fn(rda, cpu_env, qm, rda); 2141 store_reg(s, a->rda, rda); 2142 tcg_temp_free_ptr(qm); 2143 mve_update_eci(s); 2144 return true; 2145} 2146 2147#define DO_VMAXV(INSN, FN) \ 2148 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \ 2149 { \ 2150 static MVEGenVADDVFn * const fns[] = { \ 2151 gen_helper_mve_##FN##b, \ 2152 gen_helper_mve_##FN##h, \ 2153 gen_helper_mve_##FN##w, \ 2154 NULL, \ 2155 }; \ 2156 return do_vmaxv(s, a, fns[a->size]); \ 2157 } 2158 2159DO_VMAXV(VMAXV_S, vmaxvs) 2160DO_VMAXV(VMAXV_U, vmaxvu) 2161DO_VMAXV(VMAXAV, vmaxav) 2162DO_VMAXV(VMINV_S, vminvs) 2163DO_VMAXV(VMINV_U, vminvu) 2164DO_VMAXV(VMINAV, vminav) 2165 2166#define DO_VMAXV_FP(INSN, FN) \ 2167 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \ 2168 { \ 2169 static MVEGenVADDVFn * const fns[] = { \ 2170 NULL, \ 2171 gen_helper_mve_##FN##h, \ 2172 gen_helper_mve_##FN##s, \ 2173 NULL, \ 2174 }; \ 2175 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 2176 return false; \ 2177 } \ 2178 return do_vmaxv(s, a, fns[a->size]); \ 2179 } 2180 2181DO_VMAXV_FP(VMAXNMV, vmaxnmv) 2182DO_VMAXV_FP(VMINNMV, vminnmv) 2183DO_VMAXV_FP(VMAXNMAV, vmaxnmav) 2184DO_VMAXV_FP(VMINNMAV, vminnmav) 2185 2186static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn) 2187{ 2188 /* Absolute difference accumulated across vector */ 2189 TCGv_ptr qn, qm; 2190 TCGv_i32 rda; 2191 2192 if (!dc_isar_feature(aa32_mve, s) || 2193 !mve_check_qreg_bank(s, a->qm | a->qn) || 2194 !fn || a->rda == 13 || 
a->rda == 15) { 2195 /* Rda cases are UNPREDICTABLE */ 2196 return false; 2197 } 2198 if (!mve_eci_check(s) || !vfp_access_check(s)) { 2199 return true; 2200 } 2201 2202 qm = mve_qreg_ptr(a->qm); 2203 qn = mve_qreg_ptr(a->qn); 2204 rda = load_reg(s, a->rda); 2205 fn(rda, cpu_env, qn, qm, rda); 2206 store_reg(s, a->rda, rda); 2207 tcg_temp_free_ptr(qm); 2208 tcg_temp_free_ptr(qn); 2209 mve_update_eci(s); 2210 return true; 2211} 2212 2213#define DO_VABAV(INSN, FN) \ 2214 static bool trans_##INSN(DisasContext *s, arg_vabav *a) \ 2215 { \ 2216 static MVEGenVABAVFn * const fns[] = { \ 2217 gen_helper_mve_##FN##b, \ 2218 gen_helper_mve_##FN##h, \ 2219 gen_helper_mve_##FN##w, \ 2220 NULL, \ 2221 }; \ 2222 return do_vabav(s, a, fns[a->size]); \ 2223 } 2224 2225DO_VABAV(VABAV_S, vabavs) 2226DO_VABAV(VABAV_U, vabavu) 2227 2228static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a) 2229{ 2230 /* 2231 * VMOV two 32-bit vector lanes to two general-purpose registers. 2232 * This insn is not predicated but it is subject to beat-wise 2233 * execution if it is not in an IT block. For us this means 2234 * only that if PSR.ECI says we should not be executing the beat 2235 * corresponding to the lane of the vector register being accessed 2236 * then we should skip performing the move, and that we need to do 2237 * the usual check for bad ECI state and advance of ECI state. 2238 * (If PSR.ECI is non-zero then we cannot be in an IT block.) 2239 */ 2240 TCGv_i32 tmp; 2241 int vd; 2242 2243 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) || 2244 a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 || 2245 a->rt == a->rt2) { 2246 /* Rt/Rt2 cases are UNPREDICTABLE */ 2247 return false; 2248 } 2249 if (!mve_eci_check(s) || !vfp_access_check(s)) { 2250 return true; 2251 } 2252 2253 /* Convert Qreg index to Dreg for read_neon_element32() etc */ 2254 vd = a->qd * 2; 2255 2256 if (!mve_skip_vmov(s, vd, a->idx, MO_32)) { 2257 tmp = tcg_temp_new_i32(); 2258 read_neon_element32(tmp, vd, a->idx, MO_32); 2259 store_reg(s, a->rt, tmp); 2260 } 2261 if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) { 2262 tmp = tcg_temp_new_i32(); 2263 read_neon_element32(tmp, vd + 1, a->idx, MO_32); 2264 store_reg(s, a->rt2, tmp); 2265 } 2266 2267 mve_update_and_store_eci(s); 2268 return true; 2269} 2270 2271static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a) 2272{ 2273 /* 2274 * VMOV two general-purpose registers to two 32-bit vector lanes. 2275 * This insn is not predicated but it is subject to beat-wise 2276 * execution if it is not in an IT block. For us this means 2277 * only that if PSR.ECI says we should not be executing the beat 2278 * corresponding to the lane of the vector register being accessed 2279 * then we should skip performing the move, and that we need to do 2280 * the usual check for bad ECI state and advance of ECI state. 2281 * (If PSR.ECI is non-zero then we cannot be in an IT block.) 
2282 */ 2283 TCGv_i32 tmp; 2284 int vd; 2285 2286 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) || 2287 a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) { 2288 /* Rt/Rt2 cases are UNPREDICTABLE */ 2289 return false; 2290 } 2291 if (!mve_eci_check(s) || !vfp_access_check(s)) { 2292 return true; 2293 } 2294 2295 /* Convert Qreg idx to Dreg for read_neon_element32() etc */ 2296 vd = a->qd * 2; 2297 2298 if (!mve_skip_vmov(s, vd, a->idx, MO_32)) { 2299 tmp = load_reg(s, a->rt); 2300 write_neon_element32(tmp, vd, a->idx, MO_32); 2301 tcg_temp_free_i32(tmp); 2302 } 2303 if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) { 2304 tmp = load_reg(s, a->rt2); 2305 write_neon_element32(tmp, vd + 1, a->idx, MO_32); 2306 tcg_temp_free_i32(tmp); 2307 } 2308 2309 mve_update_and_store_eci(s); 2310 return true; 2311}
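
/*
 * Worked example (illustrative only; nothing above uses it): each DO_* macro
 * in this file expands into one or two trans functions that pick a
 * size-specific helper and hand off to the shared do_*() routine. For
 * instance, DO_VCMP(VCMPEQ, vcmpeq) above produces a vector-form function
 * of roughly this shape (plus the equivalent _scalar variant):
 *
 *   static bool trans_VCMPEQ(DisasContext *s, arg_vcmp *a)
 *   {
 *       static MVEGenCmpFn * const fns[] = {
 *           gen_helper_mve_vcmpeqb,
 *           gen_helper_mve_vcmpeqh,
 *           gen_helper_mve_vcmpeqw,
 *           NULL,
 *       };
 *       return do_vcmp(s, a, fns[a->size]);
 *   }
 *
 * The NULL entry for size 0b11 makes do_vcmp() reject that encoding via its
 * !fn check, just as the explicit size checks do elsewhere in this file.
 */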