translate-neon.c - cachepc-qemu - Fork of AMDESE/qemu with changes for cachepc side-channel attack

	cachepc-qemu Fork of AMDESE/qemu with changes for cachepc side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-qemu
	Log \| Files \| Refs \| Submodules \| LICENSE \| sfeed.txt
translate-neon.c (126404B)
      1/*
      2 *  ARM translation: AArch32 Neon instructions
      3 *
      4 *  Copyright (c) 2003 Fabrice Bellard
      5 *  Copyright (c) 2005-2007 CodeSourcery
      6 *  Copyright (c) 2007 OpenedHand, Ltd.
      7 *  Copyright (c) 2020 Linaro, Ltd.
      8 *
      9 * This library is free software; you can redistribute it and/or
     10 * modify it under the terms of the GNU Lesser General Public
     11 * License as published by the Free Software Foundation; either
     12 * version 2.1 of the License, or (at your option) any later version.
     13 *
     14 * This library is distributed in the hope that it will be useful,
     15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     17 * Lesser General Public License for more details.
     18 *
     19 * You should have received a copy of the GNU Lesser General Public
     20 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     21 */
     22
     23#include "qemu/osdep.h"
     24#include "tcg/tcg-op.h"
     25#include "tcg/tcg-op-gvec.h"
     26#include "exec/exec-all.h"
     27#include "exec/gen-icount.h"
     28#include "translate.h"
     29#include "translate-a32.h"
     30
     31/* Include the generated Neon decoder */
     32#include "decode-neon-dp.c.inc"
     33#include "decode-neon-ls.c.inc"
     34#include "decode-neon-shared.c.inc"
     35
     36static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
     37{
     38    TCGv_ptr ret = tcg_temp_new_ptr();
     39    tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
     40    return ret;
     41}
     42
     43static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
     44{
     45    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
     46
     47    switch (mop) {
     48    case MO_UB:
     49        tcg_gen_ld8u_i32(var, cpu_env, offset);
     50        break;
     51    case MO_UW:
     52        tcg_gen_ld16u_i32(var, cpu_env, offset);
     53        break;
     54    case MO_UL:
     55        tcg_gen_ld_i32(var, cpu_env, offset);
     56        break;
     57    default:
     58        g_assert_not_reached();
     59    }
     60}
     61
     62static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
     63{
     64    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
     65
     66    switch (mop) {
     67    case MO_UB:
     68        tcg_gen_ld8u_i64(var, cpu_env, offset);
     69        break;
     70    case MO_UW:
     71        tcg_gen_ld16u_i64(var, cpu_env, offset);
     72        break;
     73    case MO_UL:
     74        tcg_gen_ld32u_i64(var, cpu_env, offset);
     75        break;
     76    case MO_Q:
     77        tcg_gen_ld_i64(var, cpu_env, offset);
     78        break;
     79    default:
     80        g_assert_not_reached();
     81    }
     82}
     83
     84static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
     85{
     86    long offset = neon_element_offset(reg, ele, size);
     87
     88    switch (size) {
     89    case MO_8:
     90        tcg_gen_st8_i32(var, cpu_env, offset);
     91        break;
     92    case MO_16:
     93        tcg_gen_st16_i32(var, cpu_env, offset);
     94        break;
     95    case MO_32:
     96        tcg_gen_st_i32(var, cpu_env, offset);
     97        break;
     98    default:
     99        g_assert_not_reached();
    100    }
    101}
    102
    103static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
    104{
    105    long offset = neon_element_offset(reg, ele, size);
    106
    107    switch (size) {
    108    case MO_8:
    109        tcg_gen_st8_i64(var, cpu_env, offset);
    110        break;
    111    case MO_16:
    112        tcg_gen_st16_i64(var, cpu_env, offset);
    113        break;
    114    case MO_32:
    115        tcg_gen_st32_i64(var, cpu_env, offset);
    116        break;
    117    case MO_64:
    118        tcg_gen_st_i64(var, cpu_env, offset);
    119        break;
    120    default:
    121        g_assert_not_reached();
    122    }
    123}
    124
    125static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
    126                         int data, gen_helper_gvec_4 *fn_gvec)
    127{
    128    /* UNDEF accesses to D16-D31 if they don't exist. */
    129    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
    130        return false;
    131    }
    132
    133    /*
    134     * UNDEF accesses to odd registers for each bit of Q.
    135     * Q will be 0b111 for all Q-reg instructions, otherwise
    136     * when we have mixed Q- and D-reg inputs.
    137     */
    138    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
    139        return false;
    140    }
    141
    142    if (!vfp_access_check(s)) {
    143        return true;
    144    }
    145
    146    int opr_sz = q ? 16 : 8;
    147    tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd),
    148                       vfp_reg_offset(1, vn),
    149                       vfp_reg_offset(1, vm),
    150                       vfp_reg_offset(1, vd),
    151                       opr_sz, opr_sz, data, fn_gvec);
    152    return true;
    153}
    154
    155static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
    156                              int data, ARMFPStatusFlavour fp_flavour,
    157                              gen_helper_gvec_4_ptr *fn_gvec_ptr)
    158{
    159    /* UNDEF accesses to D16-D31 if they don't exist. */
    160    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
    161        return false;
    162    }
    163
    164    /*
    165     * UNDEF accesses to odd registers for each bit of Q.
    166     * Q will be 0b111 for all Q-reg instructions, otherwise
    167     * when we have mixed Q- and D-reg inputs.
    168     */
    169    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
    170        return false;
    171    }
    172
    173    if (!vfp_access_check(s)) {
    174        return true;
    175    }
    176
    177    int opr_sz = q ? 16 : 8;
    178    TCGv_ptr fpst = fpstatus_ptr(fp_flavour);
    179
    180    tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
    181                       vfp_reg_offset(1, vn),
    182                       vfp_reg_offset(1, vm),
    183                       vfp_reg_offset(1, vd),
    184                       fpst, opr_sz, opr_sz, data, fn_gvec_ptr);
    185    tcg_temp_free_ptr(fpst);
    186    return true;
    187}
    188
    189static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
    190{
    191    if (!dc_isar_feature(aa32_vcma, s)) {
    192        return false;
    193    }
    194    if (a->size == MO_16) {
    195        if (!dc_isar_feature(aa32_fp16_arith, s)) {
    196            return false;
    197        }
    198        return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
    199                                 FPST_STD_F16, gen_helper_gvec_fcmlah);
    200    }
    201    return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
    202                             FPST_STD, gen_helper_gvec_fcmlas);
    203}
    204
    205static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
    206{
    207    int opr_sz;
    208    TCGv_ptr fpst;
    209    gen_helper_gvec_3_ptr *fn_gvec_ptr;
    210
    211    if (!dc_isar_feature(aa32_vcma, s)
    212        || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
    213        return false;
    214    }
    215
    216    /* UNDEF accesses to D16-D31 if they don't exist. */
    217    if (!dc_isar_feature(aa32_simd_r32, s) &&
    218        ((a->vd | a->vn | a->vm) & 0x10)) {
    219        return false;
    220    }
    221
    222    if ((a->vn | a->vm | a->vd) & a->q) {
    223        return false;
    224    }
    225
    226    if (!vfp_access_check(s)) {
    227        return true;
    228    }
    229
    230    opr_sz = (1 + a->q) * 8;
    231    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    232    fn_gvec_ptr = (a->size == MO_16) ?
    233        gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
    234    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
    235                       vfp_reg_offset(1, a->vn),
    236                       vfp_reg_offset(1, a->vm),
    237                       fpst, opr_sz, opr_sz, a->rot,
    238                       fn_gvec_ptr);
    239    tcg_temp_free_ptr(fpst);
    240    return true;
    241}
    242
    243static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
    244{
    245    if (!dc_isar_feature(aa32_dp, s)) {
    246        return false;
    247    }
    248    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
    249                        gen_helper_gvec_sdot_b);
    250}
    251
    252static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
    253{
    254    if (!dc_isar_feature(aa32_dp, s)) {
    255        return false;
    256    }
    257    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
    258                        gen_helper_gvec_udot_b);
    259}
    260
    261static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
    262{
    263    if (!dc_isar_feature(aa32_i8mm, s)) {
    264        return false;
    265    }
    266    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
    267                        gen_helper_gvec_usdot_b);
    268}
    269
    270static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
    271{
    272    if (!dc_isar_feature(aa32_bf16, s)) {
    273        return false;
    274    }
    275    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
    276                        gen_helper_gvec_bfdot);
    277}
    278
    279static bool trans_VFML(DisasContext *s, arg_VFML *a)
    280{
    281    int opr_sz;
    282
    283    if (!dc_isar_feature(aa32_fhm, s)) {
    284        return false;
    285    }
    286
    287    /* UNDEF accesses to D16-D31 if they don't exist. */
    288    if (!dc_isar_feature(aa32_simd_r32, s) &&
    289        (a->vd & 0x10)) {
    290        return false;
    291    }
    292
    293    if (a->vd & a->q) {
    294        return false;
    295    }
    296
    297    if (!vfp_access_check(s)) {
    298        return true;
    299    }
    300
    301    opr_sz = (1 + a->q) * 8;
    302    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
    303                       vfp_reg_offset(a->q, a->vn),
    304                       vfp_reg_offset(a->q, a->vm),
    305                       cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
    306                       gen_helper_gvec_fmlal_a32);
    307    return true;
    308}
    309
    310static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
    311{
    312    int data = (a->index << 2) | a->rot;
    313
    314    if (!dc_isar_feature(aa32_vcma, s)) {
    315        return false;
    316    }
    317    if (a->size == MO_16) {
    318        if (!dc_isar_feature(aa32_fp16_arith, s)) {
    319            return false;
    320        }
    321        return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
    322                                 FPST_STD_F16, gen_helper_gvec_fcmlah_idx);
    323    }
    324    return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
    325                             FPST_STD, gen_helper_gvec_fcmlas_idx);
    326}
    327
    328static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
    329{
    330    if (!dc_isar_feature(aa32_dp, s)) {
    331        return false;
    332    }
    333    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
    334                        gen_helper_gvec_sdot_idx_b);
    335}
    336
    337static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
    338{
    339    if (!dc_isar_feature(aa32_dp, s)) {
    340        return false;
    341    }
    342    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
    343                        gen_helper_gvec_udot_idx_b);
    344}
    345
    346static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
    347{
    348    if (!dc_isar_feature(aa32_i8mm, s)) {
    349        return false;
    350    }
    351    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
    352                        gen_helper_gvec_usdot_idx_b);
    353}
    354
    355static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
    356{
    357    if (!dc_isar_feature(aa32_i8mm, s)) {
    358        return false;
    359    }
    360    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
    361                        gen_helper_gvec_sudot_idx_b);
    362}
    363
    364static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
    365{
    366    if (!dc_isar_feature(aa32_bf16, s)) {
    367        return false;
    368    }
    369    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
    370                        gen_helper_gvec_bfdot_idx);
    371}
    372
    373static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
    374{
    375    int opr_sz;
    376
    377    if (!dc_isar_feature(aa32_fhm, s)) {
    378        return false;
    379    }
    380
    381    /* UNDEF accesses to D16-D31 if they don't exist. */
    382    if (!dc_isar_feature(aa32_simd_r32, s) &&
    383        ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
    384        return false;
    385    }
    386
    387    if (a->vd & a->q) {
    388        return false;
    389    }
    390
    391    if (!vfp_access_check(s)) {
    392        return true;
    393    }
    394
    395    opr_sz = (1 + a->q) * 8;
    396    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
    397                       vfp_reg_offset(a->q, a->vn),
    398                       vfp_reg_offset(a->q, a->rm),
    399                       cpu_env, opr_sz, opr_sz,
    400                       (a->index << 2) | a->s, /* is_2 == 0 */
    401                       gen_helper_gvec_fmlal_idx_a32);
    402    return true;
    403}
    404
    405static struct {
    406    int nregs;
    407    int interleave;
    408    int spacing;
    409} const neon_ls_element_type[11] = {
    410    {1, 4, 1},
    411    {1, 4, 2},
    412    {4, 1, 1},
    413    {2, 2, 2},
    414    {1, 3, 1},
    415    {1, 3, 2},
    416    {3, 1, 1},
    417    {1, 1, 1},
    418    {1, 2, 1},
    419    {1, 2, 2},
    420    {2, 1, 1}
    421};
    422
    423static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
    424                                      int stride)
    425{
    426    if (rm != 15) {
    427        TCGv_i32 base;
    428
    429        base = load_reg(s, rn);
    430        if (rm == 13) {
    431            tcg_gen_addi_i32(base, base, stride);
    432        } else {
    433            TCGv_i32 index;
    434            index = load_reg(s, rm);
    435            tcg_gen_add_i32(base, base, index);
    436            tcg_temp_free_i32(index);
    437        }
    438        store_reg(s, rn, base);
    439    }
    440}
    441
/*
 * Translate the Neon "load/store multiple structures" insns.
 *
 * The itype field selects a row of neon_ls_element_type[], which supplies
 * the register count, interleave factor and register spacing.  Elements
 * are transferred one at a time through a 64-bit temporary, with the
 * address advancing by the element size after each transfer, and the
 * base register is optionally written back at the end.
 *
 * Returns false when the insn must UNDEF, true otherwise (including the
 * case where vfp_access_check() fails and no memory ops are emitted).
 */
static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
{
    /* Neon load/store multiple structures */
    int nregs, interleave, spacing, reg, n;
    MemOp mop, align, endian;
    int mmu_idx = get_mem_index(s);
    int size = a->size;
    TCGv_i64 tmp64;
    TCGv_i32 addr, tmp;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }
    /* neon_ls_element_type[] only has entries for itype 0..10 */
    if (a->itype > 10) {
        return false;
    }
    /* Catch UNDEF cases for bad values of align field */
    switch (a->itype & 0xc) {
    case 4:
        if (a->align >= 2) {
            return false;
        }
        break;
    case 8:
        if (a->align == 3) {
            return false;
        }
        break;
    default:
        break;
    }
    nregs = neon_ls_element_type[a->itype].nregs;
    interleave = neon_ls_element_type[a->itype].interleave;
    spacing = neon_ls_element_type[a->itype].spacing;
    /* 64-bit elements are only accepted without interleave or spacing */
    if (size == 3 && (interleave | spacing) != 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For our purposes, bytes are always little-endian.  */
    endian = s->be_data;
    if (size == 0) {
        endian = MO_LE;
    }

    /* Enforce alignment requested by the instruction */
    if (a->align) {
        /*
         * NOTE(review): presumably pow2_align(n) requests 2**n-byte
         * alignment, i.e. (4 << a->align) bytes here; the previous
         * comment read "4 ** a->align" -- confirm against pow2_align().
         */
        align = pow2_align(a->align + 2);
    } else {
        align = s->align_mem ? MO_ALIGN : 0;
    }

    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    if (interleave == 1 && endian == MO_LE) {
        /* Retain any natural alignment. */
        if (align == MO_ALIGN) {
            align = pow2_align(size);
        }
        size = 3;
    }

    tmp64 = tcg_temp_new_i64();
    addr = tcg_temp_new_i32();
    /* tmp holds the per-element address increment, in bytes */
    tmp = tcg_const_i32(1 << size);
    load_reg_var(s, addr, a->rn);

    mop = endian | size | align;
    for (reg = 0; reg < nregs; reg++) {
        /* (8 >> size) is the number of elements in one 8-byte D register */
        for (n = 0; n < 8 >> size; n++) {
            int xs;
            for (xs = 0; xs < interleave; xs++) {
                /* D register that holds element n for this step */
                int tt = a->vd + reg + spacing * xs;

                if (a->l) {
                    gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop);
                    neon_store_element64(tt, n, size, tmp64);
                } else {
                    neon_load_element64(tmp64, tt, n, size);
                    gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop);
                }
                tcg_gen_add_i32(addr, addr, tmp);

                /* Subsequent memory operations inherit alignment */
                mop &= ~MO_AMASK;
            }
        }
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(tmp64);

    /* Immediate writeback stride: 8 bytes per D register transferred */
    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
    return true;
}
    547
/*
 * Translate the Neon "load single structure to all lanes" insns.
 *
 * One element is loaded per structure member (a->n + 1 of them) and
 * replicated across the destination register(s) with a gvec dup.  The
 * base register is optionally written back at the end.
 *
 * Returns false when the insn must UNDEF, true otherwise (including the
 * case where vfp_access_check() fails and no memory ops are emitted).
 */
static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
    /* Neon load single structure to all lanes */
    int reg, stride, vec_size;
    int vd = a->vd;
    int size = a->size;
    int nregs = a->n + 1;
    TCGv_i32 addr, tmp;
    MemOp mop, align;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    /* Validate the size/alignment combination and pick the alignment */
    align = 0;
    if (size == 3) {
        /* size == 3 is only accepted for the 4-register form with a == 1 */
        if (nregs != 4 || a->a == 0) {
            return false;
        }
        /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
        size = MO_32;
        align = MO_ALIGN_16;
    } else if (a->a) {
        switch (nregs) {
        case 1:
            /* An alignment request on a byte-sized single load UNDEFs */
            if (size == 0) {
                return false;
            }
            align = MO_ALIGN;
            break;
        case 2:
            align = pow2_align(size + 1);
            break;
        case 3:
            /* The 3-register form never accepts an alignment request */
            return false;
        case 4:
            align = pow2_align(size + 2);
            break;
        default:
            g_assert_not_reached();
        }
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * VLD1 to all lanes: T bit indicates how many Dregs to write.
     * VLD2/3/4 to all lanes: T bit indicates register stride.
     */
    stride = a->t ? 2 : 1;
    vec_size = nregs == 1 ? stride * 8 : 8;
    mop = size | align;
    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop);
        if ((vd & 1) && vec_size == 16) {
            /*
             * We cannot write 16 bytes at once because the
             * destination is unaligned.
             */
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 8, 8, tmp);
            /* Copy the first 8 bytes into the following D register */
            tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
                             neon_full_reg_offset(vd), 8, 8);
        } else {
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 vec_size, vec_size, tmp);
        }
        /* Step to the next element in memory and the next destination */
        tcg_gen_addi_i32(addr, addr, 1 << size);
        vd += stride;

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    /* Immediate writeback stride: one element per structure member */
    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);

    return true;
}
    638
    639static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
    640{
    641    /* Neon load/store single structure to one lane */
    642    int reg;
    643    int nregs = a->n + 1;
    644    int vd = a->vd;
    645    TCGv_i32 addr, tmp;
    646    MemOp mop;
    647
    648    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    649        return false;
    650    }
    651
    652    /* UNDEF accesses to D16-D31 if they don't exist */
    653    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
    654        return false;
    655    }
    656
    657    /* Catch the UNDEF cases. This is unavoidably a bit messy. */
    658    switch (nregs) {
    659    case 1:
    660        if (((a->align & (1 << a->size)) != 0) ||
    661            (a->size == 2 && (a->align == 1 || a->align == 2))) {
    662            return false;
    663        }
    664        break;
    665    case 3:
    666        if ((a->align & 1) != 0) {
    667            return false;
    668        }
    669        /* fall through */
    670    case 2:
    671        if (a->size == 2 && (a->align & 2) != 0) {
    672            return false;
    673        }
    674        break;
    675    case 4:
    676        if (a->size == 2 && a->align == 3) {
    677            return false;
    678        }
    679        break;
    680    default:
    681        abort();
    682    }
    683    if ((vd + a->stride * (nregs - 1)) > 31) {
    684        /*
    685         * Attempts to write off the end of the register file are
    686         * UNPREDICTABLE; we choose to UNDEF because otherwise we would
    687         * access off the end of the array that holds the register data.
    688         */
    689        return false;
    690    }
    691
    692    if (!vfp_access_check(s)) {
    693        return true;
    694    }
    695
    696    /* Pick up SCTLR settings */
    697    mop = finalize_memop(s, a->size);
    698
    699    if (a->align) {
    700        MemOp align_op;
    701
    702        switch (nregs) {
    703        case 1:
    704            /* For VLD1, use natural alignment. */
    705            align_op = MO_ALIGN;
    706            break;
    707        case 2:
    708            /* For VLD2, use double alignment. */
    709            align_op = pow2_align(a->size + 1);
    710            break;
    711        case 4:
    712            if (a->size == MO_32) {
    713                /*
    714                 * For VLD4.32, align = 1 is double alignment, align = 2 is
    715                 * quad alignment; align = 3 is rejected above.
    716                 */
    717                align_op = pow2_align(a->size + a->align);
    718            } else {
    719                /* For VLD4.8 and VLD.16, we want quad alignment. */
    720                align_op = pow2_align(a->size + 2);
    721            }
    722            break;
    723        default:
    724            /* For VLD3, the alignment field is zero and rejected above. */
    725            g_assert_not_reached();
    726        }
    727
    728        mop = (mop & ~MO_AMASK) | align_op;
    729    }
    730
    731    tmp = tcg_temp_new_i32();
    732    addr = tcg_temp_new_i32();
    733    load_reg_var(s, addr, a->rn);
    734
    735    for (reg = 0; reg < nregs; reg++) {
    736        if (a->l) {
    737            gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
    738            neon_store_element(vd, a->reg_idx, a->size, tmp);
    739        } else { /* Store */
    740            neon_load_element(tmp, vd, a->reg_idx, a->size);
    741            gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
    742        }
    743        vd += a->stride;
    744        tcg_gen_addi_i32(addr, addr, 1 << a->size);
    745
    746        /* Subsequent memory operations inherit alignment */
    747        mop &= ~MO_AMASK;
    748    }
    749    tcg_temp_free_i32(addr);
    750    tcg_temp_free_i32(tmp);
    751
    752    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);
    753
    754    return true;
    755}
    756
    757static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
    758{
    759    int vec_size = a->q ? 16 : 8;
    760    int rd_ofs = neon_full_reg_offset(a->vd);
    761    int rn_ofs = neon_full_reg_offset(a->vn);
    762    int rm_ofs = neon_full_reg_offset(a->vm);
    763
    764    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    765        return false;
    766    }
    767
    768    /* UNDEF accesses to D16-D31 if they don't exist. */
    769    if (!dc_isar_feature(aa32_simd_r32, s) &&
    770        ((a->vd | a->vn | a->vm) & 0x10)) {
    771        return false;
    772    }
    773
    774    if ((a->vn | a->vm | a->vd) & a->q) {
    775        return false;
    776    }
    777
    778    if (!vfp_access_check(s)) {
    779        return true;
    780    }
    781
    782    fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
    783    return true;
    784}
    785
    786#define DO_3SAME(INSN, FUNC)                                            \
    787    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    788    {                                                                   \
    789        return do_3same(s, a, FUNC);                                    \
    790    }
    791
    792DO_3SAME(VADD, tcg_gen_gvec_add)
    793DO_3SAME(VSUB, tcg_gen_gvec_sub)
    794DO_3SAME(VAND, tcg_gen_gvec_and)
    795DO_3SAME(VBIC, tcg_gen_gvec_andc)
    796DO_3SAME(VORR, tcg_gen_gvec_or)
    797DO_3SAME(VORN, tcg_gen_gvec_orc)
    798DO_3SAME(VEOR, tcg_gen_gvec_xor)
    799DO_3SAME(VSHL_S, gen_gvec_sshl)
    800DO_3SAME(VSHL_U, gen_gvec_ushl)
    801DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
    802DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
    803DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
    804DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
    805
    806/* These insns are all gvec_bitsel but with the inputs in various orders. */
    807#define DO_3SAME_BITSEL(INSN, O1, O2, O3)                               \
    808    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
    809                                uint32_t rn_ofs, uint32_t rm_ofs,       \
    810                                uint32_t oprsz, uint32_t maxsz)         \
    811    {                                                                   \
    812        tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz);    \
    813    }                                                                   \
    814    DO_3SAME(INSN, gen_##INSN##_3s)
    815
    816DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
    817DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
    818DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
    819
    820#define DO_3SAME_NO_SZ_3(INSN, FUNC)                                    \
    821    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    822    {                                                                   \
    823        if (a->size == 3) {                                             \
    824            return false;                                               \
    825        }                                                               \
    826        return do_3same(s, a, FUNC);                                    \
    827    }
    828
    829DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
    830DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
    831DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
    832DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
    833DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
    834DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
    835DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
    836DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
    837DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
    838DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
    839DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
    840DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
    841
    842#define DO_3SAME_CMP(INSN, COND)                                        \
    843    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
    844                                uint32_t rn_ofs, uint32_t rm_ofs,       \
    845                                uint32_t oprsz, uint32_t maxsz)         \
    846    {                                                                   \
    847        tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
    848    }                                                                   \
    849    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
    850
    851DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
    852DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
    853DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
    854DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
    855DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
    856
    857#define WRAP_OOL_FN(WRAPNAME, FUNC)                                        \
    858    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,  \
    859                         uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)  \
    860    {                                                                      \
    861        tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
    862    }
    863
    864WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)
    865
    866static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
    867{
    868    if (a->size != 0) {
    869        return false;
    870    }
    871    return do_3same(s, a, gen_VMUL_p_3s);
    872}
    873
    874#define DO_VQRDMLAH(INSN, FUNC)                                         \
    875    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    876    {                                                                   \
    877        if (!dc_isar_feature(aa32_rdm, s)) {                            \
    878            return false;                                               \
    879        }                                                               \
    880        if (a->size != 1 && a->size != 2) {                             \
    881            return false;                                               \
    882        }                                                               \
    883        return do_3same(s, a, FUNC);                                    \
    884    }
    885
    886DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
    887DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
    888
    889#define DO_SHA1(NAME, FUNC)                                             \
    890    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    891    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    892    {                                                                   \
    893        if (!dc_isar_feature(aa32_sha1, s)) {                           \
    894            return false;                                               \
    895        }                                                               \
    896        return do_3same(s, a, gen_##NAME##_3s);                         \
    897    }
    898
    899DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
    900DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
    901DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
    902DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)
    903
    904#define DO_SHA2(NAME, FUNC)                                             \
    905    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    906    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    907    {                                                                   \
    908        if (!dc_isar_feature(aa32_sha2, s)) {                           \
    909            return false;                                               \
    910        }                                                               \
    911        return do_3same(s, a, gen_##NAME##_3s);                         \
    912    }
    913
    914DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
    915DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
    916DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)
    917
    918#define DO_3SAME_64(INSN, FUNC)                                         \
    919    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
    920                                uint32_t rn_ofs, uint32_t rm_ofs,       \
    921                                uint32_t oprsz, uint32_t maxsz)         \
    922    {                                                                   \
    923        static const GVecGen3 op = { .fni8 = FUNC };                    \
    924        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op);      \
    925    }                                                                   \
    926    DO_3SAME(INSN, gen_##INSN##_3s)
    927
    928#define DO_3SAME_64_ENV(INSN, FUNC)                                     \
    929    static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)    \
    930    {                                                                   \
    931        FUNC(d, cpu_env, n, m);                                         \
    932    }                                                                   \
    933    DO_3SAME_64(INSN, gen_##INSN##_elt)
    934
    935DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
    936DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
    937DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
    938DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
    939DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
    940DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
    941
    942#define DO_3SAME_32(INSN, FUNC)                                         \
    943    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
    944                                uint32_t rn_ofs, uint32_t rm_ofs,       \
    945                                uint32_t oprsz, uint32_t maxsz)         \
    946    {                                                                   \
    947        static const GVecGen3 ops[4] = {                                \
    948            { .fni4 = gen_helper_neon_##FUNC##8 },                      \
    949            { .fni4 = gen_helper_neon_##FUNC##16 },                     \
    950            { .fni4 = gen_helper_neon_##FUNC##32 },                     \
    951            { 0 },                                                      \
    952        };                                                              \
    953        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    954    }                                                                   \
    955    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    956    {                                                                   \
    957        if (a->size > 2) {                                              \
    958            return false;                                               \
    959        }                                                               \
    960        return do_3same(s, a, gen_##INSN##_3s);                         \
    961    }
    962
    963/*
    964 * Some helper functions need to be passed the cpu_env. In order
    965 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
    966 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
    967 * and which call a NeonGenTwoOpEnvFn().
    968 */
    969#define WRAP_ENV_FN(WRAPNAME, FUNC)                                     \
    970    static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)            \
    971    {                                                                   \
    972        FUNC(d, cpu_env, n, m);                                         \
    973    }
    974
    975#define DO_3SAME_32_ENV(INSN, FUNC)                                     \
    976    WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8);        \
    977    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16);      \
    978    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32);      \
    979    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
    980                                uint32_t rn_ofs, uint32_t rm_ofs,       \
    981                                uint32_t oprsz, uint32_t maxsz)         \
    982    {                                                                   \
    983        static const GVecGen3 ops[4] = {                                \
    984            { .fni4 = gen_##INSN##_tramp8 },                            \
    985            { .fni4 = gen_##INSN##_tramp16 },                           \
    986            { .fni4 = gen_##INSN##_tramp32 },                           \
    987            { 0 },                                                      \
    988        };                                                              \
    989        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    990    }                                                                   \
    991    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    992    {                                                                   \
    993        if (a->size > 2) {                                              \
    994            return false;                                               \
    995        }                                                               \
    996        return do_3same(s, a, gen_##INSN##_3s);                         \
    997    }
    998
/*
 * Insns implemented with per-element-size helpers. The second argument
 * is the helper name stem, to which 8, 16 or 32 is appended.
 */
DO_3SAME_32(VHADD_S, hadd_s)
DO_3SAME_32(VHADD_U, hadd_u)
DO_3SAME_32(VHSUB_S, hsub_s)
DO_3SAME_32(VHSUB_U, hsub_u)
DO_3SAME_32(VRHADD_S, rhadd_s)
DO_3SAME_32(VRHADD_U, rhadd_u)
DO_3SAME_32(VRSHL_S, rshl_s)
DO_3SAME_32(VRSHL_U, rshl_u)

/* The same, for helpers which are passed cpu_env. */
DO_3SAME_32_ENV(VQSHL_S, qshl_s)
DO_3SAME_32_ENV(VQSHL_U, qshl_u)
DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
   1012
   1013static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
   1014{
   1015    /* Operations handled pairwise 32 bits at a time */
   1016    TCGv_i32 tmp, tmp2, tmp3;
   1017
   1018    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1019        return false;
   1020    }
   1021
   1022    /* UNDEF accesses to D16-D31 if they don't exist. */
   1023    if (!dc_isar_feature(aa32_simd_r32, s) &&
   1024        ((a->vd | a->vn | a->vm) & 0x10)) {
   1025        return false;
   1026    }
   1027
   1028    if (a->size == 3) {
   1029        return false;
   1030    }
   1031
   1032    if (!vfp_access_check(s)) {
   1033        return true;
   1034    }
   1035
   1036    assert(a->q == 0); /* enforced by decode patterns */
   1037
   1038    /*
   1039     * Note that we have to be careful not to clobber the source operands
   1040     * in the "vm == vd" case by storing the result of the first pass too
   1041     * early. Since Q is 0 there are always just two passes, so instead
   1042     * of a complicated loop over each pass we just unroll.
   1043     */
   1044    tmp = tcg_temp_new_i32();
   1045    tmp2 = tcg_temp_new_i32();
   1046    tmp3 = tcg_temp_new_i32();
   1047
   1048    read_neon_element32(tmp, a->vn, 0, MO_32);
   1049    read_neon_element32(tmp2, a->vn, 1, MO_32);
   1050    fn(tmp, tmp, tmp2);
   1051
   1052    read_neon_element32(tmp3, a->vm, 0, MO_32);
   1053    read_neon_element32(tmp2, a->vm, 1, MO_32);
   1054    fn(tmp3, tmp3, tmp2);
   1055
   1056    write_neon_element32(tmp, a->vd, 0, MO_32);
   1057    write_neon_element32(tmp3, a->vd, 1, MO_32);
   1058
   1059    tcg_temp_free_i32(tmp);
   1060    tcg_temp_free_i32(tmp2);
   1061    tcg_temp_free_i32(tmp3);
   1062    return true;
   1063}
   1064
   1065#define DO_3SAME_PAIR(INSN, func)                                       \
   1066    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
   1067    {                                                                   \
   1068        static NeonGenTwoOpFn * const fns[] = {                         \
   1069            gen_helper_neon_##func##8,                                  \
   1070            gen_helper_neon_##func##16,                                 \
   1071            gen_helper_neon_##func##32,                                 \
   1072        };                                                              \
   1073        if (a->size > 2) {                                              \
   1074            return false;                                               \
   1075        }                                                               \
   1076        return do_3same_pair(s, a, fns[a->size]);                       \
   1077    }
   1078
/*
 * 32-bit pairwise ops end up the same as the elementwise versions, so
 * the names that DO_3SAME_PAIR pastes together for the 32-bit case are
 * aliased here to the corresponding inline TCG ops.
 */
#define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
#define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
#define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
#define gen_helper_neon_pmin_u32  tcg_gen_umin_i32
#define gen_helper_neon_padd_u32  tcg_gen_add_i32

/* Integer pairwise max/min/add; the second argument is the helper stem. */
DO_3SAME_PAIR(VPMAX_S, pmax_s)
DO_3SAME_PAIR(VPMIN_S, pmin_s)
DO_3SAME_PAIR(VPMAX_U, pmax_u)
DO_3SAME_PAIR(VPMIN_U, pmin_u)
DO_3SAME_PAIR(VPADD, padd_u)
   1091
   1092#define DO_3SAME_VQDMULH(INSN, FUNC)                                    \
   1093    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16);    \
   1094    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32);    \
   1095    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
   1096                                uint32_t rn_ofs, uint32_t rm_ofs,       \
   1097                                uint32_t oprsz, uint32_t maxsz)         \
   1098    {                                                                   \
   1099        static const GVecGen3 ops[2] = {                                \
   1100            { .fni4 = gen_##INSN##_tramp16 },                           \
   1101            { .fni4 = gen_##INSN##_tramp32 },                           \
   1102        };                                                              \
   1103        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
   1104    }                                                                   \
   1105    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
   1106    {                                                                   \
   1107        if (a->size != 1 && a->size != 2) {                             \
   1108            return false;                                               \
   1109        }                                                               \
   1110        return do_3same(s, a, gen_##INSN##_3s);                         \
   1111    }
   1112
   1113DO_3SAME_VQDMULH(VQDMULH, qdmulh)
   1114DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
   1115
   1116#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC)                              \
   1117    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
   1118                         uint32_t rn_ofs, uint32_t rm_ofs,              \
   1119                         uint32_t oprsz, uint32_t maxsz)                \
   1120    {                                                                   \
   1121        TCGv_ptr fpst = fpstatus_ptr(FPST);                             \
   1122        tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst,                \
   1123                           oprsz, maxsz, 0, FUNC);                      \
   1124        tcg_temp_free_ptr(fpst);                                        \
   1125    }
   1126
   1127#define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC)                                 \
   1128    WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC)                 \
   1129    WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC)             \
   1130    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
   1131    {                                                                   \
   1132        if (a->size == MO_16) {                                         \
   1133            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
   1134                return false;                                           \
   1135            }                                                           \
   1136            return do_3same(s, a, gen_##INSN##_fp16_3s);                \
   1137        }                                                               \
   1138        return do_3same(s, a, gen_##INSN##_fp32_3s);                    \
   1139    }
   1140
   1141
   1142DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
   1143DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
   1144DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
   1145DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
   1146DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
   1147DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
   1148DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
   1149DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
   1150DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
   1151DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
   1152DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
   1153DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
   1154DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
   1155DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
   1156DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
   1157DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
   1158DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
   1159
/*
 * gvec expanders for VMAXNM/VMINNM: the fp32 forms use FPST_STD and the
 * fp16 forms FPST_STD_F16. They are used by the hand-written
 * trans_VMAXNM_fp_3s()/trans_VMINNM_fp_3s() below.
 */
WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)
   1164
   1165static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
   1166{
   1167    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
   1168        return false;
   1169    }
   1170
   1171    if (a->size == MO_16) {
   1172        if (!dc_isar_feature(aa32_fp16_arith, s)) {
   1173            return false;
   1174        }
   1175        return do_3same(s, a, gen_VMAXNM_fp16_3s);
   1176    }
   1177    return do_3same(s, a, gen_VMAXNM_fp32_3s);
   1178}
   1179
   1180static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
   1181{
   1182    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
   1183        return false;
   1184    }
   1185
   1186    if (a->size == MO_16) {
   1187        if (!dc_isar_feature(aa32_fp16_arith, s)) {
   1188            return false;
   1189        }
   1190        return do_3same(s, a, gen_VMINNM_fp16_3s);
   1191    }
   1192    return do_3same(s, a, gen_VMINNM_fp32_3s);
   1193}
   1194
   1195static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
   1196                             gen_helper_gvec_3_ptr *fn)
   1197{
   1198    /* FP pairwise operations */
   1199    TCGv_ptr fpstatus;
   1200
   1201    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1202        return false;
   1203    }
   1204
   1205    /* UNDEF accesses to D16-D31 if they don't exist. */
   1206    if (!dc_isar_feature(aa32_simd_r32, s) &&
   1207        ((a->vd | a->vn | a->vm) & 0x10)) {
   1208        return false;
   1209    }
   1210
   1211    if (!vfp_access_check(s)) {
   1212        return true;
   1213    }
   1214
   1215    assert(a->q == 0); /* enforced by decode patterns */
   1216
   1217
   1218    fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
   1219    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
   1220                       vfp_reg_offset(1, a->vn),
   1221                       vfp_reg_offset(1, a->vm),
   1222                       fpstatus, 8, 8, 0, fn);
   1223    tcg_temp_free_ptr(fpstatus);
   1224
   1225    return true;
   1226}
   1227
/*
 * For all the functions using this macro, size == 1 means fp16,
 * which is only accepted when the aa32_fp16_arith feature is present;
 * any other size uses the single-precision helper.
 */
   1232#define DO_3S_FP_PAIR(INSN,FUNC)                                    \
   1233    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
   1234    {                                                               \
   1235        if (a->size == MO_16) {                                     \
   1236            if (!dc_isar_feature(aa32_fp16_arith, s)) {             \
   1237                return false;                                       \
   1238            }                                                       \
   1239            return do_3same_fp_pair(s, a, FUNC##h);                 \
   1240        }                                                           \
   1241        return do_3same_fp_pair(s, a, FUNC##s);                     \
   1242    }
   1243
   1244DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
   1245DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
   1246DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)
   1247
   1248static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
   1249{
   1250    /* Handle a 2-reg-shift insn which can be vectorized. */
   1251    int vec_size = a->q ? 16 : 8;
   1252    int rd_ofs = neon_full_reg_offset(a->vd);
   1253    int rm_ofs = neon_full_reg_offset(a->vm);
   1254
   1255    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1256        return false;
   1257    }
   1258
   1259    /* UNDEF accesses to D16-D31 if they don't exist. */
   1260    if (!dc_isar_feature(aa32_simd_r32, s) &&
   1261        ((a->vd | a->vm) & 0x10)) {
   1262        return false;
   1263    }
   1264
   1265    if ((a->vm | a->vd) & a->q) {
   1266        return false;
   1267    }
   1268
   1269    if (!vfp_access_check(s)) {
   1270        return true;
   1271    }
   1272
   1273    fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
   1274    return true;
   1275}
   1276
   1277#define DO_2SH(INSN, FUNC)                                              \
   1278    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
   1279    {                                                                   \
   1280        return do_vector_2sh(s, a, FUNC);                               \
   1281    }                                                                   \
   1282
   1283DO_2SH(VSHL, tcg_gen_gvec_shli)
   1284DO_2SH(VSLI, gen_gvec_sli)
   1285DO_2SH(VSRI, gen_gvec_sri)
   1286DO_2SH(VSRA_S, gen_gvec_ssra)
   1287DO_2SH(VSRA_U, gen_gvec_usra)
   1288DO_2SH(VRSHR_S, gen_gvec_srshr)
   1289DO_2SH(VRSHR_U, gen_gvec_urshr)
   1290DO_2SH(VRSRA_S, gen_gvec_srsra)
   1291DO_2SH(VRSRA_U, gen_gvec_ursra)
   1292
   1293static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
   1294{
   1295    /* Signed shift out of range results in all-sign-bits */
   1296    a->shift = MIN(a->shift, (8 << a->size) - 1);
   1297    return do_vector_2sh(s, a, tcg_gen_gvec_sari);
   1298}
   1299
   1300static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   1301                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
   1302{
   1303    tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
   1304}
   1305
   1306static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
   1307{
   1308    /* Shift out of range is architecturally valid and results in zero. */
   1309    if (a->shift >= (8 << a->size)) {
   1310        return do_vector_2sh(s, a, gen_zero_rd_2sh);
   1311    } else {
   1312        return do_vector_2sh(s, a, tcg_gen_gvec_shri);
   1313    }
   1314}
   1315
   1316static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
   1317                             NeonGenTwo64OpEnvFn *fn)
   1318{
   1319    /*
   1320     * 2-reg-and-shift operations, size == 3 case, where the
   1321     * function needs to be passed cpu_env.
   1322     */
   1323    TCGv_i64 constimm;
   1324    int pass;
   1325
   1326    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1327        return false;
   1328    }
   1329
   1330    /* UNDEF accesses to D16-D31 if they don't exist. */
   1331    if (!dc_isar_feature(aa32_simd_r32, s) &&
   1332        ((a->vd | a->vm) & 0x10)) {
   1333        return false;
   1334    }
   1335
   1336    if ((a->vm | a->vd) & a->q) {
   1337        return false;
   1338    }
   1339
   1340    if (!vfp_access_check(s)) {
   1341        return true;
   1342    }
   1343
   1344    /*
   1345     * To avoid excessive duplication of ops we implement shift
   1346     * by immediate using the variable shift operations.
   1347     */
   1348    constimm = tcg_const_i64(dup_const(a->size, a->shift));
   1349
   1350    for (pass = 0; pass < a->q + 1; pass++) {
   1351        TCGv_i64 tmp = tcg_temp_new_i64();
   1352
   1353        read_neon_element64(tmp, a->vm, pass, MO_64);
   1354        fn(tmp, cpu_env, tmp, constimm);
   1355        write_neon_element64(tmp, a->vd, pass, MO_64);
   1356        tcg_temp_free_i64(tmp);
   1357    }
   1358    tcg_temp_free_i64(constimm);
   1359    return true;
   1360}
   1361
   1362static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
   1363                             NeonGenTwoOpEnvFn *fn)
   1364{
   1365    /*
   1366     * 2-reg-and-shift operations, size < 3 case, where the
   1367     * helper needs to be passed cpu_env.
   1368     */
   1369    TCGv_i32 constimm, tmp;
   1370    int pass;
   1371
   1372    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1373        return false;
   1374    }
   1375
   1376    /* UNDEF accesses to D16-D31 if they don't exist. */
   1377    if (!dc_isar_feature(aa32_simd_r32, s) &&
   1378        ((a->vd | a->vm) & 0x10)) {
   1379        return false;
   1380    }
   1381
   1382    if ((a->vm | a->vd) & a->q) {
   1383        return false;
   1384    }
   1385
   1386    if (!vfp_access_check(s)) {
   1387        return true;
   1388    }
   1389
   1390    /*
   1391     * To avoid excessive duplication of ops we implement shift
   1392     * by immediate using the variable shift operations.
   1393     */
   1394    constimm = tcg_const_i32(dup_const(a->size, a->shift));
   1395    tmp = tcg_temp_new_i32();
   1396
   1397    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
   1398        read_neon_element32(tmp, a->vm, pass, MO_32);
   1399        fn(tmp, cpu_env, tmp, constimm);
   1400        write_neon_element32(tmp, a->vd, pass, MO_32);
   1401    }
   1402    tcg_temp_free_i32(tmp);
   1403    tcg_temp_free_i32(constimm);
   1404    return true;
   1405}
   1406
   1407#define DO_2SHIFT_ENV(INSN, FUNC)                                       \
   1408    static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
   1409    {                                                                   \
   1410        return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64);      \
   1411    }                                                                   \
   1412    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
   1413    {                                                                   \
   1414        static NeonGenTwoOpEnvFn * const fns[] = {                      \
   1415            gen_helper_neon_##FUNC##8,                                  \
   1416            gen_helper_neon_##FUNC##16,                                 \
   1417            gen_helper_neon_##FUNC##32,                                 \
   1418        };                                                              \
   1419        assert(a->size < ARRAY_SIZE(fns));                              \
   1420        return do_2shift_env_32(s, a, fns[a->size]);                    \
   1421    }
   1422
   1423DO_2SHIFT_ENV(VQSHLU, qshlu_s)
   1424DO_2SHIFT_ENV(VQSHL_U, qshl_u)
   1425DO_2SHIFT_ENV(VQSHL_S, qshl_s)
   1426
   1427static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
   1428                                NeonGenTwo64OpFn *shiftfn,
   1429                                NeonGenNarrowEnvFn *narrowfn)
   1430{
   1431    /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
   1432    TCGv_i64 constimm, rm1, rm2;
   1433    TCGv_i32 rd;
   1434
   1435    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1436        return false;
   1437    }
   1438
   1439    /* UNDEF accesses to D16-D31 if they don't exist. */
   1440    if (!dc_isar_feature(aa32_simd_r32, s) &&
   1441        ((a->vd | a->vm) & 0x10)) {
   1442        return false;
   1443    }
   1444
   1445    if (a->vm & 1) {
   1446        return false;
   1447    }
   1448
   1449    if (!vfp_access_check(s)) {
   1450        return true;
   1451    }
   1452
   1453    /*
   1454     * This is always a right shift, and the shiftfn is always a
   1455     * left-shift helper, which thus needs the negated shift count.
   1456     */
   1457    constimm = tcg_const_i64(-a->shift);
   1458    rm1 = tcg_temp_new_i64();
   1459    rm2 = tcg_temp_new_i64();
   1460    rd = tcg_temp_new_i32();
   1461
   1462    /* Load both inputs first to avoid potential overwrite if rm == rd */
   1463    read_neon_element64(rm1, a->vm, 0, MO_64);
   1464    read_neon_element64(rm2, a->vm, 1, MO_64);
   1465
   1466    shiftfn(rm1, rm1, constimm);
   1467    narrowfn(rd, cpu_env, rm1);
   1468    write_neon_element32(rd, a->vd, 0, MO_32);
   1469
   1470    shiftfn(rm2, rm2, constimm);
   1471    narrowfn(rd, cpu_env, rm2);
   1472    write_neon_element32(rd, a->vd, 1, MO_32);
   1473
   1474    tcg_temp_free_i32(rd);
   1475    tcg_temp_free_i64(rm1);
   1476    tcg_temp_free_i64(rm2);
   1477    tcg_temp_free_i64(constimm);
   1478
   1479    return true;
   1480}
   1481
   1482static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
   1483                                NeonGenTwoOpFn *shiftfn,
   1484                                NeonGenNarrowEnvFn *narrowfn)
   1485{
   1486    /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
   1487    TCGv_i32 constimm, rm1, rm2, rm3, rm4;
   1488    TCGv_i64 rtmp;
   1489    uint32_t imm;
   1490
   1491    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1492        return false;
   1493    }
   1494
   1495    /* UNDEF accesses to D16-D31 if they don't exist. */
   1496    if (!dc_isar_feature(aa32_simd_r32, s) &&
   1497        ((a->vd | a->vm) & 0x10)) {
   1498        return false;
   1499    }
   1500
   1501    if (a->vm & 1) {
   1502        return false;
   1503    }
   1504
   1505    if (!vfp_access_check(s)) {
   1506        return true;
   1507    }
   1508
   1509    /*
   1510     * This is always a right shift, and the shiftfn is always a
   1511     * left-shift helper, which thus needs the negated shift count
   1512     * duplicated into each lane of the immediate value.
   1513     */
   1514    if (a->size == 1) {
   1515        imm = (uint16_t)(-a->shift);
   1516        imm |= imm << 16;
   1517    } else {
   1518        /* size == 2 */
   1519        imm = -a->shift;
   1520    }
   1521    constimm = tcg_const_i32(imm);
   1522
   1523    /* Load all inputs first to avoid potential overwrite */
   1524    rm1 = tcg_temp_new_i32();
   1525    rm2 = tcg_temp_new_i32();
   1526    rm3 = tcg_temp_new_i32();
   1527    rm4 = tcg_temp_new_i32();
   1528    read_neon_element32(rm1, a->vm, 0, MO_32);
   1529    read_neon_element32(rm2, a->vm, 1, MO_32);
   1530    read_neon_element32(rm3, a->vm, 2, MO_32);
   1531    read_neon_element32(rm4, a->vm, 3, MO_32);
   1532    rtmp = tcg_temp_new_i64();
   1533
   1534    shiftfn(rm1, rm1, constimm);
   1535    shiftfn(rm2, rm2, constimm);
   1536
   1537    tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
   1538    tcg_temp_free_i32(rm2);
   1539
   1540    narrowfn(rm1, cpu_env, rtmp);
   1541    write_neon_element32(rm1, a->vd, 0, MO_32);
   1542    tcg_temp_free_i32(rm1);
   1543
   1544    shiftfn(rm3, rm3, constimm);
   1545    shiftfn(rm4, rm4, constimm);
   1546    tcg_temp_free_i32(constimm);
   1547
   1548    tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
   1549    tcg_temp_free_i32(rm4);
   1550
   1551    narrowfn(rm3, cpu_env, rtmp);
   1552    tcg_temp_free_i64(rtmp);
   1553    write_neon_element32(rm3, a->vd, 1, MO_32);
   1554    tcg_temp_free_i32(rm3);
   1555    return true;
   1556}
   1557
   1558#define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
   1559    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
   1560    {                                                                   \
   1561        return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
   1562    }
   1563#define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
   1564    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
   1565    {                                                                   \
   1566        return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
   1567    }
   1568
/*
 * Narrowing callback for the DO_2SN_* instantiations: produce a 32-bit
 * result in @dest from the 64-bit @src via tcg_gen_extrl_i64_i32().
 * @env is unused; it is only present so that the signature matches the
 * three-argument (dest, env, src) form in which narrowing callbacks are
 * invoked.
 */
static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    tcg_gen_extrl_i64_i32(dest, src);
}
   1573
/*
 * Narrowing callback for the DO_2SN_* instantiations: forwards to
 * gen_helper_neon_narrow_u16(), dropping the unused @env argument so
 * that the helper can be used where a (dest, env, src) callback is
 * expected.
 */
static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u16(dest, src);
}
   1578
/*
 * Narrowing callback for the DO_2SN_* instantiations: forwards to
 * gen_helper_neon_narrow_u8(), dropping the unused @env argument so
 * that the helper can be used where a (dest, env, src) callback is
 * expected.
 */
static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u8(dest, src);
}
   1583
/*
 * Instantiate the trans_*_2sh() functions for the shift-and-narrow
 * instructions. Each line pairs a shift generator with a narrowing
 * generator; the _64 variants go through do_2shift_narrow_64() and the
 * _32/_16 variants through do_2shift_narrow_32().
 */

/* VSHRN: unsigned shift generators, narrowed with gen_neon_narrow_u*. */
DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)

/* VRSHRN: as VSHRN but using the rshl_u* shift helpers. */
DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)

/* VQSHRUN: signed shift generators with the unarrow_sat* helpers. */
DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)

/* VQRSHRUN: rshl_s* shift helpers with the unarrow_sat* helpers. */
DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)

/* VQSHRN (signed): signed shift generators with narrow_sat_s* helpers. */
DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)

/* VQRSHRN (signed): rshl_s* shift helpers with narrow_sat_s* helpers. */
DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)

/* VQSHRN (unsigned): unsigned shift generators with narrow_sat_u*. */
DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)

/* VQRSHRN (unsigned): rshl_u* shift helpers with narrow_sat_u*. */
DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
   1614
   1615static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
   1616                         NeonGenWidenFn *widenfn, bool u)
   1617{
   1618    TCGv_i64 tmp;
   1619    TCGv_i32 rm0, rm1;
   1620    uint64_t widen_mask = 0;
   1621
   1622    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1623        return false;
   1624    }
   1625
   1626    /* UNDEF accesses to D16-D31 if they don't exist. */
   1627    if (!dc_isar_feature(aa32_simd_r32, s) &&
   1628        ((a->vd | a->vm) & 0x10)) {
   1629        return false;
   1630    }
   1631
   1632    if (a->vd & 1) {
   1633        return false;
   1634    }
   1635
   1636    if (!vfp_access_check(s)) {
   1637        return true;
   1638    }
   1639
   1640    /*
   1641     * This is a widen-and-shift operation. The shift is always less
   1642     * than the width of the source type, so after widening the input
   1643     * vector we can simply shift the whole 64-bit widened register,
   1644     * and then clear the potential overflow bits resulting from left
   1645     * bits of the narrow input appearing as right bits of the left
   1646     * neighbour narrow input. Calculate a mask of bits to clear.
   1647     */
   1648    if ((a->shift != 0) && (a->size < 2 || u)) {
   1649        int esize = 8 << a->size;
   1650        widen_mask = MAKE_64BIT_MASK(0, esize);
   1651        widen_mask >>= esize - a->shift;
   1652        widen_mask = dup_const(a->size + 1, widen_mask);
   1653    }
   1654
   1655    rm0 = tcg_temp_new_i32();
   1656    rm1 = tcg_temp_new_i32();
   1657    read_neon_element32(rm0, a->vm, 0, MO_32);
   1658    read_neon_element32(rm1, a->vm, 1, MO_32);
   1659    tmp = tcg_temp_new_i64();
   1660
   1661    widenfn(tmp, rm0);
   1662    tcg_temp_free_i32(rm0);
   1663    if (a->shift != 0) {
   1664        tcg_gen_shli_i64(tmp, tmp, a->shift);
   1665        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
   1666    }
   1667    write_neon_element64(tmp, a->vd, 0, MO_64);
   1668
   1669    widenfn(tmp, rm1);
   1670    tcg_temp_free_i32(rm1);
   1671    if (a->shift != 0) {
   1672        tcg_gen_shli_i64(tmp, tmp, a->shift);
   1673        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
   1674    }
   1675    write_neon_element64(tmp, a->vd, 1, MO_64);
   1676    tcg_temp_free_i64(tmp);
   1677    return true;
   1678}
   1679
   1680static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
   1681{
   1682    static NeonGenWidenFn * const widenfn[] = {
   1683        gen_helper_neon_widen_s8,
   1684        gen_helper_neon_widen_s16,
   1685        tcg_gen_ext_i32_i64,
   1686    };
   1687    return do_vshll_2sh(s, a, widenfn[a->size], false);
   1688}
   1689
   1690static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
   1691{
   1692    static NeonGenWidenFn * const widenfn[] = {
   1693        gen_helper_neon_widen_u8,
   1694        gen_helper_neon_widen_u16,
   1695        tcg_gen_extu_i32_i64,
   1696    };
   1697    return do_vshll_2sh(s, a, widenfn[a->size], true);
   1698}
   1699
   1700static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
   1701                      gen_helper_gvec_2_ptr *fn)
   1702{
   1703    /* FP operations in 2-reg-and-shift group */
   1704    int vec_size = a->q ? 16 : 8;
   1705    int rd_ofs = neon_full_reg_offset(a->vd);
   1706    int rm_ofs = neon_full_reg_offset(a->vm);
   1707    TCGv_ptr fpst;
   1708
   1709    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1710        return false;
   1711    }
   1712
   1713    if (a->size == MO_16) {
   1714        if (!dc_isar_feature(aa32_fp16_arith, s)) {
   1715            return false;
   1716        }
   1717    }
   1718
   1719    /* UNDEF accesses to D16-D31 if they don't exist. */
   1720    if (!dc_isar_feature(aa32_simd_r32, s) &&
   1721        ((a->vd | a->vm) & 0x10)) {
   1722        return false;
   1723    }
   1724
   1725    if ((a->vm | a->vd) & a->q) {
   1726        return false;
   1727    }
   1728
   1729    if (!vfp_access_check(s)) {
   1730        return true;
   1731    }
   1732
   1733    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
   1734    tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
   1735    tcg_temp_free_ptr(fpst);
   1736    return true;
   1737}
   1738
   1739#define DO_FP_2SH(INSN, FUNC)                                           \
   1740    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
   1741    {                                                                   \
   1742        return do_fp_2sh(s, a, FUNC);                                   \
   1743    }
   1744
/* VCVT with shift: helpers with the _sf/_uf/_fs/_fu suffixes. */
DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)

/* VCVT with shift: helpers with the _sh/_uh/_hs/_hu suffixes. */
DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
   1754
   1755static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
   1756                        GVecGen2iFn *fn)
   1757{
   1758    uint64_t imm;
   1759    int reg_ofs, vec_size;
   1760
   1761    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1762        return false;
   1763    }
   1764
   1765    /* UNDEF accesses to D16-D31 if they don't exist. */
   1766    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
   1767        return false;
   1768    }
   1769
   1770    if (a->vd & a->q) {
   1771        return false;
   1772    }
   1773
   1774    if (!vfp_access_check(s)) {
   1775        return true;
   1776    }
   1777
   1778    reg_ofs = neon_full_reg_offset(a->vd);
   1779    vec_size = a->q ? 16 : 8;
   1780    imm = asimd_imm_const(a->imm, a->cmode, a->op);
   1781
   1782    fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
   1783    return true;
   1784}
   1785
/*
 * GVecGen2iFn-shaped callback used by trans_Vimm_1r() for the VMOV
 * case: fill the destination with the 64-bit constant @c.
 * @vece and @aofs are ignored; the element size is always MO_64.
 */
static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
}
   1791
   1792static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
   1793{
   1794    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
   1795    GVecGen2iFn *fn;
   1796
   1797    if ((a->cmode & 1) && a->cmode < 12) {
   1798        /* for op=1, the imm will be inverted, so BIC becomes AND. */
   1799        fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
   1800    } else {
   1801        /* There is one unallocated cmode/op combination in this space */
   1802        if (a->cmode == 15 && a->op == 1) {
   1803            return false;
   1804        }
   1805        fn = gen_VMOV_1r;
   1806    }
   1807    return do_1reg_imm(s, a, fn);
   1808}
   1809
   1810static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
   1811                           NeonGenWidenFn *widenfn,
   1812                           NeonGenTwo64OpFn *opfn,
   1813                           int src1_mop, int src2_mop)
   1814{
   1815    /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */
   1816    TCGv_i64 rn0_64, rn1_64, rm_64;
   1817
   1818    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1819        return false;
   1820    }
   1821
   1822    /* UNDEF accesses to D16-D31 if they don't exist. */
   1823    if (!dc_isar_feature(aa32_simd_r32, s) &&
   1824        ((a->vd | a->vn | a->vm) & 0x10)) {
   1825        return false;
   1826    }
   1827
   1828    if (!opfn) {
   1829        /* size == 3 case, which is an entirely different insn group */
   1830        return false;
   1831    }
   1832
   1833    if ((a->vd & 1) || (src1_mop == MO_Q && (a->vn & 1))) {
   1834        return false;
   1835    }
   1836
   1837    if (!vfp_access_check(s)) {
   1838        return true;
   1839    }
   1840
   1841    rn0_64 = tcg_temp_new_i64();
   1842    rn1_64 = tcg_temp_new_i64();
   1843    rm_64 = tcg_temp_new_i64();
   1844
   1845    if (src1_mop >= 0) {
   1846        read_neon_element64(rn0_64, a->vn, 0, src1_mop);
   1847    } else {
   1848        TCGv_i32 tmp = tcg_temp_new_i32();
   1849        read_neon_element32(tmp, a->vn, 0, MO_32);
   1850        widenfn(rn0_64, tmp);
   1851        tcg_temp_free_i32(tmp);
   1852    }
   1853    if (src2_mop >= 0) {
   1854        read_neon_element64(rm_64, a->vm, 0, src2_mop);
   1855    } else {
   1856        TCGv_i32 tmp = tcg_temp_new_i32();
   1857        read_neon_element32(tmp, a->vm, 0, MO_32);
   1858        widenfn(rm_64, tmp);
   1859        tcg_temp_free_i32(tmp);
   1860    }
   1861
   1862    opfn(rn0_64, rn0_64, rm_64);
   1863
   1864    /*
   1865     * Load second pass inputs before storing the first pass result, to
   1866     * avoid incorrect results if a narrow input overlaps with the result.
   1867     */
   1868    if (src1_mop >= 0) {
   1869        read_neon_element64(rn1_64, a->vn, 1, src1_mop);
   1870    } else {
   1871        TCGv_i32 tmp = tcg_temp_new_i32();
   1872        read_neon_element32(tmp, a->vn, 1, MO_32);
   1873        widenfn(rn1_64, tmp);
   1874        tcg_temp_free_i32(tmp);
   1875    }
   1876    if (src2_mop >= 0) {
   1877        read_neon_element64(rm_64, a->vm, 1, src2_mop);
   1878    } else {
   1879        TCGv_i32 tmp = tcg_temp_new_i32();
   1880        read_neon_element32(tmp, a->vm, 1, MO_32);
   1881        widenfn(rm_64, tmp);
   1882        tcg_temp_free_i32(tmp);
   1883    }
   1884
   1885    write_neon_element64(rn0_64, a->vd, 0, MO_64);
   1886
   1887    opfn(rn1_64, rn1_64, rm_64);
   1888    write_neon_element64(rn1_64, a->vd, 1, MO_64);
   1889
   1890    tcg_temp_free_i64(rn0_64);
   1891    tcg_temp_free_i64(rn1_64);
   1892    tcg_temp_free_i64(rm_64);
   1893
   1894    return true;
   1895}
   1896
   1897#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN)                        \
   1898    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
   1899    {                                                                   \
   1900        static NeonGenWidenFn * const widenfn[] = {                     \
   1901            gen_helper_neon_widen_##S##8,                               \
   1902            gen_helper_neon_widen_##S##16,                              \
   1903            NULL, NULL,                                                 \
   1904        };                                                              \
   1905        static NeonGenTwo64OpFn * const addfn[] = {                     \
   1906            gen_helper_neon_##OP##l_u16,                                \
   1907            gen_helper_neon_##OP##l_u32,                                \
   1908            tcg_gen_##OP##_i64,                                         \
   1909            NULL,                                                       \
   1910        };                                                              \
   1911        int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1;          \
   1912        return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size],   \
   1913                              SRC1WIDE ? MO_Q : narrow_mop,             \
   1914                              narrow_mop);                              \
   1915    }
   1916
/* Long forms: both sources are narrow (SRC1WIDE is false). */
DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
DO_PREWIDEN(VADDL_U, u, add, false, 0)
DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
/* Wide forms: the first source is already wide (SRC1WIDE is true). */
DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
DO_PREWIDEN(VADDW_U, u, add, true, 0)
DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
   1925
   1926static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
   1927                         NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
   1928{
   1929    /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
   1930    TCGv_i64 rn_64, rm_64;
   1931    TCGv_i32 rd0, rd1;
   1932
   1933    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1934        return false;
   1935    }
   1936
   1937    /* UNDEF accesses to D16-D31 if they don't exist. */
   1938    if (!dc_isar_feature(aa32_simd_r32, s) &&
   1939        ((a->vd | a->vn | a->vm) & 0x10)) {
   1940        return false;
   1941    }
   1942
   1943    if (!opfn || !narrowfn) {
   1944        /* size == 3 case, which is an entirely different insn group */
   1945        return false;
   1946    }
   1947
   1948    if ((a->vn | a->vm) & 1) {
   1949        return false;
   1950    }
   1951
   1952    if (!vfp_access_check(s)) {
   1953        return true;
   1954    }
   1955
   1956    rn_64 = tcg_temp_new_i64();
   1957    rm_64 = tcg_temp_new_i64();
   1958    rd0 = tcg_temp_new_i32();
   1959    rd1 = tcg_temp_new_i32();
   1960
   1961    read_neon_element64(rn_64, a->vn, 0, MO_64);
   1962    read_neon_element64(rm_64, a->vm, 0, MO_64);
   1963
   1964    opfn(rn_64, rn_64, rm_64);
   1965
   1966    narrowfn(rd0, rn_64);
   1967
   1968    read_neon_element64(rn_64, a->vn, 1, MO_64);
   1969    read_neon_element64(rm_64, a->vm, 1, MO_64);
   1970
   1971    opfn(rn_64, rn_64, rm_64);
   1972
   1973    narrowfn(rd1, rn_64);
   1974
   1975    write_neon_element32(rd0, a->vd, 0, MO_32);
   1976    write_neon_element32(rd1, a->vd, 1, MO_32);
   1977
   1978    tcg_temp_free_i32(rd0);
   1979    tcg_temp_free_i32(rd1);
   1980    tcg_temp_free_i64(rn_64);
   1981    tcg_temp_free_i64(rm_64);
   1982
   1983    return true;
   1984}
   1985
   1986#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP)                       \
   1987    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
   1988    {                                                                   \
   1989        static NeonGenTwo64OpFn * const addfn[] = {                     \
   1990            gen_helper_neon_##OP##l_u16,                                \
   1991            gen_helper_neon_##OP##l_u32,                                \
   1992            tcg_gen_##OP##_i64,                                         \
   1993            NULL,                                                       \
   1994        };                                                              \
   1995        static NeonGenNarrowFn * const narrowfn[] = {                   \
   1996            gen_helper_neon_##NARROWTYPE##_high_u8,                     \
   1997            gen_helper_neon_##NARROWTYPE##_high_u16,                    \
   1998            EXTOP,                                                      \
   1999            NULL,                                                       \
   2000        };                                                              \
   2001        return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]);   \
   2002    }
   2003
/*
 * Narrowing function for the size == 2 slot of VRADDHN/VRSUBHN: add
 * 1 << 31 to @rn and then extract a 32-bit result into @rd with
 * tcg_gen_extrh_i64_i32(). Note that @rn is modified in place.
 */
static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
{
    tcg_gen_addi_i64(rn, rn, 1u << 31);
    tcg_gen_extrh_i64_i32(rd, rn);
}
   2009
/* Non-rounding forms use tcg_gen_extrh_i64_i32 for the size == 2 slot. */
DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
/* Rounding forms use gen_narrow_round_high_u32 for the size == 2 slot. */
DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
   2014
   2015static bool do_long_3d(DisasContext *s, arg_3diff *a,
   2016                       NeonGenTwoOpWidenFn *opfn,
   2017                       NeonGenTwo64OpFn *accfn)
   2018{
   2019    /*
   2020     * 3-regs different lengths, long operations.
   2021     * These perform an operation on two inputs that returns a double-width
   2022     * result, and then possibly perform an accumulation operation of
   2023     * that result into the double-width destination.
   2024     */
   2025    TCGv_i64 rd0, rd1, tmp;
   2026    TCGv_i32 rn, rm;
   2027
   2028    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2029        return false;
   2030    }
   2031
   2032    /* UNDEF accesses to D16-D31 if they don't exist. */
   2033    if (!dc_isar_feature(aa32_simd_r32, s) &&
   2034        ((a->vd | a->vn | a->vm) & 0x10)) {
   2035        return false;
   2036    }
   2037
   2038    if (!opfn) {
   2039        /* size == 3 case, which is an entirely different insn group */
   2040        return false;
   2041    }
   2042
   2043    if (a->vd & 1) {
   2044        return false;
   2045    }
   2046
   2047    if (!vfp_access_check(s)) {
   2048        return true;
   2049    }
   2050
   2051    rd0 = tcg_temp_new_i64();
   2052    rd1 = tcg_temp_new_i64();
   2053
   2054    rn = tcg_temp_new_i32();
   2055    rm = tcg_temp_new_i32();
   2056    read_neon_element32(rn, a->vn, 0, MO_32);
   2057    read_neon_element32(rm, a->vm, 0, MO_32);
   2058    opfn(rd0, rn, rm);
   2059
   2060    read_neon_element32(rn, a->vn, 1, MO_32);
   2061    read_neon_element32(rm, a->vm, 1, MO_32);
   2062    opfn(rd1, rn, rm);
   2063    tcg_temp_free_i32(rn);
   2064    tcg_temp_free_i32(rm);
   2065
   2066    /* Don't store results until after all loads: they might overlap */
   2067    if (accfn) {
   2068        tmp = tcg_temp_new_i64();
   2069        read_neon_element64(tmp, a->vd, 0, MO_64);
   2070        accfn(rd0, tmp, rd0);
   2071        read_neon_element64(tmp, a->vd, 1, MO_64);
   2072        accfn(rd1, tmp, rd1);
   2073        tcg_temp_free_i64(tmp);
   2074    }
   2075
   2076    write_neon_element64(rd0, a->vd, 0, MO_64);
   2077    write_neon_element64(rd1, a->vd, 1, MO_64);
   2078    tcg_temp_free_i64(rd0);
   2079    tcg_temp_free_i64(rd1);
   2080
   2081    return true;
   2082}
   2083
   2084static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
   2085{
   2086    static NeonGenTwoOpWidenFn * const opfn[] = {
   2087        gen_helper_neon_abdl_s16,
   2088        gen_helper_neon_abdl_s32,
   2089        gen_helper_neon_abdl_s64,
   2090        NULL,
   2091    };
   2092
   2093    return do_long_3d(s, a, opfn[a->size], NULL);
   2094}
   2095
   2096static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
   2097{
   2098    static NeonGenTwoOpWidenFn * const opfn[] = {
   2099        gen_helper_neon_abdl_u16,
   2100        gen_helper_neon_abdl_u32,
   2101        gen_helper_neon_abdl_u64,
   2102        NULL,
   2103    };
   2104
   2105    return do_long_3d(s, a, opfn[a->size], NULL);
   2106}
   2107
   2108static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
   2109{
   2110    static NeonGenTwoOpWidenFn * const opfn[] = {
   2111        gen_helper_neon_abdl_s16,
   2112        gen_helper_neon_abdl_s32,
   2113        gen_helper_neon_abdl_s64,
   2114        NULL,
   2115    };
   2116    static NeonGenTwo64OpFn * const addfn[] = {
   2117        gen_helper_neon_addl_u16,
   2118        gen_helper_neon_addl_u32,
   2119        tcg_gen_add_i64,
   2120        NULL,
   2121    };
   2122
   2123    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
   2124}
   2125
   2126static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
   2127{
   2128    static NeonGenTwoOpWidenFn * const opfn[] = {
   2129        gen_helper_neon_abdl_u16,
   2130        gen_helper_neon_abdl_u32,
   2131        gen_helper_neon_abdl_u64,
   2132        NULL,
   2133    };
   2134    static NeonGenTwo64OpFn * const addfn[] = {
   2135        gen_helper_neon_addl_u16,
   2136        gen_helper_neon_addl_u32,
   2137        tcg_gen_add_i64,
   2138        NULL,
   2139    };
   2140
   2141    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
   2142}
   2143
   2144static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
   2145{
   2146    TCGv_i32 lo = tcg_temp_new_i32();
   2147    TCGv_i32 hi = tcg_temp_new_i32();
   2148
   2149    tcg_gen_muls2_i32(lo, hi, rn, rm);
   2150    tcg_gen_concat_i32_i64(rd, lo, hi);
   2151
   2152    tcg_temp_free_i32(lo);
   2153    tcg_temp_free_i32(hi);
   2154}
   2155
   2156static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
   2157{
   2158    TCGv_i32 lo = tcg_temp_new_i32();
   2159    TCGv_i32 hi = tcg_temp_new_i32();
   2160
   2161    tcg_gen_mulu2_i32(lo, hi, rn, rm);
   2162    tcg_gen_concat_i32_i64(rd, lo, hi);
   2163
   2164    tcg_temp_free_i32(lo);
   2165    tcg_temp_free_i32(hi);
   2166}
   2167
   2168static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
   2169{
   2170    static NeonGenTwoOpWidenFn * const opfn[] = {
   2171        gen_helper_neon_mull_s8,
   2172        gen_helper_neon_mull_s16,
   2173        gen_mull_s32,
   2174        NULL,
   2175    };
   2176
   2177    return do_long_3d(s, a, opfn[a->size], NULL);
   2178}
   2179
   2180static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
   2181{
   2182    static NeonGenTwoOpWidenFn * const opfn[] = {
   2183        gen_helper_neon_mull_u8,
   2184        gen_helper_neon_mull_u16,
   2185        gen_mull_u32,
   2186        NULL,
   2187    };
   2188
   2189    return do_long_3d(s, a, opfn[a->size], NULL);
   2190}
   2191
   2192#define DO_VMLAL(INSN,MULL,ACC)                                         \
   2193    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
   2194    {                                                                   \
   2195        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
   2196            gen_helper_neon_##MULL##8,                                  \
   2197            gen_helper_neon_##MULL##16,                                 \
   2198            gen_##MULL##32,                                             \
   2199            NULL,                                                       \
   2200        };                                                              \
   2201        static NeonGenTwo64OpFn * const accfn[] = {                     \
   2202            gen_helper_neon_##ACC##l_u16,                               \
   2203            gen_helper_neon_##ACC##l_u32,                               \
   2204            tcg_gen_##ACC##_i64,                                        \
   2205            NULL,                                                       \
   2206        };                                                              \
   2207        return do_long_3d(s, a, opfn[a->size], accfn[a->size]);         \
   2208    }
   2209
/* Multiply-accumulate (add) and multiply-subtract (sub), signed/unsigned. */
DO_VMLAL(VMLAL_S,mull_s,add)
DO_VMLAL(VMLAL_U,mull_u,add)
DO_VMLAL(VMLSL_S,mull_s,sub)
DO_VMLAL(VMLSL_U,mull_u,sub)
   2214
/*
 * VQDMULL for 16-bit inputs: widening signed multiply into @rd, then
 * double the product by saturating-adding @rd to itself.
 */
static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_helper_neon_mull_s16(rd, rn, rm);
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
}
   2220
/*
 * VQDMULL for 32-bit inputs: signed 32x32->64 multiply into @rd, then
 * double the product by saturating-adding @rd to itself.
 */
static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_mull_s32(rd, rn, rm);
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
}
   2226
   2227static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
   2228{
   2229    static NeonGenTwoOpWidenFn * const opfn[] = {
   2230        NULL,
   2231        gen_VQDMULL_16,
   2232        gen_VQDMULL_32,
   2233        NULL,
   2234    };
   2235
   2236    return do_long_3d(s, a, opfn[a->size], NULL);
   2237}
   2238
/*
 * Accumulate step for VQDMLAL with a->size == 1: saturating add of
 * @rn and @rm into @rd via gen_helper_neon_addl_saturate_s32().
 */
static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
}
   2243
/*
 * Accumulate step for VQDMLAL with a->size == 2: saturating add of
 * @rn and @rm into @rd via gen_helper_neon_addl_saturate_s64().
 */
static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
}
   2248
   2249static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
   2250{
   2251    static NeonGenTwoOpWidenFn * const opfn[] = {
   2252        NULL,
   2253        gen_VQDMULL_16,
   2254        gen_VQDMULL_32,
   2255        NULL,
   2256    };
   2257    static NeonGenTwo64OpFn * const accfn[] = {
   2258        NULL,
   2259        gen_VQDMLAL_acc_16,
   2260        gen_VQDMLAL_acc_32,
   2261        NULL,
   2262    };
   2263
   2264    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
   2265}
   2266
/*
 * Accumulate step for VQDMLSL with a->size == 1: negate @rm with
 * gen_helper_neon_negl_u32() and then saturating-add it to @rn,
 * writing the result to @rd. Note that @rm is overwritten in place.
 */
static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_negl_u32(rm, rm);
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
}
   2272
/*
 * Accumulate step for VQDMLSL with a->size == 2: negate @rm and then
 * saturating-add it to @rn, writing the result to @rd. Note that @rm
 * is overwritten in place.
 */
static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    tcg_gen_neg_i64(rm, rm);
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
}
   2278
   2279static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
   2280{
   2281    static NeonGenTwoOpWidenFn * const opfn[] = {
   2282        NULL,
   2283        gen_VQDMULL_16,
   2284        gen_VQDMULL_32,
   2285        NULL,
   2286    };
   2287    static NeonGenTwo64OpFn * const accfn[] = {
   2288        NULL,
   2289        gen_VQDMLSL_acc_16,
   2290        gen_VQDMLSL_acc_32,
   2291        NULL,
   2292    };
   2293
   2294    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
   2295}
   2296
   2297static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
   2298{
   2299    gen_helper_gvec_3 *fn_gvec;
   2300
   2301    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2302        return false;
   2303    }
   2304
   2305    /* UNDEF accesses to D16-D31 if they don't exist. */
   2306    if (!dc_isar_feature(aa32_simd_r32, s) &&
   2307        ((a->vd | a->vn | a->vm) & 0x10)) {
   2308        return false;
   2309    }
   2310
   2311    if (a->vd & 1) {
   2312        return false;
   2313    }
   2314
   2315    switch (a->size) {
   2316    case 0:
   2317        fn_gvec = gen_helper_neon_pmull_h;
   2318        break;
   2319    case 2:
   2320        if (!dc_isar_feature(aa32_pmull, s)) {
   2321            return false;
   2322        }
   2323        fn_gvec = gen_helper_gvec_pmull_q;
   2324        break;
   2325    default:
   2326        return false;
   2327    }
   2328
   2329    if (!vfp_access_check(s)) {
   2330        return true;
   2331    }
   2332
   2333    tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
   2334                       neon_full_reg_offset(a->vn),
   2335                       neon_full_reg_offset(a->vm),
   2336                       16, 16, 0, fn_gvec);
   2337    return true;
   2338}
   2339
   2340static void gen_neon_dup_low16(TCGv_i32 var)
   2341{
   2342    TCGv_i32 tmp = tcg_temp_new_i32();
   2343    tcg_gen_ext16u_i32(var, var);
   2344    tcg_gen_shli_i32(tmp, var, 16);
   2345    tcg_gen_or_i32(var, var, tmp);
   2346    tcg_temp_free_i32(tmp);
   2347}
   2348
   2349static void gen_neon_dup_high16(TCGv_i32 var)
   2350{
   2351    TCGv_i32 tmp = tcg_temp_new_i32();
   2352    tcg_gen_andi_i32(var, var, 0xffff0000);
   2353    tcg_gen_shri_i32(tmp, var, 16);
   2354    tcg_gen_or_i32(var, var, tmp);
   2355    tcg_temp_free_i32(tmp);
   2356}
   2357
   2358static inline TCGv_i32 neon_get_scalar(int size, int reg)
   2359{
   2360    TCGv_i32 tmp = tcg_temp_new_i32();
   2361    if (size == MO_16) {
   2362        read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
   2363        if (reg & 8) {
   2364            gen_neon_dup_high16(tmp);
   2365        } else {
   2366            gen_neon_dup_low16(tmp);
   2367        }
   2368    } else {
   2369        read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
   2370    }
   2371    return tmp;
   2372}
   2373
   2374static bool do_2scalar(DisasContext *s, arg_2scalar *a,
   2375                       NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
   2376{
   2377    /*
   2378     * Two registers and a scalar: perform an operation between
   2379     * the input elements and the scalar, and then possibly
   2380     * perform an accumulation operation of that result into the
   2381     * destination.
   2382     */
   2383    TCGv_i32 scalar, tmp;
   2384    int pass;
   2385
   2386    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2387        return false;
   2388    }
   2389
   2390    /* UNDEF accesses to D16-D31 if they don't exist. */
   2391    if (!dc_isar_feature(aa32_simd_r32, s) &&
   2392        ((a->vd | a->vn | a->vm) & 0x10)) {
   2393        return false;
   2394    }
   2395
   2396    if (!opfn) {
   2397        /* Bad size (including size == 3, which is a different insn group) */
   2398        return false;
   2399    }
   2400
   2401    if (a->q && ((a->vd | a->vn) & 1)) {
   2402        return false;
   2403    }
   2404
   2405    if (!vfp_access_check(s)) {
   2406        return true;
   2407    }
   2408
   2409    scalar = neon_get_scalar(a->size, a->vm);
   2410    tmp = tcg_temp_new_i32();
   2411
   2412    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
   2413        read_neon_element32(tmp, a->vn, pass, MO_32);
   2414        opfn(tmp, tmp, scalar);
   2415        if (accfn) {
   2416            TCGv_i32 rd = tcg_temp_new_i32();
   2417            read_neon_element32(rd, a->vd, pass, MO_32);
   2418            accfn(tmp, rd, tmp);
   2419            tcg_temp_free_i32(rd);
   2420        }
   2421        write_neon_element32(tmp, a->vd, pass, MO_32);
   2422    }
   2423    tcg_temp_free_i32(tmp);
   2424    tcg_temp_free_i32(scalar);
   2425    return true;
   2426}
   2427
   2428static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
   2429{
   2430    static NeonGenTwoOpFn * const opfn[] = {
   2431        NULL,
   2432        gen_helper_neon_mul_u16,
   2433        tcg_gen_mul_i32,
   2434        NULL,
   2435    };
   2436
   2437    return do_2scalar(s, a, opfn[a->size], NULL);
   2438}
   2439
   2440static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
   2441{
   2442    static NeonGenTwoOpFn * const opfn[] = {
   2443        NULL,
   2444        gen_helper_neon_mul_u16,
   2445        tcg_gen_mul_i32,
   2446        NULL,
   2447    };
   2448    static NeonGenTwoOpFn * const accfn[] = {
   2449        NULL,
   2450        gen_helper_neon_add_u16,
   2451        tcg_gen_add_i32,
   2452        NULL,
   2453    };
   2454
   2455    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
   2456}
   2457
   2458static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
   2459{
   2460    static NeonGenTwoOpFn * const opfn[] = {
   2461        NULL,
   2462        gen_helper_neon_mul_u16,
   2463        tcg_gen_mul_i32,
   2464        NULL,
   2465    };
   2466    static NeonGenTwoOpFn * const accfn[] = {
   2467        NULL,
   2468        gen_helper_neon_sub_u16,
   2469        tcg_gen_sub_i32,
   2470        NULL,
   2471    };
   2472
   2473    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
   2474}
   2475
   2476static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
   2477                              gen_helper_gvec_3_ptr *fn)
   2478{
   2479    /* Two registers and a scalar, using gvec */
   2480    int vec_size = a->q ? 16 : 8;
   2481    int rd_ofs = neon_full_reg_offset(a->vd);
   2482    int rn_ofs = neon_full_reg_offset(a->vn);
   2483    int rm_ofs;
   2484    int idx;
   2485    TCGv_ptr fpstatus;
   2486
   2487    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2488        return false;
   2489    }
   2490
   2491    /* UNDEF accesses to D16-D31 if they don't exist. */
   2492    if (!dc_isar_feature(aa32_simd_r32, s) &&
   2493        ((a->vd | a->vn | a->vm) & 0x10)) {
   2494        return false;
   2495    }
   2496
   2497    if (!fn) {
   2498        /* Bad size (including size == 3, which is a different insn group) */
   2499        return false;
   2500    }
   2501
   2502    if (a->q && ((a->vd | a->vn) & 1)) {
   2503        return false;
   2504    }
   2505
   2506    if (!vfp_access_check(s)) {
   2507        return true;
   2508    }
   2509
   2510    /* a->vm is M:Vm, which encodes both register and index */
   2511    idx = extract32(a->vm, a->size + 2, 2);
   2512    a->vm = extract32(a->vm, 0, a->size + 2);
   2513    rm_ofs = neon_full_reg_offset(a->vm);
   2514
   2515    fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
   2516    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
   2517                       vec_size, vec_size, idx, fn);
   2518    tcg_temp_free_ptr(fpstatus);
   2519    return true;
   2520}
   2521
   2522#define DO_VMUL_F_2sc(NAME, FUNC)                                       \
   2523    static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a)   \
   2524    {                                                                   \
   2525        static gen_helper_gvec_3_ptr * const opfn[] = {                 \
   2526            NULL,                                                       \
   2527            gen_helper_##FUNC##_h,                                      \
   2528            gen_helper_##FUNC##_s,                                      \
   2529            NULL,                                                       \
   2530        };                                                              \
   2531        if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
   2532            return false;                                               \
   2533        }                                                               \
   2534        return do_2scalar_fp_vec(s, a, opfn[a->size]);                  \
   2535    }
   2536
   2537DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
   2538DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
   2539DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)
   2540
   2541WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
   2542WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
   2543WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
   2544WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
   2545
   2546static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
   2547{
   2548    static NeonGenTwoOpFn * const opfn[] = {
   2549        NULL,
   2550        gen_VQDMULH_16,
   2551        gen_VQDMULH_32,
   2552        NULL,
   2553    };
   2554
   2555    return do_2scalar(s, a, opfn[a->size], NULL);
   2556}
   2557
   2558static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
   2559{
   2560    static NeonGenTwoOpFn * const opfn[] = {
   2561        NULL,
   2562        gen_VQRDMULH_16,
   2563        gen_VQRDMULH_32,
   2564        NULL,
   2565    };
   2566
   2567    return do_2scalar(s, a, opfn[a->size], NULL);
   2568}
   2569
   2570static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
   2571                            NeonGenThreeOpEnvFn *opfn)
   2572{
   2573    /*
   2574     * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
   2575     * performs a kind of fused op-then-accumulate using a helper
   2576     * function that takes all of rd, rn and the scalar at once.
   2577     */
   2578    TCGv_i32 scalar, rn, rd;
   2579    int pass;
   2580
   2581    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2582        return false;
   2583    }
   2584
   2585    if (!dc_isar_feature(aa32_rdm, s)) {
   2586        return false;
   2587    }
   2588
   2589    /* UNDEF accesses to D16-D31 if they don't exist. */
   2590    if (!dc_isar_feature(aa32_simd_r32, s) &&
   2591        ((a->vd | a->vn | a->vm) & 0x10)) {
   2592        return false;
   2593    }
   2594
   2595    if (!opfn) {
   2596        /* Bad size (including size == 3, which is a different insn group) */
   2597        return false;
   2598    }
   2599
   2600    if (a->q && ((a->vd | a->vn) & 1)) {
   2601        return false;
   2602    }
   2603
   2604    if (!vfp_access_check(s)) {
   2605        return true;
   2606    }
   2607
   2608    scalar = neon_get_scalar(a->size, a->vm);
   2609    rn = tcg_temp_new_i32();
   2610    rd = tcg_temp_new_i32();
   2611
   2612    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
   2613        read_neon_element32(rn, a->vn, pass, MO_32);
   2614        read_neon_element32(rd, a->vd, pass, MO_32);
   2615        opfn(rd, cpu_env, rn, scalar, rd);
   2616        write_neon_element32(rd, a->vd, pass, MO_32);
   2617    }
   2618    tcg_temp_free_i32(rn);
   2619    tcg_temp_free_i32(rd);
   2620    tcg_temp_free_i32(scalar);
   2621
   2622    return true;
   2623}
   2624
   2625static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
   2626{
   2627    static NeonGenThreeOpEnvFn *opfn[] = {
   2628        NULL,
   2629        gen_helper_neon_qrdmlah_s16,
   2630        gen_helper_neon_qrdmlah_s32,
   2631        NULL,
   2632    };
   2633    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
   2634}
   2635
   2636static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
   2637{
   2638    static NeonGenThreeOpEnvFn *opfn[] = {
   2639        NULL,
   2640        gen_helper_neon_qrdmlsh_s16,
   2641        gen_helper_neon_qrdmlsh_s32,
   2642        NULL,
   2643    };
   2644    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
   2645}
   2646
   2647static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
   2648                            NeonGenTwoOpWidenFn *opfn,
   2649                            NeonGenTwo64OpFn *accfn)
   2650{
   2651    /*
   2652     * Two registers and a scalar, long operations: perform an
   2653     * operation on the input elements and the scalar which produces
   2654     * a double-width result, and then possibly perform an accumulation
   2655     * operation of that result into the destination.
   2656     */
   2657    TCGv_i32 scalar, rn;
   2658    TCGv_i64 rn0_64, rn1_64;
   2659
   2660    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2661        return false;
   2662    }
   2663
   2664    /* UNDEF accesses to D16-D31 if they don't exist. */
   2665    if (!dc_isar_feature(aa32_simd_r32, s) &&
   2666        ((a->vd | a->vn | a->vm) & 0x10)) {
   2667        return false;
   2668    }
   2669
   2670    if (!opfn) {
   2671        /* Bad size (including size == 3, which is a different insn group) */
   2672        return false;
   2673    }
   2674
   2675    if (a->vd & 1) {
   2676        return false;
   2677    }
   2678
   2679    if (!vfp_access_check(s)) {
   2680        return true;
   2681    }
   2682
   2683    scalar = neon_get_scalar(a->size, a->vm);
   2684
   2685    /* Load all inputs before writing any outputs, in case of overlap */
   2686    rn = tcg_temp_new_i32();
   2687    read_neon_element32(rn, a->vn, 0, MO_32);
   2688    rn0_64 = tcg_temp_new_i64();
   2689    opfn(rn0_64, rn, scalar);
   2690
   2691    read_neon_element32(rn, a->vn, 1, MO_32);
   2692    rn1_64 = tcg_temp_new_i64();
   2693    opfn(rn1_64, rn, scalar);
   2694    tcg_temp_free_i32(rn);
   2695    tcg_temp_free_i32(scalar);
   2696
   2697    if (accfn) {
   2698        TCGv_i64 t64 = tcg_temp_new_i64();
   2699        read_neon_element64(t64, a->vd, 0, MO_64);
   2700        accfn(rn0_64, t64, rn0_64);
   2701        read_neon_element64(t64, a->vd, 1, MO_64);
   2702        accfn(rn1_64, t64, rn1_64);
   2703        tcg_temp_free_i64(t64);
   2704    }
   2705
   2706    write_neon_element64(rn0_64, a->vd, 0, MO_64);
   2707    write_neon_element64(rn1_64, a->vd, 1, MO_64);
   2708    tcg_temp_free_i64(rn0_64);
   2709    tcg_temp_free_i64(rn1_64);
   2710    return true;
   2711}
   2712
   2713static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
   2714{
   2715    static NeonGenTwoOpWidenFn * const opfn[] = {
   2716        NULL,
   2717        gen_helper_neon_mull_s16,
   2718        gen_mull_s32,
   2719        NULL,
   2720    };
   2721
   2722    return do_2scalar_long(s, a, opfn[a->size], NULL);
   2723}
   2724
   2725static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
   2726{
   2727    static NeonGenTwoOpWidenFn * const opfn[] = {
   2728        NULL,
   2729        gen_helper_neon_mull_u16,
   2730        gen_mull_u32,
   2731        NULL,
   2732    };
   2733
   2734    return do_2scalar_long(s, a, opfn[a->size], NULL);
   2735}
   2736
   2737#define DO_VMLAL_2SC(INSN, MULL, ACC)                                   \
   2738    static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a)     \
   2739    {                                                                   \
   2740        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
   2741            NULL,                                                       \
   2742            gen_helper_neon_##MULL##16,                                 \
   2743            gen_##MULL##32,                                             \
   2744            NULL,                                                       \
   2745        };                                                              \
   2746        static NeonGenTwo64OpFn * const accfn[] = {                     \
   2747            NULL,                                                       \
   2748            gen_helper_neon_##ACC##l_u32,                               \
   2749            tcg_gen_##ACC##_i64,                                        \
   2750            NULL,                                                       \
   2751        };                                                              \
   2752        return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);    \
   2753    }
   2754
   2755DO_VMLAL_2SC(VMLAL_S, mull_s, add)
   2756DO_VMLAL_2SC(VMLAL_U, mull_u, add)
   2757DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
   2758DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
   2759
   2760static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
   2761{
   2762    static NeonGenTwoOpWidenFn * const opfn[] = {
   2763        NULL,
   2764        gen_VQDMULL_16,
   2765        gen_VQDMULL_32,
   2766        NULL,
   2767    };
   2768
   2769    return do_2scalar_long(s, a, opfn[a->size], NULL);
   2770}
   2771
   2772static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
   2773{
   2774    static NeonGenTwoOpWidenFn * const opfn[] = {
   2775        NULL,
   2776        gen_VQDMULL_16,
   2777        gen_VQDMULL_32,
   2778        NULL,
   2779    };
   2780    static NeonGenTwo64OpFn * const accfn[] = {
   2781        NULL,
   2782        gen_VQDMLAL_acc_16,
   2783        gen_VQDMLAL_acc_32,
   2784        NULL,
   2785    };
   2786
   2787    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
   2788}
   2789
   2790static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
   2791{
   2792    static NeonGenTwoOpWidenFn * const opfn[] = {
   2793        NULL,
   2794        gen_VQDMULL_16,
   2795        gen_VQDMULL_32,
   2796        NULL,
   2797    };
   2798    static NeonGenTwo64OpFn * const accfn[] = {
   2799        NULL,
   2800        gen_VQDMLSL_acc_16,
   2801        gen_VQDMLSL_acc_32,
   2802        NULL,
   2803    };
   2804
   2805    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
   2806}
   2807
   2808static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
   2809{
   2810    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2811        return false;
   2812    }
   2813
   2814    /* UNDEF accesses to D16-D31 if they don't exist. */
   2815    if (!dc_isar_feature(aa32_simd_r32, s) &&
   2816        ((a->vd | a->vn | a->vm) & 0x10)) {
   2817        return false;
   2818    }
   2819
   2820    if ((a->vn | a->vm | a->vd) & a->q) {
   2821        return false;
   2822    }
   2823
   2824    if (a->imm > 7 && !a->q) {
   2825        return false;
   2826    }
   2827
   2828    if (!vfp_access_check(s)) {
   2829        return true;
   2830    }
   2831
   2832    if (!a->q) {
   2833        /* Extract 64 bits from <Vm:Vn> */
   2834        TCGv_i64 left, right, dest;
   2835
   2836        left = tcg_temp_new_i64();
   2837        right = tcg_temp_new_i64();
   2838        dest = tcg_temp_new_i64();
   2839
   2840        read_neon_element64(right, a->vn, 0, MO_64);
   2841        read_neon_element64(left, a->vm, 0, MO_64);
   2842        tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
   2843        write_neon_element64(dest, a->vd, 0, MO_64);
   2844
   2845        tcg_temp_free_i64(left);
   2846        tcg_temp_free_i64(right);
   2847        tcg_temp_free_i64(dest);
   2848    } else {
   2849        /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
   2850        TCGv_i64 left, middle, right, destleft, destright;
   2851
   2852        left = tcg_temp_new_i64();
   2853        middle = tcg_temp_new_i64();
   2854        right = tcg_temp_new_i64();
   2855        destleft = tcg_temp_new_i64();
   2856        destright = tcg_temp_new_i64();
   2857
   2858        if (a->imm < 8) {
   2859            read_neon_element64(right, a->vn, 0, MO_64);
   2860            read_neon_element64(middle, a->vn, 1, MO_64);
   2861            tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
   2862            read_neon_element64(left, a->vm, 0, MO_64);
   2863            tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
   2864        } else {
   2865            read_neon_element64(right, a->vn, 1, MO_64);
   2866            read_neon_element64(middle, a->vm, 0, MO_64);
   2867            tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
   2868            read_neon_element64(left, a->vm, 1, MO_64);
   2869            tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
   2870        }
   2871
   2872        write_neon_element64(destright, a->vd, 0, MO_64);
   2873        write_neon_element64(destleft, a->vd, 1, MO_64);
   2874
   2875        tcg_temp_free_i64(destright);
   2876        tcg_temp_free_i64(destleft);
   2877        tcg_temp_free_i64(right);
   2878        tcg_temp_free_i64(middle);
   2879        tcg_temp_free_i64(left);
   2880    }
   2881    return true;
   2882}
   2883
   2884static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
   2885{
   2886    TCGv_i64 val, def;
   2887    TCGv_i32 desc;
   2888
   2889    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2890        return false;
   2891    }
   2892
   2893    /* UNDEF accesses to D16-D31 if they don't exist. */
   2894    if (!dc_isar_feature(aa32_simd_r32, s) &&
   2895        ((a->vd | a->vn | a->vm) & 0x10)) {
   2896        return false;
   2897    }
   2898
   2899    if ((a->vn + a->len + 1) > 32) {
   2900        /*
   2901         * This is UNPREDICTABLE; we choose to UNDEF to avoid the
   2902         * helper function running off the end of the register file.
   2903         */
   2904        return false;
   2905    }
   2906
   2907    if (!vfp_access_check(s)) {
   2908        return true;
   2909    }
   2910
   2911    desc = tcg_const_i32((a->vn << 2) | a->len);
   2912    def = tcg_temp_new_i64();
   2913    if (a->op) {
   2914        read_neon_element64(def, a->vd, 0, MO_64);
   2915    } else {
   2916        tcg_gen_movi_i64(def, 0);
   2917    }
   2918    val = tcg_temp_new_i64();
   2919    read_neon_element64(val, a->vm, 0, MO_64);
   2920
   2921    gen_helper_neon_tbl(val, cpu_env, desc, val, def);
   2922    write_neon_element64(val, a->vd, 0, MO_64);
   2923
   2924    tcg_temp_free_i64(def);
   2925    tcg_temp_free_i64(val);
   2926    tcg_temp_free_i32(desc);
   2927    return true;
   2928}
   2929
   2930static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
   2931{
   2932    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2933        return false;
   2934    }
   2935
   2936    /* UNDEF accesses to D16-D31 if they don't exist. */
   2937    if (!dc_isar_feature(aa32_simd_r32, s) &&
   2938        ((a->vd | a->vm) & 0x10)) {
   2939        return false;
   2940    }
   2941
   2942    if (a->vd & a->q) {
   2943        return false;
   2944    }
   2945
   2946    if (!vfp_access_check(s)) {
   2947        return true;
   2948    }
   2949
   2950    tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
   2951                         neon_element_offset(a->vm, a->index, a->size),
   2952                         a->q ? 16 : 8, a->q ? 16 : 8);
   2953    return true;
   2954}
   2955
   2956static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
   2957{
   2958    int pass, half;
   2959    TCGv_i32 tmp[2];
   2960
   2961    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2962        return false;
   2963    }
   2964
   2965    /* UNDEF accesses to D16-D31 if they don't exist. */
   2966    if (!dc_isar_feature(aa32_simd_r32, s) &&
   2967        ((a->vd | a->vm) & 0x10)) {
   2968        return false;
   2969    }
   2970
   2971    if ((a->vd | a->vm) & a->q) {
   2972        return false;
   2973    }
   2974
   2975    if (a->size == 3) {
   2976        return false;
   2977    }
   2978
   2979    if (!vfp_access_check(s)) {
   2980        return true;
   2981    }
   2982
   2983    tmp[0] = tcg_temp_new_i32();
   2984    tmp[1] = tcg_temp_new_i32();
   2985
   2986    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
   2987        for (half = 0; half < 2; half++) {
   2988            read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
   2989            switch (a->size) {
   2990            case 0:
   2991                tcg_gen_bswap32_i32(tmp[half], tmp[half]);
   2992                break;
   2993            case 1:
   2994                gen_swap_half(tmp[half], tmp[half]);
   2995                break;
   2996            case 2:
   2997                break;
   2998            default:
   2999                g_assert_not_reached();
   3000            }
   3001        }
   3002        write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
   3003        write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
   3004    }
   3005
   3006    tcg_temp_free_i32(tmp[0]);
   3007    tcg_temp_free_i32(tmp[1]);
   3008    return true;
   3009}
   3010
   3011static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
   3012                              NeonGenWidenFn *widenfn,
   3013                              NeonGenTwo64OpFn *opfn,
   3014                              NeonGenTwo64OpFn *accfn)
   3015{
   3016    /*
   3017     * Pairwise long operations: widen both halves of the pair,
   3018     * combine the pairs with the opfn, and then possibly accumulate
   3019     * into the destination with the accfn.
   3020     */
   3021    int pass;
   3022
   3023    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3024        return false;
   3025    }
   3026
   3027    /* UNDEF accesses to D16-D31 if they don't exist. */
   3028    if (!dc_isar_feature(aa32_simd_r32, s) &&
   3029        ((a->vd | a->vm) & 0x10)) {
   3030        return false;
   3031    }
   3032
   3033    if ((a->vd | a->vm) & a->q) {
   3034        return false;
   3035    }
   3036
   3037    if (!widenfn) {
   3038        return false;
   3039    }
   3040
   3041    if (!vfp_access_check(s)) {
   3042        return true;
   3043    }
   3044
   3045    for (pass = 0; pass < a->q + 1; pass++) {
   3046        TCGv_i32 tmp;
   3047        TCGv_i64 rm0_64, rm1_64, rd_64;
   3048
   3049        rm0_64 = tcg_temp_new_i64();
   3050        rm1_64 = tcg_temp_new_i64();
   3051        rd_64 = tcg_temp_new_i64();
   3052
   3053        tmp = tcg_temp_new_i32();
   3054        read_neon_element32(tmp, a->vm, pass * 2, MO_32);
   3055        widenfn(rm0_64, tmp);
   3056        read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
   3057        widenfn(rm1_64, tmp);
   3058        tcg_temp_free_i32(tmp);
   3059
   3060        opfn(rd_64, rm0_64, rm1_64);
   3061        tcg_temp_free_i64(rm0_64);
   3062        tcg_temp_free_i64(rm1_64);
   3063
   3064        if (accfn) {
   3065            TCGv_i64 tmp64 = tcg_temp_new_i64();
   3066            read_neon_element64(tmp64, a->vd, pass, MO_64);
   3067            accfn(rd_64, tmp64, rd_64);
   3068            tcg_temp_free_i64(tmp64);
   3069        }
   3070        write_neon_element64(rd_64, a->vd, pass, MO_64);
   3071        tcg_temp_free_i64(rd_64);
   3072    }
   3073    return true;
   3074}
   3075
   3076static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
   3077{
   3078    static NeonGenWidenFn * const widenfn[] = {
   3079        gen_helper_neon_widen_s8,
   3080        gen_helper_neon_widen_s16,
   3081        tcg_gen_ext_i32_i64,
   3082        NULL,
   3083    };
   3084    static NeonGenTwo64OpFn * const opfn[] = {
   3085        gen_helper_neon_paddl_u16,
   3086        gen_helper_neon_paddl_u32,
   3087        tcg_gen_add_i64,
   3088        NULL,
   3089    };
   3090
   3091    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
   3092}
   3093
   3094static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
   3095{
   3096    static NeonGenWidenFn * const widenfn[] = {
   3097        gen_helper_neon_widen_u8,
   3098        gen_helper_neon_widen_u16,
   3099        tcg_gen_extu_i32_i64,
   3100        NULL,
   3101    };
   3102    static NeonGenTwo64OpFn * const opfn[] = {
   3103        gen_helper_neon_paddl_u16,
   3104        gen_helper_neon_paddl_u32,
   3105        tcg_gen_add_i64,
   3106        NULL,
   3107    };
   3108
   3109    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
   3110}
   3111
   3112static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
   3113{
   3114    static NeonGenWidenFn * const widenfn[] = {
   3115        gen_helper_neon_widen_s8,
   3116        gen_helper_neon_widen_s16,
   3117        tcg_gen_ext_i32_i64,
   3118        NULL,
   3119    };
   3120    static NeonGenTwo64OpFn * const opfn[] = {
   3121        gen_helper_neon_paddl_u16,
   3122        gen_helper_neon_paddl_u32,
   3123        tcg_gen_add_i64,
   3124        NULL,
   3125    };
   3126    static NeonGenTwo64OpFn * const accfn[] = {
   3127        gen_helper_neon_addl_u16,
   3128        gen_helper_neon_addl_u32,
   3129        tcg_gen_add_i64,
   3130        NULL,
   3131    };
   3132
   3133    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
   3134                             accfn[a->size]);
   3135}
   3136
   3137static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
   3138{
   3139    static NeonGenWidenFn * const widenfn[] = {
   3140        gen_helper_neon_widen_u8,
   3141        gen_helper_neon_widen_u16,
   3142        tcg_gen_extu_i32_i64,
   3143        NULL,
   3144    };
   3145    static NeonGenTwo64OpFn * const opfn[] = {
   3146        gen_helper_neon_paddl_u16,
   3147        gen_helper_neon_paddl_u32,
   3148        tcg_gen_add_i64,
   3149        NULL,
   3150    };
   3151    static NeonGenTwo64OpFn * const accfn[] = {
   3152        gen_helper_neon_addl_u16,
   3153        gen_helper_neon_addl_u32,
   3154        tcg_gen_add_i64,
   3155        NULL,
   3156    };
   3157
   3158    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
   3159                             accfn[a->size]);
   3160}
   3161
   3162typedef void ZipFn(TCGv_ptr, TCGv_ptr);
   3163
   3164static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
   3165                       ZipFn *fn)
   3166{
   3167    TCGv_ptr pd, pm;
   3168
   3169    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3170        return false;
   3171    }
   3172
   3173    /* UNDEF accesses to D16-D31 if they don't exist. */
   3174    if (!dc_isar_feature(aa32_simd_r32, s) &&
   3175        ((a->vd | a->vm) & 0x10)) {
   3176        return false;
   3177    }
   3178
   3179    if ((a->vd | a->vm) & a->q) {
   3180        return false;
   3181    }
   3182
   3183    if (!fn) {
   3184        /* Bad size or size/q combination */
   3185        return false;
   3186    }
   3187
   3188    if (!vfp_access_check(s)) {
   3189        return true;
   3190    }
   3191
   3192    pd = vfp_reg_ptr(true, a->vd);
   3193    pm = vfp_reg_ptr(true, a->vm);
   3194    fn(pd, pm);
   3195    tcg_temp_free_ptr(pd);
   3196    tcg_temp_free_ptr(pm);
   3197    return true;
   3198}
   3199
   3200static bool trans_VUZP(DisasContext *s, arg_2misc *a)
   3201{
   3202    static ZipFn * const fn[2][4] = {
   3203        {
   3204            gen_helper_neon_unzip8,
   3205            gen_helper_neon_unzip16,
   3206            NULL,
   3207            NULL,
   3208        }, {
   3209            gen_helper_neon_qunzip8,
   3210            gen_helper_neon_qunzip16,
   3211            gen_helper_neon_qunzip32,
   3212            NULL,
   3213        }
   3214    };
   3215    return do_zip_uzp(s, a, fn[a->q][a->size]);
   3216}
   3217
   3218static bool trans_VZIP(DisasContext *s, arg_2misc *a)
   3219{
   3220    static ZipFn * const fn[2][4] = {
   3221        {
   3222            gen_helper_neon_zip8,
   3223            gen_helper_neon_zip16,
   3224            NULL,
   3225            NULL,
   3226        }, {
   3227            gen_helper_neon_qzip8,
   3228            gen_helper_neon_qzip16,
   3229            gen_helper_neon_qzip32,
   3230            NULL,
   3231        }
   3232    };
   3233    return do_zip_uzp(s, a, fn[a->q][a->size]);
   3234}
   3235
   3236static bool do_vmovn(DisasContext *s, arg_2misc *a,
   3237                     NeonGenNarrowEnvFn *narrowfn)
   3238{
   3239    TCGv_i64 rm;
   3240    TCGv_i32 rd0, rd1;
   3241
   3242    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3243        return false;
   3244    }
   3245
   3246    /* UNDEF accesses to D16-D31 if they don't exist. */
   3247    if (!dc_isar_feature(aa32_simd_r32, s) &&
   3248        ((a->vd | a->vm) & 0x10)) {
   3249        return false;
   3250    }
   3251
   3252    if (a->vm & 1) {
   3253        return false;
   3254    }
   3255
   3256    if (!narrowfn) {
   3257        return false;
   3258    }
   3259
   3260    if (!vfp_access_check(s)) {
   3261        return true;
   3262    }
   3263
   3264    rm = tcg_temp_new_i64();
   3265    rd0 = tcg_temp_new_i32();
   3266    rd1 = tcg_temp_new_i32();
   3267
   3268    read_neon_element64(rm, a->vm, 0, MO_64);
   3269    narrowfn(rd0, cpu_env, rm);
   3270    read_neon_element64(rm, a->vm, 1, MO_64);
   3271    narrowfn(rd1, cpu_env, rm);
   3272    write_neon_element32(rd0, a->vd, 0, MO_32);
   3273    write_neon_element32(rd1, a->vd, 1, MO_32);
   3274    tcg_temp_free_i32(rd0);
   3275    tcg_temp_free_i32(rd1);
   3276    tcg_temp_free_i64(rm);
   3277    return true;
   3278}
   3279
   3280#define DO_VMOVN(INSN, FUNC)                                    \
   3281    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
   3282    {                                                           \
   3283        static NeonGenNarrowEnvFn * const narrowfn[] = {        \
   3284            FUNC##8,                                            \
   3285            FUNC##16,                                           \
   3286            FUNC##32,                                           \
   3287            NULL,                                               \
   3288        };                                                      \
   3289        return do_vmovn(s, a, narrowfn[a->size]);               \
   3290    }
   3291
   3292DO_VMOVN(VMOVN, gen_neon_narrow_u)
   3293DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
   3294DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
   3295DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
   3296
   3297static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
   3298{
   3299    TCGv_i32 rm0, rm1;
   3300    TCGv_i64 rd;
   3301    static NeonGenWidenFn * const widenfns[] = {
   3302        gen_helper_neon_widen_u8,
   3303        gen_helper_neon_widen_u16,
   3304        tcg_gen_extu_i32_i64,
   3305        NULL,
   3306    };
   3307    NeonGenWidenFn *widenfn = widenfns[a->size];
   3308
   3309    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3310        return false;
   3311    }
   3312
   3313    /* UNDEF accesses to D16-D31 if they don't exist. */
   3314    if (!dc_isar_feature(aa32_simd_r32, s) &&
   3315        ((a->vd | a->vm) & 0x10)) {
   3316        return false;
   3317    }
   3318
   3319    if (a->vd & 1) {
   3320        return false;
   3321    }
   3322
   3323    if (!widenfn) {
   3324        return false;
   3325    }
   3326
   3327    if (!vfp_access_check(s)) {
   3328        return true;
   3329    }
   3330
   3331    rd = tcg_temp_new_i64();
   3332    rm0 = tcg_temp_new_i32();
   3333    rm1 = tcg_temp_new_i32();
   3334
   3335    read_neon_element32(rm0, a->vm, 0, MO_32);
   3336    read_neon_element32(rm1, a->vm, 1, MO_32);
   3337
   3338    widenfn(rd, rm0);
   3339    tcg_gen_shli_i64(rd, rd, 8 << a->size);
   3340    write_neon_element64(rd, a->vd, 0, MO_64);
   3341    widenfn(rd, rm1);
   3342    tcg_gen_shli_i64(rd, rd, 8 << a->size);
   3343    write_neon_element64(rd, a->vd, 1, MO_64);
   3344
   3345    tcg_temp_free_i64(rd);
   3346    tcg_temp_free_i32(rm0);
   3347    tcg_temp_free_i32(rm1);
   3348    return true;
   3349}
   3350
   3351static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a)
   3352{
   3353    TCGv_ptr fpst;
   3354    TCGv_i64 tmp;
   3355    TCGv_i32 dst0, dst1;
   3356
   3357    if (!dc_isar_feature(aa32_bf16, s)) {
   3358        return false;
   3359    }
   3360
   3361    /* UNDEF accesses to D16-D31 if they don't exist. */
   3362    if (!dc_isar_feature(aa32_simd_r32, s) &&
   3363        ((a->vd | a->vm) & 0x10)) {
   3364        return false;
   3365    }
   3366
   3367    if ((a->vm & 1) || (a->size != 1)) {
   3368        return false;
   3369    }
   3370
   3371    if (!vfp_access_check(s)) {
   3372        return true;
   3373    }
   3374
   3375    fpst = fpstatus_ptr(FPST_STD);
   3376    tmp = tcg_temp_new_i64();
   3377    dst0 = tcg_temp_new_i32();
   3378    dst1 = tcg_temp_new_i32();
   3379
   3380    read_neon_element64(tmp, a->vm, 0, MO_64);
   3381    gen_helper_bfcvt_pair(dst0, tmp, fpst);
   3382
   3383    read_neon_element64(tmp, a->vm, 1, MO_64);
   3384    gen_helper_bfcvt_pair(dst1, tmp, fpst);
   3385
   3386    write_neon_element32(dst0, a->vd, 0, MO_32);
   3387    write_neon_element32(dst1, a->vd, 1, MO_32);
   3388
   3389    tcg_temp_free_i64(tmp);
   3390    tcg_temp_free_i32(dst0);
   3391    tcg_temp_free_i32(dst1);
   3392    tcg_temp_free_ptr(fpst);
   3393    return true;
   3394}
   3395
   3396static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
   3397{
   3398    TCGv_ptr fpst;
   3399    TCGv_i32 ahp, tmp, tmp2, tmp3;
   3400
   3401    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
   3402        !dc_isar_feature(aa32_fp16_spconv, s)) {
   3403        return false;
   3404    }
   3405
   3406    /* UNDEF accesses to D16-D31 if they don't exist. */
   3407    if (!dc_isar_feature(aa32_simd_r32, s) &&
   3408        ((a->vd | a->vm) & 0x10)) {
   3409        return false;
   3410    }
   3411
   3412    if ((a->vm & 1) || (a->size != 1)) {
   3413        return false;
   3414    }
   3415
   3416    if (!vfp_access_check(s)) {
   3417        return true;
   3418    }
   3419
   3420    fpst = fpstatus_ptr(FPST_STD);
   3421    ahp = get_ahp_flag();
   3422    tmp = tcg_temp_new_i32();
   3423    read_neon_element32(tmp, a->vm, 0, MO_32);
   3424    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
   3425    tmp2 = tcg_temp_new_i32();
   3426    read_neon_element32(tmp2, a->vm, 1, MO_32);
   3427    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
   3428    tcg_gen_shli_i32(tmp2, tmp2, 16);
   3429    tcg_gen_or_i32(tmp2, tmp2, tmp);
   3430    read_neon_element32(tmp, a->vm, 2, MO_32);
   3431    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
   3432    tmp3 = tcg_temp_new_i32();
   3433    read_neon_element32(tmp3, a->vm, 3, MO_32);
   3434    write_neon_element32(tmp2, a->vd, 0, MO_32);
   3435    tcg_temp_free_i32(tmp2);
   3436    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
   3437    tcg_gen_shli_i32(tmp3, tmp3, 16);
   3438    tcg_gen_or_i32(tmp3, tmp3, tmp);
   3439    write_neon_element32(tmp3, a->vd, 1, MO_32);
   3440    tcg_temp_free_i32(tmp3);
   3441    tcg_temp_free_i32(tmp);
   3442    tcg_temp_free_i32(ahp);
   3443    tcg_temp_free_ptr(fpst);
   3444
   3445    return true;
   3446}
   3447
   3448static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
   3449{
   3450    TCGv_ptr fpst;
   3451    TCGv_i32 ahp, tmp, tmp2, tmp3;
   3452
   3453    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
   3454        !dc_isar_feature(aa32_fp16_spconv, s)) {
   3455        return false;
   3456    }
   3457
   3458    /* UNDEF accesses to D16-D31 if they don't exist. */
   3459    if (!dc_isar_feature(aa32_simd_r32, s) &&
   3460        ((a->vd | a->vm) & 0x10)) {
   3461        return false;
   3462    }
   3463
   3464    if ((a->vd & 1) || (a->size != 1)) {
   3465        return false;
   3466    }
   3467
   3468    if (!vfp_access_check(s)) {
   3469        return true;
   3470    }
   3471
   3472    fpst = fpstatus_ptr(FPST_STD);
   3473    ahp = get_ahp_flag();
   3474    tmp3 = tcg_temp_new_i32();
   3475    tmp2 = tcg_temp_new_i32();
   3476    tmp = tcg_temp_new_i32();
   3477    read_neon_element32(tmp, a->vm, 0, MO_32);
   3478    read_neon_element32(tmp2, a->vm, 1, MO_32);
   3479    tcg_gen_ext16u_i32(tmp3, tmp);
   3480    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
   3481    write_neon_element32(tmp3, a->vd, 0, MO_32);
   3482    tcg_gen_shri_i32(tmp, tmp, 16);
   3483    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
   3484    write_neon_element32(tmp, a->vd, 1, MO_32);
   3485    tcg_temp_free_i32(tmp);
   3486    tcg_gen_ext16u_i32(tmp3, tmp2);
   3487    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
   3488    write_neon_element32(tmp3, a->vd, 2, MO_32);
   3489    tcg_temp_free_i32(tmp3);
   3490    tcg_gen_shri_i32(tmp2, tmp2, 16);
   3491    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
   3492    write_neon_element32(tmp2, a->vd, 3, MO_32);
   3493    tcg_temp_free_i32(tmp2);
   3494    tcg_temp_free_i32(ahp);
   3495    tcg_temp_free_ptr(fpst);
   3496
   3497    return true;
   3498}
   3499
   3500static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
   3501{
   3502    int vec_size = a->q ? 16 : 8;
   3503    int rd_ofs = neon_full_reg_offset(a->vd);
   3504    int rm_ofs = neon_full_reg_offset(a->vm);
   3505
   3506    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3507        return false;
   3508    }
   3509
   3510    /* UNDEF accesses to D16-D31 if they don't exist. */
   3511    if (!dc_isar_feature(aa32_simd_r32, s) &&
   3512        ((a->vd | a->vm) & 0x10)) {
   3513        return false;
   3514    }
   3515
   3516    if (a->size == 3) {
   3517        return false;
   3518    }
   3519
   3520    if ((a->vd | a->vm) & a->q) {
   3521        return false;
   3522    }
   3523
   3524    if (!vfp_access_check(s)) {
   3525        return true;
   3526    }
   3527
   3528    fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);
   3529
   3530    return true;
   3531}
   3532
   3533#define DO_2MISC_VEC(INSN, FN)                                  \
   3534    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
   3535    {                                                           \
   3536        return do_2misc_vec(s, a, FN);                          \
   3537    }
   3538
   3539DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
   3540DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
   3541DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
   3542DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
   3543DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
   3544DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
   3545DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
   3546
   3547static bool trans_VMVN(DisasContext *s, arg_2misc *a)
   3548{
   3549    if (a->size != 0) {
   3550        return false;
   3551    }
   3552    return do_2misc_vec(s, a, tcg_gen_gvec_not);
   3553}
   3554
   3555#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA)                          \
   3556    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
   3557                         uint32_t rm_ofs, uint32_t oprsz,               \
   3558                         uint32_t maxsz)                                \
   3559    {                                                                   \
   3560        tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz,        \
   3561                           DATA, FUNC);                                 \
   3562    }
   3563
   3564#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA)                          \
   3565    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
   3566                         uint32_t rm_ofs, uint32_t oprsz,               \
   3567                         uint32_t maxsz)                                \
   3568    {                                                                   \
   3569        tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC);   \
   3570    }
   3571
   3572WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
   3573WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1)
   3574WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
   3575WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1)
   3576WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
   3577WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
   3578WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)
   3579
   3580#define DO_2M_CRYPTO(INSN, FEATURE, SIZE)                       \
   3581    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
   3582    {                                                           \
   3583        if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) {  \
   3584            return false;                                       \
   3585        }                                                       \
   3586        return do_2misc_vec(s, a, gen_##INSN);                  \
   3587    }
   3588
   3589DO_2M_CRYPTO(AESE, aa32_aes, 0)
   3590DO_2M_CRYPTO(AESD, aa32_aes, 0)
   3591DO_2M_CRYPTO(AESMC, aa32_aes, 0)
   3592DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
   3593DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
   3594DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
   3595DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
   3596
   3597static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
   3598{
   3599    TCGv_i32 tmp;
   3600    int pass;
   3601
   3602    /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
   3603    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3604        return false;
   3605    }
   3606
   3607    /* UNDEF accesses to D16-D31 if they don't exist. */
   3608    if (!dc_isar_feature(aa32_simd_r32, s) &&
   3609        ((a->vd | a->vm) & 0x10)) {
   3610        return false;
   3611    }
   3612
   3613    if (!fn) {
   3614        return false;
   3615    }
   3616
   3617    if ((a->vd | a->vm) & a->q) {
   3618        return false;
   3619    }
   3620
   3621    if (!vfp_access_check(s)) {
   3622        return true;
   3623    }
   3624
   3625    tmp = tcg_temp_new_i32();
   3626    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
   3627        read_neon_element32(tmp, a->vm, pass, MO_32);
   3628        fn(tmp, tmp);
   3629        write_neon_element32(tmp, a->vd, pass, MO_32);
   3630    }
   3631    tcg_temp_free_i32(tmp);
   3632
   3633    return true;
   3634}
   3635
   3636static bool trans_VREV32(DisasContext *s, arg_2misc *a)
   3637{
   3638    static NeonGenOneOpFn * const fn[] = {
   3639        tcg_gen_bswap32_i32,
   3640        gen_swap_half,
   3641        NULL,
   3642        NULL,
   3643    };
   3644    return do_2misc(s, a, fn[a->size]);
   3645}
   3646
   3647static bool trans_VREV16(DisasContext *s, arg_2misc *a)
   3648{
   3649    if (a->size != 0) {
   3650        return false;
   3651    }
   3652    return do_2misc(s, a, gen_rev16);
   3653}
   3654
   3655static bool trans_VCLS(DisasContext *s, arg_2misc *a)
   3656{
   3657    static NeonGenOneOpFn * const fn[] = {
   3658        gen_helper_neon_cls_s8,
   3659        gen_helper_neon_cls_s16,
   3660        gen_helper_neon_cls_s32,
   3661        NULL,
   3662    };
   3663    return do_2misc(s, a, fn[a->size]);
   3664}
   3665
   3666static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
   3667{
   3668    tcg_gen_clzi_i32(rd, rm, 32);
   3669}
   3670
   3671static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
   3672{
   3673    static NeonGenOneOpFn * const fn[] = {
   3674        gen_helper_neon_clz_u8,
   3675        gen_helper_neon_clz_u16,
   3676        do_VCLZ_32,
   3677        NULL,
   3678    };
   3679    return do_2misc(s, a, fn[a->size]);
   3680}
   3681
   3682static bool trans_VCNT(DisasContext *s, arg_2misc *a)
   3683{
   3684    if (a->size != 0) {
   3685        return false;
   3686    }
   3687    return do_2misc(s, a, gen_helper_neon_cnt_u8);
   3688}
   3689
   3690static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3691                       uint32_t oprsz, uint32_t maxsz)
   3692{
   3693    tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
   3694                      vece == MO_16 ? 0x7fff : 0x7fffffff,
   3695                      oprsz, maxsz);
   3696}
   3697
   3698static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
   3699{
   3700    if (a->size == MO_16) {
   3701        if (!dc_isar_feature(aa32_fp16_arith, s)) {
   3702            return false;
   3703        }
   3704    } else if (a->size != MO_32) {
   3705        return false;
   3706    }
   3707    return do_2misc_vec(s, a, gen_VABS_F);
   3708}
   3709
   3710static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3711                       uint32_t oprsz, uint32_t maxsz)
   3712{
   3713    tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
   3714                      vece == MO_16 ? 0x8000 : 0x80000000,
   3715                      oprsz, maxsz);
   3716}
   3717
   3718static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
   3719{
   3720    if (a->size == MO_16) {
   3721        if (!dc_isar_feature(aa32_fp16_arith, s)) {
   3722            return false;
   3723        }
   3724    } else if (a->size != MO_32) {
   3725        return false;
   3726    }
   3727    return do_2misc_vec(s, a, gen_VNEG_F);
   3728}
   3729
   3730static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
   3731{
   3732    if (a->size != 2) {
   3733        return false;
   3734    }
   3735    return do_2misc(s, a, gen_helper_recpe_u32);
   3736}
   3737
   3738static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
   3739{
   3740    if (a->size != 2) {
   3741        return false;
   3742    }
   3743    return do_2misc(s, a, gen_helper_rsqrte_u32);
   3744}
   3745
   3746#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
   3747    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m)        \
   3748    {                                                   \
   3749        FUNC(d, cpu_env, m);                            \
   3750    }
   3751
   3752WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
   3753WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
   3754WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
   3755WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
   3756WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
   3757WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)
   3758
   3759static bool trans_VQABS(DisasContext *s, arg_2misc *a)
   3760{
   3761    static NeonGenOneOpFn * const fn[] = {
   3762        gen_VQABS_s8,
   3763        gen_VQABS_s16,
   3764        gen_VQABS_s32,
   3765        NULL,
   3766    };
   3767    return do_2misc(s, a, fn[a->size]);
   3768}
   3769
   3770static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
   3771{
   3772    static NeonGenOneOpFn * const fn[] = {
   3773        gen_VQNEG_s8,
   3774        gen_VQNEG_s16,
   3775        gen_VQNEG_s32,
   3776        NULL,
   3777    };
   3778    return do_2misc(s, a, fn[a->size]);
   3779}
   3780
   3781#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC)                             \
   3782    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
   3783                           uint32_t rm_ofs,                             \
   3784                           uint32_t oprsz, uint32_t maxsz)              \
   3785    {                                                                   \
   3786        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
   3787            NULL, HFUNC, SFUNC, NULL,                                   \
   3788        };                                                              \
   3789        TCGv_ptr fpst;                                                  \
   3790        fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD);   \
   3791        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0,       \
   3792                           fns[vece]);                                  \
   3793        tcg_temp_free_ptr(fpst);                                        \
   3794    }                                                                   \
   3795    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
   3796    {                                                                   \
   3797        if (a->size == MO_16) {                                         \
   3798            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
   3799                return false;                                           \
   3800            }                                                           \
   3801        } else if (a->size != MO_32) {                                  \
   3802            return false;                                               \
   3803        }                                                               \
   3804        return do_2misc_vec(s, a, gen_##INSN);                          \
   3805    }
   3806
   3807DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
   3808DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
   3809DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
   3810DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
   3811DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
   3812DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
   3813DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
   3814DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
   3815DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
   3816DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
   3817DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)
   3818
   3819DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)
   3820
   3821static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
   3822{
   3823    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
   3824        return false;
   3825    }
   3826    return trans_VRINTX_impl(s, a);
   3827}
   3828
   3829#define DO_VEC_RMODE(INSN, RMODE, OP)                                   \
   3830    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
   3831                           uint32_t rm_ofs,                             \
   3832                           uint32_t oprsz, uint32_t maxsz)              \
   3833    {                                                                   \
   3834        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
   3835            NULL,                                                       \
   3836            gen_helper_gvec_##OP##h,                                    \
   3837            gen_helper_gvec_##OP##s,                                    \
   3838            NULL,                                                       \
   3839        };                                                              \
   3840        TCGv_ptr fpst;                                                  \
   3841        fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD);       \
   3842        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz,          \
   3843                           arm_rmode_to_sf(RMODE), fns[vece]);          \
   3844        tcg_temp_free_ptr(fpst);                                        \
   3845    }                                                                   \
   3846    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
   3847    {                                                                   \
   3848        if (!arm_dc_feature(s, ARM_FEATURE_V8)) {                       \
   3849            return false;                                               \
   3850        }                                                               \
   3851        if (a->size == MO_16) {                                         \
   3852            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
   3853                return false;                                           \
   3854            }                                                           \
   3855        } else if (a->size != MO_32) {                                  \
   3856            return false;                                               \
   3857        }                                                               \
   3858        return do_2misc_vec(s, a, gen_##INSN);                          \
   3859    }
   3860
   3861DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
   3862DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
   3863DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
   3864DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
   3865DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
   3866DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
   3867DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
   3868DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)
   3869
   3870DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
   3871DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
   3872DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
   3873DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
   3874DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)
   3875
   3876static bool trans_VSWP(DisasContext *s, arg_2misc *a)
   3877{
   3878    TCGv_i64 rm, rd;
   3879    int pass;
   3880
   3881    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3882        return false;
   3883    }
   3884
   3885    /* UNDEF accesses to D16-D31 if they don't exist. */
   3886    if (!dc_isar_feature(aa32_simd_r32, s) &&
   3887        ((a->vd | a->vm) & 0x10)) {
   3888        return false;
   3889    }
   3890
   3891    if (a->size != 0) {
   3892        return false;
   3893    }
   3894
   3895    if ((a->vd | a->vm) & a->q) {
   3896        return false;
   3897    }
   3898
   3899    if (!vfp_access_check(s)) {
   3900        return true;
   3901    }
   3902
   3903    rm = tcg_temp_new_i64();
   3904    rd = tcg_temp_new_i64();
   3905    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
   3906        read_neon_element64(rm, a->vm, pass, MO_64);
   3907        read_neon_element64(rd, a->vd, pass, MO_64);
   3908        write_neon_element64(rm, a->vd, pass, MO_64);
   3909        write_neon_element64(rd, a->vm, pass, MO_64);
   3910    }
   3911    tcg_temp_free_i64(rm);
   3912    tcg_temp_free_i64(rd);
   3913
   3914    return true;
   3915}
   3916static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
   3917{
   3918    TCGv_i32 rd, tmp;
   3919
   3920    rd = tcg_temp_new_i32();
   3921    tmp = tcg_temp_new_i32();
   3922
   3923    tcg_gen_shli_i32(rd, t0, 8);
   3924    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
   3925    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
   3926    tcg_gen_or_i32(rd, rd, tmp);
   3927
   3928    tcg_gen_shri_i32(t1, t1, 8);
   3929    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
   3930    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
   3931    tcg_gen_or_i32(t1, t1, tmp);
   3932    tcg_gen_mov_i32(t0, rd);
   3933
   3934    tcg_temp_free_i32(tmp);
   3935    tcg_temp_free_i32(rd);
   3936}
   3937
   3938static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
   3939{
   3940    TCGv_i32 rd, tmp;
   3941
   3942    rd = tcg_temp_new_i32();
   3943    tmp = tcg_temp_new_i32();
   3944
   3945    tcg_gen_shli_i32(rd, t0, 16);
   3946    tcg_gen_andi_i32(tmp, t1, 0xffff);
   3947    tcg_gen_or_i32(rd, rd, tmp);
   3948    tcg_gen_shri_i32(t1, t1, 16);
   3949    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
   3950    tcg_gen_or_i32(t1, t1, tmp);
   3951    tcg_gen_mov_i32(t0, rd);
   3952
   3953    tcg_temp_free_i32(tmp);
   3954    tcg_temp_free_i32(rd);
   3955}
   3956
   3957static bool trans_VTRN(DisasContext *s, arg_2misc *a)
   3958{
   3959    TCGv_i32 tmp, tmp2;
   3960    int pass;
   3961
   3962    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3963        return false;
   3964    }
   3965
   3966    /* UNDEF accesses to D16-D31 if they don't exist. */
   3967    if (!dc_isar_feature(aa32_simd_r32, s) &&
   3968        ((a->vd | a->vm) & 0x10)) {
   3969        return false;
   3970    }
   3971
   3972    if ((a->vd | a->vm) & a->q) {
   3973        return false;
   3974    }
   3975
   3976    if (a->size == 3) {
   3977        return false;
   3978    }
   3979
   3980    if (!vfp_access_check(s)) {
   3981        return true;
   3982    }
   3983
   3984    tmp = tcg_temp_new_i32();
   3985    tmp2 = tcg_temp_new_i32();
   3986    if (a->size == MO_32) {
   3987        for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
   3988            read_neon_element32(tmp, a->vm, pass, MO_32);
   3989            read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
   3990            write_neon_element32(tmp2, a->vm, pass, MO_32);
   3991            write_neon_element32(tmp, a->vd, pass + 1, MO_32);
   3992        }
   3993    } else {
   3994        for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
   3995            read_neon_element32(tmp, a->vm, pass, MO_32);
   3996            read_neon_element32(tmp2, a->vd, pass, MO_32);
   3997            if (a->size == MO_8) {
   3998                gen_neon_trn_u8(tmp, tmp2);
   3999            } else {
   4000                gen_neon_trn_u16(tmp, tmp2);
   4001            }
   4002            write_neon_element32(tmp2, a->vm, pass, MO_32);
   4003            write_neon_element32(tmp, a->vd, pass, MO_32);
   4004        }
   4005    }
   4006    tcg_temp_free_i32(tmp);
   4007    tcg_temp_free_i32(tmp2);
   4008    return true;
   4009}
   4010
   4011static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a)
   4012{
   4013    if (!dc_isar_feature(aa32_i8mm, s)) {
   4014        return false;
   4015    }
   4016    return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
   4017                        gen_helper_gvec_smmla_b);
   4018}
   4019
   4020static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a)
   4021{
   4022    if (!dc_isar_feature(aa32_i8mm, s)) {
   4023        return false;
   4024    }
   4025    return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
   4026                        gen_helper_gvec_ummla_b);
   4027}
   4028
   4029static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a)
   4030{
   4031    if (!dc_isar_feature(aa32_i8mm, s)) {
   4032        return false;
   4033    }
   4034    return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
   4035                        gen_helper_gvec_usmmla_b);
   4036}
   4037
   4038static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
   4039{
   4040    if (!dc_isar_feature(aa32_bf16, s)) {
   4041        return false;
   4042    }
   4043    return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
   4044                        gen_helper_gvec_bfmmla);
   4045}
   4046
   4047static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
   4048{
   4049    if (!dc_isar_feature(aa32_bf16, s)) {
   4050        return false;
   4051    }
   4052    return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
   4053                             gen_helper_gvec_bfmlal);
   4054}
   4055
   4056static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a)
   4057{
   4058    if (!dc_isar_feature(aa32_bf16, s)) {
   4059        return false;
   4060    }
   4061    return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm,
   4062                             (a->index << 1) | a->q, FPST_STD,
   4063                             gen_helper_gvec_bfmlal_idx);
   4064}