cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

translate-sve.c (265498B)


      1/*
      2 * AArch64 SVE translation
      3 *
      4 * Copyright (c) 2018 Linaro, Ltd
      5 *
      6 * This library is free software; you can redistribute it and/or
      7 * modify it under the terms of the GNU Lesser General Public
      8 * License as published by the Free Software Foundation; either
      9 * version 2.1 of the License, or (at your option) any later version.
     10 *
     11 * This library is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14 * Lesser General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU Lesser General Public
     17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18 */
     19
     20#include "qemu/osdep.h"
     21#include "cpu.h"
     22#include "exec/exec-all.h"
     23#include "tcg/tcg-op.h"
     24#include "tcg/tcg-op-gvec.h"
     25#include "tcg/tcg-gvec-desc.h"
     26#include "qemu/log.h"
     27#include "arm_ldst.h"
     28#include "translate.h"
     29#include "internals.h"
     30#include "exec/helper-proto.h"
     31#include "exec/helper-gen.h"
     32#include "exec/log.h"
     33#include "translate-a64.h"
     34#include "fpu/softfloat.h"
     35
     36
/* Expander shape for a gvec operation taking a 64-bit scalar operand.  */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Out-of-line helpers delivering a flags result via the first i32 arg.  */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Out-of-line helpers for memory operations.  */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
     48
     49/*
     50 * Helpers for extracting complex instruction fields.
     51 */
     52
     53/* See e.g. ASR (immediate, predicated).
     54 * Returns -1 for unallocated encoding; diagnose later.
     55 */
     56static int tszimm_esz(DisasContext *s, int x)
     57{
     58    x >>= 3;  /* discard imm3 */
     59    return 31 - clz32(x);
     60}
     61
     62static int tszimm_shr(DisasContext *s, int x)
     63{
     64    return (16 << tszimm_esz(s, x)) - x;
     65}
     66
     67/* See e.g. LSL (immediate, predicated).  */
     68static int tszimm_shl(DisasContext *s, int x)
     69{
     70    return x - (8 << tszimm_esz(s, x));
     71}
     72
     73/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
     74static inline int expand_imm_sh8s(DisasContext *s, int x)
     75{
     76    return (int8_t)x << (x & 0x100 ? 8 : 0);
     77}
     78
     79static inline int expand_imm_sh8u(DisasContext *s, int x)
     80{
     81    return (uint8_t)x << (x & 0x100 ? 8 : 0);
     82}
     83
     84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
     85 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
     86 */
     87static inline int msz_dtype(DisasContext *s, int msz)
     88{
     89    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
     90    return dtype[msz];
     91}
     92
     93/*
     94 * Include the generated decoder.
     95 */
     96
     97#include "decode-sve.c.inc"
     98
     99/*
    100 * Implement all of the translator functions referenced by the decoder.
    101 */
    102
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}
    110
/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    /* sve_len is the vector length in bytes; predicates have 1 bit/byte. */
    return s->sve_len >> 3;
}
    116
    117/* Round up the size of a register to a size allowed by
    118 * the tcg vector infrastructure.  Any operation which uses this
    119 * size may assume that the bits above pred_full_reg_size are zero,
    120 * and must leave them the same way.
    121 *
    122 * Note that this is not needed for the vector registers as they
    123 * are always properly sized for tcg vectors.
    124 */
    125static int size_for_gvec(int size)
    126{
    127    if (size <= 8) {
    128        return 8;
    129    } else {
    130        return QEMU_ALIGN_UP(size, 16);
    131    }
    132}
    133
/* Predicate register size, rounded up for use with the gvec expanders.  */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
    138
    139/* Invoke an out-of-line helper on 2 Zregs. */
    140static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
    141                            int rd, int rn, int data)
    142{
    143    unsigned vsz = vec_full_reg_size(s);
    144    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
    145                       vec_full_reg_offset(s, rn),
    146                       vsz, vsz, data, fn);
    147}
    148
    149/* Invoke an out-of-line helper on 3 Zregs. */
    150static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
    151                             int rd, int rn, int rm, int data)
    152{
    153    unsigned vsz = vec_full_reg_size(s);
    154    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
    155                       vec_full_reg_offset(s, rn),
    156                       vec_full_reg_offset(s, rm),
    157                       vsz, vsz, data, fn);
    158}
    159
    160/* Invoke an out-of-line helper on 4 Zregs. */
    161static void gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
    162                              int rd, int rn, int rm, int ra, int data)
    163{
    164    unsigned vsz = vec_full_reg_size(s);
    165    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
    166                       vec_full_reg_offset(s, rn),
    167                       vec_full_reg_offset(s, rm),
    168                       vec_full_reg_offset(s, ra),
    169                       vsz, vsz, data, fn);
    170}
    171
    172/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
    173static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
    174                             int rd, int rn, int pg, int data)
    175{
    176    unsigned vsz = vec_full_reg_size(s);
    177    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
    178                       vec_full_reg_offset(s, rn),
    179                       pred_full_reg_offset(s, pg),
    180                       vsz, vsz, data, fn);
    181}
    182
    183/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
    184static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
    185                              int rd, int rn, int rm, int pg, int data)
    186{
    187    unsigned vsz = vec_full_reg_size(s);
    188    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
    189                       vec_full_reg_offset(s, rn),
    190                       vec_full_reg_offset(s, rm),
    191                       pred_full_reg_offset(s, pg),
    192                       vsz, vsz, data, fn);
    193}
    194
    195/* Invoke a vector expander on two Zregs.  */
    196static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
    197                           int esz, int rd, int rn)
    198{
    199    unsigned vsz = vec_full_reg_size(s);
    200    gvec_fn(esz, vec_full_reg_offset(s, rd),
    201            vec_full_reg_offset(s, rn), vsz, vsz);
    202}
    203
    204/* Invoke a vector expander on three Zregs.  */
    205static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
    206                            int esz, int rd, int rn, int rm)
    207{
    208    unsigned vsz = vec_full_reg_size(s);
    209    gvec_fn(esz, vec_full_reg_offset(s, rd),
    210            vec_full_reg_offset(s, rn),
    211            vec_full_reg_offset(s, rm), vsz, vsz);
    212}
    213
    214/* Invoke a vector expander on four Zregs.  */
    215static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
    216                             int esz, int rd, int rn, int rm, int ra)
    217{
    218    unsigned vsz = vec_full_reg_size(s);
    219    gvec_fn(esz, vec_full_reg_offset(s, rd),
    220            vec_full_reg_offset(s, rn),
    221            vec_full_reg_offset(s, rm),
    222            vec_full_reg_offset(s, ra), vsz, vsz);
    223}
    224
    225/* Invoke a vector move on two Zregs.  */
    226static bool do_mov_z(DisasContext *s, int rd, int rn)
    227{
    228    if (sve_access_check(s)) {
    229        gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
    230    }
    231    return true;
    232}
    233
    234/* Initialize a Zreg with replications of a 64-bit immediate.  */
    235static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
    236{
    237    unsigned vsz = vec_full_reg_size(s);
    238    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
    239}
    240
    241/* Invoke a vector expander on three Pregs.  */
    242static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
    243                            int rd, int rn, int rm)
    244{
    245    unsigned psz = pred_gvec_reg_size(s);
    246    gvec_fn(MO_64, pred_full_reg_offset(s, rd),
    247            pred_full_reg_offset(s, rn),
    248            pred_full_reg_offset(s, rm), psz, psz);
    249}
    250
    251/* Invoke a vector move on two Pregs.  */
    252static bool do_mov_p(DisasContext *s, int rd, int rn)
    253{
    254    if (sve_access_check(s)) {
    255        unsigned psz = pred_gvec_reg_size(s);
    256        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
    257                         pred_full_reg_offset(s, rn), psz, psz);
    258    }
    259    return true;
    260}
    261
/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);       /* N: from the helper result */
    tcg_gen_andi_i32(cpu_ZF, t, 2);   /* Z: bit 1 of the result */
    tcg_gen_andi_i32(cpu_CF, t, 1);   /* C: bit 0 of the result */
    tcg_gen_movi_i32(cpu_VF, 0);      /* V: always clear */
}
    270
/* Subroutines computing the ARM PredTest pseudofunction.  */

/* Single-word variant: test predicate word D against governing word G.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
    280
/* Multi-word PredTest on WORDS predicate words at env offsets DOFS, GOFS.  */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    /* t is both input (word count) and output (flags) of the helper.  */
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
    298
/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,   /* MO_8, MO_16 */
    0x1111111111111111ull, 0x0101010101010101ull    /* MO_32, MO_64 */
};
    304
    305/*
    306 *** SVE Logical - Unpredicated Group
    307 */
    308
    309static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
    310{
    311    if (sve_access_check(s)) {
    312        gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
    313    }
    314    return true;
    315}
    316
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_and);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_or);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_xor);
}

/* BIC: bit clear, Zd = Zn & ~Zm.  */
static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_andc);
}
    336
/*
 * XAR at byte granularity on a 64-bit chunk: d = (n ^ m) rotated right
 * by sh within each byte lane.  There is no 8-bit rotate at i64, so
 * build the rotate from a pair of shifts and complementary masks.
 */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    /* Bits that survive the right shift within each byte lane.  */
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);       /* low part of each rotated lane */
    tcg_gen_shli_i64(t, t, 8 - sh);   /* high part of each rotated lane */
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

/* As gen_xar8_i64, but at halfword granularity.  */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

/* 32-bit elements: a native rotate is available.  */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

/* 64-bit elements: a native rotate is available.  */
static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

/* Vector form of XAR.  */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
    383
/* Expand XAR (exclusive-or followed by rotate right) as a gvec op.  */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    /* A rotate by esize is the same as no rotate at all.  */
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
    426
    427static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
    428{
    429    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
    430        return false;
    431    }
    432    if (sve_access_check(s)) {
    433        unsigned vsz = vec_full_reg_size(s);
    434        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
    435                     vec_full_reg_offset(s, a->rn),
    436                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    437    }
    438    return true;
    439}
    440
/* Expand an SVE2 four-Zreg operation via a gvec expander.  */
static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
    }
    return true;
}
    451
/* EOR3: three-way exclusive or, d = n ^ m ^ k.  */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* The operation is bitwise; the element size is irrelevant.  */
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_eor3);
}
    482
/* BCAX: bit clear and exclusive or, d = n ^ (m & ~k).  */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* The operation is bitwise; the element size is irrelevant.  */
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bcax);
}
    513
static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * BSL differs from the generic bitsel in argument ordering:
     * the selector is the third operand, d = (n & a) | (m & ~a).
     */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl);
}
    525
/* BSL1N: as BSL with the first source inverted, d = (~n & k) | (m & ~k).  */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /* NOTE: n and m are used as scratch and overwritten here.  */
    tcg_gen_andc_i64(n, k, n);     /* k & ~n */
    tcg_gen_andc_i64(m, m, k);     /* m & ~k */
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* Invert n, then select under k as for BSL.  */
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* The operation is bitwise; the element size is irrelevant.  */
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl1n);
}
    563
/* BSL2N: as BSL with the second source inverted, d = (n & k) | (~m & ~k).  */
static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     * NOTE: n and m are used as scratch and overwritten here.
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* Invert m, then select under k as for BSL.  */
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* The operation is bitwise; the element size is irrelevant.  */
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl2n);
}
    610
/* NBSL: inverted bitwise select, d = ~((n & k) | (m & ~k)).  */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /* NOTE: n and m are used as scratch and overwritten here.  */
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* The operation is bitwise; the element size is irrelevant.  */
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_nbsl);
}
    642
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_add);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sub);
}

/* Signed saturating add/subtract.  */
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
}

/* Unsigned saturating add/subtract.  */
static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
}
    676
    677/*
    678 *** SVE Integer Arithmetic - Binary Predicated Group
    679 */
    680
    681static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
    682{
    683    if (fn == NULL) {
    684        return false;
    685    }
    686    if (sve_access_check(s)) {
    687        gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
    688    }
    689    return true;
    690}
    691
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
    703
/*
 * Define trans_<NAME>_zpzz, dispatching on a->esz to the per-size
 * out-of-line helpers gen_helper_sve_<name>_zpzz_{b,h,s,d}.
 */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
    713
/* Bitwise logical, predicated.  */
DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

/* Integer add/subtract, predicated.  */
DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

/* Min, max, and absolute difference, predicated.  */
DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

/* Multiply, low and high halves, predicated.  */
DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

/* Element-wise shifts, predicated.  */
DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)
    736
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    /* Division has only 32-bit and 64-bit forms; NULL diagnoses the rest. */
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
    744
static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    /* Division has only 32-bit and 64-bit forms; NULL diagnoses the rest. */
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
    752
static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    /* SEL has a helper for every element size, so no NULL check needed.  */
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ
    762
    763/*
    764 *** SVE Integer Arithmetic - Unary Predicated Group
    765 */
    766
    767static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
    768{
    769    if (fn == NULL) {
    770        return false;
    771    }
    772    if (sve_access_check(s)) {
    773        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
    774    }
    775    return true;
    776}
    777
/*
 * Define trans_<NAME>, dispatching on a->esz to the per-size
 * out-of-line helpers gen_helper_sve_<name>_{b,h,s,d}.
 */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}
    787
/* Bit-counting, logical, and arithmetic unary ops, predicated.  */
DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)
    795
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    /* No byte-sized floating point; the NULL entry diagnoses MO_8.  */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    /* No byte-sized floating point; the NULL entry diagnoses MO_8.  */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
    817
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    /* Byte extension needs a wider element; the NULL entry diagnoses MO_8. */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    /* Byte extension needs a wider element; the NULL entry diagnoses MO_8. */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
    839
static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    /* Halfword extension needs 32- or 64-bit elements.  */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    /* Halfword extension needs 32- or 64-bit elements.  */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
    859
/* Word extension is only valid to 64-bit elements.  */
static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ
    871
/*
 *** SVE Integer Reduction Group
 */

/* An out-of-line helper reducing (vector, predicate) to a 64-bit scalar.  */
typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/*
 * Expand a predicated reduction of Zn under Pg into scalar register Vd.
 * A NULL fn marks an unallocated element-size encoding.
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    /* Write the scalar result out to the FP/SIMD register Vd.  */
    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
    908
/*
 * Expand the predicated reductions for which a helper exists at every
 * element size.  NAME is the decodetree pattern name; name is the
 * helper-function suffix.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

/* Bitwise reductions. */
DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

/* Arithmetic reductions. */
DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)
    928
    929static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
    930{
    931    static gen_helper_gvec_reduc * const fns[4] = {
    932        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    933        gen_helper_sve_saddv_s, NULL
    934    };
    935    return do_vpz_ool(s, a, fns[a->esz]);
    936}
    937
    938#undef DO_VPZ
    939
    940/*
    941 *** SVE Shift by Immediate - Predicated Group
    942 */
    943
    944/*
    945 * Copy Zn into Zd, storing zeros into inactive elements.
    946 * If invert, store zeros into the active elements.
    947 */
    948static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
    949                        int esz, bool invert)
    950{
    951    static gen_helper_gvec_3 * const fns[4] = {
    952        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
    953        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    954    };
    955
    956    if (sve_access_check(s)) {
    957        gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
    958    }
    959    return true;
    960}
    961
    962static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
    963                        gen_helper_gvec_3 *fn)
    964{
    965    if (sve_access_check(s)) {
    966        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
    967    }
    968    return true;
    969}
    970
    971static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
    972{
    973    static gen_helper_gvec_3 * const fns[4] = {
    974        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    975        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    976    };
    977    if (a->esz < 0) {
    978        /* Invalid tsz encoding -- see tszimm_esz. */
    979        return false;
    980    }
    981    /* Shift by element size is architecturally valid.  For
    982       arithmetic right-shift, it's the same as by one less. */
    983    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    984    return do_zpzi_ool(s, a, fns[a->esz]);
    985}
    986
    987static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
    988{
    989    static gen_helper_gvec_3 * const fns[4] = {
    990        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    991        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    992    };
    993    if (a->esz < 0) {
    994        return false;
    995    }
    996    /* Shift by element size is architecturally valid.
    997       For logical shifts, it is a zeroing operation.  */
    998    if (a->imm >= (8 << a->esz)) {
    999        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
   1000    } else {
   1001        return do_zpzi_ool(s, a, fns[a->esz]);
   1002    }
   1003}
   1004
   1005static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
   1006{
   1007    static gen_helper_gvec_3 * const fns[4] = {
   1008        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
   1009        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
   1010    };
   1011    if (a->esz < 0) {
   1012        return false;
   1013    }
   1014    /* Shift by element size is architecturally valid.
   1015       For logical shifts, it is a zeroing operation.  */
   1016    if (a->imm >= (8 << a->esz)) {
   1017        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
   1018    } else {
   1019        return do_zpzi_ool(s, a, fns[a->esz]);
   1020    }
   1021}
   1022
   1023static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
   1024{
   1025    static gen_helper_gvec_3 * const fns[4] = {
   1026        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
   1027        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
   1028    };
   1029    if (a->esz < 0) {
   1030        return false;
   1031    }
   1032    /* Shift by element size is architecturally valid.  For arithmetic
   1033       right shift for division, it is a zeroing operation.  */
   1034    if (a->imm >= (8 << a->esz)) {
   1035        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
   1036    } else {
   1037        return do_zpzi_ool(s, a, fns[a->esz]);
   1038    }
   1039}
   1040
   1041static bool trans_SQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
   1042{
   1043    static gen_helper_gvec_3 * const fns[4] = {
   1044        gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
   1045        gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
   1046    };
   1047    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
   1048        return false;
   1049    }
   1050    return do_zpzi_ool(s, a, fns[a->esz]);
   1051}
   1052
   1053static bool trans_UQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
   1054{
   1055    static gen_helper_gvec_3 * const fns[4] = {
   1056        gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
   1057        gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
   1058    };
   1059    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
   1060        return false;
   1061    }
   1062    return do_zpzi_ool(s, a, fns[a->esz]);
   1063}
   1064
   1065static bool trans_SRSHR(DisasContext *s, arg_rpri_esz *a)
   1066{
   1067    static gen_helper_gvec_3 * const fns[4] = {
   1068        gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
   1069        gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
   1070    };
   1071    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
   1072        return false;
   1073    }
   1074    return do_zpzi_ool(s, a, fns[a->esz]);
   1075}
   1076
   1077static bool trans_URSHR(DisasContext *s, arg_rpri_esz *a)
   1078{
   1079    static gen_helper_gvec_3 * const fns[4] = {
   1080        gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
   1081        gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
   1082    };
   1083    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
   1084        return false;
   1085    }
   1086    return do_zpzi_ool(s, a, fns[a->esz]);
   1087}
   1088
   1089static bool trans_SQSHLU(DisasContext *s, arg_rpri_esz *a)
   1090{
   1091    static gen_helper_gvec_3 * const fns[4] = {
   1092        gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
   1093        gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
   1094    };
   1095    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
   1096        return false;
   1097    }
   1098    return do_zpzi_ool(s, a, fns[a->esz]);
   1099}
   1100
   1101/*
   1102 *** SVE Bitwise Shift - Predicated Group
   1103 */
   1104
/*
 * Expand the predicated shifts by a wide (64-bit) element shift count.
 * Only B/H/S element sizes exist; esz == 3 (and the invalid -1) fail.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
   1123
   1124/*
   1125 *** SVE Bitwise Shift - Unpredicated Group
   1126 */
   1127
   1128static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
   1129                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
   1130                                         int64_t, uint32_t, uint32_t))
   1131{
   1132    if (a->esz < 0) {
   1133        /* Invalid tsz encoding -- see tszimm_esz. */
   1134        return false;
   1135    }
   1136    if (sve_access_check(s)) {
   1137        unsigned vsz = vec_full_reg_size(s);
   1138        /* Shift by element size is architecturally valid.  For
   1139           arithmetic right-shift, it's the same as by one less.
   1140           Otherwise it is a zeroing operation.  */
   1141        if (a->imm >= 8 << a->esz) {
   1142            if (asr) {
   1143                a->imm = (8 << a->esz) - 1;
   1144            } else {
   1145                do_dupi_z(s, a->rd, 0);
   1146                return true;
   1147            }
   1148        }
   1149        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
   1150                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
   1151    }
   1152    return true;
   1153}
   1154
/* ASR (immediate, unpredicated); do_shift_imm clamps an over-wide count. */
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}
   1159
/* LSR (immediate, unpredicated); an over-wide count zeroes the result. */
static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}
   1164
/* LSL (immediate, unpredicated); an over-wide count zeroes the result. */
static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
   1169
   1170static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
   1171{
   1172    if (fn == NULL) {
   1173        return false;
   1174    }
   1175    if (sve_access_check(s)) {
   1176        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
   1177    }
   1178    return true;
   1179}
   1180
/*
 * Expand the unpredicated shifts by a wide (64-bit) element shift count.
 * There is no doubleword form; do_zzw_ool rejects the NULL entry.
 */
#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW
   1196
   1197/*
   1198 *** SVE Integer Multiply-Add Group
   1199 */
   1200
/*
 * Expand a predicated multiply-accumulate out of line, passing the
 * five operand offsets (Zd, Za, Zn, Zm, Pg) to the helper.
 */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
   1215
/* Expand the predicated multiply-accumulate ops at every element size. */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
   1230
   1231/*
   1232 *** SVE Index Generation Group
   1233 */
   1234
/*
 * Expand INDEX: fill Zd with the sequence start + n * incr.
 * The doubleword helper consumes the i64 operands directly; the
 * narrower helpers take 32-bit operands, so truncate first.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        /* The low 32 bits suffice for B/H/S elements. */
        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
   1265
   1266static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
   1267{
   1268    if (sve_access_check(s)) {
   1269        TCGv_i64 start = tcg_const_i64(a->imm1);
   1270        TCGv_i64 incr = tcg_const_i64(a->imm2);
   1271        do_index(s, a->esz, a->rd, start, incr);
   1272        tcg_temp_free_i64(start);
   1273        tcg_temp_free_i64(incr);
   1274    }
   1275    return true;
   1276}
   1277
   1278static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
   1279{
   1280    if (sve_access_check(s)) {
   1281        TCGv_i64 start = tcg_const_i64(a->imm);
   1282        TCGv_i64 incr = cpu_reg(s, a->rm);
   1283        do_index(s, a->esz, a->rd, start, incr);
   1284        tcg_temp_free_i64(start);
   1285    }
   1286    return true;
   1287}
   1288
   1289static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
   1290{
   1291    if (sve_access_check(s)) {
   1292        TCGv_i64 start = cpu_reg(s, a->rn);
   1293        TCGv_i64 incr = tcg_const_i64(a->imm);
   1294        do_index(s, a->esz, a->rd, start, incr);
   1295        tcg_temp_free_i64(incr);
   1296    }
   1297    return true;
   1298}
   1299
   1300static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
   1301{
   1302    if (sve_access_check(s)) {
   1303        TCGv_i64 start = cpu_reg(s, a->rn);
   1304        TCGv_i64 incr = cpu_reg(s, a->rm);
   1305        do_index(s, a->esz, a->rd, start, incr);
   1306    }
   1307    return true;
   1308}
   1309
   1310/*
   1311 *** SVE Stack Allocation Group
   1312 */
   1313
   1314static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
   1315{
   1316    if (sve_access_check(s)) {
   1317        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
   1318        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
   1319        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
   1320    }
   1321    return true;
   1322}
   1323
   1324static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
   1325{
   1326    if (sve_access_check(s)) {
   1327        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
   1328        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
   1329        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
   1330    }
   1331    return true;
   1332}
   1333
   1334static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
   1335{
   1336    if (sve_access_check(s)) {
   1337        TCGv_i64 reg = cpu_reg(s, a->rd);
   1338        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
   1339    }
   1340    return true;
   1341}
   1342
   1343/*
   1344 *** SVE Compute Vector Address Group
   1345 */
   1346
   1347static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
   1348{
   1349    if (sve_access_check(s)) {
   1350        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
   1351    }
   1352    return true;
   1353}
   1354
/* ADR, packed 32-bit element variant. */
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}
   1359
/* ADR, packed 64-bit element variant. */
static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}
   1364
/* ADR, sign-extended 32-bit offset variant. */
static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}
   1369
/* ADR, zero-extended 32-bit offset variant. */
static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
   1374
   1375/*
   1376 *** SVE Integer Misc - Unpredicated Group
   1377 */
   1378
   1379static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
   1380{
   1381    static gen_helper_gvec_2 * const fns[4] = {
   1382        NULL,
   1383        gen_helper_sve_fexpa_h,
   1384        gen_helper_sve_fexpa_s,
   1385        gen_helper_sve_fexpa_d,
   1386    };
   1387    if (a->esz == 0) {
   1388        return false;
   1389    }
   1390    if (sve_access_check(s)) {
   1391        gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
   1392    }
   1393    return true;
   1394}
   1395
   1396static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
   1397{
   1398    static gen_helper_gvec_3 * const fns[4] = {
   1399        NULL,
   1400        gen_helper_sve_ftssel_h,
   1401        gen_helper_sve_ftssel_s,
   1402        gen_helper_sve_ftssel_d,
   1403    };
   1404    if (a->esz == 0) {
   1405        return false;
   1406    }
   1407    if (sve_access_check(s)) {
   1408        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
   1409    }
   1410    return true;
   1411}
   1412
   1413/*
   1414 *** SVE Predicate Logical Operations Group
   1415 */
   1416
/*
 * Expand a predicate logical operation, optionally setting the NZCV
 * flags (a->s).  Without flags, the generic 4-operand gvec expansion
 * suffices.  With flags, a single 64-bit predicate word is handled
 * inline via gvec_op->fni8 + do_predtest1; larger predicates use the
 * out-of-line expansion followed by do_predtest.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
   1472
/* Pd = Pn & Pm & Pg, one 64-bit predicate word. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = Pn & Pm & Pg, host-vector flavour. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
   1485
/*
 * AND (predicates).  Without flag setting, recognize some operand
 * aliasing patterns that reduce to simpler operations:
 *   rn == rm:        Pd = Pn & Pg (or a plain move when pg == rn),
 *   pg == rn or rm:  the guard is absorbed by the AND itself.
 * Everything else goes through the generic flags path.
 */
static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s) {
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                do_mov_p(s, a->rd, a->rn);
            } else {
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}
   1513
/* Pd = Pn & ~Pm & Pg, one 64-bit predicate word. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = Pn & ~Pm & Pg, host-vector flavour. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
   1526
/*
 * BIC (predicates).  When pg == rn and no flags are required, the
 * guard is redundant and the operation is a plain ANDC.
 */
static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn) {
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}
   1544
/* Pd = (Pn ^ Pm) & Pg, one 64-bit predicate word. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn ^ Pm) & Pg, host-vector flavour. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
   1557
/* EOR (predicates): no operand special cases; use the common path. */
static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
   1568
   1569static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
   1570{
   1571    if (a->s) {
   1572        return false;
   1573    }
   1574    if (sve_access_check(s)) {
   1575        unsigned psz = pred_gvec_reg_size(s);
   1576        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
   1577                            pred_full_reg_offset(s, a->pg),
   1578                            pred_full_reg_offset(s, a->rn),
   1579                            pred_full_reg_offset(s, a->rm), psz, psz);
   1580    }
   1581    return true;
   1582}
   1583
/* Pd = (Pn | Pm) & Pg, one 64-bit predicate word. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn | Pm) & Pg, host-vector flavour. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
   1596
/*
 * ORR (predicates).  With pg == rn == rm and no flags required, the
 * result is simply a copy of Pn.
 */
static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
   1611
/* Pd = (Pn | ~Pm) & Pg, one 64-bit predicate word. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn | ~Pm) & Pg, host-vector flavour. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
   1624
/* ORN (predicates): no operand special cases; use the common path. */
static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
   1635
/* Pd = ~(Pn | Pm) & Pg, one 64-bit predicate word. */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Pd = ~(Pn | Pm) & Pg, host-vector flavour. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
   1648
/* NOR (predicates): no operand special cases; use the common path. */
static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
   1659
/* Pd = ~(Pn & Pm) & Pg, one 64-bit predicate word. */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Pd = ~(Pn & Pm) & Pg, host-vector flavour. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
   1672
/* NAND (predicates): no operand special cases; use the common path. */
static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
   1683
   1684/*
   1685 *** SVE Predicate Misc Group
   1686 */
   1687
/*
 * PTEST: set the NZCV flags from a test of Pn governed by Pg.
 * A predicate that fits in a single 64-bit word is tested inline;
 * larger predicates use the out-of-line multi-word test.
 */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
   1711
   1712/* See the ARM pseudocode DecodePredCount.  */
   1713static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
   1714{
   1715    unsigned elements = fullsz >> esz;
   1716    unsigned bound;
   1717
   1718    switch (pattern) {
   1719    case 0x0: /* POW2 */
   1720        return pow2floor(elements);
   1721    case 0x1: /* VL1 */
   1722    case 0x2: /* VL2 */
   1723    case 0x3: /* VL3 */
   1724    case 0x4: /* VL4 */
   1725    case 0x5: /* VL5 */
   1726    case 0x6: /* VL6 */
   1727    case 0x7: /* VL7 */
   1728    case 0x8: /* VL8 */
   1729        bound = pattern;
   1730        break;
   1731    case 0x9: /* VL16 */
   1732    case 0xa: /* VL32 */
   1733    case 0xb: /* VL64 */
   1734    case 0xc: /* VL128 */
   1735    case 0xd: /* VL256 */
   1736        bound = 16 << (pattern - 9);
   1737        break;
   1738    case 0x1d: /* MUL4 */
   1739        return elements - elements % 4;
   1740    case 0x1e: /* MUL3 */
   1741        return elements - elements % 3;
   1742    case 0x1f: /* ALL */
   1743        return elements;
   1744    default:   /* #uimm5 */
   1745        return 0;
   1746    }
   1747    return elements >= bound ? bound : 0;
   1748}
   1749
   1750/* This handles all of the predicate initialization instructions,
   1751 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
   1752 * so that decode_pred_count returns 0.  For SETFFR, we will have
   1753 * set RD == 16 == FFR.
   1754 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        /*
         * word is the repeating per-64-bit pattern of active element
         * bits; lastword masks off any partial tail in the final word.
         */
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one 64-bit store. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* No partial tail word: try a single gvec dup of the pattern. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* Fall back to explicit 64-bit stores; sizes now in bytes. */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        /* Store the partial tail word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        /* Zero the remainder of the predicate register. */
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        /* N = (some element set), Z = !N, C = (no element set), V = 0. */
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
   1829
   1830static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
   1831{
   1832    return do_predset(s, a->esz, a->rd, a->pat, a->s);
   1833}
   1834
   1835static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
   1836{
   1837    /* Note pat == 31 is #all, to set all elements.  */
   1838    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
   1839}
   1840
   1841static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
   1842{
   1843    /* Note pat == 32 is #unimp, to set no elements.  */
   1844    return do_predset(s, 0, a->rd, 32, false);
   1845}
   1846
   1847static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
   1848{
   1849    /* The path through do_pppp_flags is complicated enough to want to avoid
   1850     * duplication.  Frob the arguments into the form of a predicated AND.
   1851     */
   1852    arg_rprr_s alt_a = {
   1853        .rd = a->rd, .pg = a->pg, .s = a->s,
   1854        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
   1855    };
   1856    return trans_AND_pppp(s, &alt_a);
   1857}
   1858
   1859static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
   1860{
   1861    return do_mov_p(s, a->rd, FFR_PRED_NUM);
   1862}
   1863
   1864static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
   1865{
   1866    return do_mov_p(s, FFR_PRED_NUM, a->rn);
   1867}
   1868
/* Shared expander for PFIRST and PNEXT: call an out-of-line helper
 * on (Pd, Pg), then update NZCV from the helper's return value.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    /* Predicate sizes are too small for simd_desc; build a PREDDESC.  */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    /* T doubles as descriptor input and flags-word output.  */
    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}
   1897
   1898static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
   1899{
   1900    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
   1901}
   1902
   1903static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
   1904{
   1905    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
   1906}
   1907
   1908/*
   1909 *** SVE Element Count Group
   1910 */
   1911
/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtracting a positive value can only move below the
         * lower bound of the 32-bit range.
         */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        /* Adding a positive value can only exceed the upper bound.  */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    /* Clamp the 64-bit intermediate to the saturation bound.  */
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
   1941
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: saturate to 0 when reg < val.  */
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            /* Unsigned add: the sum wrapped iff sum < reg;
             * saturate to all-ones in that case.
             */
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction:
             * overflow iff operands differ in sign (t0 = reg ^ val)
             * and the result differs in sign from reg.  The sign bit
             * of t0 below holds that condition.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  VAL is known positive, so overflow
             * can only be toward INT64_MIN.
             */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition: overflow iff the
             * operands agree in sign but the sum does not.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  VAL is known positive, so overflow
             * can only be toward INT64_MAX.
             */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
   1988
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    /* Only the unsigned 64-bit case has a dedicated subtract helper;
     * elsewhere subtraction is implemented by negating the addend.
     */
    switch (esz) {
    case MO_8:
        /* Byte elements: pass the (possibly negated) addend as i32.  */
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        /* Halfword elements: same i32 scheme as bytes.  */
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        /* Word elements: addend is passed as i64.  */
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            /* Negating the addend cannot represent unsigned subtract
             * at full width, so use the dedicated subtract helper.
             */
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
   2073
   2074static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
   2075{
   2076    if (sve_access_check(s)) {
   2077        unsigned fullsz = vec_full_reg_size(s);
   2078        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
   2079        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
   2080    }
   2081    return true;
   2082}
   2083
   2084static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
   2085{
   2086    if (sve_access_check(s)) {
   2087        unsigned fullsz = vec_full_reg_size(s);
   2088        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
   2089        int inc = numelem * a->imm * (a->d ? -1 : 1);
   2090        TCGv_i64 reg = cpu_reg(s, a->rd);
   2091
   2092        tcg_gen_addi_i64(reg, reg, inc);
   2093    }
   2094    return true;
   2095}
   2096
   2097static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
   2098{
   2099    if (!sve_access_check(s)) {
   2100        return true;
   2101    }
   2102
   2103    unsigned fullsz = vec_full_reg_size(s);
   2104    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
   2105    int inc = numelem * a->imm;
   2106    TCGv_i64 reg = cpu_reg(s, a->rd);
   2107
   2108    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
   2109    if (inc == 0) {
   2110        if (a->u) {
   2111            tcg_gen_ext32u_i64(reg, reg);
   2112        } else {
   2113            tcg_gen_ext32s_i64(reg, reg);
   2114        }
   2115    } else {
   2116        TCGv_i64 t = tcg_const_i64(inc);
   2117        do_sat_addsub_32(reg, t, a->u, a->d);
   2118        tcg_temp_free_i64(t);
   2119    }
   2120    return true;
   2121}
   2122
   2123static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
   2124{
   2125    if (!sve_access_check(s)) {
   2126        return true;
   2127    }
   2128
   2129    unsigned fullsz = vec_full_reg_size(s);
   2130    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
   2131    int inc = numelem * a->imm;
   2132    TCGv_i64 reg = cpu_reg(s, a->rd);
   2133
   2134    if (inc != 0) {
   2135        TCGv_i64 t = tcg_const_i64(inc);
   2136        do_sat_addsub_64(reg, t, a->u, a->d);
   2137        tcg_temp_free_i64(t);
   2138    }
   2139    return true;
   2140}
   2141
   2142static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
   2143{
   2144    if (a->esz == 0) {
   2145        return false;
   2146    }
   2147
   2148    unsigned fullsz = vec_full_reg_size(s);
   2149    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
   2150    int inc = numelem * a->imm;
   2151
   2152    if (inc != 0) {
   2153        if (sve_access_check(s)) {
   2154            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
   2155            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
   2156                              vec_full_reg_offset(s, a->rn),
   2157                              t, fullsz, fullsz);
   2158            tcg_temp_free_i64(t);
   2159        }
   2160    } else {
   2161        do_mov_z(s, a->rd, a->rn);
   2162    }
   2163    return true;
   2164}
   2165
   2166static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
   2167{
   2168    if (a->esz == 0) {
   2169        return false;
   2170    }
   2171
   2172    unsigned fullsz = vec_full_reg_size(s);
   2173    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
   2174    int inc = numelem * a->imm;
   2175
   2176    if (inc != 0) {
   2177        if (sve_access_check(s)) {
   2178            TCGv_i64 t = tcg_const_i64(inc);
   2179            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
   2180            tcg_temp_free_i64(t);
   2181        }
   2182    } else {
   2183        do_mov_z(s, a->rd, a->rn);
   2184    }
   2185    return true;
   2186}
   2187
   2188/*
   2189 *** SVE Bitwise Immediate Group
   2190 */
   2191
   2192static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
   2193{
   2194    uint64_t imm;
   2195    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
   2196                                extract32(a->dbm, 0, 6),
   2197                                extract32(a->dbm, 6, 6))) {
   2198        return false;
   2199    }
   2200    if (sve_access_check(s)) {
   2201        unsigned vsz = vec_full_reg_size(s);
   2202        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
   2203                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
   2204    }
   2205    return true;
   2206}
   2207
   2208static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
   2209{
   2210    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
   2211}
   2212
   2213static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
   2214{
   2215    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
   2216}
   2217
   2218static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
   2219{
   2220    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
   2221}
   2222
   2223static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
   2224{
   2225    uint64_t imm;
   2226    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
   2227                                extract32(a->dbm, 0, 6),
   2228                                extract32(a->dbm, 6, 6))) {
   2229        return false;
   2230    }
   2231    if (sve_access_check(s)) {
   2232        do_dupi_z(s, a->rd, imm);
   2233    }
   2234    return true;
   2235}
   2236
   2237/*
   2238 *** SVE Integer Wide Immediate - Predicated Group
   2239 */
   2240
/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    /* Form pointers to the destination, source and governing
     * predicate, relative to cpu_env.
     */
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    /* Dispatch on element size.  */
    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}
   2269
   2270static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
   2271{
   2272    if (a->esz == 0) {
   2273        return false;
   2274    }
   2275    if (sve_access_check(s)) {
   2276        /* Decode the VFP immediate.  */
   2277        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
   2278        TCGv_i64 t_imm = tcg_const_i64(imm);
   2279        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
   2280        tcg_temp_free_i64(t_imm);
   2281    }
   2282    return true;
   2283}
   2284
   2285static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
   2286{
   2287    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
   2288        return false;
   2289    }
   2290    if (sve_access_check(s)) {
   2291        TCGv_i64 t_imm = tcg_const_i64(a->imm);
   2292        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
   2293        tcg_temp_free_i64(t_imm);
   2294    }
   2295    return true;
   2296}
   2297
   2298static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
   2299{
   2300    static gen_helper_gvec_2i * const fns[4] = {
   2301        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
   2302        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
   2303    };
   2304
   2305    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
   2306        return false;
   2307    }
   2308    if (sve_access_check(s)) {
   2309        unsigned vsz = vec_full_reg_size(s);
   2310        TCGv_i64 t_imm = tcg_const_i64(a->imm);
   2311        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
   2312                            pred_full_reg_offset(s, a->pg),
   2313                            t_imm, vsz, vsz, 0, fns[a->esz]);
   2314        tcg_temp_free_i64(t_imm);
   2315    }
   2316    return true;
   2317}
   2318
   2319/*
   2320 *** SVE Permute Extract Group
   2321 */
   2322
/* Shared expander for EXT and EXT (SVE2): concatenate Zn:Zm and
 * extract VSZ bytes starting at byte IMM.
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    /* An index at or beyond the vector length selects offset 0.  */
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        /* Copy the tail of Zn into the bottom of Zd ...  */
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            /* ... then the head of Zm above it.  */
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper; the byte offset is
         * passed through the descriptor data field.
         */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
   2352
   2353static bool trans_EXT(DisasContext *s, arg_EXT *a)
   2354{
   2355    return do_EXT(s, a->rd, a->rn, a->rm, a->imm);
   2356}
   2357
   2358static bool trans_EXT_sve2(DisasContext *s, arg_rri *a)
   2359{
   2360    if (!dc_isar_feature(aa64_sve2, s)) {
   2361        return false;
   2362    }
   2363    return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm);
   2364}
   2365
   2366/*
   2367 *** SVE Permute - Unpredicated Group
   2368 */
   2369
   2370static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
   2371{
   2372    if (sve_access_check(s)) {
   2373        unsigned vsz = vec_full_reg_size(s);
   2374        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
   2375                             vsz, vsz, cpu_reg_sp(s, a->rn));
   2376    }
   2377    return true;
   2378}
   2379
/* DUP (indexed).  The immediate encodes both the element size, as
 * the position of the lowest set bit, and the element index, as the
 * bits above it.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        /* No size bit set within the low 5 bits: invalid encoding.  */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            /* Index in range: broadcast that element of Zn.  */
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
   2406
/* Shared expander for INSR (scalar) and INSR (SIMD&FP scalar):
 * pass VAL to the element-size-specific out-of-line helper.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    /* Form pointers to the destination and source registers.  */
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}
   2428
   2429static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
   2430{
   2431    if (sve_access_check(s)) {
   2432        TCGv_i64 t = tcg_temp_new_i64();
   2433        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
   2434        do_insr_i64(s, a, t);
   2435        tcg_temp_free_i64(t);
   2436    }
   2437    return true;
   2438}
   2439
   2440static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
   2441{
   2442    if (sve_access_check(s)) {
   2443        do_insr_i64(s, a, cpu_reg(s, a->rm));
   2444    }
   2445    return true;
   2446}
   2447
   2448static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
   2449{
   2450    static gen_helper_gvec_2 * const fns[4] = {
   2451        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
   2452        gen_helper_sve_rev_s, gen_helper_sve_rev_d
   2453    };
   2454
   2455    if (sve_access_check(s)) {
   2456        gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
   2457    }
   2458    return true;
   2459}
   2460
   2461static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
   2462{
   2463    static gen_helper_gvec_3 * const fns[4] = {
   2464        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
   2465        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
   2466    };
   2467
   2468    if (sve_access_check(s)) {
   2469        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
   2470    }
   2471    return true;
   2472}
   2473
   2474static bool trans_TBL_sve2(DisasContext *s, arg_rrr_esz *a)
   2475{
   2476    static gen_helper_gvec_4 * const fns[4] = {
   2477        gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
   2478        gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
   2479    };
   2480
   2481    if (!dc_isar_feature(aa64_sve2, s)) {
   2482        return false;
   2483    }
   2484    if (sve_access_check(s)) {
   2485        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn,
   2486                          (a->rn + 1) % 32, a->rm, 0);
   2487    }
   2488    return true;
   2489}
   2490
   2491static bool trans_TBX(DisasContext *s, arg_rrr_esz *a)
   2492{
   2493    static gen_helper_gvec_3 * const fns[4] = {
   2494        gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
   2495        gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
   2496    };
   2497
   2498    if (!dc_isar_feature(aa64_sve2, s)) {
   2499        return false;
   2500    }
   2501    if (sve_access_check(s)) {
   2502        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
   2503    }
   2504    return true;
   2505}
   2506
   2507static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
   2508{
   2509    static gen_helper_gvec_2 * const fns[4][2] = {
   2510        { NULL, NULL },
   2511        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
   2512        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
   2513        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
   2514    };
   2515
   2516    if (a->esz == 0) {
   2517        return false;
   2518    }
   2519    if (sve_access_check(s)) {
   2520        unsigned vsz = vec_full_reg_size(s);
   2521        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
   2522                           vec_full_reg_offset(s, a->rn)
   2523                           + (a->h ? vsz / 2 : 0),
   2524                           vsz, vsz, 0, fns[a->esz][a->u]);
   2525    }
   2526    return true;
   2527}
   2528
   2529/*
   2530 *** SVE Permute - Predicates Group
   2531 */
   2532
/* Shared expander for the three-operand predicate permutes
 * (ZIP/UZP/TRN).  HIGH_ODD selects the second form of each pair and
 * is passed to the helper in the descriptor's DATA field.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    uint32_t desc = 0;

    /* Predicate sizes are too small for simd_desc; build a PREDDESC.  */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}
   2565
/* Shared expander for the two-operand predicate permutes
 * (REV, PUNPKLO/PUNPKHI).  HIGH_ODD is passed to the helper in the
 * descriptor's DATA field.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes are too small for simd_desc; build a PREDDESC.  */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
   2594
   2595static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
   2596{
   2597    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
   2598}
   2599
   2600static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
   2601{
   2602    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
   2603}
   2604
   2605static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
   2606{
   2607    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
   2608}
   2609
   2610static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
   2611{
   2612    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
   2613}
   2614
   2615static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
   2616{
   2617    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
   2618}
   2619
   2620static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
   2621{
   2622    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
   2623}
   2624
   2625static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
   2626{
   2627    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
   2628}
   2629
   2630static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
   2631{
   2632    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
   2633}
   2634
   2635static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
   2636{
   2637    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
   2638}
   2639
   2640/*
   2641 *** SVE Permute - Interleaving Group
   2642 */
   2643
   2644static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
   2645{
   2646    static gen_helper_gvec_3 * const fns[4] = {
   2647        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
   2648        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
   2649    };
   2650
   2651    if (sve_access_check(s)) {
   2652        unsigned vsz = vec_full_reg_size(s);
   2653        unsigned high_ofs = high ? vsz / 2 : 0;
   2654        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
   2655                           vec_full_reg_offset(s, a->rn) + high_ofs,
   2656                           vec_full_reg_offset(s, a->rm) + high_ofs,
   2657                           vsz, vsz, 0, fns[a->esz]);
   2658    }
   2659    return true;
   2660}
   2661
   2662static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
   2663                            gen_helper_gvec_3 *fn)
   2664{
   2665    if (sve_access_check(s)) {
   2666        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
   2667    }
   2668    return true;
   2669}
   2670
   2671static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
   2672{
   2673    return do_zip(s, a, false);
   2674}
   2675
   2676static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
   2677{
   2678    return do_zip(s, a, true);
   2679}
   2680
   2681static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
   2682{
   2683    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
   2684        return false;
   2685    }
   2686    if (sve_access_check(s)) {
   2687        unsigned vsz = vec_full_reg_size(s);
   2688        unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
   2689        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
   2690                           vec_full_reg_offset(s, a->rn) + high_ofs,
   2691                           vec_full_reg_offset(s, a->rm) + high_ofs,
   2692                           vsz, vsz, 0, gen_helper_sve2_zip_q);
   2693    }
   2694    return true;
   2695}
   2696
   2697static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
   2698{
   2699    return do_zip_q(s, a, false);
   2700}
   2701
   2702static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
   2703{
   2704    return do_zip_q(s, a, true);
   2705}
   2706
   2707static gen_helper_gvec_3 * const uzp_fns[4] = {
   2708    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
   2709    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
   2710};
   2711
   2712static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
   2713{
   2714    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
   2715}
   2716
   2717static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
   2718{
   2719    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
   2720}
   2721
   2722static bool trans_UZP1_q(DisasContext *s, arg_rrr_esz *a)
   2723{
   2724    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
   2725        return false;
   2726    }
   2727    return do_zzz_data_ool(s, a, 0, gen_helper_sve2_uzp_q);
   2728}
   2729
   2730static bool trans_UZP2_q(DisasContext *s, arg_rrr_esz *a)
   2731{
   2732    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
   2733        return false;
   2734    }
   2735    return do_zzz_data_ool(s, a, 16, gen_helper_sve2_uzp_q);
   2736}
   2737
/* Out-of-line TRN helpers, indexed by element size (a->esz).  */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};
   2742
   2743static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
   2744{
   2745    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
   2746}
   2747
   2748static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
   2749{
   2750    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
   2751}
   2752
   2753static bool trans_TRN1_q(DisasContext *s, arg_rrr_esz *a)
   2754{
   2755    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
   2756        return false;
   2757    }
   2758    return do_zzz_data_ool(s, a, 0, gen_helper_sve2_trn_q);
   2759}
   2760
   2761static bool trans_TRN2_q(DisasContext *s, arg_rrr_esz *a)
   2762{
   2763    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
   2764        return false;
   2765    }
   2766    return do_zzz_data_ool(s, a, 16, gen_helper_sve2_trn_q);
   2767}
   2768
   2769/*
   2770 *** SVE Permute Vector - Predicated Group
   2771 */
   2772
   2773static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
   2774{
   2775    static gen_helper_gvec_3 * const fns[4] = {
   2776        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
   2777    };
   2778    return do_zpz_ool(s, a, fns[a->esz]);
   2779}
   2780
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned desc = 0;

    /* Pack the exact predicate size and element size for the helper.  */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    /* Pass a pointer to the predicate register PG.  */
    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}
   2805
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two vector length: wrap with a simple mask.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise wrap with a conditional move: last >= vsz ? 0 : last.
         * A single increment can overshoot by at most one element, so an
         * unsigned compare against vsz suffices.  */
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
   2824
/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* The not-found value from find_last_active is -(1 << esz), which
         * masks to the offset of the final element.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* last < 0 ? vsz - (1 << esz) : last  */
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
   2840
/* Load an unsigned element of ESZ from BASE+OFS.
 * Returns a new i64 temp that the caller must free.
 */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    /* Zero-extending loads for the sub-64-bit element sizes.  */
    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
   2864
/* Load an unsigned element of ESZ from RM[LAST].
 * LAST is a byte offset into the vector; it is consumed (possibly
 * modified for big-endian hosts) but not freed here.
 * Returns a new i64 temp that the caller must free.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
   2890
/* Compute CLAST for a Zreg.  */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* LAST must be a local temp: its value is used across a branch.  */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    /* last < 0 means no active element was found.  */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    /* Broadcast the selected element to every lane of Zd.  */
    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
   2938
   2939static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
   2940{
   2941    return do_clast_vector(s, a, false);
   2942}
   2943
   2944static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
   2945{
   2946    return do_clast_vector(s, a, true);
   2947}
   2948
/* Compute CLAST for a scalar.
 * REG_VAL holds the fallback value and receives the result: if an
 * active element exists it is replaced by that element, otherwise it
 * is left unchanged.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing.  */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    /* reg_val = (original last >= 0) ? ele : reg_val  */
    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
   2981
   2982/* Compute CLAST for a Vreg.  */
   2983static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
   2984{
   2985    if (sve_access_check(s)) {
   2986        int esz = a->esz;
   2987        int ofs = vec_reg_offset(s, a->rd, 0, esz);
   2988        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
   2989
   2990        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
   2991        write_fp_dreg(s, a->rd, reg);
   2992        tcg_temp_free_i64(reg);
   2993    }
   2994    return true;
   2995}
   2996
   2997static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
   2998{
   2999    return do_clast_fp(s, a, false);
   3000}
   3001
   3002static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
   3003{
   3004    return do_clast_fp(s, a, true);
   3005}
   3006
/* Compute CLAST for a Xreg.  Xd supplies the fallback value, which is
 * first zero-extended to the element size, and receives the result.
 */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Narrow the fallback value to the element width.  */
    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        /* 64-bit elements need no extension.  */
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
   3036
   3037static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
   3038{
   3039    return do_clast_general(s, a, false);
   3040}
   3041
   3042static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
   3043{
   3044    return do_clast_general(s, a, true);
   3045}
   3046
   3047/* Compute LAST for a scalar.  */
   3048static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
   3049                               int pg, int rm, bool before)
   3050{
   3051    TCGv_i32 last = tcg_temp_new_i32();
   3052    TCGv_i64 ret;
   3053
   3054    find_last_active(s, last, esz, pg);
   3055    if (before) {
   3056        wrap_last_active(s, last, esz);
   3057    } else {
   3058        incr_last_active(s, last, esz);
   3059    }
   3060
   3061    ret = load_last_active(s, last, rm, esz);
   3062    tcg_temp_free_i32(last);
   3063    return ret;
   3064}
   3065
   3066/* Compute LAST for a Vreg.  */
   3067static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
   3068{
   3069    if (sve_access_check(s)) {
   3070        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
   3071        write_fp_dreg(s, a->rd, val);
   3072        tcg_temp_free_i64(val);
   3073    }
   3074    return true;
   3075}
   3076
   3077static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
   3078{
   3079    return do_last_fp(s, a, false);
   3080}
   3081
   3082static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
   3083{
   3084    return do_last_fp(s, a, true);
   3085}
   3086
   3087/* Compute LAST for a Xreg.  */
   3088static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
   3089{
   3090    if (sve_access_check(s)) {
   3091        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
   3092        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
   3093        tcg_temp_free_i64(val);
   3094    }
   3095    return true;
   3096}
   3097
   3098static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
   3099{
   3100    return do_last_general(s, a, false);
   3101}
   3102
   3103static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
   3104{
   3105    return do_last_general(s, a, true);
   3106}
   3107
   3108static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
   3109{
   3110    if (sve_access_check(s)) {
   3111        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
   3112    }
   3113    return true;
   3114}
   3115
   3116static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
   3117{
   3118    if (sve_access_check(s)) {
   3119        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
   3120        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
   3121        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
   3122        tcg_temp_free_i64(t);
   3123    }
   3124    return true;
   3125}
   3126
   3127static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
   3128{
   3129    static gen_helper_gvec_3 * const fns[4] = {
   3130        NULL,
   3131        gen_helper_sve_revb_h,
   3132        gen_helper_sve_revb_s,
   3133        gen_helper_sve_revb_d,
   3134    };
   3135    return do_zpz_ool(s, a, fns[a->esz]);
   3136}
   3137
   3138static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
   3139{
   3140    static gen_helper_gvec_3 * const fns[4] = {
   3141        NULL,
   3142        NULL,
   3143        gen_helper_sve_revh_s,
   3144        gen_helper_sve_revh_d,
   3145    };
   3146    return do_zpz_ool(s, a, fns[a->esz]);
   3147}
   3148
   3149static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
   3150{
   3151    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
   3152}
   3153
   3154static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
   3155{
   3156    static gen_helper_gvec_3 * const fns[4] = {
   3157        gen_helper_sve_rbit_b,
   3158        gen_helper_sve_rbit_h,
   3159        gen_helper_sve_rbit_s,
   3160        gen_helper_sve_rbit_d,
   3161    };
   3162    return do_zpz_ool(s, a, fns[a->esz]);
   3163}
   3164
   3165static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
   3166{
   3167    if (sve_access_check(s)) {
   3168        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
   3169                          a->rd, a->rn, a->rm, a->pg, a->esz);
   3170    }
   3171    return true;
   3172}
   3173
   3174static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
   3175{
   3176    if (!dc_isar_feature(aa64_sve2, s)) {
   3177        return false;
   3178    }
   3179    if (sve_access_check(s)) {
   3180        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
   3181                          a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
   3182    }
   3183    return true;
   3184}
   3185
   3186/*
   3187 *** SVE Integer Compare - Vectors Group
   3188 */
   3189
/* Expand a predicated two-vector compare into GEN_FN, an out-of-line
 * flag-setting helper, and update NZCV from its return value.
 * A NULL GEN_FN marks an invalid element size for the instruction.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* T carries the descriptor in and the predicate test result out.  */
    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
   3228
/* Generate the trans_*_ppzz functions for the integer compare (vectors)
 * instructions, with one out-of-line helper per element size.  */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ
   3247
/* Generate the trans_*_ppzw functions for the wide-element compares.
 * There is no doubleword form: the NULL entry makes do_ppzz_flags
 * reject esz == 3.  */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
   3270
   3271/*
   3272 *** SVE Integer Compare - Immediate Groups
   3273 */
   3274
/* Expand a predicated compare-with-immediate into GEN_FN, an out-of-line
 * flag-setting helper, and update NZCV from its return value.  The
 * immediate is passed through the simd_desc data field.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* T carries the descriptor in and the predicate test result out.  */
    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
   3310
/* Generate the trans_*_ppzi functions for the integer compare
 * (immediate) instructions, one helper per element size.  */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
   3333
   3334/*
   3335 *** SVE Partition Break Group
   3336 */
   3337
/* Expand a 3-predicate break insn: FN is the plain helper, FN_S the
 * flag-setting variant, selected by a->s.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* Flag-setting form: T carries the predicate test result out.  */
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
   3372
/* Expand a 2-predicate break insn: FN is the plain helper, FN_S the
 * flag-setting variant, selected by a->s.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* Flag-setting form: T carries the predicate test result out.  */
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
   3404
/* BRKPA, with the flag-setting BRKPAS selected by a->s.  */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}
   3409
/* BRKPB, with the flag-setting BRKPBS selected by a->s.  */
static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}
   3414
/* BRKA (merging), with the flag-setting BRKAS selected by a->s.  */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}
   3419
/* BRKB (merging), with the flag-setting BRKBS selected by a->s.  */
static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}
   3424
/* BRKA (zeroing), with the flag-setting BRKAS selected by a->s.  */
static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}
   3429
/* BRKB (zeroing), with the flag-setting BRKBS selected by a->s.  */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}
   3434
/* BRKN, with the flag-setting BRKNS selected by a->s.  */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
   3439
   3440/*
   3441 *** SVE Predicate Count Group
   3442 */
   3443
/* Set VAL to the count of active elements of size ESZ in PN, as
 * governed by PG (the ARM CountPerPattern/CNTP operation).
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        /* The whole predicate fits in one i64: count bits inline.  */
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        /* Larger predicates: use the out-of-line helper.  */
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;
        TCGv_i32 t_desc;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}
   3485
   3486static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
   3487{
   3488    if (sve_access_check(s)) {
   3489        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
   3490    }
   3491    return true;
   3492}
   3493
   3494static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
   3495{
   3496    if (sve_access_check(s)) {
   3497        TCGv_i64 reg = cpu_reg(s, a->rd);
   3498        TCGv_i64 val = tcg_temp_new_i64();
   3499
   3500        do_cntp(s, val, a->esz, a->pg, a->pg);
   3501        if (a->d) {
   3502            tcg_gen_sub_i64(reg, reg, val);
   3503        } else {
   3504            tcg_gen_add_i64(reg, reg, val);
   3505        }
   3506        tcg_temp_free_i64(val);
   3507    }
   3508    return true;
   3509}
   3510
   3511static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
   3512{
   3513    if (a->esz == 0) {
   3514        return false;
   3515    }
   3516    if (sve_access_check(s)) {
   3517        unsigned vsz = vec_full_reg_size(s);
   3518        TCGv_i64 val = tcg_temp_new_i64();
   3519        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
   3520
   3521        do_cntp(s, val, a->esz, a->pg, a->pg);
   3522        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
   3523                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
   3524    }
   3525    return true;
   3526}
   3527
   3528static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
   3529{
   3530    if (sve_access_check(s)) {
   3531        TCGv_i64 reg = cpu_reg(s, a->rd);
   3532        TCGv_i64 val = tcg_temp_new_i64();
   3533
   3534        do_cntp(s, val, a->esz, a->pg, a->pg);
   3535        do_sat_addsub_32(reg, val, a->u, a->d);
   3536    }
   3537    return true;
   3538}
   3539
   3540static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
   3541{
   3542    if (sve_access_check(s)) {
   3543        TCGv_i64 reg = cpu_reg(s, a->rd);
   3544        TCGv_i64 val = tcg_temp_new_i64();
   3545
   3546        do_cntp(s, val, a->esz, a->pg, a->pg);
   3547        do_sat_addsub_64(reg, val, a->u, a->d);
   3548    }
   3549    return true;
   3550}
   3551
   3552static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
   3553{
   3554    if (a->esz == 0) {
   3555        return false;
   3556    }
   3557    if (sve_access_check(s)) {
   3558        TCGv_i64 val = tcg_temp_new_i64();
   3559        do_cntp(s, val, a->esz, a->pg, a->pg);
   3560        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
   3561    }
   3562    return true;
   3563}
   3564
   3565/*
   3566 *** SVE Integer Compare Scalars Group
   3567 */
   3568
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    /* CTERMEQ sets N when rn == rm; CTERMNE when rn != rm.  */
    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    /* NF temporarily holds the 0/1 comparison result.  */
    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF.  */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
   3593
/* Expand the WHILE{LT,LE,LO,LS,GT,GE,HI,HS} instructions by computing
 * the number of leading true elements and handing that count to an
 * out-of-line helper that materializes the predicate and flags.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    /* For the 32-bit forms, extend the operands per a->u.  */
    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    /* tmax = number of elements in the vector.  */
    tmax = tcg_const_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration.  */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length.  This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum.  */
    tcg_gen_umin_i64(t0, t0, tmax);
    tcg_temp_free_i64(tmax);

    /* Set the count to zero if the condition is false.  */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type.  */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits.  */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    /* The helper returns the predicate test result in t2.  */
    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, t3);
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, t3);
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
   3707
   3708static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
   3709{
   3710    TCGv_i64 op0, op1, diff, t1, tmax;
   3711    TCGv_i32 t2, t3;
   3712    TCGv_ptr ptr;
   3713    unsigned vsz = vec_full_reg_size(s);
   3714    unsigned desc = 0;
   3715
   3716    if (!dc_isar_feature(aa64_sve2, s)) {
   3717        return false;
   3718    }
   3719    if (!sve_access_check(s)) {
   3720        return true;
   3721    }
   3722
   3723    op0 = read_cpu_reg(s, a->rn, 1);
   3724    op1 = read_cpu_reg(s, a->rm, 1);
   3725
   3726    tmax = tcg_const_i64(vsz);
   3727    diff = tcg_temp_new_i64();
   3728
   3729    if (a->rw) {
   3730        /* WHILERW */
   3731        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
   3732        t1 = tcg_temp_new_i64();
   3733        tcg_gen_sub_i64(diff, op0, op1);
   3734        tcg_gen_sub_i64(t1, op1, op0);
   3735        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
   3736        tcg_temp_free_i64(t1);
   3737        /* Round down to a multiple of ESIZE.  */
   3738        tcg_gen_andi_i64(diff, diff, -1 << a->esz);
   3739        /* If op1 == op0, diff == 0, and the condition is always true. */
   3740        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
   3741    } else {
   3742        /* WHILEWR */
   3743        tcg_gen_sub_i64(diff, op1, op0);
   3744        /* Round down to a multiple of ESIZE.  */
   3745        tcg_gen_andi_i64(diff, diff, -1 << a->esz);
   3746        /* If op0 >= op1, diff <= 0, the condition is always true. */
   3747        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
   3748    }
   3749
   3750    /* Bound to the maximum.  */
   3751    tcg_gen_umin_i64(diff, diff, tmax);
   3752    tcg_temp_free_i64(tmax);
   3753
   3754    /* Since we're bounded, pass as a 32-bit type.  */
   3755    t2 = tcg_temp_new_i32();
   3756    tcg_gen_extrl_i64_i32(t2, diff);
   3757    tcg_temp_free_i64(diff);
   3758
   3759    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
   3760    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
   3761    t3 = tcg_const_i32(desc);
   3762
   3763    ptr = tcg_temp_new_ptr();
   3764    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
   3765
   3766    gen_helper_sve_whilel(t2, ptr, t2, t3);
   3767    do_pred_flags(t2);
   3768
   3769    tcg_temp_free_ptr(ptr);
   3770    tcg_temp_free_i32(t2);
   3771    tcg_temp_free_i32(t3);
   3772    return true;
   3773}
   3774
   3775/*
   3776 *** SVE Integer Wide Immediate - Unpredicated Group
   3777 */
   3778
   3779static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
   3780{
   3781    if (a->esz == 0) {
   3782        return false;
   3783    }
   3784    if (sve_access_check(s)) {
   3785        unsigned vsz = vec_full_reg_size(s);
   3786        int dofs = vec_full_reg_offset(s, a->rd);
   3787        uint64_t imm;
   3788
   3789        /* Decode the VFP immediate.  */
   3790        imm = vfp_expand_imm(a->esz, a->imm);
   3791        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
   3792    }
   3793    return true;
   3794}
   3795
   3796static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
   3797{
   3798    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
   3799        return false;
   3800    }
   3801    if (sve_access_check(s)) {
   3802        unsigned vsz = vec_full_reg_size(s);
   3803        int dofs = vec_full_reg_offset(s, a->rd);
   3804
   3805        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
   3806    }
   3807    return true;
   3808}
   3809
   3810static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
   3811{
   3812    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
   3813        return false;
   3814    }
   3815    if (sve_access_check(s)) {
   3816        unsigned vsz = vec_full_reg_size(s);
   3817        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
   3818                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
   3819    }
   3820    return true;
   3821}
   3822
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    /*
     * SUB (immediate) is ADD of the negated immediate.  The negation
     * mutates the decode-scratch argument in place, which appears safe
     * since 'a' is per-instruction decode state.
     */
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
   3828
/*
 * SUBR (immediate): reversed subtract, computing imm - element.
 * Uses a scalar-first GVecGen2s expansion so the immediate is the
 * left-hand operand of the subtraction.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    /* One expansion descriptor per element size, indexed by a->esz. */
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* Insn bit 13 is the immediate shift; shifted bytes are invalid. */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        tcg_temp_free_i64(c);
    }
    return true;
}
   3873
   3874static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
   3875{
   3876    if (sve_access_check(s)) {
   3877        unsigned vsz = vec_full_reg_size(s);
   3878        tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
   3879                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
   3880    }
   3881    return true;
   3882}
   3883
   3884static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
   3885{
   3886    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
   3887        return false;
   3888    }
   3889    if (sve_access_check(s)) {
   3890        TCGv_i64 val = tcg_const_i64(a->imm);
   3891        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
   3892        tcg_temp_free_i64(val);
   3893    }
   3894    return true;
   3895}
   3896
/* SQADD (immediate): signed saturating add. */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}

/* UQADD (immediate): unsigned saturating add. */
static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}

/* SQSUB (immediate): signed saturating subtract. */
static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}

/* UQSUB (immediate): unsigned saturating subtract. */
static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}
   3916
   3917static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
   3918{
   3919    if (sve_access_check(s)) {
   3920        unsigned vsz = vec_full_reg_size(s);
   3921        TCGv_i64 c = tcg_const_i64(a->imm);
   3922
   3923        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
   3924                            vec_full_reg_offset(s, a->rn),
   3925                            c, vsz, vsz, 0, fn);
   3926        tcg_temp_free_i64(c);
   3927    }
   3928    return true;
   3929}
   3930
/*
 * SMAX/SMIN/UMAX/UMIN (immediate): expand via the per-element-size
 * out-of-line helpers through do_zzi_ool.
 */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
   3947
   3948static bool trans_DOT_zzzz(DisasContext *s, arg_DOT_zzzz *a)
   3949{
   3950    static gen_helper_gvec_4 * const fns[2][2] = {
   3951        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
   3952        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
   3953    };
   3954
   3955    if (sve_access_check(s)) {
   3956        gen_gvec_ool_zzzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0);
   3957    }
   3958    return true;
   3959}
   3960
   3961/*
   3962 * SVE Multiply - Indexed
   3963 */
   3964
   3965static bool do_zzxz_ool(DisasContext *s, arg_rrxr_esz *a,
   3966                        gen_helper_gvec_4 *fn)
   3967{
   3968    if (fn == NULL) {
   3969        return false;
   3970    }
   3971    if (sve_access_check(s)) {
   3972        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
   3973    }
   3974    return true;
   3975}
   3976
/*
 * SDOT/UDOT (indexed).  Note the helper suffix names the multiplicand
 * element size (b or h); the widened accumulator size (s or d) is in
 * the trans_ function name.
 */
#define DO_RRXR(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)  \
    { return do_zzxz_ool(s, a, FUNC); }

DO_RRXR(trans_SDOT_zzxw_s, gen_helper_gvec_sdot_idx_b)
DO_RRXR(trans_SDOT_zzxw_d, gen_helper_gvec_sdot_idx_h)
DO_RRXR(trans_UDOT_zzxw_s, gen_helper_gvec_udot_idx_b)
DO_RRXR(trans_UDOT_zzxw_d, gen_helper_gvec_udot_idx_h)
   3985
   3986static bool trans_SUDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
   3987{
   3988    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
   3989        return false;
   3990    }
   3991    return do_zzxz_ool(s, a, gen_helper_gvec_sudot_idx_b);
   3992}
   3993
   3994static bool trans_USDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
   3995{
   3996    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
   3997        return false;
   3998    }
   3999    return do_zzxz_ool(s, a, gen_helper_gvec_usdot_idx_b);
   4000}
   4001
   4002#undef DO_RRXR
   4003
   4004static bool do_sve2_zzz_data(DisasContext *s, int rd, int rn, int rm, int data,
   4005                             gen_helper_gvec_3 *fn)
   4006{
   4007    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
   4008        return false;
   4009    }
   4010    if (sve_access_check(s)) {
   4011        unsigned vsz = vec_full_reg_size(s);
   4012        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
   4013                           vec_full_reg_offset(s, rn),
   4014                           vec_full_reg_offset(s, rm),
   4015                           vsz, vsz, data, fn);
   4016    }
   4017    return true;
   4018}
   4019
/* SVE2 multiply (indexed): simd_data carries the element index. */
#define DO_SVE2_RRX(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrx_esz *a)  \
    { return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, a->index, FUNC); }

DO_SVE2_RRX(trans_MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(trans_MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(trans_MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(trans_SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(trans_SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(trans_SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(trans_SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(trans_SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(trans_SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX
   4037
/*
 * SVE2 long multiply (indexed), bottom/top variants: TOP (select odd
 * source elements) is packed into bit 0 of simd_data, with the element
 * index in the bits above it.
 */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    static bool NAME(DisasContext *s, arg_rrx_esz *a)           \
    {                                                           \
        return do_sve2_zzz_data(s, a->rd, a->rn, a->rm,         \
                                (a->index << 1) | TOP, FUNC);   \
    }

DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(trans_SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(trans_SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(trans_SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(trans_SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(trans_UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(trans_UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(trans_UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(trans_UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB
   4061
   4062static bool do_sve2_zzzz_data(DisasContext *s, int rd, int rn, int rm, int ra,
   4063                              int data, gen_helper_gvec_4 *fn)
   4064{
   4065    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
   4066        return false;
   4067    }
   4068    if (sve_access_check(s)) {
   4069        unsigned vsz = vec_full_reg_size(s);
   4070        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
   4071                           vec_full_reg_offset(s, rn),
   4072                           vec_full_reg_offset(s, rm),
   4073                           vec_full_reg_offset(s, ra),
   4074                           vsz, vsz, data, fn);
   4075    }
   4076    return true;
   4077}
   4078
/* SVE2 multiply-add (indexed): ra is the accumulator operand. */
#define DO_SVE2_RRXR(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)  \
    { return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, a->index, FUNC); }

DO_SVE2_RRXR(trans_MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(trans_MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(trans_MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(trans_MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(trans_MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(trans_MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR
   4100
/*
 * SVE2 long multiply-accumulate (indexed), bottom/top variants.
 * TOP is packed into bit 0 of simd_data with the element index above.
 * NOTE(review): a->rd (not a->ra) is passed as the accumulator operand;
 * for these forms the destination appears to also be the addend --
 * verify against the decode definitions.
 */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)          \
    {                                                           \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->rd, \
                                 (a->index << 1) | TOP, FUNC);  \
    }

DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB
   4139
/*
 * SVE2 complex multiply-add (indexed): the 2-bit rotation is packed
 * into simd_data bits [1:0] below the element index.
 */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    static bool trans_##NAME(DisasContext *s, arg_##NAME *a)       \
    {                                                              \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra,    \
                                 (a->index << 2) | a->rot, FUNC);  \
    }

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
   4157
   4158/*
   4159 *** SVE Floating Point Multiply-Add Indexed Group
   4160 */
   4161
   4162static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
   4163{
   4164    static gen_helper_gvec_4_ptr * const fns[3] = {
   4165        gen_helper_gvec_fmla_idx_h,
   4166        gen_helper_gvec_fmla_idx_s,
   4167        gen_helper_gvec_fmla_idx_d,
   4168    };
   4169
   4170    if (sve_access_check(s)) {
   4171        unsigned vsz = vec_full_reg_size(s);
   4172        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   4173        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
   4174                           vec_full_reg_offset(s, a->rn),
   4175                           vec_full_reg_offset(s, a->rm),
   4176                           vec_full_reg_offset(s, a->ra),
   4177                           status, vsz, vsz, (a->index << 1) | sub,
   4178                           fns[a->esz - 1]);
   4179        tcg_temp_free_ptr(status);
   4180    }
   4181    return true;
   4182}
   4183
/* FMLA (indexed): fused multiply-add. */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, false);
}

/* FMLS (indexed): fused multiply-add with negated product. */
static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, true);
}
   4193
   4194/*
   4195 *** SVE Floating Point Multiply Indexed Group
   4196 */
   4197
   4198static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
   4199{
   4200    static gen_helper_gvec_3_ptr * const fns[3] = {
   4201        gen_helper_gvec_fmul_idx_h,
   4202        gen_helper_gvec_fmul_idx_s,
   4203        gen_helper_gvec_fmul_idx_d,
   4204    };
   4205
   4206    if (sve_access_check(s)) {
   4207        unsigned vsz = vec_full_reg_size(s);
   4208        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   4209        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
   4210                           vec_full_reg_offset(s, a->rn),
   4211                           vec_full_reg_offset(s, a->rm),
   4212                           status, vsz, vsz, a->index, fns[a->esz - 1]);
   4213        tcg_temp_free_ptr(status);
   4214    }
   4215    return true;
   4216}
   4217
   4218/*
   4219 *** SVE Floating Point Fast Reduction Group
   4220 */
   4221
   4222typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
   4223                                  TCGv_ptr, TCGv_i32);
   4224
/*
 * Expand a floating-point horizontal reduction: fn reduces vector rn
 * under predicate pg to a scalar, which is then written to the scalar
 * FP register rd.
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    /*
     * simd_data carries the vector size rounded up to a power of two;
     * presumably the helper uses this for a pairwise reduction tree --
     * confirm against the helper implementation.
     */
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    /* Pass pointers to the source vector and governing predicate. */
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);
    tcg_temp_free_i32(t_desc);

    /* Write the scalar result to FP register rd. */
    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
   4251
/*
 * Fast FP horizontal reductions; element size 0 (bytes) is not a
 * valid floating-point size, hence the fns[] table starts at half.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_fp_reduce * const fns[3] = {                       \
        gen_helper_sve_##name##_h,                                       \
        gen_helper_sve_##name##_s,                                       \
        gen_helper_sve_##name##_d,                                       \
    };                                                                   \
    if (a->esz == 0) {                                                   \
        return false;                                                    \
    }                                                                    \
    if (sve_access_check(s)) {                                           \
        do_reduce(s, a, fns[a->esz - 1]);                                \
    }                                                                    \
    return true;                                                         \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
   4274
   4275/*
   4276 *** SVE Floating Point Unary Operations - Unpredicated Group
   4277 */
   4278
   4279static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
   4280{
   4281    unsigned vsz = vec_full_reg_size(s);
   4282    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   4283
   4284    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
   4285                       vec_full_reg_offset(s, a->rn),
   4286                       status, vsz, vsz, 0, fn);
   4287    tcg_temp_free_ptr(status);
   4288}
   4289
   4290static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
   4291{
   4292    static gen_helper_gvec_2_ptr * const fns[3] = {
   4293        gen_helper_gvec_frecpe_h,
   4294        gen_helper_gvec_frecpe_s,
   4295        gen_helper_gvec_frecpe_d,
   4296    };
   4297    if (a->esz == 0) {
   4298        return false;
   4299    }
   4300    if (sve_access_check(s)) {
   4301        do_zz_fp(s, a, fns[a->esz - 1]);
   4302    }
   4303    return true;
   4304}
   4305
   4306static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
   4307{
   4308    static gen_helper_gvec_2_ptr * const fns[3] = {
   4309        gen_helper_gvec_frsqrte_h,
   4310        gen_helper_gvec_frsqrte_s,
   4311        gen_helper_gvec_frsqrte_d,
   4312    };
   4313    if (a->esz == 0) {
   4314        return false;
   4315    }
   4316    if (sve_access_check(s)) {
   4317        do_zz_fp(s, a, fns[a->esz - 1]);
   4318    }
   4319    return true;
   4320}
   4321
   4322/*
   4323 *** SVE Floating Point Compare with Zero Group
   4324 */
   4325
   4326static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
   4327                      gen_helper_gvec_3_ptr *fn)
   4328{
   4329    unsigned vsz = vec_full_reg_size(s);
   4330    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   4331
   4332    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
   4333                       vec_full_reg_offset(s, a->rn),
   4334                       pred_full_reg_offset(s, a->pg),
   4335                       status, vsz, vsz, 0, fn);
   4336    tcg_temp_free_ptr(status);
   4337}
   4338
/*
 * FP compare-with-zero instructions; byte element size is invalid
 * for floating point, so the fns[] table is indexed by esz - 1.
 */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
{                                                                 \
    static gen_helper_gvec_3_ptr * const fns[3] = {               \
        gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                \
        gen_helper_sve_##name##_d,                                \
    };                                                            \
    if (a->esz == 0) {                                            \
        return false;                                             \
    }                                                             \
    if (sve_access_check(s)) {                                    \
        do_ppz_fp(s, a, fns[a->esz - 1]);                         \
    }                                                             \
    return true;                                                  \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
   4364
   4365/*
   4366 *** SVE floating-point trig multiply-add coefficient
   4367 */
   4368
   4369static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
   4370{
   4371    static gen_helper_gvec_3_ptr * const fns[3] = {
   4372        gen_helper_sve_ftmad_h,
   4373        gen_helper_sve_ftmad_s,
   4374        gen_helper_sve_ftmad_d,
   4375    };
   4376
   4377    if (a->esz == 0) {
   4378        return false;
   4379    }
   4380    if (sve_access_check(s)) {
   4381        unsigned vsz = vec_full_reg_size(s);
   4382        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   4383        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
   4384                           vec_full_reg_offset(s, a->rn),
   4385                           vec_full_reg_offset(s, a->rm),
   4386                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
   4387        tcg_temp_free_ptr(status);
   4388    }
   4389    return true;
   4390}
   4391
   4392/*
   4393 *** SVE Floating Point Accumulating Reduction Group
   4394 */
   4395
/*
 * FADDA: ordered floating-point add reduction.  The initial value is
 * taken from element 0 of rn; the helper accumulates across the active
 * elements of rm under predicate pg; the result is written to rd.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    /* There are no byte-sized floating-point elements. */
    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Load the initial accumulator from element 0 of rn. */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    /* The helper accumulates in place: t_val is both input and output. */
    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
   4436
   4437/*
   4438 *** SVE Floating Point Arithmetic - Unpredicated Group
   4439 */
   4440
   4441static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
   4442                      gen_helper_gvec_3_ptr *fn)
   4443{
   4444    if (fn == NULL) {
   4445        return false;
   4446    }
   4447    if (sve_access_check(s)) {
   4448        unsigned vsz = vec_full_reg_size(s);
   4449        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   4450        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
   4451                           vec_full_reg_offset(s, a->rn),
   4452                           vec_full_reg_offset(s, a->rm),
   4453                           status, vsz, vsz, 0, fn);
   4454        tcg_temp_free_ptr(status);
   4455    }
   4456    return true;
   4457}
   4458
   4459
/*
 * Unpredicated three-operand FP instructions; the byte entry is NULL
 * because there are no byte-sized floating-point elements.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    return do_zzz_fp(s, a, fns[a->esz]);                            \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
   4478
   4479/*
   4480 *** SVE Floating Point Arithmetic - Predicated Group
   4481 */
   4482
   4483static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
   4484                       gen_helper_gvec_4_ptr *fn)
   4485{
   4486    if (fn == NULL) {
   4487        return false;
   4488    }
   4489    if (sve_access_check(s)) {
   4490        unsigned vsz = vec_full_reg_size(s);
   4491        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   4492        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
   4493                           vec_full_reg_offset(s, a->rn),
   4494                           vec_full_reg_offset(s, a->rm),
   4495                           pred_full_reg_offset(s, a->pg),
   4496                           status, vsz, vsz, 0, fn);
   4497        tcg_temp_free_ptr(status);
   4498    }
   4499    return true;
   4500}
   4501
/*
 * Generate trans_<NAME> for a predicated FP ZPZZ insn.  The helper is
 * selected by element size; esz == 0 (bytes) is rejected via the NULL
 * table entry in do_zpzz_fp.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_zpzz_fp(s, a, fns[a->esz]);                           \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
   4525
/* Out-of-line helper signature for a predicated vector-by-scalar FP op:
 * (zd, zn, pg, 64-bit scalar, fp status, simd desc).
 */
typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

/*
 * Expand ZD = ZN <op> SCALAR under predicate PG via helper FN.
 * The vector and predicate operands are passed as env-relative
 * pointers; the scalar is passed by value in an i64 regardless of
 * element size.
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    /* Half-precision ops use the FP16-flavoured status flags. */
    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_i32(desc);
    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
   4553
   4554static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
   4555                      gen_helper_sve_fp2scalar *fn)
   4556{
   4557    TCGv_i64 temp = tcg_const_i64(imm);
   4558    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
   4559    tcg_temp_free_i64(temp);
   4560}
   4561
/*
 * Generate trans_<NAME>_zpzi for an FP vector + implied-immediate insn.
 * The one-bit immediate selects between the two architecturally defined
 * constants (const0/const1), looked up in the encoding appropriate for
 * the element size; esz == 0 (bytes) is invalid.
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_sve_fp2scalar * const fns[3] = {                    \
        gen_helper_sve_##name##_h,                                        \
        gen_helper_sve_##name##_s,                                        \
        gen_helper_sve_##name##_d                                         \
    };                                                                    \
    static uint64_t const val[3][2] = {                                   \
        { float16_##const0, float16_##const1 },                           \
        { float32_##const0, float32_##const1 },                           \
        { float64_##const0, float64_##const1 },                           \
    };                                                                    \
    if (a->esz == 0) {                                                    \
        return false;                                                     \
    }                                                                     \
    if (sve_access_check(s)) {                                            \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
    }                                                                     \
    return true;                                                          \
}

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
   4594
   4595static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
   4596                      gen_helper_gvec_4_ptr *fn)
   4597{
   4598    if (fn == NULL) {
   4599        return false;
   4600    }
   4601    if (sve_access_check(s)) {
   4602        unsigned vsz = vec_full_reg_size(s);
   4603        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   4604        tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
   4605                           vec_full_reg_offset(s, a->rn),
   4606                           vec_full_reg_offset(s, a->rm),
   4607                           pred_full_reg_offset(s, a->pg),
   4608                           status, vsz, vsz, 0, fn);
   4609        tcg_temp_free_ptr(status);
   4610    }
   4611    return true;
   4612}
   4613
/*
 * Generate trans_<NAME>_ppzz for a predicated FP compare.  The helper
 * is selected by element size; esz == 0 (bytes) is rejected via the
 * NULL table entry in do_fp_cmp.
 */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)     \
{                                                                     \
    static gen_helper_gvec_4_ptr * const fns[4] = {                   \
        NULL, gen_helper_sve_##name##_h,                              \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
    };                                                                \
    return do_fp_cmp(s, a, fns[a->esz]);                              \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
   4633
   4634static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
   4635{
   4636    static gen_helper_gvec_4_ptr * const fns[3] = {
   4637        gen_helper_sve_fcadd_h,
   4638        gen_helper_sve_fcadd_s,
   4639        gen_helper_sve_fcadd_d
   4640    };
   4641
   4642    if (a->esz == 0) {
   4643        return false;
   4644    }
   4645    if (sve_access_check(s)) {
   4646        unsigned vsz = vec_full_reg_size(s);
   4647        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   4648        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
   4649                           vec_full_reg_offset(s, a->rn),
   4650                           vec_full_reg_offset(s, a->rm),
   4651                           pred_full_reg_offset(s, a->pg),
   4652                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
   4653        tcg_temp_free_ptr(status);
   4654    }
   4655    return true;
   4656}
   4657
   4658static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
   4659                    gen_helper_gvec_5_ptr *fn)
   4660{
   4661    if (a->esz == 0) {
   4662        return false;
   4663    }
   4664    if (sve_access_check(s)) {
   4665        unsigned vsz = vec_full_reg_size(s);
   4666        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   4667        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
   4668                           vec_full_reg_offset(s, a->rn),
   4669                           vec_full_reg_offset(s, a->rm),
   4670                           vec_full_reg_offset(s, a->ra),
   4671                           pred_full_reg_offset(s, a->pg),
   4672                           status, vsz, vsz, 0, fn);
   4673        tcg_temp_free_ptr(status);
   4674    }
   4675    return true;
   4676}
   4677
/*
 * Generate trans_<NAME> for a predicated FP multiply-add insn.  The
 * helper is selected by element size; esz == 0 is rejected inside
 * do_fmla (the NULL entry is never reached).
 */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_gvec_5_ptr * const fns[4] = {                  \
        NULL, gen_helper_sve_##name##_h,                             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
    };                                                               \
    return do_fmla(s, a, fns[a->esz]);                               \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
   4694
/* FCMLA (vectors): predicated FP complex multiply-add with rotate. */
static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
{
    /* One helper per FP element size; esz == 0 (bytes) is invalid. */
    static gen_helper_gvec_5_ptr * const fns[4] = {
        NULL,
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        /* The rotation is passed to the helper via simd_data.  */
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
   4720
/* FCMLA (indexed): shares the AdvSIMD indexed-complex-mla helpers. */
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    /* Only H and S element sizes have an indexed form. */
    static gen_helper_gvec_4_ptr * const fns[2] = {
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
    };

    /* The decoder only produces these esz values and rd == ra. */
    tcg_debug_assert(a->esz == 1 || a->esz == 2);
    tcg_debug_assert(a->rd == a->ra);
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        /* Index and rotation are packed together into simd_data:
         * rot in the low 2 bits, index above them.
         */
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz,
                           a->index * 4 + a->rot,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
   4744
   4745/*
   4746 *** SVE Floating Point Unary Operations Predicated Group
   4747 */
   4748
   4749static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
   4750                       bool is_fp16, gen_helper_gvec_3_ptr *fn)
   4751{
   4752    if (sve_access_check(s)) {
   4753        unsigned vsz = vec_full_reg_size(s);
   4754        TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
   4755        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
   4756                           vec_full_reg_offset(s, rn),
   4757                           pred_full_reg_offset(s, pg),
   4758                           status, vsz, vsz, 0, fn);
   4759        tcg_temp_free_ptr(status);
   4760    }
   4761    return true;
   4762}
   4763
/*
 * Predicated FP precision conversions (FCVT/BFCVT) and FP-to-integer
 * conversions (FCVTZS/FCVTZU).  The helper-name suffix gives source
 * and destination element sizes (e.g. _sh: single to half).  The
 * is_fp16 argument selects FPST_FPCR_F16 when the FP operand involved
 * is half precision.
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
{
    /* BFCVT requires the SVE BFloat16 extension. */
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}

/* FCVTZ* with a half-precision source uses the FP16 status flags. */
static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
   4871
/* Round-to-integral helpers, indexed by esz - 1 (H, S, D). */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};
   4877
/* FRINTI: round to integral using the current rounding mode. */
static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
{
    /* esz == 0 (bytes) has no FP interpretation. */
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}

/* FRINTX: as FRINTI but via the frintx helpers. */
static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frintx_h,
        gen_helper_sve_frintx_s,
        gen_helper_sve_frintx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
   4899
/*
 * Expand a round-to-integral insn with an explicit rounding MODE.
 * gen_helper_set_rmode installs MODE into the fp status and writes
 * the previous mode back into its destination (tmode), so the second
 * call restores the original rounding mode after the operation.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);

        /* Restore the rounding mode saved in tmode above. */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
   4921
/*
 * FRINTN/P/M/Z/A: round to integral with a fixed rounding mode,
 * expanded via do_frint_mode.  esz == 0 (bytes) is invalid.
 */
static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
}

static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
}

static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
}

static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
}

static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
}
   4961
/* FRECPX: predicated FP reciprocal exponent; esz == 0 is invalid. */
static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frecpx_h,
        gen_helper_sve_frecpx_s,
        gen_helper_sve_frecpx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}

/* FSQRT: predicated FP square root; esz == 0 is invalid. */
static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_fsqrt_h,
        gen_helper_sve_fsqrt_s,
        gen_helper_sve_fsqrt_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
   4987
/*
 * SCVTF/UCVTF: predicated signed/unsigned integer to FP conversions.
 * The helper-name suffix gives source and destination element sizes;
 * is_fp16 is set when the FP result is half precision.
 */
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}

static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
   5057
   5058/*
   5059 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
   5060 */
   5061
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Loads required: one per aligned doubleword, plus one per set bit
     * of the remainder (a remainder of 6 takes a 4-byte and a 2-byte load).
     */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    /* MTE-check the entire transfer up front. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small enough: unroll into straight-line loads. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Emit a TCG-level loop; the address and index must be local
         * temps so their values survive the backward branch.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        /* Store to env at byte offset vofs + i. */
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32 of the byte count gives the MO_SIZE log2. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte load followed by a 2-byte load,
             * merged little-endian into one 64-bit value.
             */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
   5152
/* Similarly for stores: write LEN bytes from env offset VOFS to
 * the address Rn + IMM.  Mirrors the structure of do_ldr above.
 */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Stores required: one per aligned doubleword, plus one per set bit
     * of the remainder (a remainder of 6 takes a 4-byte and a 2-byte store).
     */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    /* MTE-check the entire transfer up front. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small enough: unroll into straight-line stores. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Emit a TCG-level loop; the address and index must be local
         * temps so their values survive the backward branch.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        /* Load from env at byte offset vofs + i, then store to memory. */
        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32 of the byte count gives the MO_SIZE log2. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte store of the low half, then a 2-byte
             * store of the next 16 bits.
             */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}
   5237
/* LDR (vector): the immediate is scaled by the vector register size. */
static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

/* LDR (predicate): the immediate is scaled by the predicate size. */
static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

/* STR (vector): the immediate is scaled by the vector register size. */
static bool trans_STR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

/* STR (predicate): the immediate is scaled by the predicate size. */
static bool trans_STR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}
   5277
   5278/*
   5279 *** SVE Memory - Contiguous Load Group
   5280 */
   5281
/* The memory mode of the dtype.  Indexed by the 4-bit dtype field,
 * giving the size and signedness of the in-memory element.
 */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

/* Extract just the size (log2 bytes) from the dtype's memop. */
#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype.  Indexed by the 4-bit dtype field,
 * giving the log2 size of the destination vector element.
 */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
   5299
/*
 * Expand a predicated contiguous load/store of MTE_N elements per
 * active predicate bit, via the out-of-line helper FN.  ZT is the
 * first data register; IS_WRITE distinguishes stores for the MTE
 * descriptor.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        /* Pack the MTE check parameters above SVE_MTEDESC_SHIFT;
         * the helper performs the tag check itself.
         */
        int msz = dtype_msz(dtype);

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        /* No MTE: strip any tag bits from the address here instead. */
        addr = clean_data_tbi(s, addr);
    }

    /* zt occupies the low bits of the simd_data field. */
    desc = simd_desc(vsz, vsz, zt | desc);
    t_desc = tcg_const_i32(desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
   5337
/*
 * Helpers for contiguous predicated loads,
 * indexed by [mte][be][dtype][nreg].
 * nreg 0..3 selects the LD1..LD4 variant.  NULL entries are dtype/nreg
 * combinations with no instruction encoding; do_ld_zpa asserts they are
 * never reached.
 */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
        gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
      { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

      { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
        gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
      { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

      { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
        gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
      { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

      { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
        gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

    /* mte inactive, big-endian */
    { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
        gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
      { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

      { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
        gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
      { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

      { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
        gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
      { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

      { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
      { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
        gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
   5456
   5457static void do_ld_zpa(DisasContext *s, int zt, int pg,
   5458                      TCGv_i64 addr, int dtype, int nreg)
   5459{
   5460    gen_helper_gvec_mem *fn
   5461        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
   5462
   5463    /*
   5464     * While there are holes in the table, they are not
   5465     * accessible via the instruction encoding.
   5466     */
   5467    assert(fn != NULL);
   5468    do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
   5469}
   5470
   5471static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
   5472{
   5473    if (a->rm == 31) {
   5474        return false;
   5475    }
   5476    if (sve_access_check(s)) {
   5477        TCGv_i64 addr = new_tmp_a64(s);
   5478        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
   5479        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
   5480        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
   5481    }
   5482    return true;
   5483}
   5484
   5485static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
   5486{
   5487    if (sve_access_check(s)) {
   5488        int vsz = vec_full_reg_size(s);
   5489        int elements = vsz >> dtype_esz[a->dtype];
   5490        TCGv_i64 addr = new_tmp_a64(s);
   5491
   5492        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
   5493                         (a->imm * elements * (a->nreg + 1))
   5494                         << dtype_msz(a->dtype));
   5495        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
   5496    }
   5497    return true;
   5498}
   5499
/*
 * LDFF1 (scalar plus scalar): first-fault contiguous load.
 * The helper table is indexed by [mte][be][dtype].
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        /* Address is Rn + (Rm << msz), as for the normal LD1. */
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
   5597
/*
 * LDNF1 (scalar plus immediate): non-fault contiguous load.
 * The helper table is indexed by [mte][be][dtype].
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
   5698
/*
 * Load a single quadword with predication and replicate it to fill the
 * whole vector (LD1RQ).  The quadword itself is loaded via the ordinary
 * LD1 helper with the vector length clamped to 16 bytes.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int poff;

    /* Load the first quadword using the normal predicated load helpers.  */
    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        /* On a big-endian host the low 16 bits sit at the high offset. */
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        /* Stash the narrowed predicate in the scratch slot and use that. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* nreg index 0 selects the LD1 flavor of the helper. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));

    tcg_temp_free_ptr(t_pg);

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        int doff = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
    }
}
   5740
   5741static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
   5742{
   5743    if (a->rm == 31) {
   5744        return false;
   5745    }
   5746    if (sve_access_check(s)) {
   5747        int msz = dtype_msz(a->dtype);
   5748        TCGv_i64 addr = new_tmp_a64(s);
   5749        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
   5750        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
   5751        do_ldrq(s, a->rd, a->pg, addr, a->dtype);
   5752    }
   5753    return true;
   5754}
   5755
   5756static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
   5757{
   5758    if (sve_access_check(s)) {
   5759        TCGv_i64 addr = new_tmp_a64(s);
   5760        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
   5761        do_ldrq(s, a->rd, a->pg, addr, a->dtype);
   5762    }
   5763    return true;
   5764}
   5765
/*
 * Load a single octaword (32 bytes) with predication and replicate it in
 * 32-byte units across the vector (LD1RO).  Requires vsz >= 32; any tail
 * shorter than a full 32-byte unit is zeroed.
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers.  */

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        /* On a big-endian host the low 32 bits sit at the high offset. */
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        /* Stash the narrowed predicate in the scratch slot and use that. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* nreg index 0 selects the LD1 flavor of the helper. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    tcg_temp_free_ptr(t_pg);

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    /* vsz now holds the size of the sub-32-byte tail, if any. */
    vsz -= vsz_r32;
    if (vsz) {
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}
   5828
   5829static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
   5830{
   5831    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
   5832        return false;
   5833    }
   5834    if (a->rm == 31) {
   5835        return false;
   5836    }
   5837    if (sve_access_check(s)) {
   5838        TCGv_i64 addr = new_tmp_a64(s);
   5839        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
   5840        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
   5841        do_ldro(s, a->rd, a->pg, addr, a->dtype);
   5842    }
   5843    return true;
   5844}
   5845
   5846static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
   5847{
   5848    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
   5849        return false;
   5850    }
   5851    if (sve_access_check(s)) {
   5852        TCGv_i64 addr = new_tmp_a64(s);
   5853        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
   5854        do_ldro(s, a->rd, a->pg, addr, a->dtype);
   5855    }
   5856    return true;
   5857}
   5858
/*
 * Load and broadcast element (LD1R): load one scalar element and
 * replicate it to all active elements of Zd; inactive elements are
 * zeroed.  If no predicate bit is set, the load is skipped entirely
 * (branch to @over) and only the zeroing runs.
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!sve_access_check(s)) {
        return true;
    }

    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs.  */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Larger predicates: test via "index of last active element". */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data.  */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        finalize_memop(s, dtype_mop[a->dtype]));

    /* Broadcast to *all* elements.  */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements.  */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
   5910
/*
 * Emit a contiguous predicated store (ST1..ST4).
 * fn_single is indexed by [mte][be][msz][esz] and covers ST1, where the
 * element size may exceed the memory size (truncating stores); the NULL
 * holes are msz > esz combinations that have no encoding.
 * fn_multiple is indexed by [mte][be][nreg - 1][msz] for ST2/ST3/ST4,
 * where msz == esz is enforced by the encoding.
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_le_r,
              gen_helper_sve_st1hs_le_r,
              gen_helper_sve_st1hd_le_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r,
              gen_helper_sve_st1sd_le_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r } },
          { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_be_r,
              gen_helper_sve_st1hs_be_r,
              gen_helper_sve_st1hd_be_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r,
              gen_helper_sve_st1sd_be_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r } } },

        { { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_le_r_mte,
              gen_helper_sve_st1hs_le_r_mte,
              gen_helper_sve_st1hd_le_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r_mte,
              gen_helper_sve_st1sd_le_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r_mte } },
          { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_be_r_mte,
              gen_helper_sve_st1hs_be_r_mte,
              gen_helper_sve_st1hd_be_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r_mte,
              gen_helper_sve_st1sd_be_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r_mte } } },
    };
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_le_r,
              gen_helper_sve_st2ss_le_r,
              gen_helper_sve_st2dd_le_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_le_r,
              gen_helper_sve_st3ss_le_r,
              gen_helper_sve_st3dd_le_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_le_r,
              gen_helper_sve_st4ss_le_r,
              gen_helper_sve_st4dd_le_r } },
          { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_be_r,
              gen_helper_sve_st2ss_be_r,
              gen_helper_sve_st2dd_be_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_be_r,
              gen_helper_sve_st3ss_be_r,
              gen_helper_sve_st3dd_be_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_be_r,
              gen_helper_sve_st4ss_be_r,
              gen_helper_sve_st4dd_be_r } } },
        { { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_le_r_mte,
              gen_helper_sve_st2ss_le_r_mte,
              gen_helper_sve_st2dd_le_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_le_r_mte,
              gen_helper_sve_st3ss_le_r_mte,
              gen_helper_sve_st3dd_le_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_le_r_mte,
              gen_helper_sve_st4ss_le_r_mte,
              gen_helper_sve_st4dd_le_r_mte } },
          { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_be_r_mte,
              gen_helper_sve_st2ss_be_r_mte,
              gen_helper_sve_st2dd_be_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_be_r_mte,
              gen_helper_sve_st3ss_be_r_mte,
              gen_helper_sve_st3dd_be_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_be_r_mte,
              gen_helper_sve_st4ss_be_r_mte,
              gen_helper_sve_st4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[s->mte_active[0]][be][msz][esz];
        nreg = 1;
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}
   6034
   6035static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
   6036{
   6037    if (a->rm == 31 || a->msz > a->esz) {
   6038        return false;
   6039    }
   6040    if (sve_access_check(s)) {
   6041        TCGv_i64 addr = new_tmp_a64(s);
   6042        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
   6043        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
   6044        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
   6045    }
   6046    return true;
   6047}
   6048
   6049static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
   6050{
   6051    if (a->msz > a->esz) {
   6052        return false;
   6053    }
   6054    if (sve_access_check(s)) {
   6055        int vsz = vec_full_reg_size(s);
   6056        int elements = vsz >> a->esz;
   6057        TCGv_i64 addr = new_tmp_a64(s);
   6058
   6059        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
   6060                         (a->imm * elements * (a->nreg + 1)) << a->msz);
   6061        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
   6062    }
   6063    return true;
   6064}
   6065
   6066/*
   6067 *** SVE gather loads / scatter stores
   6068 */
   6069
/*
 * Expand a gather load / scatter store: for each predicated element,
 * memory is accessed at the scalar base plus the (scaled) offset taken
 * from vector zm, with zt as the data register.  When MTE is active,
 * the tag-check parameters are packed into the descriptor above
 * SVE_MTEDESC_SHIFT; the low bits carry the usual simd descriptor
 * plus the offset scale.
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz, bool is_write,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc = 0;

    if (s->mte_active[0]) {
        /* MTE fields for the helper's tag check of each access. */
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    }
    desc = simd_desc(vsz, vsz, desc | scale);
    t_desc = tcg_const_i32(desc);

    /* Pass pointers into env for the predicate and both vectors. */
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
   6102
/*
 * Indexed by [mte][be][ff][xs][u][msz]:
 *   mte: MTE checks active
 *   be:  big-endian data access
 *   ff:  first-fault variant (helper name contains "ff")
 *   xs:  offset extension -- 0 selects the _zsu helpers,
 *        1 the _zss helpers (zero- vs sign-extended 32-bit offsets,
 *        per the helper naming convention)
 *   u:   zero-extending (unsigned) load; 0 is sign-extending
 *   msz: log2 of the memory element size
 * NULL entries are combinations with no encoding (a sign-extending
 * load of a full-sized element).
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
   6219
/*
 * As gather_load_fn32, but for 64-bit element gathers: indexed by
 * [mte][be][ff][xs][u][msz].  Note that we overload xs=2 to indicate
 * 64-bit offset (_zd helpers); xs=0/1 select the extended 32-bit
 * offset forms (_zsu/_zss).
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_le_zsu,
                  gen_helper_sve_ldsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_le_zsu,
                  gen_helper_sve_ldsdu_le_zsu,
                  gen_helper_sve_lddd_le_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_le_zss,
                  gen_helper_sve_ldsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_le_zss,
                  gen_helper_sve_ldsdu_le_zss,
                  gen_helper_sve_lddd_le_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_le_zd,
                  gen_helper_sve_ldsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_le_zd,
                  gen_helper_sve_ldsdu_le_zd,
                  gen_helper_sve_lddd_le_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_le_zsu,
                  gen_helper_sve_ldffsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};
   6430
   6431static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
   6432{
   6433    gen_helper_gvec_mem_scatter *fn = NULL;
   6434    bool be = s->be_data == MO_BE;
   6435    bool mte = s->mte_active[0];
   6436
   6437    if (!sve_access_check(s)) {
   6438        return true;
   6439    }
   6440
   6441    switch (a->esz) {
   6442    case MO_32:
   6443        fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
   6444        break;
   6445    case MO_64:
   6446        fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
   6447        break;
   6448    }
   6449    assert(fn != NULL);
   6450
   6451    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
   6452               cpu_reg_sp(s, a->rn), a->msz, false, fn);
   6453    return true;
   6454}
   6455
   6456static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
   6457{
   6458    gen_helper_gvec_mem_scatter *fn = NULL;
   6459    bool be = s->be_data == MO_BE;
   6460    bool mte = s->mte_active[0];
   6461    TCGv_i64 imm;
   6462
   6463    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
   6464        return false;
   6465    }
   6466    if (!sve_access_check(s)) {
   6467        return true;
   6468    }
   6469
   6470    switch (a->esz) {
   6471    case MO_32:
   6472        fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
   6473        break;
   6474    case MO_64:
   6475        fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
   6476        break;
   6477    }
   6478    assert(fn != NULL);
   6479
   6480    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
   6481     * by loading the immediate into the scalar parameter.
   6482     */
   6483    imm = tcg_const_i64(a->imm << a->msz);
   6484    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
   6485    tcg_temp_free_i64(imm);
   6486    return true;
   6487}
   6488
   6489static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
   6490{
   6491    if (!dc_isar_feature(aa64_sve2, s)) {
   6492        return false;
   6493    }
   6494    return trans_LD1_zprz(s, a);
   6495}
   6496
/*
 * Indexed by [mte][be][xs][msz]: MTE active, big-endian, offset
 * extension (0: _zsu, 1: _zss), and log2 of the memory element size.
 * Stores need no signedness dimension, so the table is smaller than
 * the gather-load one.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
   6530
/*
 * As scatter_store_fn32, but for 64-bit elements: indexed by
 * [mte][be][xs][msz].  Note that we overload xs=2 to indicate
 * 64-bit offset (_zd helpers).
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
         { /* Little-endian */
             { gen_helper_sve_stbd_zsu,
               gen_helper_sve_sthd_le_zsu,
               gen_helper_sve_stsd_le_zsu,
               gen_helper_sve_stdd_le_zsu, },
             { gen_helper_sve_stbd_zss,
               gen_helper_sve_sthd_le_zss,
               gen_helper_sve_stsd_le_zss,
               gen_helper_sve_stdd_le_zss, },
             { gen_helper_sve_stbd_zd,
               gen_helper_sve_sthd_le_zd,
               gen_helper_sve_stsd_le_zd,
               gen_helper_sve_stdd_le_zd, } },
         { /* Big-endian */
             { gen_helper_sve_stbd_zsu,
               gen_helper_sve_sthd_be_zsu,
               gen_helper_sve_stsd_be_zsu,
               gen_helper_sve_stdd_be_zsu, },
             { gen_helper_sve_stbd_zss,
               gen_helper_sve_sthd_be_zss,
               gen_helper_sve_stsd_be_zss,
               gen_helper_sve_stdd_be_zss, },
             { gen_helper_sve_stbd_zd,
               gen_helper_sve_sthd_be_zd,
               gen_helper_sve_stsd_be_zd,
               gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
         { /* Little-endian */
             { gen_helper_sve_stbd_zsu_mte,
               gen_helper_sve_sthd_le_zsu_mte,
               gen_helper_sve_stsd_le_zsu_mte,
               gen_helper_sve_stdd_le_zsu_mte, },
             { gen_helper_sve_stbd_zss_mte,
               gen_helper_sve_sthd_le_zss_mte,
               gen_helper_sve_stsd_le_zss_mte,
               gen_helper_sve_stdd_le_zss_mte, },
             { gen_helper_sve_stbd_zd_mte,
               gen_helper_sve_sthd_le_zd_mte,
               gen_helper_sve_stsd_le_zd_mte,
               gen_helper_sve_stdd_le_zd_mte, } },
         { /* Big-endian */
             { gen_helper_sve_stbd_zsu_mte,
               gen_helper_sve_sthd_be_zsu_mte,
               gen_helper_sve_stsd_be_zsu_mte,
               gen_helper_sve_stdd_be_zsu_mte, },
             { gen_helper_sve_stbd_zss_mte,
               gen_helper_sve_sthd_be_zss_mte,
               gen_helper_sve_stsd_be_zss_mte,
               gen_helper_sve_stdd_be_zss_mte, },
             { gen_helper_sve_stbd_zd_mte,
               gen_helper_sve_sthd_be_zd_mte,
               gen_helper_sve_stsd_be_zd_mte,
               gen_helper_sve_stdd_be_zd_mte, } } },
};
   6588
   6589static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
   6590{
   6591    gen_helper_gvec_mem_scatter *fn;
   6592    bool be = s->be_data == MO_BE;
   6593    bool mte = s->mte_active[0];
   6594
   6595    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
   6596        return false;
   6597    }
   6598    if (!sve_access_check(s)) {
   6599        return true;
   6600    }
   6601    switch (a->esz) {
   6602    case MO_32:
   6603        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
   6604        break;
   6605    case MO_64:
   6606        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
   6607        break;
   6608    default:
   6609        g_assert_not_reached();
   6610    }
   6611    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
   6612               cpu_reg_sp(s, a->rn), a->msz, true, fn);
   6613    return true;
   6614}
   6615
   6616static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
   6617{
   6618    gen_helper_gvec_mem_scatter *fn = NULL;
   6619    bool be = s->be_data == MO_BE;
   6620    bool mte = s->mte_active[0];
   6621    TCGv_i64 imm;
   6622
   6623    if (a->esz < a->msz) {
   6624        return false;
   6625    }
   6626    if (!sve_access_check(s)) {
   6627        return true;
   6628    }
   6629
   6630    switch (a->esz) {
   6631    case MO_32:
   6632        fn = scatter_store_fn32[mte][be][0][a->msz];
   6633        break;
   6634    case MO_64:
   6635        fn = scatter_store_fn64[mte][be][2][a->msz];
   6636        break;
   6637    }
   6638    assert(fn != NULL);
   6639
   6640    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
   6641     * by loading the immediate into the scalar parameter.
   6642     */
   6643    imm = tcg_const_i64(a->imm << a->msz);
   6644    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
   6645    tcg_temp_free_i64(imm);
   6646    return true;
   6647}
   6648
   6649static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
   6650{
   6651    if (!dc_isar_feature(aa64_sve2, s)) {
   6652        return false;
   6653    }
   6654    return trans_ST1_zprz(s, a);
   6655}
   6656
   6657/*
   6658 * Prefetches
   6659 */
   6660
static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /*
     * Prefetch is a nop within QEMU; the SVE access check is still
     * performed for its side effects, and its result discarded.
     */
    (void)sve_access_check(s);
    return true;
}
   6667
   6668static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
   6669{
   6670    if (a->rm == 31) {
   6671        return false;
   6672    }
   6673    /* Prefetch is a nop within QEMU.  */
   6674    (void)sve_access_check(s);
   6675    return true;
   6676}
   6677
   6678/*
   6679 * Move Prefix
   6680 *
   6681 * TODO: The implementation so far could handle predicated merging movprfx.
   6682 * The helper functions as written take an extra source register to
   6683 * use in the operation, but the result is only written when predication
   6684 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
   6685 * to allow the final write back to the destination to be unconditional.
   6686 * For predicated zeroing movprfx, we need to rearrange the helpers to
   6687 * allow the final write back to zero inactives.
   6688 *
   6689 * In the meantime, just emit the moves.
   6690 */
   6691
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    /* Unpredicated MOVPRFX: implemented as a plain vector move. */
    return do_mov_z(s, a->rd, a->rn);
}
   6696
   6697static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
   6698{
   6699    if (sve_access_check(s)) {
   6700        do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
   6701    }
   6702    return true;
   6703}
   6704
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    /* Zeroing MOVPRFX: implemented via the move-and-zero expansion. */
    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
   6709
   6710/*
   6711 * SVE2 Integer Multiply - Unpredicated
   6712 */
   6713
   6714static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a)
   6715{
   6716    if (!dc_isar_feature(aa64_sve2, s)) {
   6717        return false;
   6718    }
   6719    if (sve_access_check(s)) {
   6720        gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm);
   6721    }
   6722    return true;
   6723}
   6724
   6725static bool do_sve2_zzz_ool(DisasContext *s, arg_rrr_esz *a,
   6726                            gen_helper_gvec_3 *fn)
   6727{
   6728    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
   6729        return false;
   6730    }
   6731    if (sve_access_check(s)) {
   6732        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
   6733    }
   6734    return true;
   6735}
   6736
   6737static bool trans_SMULH_zzz(DisasContext *s, arg_rrr_esz *a)
   6738{
   6739    static gen_helper_gvec_3 * const fns[4] = {
   6740        gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
   6741        gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
   6742    };
   6743    return do_sve2_zzz_ool(s, a, fns[a->esz]);
   6744}
   6745
   6746static bool trans_UMULH_zzz(DisasContext *s, arg_rrr_esz *a)
   6747{
   6748    static gen_helper_gvec_3 * const fns[4] = {
   6749        gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
   6750        gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
   6751    };
   6752    return do_sve2_zzz_ool(s, a, fns[a->esz]);
   6753}
   6754
static bool trans_PMUL_zzz(DisasContext *s, arg_rpr_esz *a)
{
    /* PMUL has only a byte-element form; a single helper suffices. */
    return do_sve2_zzz_ool(s, a, gen_helper_gvec_pmul_b);
}
   6759
   6760static bool trans_SQDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
   6761{
   6762    static gen_helper_gvec_3 * const fns[4] = {
   6763        gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
   6764        gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
   6765    };
   6766    return do_sve2_zzz_ool(s, a, fns[a->esz]);
   6767}
   6768
   6769static bool trans_SQRDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
   6770{
   6771    static gen_helper_gvec_3 * const fns[4] = {
   6772        gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
   6773        gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
   6774    };
   6775    return do_sve2_zzz_ool(s, a, fns[a->esz]);
   6776}
   6777
   6778/*
   6779 * SVE2 Integer - Predicated
   6780 */
   6781
   6782static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a,
   6783                             gen_helper_gvec_4 *fn)
   6784{
   6785    if (!dc_isar_feature(aa64_sve2, s)) {
   6786        return false;
   6787    }
   6788    return do_zpzz_ool(s, a, fn);
   6789}
   6790
/*
 * SADALP: signed add and accumulate long pairwise.
 * The byte element size is reserved; helpers are indexed from MO_16.
 */
static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[3] = {
        gen_helper_sve2_sadalp_zpzz_h,
        gen_helper_sve2_sadalp_zpzz_s,
        gen_helper_sve2_sadalp_zpzz_d,
    };
    if (a->esz == 0) {
        return false;
    }
    return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
}

/* UADALP: unsigned counterpart of SADALP; byte size likewise reserved. */
static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[3] = {
        gen_helper_sve2_uadalp_zpzz_h,
        gen_helper_sve2_uadalp_zpzz_s,
        gen_helper_sve2_uadalp_zpzz_d,
    };
    if (a->esz == 0) {
        return false;
    }
    return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
}
   6816
   6817/*
   6818 * SVE2 integer unary operations (predicated)
   6819 */
   6820
   6821static bool do_sve2_zpz_ool(DisasContext *s, arg_rpr_esz *a,
   6822                            gen_helper_gvec_3 *fn)
   6823{
   6824    if (!dc_isar_feature(aa64_sve2, s)) {
   6825        return false;
   6826    }
   6827    return do_zpz_ool(s, a, fn);
   6828}
   6829
/* URECPE: unsigned reciprocal estimate; 32-bit elements only. */
static bool trans_URECPE(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz != 2) {
        return false;
    }
    return do_sve2_zpz_ool(s, a, gen_helper_sve2_urecpe_s);
}

/* URSQRTE: unsigned reciprocal square root estimate; 32-bit elements only. */
static bool trans_URSQRTE(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz != 2) {
        return false;
    }
    return do_sve2_zpz_ool(s, a, gen_helper_sve2_ursqrte_s);
}

/* SQABS: signed saturating absolute value. */
static bool trans_SQABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
        gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
    };
    return do_sve2_zpz_ool(s, a, fns[a->esz]);
}

/* SQNEG: signed saturating negate. */
static bool trans_SQNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
        gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
    };
    return do_sve2_zpz_ool(s, a, fns[a->esz]);
}
   6863
/*
 * Define one trans_* function for an SVE2 predicated two-source
 * operation, dispatching on a->esz to the per-size helper.
 */
#define DO_SVE2_ZPZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \
    };                                                                    \
    return do_sve2_zpzz_ool(s, a, fns[a->esz]);                           \
}

/* Saturating/rounding shifts by vector. */
DO_SVE2_ZPZZ(SQSHL, sqshl)
DO_SVE2_ZPZZ(SQRSHL, sqrshl)
DO_SVE2_ZPZZ(SRSHL, srshl)

DO_SVE2_ZPZZ(UQSHL, uqshl)
DO_SVE2_ZPZZ(UQRSHL, uqrshl)
DO_SVE2_ZPZZ(URSHL, urshl)

/* Halving add/sub, with and without rounding. */
DO_SVE2_ZPZZ(SHADD, shadd)
DO_SVE2_ZPZZ(SRHADD, srhadd)
DO_SVE2_ZPZZ(SHSUB, shsub)

DO_SVE2_ZPZZ(UHADD, uhadd)
DO_SVE2_ZPZZ(URHADD, urhadd)
DO_SVE2_ZPZZ(UHSUB, uhsub)

/* Pairwise arithmetic. */
DO_SVE2_ZPZZ(ADDP, addp)
DO_SVE2_ZPZZ(SMAXP, smaxp)
DO_SVE2_ZPZZ(UMAXP, umaxp)
DO_SVE2_ZPZZ(SMINP, sminp)
DO_SVE2_ZPZZ(UMINP, uminp)

/* Saturating add/sub. */
DO_SVE2_ZPZZ(SQADD_zpzz, sqadd)
DO_SVE2_ZPZZ(UQADD_zpzz, uqadd)
DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub)
DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub)
DO_SVE2_ZPZZ(SUQADD, suqadd)
DO_SVE2_ZPZZ(USQADD, usqadd)
   6902
   6903/*
   6904 * SVE2 Widening Integer Arithmetic
   6905 */
   6906
   6907static bool do_sve2_zzw_ool(DisasContext *s, arg_rrr_esz *a,
   6908                            gen_helper_gvec_3 *fn, int data)
   6909{
   6910    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
   6911        return false;
   6912    }
   6913    if (sve_access_check(s)) {
   6914        unsigned vsz = vec_full_reg_size(s);
   6915        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
   6916                           vec_full_reg_offset(s, a->rn),
   6917                           vec_full_reg_offset(s, a->rm),
   6918                           vsz, vsz, data, fn);
   6919    }
   6920    return true;
   6921}
   6922
/*
 * Define one trans_* function for an SVE2 widening three-register
 * operation.  SEL1/SEL2 select the top (true) or bottom (false) half
 * of each source; they are packed into simd_data as (SEL2 << 1) | SEL1.
 * The byte element size is reserved (NULL entry).
 */
#define DO_SVE2_ZZZ_TB(NAME, name, SEL1, SEL2) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)               \
{                                                                       \
    static gen_helper_gvec_3 * const fns[4] = {                         \
        NULL,                       gen_helper_sve2_##name##_h,         \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,         \
    };                                                                  \
    return do_sve2_zzw_ool(s, a, fns[a->esz], (SEL2 << 1) | SEL1);      \
}

/* Bottom x bottom. */
DO_SVE2_ZZZ_TB(SADDLB, saddl, false, false)
DO_SVE2_ZZZ_TB(SSUBLB, ssubl, false, false)
DO_SVE2_ZZZ_TB(SABDLB, sabdl, false, false)

DO_SVE2_ZZZ_TB(UADDLB, uaddl, false, false)
DO_SVE2_ZZZ_TB(USUBLB, usubl, false, false)
DO_SVE2_ZZZ_TB(UABDLB, uabdl, false, false)

/* Top x top. */
DO_SVE2_ZZZ_TB(SADDLT, saddl, true, true)
DO_SVE2_ZZZ_TB(SSUBLT, ssubl, true, true)
DO_SVE2_ZZZ_TB(SABDLT, sabdl, true, true)

DO_SVE2_ZZZ_TB(UADDLT, uaddl, true, true)
DO_SVE2_ZZZ_TB(USUBLT, usubl, true, true)
DO_SVE2_ZZZ_TB(UABDLT, uabdl, true, true)

/* Mixed bottom/top. */
DO_SVE2_ZZZ_TB(SADDLBT, saddl, false, true)
DO_SVE2_ZZZ_TB(SSUBLBT, ssubl, false, true)
DO_SVE2_ZZZ_TB(SSUBLTB, ssubl, true, false)

DO_SVE2_ZZZ_TB(SQDMULLB_zzz, sqdmull_zzz, false, false)
DO_SVE2_ZZZ_TB(SQDMULLT_zzz, sqdmull_zzz, true, true)

DO_SVE2_ZZZ_TB(SMULLB_zzz, smull_zzz, false, false)
DO_SVE2_ZZZ_TB(SMULLT_zzz, smull_zzz, true, true)

DO_SVE2_ZZZ_TB(UMULLB_zzz, umull_zzz, false, false)
DO_SVE2_ZZZ_TB(UMULLT_zzz, umull_zzz, true, true)
   6961
/*
 * EORBT/EORTB: interleaving exclusive-or.  The two halves always use
 * opposite selectors, so simd_data is (sel2 << 1) | sel1 with
 * sel2 == !sel1.  All element sizes are valid.
 */
static bool do_eor_tb(DisasContext *s, arg_rrr_esz *a, bool sel1)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
        gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
    };
    return do_sve2_zzw_ool(s, a, fns[a->esz], (!sel1 << 1) | sel1);
}

/* EORBT: bottom of Zn with top of Zm. */
static bool trans_EORBT(DisasContext *s, arg_rrr_esz *a)
{
    return do_eor_tb(s, a, false);
}

/* EORTB: top of Zn with bottom of Zm. */
static bool trans_EORTB(DisasContext *s, arg_rrr_esz *a)
{
    return do_eor_tb(s, a, true);
}
   6980
/*
 * PMULLB/PMULLT: polynomial multiply long.
 * esz == 0 (8x8 -> 16... here the 64x64 -> 128 form) additionally
 * requires the SVE2 AES/PMULL128 feature; esz == MO_32 is reserved
 * (NULL entry, rejected by do_sve2_zzw_ool).
 */
static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL,                    gen_helper_sve2_pmull_d,
    };
    if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], sel);
}

/* PMULLB: bottom halves. */
static bool trans_PMULLB(DisasContext *s, arg_rrr_esz *a)
{
    return do_trans_pmull(s, a, false);
}

/* PMULLT: top halves. */
static bool trans_PMULLT(DisasContext *s, arg_rrr_esz *a)
{
    return do_trans_pmull(s, a, true);
}
   7002
/*
 * Define one trans_* function for an SVE2 "wide" operation: wide Zn
 * with the SEL2 (top/bottom) half of narrow Zm.  Byte size reserved.
 */
#define DO_SVE2_ZZZ_WTB(NAME, name, SEL2) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)       \
{                                                               \
    static gen_helper_gvec_3 * const fns[4] = {                 \
        NULL,                       gen_helper_sve2_##name##_h, \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
    };                                                          \
    return do_sve2_zzw_ool(s, a, fns[a->esz], SEL2);            \
}

DO_SVE2_ZZZ_WTB(SADDWB, saddw, false)
DO_SVE2_ZZZ_WTB(SADDWT, saddw, true)
DO_SVE2_ZZZ_WTB(SSUBWB, ssubw, false)
DO_SVE2_ZZZ_WTB(SSUBWT, ssubw, true)

DO_SVE2_ZZZ_WTB(UADDWB, uaddw, false)
DO_SVE2_ZZZ_WTB(UADDWT, uaddw, true)
DO_SVE2_ZZZ_WTB(USUBWB, usubw, false)
DO_SVE2_ZZZ_WTB(USUBWT, usubw, true)
   7022
/*
 * Vector expansion for SSHLLB/SSHLLT: sign-extend the top or bottom
 * half of each element and shift left.  @imm packs (shl << 1) | top
 * as built by do_sve2_shll_tb; halfbits is the narrow element width.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            /* Top half shifted by a full half-element: just mask it off. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Arithmetic shift down extends the top half, then shift up. */
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        /* Move the bottom half to the top, then sign-extend down. */
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}
   7044
/*
 * 64-bit scalar expansion for USHLLB/USHLLT: zero-extend the top or
 * bottom half of each element and shift left.  @imm packs
 * (shl << 1) | top.  The shifted narrow field is isolated with a
 * mask, so a single shift of the whole 64-bit lane suffices.
 */
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    /* Mask of the result field: narrow width, shifted into place. */
    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    /* Net shift: the top half starts halfbits higher than the result. */
    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}

/* Per-size .fni8 wrappers for the GVecGen2i tables below. */
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}
   7080
/*
 * Vector expansion for USHLLB/USHLLT: zero-extend the top or bottom
 * half of each element and shift left.  @imm packs (shl << 1) | top.
 */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            /* Top half shifted by a full half-element: just mask it off. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Logical shift down extends the top half, then shift up. */
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            /* No shift: just mask the bottom half. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Move the bottom half to the top, then zero-extend down. */
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}
   7109
/*
 * Common expansion for [SU]SHLL[BT]: widening shift left by immediate.
 * @sel chooses top (true) or bottom (false) halves; @uns chooses the
 * unsigned variant.  a->esz is the narrow (source) element size, so
 * the ops table entries use the widened vece.  The immediate and the
 * selector are packed as (imm << 1) | sel, decoded by the gen_*
 * functions above.
 */
static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
                            bool sel, bool uns)
{
    static const TCGOpcode sshll_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };
    static const TCGOpcode ushll_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, 0
    };
    static const GVecGen2i ops[2][3] = {
        { { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_h,
            .vece = MO_16 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_s,
            .vece = MO_32 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_d,
            .vece = MO_64 } },
        { { .fni8 = gen_ushll16_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_h,
            .vece = MO_16 },
          { .fni8 = gen_ushll32_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_s,
            .vece = MO_32 },
          { .fni8 = gen_ushll64_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_d,
            .vece = MO_64 } },
    };

    if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[uns][a->esz]);
    }
    return true;
}
   7161
/* SSHLLB: signed shift left long, bottom halves. */
static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, false);
}

/* SSHLLT: signed shift left long, top halves. */
static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, false);
}

/* USHLLB: unsigned shift left long, bottom halves. */
static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, true);
}

/* USHLLT: unsigned shift left long, top halves. */
static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, true);
}
   7181
/* BEXT: bit extract; gated on the SVE2 bit-permute feature. */
static bool trans_BEXT(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
        gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}

/* BDEP: bit deposit; gated on the SVE2 bit-permute feature. */
static bool trans_BDEP(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
        gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}

/* BGRP: bit group; gated on the SVE2 bit-permute feature. */
static bool trans_BGRP(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
        gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}
   7217
/*
 * CADD/SQCADD: complex integer add with rotation.
 * @sq selects the saturating form; @rot encodes 90 (false) vs 270
 * (true) degrees, passed to the helper in simd_data.
 */
static bool do_cadd(DisasContext *s, arg_rrr_esz *a, bool sq, bool rot)
{
    static gen_helper_gvec_3 * const fns[2][4] = {
        { gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
          gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d },
        { gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
          gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d },
    };
    return do_sve2_zzw_ool(s, a, fns[sq][a->esz], rot);
}

static bool trans_CADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, false);
}

static bool trans_CADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, true);
}

static bool trans_SQCADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, false);
}

static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, true);
}
   7248
   7249static bool do_sve2_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
   7250                             gen_helper_gvec_4 *fn, int data)
   7251{
   7252    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
   7253        return false;
   7254    }
   7255    if (sve_access_check(s)) {
   7256        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
   7257    }
   7258    return true;
   7259}
   7260
/*
 * SABAL[BT]/UABAL[BT]: absolute difference and accumulate long.
 * @uns selects the unsigned variant, @sel the top (true) or bottom
 * (false) halves.  Byte element size is reserved (NULL entry).
 */
static bool do_abal(DisasContext *s, arg_rrrr_esz *a, bool uns, bool sel)
{
    static gen_helper_gvec_4 * const fns[2][4] = {
        { NULL,                    gen_helper_sve2_sabal_h,
          gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d },
        { NULL,                    gen_helper_sve2_uabal_h,
          gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d },
    };
    return do_sve2_zzzz_ool(s, a, fns[uns][a->esz], sel);
}

static bool trans_SABALB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, false, false);
}

static bool trans_SABALT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, false, true);
}

static bool trans_UABALB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, true, false);
}

static bool trans_UABALT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, true, true);
}
   7291
/*
 * ADCLB/ADCLT: add-with-carry long; @sel chooses the top (true) or
 * bottom (false) halves.
 */
static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return do_sve2_zzzz_ool(s, a, fns[a->esz & 1], (a->esz & 2) | sel);
}

static bool trans_ADCLB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_adcl(s, a, false);
}

static bool trans_ADCLT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_adcl(s, a, true);
}
   7314
   7315static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
   7316{
   7317    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
   7318        return false;
   7319    }
   7320    if (sve_access_check(s)) {
   7321        unsigned vsz = vec_full_reg_size(s);
   7322        unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
   7323        unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
   7324        fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
   7325    }
   7326    return true;
   7327}
   7328
/* SSRA: signed shift right and accumulate. */
static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ssra);
}

/* USRA: unsigned shift right and accumulate. */
static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_usra);
}

/* SRSRA: signed rounding shift right and accumulate. */
static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_srsra);
}

/* URSRA: unsigned rounding shift right and accumulate. */
static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ursra);
}

/* SRI: shift right and insert. */
static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sri);
}

/* SLI: shift left and insert. */
static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sli);
}
   7358
   7359static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
   7360{
   7361    if (!dc_isar_feature(aa64_sve2, s)) {
   7362        return false;
   7363    }
   7364    if (sve_access_check(s)) {
   7365        gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
   7366    }
   7367    return true;
   7368}
   7369
/* SABA: signed absolute difference and accumulate. */
static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_saba);
}

/* UABA: unsigned absolute difference and accumulate. */
static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
}
   7379
/*
 * Common expansion for the saturating extract-narrow insns
 * ([SU]QXTN[BT], SQXTUN[BT]).  a->esz is the narrow element size and
 * indexes @ops, whose entries use the wide vece.  The shift immediate
 * must be zero for these encodings.
 */
static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
                                   const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
        !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, &ops[a->esz]);
    }
    return true;
}
   7395
/* Vector opcodes required by the SQXTN expansions below. */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTNB vector expansion: clamp each wide element to the signed
 * narrow range [min, max], then keep only the low (bottom) half.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

/* SQXTNB: signed saturating extract narrow, bottom halves. */
static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}

/*
 * SQXTNT vector expansion: clamp to the signed narrow range, shift
 * into the high (top) half, and merge with the existing destination
 * (bottom halves preserved via bitsel; hence .load_dest below).
 */
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

/* SQXTNT: signed saturating extract narrow, top halves. */
static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
   7475
/* Vector opcodes required by the UQXTN expansions below. */
static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

/*
 * UQXTNB vector expansion: clamp each wide element to the unsigned
 * narrow maximum; the result already occupies only the bottom half.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

/* UQXTNB: unsigned saturating extract narrow, bottom halves. */
static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}

/*
 * UQXTNT vector expansion: clamp, shift into the top half, and merge
 * with the existing destination (bottom halves preserved; note the
 * reuse of the max constant in t as the bitsel mask).
 */
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

/* UQXTNT: unsigned saturating extract narrow, top halves. */
static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
   7544
/* Vector opcodes required by the SQXTUN expansions below. */
static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTUNB vector expansion: clamp each signed wide element to the
 * unsigned narrow range [0, max]; the result occupies the bottom half.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

/* SQXTUNB: signed saturating extract unsigned narrow, bottom halves. */
static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}

/*
 * SQXTUNT vector expansion: clamp to [0, max], shift into the top
 * half, and merge with the existing destination (bottom preserved).
 */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

/* SQXTUNT: signed saturating extract unsigned narrow, top halves. */
static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
   7617
/*
 * Common expansion for the shift-right-and-narrow insns (SHRN[BT],
 * RSHRN[BT], etc).  a->esz is the narrow element size; the decoder
 * guarantees 1 <= imm <= narrow element width (asserted below).
 */
static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
                               const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}
   7633
/*
 * 64-bit scalar expansion for SHRNB: shift each wide element right
 * and mask to the bottom (narrow) half.
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

/* Per-size .fni8 wrappers for the SHRNB GVecGen2i table. */
static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

/* Vector expansion for SHRNB: shift right, then mask the bottom half. */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

/* SHRNB: shift right narrow, bottom halves. */
static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnb16_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnb32_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnb64_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
   7692
/*
 * SHRNT on a 64-bit lane: left-shift by (halfbits - shr) moves each
 * narrowed result into the high half of its wide element; ~mask strips
 * bits that crossed element boundaries, mask keeps the bottom halves
 * already present in d, and the OR merges the two.
 * n is clobbered; it is a temporary supplied by the gvec expander.
 */
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}
   7703
/* Size-specific wrappers for gen_shrnt_i64 (narrowing shift right, top). */
static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

/*
 * MO_64: a single element per lane, so just shift and deposit the
 * narrowed 32-bit result into the high half of d.
 */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}
   7719
/*
 * SHRNT vector expansion: position the narrowed results in the high
 * halves of n, then bit-select: low halves come from d (preserved
 * bottom results), high halves from n.
 * n is clobbered; it is a temporary supplied by the gvec expander.
 */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
   7731
   7732static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
   7733{
   7734    static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
   7735    static const GVecGen2i ops[3] = {
   7736        { .fni8 = gen_shrnt16_i64,
   7737          .fniv = gen_shrnt_vec,
   7738          .opt_opc = vec_list,
   7739          .load_dest = true,
   7740          .fno = gen_helper_sve2_shrnt_h,
   7741          .vece = MO_16 },
   7742        { .fni8 = gen_shrnt32_i64,
   7743          .fniv = gen_shrnt_vec,
   7744          .opt_opc = vec_list,
   7745          .load_dest = true,
   7746          .fno = gen_helper_sve2_shrnt_s,
   7747          .vece = MO_32 },
   7748        { .fni8 = gen_shrnt64_i64,
   7749          .fniv = gen_shrnt_vec,
   7750          .opt_opc = vec_list,
   7751          .load_dest = true,
   7752          .fno = gen_helper_sve2_shrnt_d,
   7753          .vece = MO_64 },
   7754    };
   7755    return do_sve2_shr_narrow(s, a, ops);
   7756}
   7757
/*
 * Rounding narrowing shifts have no inline expansion; only the
 * out-of-line helpers are provided, selected by element size via
 * do_sve2_shr_narrow.
 */
static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnb_h },
        { .fno = gen_helper_sve2_rshrnb_s },
        { .fno = gen_helper_sve2_rshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnt_h },
        { .fno = gen_helper_sve2_rshrnt_s },
        { .fno = gen_helper_sve2_rshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
   7777
/*
 * SQSHRUNB vector expansion: arithmetic shift right, then saturate the
 * signed result to the unsigned half-width range [0, 2^halfbits - 1]
 * via smax-with-0 followed by umin-with-max.  The saturated value
 * already fits in the low half, so no final mask is needed.
 * n is clobbered; it is a temporary supplied by the gvec expander.
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
   7791
   7792static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
   7793{
   7794    static const TCGOpcode vec_list[] = {
   7795        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
   7796    };
   7797    static const GVecGen2i ops[3] = {
   7798        { .fniv = gen_sqshrunb_vec,
   7799          .opt_opc = vec_list,
   7800          .fno = gen_helper_sve2_sqshrunb_h,
   7801          .vece = MO_16 },
   7802        { .fniv = gen_sqshrunb_vec,
   7803          .opt_opc = vec_list,
   7804          .fno = gen_helper_sve2_sqshrunb_s,
   7805          .vece = MO_32 },
   7806        { .fniv = gen_sqshrunb_vec,
   7807          .opt_opc = vec_list,
   7808          .fno = gen_helper_sve2_sqshrunb_d,
   7809          .vece = MO_64 },
   7810    };
   7811    return do_sve2_shr_narrow(s, a, ops);
   7812}
   7813
/*
 * SQSHRUNT vector expansion: as SQSHRUNB (shift, clamp to unsigned
 * half-width range), then move the result into the high halves and
 * bit-select with d to preserve the bottom halves.
 * n is clobbered; it is a temporary supplied by the gvec expander.
 */
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* t still holds the low-half mask: keep d's low halves, n's high. */
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
   7829
   7830static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
   7831{
   7832    static const TCGOpcode vec_list[] = {
   7833        INDEX_op_shli_vec, INDEX_op_sari_vec,
   7834        INDEX_op_smax_vec, INDEX_op_umin_vec, 0
   7835    };
   7836    static const GVecGen2i ops[3] = {
   7837        { .fniv = gen_sqshrunt_vec,
   7838          .opt_opc = vec_list,
   7839          .load_dest = true,
   7840          .fno = gen_helper_sve2_sqshrunt_h,
   7841          .vece = MO_16 },
   7842        { .fniv = gen_sqshrunt_vec,
   7843          .opt_opc = vec_list,
   7844          .load_dest = true,
   7845          .fno = gen_helper_sve2_sqshrunt_s,
   7846          .vece = MO_32 },
   7847        { .fniv = gen_sqshrunt_vec,
   7848          .opt_opc = vec_list,
   7849          .load_dest = true,
   7850          .fno = gen_helper_sve2_sqshrunt_d,
   7851          .vece = MO_64 },
   7852    };
   7853    return do_sve2_shr_narrow(s, a, ops);
   7854}
   7855
/* Rounding variants of SQSHRUN[BT]: out-of-line helpers only. */
static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunb_h },
        { .fno = gen_helper_sve2_sqrshrunb_s },
        { .fno = gen_helper_sve2_sqrshrunb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunt_h },
        { .fno = gen_helper_sve2_sqrshrunt_s },
        { .fno = gen_helper_sve2_sqrshrunt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
   7875
/*
 * SQSHRNB vector expansion: arithmetic shift right, clamp to the
 * signed half-width range [min, max], then mask to the low halves.
 * The mask is needed because a negative saturated value has its sign
 * bits extended into the high half.
 * n is clobbered; it is a temporary supplied by the gvec expander.
 */
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);  /* INT_MAX at half width */
    int64_t min = -max - 1;                          /* INT_MIN at half width */

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
   7893
   7894static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
   7895{
   7896    static const TCGOpcode vec_list[] = {
   7897        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
   7898    };
   7899    static const GVecGen2i ops[3] = {
   7900        { .fniv = gen_sqshrnb_vec,
   7901          .opt_opc = vec_list,
   7902          .fno = gen_helper_sve2_sqshrnb_h,
   7903          .vece = MO_16 },
   7904        { .fniv = gen_sqshrnb_vec,
   7905          .opt_opc = vec_list,
   7906          .fno = gen_helper_sve2_sqshrnb_s,
   7907          .vece = MO_32 },
   7908        { .fniv = gen_sqshrnb_vec,
   7909          .opt_opc = vec_list,
   7910          .fno = gen_helper_sve2_sqshrnb_d,
   7911          .vece = MO_64 },
   7912    };
   7913    return do_sve2_shr_narrow(s, a, ops);
   7914}
   7915
/*
 * SQSHRNT vector expansion: as SQSHRNB (shift, clamp to signed
 * half-width range), then shift into the high halves and bit-select
 * with d to preserve the bottom halves.  The left shift also discards
 * the sign-extension bits, so no explicit mask of n is needed.
 * n is clobbered; it is a temporary supplied by the gvec expander.
 */
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
   7934
   7935static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
   7936{
   7937    static const TCGOpcode vec_list[] = {
   7938        INDEX_op_shli_vec, INDEX_op_sari_vec,
   7939        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
   7940    };
   7941    static const GVecGen2i ops[3] = {
   7942        { .fniv = gen_sqshrnt_vec,
   7943          .opt_opc = vec_list,
   7944          .load_dest = true,
   7945          .fno = gen_helper_sve2_sqshrnt_h,
   7946          .vece = MO_16 },
   7947        { .fniv = gen_sqshrnt_vec,
   7948          .opt_opc = vec_list,
   7949          .load_dest = true,
   7950          .fno = gen_helper_sve2_sqshrnt_s,
   7951          .vece = MO_32 },
   7952        { .fniv = gen_sqshrnt_vec,
   7953          .opt_opc = vec_list,
   7954          .load_dest = true,
   7955          .fno = gen_helper_sve2_sqshrnt_d,
   7956          .vece = MO_64 },
   7957    };
   7958    return do_sve2_shr_narrow(s, a, ops);
   7959}
   7960
/* Rounding variants of SQSHRN[BT]: out-of-line helpers only. */
static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnb_h },
        { .fno = gen_helper_sve2_sqrshrnb_s },
        { .fno = gen_helper_sve2_sqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnt_h },
        { .fno = gen_helper_sve2_sqrshrnt_s },
        { .fno = gen_helper_sve2_sqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
   7980
/*
 * UQSHRNB vector expansion: logical shift right, then saturate to the
 * unsigned half-width maximum with a single umin.
 * n is clobbered; it is a temporary supplied by the gvec expander.
 */
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
   7992
   7993static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
   7994{
   7995    static const TCGOpcode vec_list[] = {
   7996        INDEX_op_shri_vec, INDEX_op_umin_vec, 0
   7997    };
   7998    static const GVecGen2i ops[3] = {
   7999        { .fniv = gen_uqshrnb_vec,
   8000          .opt_opc = vec_list,
   8001          .fno = gen_helper_sve2_uqshrnb_h,
   8002          .vece = MO_16 },
   8003        { .fniv = gen_uqshrnb_vec,
   8004          .opt_opc = vec_list,
   8005          .fno = gen_helper_sve2_uqshrnb_s,
   8006          .vece = MO_32 },
   8007        { .fniv = gen_uqshrnb_vec,
   8008          .opt_opc = vec_list,
   8009          .fno = gen_helper_sve2_uqshrnb_d,
   8010          .vece = MO_64 },
   8011    };
   8012    return do_sve2_shr_narrow(s, a, ops);
   8013}
   8014
/*
 * UQSHRNT vector expansion: as UQSHRNB (shift, umin saturate), then
 * shift into the high halves and bit-select with d to preserve the
 * bottom halves.
 * n is clobbered; it is a temporary supplied by the gvec expander.
 */
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* t still holds the low-half mask: keep d's low halves, n's high. */
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
   8028
   8029static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
   8030{
   8031    static const TCGOpcode vec_list[] = {
   8032        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
   8033    };
   8034    static const GVecGen2i ops[3] = {
   8035        { .fniv = gen_uqshrnt_vec,
   8036          .opt_opc = vec_list,
   8037          .load_dest = true,
   8038          .fno = gen_helper_sve2_uqshrnt_h,
   8039          .vece = MO_16 },
   8040        { .fniv = gen_uqshrnt_vec,
   8041          .opt_opc = vec_list,
   8042          .load_dest = true,
   8043          .fno = gen_helper_sve2_uqshrnt_s,
   8044          .vece = MO_32 },
   8045        { .fniv = gen_uqshrnt_vec,
   8046          .opt_opc = vec_list,
   8047          .load_dest = true,
   8048          .fno = gen_helper_sve2_uqshrnt_d,
   8049          .vece = MO_64 },
   8050    };
   8051    return do_sve2_shr_narrow(s, a, ops);
   8052}
   8053
/* Rounding variants of UQSHRN[BT]: out-of-line helpers only. */
static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnb_h },
        { .fno = gen_helper_sve2_uqrshrnb_s },
        { .fno = gen_helper_sve2_uqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnt_h },
        { .fno = gen_helper_sve2_uqrshrnt_s },
        { .fno = gen_helper_sve2_uqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
   8073
/*
 * Expand a trans_* function for an SVE2 narrowing three-register op,
 * selecting the out-of-line helper by element size.  MO_8 is invalid
 * for these, hence the NULL entry (assumed rejected by
 * do_sve2_zzz_ool — defined earlier in this file; confirm there).
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)                 \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        NULL,                       gen_helper_sve2_##name##_h,           \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    return do_sve2_zzz_ool(s, a, fns[a->esz]);                            \
}

/* Add/subtract high-half narrowing, with and without rounding. */
DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
   8093
   8094static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
   8095                               gen_helper_gvec_flags_4 *fn)
   8096{
   8097    if (!dc_isar_feature(aa64_sve2, s)) {
   8098        return false;
   8099    }
   8100    return do_ppzz_flags(s, a, fn);
   8101}
   8102
/*
 * Expand a trans_* function for MATCH/NMATCH.  Only byte and halfword
 * element sizes are architecturally valid, hence the NULL entries for
 * MO_32/MO_64 (assumed rejected by do_sve2_ppzz_flags's callee).
 */
#define DO_SVE2_PPZZ_MATCH(NAME, name)                                      \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_flags_4 * const fns[4] = {                       \
        gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h,   \
        NULL,                            NULL                               \
    };                                                                      \
    return do_sve2_ppzz_flags(s, a, fns[a->esz]);                           \
}

DO_SVE2_PPZZ_MATCH(MATCH, match)
DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
   8115
   8116static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
   8117{
   8118    static gen_helper_gvec_4 * const fns[2] = {
   8119        gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
   8120    };
   8121    if (a->esz < 2) {
   8122        return false;
   8123    }
   8124    return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
   8125}
   8126
   8127static bool trans_HISTSEG(DisasContext *s, arg_rrr_esz *a)
   8128{
   8129    if (a->esz != 0) {
   8130        return false;
   8131    }
   8132    return do_sve2_zzz_ool(s, a, gen_helper_sve2_histseg);
   8133}
   8134
   8135static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
   8136                            gen_helper_gvec_4_ptr *fn)
   8137{
   8138    if (!dc_isar_feature(aa64_sve2, s)) {
   8139        return false;
   8140    }
   8141    return do_zpzz_fp(s, a, fn);
   8142}
   8143
/*
 * Expand a trans_* function for an SVE2 predicated FP pairwise op.
 * No byte-sized FP elements, hence the NULL entry for MO_8.
 */
#define DO_SVE2_ZPZZ_FP(NAME, name)                                         \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_4_ptr * const fns[4] = {                         \
        NULL,                            gen_helper_sve2_##name##_zpzz_h,   \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d    \
    };                                                                      \
    return do_sve2_zpzz_fp(s, a, fns[a->esz]);                              \
}

DO_SVE2_ZPZZ_FP(FADDP, faddp)
DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
DO_SVE2_ZPZZ_FP(FMINP, fminp)
   8159
   8160/*
   8161 * SVE Integer Multiply-Add (unpredicated)
   8162 */
   8163
/*
 * FMMLA: FP matrix multiply-accumulate.  Single precision is gated on
 * F32MM, double on F64MM; other element sizes are invalid.  Expansion
 * is out-of-line with an FP-status pointer as the extra argument.
 */
static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    gen_helper_gvec_4_ptr *fn;

    switch (a->esz) {
    case MO_32:
        if (!dc_isar_feature(aa64_sve_f32mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_s;
        break;
    case MO_64:
        if (!dc_isar_feature(aa64_sve_f64mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_d;
        break;
    default:
        return false;
    }

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
   8197
/*
 * SQDML[AS]L common expanders.  sel1/sel2 choose the bottom (false) or
 * top (true) half-elements of the two sources; the pair is packed into
 * the simd_data immediate as (sel2 << 1) | sel1.  No byte-sized form,
 * hence the NULL entry for MO_8.
 */
static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                           gen_helper_sve2_sqdmlal_zzzw_h,
        gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}

static bool do_sqdmlsl_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                           gen_helper_sve2_sqdmlsl_zzzw_h,
        gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}
   8217
/* B = bottom/bottom, T = top/top, BT = bottom/top source halves. */
static bool trans_SQDMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, false);
}

static bool trans_SQDMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, true, true);
}

static bool trans_SQDMLALBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, true);
}

static bool trans_SQDMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, false);
}

static bool trans_SQDMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, true, true);
}

static bool trans_SQDMLSLBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, true);
}
   8247
/*
 * SQRDML[AS]H: signed saturating rounding doubling multiply
 * accumulate/subtract high.  All four element sizes are valid.
 */
static bool trans_SQRDMLAH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
        gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}

static bool trans_SQRDMLSH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
        gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}
   8265
/*
 * Widening integer multiply-add/subtract families:
 * {S,U}ML{A,S}L[BT].  'sel' selects bottom (false) or top (true)
 * source half-elements and is passed as the simd_data immediate.
 * No byte-sized forms, hence the NULL entries for MO_8.
 */
static bool do_smlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_smlal_zzzw_h,
        gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_SMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, false);
}

static bool trans_SMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, true);
}

static bool do_umlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_umlal_zzzw_h,
        gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_UMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, false);
}

static bool trans_UMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, true);
}

static bool do_smlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_smlsl_zzzw_h,
        gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_SMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, false);
}

static bool trans_SMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, true);
}

static bool do_umlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_umlsl_zzzw_h,
        gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_UMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, false);
}

static bool trans_UMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, true);
}
   8341
   8342static bool trans_CMLA_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
   8343{
   8344    static gen_helper_gvec_4 * const fns[] = {
   8345        gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
   8346        gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
   8347    };
   8348
   8349    if (!dc_isar_feature(aa64_sve2, s)) {
   8350        return false;
   8351    }
   8352    if (sve_access_check(s)) {
   8353        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
   8354    }
   8355    return true;
   8356}
   8357
   8358static bool trans_CDOT_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
   8359{
   8360    if (!dc_isar_feature(aa64_sve2, s) || a->esz < MO_32) {
   8361        return false;
   8362    }
   8363    if (sve_access_check(s)) {
   8364        gen_helper_gvec_4 *fn = (a->esz == MO_32
   8365                                 ? gen_helper_sve2_cdot_zzzz_s
   8366                                 : gen_helper_sve2_cdot_zzzz_d);
   8367        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->rot);
   8368    }
   8369    return true;
   8370}
   8371
/*
 * SQRDCMLAH (vectors): saturating rounding doubling complex
 * multiply-add high; rotation is passed as the data immediate.
 */
static bool trans_SQRDCMLAH_zzzz(DisasContext *s, arg_SQRDCMLAH_zzzz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
        gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
    };

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
    }
    return true;
}
   8387
/*
 * USDOT (vectors): unsigned-by-signed dot product, gated on the I8MM
 * feature.  Only the 32-bit accumulator form (esz == 2) exists.
 */
static bool trans_USDOT_zzzz(DisasContext *s, arg_USDOT_zzzz *a)
{
    if (a->esz != 2 || !dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           vsz, vsz, 0, gen_helper_gvec_usdot_b);
    }
    return true;
}
   8403
   8404static bool trans_AESMC(DisasContext *s, arg_AESMC *a)
   8405{
   8406    if (!dc_isar_feature(aa64_sve2_aes, s)) {
   8407        return false;
   8408    }
   8409    if (sve_access_check(s)) {
   8410        gen_gvec_ool_zz(s, gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt);
   8411    }
   8412    return true;
   8413}
   8414
/*
 * AESE/AESD share one expander; 'decrypt' is passed to the crypto
 * helper as the data immediate.  Gated on the SVE2-AES feature.
 */
static bool do_aese(DisasContext *s, arg_rrr_esz *a, bool decrypt)
{
    if (!dc_isar_feature(aa64_sve2_aes, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, gen_helper_crypto_aese,
                         a->rd, a->rn, a->rm, decrypt);
    }
    return true;
}

static bool trans_AESE(DisasContext *s, arg_rrr_esz *a)
{
    return do_aese(s, a, false);
}

static bool trans_AESD(DisasContext *s, arg_rrr_esz *a)
{
    return do_aese(s, a, true);
}
   8436
/* SM4E/SM4EKEY share one expander, gated on the SVE2-SM4 feature. */
static bool do_sm4(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (!dc_isar_feature(aa64_sve2_sm4, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
    }
    return true;
}

static bool trans_SM4E(DisasContext *s, arg_rrr_esz *a)
{
    return do_sm4(s, a, gen_helper_crypto_sm4e);
}

static bool trans_SM4EKEY(DisasContext *s, arg_rrr_esz *a)
{
    return do_sm4(s, a, gen_helper_crypto_sm4ekey);
}
   8457
   8458static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
   8459{
   8460    if (!dc_isar_feature(aa64_sve2_sha3, s)) {
   8461        return false;
   8462    }
   8463    if (sve_access_check(s)) {
   8464        gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm);
   8465    }
   8466    return true;
   8467}
   8468
   8469static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
   8470{
   8471    if (!dc_isar_feature(aa64_sve2, s)) {
   8472        return false;
   8473    }
   8474    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
   8475}
   8476
   8477static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
   8478{
   8479    if (!dc_isar_feature(aa64_sve_bf16, s)) {
   8480        return false;
   8481    }
   8482    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
   8483}
   8484
/*
 * SVE2 narrowing (FCVTNT) and widening (FCVTLT) FP converts, all
 * predicated, expanded via the generic zpz-with-fpstatus path.
 */
static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
}

static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
}

static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
}
   8508
/*
 * FCVTX/FCVTXNT: double-to-single converts using round-to-odd,
 * implemented by forcing the rounding mode around the ordinary
 * convert helpers.
 */
static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
}
   8524
/*
 * FLOGB: FP exponent extraction.  No byte form (NULL for MO_8);
 * half-precision uses the FP16-specific status flags.
 */
static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        NULL,               gen_helper_flogb_h,
        gen_helper_flogb_s, gen_helper_flogb_d
    };

    if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_ptr status =
            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
   8548
   8549static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
   8550{
   8551    if (!dc_isar_feature(aa64_sve2, s)) {
   8552        return false;
   8553    }
   8554    if (sve_access_check(s)) {
   8555        unsigned vsz = vec_full_reg_size(s);
   8556        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
   8557                           vec_full_reg_offset(s, a->rn),
   8558                           vec_full_reg_offset(s, a->rm),
   8559                           vec_full_reg_offset(s, a->ra),
   8560                           cpu_env, vsz, vsz, (sel << 1) | sub,
   8561                           gen_helper_sve2_fmlal_zzzw_s);
   8562    }
   8563    return true;
   8564}
   8565
/* FMLALB (vectors): sub=false (add), sel=false (bottom elements). */
static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, false);
}
   8570
/* FMLALT (vectors): sub=false (add), sel=true (top elements). */
static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, true);
}
   8575
/* FMLSLB (vectors): sub=true (subtract), sel=false (bottom elements). */
static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, false);
}
   8580
/* FMLSLT (vectors): sub=true (subtract), sel=true (top elements). */
static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, true);
}
   8585
   8586static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
   8587{
   8588    if (!dc_isar_feature(aa64_sve2, s)) {
   8589        return false;
   8590    }
   8591    if (sve_access_check(s)) {
   8592        unsigned vsz = vec_full_reg_size(s);
   8593        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
   8594                           vec_full_reg_offset(s, a->rn),
   8595                           vec_full_reg_offset(s, a->rm),
   8596                           vec_full_reg_offset(s, a->ra),
   8597                           cpu_env, vsz, vsz,
   8598                           (a->index << 2) | (sel << 1) | sub,
   8599                           gen_helper_sve2_fmlal_zzxw_s);
   8600    }
   8601    return true;
   8602}
   8603
/* FMLALB (indexed): sub=false (add), sel=false (bottom elements). */
static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, false);
}
   8608
/* FMLALT (indexed): sub=false (add), sel=true (top elements). */
static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, true);
}
   8613
/* FMLSLB (indexed): sub=true (subtract), sel=false (bottom elements). */
static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, false);
}
   8618
/* FMLSLT (indexed): sub=true (subtract), sel=true (top elements). */
static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, true);
}
   8623
   8624static bool do_i8mm_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
   8625                             gen_helper_gvec_4 *fn, int data)
   8626{
   8627    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
   8628        return false;
   8629    }
   8630    if (sve_access_check(s)) {
   8631        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
   8632    }
   8633    return true;
   8634}
   8635
/* SMMLA: signed 8-bit integer matrix multiply-accumulate (I8MM). */
static bool trans_SMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_smmla_b, 0);
}
   8640
/* USMMLA: unsigned-by-signed 8-bit matrix multiply-accumulate (I8MM). */
static bool trans_USMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_usmmla_b, 0);
}
   8645
/* UMMLA: unsigned 8-bit integer matrix multiply-accumulate (I8MM). */
static bool trans_UMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_ummla_b, 0);
}
   8650
   8651static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a)
   8652{
   8653    if (!dc_isar_feature(aa64_sve_bf16, s)) {
   8654        return false;
   8655    }
   8656    if (sve_access_check(s)) {
   8657        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot,
   8658                          a->rd, a->rn, a->rm, a->ra, 0);
   8659    }
   8660    return true;
   8661}
   8662
   8663static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a)
   8664{
   8665    if (!dc_isar_feature(aa64_sve_bf16, s)) {
   8666        return false;
   8667    }
   8668    if (sve_access_check(s)) {
   8669        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot_idx,
   8670                          a->rd, a->rn, a->rm, a->ra, a->index);
   8671    }
   8672    return true;
   8673}
   8674
   8675static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a)
   8676{
   8677    if (!dc_isar_feature(aa64_sve_bf16, s)) {
   8678        return false;
   8679    }
   8680    if (sve_access_check(s)) {
   8681        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfmmla,
   8682                          a->rd, a->rn, a->rm, a->ra, 0);
   8683    }
   8684    return true;
   8685}
   8686
   8687static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
   8688{
   8689    if (!dc_isar_feature(aa64_sve_bf16, s)) {
   8690        return false;
   8691    }
   8692    if (sve_access_check(s)) {
   8693        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
   8694        unsigned vsz = vec_full_reg_size(s);
   8695
   8696        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
   8697                           vec_full_reg_offset(s, a->rn),
   8698                           vec_full_reg_offset(s, a->rm),
   8699                           vec_full_reg_offset(s, a->ra),
   8700                           status, vsz, vsz, sel,
   8701                           gen_helper_gvec_bfmlal);
   8702        tcg_temp_free_ptr(status);
   8703    }
   8704    return true;
   8705}
   8706
/* BFMLALB (vectors): sel=false (bottom elements). */
static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, false);
}
   8711
/* BFMLALT (vectors): sel=true (top elements). */
static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, true);
}
   8716
   8717static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
   8718{
   8719    if (!dc_isar_feature(aa64_sve_bf16, s)) {
   8720        return false;
   8721    }
   8722    if (sve_access_check(s)) {
   8723        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
   8724        unsigned vsz = vec_full_reg_size(s);
   8725
   8726        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
   8727                           vec_full_reg_offset(s, a->rn),
   8728                           vec_full_reg_offset(s, a->rm),
   8729                           vec_full_reg_offset(s, a->ra),
   8730                           status, vsz, vsz, (a->index << 1) | sel,
   8731                           gen_helper_gvec_bfmlal_idx);
   8732        tcg_temp_free_ptr(status);
   8733    }
   8734    return true;
   8735}
   8736
/* BFMLALB (indexed): sel=false (bottom elements). */
static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, false);
}
   8741
/* BFMLALT (indexed): sel=true (top elements). */
static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, true);
}