cachepc-qemu

Fork of AMDESE/qemu with changes for the cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

translate.c (297903B)


      1/*
      2 *  ARM translation
      3 *
      4 *  Copyright (c) 2003 Fabrice Bellard
      5 *  Copyright (c) 2005-2007 CodeSourcery
      6 *  Copyright (c) 2007 OpenedHand, Ltd.
      7 *
      8 * This library is free software; you can redistribute it and/or
      9 * modify it under the terms of the GNU Lesser General Public
     10 * License as published by the Free Software Foundation; either
     11 * version 2.1 of the License, or (at your option) any later version.
     12 *
     13 * This library is distributed in the hope that it will be useful,
     14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16 * Lesser General Public License for more details.
     17 *
     18 * You should have received a copy of the GNU Lesser General Public
     19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     20 */
     21#include "qemu/osdep.h"
     22
     23#include "cpu.h"
     24#include "internals.h"
     25#include "disas/disas.h"
     26#include "exec/exec-all.h"
     27#include "tcg/tcg-op.h"
     28#include "tcg/tcg-op-gvec.h"
     29#include "qemu/log.h"
     30#include "qemu/bitops.h"
     31#include "arm_ldst.h"
     32#include "semihosting/semihost.h"
     33
     34#include "exec/helper-proto.h"
     35#include "exec/helper-gen.h"
     36
     37#include "exec/log.h"
     38
     39
     40#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
     41#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
     42/* currently all emulated v5 cores are also v5TE, so don't bother */
     43#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
     44#define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
     45#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
     46#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
     47#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
     48#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
     49#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
     50
     51#include "translate.h"
     52#include "translate-a32.h"
     53
     54/* These are TCG temporaries used only by the legacy iwMMXt decoder */
     55static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
     56/* These are TCG globals which alias CPUARMState fields */
     57static TCGv_i32 cpu_R[16];
     58TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
     59TCGv_i64 cpu_exclusive_addr;
     60TCGv_i64 cpu_exclusive_val;
     61
     62#include "exec/gen-icount.h"
     63
     64static const char * const regnames[] =
     65    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
     66      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
     67
     68
     69/* initialize TCG globals.  */
     70void arm_translate_init(void)
     71{
     72    int i;
     73
     74    for (i = 0; i < 16; i++) {
     75        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
     76                                          offsetof(CPUARMState, regs[i]),
     77                                          regnames[i]);
     78    }
     79    cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
     80    cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
     81    cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
     82    cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
     83
     84    cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
     85        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
     86    cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
     87        offsetof(CPUARMState, exclusive_val), "exclusive_val");
     88
     89    a64_translate_init();
     90}
     91
     92uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
     93{
     94    /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
     95    switch (cmode) {
     96    case 0: case 1:
     97        /* no-op */
     98        break;
     99    case 2: case 3:
    100        imm <<= 8;
    101        break;
    102    case 4: case 5:
    103        imm <<= 16;
    104        break;
    105    case 6: case 7:
    106        imm <<= 24;
    107        break;
    108    case 8: case 9:
    109        imm |= imm << 16;
    110        break;
    111    case 10: case 11:
    112        imm = (imm << 8) | (imm << 24);
    113        break;
    114    case 12:
    115        imm = (imm << 8) | 0xff;
    116        break;
    117    case 13:
    118        imm = (imm << 16) | 0xffff;
    119        break;
    120    case 14:
    121        if (op) {
    122            /*
    123             * This and cmode == 15 op == 1 are the only cases where
    124             * the top and bottom 32 bits of the encoded constant differ.
    125             */
    126            uint64_t imm64 = 0;
    127            int n;
    128
    129            for (n = 0; n < 8; n++) {
    130                if (imm & (1 << n)) {
    131                    imm64 |= (0xffULL << (n * 8));
    132                }
    133            }
    134            return imm64;
    135        }
    136        imm |= (imm << 8) | (imm << 16) | (imm << 24);
    137        break;
    138    case 15:
    139        if (op) {
    140            /* Reserved encoding for AArch32; valid for AArch64 */
    141            uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
    142            if (imm & 0x80) {
    143                imm64 |= 0x8000000000000000ULL;
    144            }
    145            if (imm & 0x40) {
    146                imm64 |= 0x3fc0000000000000ULL;
    147            } else {
    148                imm64 |= 0x4000000000000000ULL;
    149            }
    150            return imm64;
    151        }
    152        imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
    153            | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
    154        break;
    155    }
    156    if (op) {
    157        imm = ~imm;
    158    }
    159    return dup_const(MO_32, imm);
    160}
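
/*
 * Worked examples of the AdvSIMDExpandImm expansion above (illustrative
 * values chosen for this note, not taken from the source):
 *   cmode=12, op=0, imm=0x12 -> (0x12 << 8) | 0xff = 0x12ff, replicated
 *       per 32 bits by dup_const(): 0x000012ff000012ff.
 *   cmode=14, op=1, imm=0xa5 -> each set bit selects an all-ones byte;
 *       bits 0, 2, 5 and 7 are set, so the result is 0xff00ff0000ff00ff.
 */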
    161
    162/* Generate a label used for skipping this instruction */
    163void arm_gen_condlabel(DisasContext *s)
    164{
    165    if (!s->condjmp) {
    166        s->condlabel = gen_new_label();
    167        s->condjmp = 1;
    168    }
    169}
    170
    171/* Flags for the disas_set_da_iss info argument:
    172 * lower bits hold the Rt register number, higher bits are flags.
    173 */
    174typedef enum ISSInfo {
    175    ISSNone = 0,
    176    ISSRegMask = 0x1f,
    177    ISSInvalid = (1 << 5),
    178    ISSIsAcqRel = (1 << 6),
    179    ISSIsWrite = (1 << 7),
    180    ISSIs16Bit = (1 << 8),
    181} ISSInfo;
    182
    183/* Save the syndrome information for a Data Abort */
    184static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
    185{
    186    uint32_t syn;
    187    int sas = memop & MO_SIZE;
    188    bool sse = memop & MO_SIGN;
    189    bool is_acqrel = issinfo & ISSIsAcqRel;
    190    bool is_write = issinfo & ISSIsWrite;
    191    bool is_16bit = issinfo & ISSIs16Bit;
    192    int srt = issinfo & ISSRegMask;
    193
    194    if (issinfo & ISSInvalid) {
    195        /* Some callsites want to conditionally provide ISS info,
    196         * eg "only if this was not a writeback"
    197         */
    198        return;
    199    }
    200
    201    if (srt == 15) {
    202        /* For AArch32, insns where the src/dest is R15 never generate
    203         * ISS information. Catching that here saves checking at all
    204         * the call sites.
    205         */
    206        return;
    207    }
    208
    209    syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
    210                                  0, 0, 0, is_write, 0, is_16bit);
    211    disas_set_insn_syndrome(s, syn);
    212}
    213
    214static inline int get_a32_user_mem_index(DisasContext *s)
    215{
    216    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
    217     * insns:
    218     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
    219     *  otherwise, access as if at PL0.
    220     */
    221    switch (s->mmu_idx) {
    222    case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
    223    case ARMMMUIdx_E10_0:
    224    case ARMMMUIdx_E10_1:
    225    case ARMMMUIdx_E10_1_PAN:
    226        return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
    227    case ARMMMUIdx_SE3:
    228    case ARMMMUIdx_SE10_0:
    229    case ARMMMUIdx_SE10_1:
    230    case ARMMMUIdx_SE10_1_PAN:
    231        return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
    232    case ARMMMUIdx_MUser:
    233    case ARMMMUIdx_MPriv:
    234        return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
    235    case ARMMMUIdx_MUserNegPri:
    236    case ARMMMUIdx_MPrivNegPri:
    237        return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
    238    case ARMMMUIdx_MSUser:
    239    case ARMMMUIdx_MSPriv:
    240        return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
    241    case ARMMMUIdx_MSUserNegPri:
    242    case ARMMMUIdx_MSPrivNegPri:
    243        return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
    244    default:
    245        g_assert_not_reached();
    246    }
    247}
    248
    249/* The architectural value of PC.  */
    250static uint32_t read_pc(DisasContext *s)
    251{
    252    return s->pc_curr + (s->thumb ? 4 : 8);
    253}
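
/*
 * Illustrative example: for an A32 insn at 0x00001000, read_pc() returns
 * 0x00001008; for a T32 insn at the same address it returns 0x00001004,
 * matching the architectural rule that reads of the PC see the current
 * instruction address plus 8 (A32) or plus 4 (T32).
 */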
    254
    255/* Set a variable to the value of a CPU register.  */
    256void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
    257{
    258    if (reg == 15) {
    259        tcg_gen_movi_i32(var, read_pc(s));
    260    } else {
    261        tcg_gen_mov_i32(var, cpu_R[reg]);
    262    }
    263}
    264
    265/*
    266 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
    267 * This is used for load/store for which use of PC implies (literal),
    268 * or ADD that implies ADR.
    269 */
    270TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
    271{
    272    TCGv_i32 tmp = tcg_temp_new_i32();
    273
    274    if (reg == 15) {
    275        tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
    276    } else {
    277        tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
    278    }
    279    return tmp;
    280}
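
/*
 * Illustrative example: a Thumb LDR (literal) at pc_curr = 0x1002 sees
 * read_pc() = 0x1006, which is aligned down to 0x1004 before the offset
 * is added, as PC-relative literal addressing requires.
 */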
    281
    282/* Set a CPU register.  The source must be a temporary and will be
    283   marked as dead.  */
    284void store_reg(DisasContext *s, int reg, TCGv_i32 var)
    285{
    286    if (reg == 15) {
    287        /* In Thumb mode, we must ignore bit 0.
    288         * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
    289         * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
    290         * We choose to ignore [1:0] in ARM mode for all architecture versions.
    291         */
    292        tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
    293        s->base.is_jmp = DISAS_JUMP;
    294    } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
    295        /* For M-profile SP bits [1:0] are always zero */
    296        tcg_gen_andi_i32(var, var, ~3);
    297    }
    298    tcg_gen_mov_i32(cpu_R[reg], var);
    299    tcg_temp_free_i32(var);
    300}
    301
    302/*
    303 * Variant of store_reg which applies v8M stack-limit checks before updating
    304 * SP. If the check fails this will result in an exception being taken.
    305 * We disable the stack checks for CONFIG_USER_ONLY because we have
    306 * no idea what the stack limits should be in that case.
    307 * If stack checking is not being done this just acts like store_reg().
    308 */
    309static void store_sp_checked(DisasContext *s, TCGv_i32 var)
    310{
    311#ifndef CONFIG_USER_ONLY
    312    if (s->v8m_stackcheck) {
    313        gen_helper_v8m_stackcheck(cpu_env, var);
    314    }
    315#endif
    316    store_reg(s, 13, var);
    317}
    318
    319/* Value extensions.  */
    320#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
    321#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
    322#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
    323#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
    324
    325#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
    326#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
    327
    328void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
    329{
    330    TCGv_i32 tmp_mask = tcg_const_i32(mask);
    331    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
    332    tcg_temp_free_i32(tmp_mask);
    333}
    334
    335static void gen_exception_internal(int excp)
    336{
    337    TCGv_i32 tcg_excp = tcg_const_i32(excp);
    338
    339    assert(excp_is_internal(excp));
    340    gen_helper_exception_internal(cpu_env, tcg_excp);
    341    tcg_temp_free_i32(tcg_excp);
    342}
    343
    344static void gen_step_complete_exception(DisasContext *s)
    345{
    346    /* We just completed step of an insn. Move from Active-not-pending
    347     * to Active-pending, and then also take the swstep exception.
    348     * This corresponds to making the (IMPDEF) choice to prioritize
    349     * swstep exceptions over asynchronous exceptions taken to an exception
    350     * level where debug is disabled. This choice has the advantage that
    351     * we do not need to maintain internal state corresponding to the
    352     * ISV/EX syndrome bits between completion of the step and generation
    353     * of the exception, and our syndrome information is always correct.
    354     */
    355    gen_ss_advance(s);
    356    gen_swstep_exception(s, 1, s->is_ldex);
    357    s->base.is_jmp = DISAS_NORETURN;
    358}
    359
    360static void gen_singlestep_exception(DisasContext *s)
    361{
    362    /* Generate the right kind of exception for singlestep, which is
    363     * either the architectural singlestep or EXCP_DEBUG for QEMU's
    364     * gdb singlestepping.
    365     */
    366    if (s->ss_active) {
    367        gen_step_complete_exception(s);
    368    } else {
    369        gen_exception_internal(EXCP_DEBUG);
    370    }
    371}
    372
    373static inline bool is_singlestepping(DisasContext *s)
    374{
    375    /* Return true if we are singlestepping either because of
    376     * architectural singlestep or QEMU gdbstub singlestep. This does
    377     * not include the command line '-singlestep' mode which is rather
    378     * misnamed as it only means "one instruction per TB" and doesn't
    379     * affect the code we generate.
    380     */
    381    return s->base.singlestep_enabled || s->ss_active;
    382}
    383
    384void clear_eci_state(DisasContext *s)
    385{
    386    /*
    387     * Clear any ECI/ICI state: used when a load multiple/store
    388     * multiple insn executes.
    389     */
    390    if (s->eci) {
    391        TCGv_i32 tmp = tcg_const_i32(0);
    392        store_cpu_field(tmp, condexec_bits);
    393        s->eci = 0;
    394    }
    395}
    396
    397static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
    398{
    399    TCGv_i32 tmp1 = tcg_temp_new_i32();
    400    TCGv_i32 tmp2 = tcg_temp_new_i32();
    401    tcg_gen_ext16s_i32(tmp1, a);
    402    tcg_gen_ext16s_i32(tmp2, b);
    403    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
    404    tcg_temp_free_i32(tmp2);
    405    tcg_gen_sari_i32(a, a, 16);
    406    tcg_gen_sari_i32(b, b, 16);
    407    tcg_gen_mul_i32(b, b, a);
    408    tcg_gen_mov_i32(a, tmp1);
    409    tcg_temp_free_i32(tmp1);
    410}
    411
    412/* Byteswap each halfword.  */
    413void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
    414{
    415    TCGv_i32 tmp = tcg_temp_new_i32();
    416    TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
    417    tcg_gen_shri_i32(tmp, var, 8);
    418    tcg_gen_and_i32(tmp, tmp, mask);
    419    tcg_gen_and_i32(var, var, mask);
    420    tcg_gen_shli_i32(var, var, 8);
    421    tcg_gen_or_i32(dest, var, tmp);
    422    tcg_temp_free_i32(mask);
    423    tcg_temp_free_i32(tmp);
    424}
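
/*
 * Illustrative example: var = 0x11223344 yields dest = 0x22114433, i.e.
 * each 16-bit halfword is byteswapped independently (REV16).
 */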
    425
    426/* Byteswap low halfword and sign extend.  */
    427static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
    428{
     429    tcg_gen_bswap16_i32(dest, var, TCG_BSWAP_OS);
    430}
    431
    432/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
    433    tmp = (t0 ^ t1) & 0x8000;
    434    t0 &= ~0x8000;
    435    t1 &= ~0x8000;
    436    t0 = (t0 + t1) ^ tmp;
    437 */
    438
    439static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    440{
    441    TCGv_i32 tmp = tcg_temp_new_i32();
    442    tcg_gen_xor_i32(tmp, t0, t1);
    443    tcg_gen_andi_i32(tmp, tmp, 0x8000);
    444    tcg_gen_andi_i32(t0, t0, ~0x8000);
    445    tcg_gen_andi_i32(t1, t1, ~0x8000);
    446    tcg_gen_add_i32(t0, t0, t1);
    447    tcg_gen_xor_i32(dest, t0, tmp);
    448    tcg_temp_free_i32(tmp);
    449}
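
/*
 * Illustrative example of the masking trick above: t0 = t1 = 0x00018000.
 * Clearing bit 15 of both operands and adding gives 0x00020000; XORing the
 * saved bit-15 difference (0 here) back in leaves 0x00020000. A plain
 * 32-bit add would give 0x00030000, i.e. the carry out of the low halfword
 * would leak into the high halfword, which is exactly what this per-lane
 * add exists to prevent.
 */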
    450
    451/* Set N and Z flags from var.  */
    452static inline void gen_logic_CC(TCGv_i32 var)
    453{
    454    tcg_gen_mov_i32(cpu_NF, var);
    455    tcg_gen_mov_i32(cpu_ZF, var);
    456}
    457
    458/* dest = T0 + T1 + CF. */
    459static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    460{
    461    tcg_gen_add_i32(dest, t0, t1);
    462    tcg_gen_add_i32(dest, dest, cpu_CF);
    463}
    464
    465/* dest = T0 - T1 + CF - 1.  */
    466static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    467{
    468    tcg_gen_sub_i32(dest, t0, t1);
    469    tcg_gen_add_i32(dest, dest, cpu_CF);
    470    tcg_gen_subi_i32(dest, dest, 1);
    471}
    472
    473/* dest = T0 + T1. Compute C, N, V and Z flags */
    474static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    475{
    476    TCGv_i32 tmp = tcg_temp_new_i32();
    477    tcg_gen_movi_i32(tmp, 0);
    478    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
    479    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    480    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    481    tcg_gen_xor_i32(tmp, t0, t1);
    482    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    483    tcg_temp_free_i32(tmp);
    484    tcg_gen_mov_i32(dest, cpu_NF);
    485}
    486
    487/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
    488static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    489{
    490    TCGv_i32 tmp = tcg_temp_new_i32();
    491    if (TCG_TARGET_HAS_add2_i32) {
    492        tcg_gen_movi_i32(tmp, 0);
    493        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
    494        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
    495    } else {
    496        TCGv_i64 q0 = tcg_temp_new_i64();
    497        TCGv_i64 q1 = tcg_temp_new_i64();
    498        tcg_gen_extu_i32_i64(q0, t0);
    499        tcg_gen_extu_i32_i64(q1, t1);
    500        tcg_gen_add_i64(q0, q0, q1);
    501        tcg_gen_extu_i32_i64(q1, cpu_CF);
    502        tcg_gen_add_i64(q0, q0, q1);
    503        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
    504        tcg_temp_free_i64(q0);
    505        tcg_temp_free_i64(q1);
    506    }
    507    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    508    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    509    tcg_gen_xor_i32(tmp, t0, t1);
    510    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    511    tcg_temp_free_i32(tmp);
    512    tcg_gen_mov_i32(dest, cpu_NF);
    513}
    514
    515/* dest = T0 - T1. Compute C, N, V and Z flags */
    516static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    517{
    518    TCGv_i32 tmp;
    519    tcg_gen_sub_i32(cpu_NF, t0, t1);
    520    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    521    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
    522    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    523    tmp = tcg_temp_new_i32();
    524    tcg_gen_xor_i32(tmp, t0, t1);
    525    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    526    tcg_temp_free_i32(tmp);
    527    tcg_gen_mov_i32(dest, cpu_NF);
    528}
    529
    530/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
    531static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    532{
    533    TCGv_i32 tmp = tcg_temp_new_i32();
    534    tcg_gen_not_i32(tmp, t1);
    535    gen_adc_CC(dest, t0, tmp);
    536    tcg_temp_free_i32(tmp);
    537}
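
/*
 * Illustrative identity: SBC computes t0 - t1 - !CF, which is the same as
 * t0 + ~t1 + CF. For example t0 = 5, t1 = 3 with CF = 1 gives
 * 5 + 0xfffffffc + 1 = 2 with the carry out set (no borrow), matching the
 * flag convention used by gen_sub_CC above.
 */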
    538
    539#define GEN_SHIFT(name)                                               \
    540static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
    541{                                                                     \
    542    TCGv_i32 tmp1, tmp2, tmp3;                                        \
    543    tmp1 = tcg_temp_new_i32();                                        \
    544    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
    545    tmp2 = tcg_const_i32(0);                                          \
    546    tmp3 = tcg_const_i32(0x1f);                                       \
    547    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
    548    tcg_temp_free_i32(tmp3);                                          \
    549    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
    550    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
    551    tcg_temp_free_i32(tmp2);                                          \
    552    tcg_temp_free_i32(tmp1);                                          \
    553}
    554GEN_SHIFT(shl)
    555GEN_SHIFT(shr)
    556#undef GEN_SHIFT
    557
    558static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    559{
    560    TCGv_i32 tmp1, tmp2;
    561    tmp1 = tcg_temp_new_i32();
    562    tcg_gen_andi_i32(tmp1, t1, 0xff);
    563    tmp2 = tcg_const_i32(0x1f);
    564    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
    565    tcg_temp_free_i32(tmp2);
    566    tcg_gen_sar_i32(dest, t0, tmp1);
    567    tcg_temp_free_i32(tmp1);
    568}
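
/*
 * Note on the register-shift helpers above (illustrative): only the bottom
 * byte of the shift register is used. gen_shl/gen_shr produce 0 once that
 * byte exceeds 31 (the movcond selects the zero operand), while gen_sar
 * clamps the count to 31, so a large shift yields copies of the sign bit,
 * e.g. t0 = 0x80000000 shifted right by 40 gives 0xffffffff.
 */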
    569
    570static void shifter_out_im(TCGv_i32 var, int shift)
    571{
    572    tcg_gen_extract_i32(cpu_CF, var, shift, 1);
    573}
    574
    575/* Shift by immediate.  Includes special handling for shift == 0.  */
    576static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
    577                                    int shift, int flags)
    578{
    579    switch (shiftop) {
    580    case 0: /* LSL */
    581        if (shift != 0) {
    582            if (flags)
    583                shifter_out_im(var, 32 - shift);
    584            tcg_gen_shli_i32(var, var, shift);
    585        }
    586        break;
    587    case 1: /* LSR */
    588        if (shift == 0) {
    589            if (flags) {
    590                tcg_gen_shri_i32(cpu_CF, var, 31);
    591            }
    592            tcg_gen_movi_i32(var, 0);
    593        } else {
    594            if (flags)
    595                shifter_out_im(var, shift - 1);
    596            tcg_gen_shri_i32(var, var, shift);
    597        }
    598        break;
    599    case 2: /* ASR */
    600        if (shift == 0)
    601            shift = 32;
    602        if (flags)
    603            shifter_out_im(var, shift - 1);
    604        if (shift == 32)
    605          shift = 31;
    606        tcg_gen_sari_i32(var, var, shift);
    607        break;
    608    case 3: /* ROR/RRX */
    609        if (shift != 0) {
    610            if (flags)
    611                shifter_out_im(var, shift - 1);
    612            tcg_gen_rotri_i32(var, var, shift); break;
    613        } else {
    614            TCGv_i32 tmp = tcg_temp_new_i32();
    615            tcg_gen_shli_i32(tmp, cpu_CF, 31);
    616            if (flags)
    617                shifter_out_im(var, 0);
    618            tcg_gen_shri_i32(var, var, 1);
    619            tcg_gen_or_i32(var, var, tmp);
    620            tcg_temp_free_i32(tmp);
    621        }
    622    }
    623};
    624
    625static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
    626                                     TCGv_i32 shift, int flags)
    627{
    628    if (flags) {
    629        switch (shiftop) {
    630        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
    631        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
    632        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
    633        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
    634        }
    635    } else {
    636        switch (shiftop) {
    637        case 0:
    638            gen_shl(var, var, shift);
    639            break;
    640        case 1:
    641            gen_shr(var, var, shift);
    642            break;
    643        case 2:
    644            gen_sar(var, var, shift);
    645            break;
    646        case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
    647                tcg_gen_rotr_i32(var, var, shift); break;
    648        }
    649    }
    650    tcg_temp_free_i32(shift);
    651}
    652
    653/*
    654 * Generate a conditional based on ARM condition code cc.
    655 * This is common between ARM and Aarch64 targets.
    656 */
    657void arm_test_cc(DisasCompare *cmp, int cc)
    658{
    659    TCGv_i32 value;
    660    TCGCond cond;
    661    bool global = true;
    662
    663    switch (cc) {
    664    case 0: /* eq: Z */
    665    case 1: /* ne: !Z */
    666        cond = TCG_COND_EQ;
    667        value = cpu_ZF;
    668        break;
    669
    670    case 2: /* cs: C */
    671    case 3: /* cc: !C */
    672        cond = TCG_COND_NE;
    673        value = cpu_CF;
    674        break;
    675
    676    case 4: /* mi: N */
    677    case 5: /* pl: !N */
    678        cond = TCG_COND_LT;
    679        value = cpu_NF;
    680        break;
    681
    682    case 6: /* vs: V */
    683    case 7: /* vc: !V */
    684        cond = TCG_COND_LT;
    685        value = cpu_VF;
    686        break;
    687
    688    case 8: /* hi: C && !Z */
    689    case 9: /* ls: !C || Z -> !(C && !Z) */
    690        cond = TCG_COND_NE;
    691        value = tcg_temp_new_i32();
    692        global = false;
    693        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
    694           ZF is non-zero for !Z; so AND the two subexpressions.  */
    695        tcg_gen_neg_i32(value, cpu_CF);
    696        tcg_gen_and_i32(value, value, cpu_ZF);
    697        break;
    698
    699    case 10: /* ge: N == V -> N ^ V == 0 */
    700    case 11: /* lt: N != V -> N ^ V != 0 */
    701        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
    702        cond = TCG_COND_GE;
    703        value = tcg_temp_new_i32();
    704        global = false;
    705        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
    706        break;
    707
    708    case 12: /* gt: !Z && N == V */
    709    case 13: /* le: Z || N != V */
    710        cond = TCG_COND_NE;
    711        value = tcg_temp_new_i32();
    712        global = false;
    713        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
    714         * the sign bit then AND with ZF to yield the result.  */
    715        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
    716        tcg_gen_sari_i32(value, value, 31);
    717        tcg_gen_andc_i32(value, cpu_ZF, value);
    718        break;
    719
    720    case 14: /* always */
    721    case 15: /* always */
    722        /* Use the ALWAYS condition, which will fold early.
    723         * It doesn't matter what we use for the value.  */
    724        cond = TCG_COND_ALWAYS;
    725        value = cpu_ZF;
    726        goto no_invert;
    727
    728    default:
    729        fprintf(stderr, "Bad condition code 0x%x\n", cc);
    730        abort();
    731    }
    732
    733    if (cc & 1) {
    734        cond = tcg_invert_cond(cond);
    735    }
    736
    737 no_invert:
    738    cmp->cond = cond;
    739    cmp->value = value;
    740    cmp->value_global = global;
    741}
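
/*
 * Illustrative example of the composite conditions above: for HI (cc = 8)
 * the generated value is (-CF) & ZF. With CF = 1 and ZF = 5 (Z clear) this
 * is 0xffffffff & 5 = 5, and TCG_COND_NE against 0 makes the condition
 * true; LS (cc = 9) uses the inverted TCG condition on the same value.
 */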
    742
    743void arm_free_cc(DisasCompare *cmp)
    744{
    745    if (!cmp->value_global) {
    746        tcg_temp_free_i32(cmp->value);
    747    }
    748}
    749
    750void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
    751{
    752    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
    753}
    754
    755void arm_gen_test_cc(int cc, TCGLabel *label)
    756{
    757    DisasCompare cmp;
    758    arm_test_cc(&cmp, cc);
    759    arm_jump_cc(&cmp, label);
    760    arm_free_cc(&cmp);
    761}
    762
    763void gen_set_condexec(DisasContext *s)
    764{
    765    if (s->condexec_mask) {
    766        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
    767        TCGv_i32 tmp = tcg_temp_new_i32();
    768        tcg_gen_movi_i32(tmp, val);
    769        store_cpu_field(tmp, condexec_bits);
    770    }
    771}
    772
    773void gen_set_pc_im(DisasContext *s, target_ulong val)
    774{
    775    tcg_gen_movi_i32(cpu_R[15], val);
    776}
    777
    778/* Set PC and Thumb state from var.  var is marked as dead.  */
    779static inline void gen_bx(DisasContext *s, TCGv_i32 var)
    780{
    781    s->base.is_jmp = DISAS_JUMP;
    782    tcg_gen_andi_i32(cpu_R[15], var, ~1);
    783    tcg_gen_andi_i32(var, var, 1);
    784    store_cpu_field(var, thumb);
    785}
    786
    787/*
    788 * Set PC and Thumb state from var. var is marked as dead.
    789 * For M-profile CPUs, include logic to detect exception-return
    790 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
    791 * and BX reg, and no others, and happens only for code in Handler mode.
    792 * The Security Extension also requires us to check for the FNC_RETURN
    793 * which signals a function return from non-secure state; this can happen
    794 * in both Handler and Thread mode.
    795 * To avoid having to do multiple comparisons in inline generated code,
    796 * we make the check we do here loose, so it will match for EXC_RETURN
    797 * in Thread mode. For system emulation do_v7m_exception_exit() checks
    798 * for these spurious cases and returns without doing anything (giving
    799 * the same behaviour as for a branch to a non-magic address).
    800 *
    801 * In linux-user mode it is unclear what the right behaviour for an
    802 * attempted FNC_RETURN should be, because in real hardware this will go
    803 * directly to Secure code (ie not the Linux kernel) which will then treat
    804 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
    805 * attempt behave the way it would on a CPU without the security extension,
    806 * which is to say "like a normal branch". That means we can simply treat
    807 * all branches as normal with no magic address behaviour.
    808 */
    809static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
    810{
    811    /* Generate the same code here as for a simple bx, but flag via
    812     * s->base.is_jmp that we need to do the rest of the work later.
    813     */
    814    gen_bx(s, var);
    815#ifndef CONFIG_USER_ONLY
    816    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
    817        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
    818        s->base.is_jmp = DISAS_BX_EXCRET;
    819    }
    820#endif
    821}
    822
    823static inline void gen_bx_excret_final_code(DisasContext *s)
    824{
    825    /* Generate the code to finish possible exception return and end the TB */
    826    TCGLabel *excret_label = gen_new_label();
    827    uint32_t min_magic;
    828
    829    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
    830        /* Covers FNC_RETURN and EXC_RETURN magic */
    831        min_magic = FNC_RETURN_MIN_MAGIC;
    832    } else {
    833        /* EXC_RETURN magic only */
    834        min_magic = EXC_RETURN_MIN_MAGIC;
    835    }
    836
    837    /* Is the new PC value in the magic range indicating exception return? */
    838    tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
    839    /* No: end the TB as we would for a DISAS_JMP */
    840    if (is_singlestepping(s)) {
    841        gen_singlestep_exception(s);
    842    } else {
    843        tcg_gen_exit_tb(NULL, 0);
    844    }
    845    gen_set_label(excret_label);
    846    /* Yes: this is an exception return.
    847     * At this point in runtime env->regs[15] and env->thumb will hold
    848     * the exception-return magic number, which do_v7m_exception_exit()
    849     * will read. Nothing else will be able to see those values because
    850     * the cpu-exec main loop guarantees that we will always go straight
    851     * from raising the exception to the exception-handling code.
    852     *
    853     * gen_ss_advance(s) does nothing on M profile currently but
    854     * calling it is conceptually the right thing as we have executed
    855     * this instruction (compare SWI, HVC, SMC handling).
    856     */
    857    gen_ss_advance(s);
    858    gen_exception_internal(EXCP_EXCEPTION_EXIT);
    859}
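
/*
 * Note (illustrative): the single GEU comparison above works because the
 * magic EXC_RETURN values (0xffxxxxxx) and, with the Security Extension,
 * FNC_RETURN values (0xfexxxxxx) sit at the very top of the 32-bit address
 * space. Any branch target below min_magic ends the TB as an ordinary
 * jump; only targets at or above it fall through to the exception-return
 * path handled by do_v7m_exception_exit().
 */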
    860
    861static inline void gen_bxns(DisasContext *s, int rm)
    862{
    863    TCGv_i32 var = load_reg(s, rm);
    864
    865    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
    866     * we need to sync state before calling it, but:
    867     *  - we don't need to do gen_set_pc_im() because the bxns helper will
    868     *    always set the PC itself
    869     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
    870     *    unless it's outside an IT block or the last insn in an IT block,
    871     *    so we know that condexec == 0 (already set at the top of the TB)
    872     *    is correct in the non-UNPREDICTABLE cases, and we can choose
    873     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
    874     */
    875    gen_helper_v7m_bxns(cpu_env, var);
    876    tcg_temp_free_i32(var);
    877    s->base.is_jmp = DISAS_EXIT;
    878}
    879
    880static inline void gen_blxns(DisasContext *s, int rm)
    881{
    882    TCGv_i32 var = load_reg(s, rm);
    883
    884    /* We don't need to sync condexec state, for the same reason as bxns.
    885     * We do however need to set the PC, because the blxns helper reads it.
    886     * The blxns helper may throw an exception.
    887     */
    888    gen_set_pc_im(s, s->base.pc_next);
    889    gen_helper_v7m_blxns(cpu_env, var);
    890    tcg_temp_free_i32(var);
    891    s->base.is_jmp = DISAS_EXIT;
    892}
    893
    894/* Variant of store_reg which uses branch&exchange logic when storing
    895   to r15 in ARM architecture v7 and above. The source must be a temporary
    896   and will be marked as dead. */
    897static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
    898{
    899    if (reg == 15 && ENABLE_ARCH_7) {
    900        gen_bx(s, var);
    901    } else {
    902        store_reg(s, reg, var);
    903    }
    904}
    905
    906/* Variant of store_reg which uses branch&exchange logic when storing
    907 * to r15 in ARM architecture v5T and above. This is used for storing
    908 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
    909 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
    910static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
    911{
    912    if (reg == 15 && ENABLE_ARCH_5) {
    913        gen_bx_excret(s, var);
    914    } else {
    915        store_reg(s, reg, var);
    916    }
    917}
    918
    919#ifdef CONFIG_USER_ONLY
    920#define IS_USER_ONLY 1
    921#else
    922#define IS_USER_ONLY 0
    923#endif
    924
    925MemOp pow2_align(unsigned i)
    926{
    927    static const MemOp mop_align[] = {
    928        0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
    929        /*
    930         * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
    931         * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
    932         * see get_alignment_bits(). Enforce only 128-bit alignment for now.
    933         */
    934        MO_ALIGN_16
    935    };
    936    g_assert(i < ARRAY_SIZE(mop_align));
    937    return mop_align[i];
    938}
    939
    940/*
    941 * Abstractions of "generate code to do a guest load/store for
    942 * AArch32", where a vaddr is always 32 bits (and is zero
     943 * extended if we're a 64 bit core) and data is also
    944 * 32 bits unless specifically doing a 64 bit access.
    945 * These functions work like tcg_gen_qemu_{ld,st}* except
    946 * that the address argument is TCGv_i32 rather than TCGv.
    947 */
    948
    949static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
    950{
    951    TCGv addr = tcg_temp_new();
    952    tcg_gen_extu_i32_tl(addr, a32);
    953
    954    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
    955    if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
    956        tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
    957    }
    958    return addr;
    959}
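
/*
 * Illustrative example of the BE32 (SCTLR.B) address munging above: with
 * sctlr_b set, a byte access to 0x1003 is XORed with 3 and performed at
 * 0x1000, and a halfword access to 0x1002 is XORed with 2 and performed at
 * 0x1000, reproducing big-endian byte-lane selection on a little-endian
 * backing store. Word and larger accesses are left unchanged.
 */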
    960
    961/*
    962 * Internal routines are used for NEON cases where the endianness
    963 * and/or alignment has already been taken into account and manipulated.
    964 */
    965void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
    966                              TCGv_i32 a32, int index, MemOp opc)
    967{
    968    TCGv addr = gen_aa32_addr(s, a32, opc);
    969    tcg_gen_qemu_ld_i32(val, addr, index, opc);
    970    tcg_temp_free(addr);
    971}
    972
    973void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
    974                              TCGv_i32 a32, int index, MemOp opc)
    975{
    976    TCGv addr = gen_aa32_addr(s, a32, opc);
    977    tcg_gen_qemu_st_i32(val, addr, index, opc);
    978    tcg_temp_free(addr);
    979}
    980
    981void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
    982                              TCGv_i32 a32, int index, MemOp opc)
    983{
    984    TCGv addr = gen_aa32_addr(s, a32, opc);
    985
    986    tcg_gen_qemu_ld_i64(val, addr, index, opc);
    987
    988    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
    989    if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
    990        tcg_gen_rotri_i64(val, val, 32);
    991    }
    992    tcg_temp_free(addr);
    993}
    994
    995void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
    996                              TCGv_i32 a32, int index, MemOp opc)
    997{
    998    TCGv addr = gen_aa32_addr(s, a32, opc);
    999
   1000    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
   1001    if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
   1002        TCGv_i64 tmp = tcg_temp_new_i64();
   1003        tcg_gen_rotri_i64(tmp, val, 32);
   1004        tcg_gen_qemu_st_i64(tmp, addr, index, opc);
   1005        tcg_temp_free_i64(tmp);
   1006    } else {
   1007        tcg_gen_qemu_st_i64(val, addr, index, opc);
   1008    }
   1009    tcg_temp_free(addr);
   1010}
   1011
   1012void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
   1013                     int index, MemOp opc)
   1014{
   1015    gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
   1016}
   1017
   1018void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
   1019                     int index, MemOp opc)
   1020{
   1021    gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
   1022}
   1023
   1024void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
   1025                     int index, MemOp opc)
   1026{
   1027    gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
   1028}
   1029
   1030void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
   1031                     int index, MemOp opc)
   1032{
   1033    gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
   1034}
   1035
   1036#define DO_GEN_LD(SUFF, OPC)                                            \
   1037    static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
   1038                                         TCGv_i32 a32, int index)       \
   1039    {                                                                   \
   1040        gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
   1041    }
   1042
   1043#define DO_GEN_ST(SUFF, OPC)                                            \
   1044    static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
   1045                                         TCGv_i32 a32, int index)       \
   1046    {                                                                   \
   1047        gen_aa32_st_i32(s, val, a32, index, OPC);                       \
   1048    }
   1049
   1050static inline void gen_hvc(DisasContext *s, int imm16)
   1051{
   1052    /* The pre HVC helper handles cases when HVC gets trapped
   1053     * as an undefined insn by runtime configuration (ie before
   1054     * the insn really executes).
   1055     */
   1056    gen_set_pc_im(s, s->pc_curr);
   1057    gen_helper_pre_hvc(cpu_env);
   1058    /* Otherwise we will treat this as a real exception which
   1059     * happens after execution of the insn. (The distinction matters
   1060     * for the PC value reported to the exception handler and also
   1061     * for single stepping.)
   1062     */
   1063    s->svc_imm = imm16;
   1064    gen_set_pc_im(s, s->base.pc_next);
   1065    s->base.is_jmp = DISAS_HVC;
   1066}
   1067
   1068static inline void gen_smc(DisasContext *s)
   1069{
   1070    /* As with HVC, we may take an exception either before or after
   1071     * the insn executes.
   1072     */
   1073    TCGv_i32 tmp;
   1074
   1075    gen_set_pc_im(s, s->pc_curr);
   1076    tmp = tcg_const_i32(syn_aa32_smc());
   1077    gen_helper_pre_smc(cpu_env, tmp);
   1078    tcg_temp_free_i32(tmp);
   1079    gen_set_pc_im(s, s->base.pc_next);
   1080    s->base.is_jmp = DISAS_SMC;
   1081}
   1082
   1083static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
   1084{
   1085    gen_set_condexec(s);
   1086    gen_set_pc_im(s, pc);
   1087    gen_exception_internal(excp);
   1088    s->base.is_jmp = DISAS_NORETURN;
   1089}
   1090
   1091void gen_exception_insn(DisasContext *s, uint64_t pc, int excp,
   1092                        uint32_t syn, uint32_t target_el)
   1093{
   1094    if (s->aarch64) {
   1095        gen_a64_set_pc_im(pc);
   1096    } else {
   1097        gen_set_condexec(s);
   1098        gen_set_pc_im(s, pc);
   1099    }
   1100    gen_exception(excp, syn, target_el);
   1101    s->base.is_jmp = DISAS_NORETURN;
   1102}
   1103
   1104static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
   1105{
   1106    TCGv_i32 tcg_syn;
   1107
   1108    gen_set_condexec(s);
   1109    gen_set_pc_im(s, s->pc_curr);
   1110    tcg_syn = tcg_const_i32(syn);
   1111    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
   1112    tcg_temp_free_i32(tcg_syn);
   1113    s->base.is_jmp = DISAS_NORETURN;
   1114}
   1115
   1116void unallocated_encoding(DisasContext *s)
   1117{
   1118    /* Unallocated and reserved encodings are uncategorized */
   1119    gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
   1120                       default_exception_el(s));
   1121}
   1122
   1123static void gen_exception_el(DisasContext *s, int excp, uint32_t syn,
   1124                             TCGv_i32 tcg_el)
   1125{
   1126    TCGv_i32 tcg_excp;
   1127    TCGv_i32 tcg_syn;
   1128
   1129    gen_set_condexec(s);
   1130    gen_set_pc_im(s, s->pc_curr);
   1131    tcg_excp = tcg_const_i32(excp);
   1132    tcg_syn = tcg_const_i32(syn);
   1133    gen_helper_exception_with_syndrome(cpu_env, tcg_excp, tcg_syn, tcg_el);
   1134    tcg_temp_free_i32(tcg_syn);
   1135    tcg_temp_free_i32(tcg_excp);
   1136    s->base.is_jmp = DISAS_NORETURN;
   1137}
   1138
   1139/* Force a TB lookup after an instruction that changes the CPU state.  */
   1140void gen_lookup_tb(DisasContext *s)
   1141{
   1142    tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
   1143    s->base.is_jmp = DISAS_EXIT;
   1144}
   1145
   1146static inline void gen_hlt(DisasContext *s, int imm)
   1147{
   1148    /* HLT. This has two purposes.
   1149     * Architecturally, it is an external halting debug instruction.
   1150     * Since QEMU doesn't implement external debug, we treat this as
    1151     * the architecture requires when halting debug is disabled: it will UNDEF.
   1152     * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
   1153     * and "HLT 0xF000" is an A32 semihosting syscall. These traps
   1154     * must trigger semihosting even for ARMv7 and earlier, where
   1155     * HLT was an undefined encoding.
   1156     * In system mode, we don't allow userspace access to
   1157     * semihosting, to provide some semblance of security
   1158     * (and for consistency with our 32-bit semihosting).
   1159     */
   1160    if (semihosting_enabled() &&
   1161#ifndef CONFIG_USER_ONLY
   1162        s->current_el != 0 &&
   1163#endif
   1164        (imm == (s->thumb ? 0x3c : 0xf000))) {
   1165        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
   1166        return;
   1167    }
   1168
   1169    unallocated_encoding(s);
   1170}
   1171
   1172/*
   1173 * Return the offset of a "full" NEON Dreg.
   1174 */
   1175long neon_full_reg_offset(unsigned reg)
   1176{
   1177    return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
   1178}
   1179
   1180/*
   1181 * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
   1182 * where 0 is the least significant end of the register.
   1183 */
   1184long neon_element_offset(int reg, int element, MemOp memop)
   1185{
   1186    int element_size = 1 << (memop & MO_SIZE);
   1187    int ofs = element * element_size;
   1188#ifdef HOST_WORDS_BIGENDIAN
   1189    /*
   1190     * Calculate the offset assuming fully little-endian,
   1191     * then XOR to account for the order of the 8-byte units.
   1192     */
   1193    if (element_size < 8) {
   1194        ofs ^= 8 - element_size;
   1195    }
   1196#endif
   1197    return neon_full_reg_offset(reg) + ofs;
   1198}
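
/*
 * Illustrative example: on a big-endian host, element 0 of a byte-sized
 * access has its offset XORed with 7, so it addresses the byte holding the
 * least significant end of the 64-bit unit, keeping element numbering
 * identical to the little-endian layout.
 */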
   1199
   1200/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
   1201long vfp_reg_offset(bool dp, unsigned reg)
   1202{
   1203    if (dp) {
   1204        return neon_element_offset(reg, 0, MO_64);
   1205    } else {
   1206        return neon_element_offset(reg >> 1, reg & 1, MO_32);
   1207    }
   1208}
   1209
   1210void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
   1211{
   1212    long off = neon_element_offset(reg, ele, memop);
   1213
   1214    switch (memop) {
   1215    case MO_SB:
   1216        tcg_gen_ld8s_i32(dest, cpu_env, off);
   1217        break;
   1218    case MO_UB:
   1219        tcg_gen_ld8u_i32(dest, cpu_env, off);
   1220        break;
   1221    case MO_SW:
   1222        tcg_gen_ld16s_i32(dest, cpu_env, off);
   1223        break;
   1224    case MO_UW:
   1225        tcg_gen_ld16u_i32(dest, cpu_env, off);
   1226        break;
   1227    case MO_UL:
   1228    case MO_SL:
   1229        tcg_gen_ld_i32(dest, cpu_env, off);
   1230        break;
   1231    default:
   1232        g_assert_not_reached();
   1233    }
   1234}
   1235
   1236void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
   1237{
   1238    long off = neon_element_offset(reg, ele, memop);
   1239
   1240    switch (memop) {
   1241    case MO_SL:
   1242        tcg_gen_ld32s_i64(dest, cpu_env, off);
   1243        break;
   1244    case MO_UL:
   1245        tcg_gen_ld32u_i64(dest, cpu_env, off);
   1246        break;
   1247    case MO_Q:
   1248        tcg_gen_ld_i64(dest, cpu_env, off);
   1249        break;
   1250    default:
   1251        g_assert_not_reached();
   1252    }
   1253}
   1254
   1255void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
   1256{
   1257    long off = neon_element_offset(reg, ele, memop);
   1258
   1259    switch (memop) {
   1260    case MO_8:
   1261        tcg_gen_st8_i32(src, cpu_env, off);
   1262        break;
   1263    case MO_16:
   1264        tcg_gen_st16_i32(src, cpu_env, off);
   1265        break;
   1266    case MO_32:
   1267        tcg_gen_st_i32(src, cpu_env, off);
   1268        break;
   1269    default:
   1270        g_assert_not_reached();
   1271    }
   1272}
   1273
   1274void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
   1275{
   1276    long off = neon_element_offset(reg, ele, memop);
   1277
   1278    switch (memop) {
   1279    case MO_32:
   1280        tcg_gen_st32_i64(src, cpu_env, off);
   1281        break;
   1282    case MO_64:
   1283        tcg_gen_st_i64(src, cpu_env, off);
   1284        break;
   1285    default:
   1286        g_assert_not_reached();
   1287    }
   1288}
   1289
   1290#define ARM_CP_RW_BIT   (1 << 20)
   1291
   1292static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
   1293{
   1294    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
   1295}
   1296
   1297static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
   1298{
   1299    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
   1300}
   1301
   1302static inline TCGv_i32 iwmmxt_load_creg(int reg)
   1303{
   1304    TCGv_i32 var = tcg_temp_new_i32();
   1305    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
   1306    return var;
   1307}
   1308
   1309static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
   1310{
   1311    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
   1312    tcg_temp_free_i32(var);
   1313}
   1314
   1315static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
   1316{
   1317    iwmmxt_store_reg(cpu_M0, rn);
   1318}
   1319
   1320static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
   1321{
   1322    iwmmxt_load_reg(cpu_M0, rn);
   1323}
   1324
   1325static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
   1326{
   1327    iwmmxt_load_reg(cpu_V1, rn);
   1328    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
   1329}
   1330
   1331static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
   1332{
   1333    iwmmxt_load_reg(cpu_V1, rn);
   1334    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
   1335}
   1336
   1337static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
   1338{
   1339    iwmmxt_load_reg(cpu_V1, rn);
   1340    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
   1341}
   1342
   1343#define IWMMXT_OP(name) \
   1344static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
   1345{ \
   1346    iwmmxt_load_reg(cpu_V1, rn); \
   1347    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
   1348}
   1349
   1350#define IWMMXT_OP_ENV(name) \
   1351static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
   1352{ \
   1353    iwmmxt_load_reg(cpu_V1, rn); \
   1354    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
   1355}
   1356
   1357#define IWMMXT_OP_ENV_SIZE(name) \
   1358IWMMXT_OP_ENV(name##b) \
   1359IWMMXT_OP_ENV(name##w) \
   1360IWMMXT_OP_ENV(name##l)
   1361
   1362#define IWMMXT_OP_ENV1(name) \
   1363static inline void gen_op_iwmmxt_##name##_M0(void) \
   1364{ \
   1365    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
   1366}
   1367
   1368IWMMXT_OP(maddsq)
   1369IWMMXT_OP(madduq)
   1370IWMMXT_OP(sadb)
   1371IWMMXT_OP(sadw)
   1372IWMMXT_OP(mulslw)
   1373IWMMXT_OP(mulshw)
   1374IWMMXT_OP(mululw)
   1375IWMMXT_OP(muluhw)
   1376IWMMXT_OP(macsw)
   1377IWMMXT_OP(macuw)
   1378
   1379IWMMXT_OP_ENV_SIZE(unpackl)
   1380IWMMXT_OP_ENV_SIZE(unpackh)
   1381
   1382IWMMXT_OP_ENV1(unpacklub)
   1383IWMMXT_OP_ENV1(unpackluw)
   1384IWMMXT_OP_ENV1(unpacklul)
   1385IWMMXT_OP_ENV1(unpackhub)
   1386IWMMXT_OP_ENV1(unpackhuw)
   1387IWMMXT_OP_ENV1(unpackhul)
   1388IWMMXT_OP_ENV1(unpacklsb)
   1389IWMMXT_OP_ENV1(unpacklsw)
   1390IWMMXT_OP_ENV1(unpacklsl)
   1391IWMMXT_OP_ENV1(unpackhsb)
   1392IWMMXT_OP_ENV1(unpackhsw)
   1393IWMMXT_OP_ENV1(unpackhsl)
   1394
   1395IWMMXT_OP_ENV_SIZE(cmpeq)
   1396IWMMXT_OP_ENV_SIZE(cmpgtu)
   1397IWMMXT_OP_ENV_SIZE(cmpgts)
   1398
   1399IWMMXT_OP_ENV_SIZE(mins)
   1400IWMMXT_OP_ENV_SIZE(minu)
   1401IWMMXT_OP_ENV_SIZE(maxs)
   1402IWMMXT_OP_ENV_SIZE(maxu)
   1403
   1404IWMMXT_OP_ENV_SIZE(subn)
   1405IWMMXT_OP_ENV_SIZE(addn)
   1406IWMMXT_OP_ENV_SIZE(subu)
   1407IWMMXT_OP_ENV_SIZE(addu)
   1408IWMMXT_OP_ENV_SIZE(subs)
   1409IWMMXT_OP_ENV_SIZE(adds)
   1410
   1411IWMMXT_OP_ENV(avgb0)
   1412IWMMXT_OP_ENV(avgb1)
   1413IWMMXT_OP_ENV(avgw0)
   1414IWMMXT_OP_ENV(avgw1)
   1415
   1416IWMMXT_OP_ENV(packuw)
   1417IWMMXT_OP_ENV(packul)
   1418IWMMXT_OP_ENV(packuq)
   1419IWMMXT_OP_ENV(packsw)
   1420IWMMXT_OP_ENV(packsl)
   1421IWMMXT_OP_ENV(packsq)
   1422
   1423static void gen_op_iwmmxt_set_mup(void)
   1424{
   1425    TCGv_i32 tmp;
   1426    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
   1427    tcg_gen_ori_i32(tmp, tmp, 2);
   1428    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
   1429}
   1430
   1431static void gen_op_iwmmxt_set_cup(void)
   1432{
   1433    TCGv_i32 tmp;
   1434    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
   1435    tcg_gen_ori_i32(tmp, tmp, 1);
   1436    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
   1437}
   1438
   1439static void gen_op_iwmmxt_setpsr_nz(void)
   1440{
   1441    TCGv_i32 tmp = tcg_temp_new_i32();
   1442    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
   1443    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
   1444}
   1445
   1446static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
   1447{
   1448    iwmmxt_load_reg(cpu_V1, rn);
   1449    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
   1450    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
   1451}
   1452
   1453static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
   1454                                     TCGv_i32 dest)
   1455{
   1456    int rd;
   1457    uint32_t offset;
   1458    TCGv_i32 tmp;
   1459
   1460    rd = (insn >> 16) & 0xf;
   1461    tmp = load_reg(s, rd);
   1462
   1463    offset = (insn & 0xff) << ((insn >> 7) & 2);
   1464    if (insn & (1 << 24)) {
   1465        /* Pre indexed */
   1466        if (insn & (1 << 23))
   1467            tcg_gen_addi_i32(tmp, tmp, offset);
   1468        else
   1469            tcg_gen_addi_i32(tmp, tmp, -offset);
   1470        tcg_gen_mov_i32(dest, tmp);
   1471        if (insn & (1 << 21))
   1472            store_reg(s, rd, tmp);
   1473        else
   1474            tcg_temp_free_i32(tmp);
   1475    } else if (insn & (1 << 21)) {
   1476        /* Post indexed */
   1477        tcg_gen_mov_i32(dest, tmp);
   1478        if (insn & (1 << 23))
   1479            tcg_gen_addi_i32(tmp, tmp, offset);
   1480        else
   1481            tcg_gen_addi_i32(tmp, tmp, -offset);
   1482        store_reg(s, rd, tmp);
   1483    } else if (!(insn & (1 << 23)))
   1484        return 1;
   1485    return 0;
   1486}
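
/*
 * Illustrative decode example: the immediate is insn[7:0] scaled by insn
 * bit 8, i.e. shifted left by 0 or 2, so an encoded 0x10 with bit 8 set
 * gives a byte offset of 0x40; bits 24, 23 and 21 select pre/post
 * indexing, add/subtract, and writeback as handled above.
 */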
   1487
   1488static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
   1489{
   1490    int rd = (insn >> 0) & 0xf;
   1491    TCGv_i32 tmp;
   1492
   1493    if (insn & (1 << 8)) {
   1494        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
   1495            return 1;
   1496        } else {
   1497            tmp = iwmmxt_load_creg(rd);
   1498        }
   1499    } else {
   1500        tmp = tcg_temp_new_i32();
   1501        iwmmxt_load_reg(cpu_V0, rd);
   1502        tcg_gen_extrl_i64_i32(tmp, cpu_V0);
   1503    }
   1504    tcg_gen_andi_i32(tmp, tmp, mask);
   1505    tcg_gen_mov_i32(dest, tmp);
   1506    tcg_temp_free_i32(tmp);
   1507    return 0;
   1508}
   1509
   1510/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
   1511   (ie. an undefined instruction).  */
   1512static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
   1513{
   1514    int rd, wrd;
   1515    int rdhi, rdlo, rd0, rd1, i;
   1516    TCGv_i32 addr;
   1517    TCGv_i32 tmp, tmp2, tmp3;
   1518
   1519    if ((insn & 0x0e000e00) == 0x0c000000) {
   1520        if ((insn & 0x0fe00ff0) == 0x0c400000) {
   1521            wrd = insn & 0xf;
   1522            rdlo = (insn >> 12) & 0xf;
   1523            rdhi = (insn >> 16) & 0xf;
   1524            if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
   1525                iwmmxt_load_reg(cpu_V0, wrd);
   1526                tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
   1527                tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
   1528            } else {                                    /* TMCRR */
   1529                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
   1530                iwmmxt_store_reg(cpu_V0, wrd);
   1531                gen_op_iwmmxt_set_mup();
   1532            }
   1533            return 0;
   1534        }
   1535
   1536        wrd = (insn >> 12) & 0xf;
   1537        addr = tcg_temp_new_i32();
   1538        if (gen_iwmmxt_address(s, insn, addr)) {
   1539            tcg_temp_free_i32(addr);
   1540            return 1;
   1541        }
   1542        if (insn & ARM_CP_RW_BIT) {
   1543            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
   1544                tmp = tcg_temp_new_i32();
   1545                gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
   1546                iwmmxt_store_creg(wrd, tmp);
   1547            } else {
   1548                i = 1;
   1549                if (insn & (1 << 8)) {
   1550                    if (insn & (1 << 22)) {             /* WLDRD */
   1551                        gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
   1552                        i = 0;
   1553                    } else {                            /* WLDRW wRd */
   1554                        tmp = tcg_temp_new_i32();
   1555                        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
   1556                    }
   1557                } else {
   1558                    tmp = tcg_temp_new_i32();
   1559                    if (insn & (1 << 22)) {             /* WLDRH */
   1560                        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
   1561                    } else {                            /* WLDRB */
   1562                        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
   1563                    }
   1564                }
   1565                if (i) {
   1566                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
   1567                    tcg_temp_free_i32(tmp);
   1568                }
   1569                gen_op_iwmmxt_movq_wRn_M0(wrd);
   1570            }
   1571        } else {
   1572            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
   1573                tmp = iwmmxt_load_creg(wrd);
   1574                gen_aa32_st32(s, tmp, addr, get_mem_index(s));
   1575            } else {
   1576                gen_op_iwmmxt_movq_M0_wRn(wrd);
   1577                tmp = tcg_temp_new_i32();
   1578                if (insn & (1 << 8)) {
   1579                    if (insn & (1 << 22)) {             /* WSTRD */
   1580                        gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
   1581                    } else {                            /* WSTRW wRd */
   1582                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
   1583                        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
   1584                    }
   1585                } else {
   1586                    if (insn & (1 << 22)) {             /* WSTRH */
   1587                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
   1588                        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
   1589                    } else {                            /* WSTRB */
   1590                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
   1591                        gen_aa32_st8(s, tmp, addr, get_mem_index(s));
   1592                    }
   1593                }
   1594            }
   1595            tcg_temp_free_i32(tmp);
   1596        }
   1597        tcg_temp_free_i32(addr);
   1598        return 0;
   1599    }
   1600
   1601    if ((insn & 0x0f000000) != 0x0e000000)
   1602        return 1;
   1603
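            /*
             * The remaining iwMMXt data-processing insns are decoded on a
             * 12-bit key built from insn bits [23:20] and bits [11:4].
             */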
   1604    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
   1605    case 0x000:                                                 /* WOR */
   1606        wrd = (insn >> 12) & 0xf;
   1607        rd0 = (insn >> 0) & 0xf;
   1608        rd1 = (insn >> 16) & 0xf;
   1609        gen_op_iwmmxt_movq_M0_wRn(rd0);
   1610        gen_op_iwmmxt_orq_M0_wRn(rd1);
   1611        gen_op_iwmmxt_setpsr_nz();
   1612        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1613        gen_op_iwmmxt_set_mup();
   1614        gen_op_iwmmxt_set_cup();
   1615        break;
   1616    case 0x011:                                                 /* TMCR */
   1617        if (insn & 0xf)
   1618            return 1;
   1619        rd = (insn >> 12) & 0xf;
   1620        wrd = (insn >> 16) & 0xf;
   1621        switch (wrd) {
   1622        case ARM_IWMMXT_wCID:
   1623        case ARM_IWMMXT_wCASF:
   1624            break;
   1625        case ARM_IWMMXT_wCon:
   1626            gen_op_iwmmxt_set_cup();
   1627            /* Fall through.  */
   1628        case ARM_IWMMXT_wCSSF:
   1629            tmp = iwmmxt_load_creg(wrd);
   1630            tmp2 = load_reg(s, rd);
   1631            tcg_gen_andc_i32(tmp, tmp, tmp2);
   1632            tcg_temp_free_i32(tmp2);
   1633            iwmmxt_store_creg(wrd, tmp);
   1634            break;
   1635        case ARM_IWMMXT_wCGR0:
   1636        case ARM_IWMMXT_wCGR1:
   1637        case ARM_IWMMXT_wCGR2:
   1638        case ARM_IWMMXT_wCGR3:
   1639            gen_op_iwmmxt_set_cup();
   1640            tmp = load_reg(s, rd);
   1641            iwmmxt_store_creg(wrd, tmp);
   1642            break;
   1643        default:
   1644            return 1;
   1645        }
   1646        break;
   1647    case 0x100:                                                 /* WXOR */
   1648        wrd = (insn >> 12) & 0xf;
   1649        rd0 = (insn >> 0) & 0xf;
   1650        rd1 = (insn >> 16) & 0xf;
   1651        gen_op_iwmmxt_movq_M0_wRn(rd0);
   1652        gen_op_iwmmxt_xorq_M0_wRn(rd1);
   1653        gen_op_iwmmxt_setpsr_nz();
   1654        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1655        gen_op_iwmmxt_set_mup();
   1656        gen_op_iwmmxt_set_cup();
   1657        break;
   1658    case 0x111:                                                 /* TMRC */
   1659        if (insn & 0xf)
   1660            return 1;
   1661        rd = (insn >> 12) & 0xf;
   1662        wrd = (insn >> 16) & 0xf;
   1663        tmp = iwmmxt_load_creg(wrd);
   1664        store_reg(s, rd, tmp);
   1665        break;
   1666    case 0x300:                                                 /* WANDN */
   1667        wrd = (insn >> 12) & 0xf;
   1668        rd0 = (insn >> 0) & 0xf;
   1669        rd1 = (insn >> 16) & 0xf;
   1670        gen_op_iwmmxt_movq_M0_wRn(rd0);
    1671        tcg_gen_not_i64(cpu_M0, cpu_M0);
   1672        gen_op_iwmmxt_andq_M0_wRn(rd1);
   1673        gen_op_iwmmxt_setpsr_nz();
   1674        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1675        gen_op_iwmmxt_set_mup();
   1676        gen_op_iwmmxt_set_cup();
   1677        break;
   1678    case 0x200:                                                 /* WAND */
   1679        wrd = (insn >> 12) & 0xf;
   1680        rd0 = (insn >> 0) & 0xf;
   1681        rd1 = (insn >> 16) & 0xf;
   1682        gen_op_iwmmxt_movq_M0_wRn(rd0);
   1683        gen_op_iwmmxt_andq_M0_wRn(rd1);
   1684        gen_op_iwmmxt_setpsr_nz();
   1685        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1686        gen_op_iwmmxt_set_mup();
   1687        gen_op_iwmmxt_set_cup();
   1688        break;
   1689    case 0x810: case 0xa10:                             /* WMADD */
   1690        wrd = (insn >> 12) & 0xf;
   1691        rd0 = (insn >> 0) & 0xf;
   1692        rd1 = (insn >> 16) & 0xf;
   1693        gen_op_iwmmxt_movq_M0_wRn(rd0);
   1694        if (insn & (1 << 21))
   1695            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
   1696        else
   1697            gen_op_iwmmxt_madduq_M0_wRn(rd1);
   1698        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1699        gen_op_iwmmxt_set_mup();
   1700        break;
   1701    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
   1702        wrd = (insn >> 12) & 0xf;
   1703        rd0 = (insn >> 16) & 0xf;
   1704        rd1 = (insn >> 0) & 0xf;
   1705        gen_op_iwmmxt_movq_M0_wRn(rd0);
   1706        switch ((insn >> 22) & 3) {
   1707        case 0:
   1708            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
   1709            break;
   1710        case 1:
   1711            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
   1712            break;
   1713        case 2:
   1714            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
   1715            break;
   1716        case 3:
   1717            return 1;
   1718        }
   1719        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1720        gen_op_iwmmxt_set_mup();
   1721        gen_op_iwmmxt_set_cup();
   1722        break;
   1723    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
   1724        wrd = (insn >> 12) & 0xf;
   1725        rd0 = (insn >> 16) & 0xf;
   1726        rd1 = (insn >> 0) & 0xf;
   1727        gen_op_iwmmxt_movq_M0_wRn(rd0);
   1728        switch ((insn >> 22) & 3) {
   1729        case 0:
   1730            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
   1731            break;
   1732        case 1:
   1733            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
   1734            break;
   1735        case 2:
   1736            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
   1737            break;
   1738        case 3:
   1739            return 1;
   1740        }
   1741        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1742        gen_op_iwmmxt_set_mup();
   1743        gen_op_iwmmxt_set_cup();
   1744        break;
   1745    case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
   1746        wrd = (insn >> 12) & 0xf;
   1747        rd0 = (insn >> 16) & 0xf;
   1748        rd1 = (insn >> 0) & 0xf;
   1749        gen_op_iwmmxt_movq_M0_wRn(rd0);
   1750        if (insn & (1 << 22))
   1751            gen_op_iwmmxt_sadw_M0_wRn(rd1);
   1752        else
   1753            gen_op_iwmmxt_sadb_M0_wRn(rd1);
   1754        if (!(insn & (1 << 20)))
   1755            gen_op_iwmmxt_addl_M0_wRn(wrd);
   1756        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1757        gen_op_iwmmxt_set_mup();
   1758        break;
   1759    case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
   1760        wrd = (insn >> 12) & 0xf;
   1761        rd0 = (insn >> 16) & 0xf;
   1762        rd1 = (insn >> 0) & 0xf;
   1763        gen_op_iwmmxt_movq_M0_wRn(rd0);
   1764        if (insn & (1 << 21)) {
   1765            if (insn & (1 << 20))
   1766                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
   1767            else
   1768                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
   1769        } else {
   1770            if (insn & (1 << 20))
   1771                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
   1772            else
   1773                gen_op_iwmmxt_mululw_M0_wRn(rd1);
   1774        }
   1775        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1776        gen_op_iwmmxt_set_mup();
   1777        break;
   1778    case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
   1779        wrd = (insn >> 12) & 0xf;
   1780        rd0 = (insn >> 16) & 0xf;
   1781        rd1 = (insn >> 0) & 0xf;
   1782        gen_op_iwmmxt_movq_M0_wRn(rd0);
   1783        if (insn & (1 << 21))
   1784            gen_op_iwmmxt_macsw_M0_wRn(rd1);
   1785        else
   1786            gen_op_iwmmxt_macuw_M0_wRn(rd1);
   1787        if (!(insn & (1 << 20))) {
   1788            iwmmxt_load_reg(cpu_V1, wrd);
   1789            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
   1790        }
   1791        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1792        gen_op_iwmmxt_set_mup();
   1793        break;
   1794    case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
   1795        wrd = (insn >> 12) & 0xf;
   1796        rd0 = (insn >> 16) & 0xf;
   1797        rd1 = (insn >> 0) & 0xf;
   1798        gen_op_iwmmxt_movq_M0_wRn(rd0);
   1799        switch ((insn >> 22) & 3) {
   1800        case 0:
   1801            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
   1802            break;
   1803        case 1:
   1804            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
   1805            break;
   1806        case 2:
   1807            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
   1808            break;
   1809        case 3:
   1810            return 1;
   1811        }
   1812        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1813        gen_op_iwmmxt_set_mup();
   1814        gen_op_iwmmxt_set_cup();
   1815        break;
   1816    case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
   1817        wrd = (insn >> 12) & 0xf;
   1818        rd0 = (insn >> 16) & 0xf;
   1819        rd1 = (insn >> 0) & 0xf;
   1820        gen_op_iwmmxt_movq_M0_wRn(rd0);
   1821        if (insn & (1 << 22)) {
   1822            if (insn & (1 << 20))
   1823                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
   1824            else
   1825                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
   1826        } else {
   1827            if (insn & (1 << 20))
   1828                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
   1829            else
   1830                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
   1831        }
   1832        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1833        gen_op_iwmmxt_set_mup();
   1834        gen_op_iwmmxt_set_cup();
   1835        break;
   1836    case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
   1837        wrd = (insn >> 12) & 0xf;
   1838        rd0 = (insn >> 16) & 0xf;
   1839        rd1 = (insn >> 0) & 0xf;
   1840        gen_op_iwmmxt_movq_M0_wRn(rd0);
   1841        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
   1842        tcg_gen_andi_i32(tmp, tmp, 7);
   1843        iwmmxt_load_reg(cpu_V1, rd1);
   1844        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
   1845        tcg_temp_free_i32(tmp);
   1846        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1847        gen_op_iwmmxt_set_mup();
   1848        break;
   1849    case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
   1850        if (((insn >> 6) & 3) == 3)
   1851            return 1;
   1852        rd = (insn >> 12) & 0xf;
   1853        wrd = (insn >> 16) & 0xf;
   1854        tmp = load_reg(s, rd);
   1855        gen_op_iwmmxt_movq_M0_wRn(wrd);
   1856        switch ((insn >> 6) & 3) {
   1857        case 0:
   1858            tmp2 = tcg_const_i32(0xff);
   1859            tmp3 = tcg_const_i32((insn & 7) << 3);
   1860            break;
   1861        case 1:
   1862            tmp2 = tcg_const_i32(0xffff);
   1863            tmp3 = tcg_const_i32((insn & 3) << 4);
   1864            break;
   1865        case 2:
   1866            tmp2 = tcg_const_i32(0xffffffff);
   1867            tmp3 = tcg_const_i32((insn & 1) << 5);
   1868            break;
   1869        default:
   1870            tmp2 = NULL;
   1871            tmp3 = NULL;
   1872        }
   1873        gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
   1874        tcg_temp_free_i32(tmp3);
   1875        tcg_temp_free_i32(tmp2);
   1876        tcg_temp_free_i32(tmp);
   1877        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1878        gen_op_iwmmxt_set_mup();
   1879        break;
   1880    case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
   1881        rd = (insn >> 12) & 0xf;
   1882        wrd = (insn >> 16) & 0xf;
   1883        if (rd == 15 || ((insn >> 22) & 3) == 3)
   1884            return 1;
   1885        gen_op_iwmmxt_movq_M0_wRn(wrd);
   1886        tmp = tcg_temp_new_i32();
   1887        switch ((insn >> 22) & 3) {
   1888        case 0:
   1889            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
   1890            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
   1891            if (insn & 8) {
   1892                tcg_gen_ext8s_i32(tmp, tmp);
   1893            } else {
   1894                tcg_gen_andi_i32(tmp, tmp, 0xff);
   1895            }
   1896            break;
   1897        case 1:
   1898            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
   1899            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
   1900            if (insn & 8) {
   1901                tcg_gen_ext16s_i32(tmp, tmp);
   1902            } else {
   1903                tcg_gen_andi_i32(tmp, tmp, 0xffff);
   1904            }
   1905            break;
   1906        case 2:
   1907            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
   1908            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
   1909            break;
   1910        }
   1911        store_reg(s, rd, tmp);
   1912        break;
   1913    case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
   1914        if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
   1915            return 1;
   1916        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
   1917        switch ((insn >> 22) & 3) {
   1918        case 0:
   1919            tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
   1920            break;
   1921        case 1:
   1922            tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
   1923            break;
   1924        case 2:
   1925            tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
   1926            break;
   1927        }
   1928        tcg_gen_shli_i32(tmp, tmp, 28);
   1929        gen_set_nzcv(tmp);
   1930        tcg_temp_free_i32(tmp);
   1931        break;
   1932    case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
   1933        if (((insn >> 6) & 3) == 3)
   1934            return 1;
   1935        rd = (insn >> 12) & 0xf;
   1936        wrd = (insn >> 16) & 0xf;
   1937        tmp = load_reg(s, rd);
   1938        switch ((insn >> 6) & 3) {
   1939        case 0:
   1940            gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
   1941            break;
   1942        case 1:
   1943            gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
   1944            break;
   1945        case 2:
   1946            gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
   1947            break;
   1948        }
   1949        tcg_temp_free_i32(tmp);
   1950        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1951        gen_op_iwmmxt_set_mup();
   1952        break;
   1953    case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
   1954        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
   1955            return 1;
   1956        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
   1957        tmp2 = tcg_temp_new_i32();
   1958        tcg_gen_mov_i32(tmp2, tmp);
   1959        switch ((insn >> 22) & 3) {
   1960        case 0:
   1961            for (i = 0; i < 7; i ++) {
   1962                tcg_gen_shli_i32(tmp2, tmp2, 4);
   1963                tcg_gen_and_i32(tmp, tmp, tmp2);
   1964            }
   1965            break;
   1966        case 1:
   1967            for (i = 0; i < 3; i ++) {
   1968                tcg_gen_shli_i32(tmp2, tmp2, 8);
   1969                tcg_gen_and_i32(tmp, tmp, tmp2);
   1970            }
   1971            break;
   1972        case 2:
   1973            tcg_gen_shli_i32(tmp2, tmp2, 16);
   1974            tcg_gen_and_i32(tmp, tmp, tmp2);
   1975            break;
   1976        }
   1977        gen_set_nzcv(tmp);
   1978        tcg_temp_free_i32(tmp2);
   1979        tcg_temp_free_i32(tmp);
   1980        break;
   1981    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
   1982        wrd = (insn >> 12) & 0xf;
   1983        rd0 = (insn >> 16) & 0xf;
   1984        gen_op_iwmmxt_movq_M0_wRn(rd0);
   1985        switch ((insn >> 22) & 3) {
   1986        case 0:
   1987            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
   1988            break;
   1989        case 1:
   1990            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
   1991            break;
   1992        case 2:
   1993            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
   1994            break;
   1995        case 3:
   1996            return 1;
   1997        }
   1998        gen_op_iwmmxt_movq_wRn_M0(wrd);
   1999        gen_op_iwmmxt_set_mup();
   2000        break;
   2001    case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
   2002        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
   2003            return 1;
   2004        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
   2005        tmp2 = tcg_temp_new_i32();
   2006        tcg_gen_mov_i32(tmp2, tmp);
   2007        switch ((insn >> 22) & 3) {
   2008        case 0:
   2009            for (i = 0; i < 7; i ++) {
   2010                tcg_gen_shli_i32(tmp2, tmp2, 4);
   2011                tcg_gen_or_i32(tmp, tmp, tmp2);
   2012            }
   2013            break;
   2014        case 1:
   2015            for (i = 0; i < 3; i ++) {
   2016                tcg_gen_shli_i32(tmp2, tmp2, 8);
   2017                tcg_gen_or_i32(tmp, tmp, tmp2);
   2018            }
   2019            break;
   2020        case 2:
   2021            tcg_gen_shli_i32(tmp2, tmp2, 16);
   2022            tcg_gen_or_i32(tmp, tmp, tmp2);
   2023            break;
   2024        }
   2025        gen_set_nzcv(tmp);
   2026        tcg_temp_free_i32(tmp2);
   2027        tcg_temp_free_i32(tmp);
   2028        break;
   2029    case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
   2030        rd = (insn >> 12) & 0xf;
   2031        rd0 = (insn >> 16) & 0xf;
   2032        if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
   2033            return 1;
   2034        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2035        tmp = tcg_temp_new_i32();
   2036        switch ((insn >> 22) & 3) {
   2037        case 0:
   2038            gen_helper_iwmmxt_msbb(tmp, cpu_M0);
   2039            break;
   2040        case 1:
   2041            gen_helper_iwmmxt_msbw(tmp, cpu_M0);
   2042            break;
   2043        case 2:
   2044            gen_helper_iwmmxt_msbl(tmp, cpu_M0);
   2045            break;
   2046        }
   2047        store_reg(s, rd, tmp);
   2048        break;
   2049    case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
   2050    case 0x906: case 0xb06: case 0xd06: case 0xf06:
   2051        wrd = (insn >> 12) & 0xf;
   2052        rd0 = (insn >> 16) & 0xf;
   2053        rd1 = (insn >> 0) & 0xf;
   2054        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2055        switch ((insn >> 22) & 3) {
   2056        case 0:
   2057            if (insn & (1 << 21))
   2058                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
   2059            else
   2060                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
   2061            break;
   2062        case 1:
   2063            if (insn & (1 << 21))
   2064                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
   2065            else
   2066                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
   2067            break;
   2068        case 2:
   2069            if (insn & (1 << 21))
   2070                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
   2071            else
   2072                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
   2073            break;
   2074        case 3:
   2075            return 1;
   2076        }
   2077        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2078        gen_op_iwmmxt_set_mup();
   2079        gen_op_iwmmxt_set_cup();
   2080        break;
   2081    case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
   2082    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
   2083        wrd = (insn >> 12) & 0xf;
   2084        rd0 = (insn >> 16) & 0xf;
   2085        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2086        switch ((insn >> 22) & 3) {
   2087        case 0:
   2088            if (insn & (1 << 21))
   2089                gen_op_iwmmxt_unpacklsb_M0();
   2090            else
   2091                gen_op_iwmmxt_unpacklub_M0();
   2092            break;
   2093        case 1:
   2094            if (insn & (1 << 21))
   2095                gen_op_iwmmxt_unpacklsw_M0();
   2096            else
   2097                gen_op_iwmmxt_unpackluw_M0();
   2098            break;
   2099        case 2:
   2100            if (insn & (1 << 21))
   2101                gen_op_iwmmxt_unpacklsl_M0();
   2102            else
   2103                gen_op_iwmmxt_unpacklul_M0();
   2104            break;
   2105        case 3:
   2106            return 1;
   2107        }
   2108        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2109        gen_op_iwmmxt_set_mup();
   2110        gen_op_iwmmxt_set_cup();
   2111        break;
   2112    case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
   2113    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
   2114        wrd = (insn >> 12) & 0xf;
   2115        rd0 = (insn >> 16) & 0xf;
   2116        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2117        switch ((insn >> 22) & 3) {
   2118        case 0:
   2119            if (insn & (1 << 21))
   2120                gen_op_iwmmxt_unpackhsb_M0();
   2121            else
   2122                gen_op_iwmmxt_unpackhub_M0();
   2123            break;
   2124        case 1:
   2125            if (insn & (1 << 21))
   2126                gen_op_iwmmxt_unpackhsw_M0();
   2127            else
   2128                gen_op_iwmmxt_unpackhuw_M0();
   2129            break;
   2130        case 2:
   2131            if (insn & (1 << 21))
   2132                gen_op_iwmmxt_unpackhsl_M0();
   2133            else
   2134                gen_op_iwmmxt_unpackhul_M0();
   2135            break;
   2136        case 3:
   2137            return 1;
   2138        }
   2139        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2140        gen_op_iwmmxt_set_mup();
   2141        gen_op_iwmmxt_set_cup();
   2142        break;
   2143    case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
   2144    case 0x214: case 0x614: case 0xa14: case 0xe14:
   2145        if (((insn >> 22) & 3) == 0)
   2146            return 1;
   2147        wrd = (insn >> 12) & 0xf;
   2148        rd0 = (insn >> 16) & 0xf;
   2149        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2150        tmp = tcg_temp_new_i32();
   2151        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
   2152            tcg_temp_free_i32(tmp);
   2153            return 1;
   2154        }
   2155        switch ((insn >> 22) & 3) {
   2156        case 1:
   2157            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
   2158            break;
   2159        case 2:
   2160            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
   2161            break;
   2162        case 3:
   2163            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
   2164            break;
   2165        }
   2166        tcg_temp_free_i32(tmp);
   2167        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2168        gen_op_iwmmxt_set_mup();
   2169        gen_op_iwmmxt_set_cup();
   2170        break;
   2171    case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
   2172    case 0x014: case 0x414: case 0x814: case 0xc14:
   2173        if (((insn >> 22) & 3) == 0)
   2174            return 1;
   2175        wrd = (insn >> 12) & 0xf;
   2176        rd0 = (insn >> 16) & 0xf;
   2177        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2178        tmp = tcg_temp_new_i32();
   2179        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
   2180            tcg_temp_free_i32(tmp);
   2181            return 1;
   2182        }
   2183        switch ((insn >> 22) & 3) {
   2184        case 1:
   2185            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
   2186            break;
   2187        case 2:
   2188            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
   2189            break;
   2190        case 3:
   2191            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
   2192            break;
   2193        }
   2194        tcg_temp_free_i32(tmp);
   2195        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2196        gen_op_iwmmxt_set_mup();
   2197        gen_op_iwmmxt_set_cup();
   2198        break;
   2199    case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
   2200    case 0x114: case 0x514: case 0x914: case 0xd14:
   2201        if (((insn >> 22) & 3) == 0)
   2202            return 1;
   2203        wrd = (insn >> 12) & 0xf;
   2204        rd0 = (insn >> 16) & 0xf;
   2205        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2206        tmp = tcg_temp_new_i32();
   2207        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
   2208            tcg_temp_free_i32(tmp);
   2209            return 1;
   2210        }
   2211        switch ((insn >> 22) & 3) {
   2212        case 1:
   2213            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
   2214            break;
   2215        case 2:
   2216            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
   2217            break;
   2218        case 3:
   2219            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
   2220            break;
   2221        }
   2222        tcg_temp_free_i32(tmp);
   2223        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2224        gen_op_iwmmxt_set_mup();
   2225        gen_op_iwmmxt_set_cup();
   2226        break;
   2227    case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
   2228    case 0x314: case 0x714: case 0xb14: case 0xf14:
   2229        if (((insn >> 22) & 3) == 0)
   2230            return 1;
   2231        wrd = (insn >> 12) & 0xf;
   2232        rd0 = (insn >> 16) & 0xf;
   2233        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2234        tmp = tcg_temp_new_i32();
   2235        switch ((insn >> 22) & 3) {
   2236        case 1:
   2237            if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
   2238                tcg_temp_free_i32(tmp);
   2239                return 1;
   2240            }
   2241            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
   2242            break;
   2243        case 2:
   2244            if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
   2245                tcg_temp_free_i32(tmp);
   2246                return 1;
   2247            }
   2248            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
   2249            break;
   2250        case 3:
   2251            if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
   2252                tcg_temp_free_i32(tmp);
   2253                return 1;
   2254            }
   2255            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
   2256            break;
   2257        }
   2258        tcg_temp_free_i32(tmp);
   2259        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2260        gen_op_iwmmxt_set_mup();
   2261        gen_op_iwmmxt_set_cup();
   2262        break;
   2263    case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
   2264    case 0x916: case 0xb16: case 0xd16: case 0xf16:
   2265        wrd = (insn >> 12) & 0xf;
   2266        rd0 = (insn >> 16) & 0xf;
   2267        rd1 = (insn >> 0) & 0xf;
   2268        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2269        switch ((insn >> 22) & 3) {
   2270        case 0:
   2271            if (insn & (1 << 21))
   2272                gen_op_iwmmxt_minsb_M0_wRn(rd1);
   2273            else
   2274                gen_op_iwmmxt_minub_M0_wRn(rd1);
   2275            break;
   2276        case 1:
   2277            if (insn & (1 << 21))
   2278                gen_op_iwmmxt_minsw_M0_wRn(rd1);
   2279            else
   2280                gen_op_iwmmxt_minuw_M0_wRn(rd1);
   2281            break;
   2282        case 2:
   2283            if (insn & (1 << 21))
   2284                gen_op_iwmmxt_minsl_M0_wRn(rd1);
   2285            else
   2286                gen_op_iwmmxt_minul_M0_wRn(rd1);
   2287            break;
   2288        case 3:
   2289            return 1;
   2290        }
   2291        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2292        gen_op_iwmmxt_set_mup();
   2293        break;
   2294    case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
   2295    case 0x816: case 0xa16: case 0xc16: case 0xe16:
   2296        wrd = (insn >> 12) & 0xf;
   2297        rd0 = (insn >> 16) & 0xf;
   2298        rd1 = (insn >> 0) & 0xf;
   2299        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2300        switch ((insn >> 22) & 3) {
   2301        case 0:
   2302            if (insn & (1 << 21))
   2303                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
   2304            else
   2305                gen_op_iwmmxt_maxub_M0_wRn(rd1);
   2306            break;
   2307        case 1:
   2308            if (insn & (1 << 21))
   2309                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
   2310            else
   2311                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
   2312            break;
   2313        case 2:
   2314            if (insn & (1 << 21))
   2315                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
   2316            else
   2317                gen_op_iwmmxt_maxul_M0_wRn(rd1);
   2318            break;
   2319        case 3:
   2320            return 1;
   2321        }
   2322        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2323        gen_op_iwmmxt_set_mup();
   2324        break;
   2325    case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
   2326    case 0x402: case 0x502: case 0x602: case 0x702:
   2327        wrd = (insn >> 12) & 0xf;
   2328        rd0 = (insn >> 16) & 0xf;
   2329        rd1 = (insn >> 0) & 0xf;
   2330        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2331        tmp = tcg_const_i32((insn >> 20) & 3);
   2332        iwmmxt_load_reg(cpu_V1, rd1);
   2333        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
   2334        tcg_temp_free_i32(tmp);
   2335        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2336        gen_op_iwmmxt_set_mup();
   2337        break;
   2338    case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
   2339    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
   2340    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
   2341    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
   2342        wrd = (insn >> 12) & 0xf;
   2343        rd0 = (insn >> 16) & 0xf;
   2344        rd1 = (insn >> 0) & 0xf;
   2345        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2346        switch ((insn >> 20) & 0xf) {
   2347        case 0x0:
   2348            gen_op_iwmmxt_subnb_M0_wRn(rd1);
   2349            break;
   2350        case 0x1:
   2351            gen_op_iwmmxt_subub_M0_wRn(rd1);
   2352            break;
   2353        case 0x3:
   2354            gen_op_iwmmxt_subsb_M0_wRn(rd1);
   2355            break;
   2356        case 0x4:
   2357            gen_op_iwmmxt_subnw_M0_wRn(rd1);
   2358            break;
   2359        case 0x5:
   2360            gen_op_iwmmxt_subuw_M0_wRn(rd1);
   2361            break;
   2362        case 0x7:
   2363            gen_op_iwmmxt_subsw_M0_wRn(rd1);
   2364            break;
   2365        case 0x8:
   2366            gen_op_iwmmxt_subnl_M0_wRn(rd1);
   2367            break;
   2368        case 0x9:
   2369            gen_op_iwmmxt_subul_M0_wRn(rd1);
   2370            break;
   2371        case 0xb:
   2372            gen_op_iwmmxt_subsl_M0_wRn(rd1);
   2373            break;
   2374        default:
   2375            return 1;
   2376        }
   2377        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2378        gen_op_iwmmxt_set_mup();
   2379        gen_op_iwmmxt_set_cup();
   2380        break;
   2381    case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
   2382    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
   2383    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
   2384    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
   2385        wrd = (insn >> 12) & 0xf;
   2386        rd0 = (insn >> 16) & 0xf;
   2387        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2388        tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
   2389        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
   2390        tcg_temp_free_i32(tmp);
   2391        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2392        gen_op_iwmmxt_set_mup();
   2393        gen_op_iwmmxt_set_cup();
   2394        break;
   2395    case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
   2396    case 0x418: case 0x518: case 0x618: case 0x718:
   2397    case 0x818: case 0x918: case 0xa18: case 0xb18:
   2398    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
   2399        wrd = (insn >> 12) & 0xf;
   2400        rd0 = (insn >> 16) & 0xf;
   2401        rd1 = (insn >> 0) & 0xf;
   2402        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2403        switch ((insn >> 20) & 0xf) {
   2404        case 0x0:
   2405            gen_op_iwmmxt_addnb_M0_wRn(rd1);
   2406            break;
   2407        case 0x1:
   2408            gen_op_iwmmxt_addub_M0_wRn(rd1);
   2409            break;
   2410        case 0x3:
   2411            gen_op_iwmmxt_addsb_M0_wRn(rd1);
   2412            break;
   2413        case 0x4:
   2414            gen_op_iwmmxt_addnw_M0_wRn(rd1);
   2415            break;
   2416        case 0x5:
   2417            gen_op_iwmmxt_adduw_M0_wRn(rd1);
   2418            break;
   2419        case 0x7:
   2420            gen_op_iwmmxt_addsw_M0_wRn(rd1);
   2421            break;
   2422        case 0x8:
   2423            gen_op_iwmmxt_addnl_M0_wRn(rd1);
   2424            break;
   2425        case 0x9:
   2426            gen_op_iwmmxt_addul_M0_wRn(rd1);
   2427            break;
   2428        case 0xb:
   2429            gen_op_iwmmxt_addsl_M0_wRn(rd1);
   2430            break;
   2431        default:
   2432            return 1;
   2433        }
   2434        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2435        gen_op_iwmmxt_set_mup();
   2436        gen_op_iwmmxt_set_cup();
   2437        break;
   2438    case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
   2439    case 0x408: case 0x508: case 0x608: case 0x708:
   2440    case 0x808: case 0x908: case 0xa08: case 0xb08:
   2441    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
   2442        if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
   2443            return 1;
   2444        wrd = (insn >> 12) & 0xf;
   2445        rd0 = (insn >> 16) & 0xf;
   2446        rd1 = (insn >> 0) & 0xf;
   2447        gen_op_iwmmxt_movq_M0_wRn(rd0);
   2448        switch ((insn >> 22) & 3) {
   2449        case 1:
   2450            if (insn & (1 << 21))
   2451                gen_op_iwmmxt_packsw_M0_wRn(rd1);
   2452            else
   2453                gen_op_iwmmxt_packuw_M0_wRn(rd1);
   2454            break;
   2455        case 2:
   2456            if (insn & (1 << 21))
   2457                gen_op_iwmmxt_packsl_M0_wRn(rd1);
   2458            else
   2459                gen_op_iwmmxt_packul_M0_wRn(rd1);
   2460            break;
   2461        case 3:
   2462            if (insn & (1 << 21))
   2463                gen_op_iwmmxt_packsq_M0_wRn(rd1);
   2464            else
   2465                gen_op_iwmmxt_packuq_M0_wRn(rd1);
   2466            break;
   2467        }
   2468        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2469        gen_op_iwmmxt_set_mup();
   2470        gen_op_iwmmxt_set_cup();
   2471        break;
   2472    case 0x201: case 0x203: case 0x205: case 0x207:
   2473    case 0x209: case 0x20b: case 0x20d: case 0x20f:
   2474    case 0x211: case 0x213: case 0x215: case 0x217:
   2475    case 0x219: case 0x21b: case 0x21d: case 0x21f:
   2476        wrd = (insn >> 5) & 0xf;
   2477        rd0 = (insn >> 12) & 0xf;
   2478        rd1 = (insn >> 0) & 0xf;
   2479        if (rd0 == 0xf || rd1 == 0xf)
   2480            return 1;
   2481        gen_op_iwmmxt_movq_M0_wRn(wrd);
   2482        tmp = load_reg(s, rd0);
   2483        tmp2 = load_reg(s, rd1);
   2484        switch ((insn >> 16) & 0xf) {
   2485        case 0x0:                                       /* TMIA */
   2486            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
   2487            break;
   2488        case 0x8:                                       /* TMIAPH */
   2489            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
   2490            break;
   2491        case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
   2492            if (insn & (1 << 16))
   2493                tcg_gen_shri_i32(tmp, tmp, 16);
   2494            if (insn & (1 << 17))
   2495                tcg_gen_shri_i32(tmp2, tmp2, 16);
   2496            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
   2497            break;
   2498        default:
   2499            tcg_temp_free_i32(tmp2);
   2500            tcg_temp_free_i32(tmp);
   2501            return 1;
   2502        }
   2503        tcg_temp_free_i32(tmp2);
   2504        tcg_temp_free_i32(tmp);
   2505        gen_op_iwmmxt_movq_wRn_M0(wrd);
   2506        gen_op_iwmmxt_set_mup();
   2507        break;
   2508    default:
   2509        return 1;
   2510    }
   2511
   2512    return 0;
   2513}
   2514
   2515/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
    2516   (i.e. an undefined instruction).  */
   2517static int disas_dsp_insn(DisasContext *s, uint32_t insn)
   2518{
   2519    int acc, rd0, rd1, rdhi, rdlo;
   2520    TCGv_i32 tmp, tmp2;
   2521
   2522    if ((insn & 0x0ff00f10) == 0x0e200010) {
   2523        /* Multiply with Internal Accumulate Format */
   2524        rd0 = (insn >> 12) & 0xf;
   2525        rd1 = insn & 0xf;
   2526        acc = (insn >> 5) & 7;
   2527
   2528        if (acc != 0)
   2529            return 1;
   2530
   2531        tmp = load_reg(s, rd0);
   2532        tmp2 = load_reg(s, rd1);
   2533        switch ((insn >> 16) & 0xf) {
   2534        case 0x0:                                       /* MIA */
   2535            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
   2536            break;
   2537        case 0x8:                                       /* MIAPH */
   2538            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
   2539            break;
   2540        case 0xc:                                       /* MIABB */
   2541        case 0xd:                                       /* MIABT */
   2542        case 0xe:                                       /* MIATB */
   2543        case 0xf:                                       /* MIATT */
   2544            if (insn & (1 << 16))
   2545                tcg_gen_shri_i32(tmp, tmp, 16);
   2546            if (insn & (1 << 17))
   2547                tcg_gen_shri_i32(tmp2, tmp2, 16);
   2548            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
   2549            break;
   2550        default:
   2551            return 1;
   2552        }
   2553        tcg_temp_free_i32(tmp2);
   2554        tcg_temp_free_i32(tmp);
   2555
   2556        gen_op_iwmmxt_movq_wRn_M0(acc);
   2557        return 0;
   2558    }
   2559
   2560    if ((insn & 0x0fe00ff8) == 0x0c400000) {
   2561        /* Internal Accumulator Access Format */
   2562        rdhi = (insn >> 16) & 0xf;
   2563        rdlo = (insn >> 12) & 0xf;
   2564        acc = insn & 7;
   2565
   2566        if (acc != 0)
   2567            return 1;
   2568
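                /*
                 * MRA/MAR move the 40-bit XScale accumulator acc0 to or
                 * from a GPR pair; on MRA the high word is truncated to
                 * its 8 significant bits.
                 */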
   2569        if (insn & ARM_CP_RW_BIT) {                     /* MRA */
   2570            iwmmxt_load_reg(cpu_V0, acc);
   2571            tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
   2572            tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
   2573            tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
   2574        } else {                                        /* MAR */
   2575            tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
   2576            iwmmxt_store_reg(cpu_V0, acc);
   2577        }
   2578        return 0;
   2579    }
   2580
   2581    return 1;
   2582}
   2583
   2584static void gen_goto_ptr(void)
   2585{
   2586    tcg_gen_lookup_and_goto_ptr();
   2587}
   2588
   2589/* This will end the TB but doesn't guarantee we'll return to
   2590 * cpu_loop_exec. Any live exit_requests will be processed as we
   2591 * enter the next TB.
   2592 */
   2593static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
   2594{
   2595    if (translator_use_goto_tb(&s->base, dest)) {
   2596        tcg_gen_goto_tb(n);
   2597        gen_set_pc_im(s, dest);
   2598        tcg_gen_exit_tb(s->base.tb, n);
   2599    } else {
   2600        gen_set_pc_im(s, dest);
   2601        gen_goto_ptr();
   2602    }
   2603    s->base.is_jmp = DISAS_NORETURN;
   2604}
   2605
   2606/* Jump, specifying which TB number to use if we gen_goto_tb() */
   2607static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno)
   2608{
   2609    if (unlikely(is_singlestepping(s))) {
   2610        /* An indirect jump so that we still trigger the debug exception.  */
   2611        gen_set_pc_im(s, dest);
   2612        s->base.is_jmp = DISAS_JUMP;
   2613        return;
   2614    }
   2615    switch (s->base.is_jmp) {
   2616    case DISAS_NEXT:
   2617    case DISAS_TOO_MANY:
   2618    case DISAS_NORETURN:
   2619        /*
   2620         * The normal case: just go to the destination TB.
   2621         * NB: NORETURN happens if we generate code like
   2622         *    gen_brcondi(l);
   2623         *    gen_jmp();
   2624         *    gen_set_label(l);
   2625         *    gen_jmp();
   2626         * on the second call to gen_jmp().
   2627         */
   2628        gen_goto_tb(s, tbno, dest);
   2629        break;
   2630    case DISAS_UPDATE_NOCHAIN:
   2631    case DISAS_UPDATE_EXIT:
   2632        /*
   2633         * We already decided we're leaving the TB for some other reason.
   2634         * Avoid using goto_tb so we really do exit back to the main loop
   2635         * and don't chain to another TB.
   2636         */
   2637        gen_set_pc_im(s, dest);
   2638        gen_goto_ptr();
   2639        s->base.is_jmp = DISAS_NORETURN;
   2640        break;
   2641    default:
   2642        /*
   2643         * We shouldn't be emitting code for a jump and also have
   2644         * is_jmp set to one of the special cases like DISAS_SWI.
   2645         */
   2646        g_assert_not_reached();
   2647    }
   2648}
   2649
   2650static inline void gen_jmp(DisasContext *s, uint32_t dest)
   2651{
   2652    gen_jmp_tb(s, dest, 0);
   2653}
   2654
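        /*
         * 16x16->32 signed multiply as used by the SMULxy/SMLAxy family:
         * x and y select the top or bottom halfword of each operand,
         * which is sign-extended before the multiply.
         */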
   2655static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
   2656{
   2657    if (x)
   2658        tcg_gen_sari_i32(t0, t0, 16);
   2659    else
   2660        gen_sxth(t0);
   2661    if (y)
   2662        tcg_gen_sari_i32(t1, t1, 16);
   2663    else
   2664        gen_sxth(t1);
   2665    tcg_gen_mul_i32(t0, t0, t1);
   2666}
   2667
   2668/* Return the mask of PSR bits set by a MSR instruction.  */
   2669static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
   2670{
   2671    uint32_t mask = 0;
   2672
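            /* flags bits 0-3 select the c, x, s and f bytes of the PSR. */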
   2673    if (flags & (1 << 0)) {
   2674        mask |= 0xff;
   2675    }
   2676    if (flags & (1 << 1)) {
   2677        mask |= 0xff00;
   2678    }
   2679    if (flags & (1 << 2)) {
   2680        mask |= 0xff0000;
   2681    }
   2682    if (flags & (1 << 3)) {
   2683        mask |= 0xff000000;
   2684    }
   2685
   2686    /* Mask out undefined and reserved bits.  */
   2687    mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
   2688
   2689    /* Mask out execution state.  */
   2690    if (!spsr) {
   2691        mask &= ~CPSR_EXEC;
   2692    }
   2693
   2694    /* Mask out privileged bits.  */
   2695    if (IS_USER(s)) {
   2696        mask &= CPSR_USER;
   2697    }
   2698    return mask;
   2699}
   2700
   2701/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
   2702static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
   2703{
   2704    TCGv_i32 tmp;
   2705    if (spsr) {
   2706        /* ??? This is also undefined in system mode.  */
   2707        if (IS_USER(s))
   2708            return 1;
   2709
   2710        tmp = load_cpu_field(spsr);
   2711        tcg_gen_andi_i32(tmp, tmp, ~mask);
   2712        tcg_gen_andi_i32(t0, t0, mask);
   2713        tcg_gen_or_i32(tmp, tmp, t0);
   2714        store_cpu_field(tmp, spsr);
   2715    } else {
   2716        gen_set_cpsr(t0, mask);
   2717    }
   2718    tcg_temp_free_i32(t0);
   2719    gen_lookup_tb(s);
   2720    return 0;
   2721}
   2722
   2723/* Returns nonzero if access to the PSR is not permitted.  */
   2724static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
   2725{
   2726    TCGv_i32 tmp;
   2727    tmp = tcg_temp_new_i32();
   2728    tcg_gen_movi_i32(tmp, val);
   2729    return gen_set_psr(s, mask, spsr, tmp);
   2730}
   2731
   2732static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
   2733                                     int *tgtmode, int *regno)
   2734{
   2735    /* Decode the r and sysm fields of MSR/MRS banked accesses into
   2736     * the target mode and register number, and identify the various
   2737     * unpredictable cases.
   2738     * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
   2739     *  + executed in user mode
   2740     *  + using R15 as the src/dest register
   2741     *  + accessing an unimplemented register
   2742     *  + accessing a register that's inaccessible at current PL/security state*
   2743     *  + accessing a register that you could access with a different insn
   2744     * We choose to UNDEF in all these cases.
   2745     * Since we don't know which of the various AArch32 modes we are in
   2746     * we have to defer some checks to runtime.
   2747     * Accesses to Monitor mode registers from Secure EL1 (which implies
   2748     * that EL3 is AArch64) must trap to EL3.
   2749     *
   2750     * If the access checks fail this function will emit code to take
   2751     * an exception and return false. Otherwise it will return true,
   2752     * and set *tgtmode and *regno appropriately.
   2753     */
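            /*
             * For example, r=0 sysm=0x8 encodes r8_fiq, and r=1 sysm=0x1e
             * encodes SPSR_hyp.
             */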
   2754    int exc_target = default_exception_el(s);
   2755
   2756    /* These instructions are present only in ARMv8, or in ARMv7 with the
   2757     * Virtualization Extensions.
   2758     */
   2759    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
   2760        !arm_dc_feature(s, ARM_FEATURE_EL2)) {
   2761        goto undef;
   2762    }
   2763
   2764    if (IS_USER(s) || rn == 15) {
   2765        goto undef;
   2766    }
   2767
   2768    /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
   2769     * of registers into (r, sysm).
   2770     */
   2771    if (r) {
   2772        /* SPSRs for other modes */
   2773        switch (sysm) {
   2774        case 0xe: /* SPSR_fiq */
   2775            *tgtmode = ARM_CPU_MODE_FIQ;
   2776            break;
   2777        case 0x10: /* SPSR_irq */
   2778            *tgtmode = ARM_CPU_MODE_IRQ;
   2779            break;
   2780        case 0x12: /* SPSR_svc */
   2781            *tgtmode = ARM_CPU_MODE_SVC;
   2782            break;
   2783        case 0x14: /* SPSR_abt */
   2784            *tgtmode = ARM_CPU_MODE_ABT;
   2785            break;
   2786        case 0x16: /* SPSR_und */
   2787            *tgtmode = ARM_CPU_MODE_UND;
   2788            break;
   2789        case 0x1c: /* SPSR_mon */
   2790            *tgtmode = ARM_CPU_MODE_MON;
   2791            break;
   2792        case 0x1e: /* SPSR_hyp */
   2793            *tgtmode = ARM_CPU_MODE_HYP;
   2794            break;
   2795        default: /* unallocated */
   2796            goto undef;
   2797        }
   2798        /* We arbitrarily assign SPSR a register number of 16. */
   2799        *regno = 16;
   2800    } else {
   2801        /* general purpose registers for other modes */
   2802        switch (sysm) {
   2803        case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
   2804            *tgtmode = ARM_CPU_MODE_USR;
   2805            *regno = sysm + 8;
   2806            break;
   2807        case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
   2808            *tgtmode = ARM_CPU_MODE_FIQ;
   2809            *regno = sysm;
   2810            break;
   2811        case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
   2812            *tgtmode = ARM_CPU_MODE_IRQ;
   2813            *regno = sysm & 1 ? 13 : 14;
   2814            break;
   2815        case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
   2816            *tgtmode = ARM_CPU_MODE_SVC;
   2817            *regno = sysm & 1 ? 13 : 14;
   2818            break;
   2819        case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
   2820            *tgtmode = ARM_CPU_MODE_ABT;
   2821            *regno = sysm & 1 ? 13 : 14;
   2822            break;
   2823        case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
   2824            *tgtmode = ARM_CPU_MODE_UND;
   2825            *regno = sysm & 1 ? 13 : 14;
   2826            break;
   2827        case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
   2828            *tgtmode = ARM_CPU_MODE_MON;
   2829            *regno = sysm & 1 ? 13 : 14;
   2830            break;
   2831        case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
   2832            *tgtmode = ARM_CPU_MODE_HYP;
   2833            /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
   2834            *regno = sysm & 1 ? 13 : 17;
   2835            break;
   2836        default: /* unallocated */
   2837            goto undef;
   2838        }
   2839    }
   2840
   2841    /* Catch the 'accessing inaccessible register' cases we can detect
   2842     * at translate time.
   2843     */
   2844    switch (*tgtmode) {
   2845    case ARM_CPU_MODE_MON:
   2846        if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
   2847            goto undef;
   2848        }
   2849        if (s->current_el == 1) {
   2850            /* If we're in Secure EL1 (which implies that EL3 is AArch64)
   2851             * then accesses to Mon registers trap to Secure EL2, if it exists,
   2852             * otherwise EL3.
   2853             */
   2854            TCGv_i32 tcg_el;
   2855
   2856            if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
   2857                dc_isar_feature(aa64_sel2, s)) {
   2858                /* Target EL is EL<3 minus SCR_EL3.EEL2> */
   2859                tcg_el = load_cpu_field(cp15.scr_el3);
   2860                tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
   2861                tcg_gen_addi_i32(tcg_el, tcg_el, 3);
   2862            } else {
   2863                tcg_el = tcg_const_i32(3);
   2864            }
   2865
   2866            gen_exception_el(s, EXCP_UDEF, syn_uncategorized(), tcg_el);
   2867            tcg_temp_free_i32(tcg_el);
   2868            return false;
   2869        }
   2870        break;
   2871    case ARM_CPU_MODE_HYP:
   2872        /*
   2873         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
   2874         * (and so we can forbid accesses from EL2 or below). elr_hyp
   2875         * can be accessed also from Hyp mode, so forbid accesses from
   2876         * EL0 or EL1.
   2877         */
   2878        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
   2879            (s->current_el < 3 && *regno != 17)) {
   2880            goto undef;
   2881        }
   2882        break;
   2883    default:
   2884        break;
   2885    }
   2886
   2887    return true;
   2888
   2889undef:
   2890    /* If we get here then some access check did not pass */
   2891    gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
   2892                       syn_uncategorized(), exc_target);
   2893    return false;
   2894}
   2895
   2896static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
   2897{
   2898    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
   2899    int tgtmode = 0, regno = 0;
   2900
   2901    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
   2902        return;
   2903    }
   2904
   2905    /* Sync state because msr_banked() can raise exceptions */
   2906    gen_set_condexec(s);
   2907    gen_set_pc_im(s, s->pc_curr);
   2908    tcg_reg = load_reg(s, rn);
   2909    tcg_tgtmode = tcg_const_i32(tgtmode);
   2910    tcg_regno = tcg_const_i32(regno);
   2911    gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
   2912    tcg_temp_free_i32(tcg_tgtmode);
   2913    tcg_temp_free_i32(tcg_regno);
   2914    tcg_temp_free_i32(tcg_reg);
   2915    s->base.is_jmp = DISAS_UPDATE_EXIT;
   2916}
   2917
   2918static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
   2919{
   2920    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
   2921    int tgtmode = 0, regno = 0;
   2922
   2923    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
   2924        return;
   2925    }
   2926
   2927    /* Sync state because mrs_banked() can raise exceptions */
   2928    gen_set_condexec(s);
   2929    gen_set_pc_im(s, s->pc_curr);
   2930    tcg_reg = tcg_temp_new_i32();
   2931    tcg_tgtmode = tcg_const_i32(tgtmode);
   2932    tcg_regno = tcg_const_i32(regno);
   2933    gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
   2934    tcg_temp_free_i32(tcg_tgtmode);
   2935    tcg_temp_free_i32(tcg_regno);
   2936    store_reg(s, rn, tcg_reg);
   2937    s->base.is_jmp = DISAS_UPDATE_EXIT;
   2938}
   2939
    2940/* Store value to PC as for an exception return (i.e. don't
   2941 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
   2942 * will do the masking based on the new value of the Thumb bit.
   2943 */
   2944static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
   2945{
   2946    tcg_gen_mov_i32(cpu_R[15], pc);
   2947    tcg_temp_free_i32(pc);
   2948}
   2949
   2950/* Generate a v6 exception return.  Marks both values as dead.  */
   2951static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
   2952{
   2953    store_pc_exc_ret(s, pc);
   2954    /* The cpsr_write_eret helper will mask the low bits of PC
   2955     * appropriately depending on the new Thumb bit, so it must
   2956     * be called after storing the new PC.
   2957     */
   2958    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
   2959        gen_io_start();
   2960    }
   2961    gen_helper_cpsr_write_eret(cpu_env, cpsr);
   2962    tcg_temp_free_i32(cpsr);
    2963    /* Must exit loop to check unmasked IRQs */
   2964    s->base.is_jmp = DISAS_EXIT;
   2965}
   2966
   2967/* Generate an old-style exception return. Marks pc as dead. */
   2968static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
   2969{
   2970    gen_rfe(s, pc, load_cpu_field(spsr));
   2971}
   2972
   2973static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
   2974                            uint32_t opr_sz, uint32_t max_sz,
   2975                            gen_helper_gvec_3_ptr *fn)
   2976{
   2977    TCGv_ptr qc_ptr = tcg_temp_new_ptr();
   2978
   2979    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
   2980    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
   2981                       opr_sz, max_sz, 0, fn);
   2982    tcg_temp_free_ptr(qc_ptr);
   2983}
   2984
   2985void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   2986                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   2987{
   2988    static gen_helper_gvec_3_ptr * const fns[2] = {
   2989        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
   2990    };
   2991    tcg_debug_assert(vece >= 1 && vece <= 2);
   2992    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
   2993}
   2994
   2995void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   2996                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   2997{
   2998    static gen_helper_gvec_3_ptr * const fns[2] = {
   2999        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
   3000    };
   3001    tcg_debug_assert(vece >= 1 && vece <= 2);
   3002    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
   3003}
   3004
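        /*
         * Expanders for SIMD compare against zero.  The architectural result
         * is all ones per element when the condition holds and all zeros
         * otherwise, so the scalar paths below negate the 0/1 result of
         * tcg_gen_setcondi_* to obtain that 0/-1 element mask.
         */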
   3005#define GEN_CMP0(NAME, COND)                                            \
   3006    static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
   3007    {                                                                   \
   3008        tcg_gen_setcondi_i32(COND, d, a, 0);                            \
   3009        tcg_gen_neg_i32(d, d);                                          \
   3010    }                                                                   \
   3011    static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
   3012    {                                                                   \
   3013        tcg_gen_setcondi_i64(COND, d, a, 0);                            \
   3014        tcg_gen_neg_i64(d, d);                                          \
   3015    }                                                                   \
   3016    static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
   3017    {                                                                   \
   3018        TCGv_vec zero = tcg_const_zeros_vec_matching(d);                \
   3019        tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
   3020        tcg_temp_free_vec(zero);                                        \
   3021    }                                                                   \
   3022    void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
   3023                            uint32_t opr_sz, uint32_t max_sz)           \
   3024    {                                                                   \
   3025        const GVecGen2 op[4] = {                                        \
   3026            { .fno = gen_helper_gvec_##NAME##0_b,                       \
   3027              .fniv = gen_##NAME##0_vec,                                \
   3028              .opt_opc = vecop_list_cmp,                                \
   3029              .vece = MO_8 },                                           \
   3030            { .fno = gen_helper_gvec_##NAME##0_h,                       \
   3031              .fniv = gen_##NAME##0_vec,                                \
   3032              .opt_opc = vecop_list_cmp,                                \
   3033              .vece = MO_16 },                                          \
   3034            { .fni4 = gen_##NAME##0_i32,                                \
   3035              .fniv = gen_##NAME##0_vec,                                \
   3036              .opt_opc = vecop_list_cmp,                                \
   3037              .vece = MO_32 },                                          \
   3038            { .fni8 = gen_##NAME##0_i64,                                \
   3039              .fniv = gen_##NAME##0_vec,                                \
   3040              .opt_opc = vecop_list_cmp,                                \
   3041              .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
   3042              .vece = MO_64 },                                          \
   3043        };                                                              \
   3044        tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
   3045    }
   3046
   3047static const TCGOpcode vecop_list_cmp[] = {
   3048    INDEX_op_cmp_vec, 0
   3049};
   3050
   3051GEN_CMP0(ceq, TCG_COND_EQ)
   3052GEN_CMP0(cle, TCG_COND_LE)
   3053GEN_CMP0(cge, TCG_COND_GE)
   3054GEN_CMP0(clt, TCG_COND_LT)
   3055GEN_CMP0(cgt, TCG_COND_GT)
   3056
   3057#undef GEN_CMP0
   3058
   3059static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3060{
   3061    tcg_gen_vec_sar8i_i64(a, a, shift);
   3062    tcg_gen_vec_add8_i64(d, d, a);
   3063}
   3064
   3065static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3066{
   3067    tcg_gen_vec_sar16i_i64(a, a, shift);
   3068    tcg_gen_vec_add16_i64(d, d, a);
   3069}
   3070
   3071static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
   3072{
   3073    tcg_gen_sari_i32(a, a, shift);
   3074    tcg_gen_add_i32(d, d, a);
   3075}
   3076
   3077static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3078{
   3079    tcg_gen_sari_i64(a, a, shift);
   3080    tcg_gen_add_i64(d, d, a);
   3081}
   3082
   3083static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3084{
   3085    tcg_gen_sari_vec(vece, a, a, sh);
   3086    tcg_gen_add_vec(vece, d, d, a);
   3087}
   3088
   3089void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3090                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3091{
   3092    static const TCGOpcode vecop_list[] = {
   3093        INDEX_op_sari_vec, INDEX_op_add_vec, 0
   3094    };
   3095    static const GVecGen2i ops[4] = {
   3096        { .fni8 = gen_ssra8_i64,
   3097          .fniv = gen_ssra_vec,
   3098          .fno = gen_helper_gvec_ssra_b,
   3099          .load_dest = true,
   3100          .opt_opc = vecop_list,
   3101          .vece = MO_8 },
   3102        { .fni8 = gen_ssra16_i64,
   3103          .fniv = gen_ssra_vec,
   3104          .fno = gen_helper_gvec_ssra_h,
   3105          .load_dest = true,
   3106          .opt_opc = vecop_list,
   3107          .vece = MO_16 },
   3108        { .fni4 = gen_ssra32_i32,
   3109          .fniv = gen_ssra_vec,
   3110          .fno = gen_helper_gvec_ssra_s,
   3111          .load_dest = true,
   3112          .opt_opc = vecop_list,
   3113          .vece = MO_32 },
   3114        { .fni8 = gen_ssra64_i64,
   3115          .fniv = gen_ssra_vec,
    3116          .fno = gen_helper_gvec_ssra_d,
   3117          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3118          .opt_opc = vecop_list,
   3119          .load_dest = true,
   3120          .vece = MO_64 },
   3121    };
   3122
   3123    /* tszimm encoding produces immediates in the range [1..esize]. */
   3124    tcg_debug_assert(shift > 0);
   3125    tcg_debug_assert(shift <= (8 << vece));
   3126
   3127    /*
   3128     * Shifts larger than the element size are architecturally valid.
    3129     * A signed shift of this size results in all sign bits.
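             * Worked example (illustrative): for MO_8 with shift == 8, the
             * clamp below uses 7, so each byte accumulates 0x00 or 0xff
             * according to its sign bit.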
   3130     */
   3131    shift = MIN(shift, (8 << vece) - 1);
   3132    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3133}
   3134
   3135static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3136{
   3137    tcg_gen_vec_shr8i_i64(a, a, shift);
   3138    tcg_gen_vec_add8_i64(d, d, a);
   3139}
   3140
   3141static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3142{
   3143    tcg_gen_vec_shr16i_i64(a, a, shift);
   3144    tcg_gen_vec_add16_i64(d, d, a);
   3145}
   3146
   3147static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
   3148{
   3149    tcg_gen_shri_i32(a, a, shift);
   3150    tcg_gen_add_i32(d, d, a);
   3151}
   3152
   3153static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3154{
   3155    tcg_gen_shri_i64(a, a, shift);
   3156    tcg_gen_add_i64(d, d, a);
   3157}
   3158
   3159static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3160{
   3161    tcg_gen_shri_vec(vece, a, a, sh);
   3162    tcg_gen_add_vec(vece, d, d, a);
   3163}
   3164
   3165void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3166                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3167{
   3168    static const TCGOpcode vecop_list[] = {
   3169        INDEX_op_shri_vec, INDEX_op_add_vec, 0
   3170    };
   3171    static const GVecGen2i ops[4] = {
   3172        { .fni8 = gen_usra8_i64,
   3173          .fniv = gen_usra_vec,
   3174          .fno = gen_helper_gvec_usra_b,
   3175          .load_dest = true,
   3176          .opt_opc = vecop_list,
   3177          .vece = MO_8, },
   3178        { .fni8 = gen_usra16_i64,
   3179          .fniv = gen_usra_vec,
   3180          .fno = gen_helper_gvec_usra_h,
   3181          .load_dest = true,
   3182          .opt_opc = vecop_list,
   3183          .vece = MO_16, },
   3184        { .fni4 = gen_usra32_i32,
   3185          .fniv = gen_usra_vec,
   3186          .fno = gen_helper_gvec_usra_s,
   3187          .load_dest = true,
   3188          .opt_opc = vecop_list,
   3189          .vece = MO_32, },
   3190        { .fni8 = gen_usra64_i64,
   3191          .fniv = gen_usra_vec,
   3192          .fno = gen_helper_gvec_usra_d,
   3193          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3194          .load_dest = true,
   3195          .opt_opc = vecop_list,
   3196          .vece = MO_64, },
   3197    };
   3198
   3199    /* tszimm encoding produces immediates in the range [1..esize]. */
   3200    tcg_debug_assert(shift > 0);
   3201    tcg_debug_assert(shift <= (8 << vece));
   3202
   3203    /*
   3204     * Shifts larger than the element size are architecturally valid.
    3205     * An unsigned shift of this size yields all zeros as input to the accumulate: nop.
   3206     */
   3207    if (shift < (8 << vece)) {
   3208        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3209    } else {
   3210        /* Nop, but we do need to clear the tail. */
   3211        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
   3212    }
   3213}
   3214
   3215/*
   3216 * Shift one less than the requested amount, and the low bit is
   3217 * the rounding bit.  For the 8 and 16-bit operations, because we
   3218 * mask the low bit, we can perform a normal integer shift instead
   3219 * of a vector shift.
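         * For example (an illustrative walk-through): a rounding right shift
         * of 0x2f by 4 computes t = (0x2f >> 3) & 1 = 1 and
         * d = (0x2f >> 4) + t = 2 + 1 = 3, i.e. 47/16 rounded to nearest.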
   3220 */
   3221static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3222{
   3223    TCGv_i64 t = tcg_temp_new_i64();
   3224
   3225    tcg_gen_shri_i64(t, a, sh - 1);
   3226    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
   3227    tcg_gen_vec_sar8i_i64(d, a, sh);
   3228    tcg_gen_vec_add8_i64(d, d, t);
   3229    tcg_temp_free_i64(t);
   3230}
   3231
   3232static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3233{
   3234    TCGv_i64 t = tcg_temp_new_i64();
   3235
   3236    tcg_gen_shri_i64(t, a, sh - 1);
   3237    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
   3238    tcg_gen_vec_sar16i_i64(d, a, sh);
   3239    tcg_gen_vec_add16_i64(d, d, t);
   3240    tcg_temp_free_i64(t);
   3241}
   3242
   3243static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
   3244{
   3245    TCGv_i32 t;
   3246
   3247    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
   3248    if (sh == 32) {
   3249        tcg_gen_movi_i32(d, 0);
   3250        return;
   3251    }
   3252    t = tcg_temp_new_i32();
   3253    tcg_gen_extract_i32(t, a, sh - 1, 1);
   3254    tcg_gen_sari_i32(d, a, sh);
   3255    tcg_gen_add_i32(d, d, t);
   3256    tcg_temp_free_i32(t);
   3257}
   3258
   3259static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3260{
   3261    TCGv_i64 t = tcg_temp_new_i64();
   3262
   3263    tcg_gen_extract_i64(t, a, sh - 1, 1);
   3264    tcg_gen_sari_i64(d, a, sh);
   3265    tcg_gen_add_i64(d, d, t);
   3266    tcg_temp_free_i64(t);
   3267}
   3268
   3269static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3270{
   3271    TCGv_vec t = tcg_temp_new_vec_matching(d);
   3272    TCGv_vec ones = tcg_temp_new_vec_matching(d);
   3273
   3274    tcg_gen_shri_vec(vece, t, a, sh - 1);
   3275    tcg_gen_dupi_vec(vece, ones, 1);
   3276    tcg_gen_and_vec(vece, t, t, ones);
   3277    tcg_gen_sari_vec(vece, d, a, sh);
   3278    tcg_gen_add_vec(vece, d, d, t);
   3279
   3280    tcg_temp_free_vec(t);
   3281    tcg_temp_free_vec(ones);
   3282}
   3283
   3284void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3285                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3286{
   3287    static const TCGOpcode vecop_list[] = {
   3288        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
   3289    };
   3290    static const GVecGen2i ops[4] = {
   3291        { .fni8 = gen_srshr8_i64,
   3292          .fniv = gen_srshr_vec,
   3293          .fno = gen_helper_gvec_srshr_b,
   3294          .opt_opc = vecop_list,
   3295          .vece = MO_8 },
   3296        { .fni8 = gen_srshr16_i64,
   3297          .fniv = gen_srshr_vec,
   3298          .fno = gen_helper_gvec_srshr_h,
   3299          .opt_opc = vecop_list,
   3300          .vece = MO_16 },
   3301        { .fni4 = gen_srshr32_i32,
   3302          .fniv = gen_srshr_vec,
   3303          .fno = gen_helper_gvec_srshr_s,
   3304          .opt_opc = vecop_list,
   3305          .vece = MO_32 },
   3306        { .fni8 = gen_srshr64_i64,
   3307          .fniv = gen_srshr_vec,
   3308          .fno = gen_helper_gvec_srshr_d,
   3309          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3310          .opt_opc = vecop_list,
   3311          .vece = MO_64 },
   3312    };
   3313
   3314    /* tszimm encoding produces immediates in the range [1..esize] */
   3315    tcg_debug_assert(shift > 0);
   3316    tcg_debug_assert(shift <= (8 << vece));
   3317
   3318    if (shift == (8 << vece)) {
   3319        /*
   3320         * Shifts larger than the element size are architecturally valid.
   3321         * Signed results in all sign bits.  With rounding, this produces
   3322         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
   3323         * I.e. always zero.
   3324         */
   3325        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
   3326    } else {
   3327        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3328    }
   3329}
   3330
   3331static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3332{
   3333    TCGv_i64 t = tcg_temp_new_i64();
   3334
   3335    gen_srshr8_i64(t, a, sh);
   3336    tcg_gen_vec_add8_i64(d, d, t);
   3337    tcg_temp_free_i64(t);
   3338}
   3339
   3340static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3341{
   3342    TCGv_i64 t = tcg_temp_new_i64();
   3343
   3344    gen_srshr16_i64(t, a, sh);
   3345    tcg_gen_vec_add16_i64(d, d, t);
   3346    tcg_temp_free_i64(t);
   3347}
   3348
   3349static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
   3350{
   3351    TCGv_i32 t = tcg_temp_new_i32();
   3352
   3353    gen_srshr32_i32(t, a, sh);
   3354    tcg_gen_add_i32(d, d, t);
   3355    tcg_temp_free_i32(t);
   3356}
   3357
   3358static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3359{
   3360    TCGv_i64 t = tcg_temp_new_i64();
   3361
   3362    gen_srshr64_i64(t, a, sh);
   3363    tcg_gen_add_i64(d, d, t);
   3364    tcg_temp_free_i64(t);
   3365}
   3366
   3367static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3368{
   3369    TCGv_vec t = tcg_temp_new_vec_matching(d);
   3370
   3371    gen_srshr_vec(vece, t, a, sh);
   3372    tcg_gen_add_vec(vece, d, d, t);
   3373    tcg_temp_free_vec(t);
   3374}
   3375
   3376void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3377                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3378{
   3379    static const TCGOpcode vecop_list[] = {
   3380        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
   3381    };
   3382    static const GVecGen2i ops[4] = {
   3383        { .fni8 = gen_srsra8_i64,
   3384          .fniv = gen_srsra_vec,
   3385          .fno = gen_helper_gvec_srsra_b,
   3386          .opt_opc = vecop_list,
   3387          .load_dest = true,
   3388          .vece = MO_8 },
   3389        { .fni8 = gen_srsra16_i64,
   3390          .fniv = gen_srsra_vec,
   3391          .fno = gen_helper_gvec_srsra_h,
   3392          .opt_opc = vecop_list,
   3393          .load_dest = true,
   3394          .vece = MO_16 },
   3395        { .fni4 = gen_srsra32_i32,
   3396          .fniv = gen_srsra_vec,
   3397          .fno = gen_helper_gvec_srsra_s,
   3398          .opt_opc = vecop_list,
   3399          .load_dest = true,
   3400          .vece = MO_32 },
   3401        { .fni8 = gen_srsra64_i64,
   3402          .fniv = gen_srsra_vec,
   3403          .fno = gen_helper_gvec_srsra_d,
   3404          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3405          .opt_opc = vecop_list,
   3406          .load_dest = true,
   3407          .vece = MO_64 },
   3408    };
   3409
   3410    /* tszimm encoding produces immediates in the range [1..esize] */
   3411    tcg_debug_assert(shift > 0);
   3412    tcg_debug_assert(shift <= (8 << vece));
   3413
   3414    /*
   3415     * Shifts larger than the element size are architecturally valid.
   3416     * Signed results in all sign bits.  With rounding, this produces
   3417     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
   3418     * I.e. always zero.  With accumulation, this leaves D unchanged.
   3419     */
   3420    if (shift == (8 << vece)) {
   3421        /* Nop, but we do need to clear the tail. */
   3422        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
   3423    } else {
   3424        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3425    }
   3426}
   3427
   3428static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3429{
   3430    TCGv_i64 t = tcg_temp_new_i64();
   3431
   3432    tcg_gen_shri_i64(t, a, sh - 1);
   3433    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
   3434    tcg_gen_vec_shr8i_i64(d, a, sh);
   3435    tcg_gen_vec_add8_i64(d, d, t);
   3436    tcg_temp_free_i64(t);
   3437}
   3438
   3439static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3440{
   3441    TCGv_i64 t = tcg_temp_new_i64();
   3442
   3443    tcg_gen_shri_i64(t, a, sh - 1);
   3444    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
   3445    tcg_gen_vec_shr16i_i64(d, a, sh);
   3446    tcg_gen_vec_add16_i64(d, d, t);
   3447    tcg_temp_free_i64(t);
   3448}
   3449
   3450static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
   3451{
   3452    TCGv_i32 t;
   3453
   3454    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
   3455    if (sh == 32) {
   3456        tcg_gen_extract_i32(d, a, sh - 1, 1);
   3457        return;
   3458    }
   3459    t = tcg_temp_new_i32();
   3460    tcg_gen_extract_i32(t, a, sh - 1, 1);
   3461    tcg_gen_shri_i32(d, a, sh);
   3462    tcg_gen_add_i32(d, d, t);
   3463    tcg_temp_free_i32(t);
   3464}
   3465
   3466static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3467{
   3468    TCGv_i64 t = tcg_temp_new_i64();
   3469
   3470    tcg_gen_extract_i64(t, a, sh - 1, 1);
   3471    tcg_gen_shri_i64(d, a, sh);
   3472    tcg_gen_add_i64(d, d, t);
   3473    tcg_temp_free_i64(t);
   3474}
   3475
   3476static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
   3477{
   3478    TCGv_vec t = tcg_temp_new_vec_matching(d);
   3479    TCGv_vec ones = tcg_temp_new_vec_matching(d);
   3480
   3481    tcg_gen_shri_vec(vece, t, a, shift - 1);
   3482    tcg_gen_dupi_vec(vece, ones, 1);
   3483    tcg_gen_and_vec(vece, t, t, ones);
   3484    tcg_gen_shri_vec(vece, d, a, shift);
   3485    tcg_gen_add_vec(vece, d, d, t);
   3486
   3487    tcg_temp_free_vec(t);
   3488    tcg_temp_free_vec(ones);
   3489}
   3490
   3491void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3492                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3493{
   3494    static const TCGOpcode vecop_list[] = {
   3495        INDEX_op_shri_vec, INDEX_op_add_vec, 0
   3496    };
   3497    static const GVecGen2i ops[4] = {
   3498        { .fni8 = gen_urshr8_i64,
   3499          .fniv = gen_urshr_vec,
   3500          .fno = gen_helper_gvec_urshr_b,
   3501          .opt_opc = vecop_list,
   3502          .vece = MO_8 },
   3503        { .fni8 = gen_urshr16_i64,
   3504          .fniv = gen_urshr_vec,
   3505          .fno = gen_helper_gvec_urshr_h,
   3506          .opt_opc = vecop_list,
   3507          .vece = MO_16 },
   3508        { .fni4 = gen_urshr32_i32,
   3509          .fniv = gen_urshr_vec,
   3510          .fno = gen_helper_gvec_urshr_s,
   3511          .opt_opc = vecop_list,
   3512          .vece = MO_32 },
   3513        { .fni8 = gen_urshr64_i64,
   3514          .fniv = gen_urshr_vec,
   3515          .fno = gen_helper_gvec_urshr_d,
   3516          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3517          .opt_opc = vecop_list,
   3518          .vece = MO_64 },
   3519    };
   3520
   3521    /* tszimm encoding produces immediates in the range [1..esize] */
   3522    tcg_debug_assert(shift > 0);
   3523    tcg_debug_assert(shift <= (8 << vece));
   3524
   3525    if (shift == (8 << vece)) {
   3526        /*
   3527         * Shifts larger than the element size are architecturally valid.
   3528         * Unsigned results in zero.  With rounding, this produces a
   3529         * copy of the most significant bit.
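             * Illustrative: for a 32-bit element, URSHR by 32 is defined as
             * (x + 0x80000000) >> 32, i.e. bit 31 of x, which is exactly what
             * the shift by (shift - 1) below extracts.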
   3530         */
   3531        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
   3532    } else {
   3533        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3534    }
   3535}
   3536
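        /*
         * For the accumulating forms below, a shift by the full element size
         * still contributes the rounding bit: URSHR(x, esize) is just the top
         * bit of x, so that case is special-cased as a plain shift by
         * esize - 1.
         */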
   3537static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3538{
   3539    TCGv_i64 t = tcg_temp_new_i64();
   3540
   3541    if (sh == 8) {
   3542        tcg_gen_vec_shr8i_i64(t, a, 7);
   3543    } else {
   3544        gen_urshr8_i64(t, a, sh);
   3545    }
   3546    tcg_gen_vec_add8_i64(d, d, t);
   3547    tcg_temp_free_i64(t);
   3548}
   3549
   3550static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3551{
   3552    TCGv_i64 t = tcg_temp_new_i64();
   3553
   3554    if (sh == 16) {
   3555        tcg_gen_vec_shr16i_i64(t, a, 15);
   3556    } else {
   3557        gen_urshr16_i64(t, a, sh);
   3558    }
   3559    tcg_gen_vec_add16_i64(d, d, t);
   3560    tcg_temp_free_i64(t);
   3561}
   3562
   3563static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
   3564{
   3565    TCGv_i32 t = tcg_temp_new_i32();
   3566
   3567    if (sh == 32) {
   3568        tcg_gen_shri_i32(t, a, 31);
   3569    } else {
   3570        gen_urshr32_i32(t, a, sh);
   3571    }
   3572    tcg_gen_add_i32(d, d, t);
   3573    tcg_temp_free_i32(t);
   3574}
   3575
   3576static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3577{
   3578    TCGv_i64 t = tcg_temp_new_i64();
   3579
   3580    if (sh == 64) {
   3581        tcg_gen_shri_i64(t, a, 63);
   3582    } else {
   3583        gen_urshr64_i64(t, a, sh);
   3584    }
   3585    tcg_gen_add_i64(d, d, t);
   3586    tcg_temp_free_i64(t);
   3587}
   3588
   3589static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3590{
   3591    TCGv_vec t = tcg_temp_new_vec_matching(d);
   3592
   3593    if (sh == (8 << vece)) {
   3594        tcg_gen_shri_vec(vece, t, a, sh - 1);
   3595    } else {
   3596        gen_urshr_vec(vece, t, a, sh);
   3597    }
   3598    tcg_gen_add_vec(vece, d, d, t);
   3599    tcg_temp_free_vec(t);
   3600}
   3601
   3602void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3603                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3604{
   3605    static const TCGOpcode vecop_list[] = {
   3606        INDEX_op_shri_vec, INDEX_op_add_vec, 0
   3607    };
   3608    static const GVecGen2i ops[4] = {
   3609        { .fni8 = gen_ursra8_i64,
   3610          .fniv = gen_ursra_vec,
   3611          .fno = gen_helper_gvec_ursra_b,
   3612          .opt_opc = vecop_list,
   3613          .load_dest = true,
   3614          .vece = MO_8 },
   3615        { .fni8 = gen_ursra16_i64,
   3616          .fniv = gen_ursra_vec,
   3617          .fno = gen_helper_gvec_ursra_h,
   3618          .opt_opc = vecop_list,
   3619          .load_dest = true,
   3620          .vece = MO_16 },
   3621        { .fni4 = gen_ursra32_i32,
   3622          .fniv = gen_ursra_vec,
   3623          .fno = gen_helper_gvec_ursra_s,
   3624          .opt_opc = vecop_list,
   3625          .load_dest = true,
   3626          .vece = MO_32 },
   3627        { .fni8 = gen_ursra64_i64,
   3628          .fniv = gen_ursra_vec,
   3629          .fno = gen_helper_gvec_ursra_d,
   3630          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3631          .opt_opc = vecop_list,
   3632          .load_dest = true,
   3633          .vece = MO_64 },
   3634    };
   3635
   3636    /* tszimm encoding produces immediates in the range [1..esize] */
   3637    tcg_debug_assert(shift > 0);
   3638    tcg_debug_assert(shift <= (8 << vece));
   3639
   3640    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3641}
   3642
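        /*
         * SRI (shift right and insert): only the low (esize - shift) bits of
         * each destination element are replaced by the shifted source; the
         * top 'shift' bits of the destination are left unchanged.
         */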
   3643static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3644{
   3645    uint64_t mask = dup_const(MO_8, 0xff >> shift);
   3646    TCGv_i64 t = tcg_temp_new_i64();
   3647
   3648    tcg_gen_shri_i64(t, a, shift);
   3649    tcg_gen_andi_i64(t, t, mask);
   3650    tcg_gen_andi_i64(d, d, ~mask);
   3651    tcg_gen_or_i64(d, d, t);
   3652    tcg_temp_free_i64(t);
   3653}
   3654
   3655static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3656{
   3657    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
   3658    TCGv_i64 t = tcg_temp_new_i64();
   3659
   3660    tcg_gen_shri_i64(t, a, shift);
   3661    tcg_gen_andi_i64(t, t, mask);
   3662    tcg_gen_andi_i64(d, d, ~mask);
   3663    tcg_gen_or_i64(d, d, t);
   3664    tcg_temp_free_i64(t);
   3665}
   3666
   3667static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
   3668{
   3669    tcg_gen_shri_i32(a, a, shift);
   3670    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
   3671}
   3672
   3673static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3674{
   3675    tcg_gen_shri_i64(a, a, shift);
   3676    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
   3677}
   3678
   3679static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3680{
   3681    TCGv_vec t = tcg_temp_new_vec_matching(d);
   3682    TCGv_vec m = tcg_temp_new_vec_matching(d);
   3683
   3684    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
   3685    tcg_gen_shri_vec(vece, t, a, sh);
   3686    tcg_gen_and_vec(vece, d, d, m);
   3687    tcg_gen_or_vec(vece, d, d, t);
   3688
   3689    tcg_temp_free_vec(t);
   3690    tcg_temp_free_vec(m);
   3691}
   3692
   3693void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3694                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3695{
   3696    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
   3697    const GVecGen2i ops[4] = {
   3698        { .fni8 = gen_shr8_ins_i64,
   3699          .fniv = gen_shr_ins_vec,
   3700          .fno = gen_helper_gvec_sri_b,
   3701          .load_dest = true,
   3702          .opt_opc = vecop_list,
   3703          .vece = MO_8 },
   3704        { .fni8 = gen_shr16_ins_i64,
   3705          .fniv = gen_shr_ins_vec,
   3706          .fno = gen_helper_gvec_sri_h,
   3707          .load_dest = true,
   3708          .opt_opc = vecop_list,
   3709          .vece = MO_16 },
   3710        { .fni4 = gen_shr32_ins_i32,
   3711          .fniv = gen_shr_ins_vec,
   3712          .fno = gen_helper_gvec_sri_s,
   3713          .load_dest = true,
   3714          .opt_opc = vecop_list,
   3715          .vece = MO_32 },
   3716        { .fni8 = gen_shr64_ins_i64,
   3717          .fniv = gen_shr_ins_vec,
   3718          .fno = gen_helper_gvec_sri_d,
   3719          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3720          .load_dest = true,
   3721          .opt_opc = vecop_list,
   3722          .vece = MO_64 },
   3723    };
   3724
   3725    /* tszimm encoding produces immediates in the range [1..esize]. */
   3726    tcg_debug_assert(shift > 0);
   3727    tcg_debug_assert(shift <= (8 << vece));
   3728
   3729    /* Shift of esize leaves destination unchanged. */
   3730    if (shift < (8 << vece)) {
   3731        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3732    } else {
   3733        /* Nop, but we do need to clear the tail. */
   3734        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
   3735    }
   3736}
   3737
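        /*
         * SLI (shift left and insert): the mirror image of SRI above; the low
         * 'shift' bits of each destination element are preserved and the rest
         * are replaced by the left-shifted source.
         */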
   3738static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3739{
   3740    uint64_t mask = dup_const(MO_8, 0xff << shift);
   3741    TCGv_i64 t = tcg_temp_new_i64();
   3742
   3743    tcg_gen_shli_i64(t, a, shift);
   3744    tcg_gen_andi_i64(t, t, mask);
   3745    tcg_gen_andi_i64(d, d, ~mask);
   3746    tcg_gen_or_i64(d, d, t);
   3747    tcg_temp_free_i64(t);
   3748}
   3749
   3750static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3751{
   3752    uint64_t mask = dup_const(MO_16, 0xffff << shift);
   3753    TCGv_i64 t = tcg_temp_new_i64();
   3754
   3755    tcg_gen_shli_i64(t, a, shift);
   3756    tcg_gen_andi_i64(t, t, mask);
   3757    tcg_gen_andi_i64(d, d, ~mask);
   3758    tcg_gen_or_i64(d, d, t);
   3759    tcg_temp_free_i64(t);
   3760}
   3761
   3762static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
   3763{
   3764    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
   3765}
   3766
   3767static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3768{
   3769    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
   3770}
   3771
   3772static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3773{
   3774    TCGv_vec t = tcg_temp_new_vec_matching(d);
   3775    TCGv_vec m = tcg_temp_new_vec_matching(d);
   3776
   3777    tcg_gen_shli_vec(vece, t, a, sh);
   3778    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
   3779    tcg_gen_and_vec(vece, d, d, m);
   3780    tcg_gen_or_vec(vece, d, d, t);
   3781
   3782    tcg_temp_free_vec(t);
   3783    tcg_temp_free_vec(m);
   3784}
   3785
   3786void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3787                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3788{
   3789    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
   3790    const GVecGen2i ops[4] = {
   3791        { .fni8 = gen_shl8_ins_i64,
   3792          .fniv = gen_shl_ins_vec,
   3793          .fno = gen_helper_gvec_sli_b,
   3794          .load_dest = true,
   3795          .opt_opc = vecop_list,
   3796          .vece = MO_8 },
   3797        { .fni8 = gen_shl16_ins_i64,
   3798          .fniv = gen_shl_ins_vec,
   3799          .fno = gen_helper_gvec_sli_h,
   3800          .load_dest = true,
   3801          .opt_opc = vecop_list,
   3802          .vece = MO_16 },
   3803        { .fni4 = gen_shl32_ins_i32,
   3804          .fniv = gen_shl_ins_vec,
   3805          .fno = gen_helper_gvec_sli_s,
   3806          .load_dest = true,
   3807          .opt_opc = vecop_list,
   3808          .vece = MO_32 },
   3809        { .fni8 = gen_shl64_ins_i64,
   3810          .fniv = gen_shl_ins_vec,
   3811          .fno = gen_helper_gvec_sli_d,
   3812          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3813          .load_dest = true,
   3814          .opt_opc = vecop_list,
   3815          .vece = MO_64 },
   3816    };
   3817
   3818    /* tszimm encoding produces immediates in the range [0..esize-1]. */
   3819    tcg_debug_assert(shift >= 0);
   3820    tcg_debug_assert(shift < (8 << vece));
   3821
   3822    if (shift == 0) {
   3823        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
   3824    } else {
   3825        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3826    }
   3827}
   3828
   3829static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3830{
   3831    gen_helper_neon_mul_u8(a, a, b);
   3832    gen_helper_neon_add_u8(d, d, a);
   3833}
   3834
   3835static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3836{
   3837    gen_helper_neon_mul_u8(a, a, b);
   3838    gen_helper_neon_sub_u8(d, d, a);
   3839}
   3840
   3841static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3842{
   3843    gen_helper_neon_mul_u16(a, a, b);
   3844    gen_helper_neon_add_u16(d, d, a);
   3845}
   3846
   3847static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3848{
   3849    gen_helper_neon_mul_u16(a, a, b);
   3850    gen_helper_neon_sub_u16(d, d, a);
   3851}
   3852
   3853static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3854{
   3855    tcg_gen_mul_i32(a, a, b);
   3856    tcg_gen_add_i32(d, d, a);
   3857}
   3858
   3859static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3860{
   3861    tcg_gen_mul_i32(a, a, b);
   3862    tcg_gen_sub_i32(d, d, a);
   3863}
   3864
   3865static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   3866{
   3867    tcg_gen_mul_i64(a, a, b);
   3868    tcg_gen_add_i64(d, d, a);
   3869}
   3870
   3871static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   3872{
   3873    tcg_gen_mul_i64(a, a, b);
   3874    tcg_gen_sub_i64(d, d, a);
   3875}
   3876
   3877static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   3878{
   3879    tcg_gen_mul_vec(vece, a, a, b);
   3880    tcg_gen_add_vec(vece, d, d, a);
   3881}
   3882
   3883static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   3884{
   3885    tcg_gen_mul_vec(vece, a, a, b);
   3886    tcg_gen_sub_vec(vece, d, d, a);
   3887}
   3888
   3889/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
    3890 * these tables are shared with AArch64, which does support them.
   3891 */
   3892void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   3893                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   3894{
   3895    static const TCGOpcode vecop_list[] = {
   3896        INDEX_op_mul_vec, INDEX_op_add_vec, 0
   3897    };
   3898    static const GVecGen3 ops[4] = {
   3899        { .fni4 = gen_mla8_i32,
   3900          .fniv = gen_mla_vec,
   3901          .load_dest = true,
   3902          .opt_opc = vecop_list,
   3903          .vece = MO_8 },
   3904        { .fni4 = gen_mla16_i32,
   3905          .fniv = gen_mla_vec,
   3906          .load_dest = true,
   3907          .opt_opc = vecop_list,
   3908          .vece = MO_16 },
   3909        { .fni4 = gen_mla32_i32,
   3910          .fniv = gen_mla_vec,
   3911          .load_dest = true,
   3912          .opt_opc = vecop_list,
   3913          .vece = MO_32 },
   3914        { .fni8 = gen_mla64_i64,
   3915          .fniv = gen_mla_vec,
   3916          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3917          .load_dest = true,
   3918          .opt_opc = vecop_list,
   3919          .vece = MO_64 },
   3920    };
   3921    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   3922}
   3923
   3924void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   3925                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   3926{
   3927    static const TCGOpcode vecop_list[] = {
   3928        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
   3929    };
   3930    static const GVecGen3 ops[4] = {
   3931        { .fni4 = gen_mls8_i32,
   3932          .fniv = gen_mls_vec,
   3933          .load_dest = true,
   3934          .opt_opc = vecop_list,
   3935          .vece = MO_8 },
   3936        { .fni4 = gen_mls16_i32,
   3937          .fniv = gen_mls_vec,
   3938          .load_dest = true,
   3939          .opt_opc = vecop_list,
   3940          .vece = MO_16 },
   3941        { .fni4 = gen_mls32_i32,
   3942          .fniv = gen_mls_vec,
   3943          .load_dest = true,
   3944          .opt_opc = vecop_list,
   3945          .vece = MO_32 },
   3946        { .fni8 = gen_mls64_i64,
   3947          .fniv = gen_mls_vec,
   3948          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3949          .load_dest = true,
   3950          .opt_opc = vecop_list,
   3951          .vece = MO_64 },
   3952    };
   3953    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   3954}
   3955
    3956/* CMTST : test is "if ((X & Y) != 0)". */
   3957static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3958{
   3959    tcg_gen_and_i32(d, a, b);
   3960    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
   3961    tcg_gen_neg_i32(d, d);
   3962}
   3963
   3964void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   3965{
   3966    tcg_gen_and_i64(d, a, b);
   3967    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
   3968    tcg_gen_neg_i64(d, d);
   3969}
   3970
   3971static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   3972{
   3973    tcg_gen_and_vec(vece, d, a, b);
   3974    tcg_gen_dupi_vec(vece, a, 0);
   3975    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
   3976}
   3977
   3978void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   3979                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   3980{
   3981    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
   3982    static const GVecGen3 ops[4] = {
   3983        { .fni4 = gen_helper_neon_tst_u8,
   3984          .fniv = gen_cmtst_vec,
   3985          .opt_opc = vecop_list,
   3986          .vece = MO_8 },
   3987        { .fni4 = gen_helper_neon_tst_u16,
   3988          .fniv = gen_cmtst_vec,
   3989          .opt_opc = vecop_list,
   3990          .vece = MO_16 },
   3991        { .fni4 = gen_cmtst_i32,
   3992          .fniv = gen_cmtst_vec,
   3993          .opt_opc = vecop_list,
   3994          .vece = MO_32 },
   3995        { .fni8 = gen_cmtst_i64,
   3996          .fniv = gen_cmtst_vec,
   3997          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3998          .opt_opc = vecop_list,
   3999          .vece = MO_64 },
   4000    };
   4001    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4002}
   4003
   4004void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
   4005{
   4006    TCGv_i32 lval = tcg_temp_new_i32();
   4007    TCGv_i32 rval = tcg_temp_new_i32();
   4008    TCGv_i32 lsh = tcg_temp_new_i32();
   4009    TCGv_i32 rsh = tcg_temp_new_i32();
   4010    TCGv_i32 zero = tcg_const_i32(0);
   4011    TCGv_i32 max = tcg_const_i32(32);
   4012
   4013    /*
   4014     * Rely on the TCG guarantee that out of range shifts produce
   4015     * unspecified results, not undefined behaviour (i.e. no trap).
   4016     * Discard out-of-range results after the fact.
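             * Descriptive note: the shift count is the signed low byte of the
             * shift operand; a positive count shifts left, a negative count
             * shifts right, and a magnitude of 32 or more yields zero, which
             * the two movconds below select.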
   4017     */
   4018    tcg_gen_ext8s_i32(lsh, shift);
   4019    tcg_gen_neg_i32(rsh, lsh);
   4020    tcg_gen_shl_i32(lval, src, lsh);
   4021    tcg_gen_shr_i32(rval, src, rsh);
   4022    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
   4023    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
   4024
   4025    tcg_temp_free_i32(lval);
   4026    tcg_temp_free_i32(rval);
   4027    tcg_temp_free_i32(lsh);
   4028    tcg_temp_free_i32(rsh);
   4029    tcg_temp_free_i32(zero);
   4030    tcg_temp_free_i32(max);
   4031}
   4032
   4033void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
   4034{
   4035    TCGv_i64 lval = tcg_temp_new_i64();
   4036    TCGv_i64 rval = tcg_temp_new_i64();
   4037    TCGv_i64 lsh = tcg_temp_new_i64();
   4038    TCGv_i64 rsh = tcg_temp_new_i64();
   4039    TCGv_i64 zero = tcg_const_i64(0);
   4040    TCGv_i64 max = tcg_const_i64(64);
   4041
   4042    /*
   4043     * Rely on the TCG guarantee that out of range shifts produce
   4044     * unspecified results, not undefined behaviour (i.e. no trap).
   4045     * Discard out-of-range results after the fact.
   4046     */
   4047    tcg_gen_ext8s_i64(lsh, shift);
   4048    tcg_gen_neg_i64(rsh, lsh);
   4049    tcg_gen_shl_i64(lval, src, lsh);
   4050    tcg_gen_shr_i64(rval, src, rsh);
   4051    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
   4052    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
   4053
   4054    tcg_temp_free_i64(lval);
   4055    tcg_temp_free_i64(rval);
   4056    tcg_temp_free_i64(lsh);
   4057    tcg_temp_free_i64(rsh);
   4058    tcg_temp_free_i64(zero);
   4059    tcg_temp_free_i64(max);
   4060}
   4061
   4062static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
   4063                         TCGv_vec src, TCGv_vec shift)
   4064{
   4065    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
   4066    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
   4067    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
   4068    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
   4069    TCGv_vec msk, max;
   4070
   4071    tcg_gen_neg_vec(vece, rsh, shift);
   4072    if (vece == MO_8) {
   4073        tcg_gen_mov_vec(lsh, shift);
   4074    } else {
   4075        msk = tcg_temp_new_vec_matching(dst);
   4076        tcg_gen_dupi_vec(vece, msk, 0xff);
   4077        tcg_gen_and_vec(vece, lsh, shift, msk);
   4078        tcg_gen_and_vec(vece, rsh, rsh, msk);
   4079        tcg_temp_free_vec(msk);
   4080    }
   4081
   4082    /*
   4083     * Rely on the TCG guarantee that out of range shifts produce
   4084     * unspecified results, not undefined behaviour (i.e. no trap).
   4085     * Discard out-of-range results after the fact.
   4086     */
   4087    tcg_gen_shlv_vec(vece, lval, src, lsh);
   4088    tcg_gen_shrv_vec(vece, rval, src, rsh);
   4089
   4090    max = tcg_temp_new_vec_matching(dst);
   4091    tcg_gen_dupi_vec(vece, max, 8 << vece);
   4092
   4093    /*
    4094     * The choice of LT (signed) and GEU (unsigned) is biased toward
   4095     * the instructions of the x86_64 host.  For MO_8, the whole byte
   4096     * is significant so we must use an unsigned compare; otherwise we
   4097     * have already masked to a byte and so a signed compare works.
   4098     * Other tcg hosts have a full set of comparisons and do not care.
   4099     */
   4100    if (vece == MO_8) {
   4101        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
   4102        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
   4103        tcg_gen_andc_vec(vece, lval, lval, lsh);
   4104        tcg_gen_andc_vec(vece, rval, rval, rsh);
   4105    } else {
   4106        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
   4107        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
   4108        tcg_gen_and_vec(vece, lval, lval, lsh);
   4109        tcg_gen_and_vec(vece, rval, rval, rsh);
   4110    }
   4111    tcg_gen_or_vec(vece, dst, lval, rval);
   4112
   4113    tcg_temp_free_vec(max);
   4114    tcg_temp_free_vec(lval);
   4115    tcg_temp_free_vec(rval);
   4116    tcg_temp_free_vec(lsh);
   4117    tcg_temp_free_vec(rsh);
   4118}
   4119
   4120void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4121                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4122{
   4123    static const TCGOpcode vecop_list[] = {
   4124        INDEX_op_neg_vec, INDEX_op_shlv_vec,
   4125        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
   4126    };
   4127    static const GVecGen3 ops[4] = {
   4128        { .fniv = gen_ushl_vec,
   4129          .fno = gen_helper_gvec_ushl_b,
   4130          .opt_opc = vecop_list,
   4131          .vece = MO_8 },
   4132        { .fniv = gen_ushl_vec,
   4133          .fno = gen_helper_gvec_ushl_h,
   4134          .opt_opc = vecop_list,
   4135          .vece = MO_16 },
   4136        { .fni4 = gen_ushl_i32,
   4137          .fniv = gen_ushl_vec,
   4138          .opt_opc = vecop_list,
   4139          .vece = MO_32 },
   4140        { .fni8 = gen_ushl_i64,
   4141          .fniv = gen_ushl_vec,
   4142          .opt_opc = vecop_list,
   4143          .vece = MO_64 },
   4144    };
   4145    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4146}
   4147
   4148void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
   4149{
   4150    TCGv_i32 lval = tcg_temp_new_i32();
   4151    TCGv_i32 rval = tcg_temp_new_i32();
   4152    TCGv_i32 lsh = tcg_temp_new_i32();
   4153    TCGv_i32 rsh = tcg_temp_new_i32();
   4154    TCGv_i32 zero = tcg_const_i32(0);
   4155    TCGv_i32 max = tcg_const_i32(31);
   4156
   4157    /*
   4158     * Rely on the TCG guarantee that out of range shifts produce
   4159     * unspecified results, not undefined behaviour (i.e. no trap).
   4160     * Discard out-of-range results after the fact.
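             * Descriptive note: unlike the unsigned case, an over-large
             * arithmetic right shift must still produce all sign bits, so rsh
             * is clamped to 31 with umin rather than being discarded.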
   4161     */
   4162    tcg_gen_ext8s_i32(lsh, shift);
   4163    tcg_gen_neg_i32(rsh, lsh);
   4164    tcg_gen_shl_i32(lval, src, lsh);
   4165    tcg_gen_umin_i32(rsh, rsh, max);
   4166    tcg_gen_sar_i32(rval, src, rsh);
   4167    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
   4168    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
   4169
   4170    tcg_temp_free_i32(lval);
   4171    tcg_temp_free_i32(rval);
   4172    tcg_temp_free_i32(lsh);
   4173    tcg_temp_free_i32(rsh);
   4174    tcg_temp_free_i32(zero);
   4175    tcg_temp_free_i32(max);
   4176}
   4177
   4178void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
   4179{
   4180    TCGv_i64 lval = tcg_temp_new_i64();
   4181    TCGv_i64 rval = tcg_temp_new_i64();
   4182    TCGv_i64 lsh = tcg_temp_new_i64();
   4183    TCGv_i64 rsh = tcg_temp_new_i64();
   4184    TCGv_i64 zero = tcg_const_i64(0);
   4185    TCGv_i64 max = tcg_const_i64(63);
   4186
   4187    /*
   4188     * Rely on the TCG guarantee that out of range shifts produce
   4189     * unspecified results, not undefined behaviour (i.e. no trap).
   4190     * Discard out-of-range results after the fact.
   4191     */
   4192    tcg_gen_ext8s_i64(lsh, shift);
   4193    tcg_gen_neg_i64(rsh, lsh);
   4194    tcg_gen_shl_i64(lval, src, lsh);
   4195    tcg_gen_umin_i64(rsh, rsh, max);
   4196    tcg_gen_sar_i64(rval, src, rsh);
   4197    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
   4198    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
   4199
   4200    tcg_temp_free_i64(lval);
   4201    tcg_temp_free_i64(rval);
   4202    tcg_temp_free_i64(lsh);
   4203    tcg_temp_free_i64(rsh);
   4204    tcg_temp_free_i64(zero);
   4205    tcg_temp_free_i64(max);
   4206}
   4207
   4208static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
   4209                         TCGv_vec src, TCGv_vec shift)
   4210{
   4211    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
   4212    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
   4213    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
   4214    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
   4215    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
   4216
   4217    /*
   4218     * Rely on the TCG guarantee that out of range shifts produce
   4219     * unspecified results, not undefined behaviour (i.e. no trap).
   4220     * Discard out-of-range results after the fact.
   4221     */
   4222    tcg_gen_neg_vec(vece, rsh, shift);
   4223    if (vece == MO_8) {
   4224        tcg_gen_mov_vec(lsh, shift);
   4225    } else {
   4226        tcg_gen_dupi_vec(vece, tmp, 0xff);
   4227        tcg_gen_and_vec(vece, lsh, shift, tmp);
   4228        tcg_gen_and_vec(vece, rsh, rsh, tmp);
   4229    }
   4230
    4231    /* Bound rsh so that an out-of-range right shift produces all sign bits.  */
   4232    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
   4233    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
   4234    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
   4235
   4236    tcg_gen_shlv_vec(vece, lval, src, lsh);
   4237    tcg_gen_sarv_vec(vece, rval, src, rsh);
   4238
   4239    /* Select in-bound left shift.  */
   4240    tcg_gen_andc_vec(vece, lval, lval, tmp);
   4241
   4242    /* Select between left and right shift.  */
   4243    if (vece == MO_8) {
   4244        tcg_gen_dupi_vec(vece, tmp, 0);
   4245        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
   4246    } else {
   4247        tcg_gen_dupi_vec(vece, tmp, 0x80);
   4248        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
   4249    }
   4250
   4251    tcg_temp_free_vec(lval);
   4252    tcg_temp_free_vec(rval);
   4253    tcg_temp_free_vec(lsh);
   4254    tcg_temp_free_vec(rsh);
   4255    tcg_temp_free_vec(tmp);
   4256}
   4257
   4258void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4259                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4260{
   4261    static const TCGOpcode vecop_list[] = {
   4262        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
   4263        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
   4264    };
   4265    static const GVecGen3 ops[4] = {
   4266        { .fniv = gen_sshl_vec,
   4267          .fno = gen_helper_gvec_sshl_b,
   4268          .opt_opc = vecop_list,
   4269          .vece = MO_8 },
   4270        { .fniv = gen_sshl_vec,
   4271          .fno = gen_helper_gvec_sshl_h,
   4272          .opt_opc = vecop_list,
   4273          .vece = MO_16 },
   4274        { .fni4 = gen_sshl_i32,
   4275          .fniv = gen_sshl_vec,
   4276          .opt_opc = vecop_list,
   4277          .vece = MO_32 },
   4278        { .fni8 = gen_sshl_i64,
   4279          .fniv = gen_sshl_vec,
   4280          .opt_opc = vecop_list,
   4281          .vece = MO_64 },
   4282    };
   4283    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4284}
   4285
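        /*
         * Saturating add/sub expanders that also update the QC flag: compute
         * both the wrapping and the saturating result, and OR the per-element
         * mismatch mask into the cumulative-saturation operand 'sat'.
         */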
   4286static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
   4287                          TCGv_vec a, TCGv_vec b)
   4288{
   4289    TCGv_vec x = tcg_temp_new_vec_matching(t);
   4290    tcg_gen_add_vec(vece, x, a, b);
   4291    tcg_gen_usadd_vec(vece, t, a, b);
   4292    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
   4293    tcg_gen_or_vec(vece, sat, sat, x);
   4294    tcg_temp_free_vec(x);
   4295}
   4296
   4297void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4298                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4299{
   4300    static const TCGOpcode vecop_list[] = {
   4301        INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
   4302    };
   4303    static const GVecGen4 ops[4] = {
   4304        { .fniv = gen_uqadd_vec,
   4305          .fno = gen_helper_gvec_uqadd_b,
   4306          .write_aofs = true,
   4307          .opt_opc = vecop_list,
   4308          .vece = MO_8 },
   4309        { .fniv = gen_uqadd_vec,
   4310          .fno = gen_helper_gvec_uqadd_h,
   4311          .write_aofs = true,
   4312          .opt_opc = vecop_list,
   4313          .vece = MO_16 },
   4314        { .fniv = gen_uqadd_vec,
   4315          .fno = gen_helper_gvec_uqadd_s,
   4316          .write_aofs = true,
   4317          .opt_opc = vecop_list,
   4318          .vece = MO_32 },
   4319        { .fniv = gen_uqadd_vec,
   4320          .fno = gen_helper_gvec_uqadd_d,
   4321          .write_aofs = true,
   4322          .opt_opc = vecop_list,
   4323          .vece = MO_64 },
   4324    };
   4325    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
   4326                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4327}
   4328
   4329static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
   4330                          TCGv_vec a, TCGv_vec b)
   4331{
   4332    TCGv_vec x = tcg_temp_new_vec_matching(t);
   4333    tcg_gen_add_vec(vece, x, a, b);
   4334    tcg_gen_ssadd_vec(vece, t, a, b);
   4335    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
   4336    tcg_gen_or_vec(vece, sat, sat, x);
   4337    tcg_temp_free_vec(x);
   4338}
   4339
   4340void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4341                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4342{
   4343    static const TCGOpcode vecop_list[] = {
   4344        INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
   4345    };
   4346    static const GVecGen4 ops[4] = {
   4347        { .fniv = gen_sqadd_vec,
   4348          .fno = gen_helper_gvec_sqadd_b,
   4349          .opt_opc = vecop_list,
   4350          .write_aofs = true,
   4351          .vece = MO_8 },
   4352        { .fniv = gen_sqadd_vec,
   4353          .fno = gen_helper_gvec_sqadd_h,
   4354          .opt_opc = vecop_list,
   4355          .write_aofs = true,
   4356          .vece = MO_16 },
   4357        { .fniv = gen_sqadd_vec,
   4358          .fno = gen_helper_gvec_sqadd_s,
   4359          .opt_opc = vecop_list,
   4360          .write_aofs = true,
   4361          .vece = MO_32 },
   4362        { .fniv = gen_sqadd_vec,
   4363          .fno = gen_helper_gvec_sqadd_d,
   4364          .opt_opc = vecop_list,
   4365          .write_aofs = true,
   4366          .vece = MO_64 },
   4367    };
   4368    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
   4369                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4370}
   4371
   4372static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
   4373                          TCGv_vec a, TCGv_vec b)
   4374{
   4375    TCGv_vec x = tcg_temp_new_vec_matching(t);
   4376    tcg_gen_sub_vec(vece, x, a, b);
   4377    tcg_gen_ussub_vec(vece, t, a, b);
   4378    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
   4379    tcg_gen_or_vec(vece, sat, sat, x);
   4380    tcg_temp_free_vec(x);
   4381}
   4382
   4383void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4384                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4385{
   4386    static const TCGOpcode vecop_list[] = {
   4387        INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
   4388    };
   4389    static const GVecGen4 ops[4] = {
   4390        { .fniv = gen_uqsub_vec,
   4391          .fno = gen_helper_gvec_uqsub_b,
   4392          .opt_opc = vecop_list,
   4393          .write_aofs = true,
   4394          .vece = MO_8 },
   4395        { .fniv = gen_uqsub_vec,
   4396          .fno = gen_helper_gvec_uqsub_h,
   4397          .opt_opc = vecop_list,
   4398          .write_aofs = true,
   4399          .vece = MO_16 },
   4400        { .fniv = gen_uqsub_vec,
   4401          .fno = gen_helper_gvec_uqsub_s,
   4402          .opt_opc = vecop_list,
   4403          .write_aofs = true,
   4404          .vece = MO_32 },
   4405        { .fniv = gen_uqsub_vec,
   4406          .fno = gen_helper_gvec_uqsub_d,
   4407          .opt_opc = vecop_list,
   4408          .write_aofs = true,
   4409          .vece = MO_64 },
   4410    };
   4411    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
   4412                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4413}
   4414
   4415static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
   4416                          TCGv_vec a, TCGv_vec b)
   4417{
   4418    TCGv_vec x = tcg_temp_new_vec_matching(t);
   4419    tcg_gen_sub_vec(vece, x, a, b);
   4420    tcg_gen_sssub_vec(vece, t, a, b);
   4421    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
   4422    tcg_gen_or_vec(vece, sat, sat, x);
   4423    tcg_temp_free_vec(x);
   4424}
   4425
   4426void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4427                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4428{
   4429    static const TCGOpcode vecop_list[] = {
   4430        INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
   4431    };
   4432    static const GVecGen4 ops[4] = {
   4433        { .fniv = gen_sqsub_vec,
   4434          .fno = gen_helper_gvec_sqsub_b,
   4435          .opt_opc = vecop_list,
   4436          .write_aofs = true,
   4437          .vece = MO_8 },
   4438        { .fniv = gen_sqsub_vec,
   4439          .fno = gen_helper_gvec_sqsub_h,
   4440          .opt_opc = vecop_list,
   4441          .write_aofs = true,
   4442          .vece = MO_16 },
   4443        { .fniv = gen_sqsub_vec,
   4444          .fno = gen_helper_gvec_sqsub_s,
   4445          .opt_opc = vecop_list,
   4446          .write_aofs = true,
   4447          .vece = MO_32 },
   4448        { .fniv = gen_sqsub_vec,
   4449          .fno = gen_helper_gvec_sqsub_d,
   4450          .opt_opc = vecop_list,
   4451          .write_aofs = true,
   4452          .vece = MO_64 },
   4453    };
   4454    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
   4455                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4456}
   4457
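        /*
         * Scalar absolute difference, computed without branches: produce
         * both a - b and b - a, then use movcond to select whichever is
         * non-negative.  The vector form below uses max(a, b) - min(a, b)
         * instead.  The same pattern is used for the unsigned variants.
         */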
   4458static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   4459{
   4460    TCGv_i32 t = tcg_temp_new_i32();
   4461
   4462    tcg_gen_sub_i32(t, a, b);
   4463    tcg_gen_sub_i32(d, b, a);
   4464    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
   4465    tcg_temp_free_i32(t);
   4466}
   4467
   4468static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   4469{
   4470    TCGv_i64 t = tcg_temp_new_i64();
   4471
   4472    tcg_gen_sub_i64(t, a, b);
   4473    tcg_gen_sub_i64(d, b, a);
   4474    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
   4475    tcg_temp_free_i64(t);
   4476}
   4477
   4478static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   4479{
   4480    TCGv_vec t = tcg_temp_new_vec_matching(d);
   4481
   4482    tcg_gen_smin_vec(vece, t, a, b);
   4483    tcg_gen_smax_vec(vece, d, a, b);
   4484    tcg_gen_sub_vec(vece, d, d, t);
   4485    tcg_temp_free_vec(t);
   4486}
   4487
   4488void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4489                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4490{
   4491    static const TCGOpcode vecop_list[] = {
   4492        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
   4493    };
   4494    static const GVecGen3 ops[4] = {
   4495        { .fniv = gen_sabd_vec,
   4496          .fno = gen_helper_gvec_sabd_b,
   4497          .opt_opc = vecop_list,
   4498          .vece = MO_8 },
   4499        { .fniv = gen_sabd_vec,
   4500          .fno = gen_helper_gvec_sabd_h,
   4501          .opt_opc = vecop_list,
   4502          .vece = MO_16 },
   4503        { .fni4 = gen_sabd_i32,
   4504          .fniv = gen_sabd_vec,
   4505          .fno = gen_helper_gvec_sabd_s,
   4506          .opt_opc = vecop_list,
   4507          .vece = MO_32 },
   4508        { .fni8 = gen_sabd_i64,
   4509          .fniv = gen_sabd_vec,
   4510          .fno = gen_helper_gvec_sabd_d,
   4511          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   4512          .opt_opc = vecop_list,
   4513          .vece = MO_64 },
   4514    };
   4515    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4516}
   4517
   4518static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   4519{
   4520    TCGv_i32 t = tcg_temp_new_i32();
   4521
   4522    tcg_gen_sub_i32(t, a, b);
   4523    tcg_gen_sub_i32(d, b, a);
   4524    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
   4525    tcg_temp_free_i32(t);
   4526}
   4527
   4528static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   4529{
   4530    TCGv_i64 t = tcg_temp_new_i64();
   4531
   4532    tcg_gen_sub_i64(t, a, b);
   4533    tcg_gen_sub_i64(d, b, a);
   4534    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
   4535    tcg_temp_free_i64(t);
   4536}
   4537
   4538static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   4539{
   4540    TCGv_vec t = tcg_temp_new_vec_matching(d);
   4541
   4542    tcg_gen_umin_vec(vece, t, a, b);
   4543    tcg_gen_umax_vec(vece, d, a, b);
   4544    tcg_gen_sub_vec(vece, d, d, t);
   4545    tcg_temp_free_vec(t);
   4546}
   4547
   4548void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4549                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4550{
   4551    static const TCGOpcode vecop_list[] = {
   4552        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
   4553    };
   4554    static const GVecGen3 ops[4] = {
   4555        { .fniv = gen_uabd_vec,
   4556          .fno = gen_helper_gvec_uabd_b,
   4557          .opt_opc = vecop_list,
   4558          .vece = MO_8 },
   4559        { .fniv = gen_uabd_vec,
   4560          .fno = gen_helper_gvec_uabd_h,
   4561          .opt_opc = vecop_list,
   4562          .vece = MO_16 },
   4563        { .fni4 = gen_uabd_i32,
   4564          .fniv = gen_uabd_vec,
   4565          .fno = gen_helper_gvec_uabd_s,
   4566          .opt_opc = vecop_list,
   4567          .vece = MO_32 },
   4568        { .fni8 = gen_uabd_i64,
   4569          .fniv = gen_uabd_vec,
   4570          .fno = gen_helper_gvec_uabd_d,
   4571          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   4572          .opt_opc = vecop_list,
   4573          .vece = MO_64 },
   4574    };
   4575    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4576}
   4577
   4578static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   4579{
   4580    TCGv_i32 t = tcg_temp_new_i32();
   4581    gen_sabd_i32(t, a, b);
   4582    tcg_gen_add_i32(d, d, t);
   4583    tcg_temp_free_i32(t);
   4584}
   4585
   4586static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   4587{
   4588    TCGv_i64 t = tcg_temp_new_i64();
   4589    gen_sabd_i64(t, a, b);
   4590    tcg_gen_add_i64(d, d, t);
   4591    tcg_temp_free_i64(t);
   4592}
   4593
   4594static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   4595{
   4596    TCGv_vec t = tcg_temp_new_vec_matching(d);
   4597    gen_sabd_vec(vece, t, a, b);
   4598    tcg_gen_add_vec(vece, d, d, t);
   4599    tcg_temp_free_vec(t);
   4600}
   4601
   4602void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4603                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4604{
   4605    static const TCGOpcode vecop_list[] = {
   4606        INDEX_op_sub_vec, INDEX_op_add_vec,
   4607        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
   4608    };
   4609    static const GVecGen3 ops[4] = {
   4610        { .fniv = gen_saba_vec,
   4611          .fno = gen_helper_gvec_saba_b,
   4612          .opt_opc = vecop_list,
   4613          .load_dest = true,
   4614          .vece = MO_8 },
   4615        { .fniv = gen_saba_vec,
   4616          .fno = gen_helper_gvec_saba_h,
   4617          .opt_opc = vecop_list,
   4618          .load_dest = true,
   4619          .vece = MO_16 },
   4620        { .fni4 = gen_saba_i32,
   4621          .fniv = gen_saba_vec,
   4622          .fno = gen_helper_gvec_saba_s,
   4623          .opt_opc = vecop_list,
   4624          .load_dest = true,
   4625          .vece = MO_32 },
   4626        { .fni8 = gen_saba_i64,
   4627          .fniv = gen_saba_vec,
   4628          .fno = gen_helper_gvec_saba_d,
   4629          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   4630          .opt_opc = vecop_list,
   4631          .load_dest = true,
   4632          .vece = MO_64 },
   4633    };
   4634    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4635}
   4636
   4637static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   4638{
   4639    TCGv_i32 t = tcg_temp_new_i32();
   4640    gen_uabd_i32(t, a, b);
   4641    tcg_gen_add_i32(d, d, t);
   4642    tcg_temp_free_i32(t);
   4643}
   4644
   4645static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   4646{
   4647    TCGv_i64 t = tcg_temp_new_i64();
   4648    gen_uabd_i64(t, a, b);
   4649    tcg_gen_add_i64(d, d, t);
   4650    tcg_temp_free_i64(t);
   4651}
   4652
   4653static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   4654{
   4655    TCGv_vec t = tcg_temp_new_vec_matching(d);
   4656    gen_uabd_vec(vece, t, a, b);
   4657    tcg_gen_add_vec(vece, d, d, t);
   4658    tcg_temp_free_vec(t);
   4659}
   4660
   4661void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4662                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4663{
   4664    static const TCGOpcode vecop_list[] = {
   4665        INDEX_op_sub_vec, INDEX_op_add_vec,
   4666        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
   4667    };
   4668    static const GVecGen3 ops[4] = {
   4669        { .fniv = gen_uaba_vec,
   4670          .fno = gen_helper_gvec_uaba_b,
   4671          .opt_opc = vecop_list,
   4672          .load_dest = true,
   4673          .vece = MO_8 },
   4674        { .fniv = gen_uaba_vec,
   4675          .fno = gen_helper_gvec_uaba_h,
   4676          .opt_opc = vecop_list,
   4677          .load_dest = true,
   4678          .vece = MO_16 },
   4679        { .fni4 = gen_uaba_i32,
   4680          .fniv = gen_uaba_vec,
   4681          .fno = gen_helper_gvec_uaba_s,
   4682          .opt_opc = vecop_list,
   4683          .load_dest = true,
   4684          .vece = MO_32 },
   4685        { .fni8 = gen_uaba_i64,
   4686          .fniv = gen_uaba_vec,
   4687          .fno = gen_helper_gvec_uaba_d,
   4688          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   4689          .opt_opc = vecop_list,
   4690          .load_dest = true,
   4691          .vece = MO_64 },
   4692    };
   4693    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4694}
   4695
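        /*
         * Emit code for an MRC/MCR/MRRC/MCRR style coprocessor access:
         * look up the ARMCPRegInfo for the encoding, perform the static
         * and (if required) runtime permission checks, handle the special
         * NOP/WFI register types, and then generate the read or write,
         * either through the register's readfn/writefn helper or by
         * accessing the CPUARMState field directly.  Accesses to unknown
         * registers fall through to unallocated_encoding().
         */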
   4696static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
   4697                           int opc1, int crn, int crm, int opc2,
   4698                           bool isread, int rt, int rt2)
   4699{
   4700    const ARMCPRegInfo *ri;
   4701
   4702    ri = get_arm_cp_reginfo(s->cp_regs,
   4703            ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
   4704    if (ri) {
   4705        bool need_exit_tb;
   4706
   4707        /* Check access permissions */
   4708        if (!cp_access_ok(s->current_el, ri, isread)) {
   4709            unallocated_encoding(s);
   4710            return;
   4711        }
   4712
   4713        if (s->hstr_active || ri->accessfn ||
   4714            (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
   4715            /* Emit code to perform further access permissions checks at
   4716             * runtime; this may result in an exception.
    4717             * Note that on XScale all cp0..cp13 registers do an access check
   4718             * call in order to handle c15_cpar.
   4719             */
   4720            TCGv_ptr tmpptr;
   4721            TCGv_i32 tcg_syn, tcg_isread;
   4722            uint32_t syndrome;
   4723
   4724            /* Note that since we are an implementation which takes an
   4725             * exception on a trapped conditional instruction only if the
   4726             * instruction passes its condition code check, we can take
   4727             * advantage of the clause in the ARM ARM that allows us to set
   4728             * the COND field in the instruction to 0xE in all cases.
   4729             * We could fish the actual condition out of the insn (ARM)
   4730             * or the condexec bits (Thumb) but it isn't necessary.
   4731             */
   4732            switch (cpnum) {
   4733            case 14:
   4734                if (is64) {
   4735                    syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
   4736                                                 isread, false);
   4737                } else {
   4738                    syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
   4739                                                rt, isread, false);
   4740                }
   4741                break;
   4742            case 15:
   4743                if (is64) {
   4744                    syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
   4745                                                 isread, false);
   4746                } else {
   4747                    syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
   4748                                                rt, isread, false);
   4749                }
   4750                break;
   4751            default:
   4752                /* ARMv8 defines that only coprocessors 14 and 15 exist,
   4753                 * so this can only happen if this is an ARMv7 or earlier CPU,
   4754                 * in which case the syndrome information won't actually be
   4755                 * guest visible.
   4756                 */
   4757                assert(!arm_dc_feature(s, ARM_FEATURE_V8));
   4758                syndrome = syn_uncategorized();
   4759                break;
   4760            }
   4761
   4762            gen_set_condexec(s);
   4763            gen_set_pc_im(s, s->pc_curr);
   4764            tmpptr = tcg_const_ptr(ri);
   4765            tcg_syn = tcg_const_i32(syndrome);
   4766            tcg_isread = tcg_const_i32(isread);
   4767            gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
   4768                                           tcg_isread);
   4769            tcg_temp_free_ptr(tmpptr);
   4770            tcg_temp_free_i32(tcg_syn);
   4771            tcg_temp_free_i32(tcg_isread);
   4772        } else if (ri->type & ARM_CP_RAISES_EXC) {
   4773            /*
   4774             * The readfn or writefn might raise an exception;
   4775             * synchronize the CPU state in case it does.
   4776             */
   4777            gen_set_condexec(s);
   4778            gen_set_pc_im(s, s->pc_curr);
   4779        }
   4780
   4781        /* Handle special cases first */
   4782        switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
   4783        case ARM_CP_NOP:
   4784            return;
   4785        case ARM_CP_WFI:
   4786            if (isread) {
   4787                unallocated_encoding(s);
   4788                return;
   4789            }
   4790            gen_set_pc_im(s, s->base.pc_next);
   4791            s->base.is_jmp = DISAS_WFI;
   4792            return;
   4793        default:
   4794            break;
   4795        }
   4796
   4797        if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
   4798            gen_io_start();
   4799        }
   4800
   4801        if (isread) {
   4802            /* Read */
   4803            if (is64) {
   4804                TCGv_i64 tmp64;
   4805                TCGv_i32 tmp;
   4806                if (ri->type & ARM_CP_CONST) {
   4807                    tmp64 = tcg_const_i64(ri->resetvalue);
   4808                } else if (ri->readfn) {
   4809                    TCGv_ptr tmpptr;
   4810                    tmp64 = tcg_temp_new_i64();
   4811                    tmpptr = tcg_const_ptr(ri);
   4812                    gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
   4813                    tcg_temp_free_ptr(tmpptr);
   4814                } else {
   4815                    tmp64 = tcg_temp_new_i64();
   4816                    tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
   4817                }
   4818                tmp = tcg_temp_new_i32();
   4819                tcg_gen_extrl_i64_i32(tmp, tmp64);
   4820                store_reg(s, rt, tmp);
   4821                tmp = tcg_temp_new_i32();
   4822                tcg_gen_extrh_i64_i32(tmp, tmp64);
   4823                tcg_temp_free_i64(tmp64);
   4824                store_reg(s, rt2, tmp);
   4825            } else {
   4826                TCGv_i32 tmp;
   4827                if (ri->type & ARM_CP_CONST) {
   4828                    tmp = tcg_const_i32(ri->resetvalue);
   4829                } else if (ri->readfn) {
   4830                    TCGv_ptr tmpptr;
   4831                    tmp = tcg_temp_new_i32();
   4832                    tmpptr = tcg_const_ptr(ri);
   4833                    gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
   4834                    tcg_temp_free_ptr(tmpptr);
   4835                } else {
   4836                    tmp = load_cpu_offset(ri->fieldoffset);
   4837                }
   4838                if (rt == 15) {
   4839                    /* Destination register of r15 for 32 bit loads sets
    4840                    /* A destination register of r15 for 32-bit loads sets
    4841                     * the condition codes from the high 4 bits of the loaded value
   4842                    gen_set_nzcv(tmp);
   4843                    tcg_temp_free_i32(tmp);
   4844                } else {
   4845                    store_reg(s, rt, tmp);
   4846                }
   4847            }
   4848        } else {
   4849            /* Write */
   4850            if (ri->type & ARM_CP_CONST) {
   4851                /* If not forbidden by access permissions, treat as WI */
   4852                return;
   4853            }
   4854
   4855            if (is64) {
   4856                TCGv_i32 tmplo, tmphi;
   4857                TCGv_i64 tmp64 = tcg_temp_new_i64();
   4858                tmplo = load_reg(s, rt);
   4859                tmphi = load_reg(s, rt2);
   4860                tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
   4861                tcg_temp_free_i32(tmplo);
   4862                tcg_temp_free_i32(tmphi);
   4863                if (ri->writefn) {
   4864                    TCGv_ptr tmpptr = tcg_const_ptr(ri);
   4865                    gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
   4866                    tcg_temp_free_ptr(tmpptr);
   4867                } else {
   4868                    tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
   4869                }
   4870                tcg_temp_free_i64(tmp64);
   4871            } else {
   4872                if (ri->writefn) {
   4873                    TCGv_i32 tmp;
   4874                    TCGv_ptr tmpptr;
   4875                    tmp = load_reg(s, rt);
   4876                    tmpptr = tcg_const_ptr(ri);
   4877                    gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
   4878                    tcg_temp_free_ptr(tmpptr);
   4879                    tcg_temp_free_i32(tmp);
   4880                } else {
   4881                    TCGv_i32 tmp = load_reg(s, rt);
   4882                    store_cpu_offset(tmp, ri->fieldoffset);
   4883                }
   4884            }
   4885        }
   4886
   4887        /* I/O operations must end the TB here (whether read or write) */
   4888        need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
   4889                        (ri->type & ARM_CP_IO));
   4890
   4891        if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
   4892            /*
   4893             * A write to any coprocessor register that ends a TB
   4894             * must rebuild the hflags for the next TB.
   4895             */
   4896            TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
   4897            if (arm_dc_feature(s, ARM_FEATURE_M)) {
   4898                gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
   4899            } else {
   4900                if (ri->type & ARM_CP_NEWEL) {
   4901                    gen_helper_rebuild_hflags_a32_newel(cpu_env);
   4902                } else {
   4903                    gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
   4904                }
   4905            }
   4906            tcg_temp_free_i32(tcg_el);
   4907            /*
   4908             * We default to ending the TB on a coprocessor register write,
   4909             * but allow this to be suppressed by the register definition
   4910             * (usually only necessary to work around guest bugs).
   4911             */
   4912            need_exit_tb = true;
   4913        }
   4914        if (need_exit_tb) {
   4915            gen_lookup_tb(s);
   4916        }
   4917
   4918        return;
   4919    }
   4920
   4921    /* Unknown register; this might be a guest error or a QEMU
   4922     * unimplemented feature.
   4923     */
   4924    if (is64) {
   4925        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
   4926                      "64 bit system register cp:%d opc1: %d crm:%d "
   4927                      "(%s)\n",
   4928                      isread ? "read" : "write", cpnum, opc1, crm,
   4929                      s->ns ? "non-secure" : "secure");
   4930    } else {
   4931        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
   4932                      "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
   4933                      "(%s)\n",
   4934                      isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
   4935                      s->ns ? "non-secure" : "secure");
   4936    }
   4937
   4938    unallocated_encoding(s);
   4939    return;
   4940}
   4941
   4942/* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
   4943static void disas_xscale_insn(DisasContext *s, uint32_t insn)
   4944{
   4945    int cpnum = (insn >> 8) & 0xf;
   4946
   4947    if (extract32(s->c15_cpar, cpnum, 1) == 0) {
   4948        unallocated_encoding(s);
   4949    } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
   4950        if (disas_iwmmxt_insn(s, insn)) {
   4951            unallocated_encoding(s);
   4952        }
   4953    } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
   4954        if (disas_dsp_insn(s, insn)) {
   4955            unallocated_encoding(s);
   4956        }
   4957    }
   4958}
   4959
   4960/* Store a 64-bit value to a register pair.  Clobbers val.  */
   4961static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
   4962{
   4963    TCGv_i32 tmp;
   4964    tmp = tcg_temp_new_i32();
   4965    tcg_gen_extrl_i64_i32(tmp, val);
   4966    store_reg(s, rlow, tmp);
   4967    tmp = tcg_temp_new_i32();
   4968    tcg_gen_extrh_i64_i32(tmp, val);
   4969    store_reg(s, rhigh, tmp);
   4970}
   4971
    4972/* Load and add a 64-bit value from a register pair.  */
   4973static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
   4974{
   4975    TCGv_i64 tmp;
   4976    TCGv_i32 tmpl;
   4977    TCGv_i32 tmph;
   4978
   4979    /* Load 64-bit value rd:rn.  */
   4980    tmpl = load_reg(s, rlow);
   4981    tmph = load_reg(s, rhigh);
   4982    tmp = tcg_temp_new_i64();
   4983    tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
   4984    tcg_temp_free_i32(tmpl);
   4985    tcg_temp_free_i32(tmph);
   4986    tcg_gen_add_i64(val, val, tmp);
   4987    tcg_temp_free_i64(tmp);
   4988}
   4989
   4990/* Set N and Z flags from hi|lo.  */
   4991static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
   4992{
   4993    tcg_gen_mov_i32(cpu_NF, hi);
   4994    tcg_gen_or_i32(cpu_ZF, lo, hi);
   4995}
   4996
   4997/* Load/Store exclusive instructions are implemented by remembering
   4998   the value/address loaded, and seeing if these are the same
   4999   when the store is performed.  This should be sufficient to implement
   5000   the architecturally mandated semantics, and avoids having to monitor
   5001   regular stores.  The compare vs the remembered value is done during
   5002   the cmpxchg operation, but we must compare the addresses manually.  */
   5003static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
   5004                               TCGv_i32 addr, int size)
   5005{
   5006    TCGv_i32 tmp = tcg_temp_new_i32();
   5007    MemOp opc = size | MO_ALIGN | s->be_data;
   5008
   5009    s->is_ldex = true;
   5010
   5011    if (size == 3) {
   5012        TCGv_i32 tmp2 = tcg_temp_new_i32();
   5013        TCGv_i64 t64 = tcg_temp_new_i64();
   5014
   5015        /*
   5016         * For AArch32, architecturally the 32-bit word at the lowest
   5017         * address is always Rt and the one at addr+4 is Rt2, even if
   5018         * the CPU is big-endian. That means we don't want to do a
   5019         * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
   5020         * architecturally 64-bit access, but instead do a 64-bit access
   5021         * using MO_BE if appropriate and then split the two halves.
   5022         */
   5023        TCGv taddr = gen_aa32_addr(s, addr, opc);
   5024
   5025        tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
   5026        tcg_temp_free(taddr);
   5027        tcg_gen_mov_i64(cpu_exclusive_val, t64);
   5028        if (s->be_data == MO_BE) {
   5029            tcg_gen_extr_i64_i32(tmp2, tmp, t64);
   5030        } else {
   5031            tcg_gen_extr_i64_i32(tmp, tmp2, t64);
   5032        }
   5033        tcg_temp_free_i64(t64);
   5034
   5035        store_reg(s, rt2, tmp2);
   5036    } else {
   5037        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
   5038        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
   5039    }
   5040
   5041    store_reg(s, rt, tmp);
   5042    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
   5043}
   5044
   5045static void gen_clrex(DisasContext *s)
   5046{
   5047    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
   5048}
   5049
   5050static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
   5051                                TCGv_i32 addr, int size)
   5052{
   5053    TCGv_i32 t0, t1, t2;
   5054    TCGv_i64 extaddr;
   5055    TCGv taddr;
   5056    TCGLabel *done_label;
   5057    TCGLabel *fail_label;
   5058    MemOp opc = size | MO_ALIGN | s->be_data;
   5059
   5060    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
   5061         [addr] = {Rt};
   5062         {Rd} = 0;
   5063       } else {
   5064         {Rd} = 1;
   5065       } */
   5066    fail_label = gen_new_label();
   5067    done_label = gen_new_label();
   5068    extaddr = tcg_temp_new_i64();
   5069    tcg_gen_extu_i32_i64(extaddr, addr);
   5070    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
   5071    tcg_temp_free_i64(extaddr);
   5072
   5073    taddr = gen_aa32_addr(s, addr, opc);
   5074    t0 = tcg_temp_new_i32();
   5075    t1 = load_reg(s, rt);
   5076    if (size == 3) {
   5077        TCGv_i64 o64 = tcg_temp_new_i64();
   5078        TCGv_i64 n64 = tcg_temp_new_i64();
   5079
   5080        t2 = load_reg(s, rt2);
   5081
   5082        /*
   5083         * For AArch32, architecturally the 32-bit word at the lowest
   5084         * address is always Rt and the one at addr+4 is Rt2, even if
   5085         * the CPU is big-endian. Since we're going to treat this as a
   5086         * single 64-bit BE store, we need to put the two halves in the
   5087         * opposite order for BE to LE, so that they end up in the right
   5088         * places.  We don't want gen_aa32_st_i64, because that checks
   5089         * SCTLR_B as if for an architectural 64-bit access.
   5090         */
   5091        if (s->be_data == MO_BE) {
   5092            tcg_gen_concat_i32_i64(n64, t2, t1);
   5093        } else {
   5094            tcg_gen_concat_i32_i64(n64, t1, t2);
   5095        }
   5096        tcg_temp_free_i32(t2);
   5097
   5098        tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
   5099                                   get_mem_index(s), opc);
   5100        tcg_temp_free_i64(n64);
   5101
   5102        tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
   5103        tcg_gen_extrl_i64_i32(t0, o64);
   5104
   5105        tcg_temp_free_i64(o64);
   5106    } else {
   5107        t2 = tcg_temp_new_i32();
   5108        tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
   5109        tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
   5110        tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
   5111        tcg_temp_free_i32(t2);
   5112    }
   5113    tcg_temp_free_i32(t1);
   5114    tcg_temp_free(taddr);
   5115    tcg_gen_mov_i32(cpu_R[rd], t0);
   5116    tcg_temp_free_i32(t0);
   5117    tcg_gen_br(done_label);
   5118
   5119    gen_set_label(fail_label);
   5120    tcg_gen_movi_i32(cpu_R[rd], 1);
   5121    gen_set_label(done_label);
   5122    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
   5123}
   5124
   5125/* gen_srs:
   5126 * @env: CPUARMState
   5127 * @s: DisasContext
   5128 * @mode: mode field from insn (which stack to store to)
   5129 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
   5130 * @writeback: true if writeback bit set
   5131 *
   5132 * Generate code for the SRS (Store Return State) insn.
   5133 */
   5134static void gen_srs(DisasContext *s,
   5135                    uint32_t mode, uint32_t amode, bool writeback)
   5136{
   5137    int32_t offset;
   5138    TCGv_i32 addr, tmp;
   5139    bool undef = false;
   5140
   5141    /* SRS is:
   5142     * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
   5143     *   and specified mode is monitor mode
   5144     * - UNDEFINED in Hyp mode
   5145     * - UNPREDICTABLE in User or System mode
   5146     * - UNPREDICTABLE if the specified mode is:
   5147     * -- not implemented
   5148     * -- not a valid mode number
   5149     * -- a mode that's at a higher exception level
   5150     * -- Monitor, if we are Non-secure
   5151     * For the UNPREDICTABLE cases we choose to UNDEF.
   5152     */
   5153    if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
   5154        gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
   5155        return;
   5156    }
   5157
   5158    if (s->current_el == 0 || s->current_el == 2) {
   5159        undef = true;
   5160    }
   5161
   5162    switch (mode) {
   5163    case ARM_CPU_MODE_USR:
   5164    case ARM_CPU_MODE_FIQ:
   5165    case ARM_CPU_MODE_IRQ:
   5166    case ARM_CPU_MODE_SVC:
   5167    case ARM_CPU_MODE_ABT:
   5168    case ARM_CPU_MODE_UND:
   5169    case ARM_CPU_MODE_SYS:
   5170        break;
   5171    case ARM_CPU_MODE_HYP:
   5172        if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
   5173            undef = true;
   5174        }
   5175        break;
   5176    case ARM_CPU_MODE_MON:
   5177        /* No need to check specifically for "are we non-secure" because
   5178         * we've already made EL0 UNDEF and handled the trap for S-EL1;
   5179         * so if this isn't EL3 then we must be non-secure.
   5180         */
   5181        if (s->current_el != 3) {
   5182            undef = true;
   5183        }
   5184        break;
   5185    default:
   5186        undef = true;
   5187    }
   5188
   5189    if (undef) {
   5190        unallocated_encoding(s);
   5191        return;
   5192    }
   5193
   5194    addr = tcg_temp_new_i32();
   5195    tmp = tcg_const_i32(mode);
   5196    /* get_r13_banked() will raise an exception if called from System mode */
   5197    gen_set_condexec(s);
   5198    gen_set_pc_im(s, s->pc_curr);
   5199    gen_helper_get_r13_banked(addr, cpu_env, tmp);
   5200    tcg_temp_free_i32(tmp);
   5201    switch (amode) {
   5202    case 0: /* DA */
   5203        offset = -4;
   5204        break;
   5205    case 1: /* IA */
   5206        offset = 0;
   5207        break;
   5208    case 2: /* DB */
   5209        offset = -8;
   5210        break;
   5211    case 3: /* IB */
   5212        offset = 4;
   5213        break;
   5214    default:
   5215        abort();
   5216    }
   5217    tcg_gen_addi_i32(addr, addr, offset);
   5218    tmp = load_reg(s, 14);
   5219    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
   5220    tcg_temp_free_i32(tmp);
   5221    tmp = load_cpu_field(spsr);
   5222    tcg_gen_addi_i32(addr, addr, 4);
   5223    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
   5224    tcg_temp_free_i32(tmp);
   5225    if (writeback) {
   5226        switch (amode) {
   5227        case 0:
   5228            offset = -8;
   5229            break;
   5230        case 1:
   5231            offset = 4;
   5232            break;
   5233        case 2:
   5234            offset = -4;
   5235            break;
   5236        case 3:
   5237            offset = 0;
   5238            break;
   5239        default:
   5240            abort();
   5241        }
   5242        tcg_gen_addi_i32(addr, addr, offset);
   5243        tmp = tcg_const_i32(mode);
   5244        gen_helper_set_r13_banked(cpu_env, tmp, addr);
   5245        tcg_temp_free_i32(tmp);
   5246    }
   5247    tcg_temp_free_i32(addr);
   5248    s->base.is_jmp = DISAS_UPDATE_EXIT;
   5249}
   5250
   5251/* Skip this instruction if the ARM condition is false */
   5252static void arm_skip_unless(DisasContext *s, uint32_t cond)
   5253{
   5254    arm_gen_condlabel(s);
   5255    arm_gen_test_cc(cond ^ 1, s->condlabel);
   5256}
   5257
   5258
   5259/*
   5260 * Constant expanders used by T16/T32 decode
   5261 */
   5262
   5263/* Return only the rotation part of T32ExpandImm.  */
   5264static int t32_expandimm_rot(DisasContext *s, int x)
   5265{
   5266    return x & 0xc00 ? extract32(x, 7, 5) : 0;
   5267}
   5268
   5269/* Return the unrotated immediate from T32ExpandImm.  */
   5270static int t32_expandimm_imm(DisasContext *s, int x)
   5271{
   5272    int imm = extract32(x, 0, 8);
   5273
   5274    switch (extract32(x, 8, 4)) {
   5275    case 0: /* XY */
   5276        /* Nothing to do.  */
   5277        break;
   5278    case 1: /* 00XY00XY */
   5279        imm *= 0x00010001;
   5280        break;
   5281    case 2: /* XY00XY00 */
   5282        imm *= 0x01000100;
   5283        break;
   5284    case 3: /* XYXYXYXY */
   5285        imm *= 0x01010101;
   5286        break;
   5287    default:
   5288        /* Rotated constant.  */
   5289        imm |= 0x80;
   5290        break;
   5291    }
   5292    return imm;
   5293}
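        /*
         * Worked example of the T32ExpandImm split above: for imm12 = 0x4AA,
         * t32_expandimm_rot() yields 9 (bits [11:7]) and t32_expandimm_imm()
         * yields 0xAA (the low 8 bits with bit 7 forced on), so the final
         * constant is ror32(0xAA, 9) = 0x55000000, applied in op_s_rri_rot().
         */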
   5294
   5295static int t32_branch24(DisasContext *s, int x)
   5296{
   5297    /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
   5298    x ^= !(x < 0) * (3 << 21);
   5299    /* Append the final zero.  */
   5300    return x << 1;
   5301}
   5302
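        /*
         * T16 data-processing insns set the flags only when executed outside
         * an IT block, i.e. when condexec_mask is zero.
         */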
   5303static int t16_setflags(DisasContext *s)
   5304{
   5305    return s->condexec_mask == 0;
   5306}
   5307
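        /*
         * The T16 PUSH/POP register lists encode only r0-r7 plus one extra
         * bit: the expanders below map that bit to lr (r14) for PUSH and to
         * pc (r15) for POP.
         */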
   5308static int t16_push_list(DisasContext *s, int x)
   5309{
   5310    return (x & 0xff) | (x & 0x100) << (14 - 8);
   5311}
   5312
   5313static int t16_pop_list(DisasContext *s, int x)
   5314{
   5315    return (x & 0xff) | (x & 0x100) << (15 - 8);
   5316}
   5317
   5318/*
   5319 * Include the generated decoders.
   5320 */
   5321
   5322#include "decode-a32.c.inc"
   5323#include "decode-a32-uncond.c.inc"
   5324#include "decode-t32.c.inc"
   5325#include "decode-t16.c.inc"
   5326
   5327static bool valid_cp(DisasContext *s, int cp)
   5328{
   5329    /*
   5330     * Return true if this coprocessor field indicates something
   5331     * that's really a possible coprocessor.
   5332     * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
   5333     * and of those only cp14 and cp15 were used for registers.
   5334     * cp10 and cp11 were used for VFP and Neon, whose decode is
   5335     * dealt with elsewhere. With the advent of fp16, cp9 is also
   5336     * now part of VFP.
   5337     * For v8A and later, the encoding has been tightened so that
   5338     * only cp14 and cp15 are valid, and other values aren't considered
   5339     * to be in the coprocessor-instruction space at all. v8M still
   5340     * permits coprocessors 0..7.
   5341     * For XScale, we must not decode the XScale cp0, cp1 space as
   5342     * a standard coprocessor insn, because we want to fall through to
   5343     * the legacy disas_xscale_insn() decoder after decodetree is done.
   5344     */
   5345    if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
   5346        return false;
   5347    }
   5348
   5349    if (arm_dc_feature(s, ARM_FEATURE_V8) &&
   5350        !arm_dc_feature(s, ARM_FEATURE_M)) {
   5351        return cp >= 14;
   5352    }
   5353    return cp < 8 || cp >= 14;
   5354}
   5355
   5356static bool trans_MCR(DisasContext *s, arg_MCR *a)
   5357{
   5358    if (!valid_cp(s, a->cp)) {
   5359        return false;
   5360    }
   5361    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
   5362                   false, a->rt, 0);
   5363    return true;
   5364}
   5365
   5366static bool trans_MRC(DisasContext *s, arg_MRC *a)
   5367{
   5368    if (!valid_cp(s, a->cp)) {
   5369        return false;
   5370    }
   5371    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
   5372                   true, a->rt, 0);
   5373    return true;
   5374}
   5375
   5376static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
   5377{
   5378    if (!valid_cp(s, a->cp)) {
   5379        return false;
   5380    }
   5381    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
   5382                   false, a->rt, a->rt2);
   5383    return true;
   5384}
   5385
   5386static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
   5387{
   5388    if (!valid_cp(s, a->cp)) {
   5389        return false;
   5390    }
   5391    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
   5392                   true, a->rt, a->rt2);
   5393    return true;
   5394}
   5395
   5396/* Helpers to swap operands for reverse-subtract.  */
   5397static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
   5398{
   5399    tcg_gen_sub_i32(dst, b, a);
   5400}
   5401
   5402static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
   5403{
   5404    gen_sub_CC(dst, b, a);
   5405}
   5406
   5407static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
   5408{
   5409    gen_sub_carry(dest, b, a);
   5410}
   5411
   5412static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
   5413{
   5414    gen_sbc_CC(dest, b, a);
   5415}
   5416
   5417/*
   5418 * Helpers for the data processing routines.
   5419 *
    5420 * After the computation, store the result back.
    5421 * This may be suppressed altogether (STREG_NONE), require a runtime
    5422 * check against the stack limits (STREG_SP_CHECK), generate an
    5423 * exception return (STREG_EXC_RET), or store into a register (STREG_NORMAL).
   5424 *
   5425 * Always return true, indicating success for a trans_* function.
   5426 */
   5427typedef enum {
    5428    STREG_NONE,
    5429    STREG_NORMAL,
    5430    STREG_SP_CHECK,
    5431    STREG_EXC_RET,
   5432} StoreRegKind;
   5433
   5434static bool store_reg_kind(DisasContext *s, int rd,
   5435                            TCGv_i32 val, StoreRegKind kind)
   5436{
   5437    switch (kind) {
   5438    case STREG_NONE:
   5439        tcg_temp_free_i32(val);
   5440        return true;
   5441    case STREG_NORMAL:
   5442        /* See ALUWritePC: Interworking only from a32 mode. */
   5443        if (s->thumb) {
   5444            store_reg(s, rd, val);
   5445        } else {
   5446            store_reg_bx(s, rd, val);
   5447        }
   5448        return true;
   5449    case STREG_SP_CHECK:
   5450        store_sp_checked(s, val);
   5451        return true;
   5452    case STREG_EXC_RET:
   5453        gen_exception_return(s, val);
   5454        return true;
   5455    }
   5456    g_assert_not_reached();
   5457}
   5458
   5459/*
   5460 * Data Processing (register)
   5461 *
   5462 * Operate, with set flags, one register source,
   5463 * one immediate shifted register source, and a destination.
   5464 */
   5465static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
   5466                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
   5467                         int logic_cc, StoreRegKind kind)
   5468{
   5469    TCGv_i32 tmp1, tmp2;
   5470
   5471    tmp2 = load_reg(s, a->rm);
   5472    gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
   5473    tmp1 = load_reg(s, a->rn);
   5474
   5475    gen(tmp1, tmp1, tmp2);
   5476    tcg_temp_free_i32(tmp2);
   5477
   5478    if (logic_cc) {
   5479        gen_logic_CC(tmp1);
   5480    }
   5481    return store_reg_kind(s, a->rd, tmp1, kind);
   5482}
   5483
   5484static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
   5485                         void (*gen)(TCGv_i32, TCGv_i32),
   5486                         int logic_cc, StoreRegKind kind)
   5487{
   5488    TCGv_i32 tmp;
   5489
   5490    tmp = load_reg(s, a->rm);
   5491    gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
   5492
   5493    gen(tmp, tmp);
   5494    if (logic_cc) {
   5495        gen_logic_CC(tmp);
   5496    }
   5497    return store_reg_kind(s, a->rd, tmp, kind);
   5498}
   5499
   5500/*
   5501 * Data-processing (register-shifted register)
   5502 *
   5503 * Operate, with set flags, one register source,
   5504 * one register shifted register source, and a destination.
   5505 */
   5506static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
   5507                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
   5508                         int logic_cc, StoreRegKind kind)
   5509{
   5510    TCGv_i32 tmp1, tmp2;
   5511
   5512    tmp1 = load_reg(s, a->rs);
   5513    tmp2 = load_reg(s, a->rm);
   5514    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
   5515    tmp1 = load_reg(s, a->rn);
   5516
   5517    gen(tmp1, tmp1, tmp2);
   5518    tcg_temp_free_i32(tmp2);
   5519
   5520    if (logic_cc) {
   5521        gen_logic_CC(tmp1);
   5522    }
   5523    return store_reg_kind(s, a->rd, tmp1, kind);
   5524}
   5525
   5526static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
   5527                         void (*gen)(TCGv_i32, TCGv_i32),
   5528                         int logic_cc, StoreRegKind kind)
   5529{
   5530    TCGv_i32 tmp1, tmp2;
   5531
   5532    tmp1 = load_reg(s, a->rs);
   5533    tmp2 = load_reg(s, a->rm);
   5534    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
   5535
   5536    gen(tmp2, tmp2);
   5537    if (logic_cc) {
   5538        gen_logic_CC(tmp2);
   5539    }
   5540    return store_reg_kind(s, a->rd, tmp2, kind);
   5541}
   5542
   5543/*
   5544 * Data-processing (immediate)
   5545 *
   5546 * Operate, with set flags, one register source,
   5547 * one rotated immediate, and a destination.
   5548 *
   5549 * Note that logic_cc && a->rot setting CF based on the msb of the
   5550 * immediate is the reason why we must pass in the unrotated form
   5551 * of the immediate.
   5552 */
   5553static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
   5554                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
   5555                         int logic_cc, StoreRegKind kind)
   5556{
   5557    TCGv_i32 tmp1, tmp2;
   5558    uint32_t imm;
   5559
   5560    imm = ror32(a->imm, a->rot);
   5561    if (logic_cc && a->rot) {
   5562        tcg_gen_movi_i32(cpu_CF, imm >> 31);
   5563    }
   5564    tmp2 = tcg_const_i32(imm);
   5565    tmp1 = load_reg(s, a->rn);
   5566
   5567    gen(tmp1, tmp1, tmp2);
   5568    tcg_temp_free_i32(tmp2);
   5569
   5570    if (logic_cc) {
   5571        gen_logic_CC(tmp1);
   5572    }
   5573    return store_reg_kind(s, a->rd, tmp1, kind);
   5574}
   5575
   5576static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
   5577                         void (*gen)(TCGv_i32, TCGv_i32),
   5578                         int logic_cc, StoreRegKind kind)
   5579{
   5580    TCGv_i32 tmp;
   5581    uint32_t imm;
   5582
   5583    imm = ror32(a->imm, a->rot);
   5584    if (logic_cc && a->rot) {
   5585        tcg_gen_movi_i32(cpu_CF, imm >> 31);
   5586    }
   5587    tmp = tcg_const_i32(imm);
   5588
   5589    gen(tmp, tmp);
   5590    if (logic_cc) {
   5591        gen_logic_CC(tmp);
   5592    }
   5593    return store_reg_kind(s, a->rd, tmp, kind);
   5594}
   5595
   5596#define DO_ANY3(NAME, OP, L, K)                                         \
   5597    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
   5598    { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
   5599    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
   5600    { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
   5601    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
   5602    { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
   5603
   5604#define DO_ANY2(NAME, OP, L, K)                                         \
   5605    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
   5606    { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
   5607    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
   5608    { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
   5609    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
   5610    { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
   5611
   5612#define DO_CMP2(NAME, OP, L)                                            \
   5613    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
   5614    { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
   5615    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
   5616    { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
   5617    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
   5618    { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
   5619
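        /*
         * For example, DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
         * expands to trans_AND_rrri(), trans_AND_rrrr() and trans_AND_rri(),
         * each forwarding to the matching op_s_* helper with the chosen
         * StoreRegKind.
         */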
   5620DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
   5621DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
   5622DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
   5623DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
   5624
   5625DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
   5626DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
   5627DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
   5628DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
   5629
   5630DO_CMP2(TST, tcg_gen_and_i32, true)
   5631DO_CMP2(TEQ, tcg_gen_xor_i32, true)
   5632DO_CMP2(CMN, gen_add_CC, false)
   5633DO_CMP2(CMP, gen_sub_CC, false)
   5634
   5635DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
   5636        a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
   5637
   5638/*
    5639 * Note that for the computation of StoreRegKind we may return out of
    5640 * the middle of the functions expanded by DO_ANY3, and that we modify
    5641 * a->s via that parameter before it is used by OP.
   5642 */
   5643DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
   5644        ({
   5645            StoreRegKind ret = STREG_NORMAL;
   5646            if (a->rd == 15 && a->s) {
   5647                /*
   5648                 * See ALUExceptionReturn:
   5649                 * In User mode, UNPREDICTABLE; we choose UNDEF.
   5650                 * In Hyp mode, UNDEFINED.
   5651                 */
   5652                if (IS_USER(s) || s->current_el == 2) {
   5653                    unallocated_encoding(s);
   5654                    return true;
   5655                }
   5656                /* There is no writeback of nzcv to PSTATE.  */
   5657                a->s = 0;
   5658                ret = STREG_EXC_RET;
   5659            } else if (a->rd == 13 && a->rn == 13) {
   5660                ret = STREG_SP_CHECK;
   5661            }
   5662            ret;
   5663        }))
   5664
   5665DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
   5666        ({
   5667            StoreRegKind ret = STREG_NORMAL;
   5668            if (a->rd == 15 && a->s) {
   5669                /*
   5670                 * See ALUExceptionReturn:
   5671                 * In User mode, UNPREDICTABLE; we choose UNDEF.
   5672                 * In Hyp mode, UNDEFINED.
   5673                 */
   5674                if (IS_USER(s) || s->current_el == 2) {
   5675                    unallocated_encoding(s);
   5676                    return true;
   5677                }
   5678                /* There is no writeback of nzcv to PSTATE.  */
   5679                a->s = 0;
   5680                ret = STREG_EXC_RET;
   5681            } else if (a->rd == 13) {
   5682                ret = STREG_SP_CHECK;
   5683            }
   5684            ret;
   5685        }))
   5686
   5687DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
   5688
   5689/*
   5690 * ORN is only available with T32, so there is no register-shifted-register
   5691 * form of the insn.  Using the DO_ANY3 macro would create an unused function.
   5692 */
   5693static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
   5694{
   5695    return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
   5696}
   5697
   5698static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
   5699{
   5700    return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
   5701}
   5702
   5703#undef DO_ANY3
   5704#undef DO_ANY2
   5705#undef DO_CMP2
   5706
   5707static bool trans_ADR(DisasContext *s, arg_ri *a)
   5708{
   5709    store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
   5710    return true;
   5711}
   5712
   5713static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
   5714{
   5715    TCGv_i32 tmp;
   5716
   5717    if (!ENABLE_ARCH_6T2) {
   5718        return false;
   5719    }
   5720
   5721    tmp = tcg_const_i32(a->imm);
   5722    store_reg(s, a->rd, tmp);
   5723    return true;
   5724}
   5725
   5726static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
   5727{
   5728    TCGv_i32 tmp;
   5729
   5730    if (!ENABLE_ARCH_6T2) {
   5731        return false;
   5732    }
   5733
   5734    tmp = load_reg(s, a->rd);
   5735    tcg_gen_ext16u_i32(tmp, tmp);
   5736    tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
   5737    store_reg(s, a->rd, tmp);
   5738    return true;
   5739}
   5740
   5741/*
   5742 * v8.1M MVE wide-shifts
   5743 */
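        /*
         * The long forms treat the general registers RdaLo and RdaHi as a
         * single 64-bit value RdaHi:RdaLo: do_mve_shl_ri() and do_mve_shl_rr()
         * concatenate the pair, apply the shift, and split the result back.
         * The single-register forms further down operate on Rda directly.
         */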
   5744static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
   5745                          WideShiftImmFn *fn)
   5746{
   5747    TCGv_i64 rda;
   5748    TCGv_i32 rdalo, rdahi;
   5749
   5750    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
   5751        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
   5752        return false;
   5753    }
   5754    if (a->rdahi == 15) {
   5755        /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
   5756        return false;
   5757    }
   5758    if (!dc_isar_feature(aa32_mve, s) ||
   5759        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
   5760        a->rdahi == 13) {
   5761        /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
   5762        unallocated_encoding(s);
   5763        return true;
   5764    }
   5765
   5766    if (a->shim == 0) {
   5767        a->shim = 32;
   5768    }
   5769
   5770    rda = tcg_temp_new_i64();
   5771    rdalo = load_reg(s, a->rdalo);
   5772    rdahi = load_reg(s, a->rdahi);
   5773    tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
   5774
   5775    fn(rda, rda, a->shim);
   5776
   5777    tcg_gen_extrl_i64_i32(rdalo, rda);
   5778    tcg_gen_extrh_i64_i32(rdahi, rda);
   5779    store_reg(s, a->rdalo, rdalo);
   5780    store_reg(s, a->rdahi, rdahi);
   5781    tcg_temp_free_i64(rda);
   5782
   5783    return true;
   5784}
   5785
   5786static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5787{
   5788    return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
   5789}
   5790
   5791static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5792{
   5793    return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
   5794}
   5795
   5796static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5797{
   5798    return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
   5799}
   5800
   5801static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
   5802{
   5803    gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
   5804}
   5805
   5806static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5807{
   5808    return do_mve_shl_ri(s, a, gen_mve_sqshll);
   5809}
   5810
   5811static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
   5812{
   5813    gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
   5814}
   5815
   5816static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5817{
   5818    return do_mve_shl_ri(s, a, gen_mve_uqshll);
   5819}
   5820
   5821static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5822{
   5823    return do_mve_shl_ri(s, a, gen_srshr64_i64);
   5824}
   5825
   5826static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5827{
   5828    return do_mve_shl_ri(s, a, gen_urshr64_i64);
   5829}
   5830
   5831static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
   5832{
   5833    TCGv_i64 rda;
   5834    TCGv_i32 rdalo, rdahi;
   5835
   5836    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
   5837        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
   5838        return false;
   5839    }
   5840    if (a->rdahi == 15) {
   5841        /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
   5842        return false;
   5843    }
   5844    if (!dc_isar_feature(aa32_mve, s) ||
   5845        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
   5846        a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
   5847        a->rm == a->rdahi || a->rm == a->rdalo) {
   5848        /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
   5849        unallocated_encoding(s);
   5850        return true;
   5851    }
   5852
   5853    rda = tcg_temp_new_i64();
   5854    rdalo = load_reg(s, a->rdalo);
   5855    rdahi = load_reg(s, a->rdahi);
   5856    tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
   5857
   5858    /* The helper takes care of the sign-extension of the low 8 bits of Rm */
   5859    fn(rda, cpu_env, rda, cpu_R[a->rm]);
   5860
   5861    tcg_gen_extrl_i64_i32(rdalo, rda);
   5862    tcg_gen_extrh_i64_i32(rdahi, rda);
   5863    store_reg(s, a->rdalo, rdalo);
   5864    store_reg(s, a->rdahi, rdahi);
   5865    tcg_temp_free_i64(rda);
   5866
   5867    return true;
   5868}
   5869
   5870static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
   5871{
   5872    return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
   5873}
   5874
   5875static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
   5876{
   5877    return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
   5878}
   5879
   5880static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
   5881{
   5882    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
   5883}
   5884
   5885static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
   5886{
   5887    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
   5888}
   5889
   5890static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
   5891{
   5892    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
   5893}
   5894
   5895static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
   5896{
   5897    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
   5898}
   5899
   5900static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
   5901{
   5902    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
   5903        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
   5904        return false;
   5905    }
   5906    if (!dc_isar_feature(aa32_mve, s) ||
   5907        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
   5908        a->rda == 13 || a->rda == 15) {
   5909        /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
   5910        unallocated_encoding(s);
   5911        return true;
   5912    }
   5913
   5914    if (a->shim == 0) {
   5915        a->shim = 32;
   5916    }
   5917    fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
   5918
   5919    return true;
   5920}
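        /*
         * Note: an immediate shift amount of 32 is encoded as 0 in these
         * instructions, hence the shim == 0 -> 32 fixup above.
         */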
   5921
   5922static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
   5923{
   5924    return do_mve_sh_ri(s, a, gen_urshr32_i32);
   5925}
   5926
   5927static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
   5928{
   5929    return do_mve_sh_ri(s, a, gen_srshr32_i32);
   5930}
   5931
   5932static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
   5933{
   5934    gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
   5935}
   5936
   5937static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
   5938{
   5939    return do_mve_sh_ri(s, a, gen_mve_sqshl);
   5940}
   5941
   5942static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
   5943{
   5944    gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
   5945}
   5946
   5947static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
   5948{
   5949    return do_mve_sh_ri(s, a, gen_mve_uqshl);
   5950}
   5951
   5952static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
   5953{
   5954    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
   5955        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
   5956        return false;
   5957    }
   5958    if (!dc_isar_feature(aa32_mve, s) ||
   5959        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
   5960        a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
   5961        a->rm == a->rda) {
   5962        /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
   5963        unallocated_encoding(s);
   5964        return true;
   5965    }
   5966
   5967    /* The helper takes care of the sign-extension of the low 8 bits of Rm */
   5968    fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
   5969    return true;
   5970}
   5971
   5972static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
   5973{
   5974    return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
   5975}
   5976
   5977static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
   5978{
   5979    return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
   5980}
   5981
   5982/*
   5983 * Multiply and multiply accumulate
   5984 */
   5985
   5986static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
   5987{
   5988    TCGv_i32 t1, t2;
   5989
   5990    t1 = load_reg(s, a->rn);
   5991    t2 = load_reg(s, a->rm);
   5992    tcg_gen_mul_i32(t1, t1, t2);
   5993    tcg_temp_free_i32(t2);
   5994    if (add) {
   5995        t2 = load_reg(s, a->ra);
   5996        tcg_gen_add_i32(t1, t1, t2);
   5997        tcg_temp_free_i32(t2);
   5998    }
   5999    if (a->s) {
   6000        gen_logic_CC(t1);
   6001    }
   6002    store_reg(s, a->rd, t1);
   6003    return true;
   6004}
   6005
   6006static bool trans_MUL(DisasContext *s, arg_MUL *a)
   6007{
   6008    return op_mla(s, a, false);
   6009}
   6010
   6011static bool trans_MLA(DisasContext *s, arg_MLA *a)
   6012{
   6013    return op_mla(s, a, true);
   6014}
   6015
   6016static bool trans_MLS(DisasContext *s, arg_MLS *a)
   6017{
   6018    TCGv_i32 t1, t2;
   6019
   6020    if (!ENABLE_ARCH_6T2) {
   6021        return false;
   6022    }
   6023    t1 = load_reg(s, a->rn);
   6024    t2 = load_reg(s, a->rm);
   6025    tcg_gen_mul_i32(t1, t1, t2);
   6026    tcg_temp_free_i32(t2);
   6027    t2 = load_reg(s, a->ra);
   6028    tcg_gen_sub_i32(t1, t2, t1);
   6029    tcg_temp_free_i32(t2);
   6030    store_reg(s, a->rd, t1);
   6031    return true;
   6032}
   6033
   6034static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
   6035{
   6036    TCGv_i32 t0, t1, t2, t3;
   6037
   6038    t0 = load_reg(s, a->rm);
   6039    t1 = load_reg(s, a->rn);
   6040    if (uns) {
   6041        tcg_gen_mulu2_i32(t0, t1, t0, t1);
   6042    } else {
   6043        tcg_gen_muls2_i32(t0, t1, t0, t1);
   6044    }
   6045    if (add) {
   6046        t2 = load_reg(s, a->ra);
   6047        t3 = load_reg(s, a->rd);
   6048        tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
   6049        tcg_temp_free_i32(t2);
   6050        tcg_temp_free_i32(t3);
   6051    }
   6052    if (a->s) {
   6053        gen_logicq_cc(t0, t1);
   6054    }
   6055    store_reg(s, a->ra, t0);
   6056    store_reg(s, a->rd, t1);
   6057    return true;
   6058}
   6059
   6060static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
   6061{
   6062    return op_mlal(s, a, true, false);
   6063}
   6064
   6065static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
   6066{
   6067    return op_mlal(s, a, false, false);
   6068}
   6069
   6070static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
   6071{
   6072    return op_mlal(s, a, true, true);
   6073}
   6074
   6075static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
   6076{
   6077    return op_mlal(s, a, false, true);
   6078}
   6079
   6080static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
   6081{
   6082    TCGv_i32 t0, t1, t2, zero;
   6083
   6084    if (s->thumb
   6085        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   6086        : !ENABLE_ARCH_6) {
   6087        return false;
   6088    }
   6089
   6090    t0 = load_reg(s, a->rm);
   6091    t1 = load_reg(s, a->rn);
   6092    tcg_gen_mulu2_i32(t0, t1, t0, t1);
   6093    zero = tcg_const_i32(0);
   6094    t2 = load_reg(s, a->ra);
   6095    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
   6096    tcg_temp_free_i32(t2);
   6097    t2 = load_reg(s, a->rd);
   6098    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
   6099    tcg_temp_free_i32(t2);
   6100    tcg_temp_free_i32(zero);
   6101    store_reg(s, a->ra, t0);
   6102    store_reg(s, a->rd, t1);
   6103    return true;
   6104}
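        /*
         * Worked arithmetic for why UMAAL needs no overflow handling: the
         * largest possible value is
         *   (2^32 - 1) * (2^32 - 1) + (2^32 - 1) + (2^32 - 1) = 2^64 - 1,
         * so the double accumulation always fits in the 64-bit result and
         * two plain add2 operations with a zero high word are sufficient.
         */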
   6105
   6106/*
   6107 * Saturating addition and subtraction
   6108 */
   6109
   6110static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
   6111{
   6112    TCGv_i32 t0, t1;
   6113
   6114    if (s->thumb
   6115        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   6116        : !ENABLE_ARCH_5TE) {
   6117        return false;
   6118    }
   6119
   6120    t0 = load_reg(s, a->rm);
   6121    t1 = load_reg(s, a->rn);
   6122    if (doub) {
   6123        gen_helper_add_saturate(t1, cpu_env, t1, t1);
   6124    }
   6125    if (add) {
   6126        gen_helper_add_saturate(t0, cpu_env, t0, t1);
   6127    } else {
   6128        gen_helper_sub_saturate(t0, cpu_env, t0, t1);
   6129    }
   6130    tcg_temp_free_i32(t1);
   6131    store_reg(s, a->rd, t0);
   6132    return true;
   6133}
   6134
   6135#define DO_QADDSUB(NAME, ADD, DOUB) \
   6136static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
   6137{                                                        \
   6138    return op_qaddsub(s, a, ADD, DOUB);                  \
   6139}
   6140
   6141DO_QADDSUB(QADD, true, false)
   6142DO_QADDSUB(QSUB, false, false)
   6143DO_QADDSUB(QDADD, true, true)
   6144DO_QADDSUB(QDSUB, false, true)
   6145
   6146#undef DO_QADDSUB
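        /*
         * For example, QDADD Rd, Rm, Rn computes Rd = sat(Rm + sat(2 * Rn)):
         * the 'doub' path above first doubles Rn with saturation, and that
         * result then feeds the saturating add or subtract with Rm.
         */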
   6147
   6148/*
   6149 * Halfword multiply and multiply accumulate
   6150 */
   6151
   6152static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
   6153                       int add_long, bool nt, bool mt)
   6154{
   6155    TCGv_i32 t0, t1, tl, th;
   6156
   6157    if (s->thumb
   6158        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   6159        : !ENABLE_ARCH_5TE) {
   6160        return false;
   6161    }
   6162
   6163    t0 = load_reg(s, a->rn);
   6164    t1 = load_reg(s, a->rm);
   6165    gen_mulxy(t0, t1, nt, mt);
   6166    tcg_temp_free_i32(t1);
   6167
   6168    switch (add_long) {
   6169    case 0:
   6170        store_reg(s, a->rd, t0);
   6171        break;
   6172    case 1:
   6173        t1 = load_reg(s, a->ra);
   6174        gen_helper_add_setq(t0, cpu_env, t0, t1);
   6175        tcg_temp_free_i32(t1);
   6176        store_reg(s, a->rd, t0);
   6177        break;
   6178    case 2:
   6179        tl = load_reg(s, a->ra);
   6180        th = load_reg(s, a->rd);
   6181        /* Sign-extend the 32-bit product to 64 bits.  */
   6182        t1 = tcg_temp_new_i32();
   6183        tcg_gen_sari_i32(t1, t0, 31);
   6184        tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
   6185        tcg_temp_free_i32(t0);
   6186        tcg_temp_free_i32(t1);
   6187        store_reg(s, a->ra, tl);
   6188        store_reg(s, a->rd, th);
   6189        break;
   6190    default:
   6191        g_assert_not_reached();
   6192    }
   6193    return true;
   6194}
   6195
   6196#define DO_SMLAX(NAME, add, nt, mt) \
   6197static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
   6198{                                                          \
   6199    return op_smlaxxx(s, a, add, nt, mt);                  \
   6200}
   6201
   6202DO_SMLAX(SMULBB, 0, 0, 0)
   6203DO_SMLAX(SMULBT, 0, 0, 1)
   6204DO_SMLAX(SMULTB, 0, 1, 0)
   6205DO_SMLAX(SMULTT, 0, 1, 1)
   6206
   6207DO_SMLAX(SMLABB, 1, 0, 0)
   6208DO_SMLAX(SMLABT, 1, 0, 1)
   6209DO_SMLAX(SMLATB, 1, 1, 0)
   6210DO_SMLAX(SMLATT, 1, 1, 1)
   6211
   6212DO_SMLAX(SMLALBB, 2, 0, 0)
   6213DO_SMLAX(SMLALBT, 2, 0, 1)
   6214DO_SMLAX(SMLALTB, 2, 1, 0)
   6215DO_SMLAX(SMLALTT, 2, 1, 1)
   6216
   6217#undef DO_SMLAX
   6218
   6219static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
   6220{
   6221    TCGv_i32 t0, t1;
   6222
   6223    if (!ENABLE_ARCH_5TE) {
   6224        return false;
   6225    }
   6226
   6227    t0 = load_reg(s, a->rn);
   6228    t1 = load_reg(s, a->rm);
   6229    /*
   6230     * Since the nominal result is product<47:16>, shift the 16-bit
   6231     * input up by 16 bits, so that the result is at product<63:32>.
   6232     */
   6233    if (mt) {
   6234        tcg_gen_andi_i32(t1, t1, 0xffff0000);
   6235    } else {
   6236        tcg_gen_shli_i32(t1, t1, 16);
   6237    }
   6238    tcg_gen_muls2_i32(t0, t1, t0, t1);
   6239    tcg_temp_free_i32(t0);
   6240    if (add) {
   6241        t0 = load_reg(s, a->ra);
   6242        gen_helper_add_setq(t1, cpu_env, t1, t0);
   6243        tcg_temp_free_i32(t0);
   6244    }
   6245    store_reg(s, a->rd, t1);
   6246    return true;
   6247}
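        /*
         * Worked example of the shift trick above: with Rn = 0x00010000 and
         * the selected half of Rm equal to 0x0002, the architectural result
         * is product<47:16> = (65536 * 2) >> 16 = 2.  Pre-shifting the
         * 16-bit operand gives 65536 * 0x20000 = 0x2'0000'0000, whose top
         * 32 bits (product<63:32>) are the same value, 2.
         */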
   6248
   6249#define DO_SMLAWX(NAME, add, mt) \
   6250static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
   6251{                                                          \
   6252    return op_smlawx(s, a, add, mt);                       \
   6253}
   6254
   6255DO_SMLAWX(SMULWB, 0, 0)
   6256DO_SMLAWX(SMULWT, 0, 1)
   6257DO_SMLAWX(SMLAWB, 1, 0)
   6258DO_SMLAWX(SMLAWT, 1, 1)
   6259
   6260#undef DO_SMLAWX
   6261
   6262/*
   6263 * MSR (immediate) and hints
   6264 */
   6265
   6266static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
   6267{
   6268    /*
   6269     * When running single-threaded TCG code, use the helper to ensure that
   6270     * the next round-robin scheduled vCPU gets a crack.  When running in
   6271     * MTTCG we don't generate jumps to the helper as it won't affect the
   6272     * scheduling of other vCPUs.
   6273     */
   6274    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
   6275        gen_set_pc_im(s, s->base.pc_next);
   6276        s->base.is_jmp = DISAS_YIELD;
   6277    }
   6278    return true;
   6279}
   6280
   6281static bool trans_WFE(DisasContext *s, arg_WFE *a)
   6282{
   6283    /*
   6284     * When running single-threaded TCG code, use the helper to ensure that
   6285     * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
   6286     * just skip this instruction.  Currently the SEV/SEVL instructions,
   6287     * which are *one* of many ways to wake the CPU from WFE, are not
   6288     * implemented so we can't sleep like WFI does.
   6289     */
   6290    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
   6291        gen_set_pc_im(s, s->base.pc_next);
   6292        s->base.is_jmp = DISAS_WFE;
   6293    }
   6294    return true;
   6295}
   6296
   6297static bool trans_WFI(DisasContext *s, arg_WFI *a)
   6298{
   6299    /* For WFI, halt the vCPU until an IRQ. */
   6300    gen_set_pc_im(s, s->base.pc_next);
   6301    s->base.is_jmp = DISAS_WFI;
   6302    return true;
   6303}
   6304
   6305static bool trans_NOP(DisasContext *s, arg_NOP *a)
   6306{
   6307    return true;
   6308}
   6309
   6310static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
   6311{
   6312    uint32_t val = ror32(a->imm, a->rot * 2);
   6313    uint32_t mask = msr_mask(s, a->mask, a->r);
   6314
   6315    if (gen_set_psr_im(s, mask, a->r, val)) {
   6316        unallocated_encoding(s);
   6317    }
   6318    return true;
   6319}
   6320
   6321/*
   6322 * Cyclic Redundancy Check
   6323 */
   6324
   6325static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
   6326{
   6327    TCGv_i32 t1, t2, t3;
   6328
   6329    if (!dc_isar_feature(aa32_crc32, s)) {
   6330        return false;
   6331    }
   6332
   6333    t1 = load_reg(s, a->rn);
   6334    t2 = load_reg(s, a->rm);
   6335    switch (sz) {
   6336    case MO_8:
   6337        gen_uxtb(t2);
   6338        break;
   6339    case MO_16:
   6340        gen_uxth(t2);
   6341        break;
   6342    case MO_32:
   6343        break;
   6344    default:
   6345        g_assert_not_reached();
   6346    }
   6347    t3 = tcg_const_i32(1 << sz);
   6348    if (c) {
   6349        gen_helper_crc32c(t1, t1, t2, t3);
   6350    } else {
   6351        gen_helper_crc32(t1, t1, t2, t3);
   6352    }
   6353    tcg_temp_free_i32(t2);
   6354    tcg_temp_free_i32(t3);
   6355    store_reg(s, a->rd, t1);
   6356    return true;
   6357}
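        /*
         * The third helper operand is the operand width in bytes (1 << sz);
         * the 'c' flag selects the CRC-32C (Castagnoli) polynomial used by
         * the CRC32C* encodings rather than the standard CRC-32 polynomial.
         */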
   6358
   6359#define DO_CRC32(NAME, c, sz) \
   6360static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
   6361    { return op_crc32(s, a, c, sz); }
   6362
   6363DO_CRC32(CRC32B, false, MO_8)
   6364DO_CRC32(CRC32H, false, MO_16)
   6365DO_CRC32(CRC32W, false, MO_32)
   6366DO_CRC32(CRC32CB, true, MO_8)
   6367DO_CRC32(CRC32CH, true, MO_16)
   6368DO_CRC32(CRC32CW, true, MO_32)
   6369
   6370#undef DO_CRC32
   6371
   6372/*
   6373 * Miscellaneous instructions
   6374 */
   6375
   6376static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
   6377{
   6378    if (arm_dc_feature(s, ARM_FEATURE_M)) {
   6379        return false;
   6380    }
   6381    gen_mrs_banked(s, a->r, a->sysm, a->rd);
   6382    return true;
   6383}
   6384
   6385static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
   6386{
   6387    if (arm_dc_feature(s, ARM_FEATURE_M)) {
   6388        return false;
   6389    }
   6390    gen_msr_banked(s, a->r, a->sysm, a->rn);
   6391    return true;
   6392}
   6393
   6394static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
   6395{
   6396    TCGv_i32 tmp;
   6397
   6398    if (arm_dc_feature(s, ARM_FEATURE_M)) {
   6399        return false;
   6400    }
   6401    if (a->r) {
   6402        if (IS_USER(s)) {
   6403            unallocated_encoding(s);
   6404            return true;
   6405        }
   6406        tmp = load_cpu_field(spsr);
   6407    } else {
   6408        tmp = tcg_temp_new_i32();
   6409        gen_helper_cpsr_read(tmp, cpu_env);
   6410    }
   6411    store_reg(s, a->rd, tmp);
   6412    return true;
   6413}
   6414
   6415static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
   6416{
   6417    TCGv_i32 tmp;
   6418    uint32_t mask = msr_mask(s, a->mask, a->r);
   6419
   6420    if (arm_dc_feature(s, ARM_FEATURE_M)) {
   6421        return false;
   6422    }
   6423    tmp = load_reg(s, a->rn);
   6424    if (gen_set_psr(s, mask, a->r, tmp)) {
   6425        unallocated_encoding(s);
   6426    }
   6427    return true;
   6428}
   6429
   6430static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
   6431{
   6432    TCGv_i32 tmp;
   6433
   6434    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
   6435        return false;
   6436    }
   6437    tmp = tcg_const_i32(a->sysm);
   6438    gen_helper_v7m_mrs(tmp, cpu_env, tmp);
   6439    store_reg(s, a->rd, tmp);
   6440    return true;
   6441}
   6442
   6443static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
   6444{
   6445    TCGv_i32 addr, reg;
   6446
   6447    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
   6448        return false;
   6449    }
   6450    addr = tcg_const_i32((a->mask << 10) | a->sysm);
   6451    reg = load_reg(s, a->rn);
   6452    gen_helper_v7m_msr(cpu_env, addr, reg);
   6453    tcg_temp_free_i32(addr);
   6454    tcg_temp_free_i32(reg);
   6455    /* If we wrote to CONTROL, the EL might have changed */
   6456    gen_helper_rebuild_hflags_m32_newel(cpu_env);
   6457    gen_lookup_tb(s);
   6458    return true;
   6459}
   6460
   6461static bool trans_BX(DisasContext *s, arg_BX *a)
   6462{
   6463    if (!ENABLE_ARCH_4T) {
   6464        return false;
   6465    }
   6466    gen_bx_excret(s, load_reg(s, a->rm));
   6467    return true;
   6468}
   6469
   6470static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
   6471{
   6472    if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
   6473        return false;
   6474    }
   6475    /*
   6476     * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
   6477     * TBFLAGS bit on a basically-never-happens case, so call a helper
   6478     * function to check for the trap and raise the exception if needed
   6479     * (passing it the register number for the syndrome value).
   6480     * v8A doesn't have this HSTR bit.
   6481     */
   6482    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
   6483        arm_dc_feature(s, ARM_FEATURE_EL2) &&
   6484        s->current_el < 2 && s->ns) {
   6485        gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
   6486    }
   6487    /* Trivial implementation equivalent to bx.  */
   6488    gen_bx(s, load_reg(s, a->rm));
   6489    return true;
   6490}
   6491
   6492static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
   6493{
   6494    TCGv_i32 tmp;
   6495
   6496    if (!ENABLE_ARCH_5) {
   6497        return false;
   6498    }
   6499    tmp = load_reg(s, a->rm);
   6500    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
   6501    gen_bx(s, tmp);
   6502    return true;
   6503}
   6504
   6505/*
   6506 * BXNS/BLXNS: only exist for v8M with the security extensions,
   6507 * and always UNDEF if NonSecure.  We don't implement these in
   6508 * the user-only mode either (in theory you can use them from
   6509 * Secure User mode but they are too tied in to system emulation).
   6510 */
   6511static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
   6512{
   6513    if (!s->v8m_secure || IS_USER_ONLY) {
   6514        unallocated_encoding(s);
   6515    } else {
   6516        gen_bxns(s, a->rm);
   6517    }
   6518    return true;
   6519}
   6520
   6521static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
   6522{
   6523    if (!s->v8m_secure || IS_USER_ONLY) {
   6524        unallocated_encoding(s);
   6525    } else {
   6526        gen_blxns(s, a->rm);
   6527    }
   6528    return true;
   6529}
   6530
   6531static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
   6532{
   6533    TCGv_i32 tmp;
   6534
   6535    if (!ENABLE_ARCH_5) {
   6536        return false;
   6537    }
   6538    tmp = load_reg(s, a->rm);
   6539    tcg_gen_clzi_i32(tmp, tmp, 32);
   6540    store_reg(s, a->rd, tmp);
   6541    return true;
   6542}
   6543
   6544static bool trans_ERET(DisasContext *s, arg_ERET *a)
   6545{
   6546    TCGv_i32 tmp;
   6547
   6548    if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
   6549        return false;
   6550    }
   6551    if (IS_USER(s)) {
   6552        unallocated_encoding(s);
   6553        return true;
   6554    }
   6555    if (s->current_el == 2) {
   6556        /* ERET from Hyp uses ELR_Hyp, not LR */
   6557        tmp = load_cpu_field(elr_el[2]);
   6558    } else {
   6559        tmp = load_reg(s, 14);
   6560    }
   6561    gen_exception_return(s, tmp);
   6562    return true;
   6563}
   6564
   6565static bool trans_HLT(DisasContext *s, arg_HLT *a)
   6566{
   6567    gen_hlt(s, a->imm);
   6568    return true;
   6569}
   6570
   6571static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
   6572{
   6573    if (!ENABLE_ARCH_5) {
   6574        return false;
   6575    }
   6576    /* BKPT is OK with ECI set and leaves it untouched */
   6577    s->eci_handled = true;
   6578    if (arm_dc_feature(s, ARM_FEATURE_M) &&
   6579        semihosting_enabled() &&
   6580#ifndef CONFIG_USER_ONLY
   6581        !IS_USER(s) &&
   6582#endif
   6583        (a->imm == 0xab)) {
   6584        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
   6585    } else {
   6586        gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
   6587    }
   6588    return true;
   6589}
   6590
   6591static bool trans_HVC(DisasContext *s, arg_HVC *a)
   6592{
   6593    if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
   6594        return false;
   6595    }
   6596    if (IS_USER(s)) {
   6597        unallocated_encoding(s);
   6598    } else {
   6599        gen_hvc(s, a->imm);
   6600    }
   6601    return true;
   6602}
   6603
   6604static bool trans_SMC(DisasContext *s, arg_SMC *a)
   6605{
   6606    if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
   6607        return false;
   6608    }
   6609    if (IS_USER(s)) {
   6610        unallocated_encoding(s);
   6611    } else {
   6612        gen_smc(s);
   6613    }
   6614    return true;
   6615}
   6616
   6617static bool trans_SG(DisasContext *s, arg_SG *a)
   6618{
   6619    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
   6620        !arm_dc_feature(s, ARM_FEATURE_V8)) {
   6621        return false;
   6622    }
   6623    /*
   6624     * SG (v8M only)
   6625     * The bulk of the behaviour for this instruction is implemented
   6626     * in v7m_handle_execute_nsc(), which deals with the insn when
   6627     * it is executed by a CPU in non-secure state from memory
   6628     * which is Secure & NonSecure-Callable.
   6629     * Here we only need to handle the remaining cases:
   6630     *  * in NS memory (including the "security extension not
   6631     *    implemented" case) : NOP
   6632     *  * in S memory but CPU already secure (clear IT bits)
   6633     * We know that the attribute for the memory this insn is
   6634     * in must match the current CPU state, because otherwise
   6635     * get_phys_addr_pmsav8 would have generated an exception.
   6636     */
   6637    if (s->v8m_secure) {
   6638        /* Like the IT insn, we don't need to generate any code */
   6639        s->condexec_cond = 0;
   6640        s->condexec_mask = 0;
   6641    }
   6642    return true;
   6643}
   6644
   6645static bool trans_TT(DisasContext *s, arg_TT *a)
   6646{
   6647    TCGv_i32 addr, tmp;
   6648
   6649    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
   6650        !arm_dc_feature(s, ARM_FEATURE_V8)) {
   6651        return false;
   6652    }
   6653    if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
   6654        /* We UNDEF for these UNPREDICTABLE cases */
   6655        unallocated_encoding(s);
   6656        return true;
   6657    }
   6658    if (a->A && !s->v8m_secure) {
   6659        /* This case is UNDEFINED.  */
   6660        unallocated_encoding(s);
   6661        return true;
   6662    }
   6663
   6664    addr = load_reg(s, a->rn);
   6665    tmp = tcg_const_i32((a->A << 1) | a->T);
   6666    gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
   6667    tcg_temp_free_i32(addr);
   6668    store_reg(s, a->rd, tmp);
   6669    return true;
   6670}
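        /*
         * The (A << 1) | T value distinguishes the four test-target forms:
         * TT (0), TTT (unprivileged, 1), TTA (alternate security state, 2)
         * and TTAT (3); the helper returns the MPU/SAU query result that is
         * written back to Rd.
         */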
   6671
   6672/*
   6673 * Load/store register index
   6674 */
   6675
   6676static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
   6677{
   6678    ISSInfo ret;
   6679
   6680    /* ISS not valid if writeback */
   6681    if (p && !w) {
   6682        ret = rd;
   6683        if (s->base.pc_next - s->pc_curr == 2) {
   6684            ret |= ISSIs16Bit;
   6685        }
   6686    } else {
   6687        ret = ISSInvalid;
   6688    }
   6689    return ret;
   6690}
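        /*
         * Rough background: the ISSInfo built here is what
         * disas_set_da_iss() later uses to describe the access in the
         * syndrome reported for a data abort, recording the transfer
         * register and (via ISSIs16Bit) whether this was a 16-bit Thumb
         * encoding; writeback forms are not describable, hence ISSInvalid.
         */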
   6691
   6692static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
   6693{
   6694    TCGv_i32 addr = load_reg(s, a->rn);
   6695
   6696    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
   6697        gen_helper_v8m_stackcheck(cpu_env, addr);
   6698    }
   6699
   6700    if (a->p) {
   6701        TCGv_i32 ofs = load_reg(s, a->rm);
   6702        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
   6703        if (a->u) {
   6704            tcg_gen_add_i32(addr, addr, ofs);
   6705        } else {
   6706            tcg_gen_sub_i32(addr, addr, ofs);
   6707        }
   6708        tcg_temp_free_i32(ofs);
   6709    }
   6710    return addr;
   6711}
   6712
   6713static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
   6714                            TCGv_i32 addr, int address_offset)
   6715{
   6716    if (!a->p) {
   6717        TCGv_i32 ofs = load_reg(s, a->rm);
   6718        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
   6719        if (a->u) {
   6720            tcg_gen_add_i32(addr, addr, ofs);
   6721        } else {
   6722            tcg_gen_sub_i32(addr, addr, ofs);
   6723        }
   6724        tcg_temp_free_i32(ofs);
   6725    } else if (!a->w) {
   6726        tcg_temp_free_i32(addr);
   6727        return;
   6728    }
   6729    tcg_gen_addi_i32(addr, addr, address_offset);
   6730    store_reg(s, a->rn, addr);
   6731}
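        /*
         * Summary of how the P/U/W bits drive op_addr_rr_pre/_post, e.g.
         * for LDR rt, [rn, rm]:
         *   P=1 W=0  offset form:   address = rn +/- rm, no writeback
         *   P=1 W=1  pre-indexed:   address = rn +/- rm, rn updated
         *   P=0      post-indexed:  address = rn, then rn +/- rm written back
         * U selects add (1) or subtract (0) of the (shifted) offset.
         */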
   6732
   6733static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
   6734                       MemOp mop, int mem_idx)
   6735{
   6736    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
   6737    TCGv_i32 addr, tmp;
   6738
   6739    addr = op_addr_rr_pre(s, a);
   6740
   6741    tmp = tcg_temp_new_i32();
   6742    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
   6743    disas_set_da_iss(s, mop, issinfo);
   6744
   6745    /*
   6746     * Perform base writeback before the loaded value to
   6747     * ensure correct behavior with overlapping index registers.
   6748     */
   6749    op_addr_rr_post(s, a, addr, 0);
   6750    store_reg_from_load(s, a->rt, tmp);
   6751    return true;
   6752}
   6753
   6754static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
   6755                        MemOp mop, int mem_idx)
   6756{
   6757    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
   6758    TCGv_i32 addr, tmp;
   6759
   6760    /*
   6761     * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
   6762     * is either UNPREDICTABLE or has defined behaviour
   6763     */
   6764    if (s->thumb && a->rn == 15) {
   6765        return false;
   6766    }
   6767
   6768    addr = op_addr_rr_pre(s, a);
   6769
   6770    tmp = load_reg(s, a->rt);
   6771    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
   6772    disas_set_da_iss(s, mop, issinfo);
   6773    tcg_temp_free_i32(tmp);
   6774
   6775    op_addr_rr_post(s, a, addr, 0);
   6776    return true;
   6777}
   6778
   6779static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
   6780{
   6781    int mem_idx = get_mem_index(s);
   6782    TCGv_i32 addr, tmp;
   6783
   6784    if (!ENABLE_ARCH_5TE) {
   6785        return false;
   6786    }
   6787    if (a->rt & 1) {
   6788        unallocated_encoding(s);
   6789        return true;
   6790    }
   6791    addr = op_addr_rr_pre(s, a);
   6792
   6793    tmp = tcg_temp_new_i32();
   6794    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6795    store_reg(s, a->rt, tmp);
   6796
   6797    tcg_gen_addi_i32(addr, addr, 4);
   6798
   6799    tmp = tcg_temp_new_i32();
   6800    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6801    store_reg(s, a->rt + 1, tmp);
   6802
   6803    /* LDRD w/ base writeback is undefined if the registers overlap.  */
   6804    op_addr_rr_post(s, a, addr, -4);
   6805    return true;
   6806}
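        /*
         * The -4 passed to op_addr_rr_post() here (and in the STRD and _ri
         * variants below) undoes the explicit "addr += 4" done between the
         * two word accesses, so any post-index or writeback calculation is
         * applied relative to the original base address.
         */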
   6807
   6808static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
   6809{
   6810    int mem_idx = get_mem_index(s);
   6811    TCGv_i32 addr, tmp;
   6812
   6813    if (!ENABLE_ARCH_5TE) {
   6814        return false;
   6815    }
   6816    if (a->rt & 1) {
   6817        unallocated_encoding(s);
   6818        return true;
   6819    }
   6820    addr = op_addr_rr_pre(s, a);
   6821
   6822    tmp = load_reg(s, a->rt);
   6823    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6824    tcg_temp_free_i32(tmp);
   6825
   6826    tcg_gen_addi_i32(addr, addr, 4);
   6827
   6828    tmp = load_reg(s, a->rt + 1);
   6829    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6830    tcg_temp_free_i32(tmp);
   6831
   6832    op_addr_rr_post(s, a, addr, -4);
   6833    return true;
   6834}
   6835
   6836/*
   6837 * Load/store immediate index
   6838 */
   6839
   6840static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
   6841{
   6842    int ofs = a->imm;
   6843
   6844    if (!a->u) {
   6845        ofs = -ofs;
   6846    }
   6847
   6848    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
   6849        /*
   6850         * Stackcheck. Here we know 'addr' is the current SP;
   6851         * U is set if we're moving SP up, else down. It is
   6852         * UNKNOWN whether the limit check triggers when SP starts
   6853         * below the limit and ends up above it; we chose to do so.
   6854         */
   6855        if (!a->u) {
   6856            TCGv_i32 newsp = tcg_temp_new_i32();
   6857            tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
   6858            gen_helper_v8m_stackcheck(cpu_env, newsp);
   6859            tcg_temp_free_i32(newsp);
   6860        } else {
   6861            gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
   6862        }
   6863    }
   6864
   6865    return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
   6866}
   6867
   6868static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
   6869                            TCGv_i32 addr, int address_offset)
   6870{
   6871    if (!a->p) {
   6872        if (a->u) {
   6873            address_offset += a->imm;
   6874        } else {
   6875            address_offset -= a->imm;
   6876        }
   6877    } else if (!a->w) {
   6878        tcg_temp_free_i32(addr);
   6879        return;
   6880    }
   6881    tcg_gen_addi_i32(addr, addr, address_offset);
   6882    store_reg(s, a->rn, addr);
   6883}
   6884
   6885static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
   6886                       MemOp mop, int mem_idx)
   6887{
   6888    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
   6889    TCGv_i32 addr, tmp;
   6890
   6891    addr = op_addr_ri_pre(s, a);
   6892
   6893    tmp = tcg_temp_new_i32();
   6894    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
   6895    disas_set_da_iss(s, mop, issinfo);
   6896
   6897    /*
   6898     * Perform base writeback before the loaded value to
   6899     * ensure correct behavior with overlapping index registers.
   6900     */
   6901    op_addr_ri_post(s, a, addr, 0);
   6902    store_reg_from_load(s, a->rt, tmp);
   6903    return true;
   6904}
   6905
   6906static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
   6907                        MemOp mop, int mem_idx)
   6908{
   6909    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
   6910    TCGv_i32 addr, tmp;
   6911
   6912    /*
   6913     * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
   6914     * is either UNPREDICTABLE or has defined behaviour
   6915     */
   6916    if (s->thumb && a->rn == 15) {
   6917        return false;
   6918    }
   6919
   6920    addr = op_addr_ri_pre(s, a);
   6921
   6922    tmp = load_reg(s, a->rt);
   6923    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
   6924    disas_set_da_iss(s, mop, issinfo);
   6925    tcg_temp_free_i32(tmp);
   6926
   6927    op_addr_ri_post(s, a, addr, 0);
   6928    return true;
   6929}
   6930
   6931static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
   6932{
   6933    int mem_idx = get_mem_index(s);
   6934    TCGv_i32 addr, tmp;
   6935
   6936    addr = op_addr_ri_pre(s, a);
   6937
   6938    tmp = tcg_temp_new_i32();
   6939    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6940    store_reg(s, a->rt, tmp);
   6941
   6942    tcg_gen_addi_i32(addr, addr, 4);
   6943
   6944    tmp = tcg_temp_new_i32();
   6945    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6946    store_reg(s, rt2, tmp);
   6947
   6948    /* LDRD w/ base writeback is undefined if the registers overlap.  */
   6949    op_addr_ri_post(s, a, addr, -4);
   6950    return true;
   6951}
   6952
   6953static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
   6954{
   6955    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
   6956        return false;
   6957    }
   6958    return op_ldrd_ri(s, a, a->rt + 1);
   6959}
   6960
   6961static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
   6962{
   6963    arg_ldst_ri b = {
   6964        .u = a->u, .w = a->w, .p = a->p,
   6965        .rn = a->rn, .rt = a->rt, .imm = a->imm
   6966    };
   6967    return op_ldrd_ri(s, &b, a->rt2);
   6968}
   6969
   6970static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
   6971{
   6972    int mem_idx = get_mem_index(s);
   6973    TCGv_i32 addr, tmp;
   6974
   6975    addr = op_addr_ri_pre(s, a);
   6976
   6977    tmp = load_reg(s, a->rt);
   6978    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6979    tcg_temp_free_i32(tmp);
   6980
   6981    tcg_gen_addi_i32(addr, addr, 4);
   6982
   6983    tmp = load_reg(s, rt2);
   6984    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6985    tcg_temp_free_i32(tmp);
   6986
   6987    op_addr_ri_post(s, a, addr, -4);
   6988    return true;
   6989}
   6990
   6991static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
   6992{
   6993    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
   6994        return false;
   6995    }
   6996    return op_strd_ri(s, a, a->rt + 1);
   6997}
   6998
   6999static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
   7000{
   7001    arg_ldst_ri b = {
   7002        .u = a->u, .w = a->w, .p = a->p,
   7003        .rn = a->rn, .rt = a->rt, .imm = a->imm
   7004    };
   7005    return op_strd_ri(s, &b, a->rt2);
   7006}
   7007
   7008#define DO_LDST(NAME, WHICH, MEMOP) \
   7009static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
   7010{                                                                     \
   7011    return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
   7012}                                                                     \
   7013static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
   7014{                                                                     \
   7015    return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
   7016}                                                                     \
   7017static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
   7018{                                                                     \
   7019    return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
   7020}                                                                     \
   7021static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
   7022{                                                                     \
   7023    return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
   7024}
   7025
   7026DO_LDST(LDR, load, MO_UL)
   7027DO_LDST(LDRB, load, MO_UB)
   7028DO_LDST(LDRH, load, MO_UW)
   7029DO_LDST(LDRSB, load, MO_SB)
   7030DO_LDST(LDRSH, load, MO_SW)
   7031
   7032DO_LDST(STR, store, MO_UL)
   7033DO_LDST(STRB, store, MO_UB)
   7034DO_LDST(STRH, store, MO_UW)
   7035
   7036#undef DO_LDST
   7037
   7038/*
   7039 * Synchronization primitives
   7040 */
   7041
   7042static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
   7043{
   7044    TCGv_i32 addr, tmp;
   7045    TCGv taddr;
   7046
   7047    opc |= s->be_data;
   7048    addr = load_reg(s, a->rn);
   7049    taddr = gen_aa32_addr(s, addr, opc);
   7050    tcg_temp_free_i32(addr);
   7051
   7052    tmp = load_reg(s, a->rt2);
   7053    tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
   7054    tcg_temp_free(taddr);
   7055
   7056    store_reg(s, a->rt, tmp);
   7057    return true;
   7058}
   7059
   7060static bool trans_SWP(DisasContext *s, arg_SWP *a)
   7061{
   7062    return op_swp(s, a, MO_UL | MO_ALIGN);
   7063}
   7064
   7065static bool trans_SWPB(DisasContext *s, arg_SWP *a)
   7066{
   7067    return op_swp(s, a, MO_UB);
   7068}
   7069
   7070/*
   7071 * Load/Store Exclusive and Load-Acquire/Store-Release
   7072 */
   7073
   7074static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
   7075{
   7076    TCGv_i32 addr;
   7077    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
   7078    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
   7079
   7080    /* We UNDEF for these UNPREDICTABLE cases.  */
   7081    if (a->rd == 15 || a->rn == 15 || a->rt == 15
   7082        || a->rd == a->rn || a->rd == a->rt
   7083        || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
   7084        || (mop == MO_64
   7085            && (a->rt2 == 15
   7086                || a->rd == a->rt2
   7087                || (!v8a && s->thumb && a->rt2 == 13)))) {
   7088        unallocated_encoding(s);
   7089        return true;
   7090    }
   7091
   7092    if (rel) {
   7093        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
   7094    }
   7095
   7096    addr = tcg_temp_local_new_i32();
   7097    load_reg_var(s, addr, a->rn);
   7098    tcg_gen_addi_i32(addr, addr, a->imm);
   7099
   7100    gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
   7101    tcg_temp_free_i32(addr);
   7102    return true;
   7103}
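        /*
         * Sketch of the exclusives model: gen_load_exclusive() records the
         * address and loaded data in the cpu_exclusive_addr/val globals, and
         * gen_store_exclusive() only performs the store (and writes 0 to Rd)
         * if both still match, otherwise Rd is set to 1 -- an approximation
         * of the architectural local monitor.
         */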
   7104
   7105static bool trans_STREX(DisasContext *s, arg_STREX *a)
   7106{
   7107    if (!ENABLE_ARCH_6) {
   7108        return false;
   7109    }
   7110    return op_strex(s, a, MO_32, false);
   7111}
   7112
   7113static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
   7114{
   7115    if (!ENABLE_ARCH_6K) {
   7116        return false;
   7117    }
   7118    /* We UNDEF for these UNPREDICTABLE cases.  */
   7119    if (a->rt & 1) {
   7120        unallocated_encoding(s);
   7121        return true;
   7122    }
   7123    a->rt2 = a->rt + 1;
   7124    return op_strex(s, a, MO_64, false);
   7125}
   7126
   7127static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
   7128{
   7129    return op_strex(s, a, MO_64, false);
   7130}
   7131
   7132static bool trans_STREXB(DisasContext *s, arg_STREX *a)
   7133{
   7134    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
   7135        return false;
   7136    }
   7137    return op_strex(s, a, MO_8, false);
   7138}
   7139
   7140static bool trans_STREXH(DisasContext *s, arg_STREX *a)
   7141{
   7142    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
   7143        return false;
   7144    }
   7145    return op_strex(s, a, MO_16, false);
   7146}
   7147
   7148static bool trans_STLEX(DisasContext *s, arg_STREX *a)
   7149{
   7150    if (!ENABLE_ARCH_8) {
   7151        return false;
   7152    }
   7153    return op_strex(s, a, MO_32, true);
   7154}
   7155
   7156static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
   7157{
   7158    if (!ENABLE_ARCH_8) {
   7159        return false;
   7160    }
   7161    /* We UNDEF for these UNPREDICTABLE cases.  */
   7162    if (a->rt & 1) {
   7163        unallocated_encoding(s);
   7164        return true;
   7165    }
   7166    a->rt2 = a->rt + 1;
   7167    return op_strex(s, a, MO_64, true);
   7168}
   7169
   7170static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
   7171{
   7172    if (!ENABLE_ARCH_8) {
   7173        return false;
   7174    }
   7175    return op_strex(s, a, MO_64, true);
   7176}
   7177
   7178static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
   7179{
   7180    if (!ENABLE_ARCH_8) {
   7181        return false;
   7182    }
   7183    return op_strex(s, a, MO_8, true);
   7184}
   7185
   7186static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
   7187{
   7188    if (!ENABLE_ARCH_8) {
   7189        return false;
   7190    }
   7191    return op_strex(s, a, MO_16, true);
   7192}
   7193
   7194static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
   7195{
   7196    TCGv_i32 addr, tmp;
   7197
   7198    if (!ENABLE_ARCH_8) {
   7199        return false;
   7200    }
   7201    /* We UNDEF for these UNPREDICTABLE cases.  */
   7202    if (a->rn == 15 || a->rt == 15) {
   7203        unallocated_encoding(s);
   7204        return true;
   7205    }
   7206
   7207    addr = load_reg(s, a->rn);
   7208    tmp = load_reg(s, a->rt);
   7209    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
   7210    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
   7211    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
   7212
   7213    tcg_temp_free_i32(tmp);
   7214    tcg_temp_free_i32(addr);
   7215    return true;
   7216}
   7217
   7218static bool trans_STL(DisasContext *s, arg_STL *a)
   7219{
   7220    return op_stl(s, a, MO_UL);
   7221}
   7222
   7223static bool trans_STLB(DisasContext *s, arg_STL *a)
   7224{
   7225    return op_stl(s, a, MO_UB);
   7226}
   7227
   7228static bool trans_STLH(DisasContext *s, arg_STL *a)
   7229{
   7230    return op_stl(s, a, MO_UW);
   7231}
   7232
   7233static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
   7234{
   7235    TCGv_i32 addr;
   7236    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
   7237    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
   7238
   7239    /* We UNDEF for these UNPREDICTABLE cases.  */
   7240    if (a->rn == 15 || a->rt == 15
   7241        || (!v8a && s->thumb && a->rt == 13)
   7242        || (mop == MO_64
   7243            && (a->rt2 == 15 || a->rt == a->rt2
   7244                || (!v8a && s->thumb && a->rt2 == 13)))) {
   7245        unallocated_encoding(s);
   7246        return true;
   7247    }
   7248
   7249    addr = tcg_temp_local_new_i32();
   7250    load_reg_var(s, addr, a->rn);
   7251    tcg_gen_addi_i32(addr, addr, a->imm);
   7252
   7253    gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
   7254    tcg_temp_free_i32(addr);
   7255
   7256    if (acq) {
   7257        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
   7258    }
   7259    return true;
   7260}
   7261
   7262static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
   7263{
   7264    if (!ENABLE_ARCH_6) {
   7265        return false;
   7266    }
   7267    return op_ldrex(s, a, MO_32, false);
   7268}
   7269
   7270static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
   7271{
   7272    if (!ENABLE_ARCH_6K) {
   7273        return false;
   7274    }
   7275    /* We UNDEF for these UNPREDICTABLE cases.  */
   7276    if (a->rt & 1) {
   7277        unallocated_encoding(s);
   7278        return true;
   7279    }
   7280    a->rt2 = a->rt + 1;
   7281    return op_ldrex(s, a, MO_64, false);
   7282}
   7283
   7284static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
   7285{
   7286    return op_ldrex(s, a, MO_64, false);
   7287}
   7288
   7289static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
   7290{
   7291    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
   7292        return false;
   7293    }
   7294    return op_ldrex(s, a, MO_8, false);
   7295}
   7296
   7297static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
   7298{
   7299    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
   7300        return false;
   7301    }
   7302    return op_ldrex(s, a, MO_16, false);
   7303}
   7304
   7305static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
   7306{
   7307    if (!ENABLE_ARCH_8) {
   7308        return false;
   7309    }
   7310    return op_ldrex(s, a, MO_32, true);
   7311}
   7312
   7313static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
   7314{
   7315    if (!ENABLE_ARCH_8) {
   7316        return false;
   7317    }
   7318    /* We UNDEF for these UNPREDICTABLE cases.  */
   7319    if (a->rt & 1) {
   7320        unallocated_encoding(s);
   7321        return true;
   7322    }
   7323    a->rt2 = a->rt + 1;
   7324    return op_ldrex(s, a, MO_64, true);
   7325}
   7326
   7327static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
   7328{
   7329    if (!ENABLE_ARCH_8) {
   7330        return false;
   7331    }
   7332    return op_ldrex(s, a, MO_64, true);
   7333}
   7334
   7335static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
   7336{
   7337    if (!ENABLE_ARCH_8) {
   7338        return false;
   7339    }
   7340    return op_ldrex(s, a, MO_8, true);
   7341}
   7342
   7343static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
   7344{
   7345    if (!ENABLE_ARCH_8) {
   7346        return false;
   7347    }
   7348    return op_ldrex(s, a, MO_16, true);
   7349}
   7350
   7351static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
   7352{
   7353    TCGv_i32 addr, tmp;
   7354
   7355    if (!ENABLE_ARCH_8) {
   7356        return false;
   7357    }
   7358    /* We UNDEF for these UNPREDICTABLE cases.  */
   7359    if (a->rn == 15 || a->rt == 15) {
   7360        unallocated_encoding(s);
   7361        return true;
   7362    }
   7363
   7364    addr = load_reg(s, a->rn);
   7365    tmp = tcg_temp_new_i32();
   7366    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
   7367    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
   7368    tcg_temp_free_i32(addr);
   7369
   7370    store_reg(s, a->rt, tmp);
   7371    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
   7372    return true;
   7373}
   7374
   7375static bool trans_LDA(DisasContext *s, arg_LDA *a)
   7376{
   7377    return op_lda(s, a, MO_UL);
   7378}
   7379
   7380static bool trans_LDAB(DisasContext *s, arg_LDA *a)
   7381{
   7382    return op_lda(s, a, MO_UB);
   7383}
   7384
   7385static bool trans_LDAH(DisasContext *s, arg_LDA *a)
   7386{
   7387    return op_lda(s, a, MO_UW);
   7388}
   7389
   7390/*
   7391 * Media instructions
   7392 */
   7393
   7394static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
   7395{
   7396    TCGv_i32 t1, t2;
   7397
   7398    if (!ENABLE_ARCH_6) {
   7399        return false;
   7400    }
   7401
   7402    t1 = load_reg(s, a->rn);
   7403    t2 = load_reg(s, a->rm);
   7404    gen_helper_usad8(t1, t1, t2);
   7405    tcg_temp_free_i32(t2);
   7406    if (a->ra != 15) {
   7407        t2 = load_reg(s, a->ra);
   7408        tcg_gen_add_i32(t1, t1, t2);
   7409        tcg_temp_free_i32(t2);
   7410    }
   7411    store_reg(s, a->rd, t1);
   7412    return true;
   7413}
   7414
   7415static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
   7416{
   7417    TCGv_i32 tmp;
   7418    int width = a->widthm1 + 1;
   7419    int shift = a->lsb;
   7420
   7421    if (!ENABLE_ARCH_6T2) {
   7422        return false;
   7423    }
   7424    if (shift + width > 32) {
   7425        /* UNPREDICTABLE; we choose to UNDEF */
   7426        unallocated_encoding(s);
   7427        return true;
   7428    }
   7429
   7430    tmp = load_reg(s, a->rn);
   7431    if (u) {
   7432        tcg_gen_extract_i32(tmp, tmp, shift, width);
   7433    } else {
   7434        tcg_gen_sextract_i32(tmp, tmp, shift, width);
   7435    }
   7436    store_reg(s, a->rd, tmp);
   7437    return true;
   7438}
   7439
   7440static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
   7441{
   7442    return op_bfx(s, a, false);
   7443}
   7444
   7445static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
   7446{
   7447    return op_bfx(s, a, true);
   7448}
   7449
   7450static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
   7451{
   7452    TCGv_i32 tmp;
   7453    int msb = a->msb, lsb = a->lsb;
   7454    int width;
   7455
   7456    if (!ENABLE_ARCH_6T2) {
   7457        return false;
   7458    }
   7459    if (msb < lsb) {
   7460        /* UNPREDICTABLE; we choose to UNDEF */
   7461        unallocated_encoding(s);
   7462        return true;
   7463    }
   7464
   7465    width = msb + 1 - lsb;
   7466    if (a->rn == 15) {
   7467        /* BFC */
   7468        tmp = tcg_const_i32(0);
   7469    } else {
   7470        /* BFI */
   7471        tmp = load_reg(s, a->rn);
   7472    }
   7473    if (width != 32) {
   7474        TCGv_i32 tmp2 = load_reg(s, a->rd);
   7475        tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
   7476        tcg_temp_free_i32(tmp2);
   7477    }
   7478    store_reg(s, a->rd, tmp);
   7479    return true;
   7480}
   7481
   7482static bool trans_UDF(DisasContext *s, arg_UDF *a)
   7483{
   7484    unallocated_encoding(s);
   7485    return true;
   7486}
   7487
   7488/*
   7489 * Parallel addition and subtraction
   7490 */
   7491
   7492static bool op_par_addsub(DisasContext *s, arg_rrr *a,
   7493                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
   7494{
   7495    TCGv_i32 t0, t1;
   7496
   7497    if (s->thumb
   7498        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   7499        : !ENABLE_ARCH_6) {
   7500        return false;
   7501    }
   7502
   7503    t0 = load_reg(s, a->rn);
   7504    t1 = load_reg(s, a->rm);
   7505
   7506    gen(t0, t0, t1);
   7507
   7508    tcg_temp_free_i32(t1);
   7509    store_reg(s, a->rd, t0);
   7510    return true;
   7511}
   7512
   7513static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
   7514                             void (*gen)(TCGv_i32, TCGv_i32,
   7515                                         TCGv_i32, TCGv_ptr))
   7516{
   7517    TCGv_i32 t0, t1;
   7518    TCGv_ptr ge;
   7519
   7520    if (s->thumb
   7521        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   7522        : !ENABLE_ARCH_6) {
   7523        return false;
   7524    }
   7525
   7526    t0 = load_reg(s, a->rn);
   7527    t1 = load_reg(s, a->rm);
   7528
   7529    ge = tcg_temp_new_ptr();
   7530    tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
   7531    gen(t0, t0, t1, ge);
   7532
   7533    tcg_temp_free_ptr(ge);
   7534    tcg_temp_free_i32(t1);
   7535    store_reg(s, a->rd, t0);
   7536    return true;
   7537}
   7538
   7539#define DO_PAR_ADDSUB(NAME, helper) \
   7540static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
   7541{                                                       \
   7542    return op_par_addsub(s, a, helper);                 \
   7543}
   7544
   7545#define DO_PAR_ADDSUB_GE(NAME, helper) \
   7546static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
   7547{                                                       \
   7548    return op_par_addsub_ge(s, a, helper);              \
   7549}
   7550
   7551DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
   7552DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
   7553DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
   7554DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
   7555DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
   7556DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
   7557
   7558DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
   7559DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
   7560DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
   7561DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
   7562DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
   7563DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
   7564
   7565DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
   7566DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
   7567DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
   7568DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
   7569DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
   7570DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
   7571
   7572DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
   7573DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
   7574DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
   7575DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
   7576DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
   7577DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
   7578
   7579DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
   7580DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
   7581DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
   7582DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
   7583DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
   7584DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
   7585
   7586DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
   7587DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
   7588DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
   7589DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
   7590DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
   7591DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
   7592
   7593#undef DO_PAR_ADDSUB
   7594#undef DO_PAR_ADDSUB_GE
   7595
   7596/*
   7597 * Packing, unpacking, saturation, and reversal
   7598 */
   7599
   7600static bool trans_PKH(DisasContext *s, arg_PKH *a)
   7601{
   7602    TCGv_i32 tn, tm;
   7603    int shift = a->imm;
   7604
   7605    if (s->thumb
   7606        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   7607        : !ENABLE_ARCH_6) {
   7608        return false;
   7609    }
   7610
   7611    tn = load_reg(s, a->rn);
   7612    tm = load_reg(s, a->rm);
   7613    if (a->tb) {
   7614        /* PKHTB */
   7615        if (shift == 0) {
   7616            shift = 31;
   7617        }
   7618        tcg_gen_sari_i32(tm, tm, shift);
   7619        tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
   7620    } else {
   7621        /* PKHBT */
   7622        tcg_gen_shli_i32(tm, tm, shift);
   7623        tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
   7624    }
   7625    tcg_temp_free_i32(tm);
   7626    store_reg(s, a->rd, tn);
   7627    return true;
   7628}
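        /*
         * Resulting halfword layout, matching the deposits above:
         *   PKHBT: Rd[15:0]  = Rn[15:0],  Rd[31:16] = (Rm LSL imm)[31:16]
         *   PKHTB: Rd[31:16] = Rn[31:16], Rd[15:0]  = (Rm ASR imm)[15:0]
         * An immediate of 0 in the TB form means ASR #32; shifting by 31
         * here yields the same all-sign-bits value.
         */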
   7629
   7630static bool op_sat(DisasContext *s, arg_sat *a,
   7631                   void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
   7632{
   7633    TCGv_i32 tmp, satimm;
   7634    int shift = a->imm;
   7635
   7636    if (!ENABLE_ARCH_6) {
   7637        return false;
   7638    }
   7639
   7640    tmp = load_reg(s, a->rn);
   7641    if (a->sh) {
   7642        tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
   7643    } else {
   7644        tcg_gen_shli_i32(tmp, tmp, shift);
   7645    }
   7646
   7647    satimm = tcg_const_i32(a->satimm);
   7648    gen(tmp, cpu_env, tmp, satimm);
   7649    tcg_temp_free_i32(satimm);
   7650
   7651    store_reg(s, a->rd, tmp);
   7652    return true;
   7653}
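        /*
         * For SSAT/USAT an immediate shift of 0 with 'sh' set encodes
         * ASR #32; the ASR #31 used above produces the identical
         * all-sign-bits value, so no special case is needed.
         */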
   7654
   7655static bool trans_SSAT(DisasContext *s, arg_sat *a)
   7656{
   7657    return op_sat(s, a, gen_helper_ssat);
   7658}
   7659
   7660static bool trans_USAT(DisasContext *s, arg_sat *a)
   7661{
   7662    return op_sat(s, a, gen_helper_usat);
   7663}
   7664
   7665static bool trans_SSAT16(DisasContext *s, arg_sat *a)
   7666{
   7667    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
   7668        return false;
   7669    }
   7670    return op_sat(s, a, gen_helper_ssat16);
   7671}
   7672
   7673static bool trans_USAT16(DisasContext *s, arg_sat *a)
   7674{
   7675    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
   7676        return false;
   7677    }
   7678    return op_sat(s, a, gen_helper_usat16);
   7679}
   7680
   7681static bool op_xta(DisasContext *s, arg_rrr_rot *a,
   7682                   void (*gen_extract)(TCGv_i32, TCGv_i32),
   7683                   void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
   7684{
   7685    TCGv_i32 tmp;
   7686
   7687    if (!ENABLE_ARCH_6) {
   7688        return false;
   7689    }
   7690
   7691    tmp = load_reg(s, a->rm);
   7692    /*
   7693     * TODO: In many cases we could do a shift instead of a rotate.
   7694     * Combined with a simple extend, that becomes an extract.
   7695     */
   7696    tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
   7697    gen_extract(tmp, tmp);
   7698
   7699    if (a->rn != 15) {
   7700        TCGv_i32 tmp2 = load_reg(s, a->rn);
   7701        gen_add(tmp, tmp, tmp2);
   7702        tcg_temp_free_i32(tmp2);
   7703    }
   7704    store_reg(s, a->rd, tmp);
   7705    return true;
   7706}
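        /*
         * Example: UXTAB Rd, Rn, Rm, ROR #8 rotates Rm right by 8, zero
         * extends its low byte (i.e. the original Rm[15:8]) and adds Rn;
         * when Rn is 15 the add is skipped and the operation reduces to the
         * plain extend (UXTB/SXTB/...) forms.
         */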
   7707
   7708static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
   7709{
   7710    return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
   7711}
   7712
   7713static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
   7714{
   7715    return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
   7716}
   7717
   7718static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
   7719{
   7720    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
   7721        return false;
   7722    }
   7723    return op_xta(s, a, gen_helper_sxtb16, gen_add16);
   7724}
   7725
   7726static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
   7727{
   7728    return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
   7729}
   7730
   7731static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
   7732{
   7733    return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
   7734}
   7735
   7736static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
   7737{
   7738    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
   7739        return false;
   7740    }
   7741    return op_xta(s, a, gen_helper_uxtb16, gen_add16);
   7742}
   7743
   7744static bool trans_SEL(DisasContext *s, arg_rrr *a)
   7745{
   7746    TCGv_i32 t1, t2, t3;
   7747
   7748    if (s->thumb
   7749        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   7750        : !ENABLE_ARCH_6) {
   7751        return false;
   7752    }
   7753
   7754    t1 = load_reg(s, a->rn);
   7755    t2 = load_reg(s, a->rm);
   7756    t3 = tcg_temp_new_i32();
   7757    tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
   7758    gen_helper_sel_flags(t1, t3, t1, t2);
   7759    tcg_temp_free_i32(t3);
   7760    tcg_temp_free_i32(t2);
   7761    store_reg(s, a->rd, t1);
   7762    return true;
   7763}
   7764
   7765static bool op_rr(DisasContext *s, arg_rr *a,
   7766                  void (*gen)(TCGv_i32, TCGv_i32))
   7767{
   7768    TCGv_i32 tmp;
   7769
   7770    tmp = load_reg(s, a->rm);
   7771    gen(tmp, tmp);
   7772    store_reg(s, a->rd, tmp);
   7773    return true;
   7774}
   7775
   7776static bool trans_REV(DisasContext *s, arg_rr *a)
   7777{
   7778    if (!ENABLE_ARCH_6) {
   7779        return false;
   7780    }
   7781    return op_rr(s, a, tcg_gen_bswap32_i32);
   7782}
   7783
   7784static bool trans_REV16(DisasContext *s, arg_rr *a)
   7785{
   7786    if (!ENABLE_ARCH_6) {
   7787        return false;
   7788    }
   7789    return op_rr(s, a, gen_rev16);
   7790}
   7791
   7792static bool trans_REVSH(DisasContext *s, arg_rr *a)
   7793{
   7794    if (!ENABLE_ARCH_6) {
   7795        return false;
   7796    }
   7797    return op_rr(s, a, gen_revsh);
   7798}
   7799
   7800static bool trans_RBIT(DisasContext *s, arg_rr *a)
   7801{
   7802    if (!ENABLE_ARCH_6T2) {
   7803        return false;
   7804    }
   7805    return op_rr(s, a, gen_helper_rbit);
   7806}
   7807
   7808/*
   7809 * Signed multiply, signed and unsigned divide
   7810 */
   7811
   7812static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
   7813{
   7814    TCGv_i32 t1, t2;
   7815
   7816    if (!ENABLE_ARCH_6) {
   7817        return false;
   7818    }
   7819
   7820    t1 = load_reg(s, a->rn);
   7821    t2 = load_reg(s, a->rm);
   7822    if (m_swap) {
   7823        gen_swap_half(t2, t2);
   7824    }
   7825    gen_smul_dual(t1, t2);
   7826
   7827    if (sub) {
   7828        /*
   7829         * This subtraction cannot overflow, so we can do a simple
   7830         * 32-bit subtraction and then a possible 32-bit saturating
   7831         * addition of Ra.
   7832         */
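        /*
         * (Each 16x16 signed product lies in [-(2^30 - 2^15), 2^30], so
         * the difference of the two products always fits in a signed
         * 32-bit value.)
         */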
   7833        tcg_gen_sub_i32(t1, t1, t2);
   7834        tcg_temp_free_i32(t2);
   7835
   7836        if (a->ra != 15) {
   7837            t2 = load_reg(s, a->ra);
   7838            gen_helper_add_setq(t1, cpu_env, t1, t2);
   7839            tcg_temp_free_i32(t2);
   7840        }
   7841    } else if (a->ra == 15) {
   7842        /* Single saturation-checking addition */
   7843        gen_helper_add_setq(t1, cpu_env, t1, t2);
   7844        tcg_temp_free_i32(t2);
   7845    } else {
   7846        /*
   7847         * We need to add the products and Ra together and then
   7848         * determine whether the final result overflowed. Doing
   7849         * this as two separate add-and-check-overflow steps incorrectly
   7850         * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
   7851         * Do all the arithmetic at 64-bits and then check for overflow.
   7852         */
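        /*
         * Worked example of the case above: each product is 0x40000000,
         * so adding the two products in 32 bits already overflows to
         * 0x80000000 and would set Q, yet the full sum
         * 0x80000000 + (-1) = 0x7fffffff fits in 32 bits, so this insn
         * must not set Q.
         */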
   7853        TCGv_i64 p64, q64;
   7854        TCGv_i32 t3, qf, one;
   7855
   7856        p64 = tcg_temp_new_i64();
   7857        q64 = tcg_temp_new_i64();
   7858        tcg_gen_ext_i32_i64(p64, t1);
   7859        tcg_gen_ext_i32_i64(q64, t2);
   7860        tcg_gen_add_i64(p64, p64, q64);
   7861        load_reg_var(s, t2, a->ra);
   7862        tcg_gen_ext_i32_i64(q64, t2);
   7863        tcg_gen_add_i64(p64, p64, q64);
   7864        tcg_temp_free_i64(q64);
   7865
   7866        tcg_gen_extr_i64_i32(t1, t2, p64);
   7867        tcg_temp_free_i64(p64);
   7868        /*
   7869         * t1 is the low half of the result which goes into Rd.
   7870         * We have overflow and must set Q if the high half (t2)
   7871         * is different from the sign-extension of t1.
   7872         */
   7873        t3 = tcg_temp_new_i32();
   7874        tcg_gen_sari_i32(t3, t1, 31);
   7875        qf = load_cpu_field(QF);
   7876        one = tcg_const_i32(1);
   7877        tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
   7878        store_cpu_field(qf, QF);
   7879        tcg_temp_free_i32(one);
   7880        tcg_temp_free_i32(t3);
   7881        tcg_temp_free_i32(t2);
   7882    }
   7883    store_reg(s, a->rd, t1);
   7884    return true;
   7885}
   7886
   7887static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
   7888{
   7889    return op_smlad(s, a, false, false);
   7890}
   7891
   7892static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
   7893{
   7894    return op_smlad(s, a, true, false);
   7895}
   7896
   7897static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
   7898{
   7899    return op_smlad(s, a, false, true);
   7900}
   7901
   7902static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
   7903{
   7904    return op_smlad(s, a, true, true);
   7905}
   7906
   7907static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
   7908{
   7909    TCGv_i32 t1, t2;
   7910    TCGv_i64 l1, l2;
   7911
   7912    if (!ENABLE_ARCH_6) {
   7913        return false;
   7914    }
   7915
   7916    t1 = load_reg(s, a->rn);
   7917    t2 = load_reg(s, a->rm);
   7918    if (m_swap) {
   7919        gen_swap_half(t2, t2);
   7920    }
   7921    gen_smul_dual(t1, t2);
   7922
   7923    l1 = tcg_temp_new_i64();
   7924    l2 = tcg_temp_new_i64();
   7925    tcg_gen_ext_i32_i64(l1, t1);
   7926    tcg_gen_ext_i32_i64(l2, t2);
   7927    tcg_temp_free_i32(t1);
   7928    tcg_temp_free_i32(t2);
   7929
   7930    if (sub) {
   7931        tcg_gen_sub_i64(l1, l1, l2);
   7932    } else {
   7933        tcg_gen_add_i64(l1, l1, l2);
   7934    }
   7935    tcg_temp_free_i64(l2);
   7936
   7937    gen_addq(s, l1, a->ra, a->rd);
   7938    gen_storeq_reg(s, a->ra, a->rd, l1);
   7939    tcg_temp_free_i64(l1);
   7940    return true;
   7941}
   7942
   7943static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
   7944{
   7945    return op_smlald(s, a, false, false);
   7946}
   7947
   7948static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
   7949{
   7950    return op_smlald(s, a, true, false);
   7951}
   7952
   7953static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
   7954{
   7955    return op_smlald(s, a, false, true);
   7956}
   7957
   7958static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
   7959{
   7960    return op_smlald(s, a, true, true);
   7961}
   7962
   7963static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
   7964{
   7965    TCGv_i32 t1, t2;
   7966
   7967    if (s->thumb
   7968        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   7969        : !ENABLE_ARCH_6) {
   7970        return false;
   7971    }
   7972
   7973    t1 = load_reg(s, a->rn);
   7974    t2 = load_reg(s, a->rm);
   7975    tcg_gen_muls2_i32(t2, t1, t1, t2);
   7976
   7977    if (a->ra != 15) {
   7978        TCGv_i32 t3 = load_reg(s, a->ra);
   7979        if (sub) {
    7980            /*
    7981             * For SMMLS, we need a 64-bit subtract: a non-zero product
    7982             * lowpart causes a borrow into the high word, and we also
    7983             * need the correct result lowpart for the rounding step.
    7984             */
   7985            TCGv_i32 zero = tcg_const_i32(0);
   7986            tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
   7987            tcg_temp_free_i32(zero);
   7988        } else {
   7989            tcg_gen_add_i32(t1, t1, t3);
   7990        }
   7991        tcg_temp_free_i32(t3);
   7992    }
   7993    if (round) {
   7994        /*
   7995         * Adding 0x80000000 to the 64-bit quantity means that we have
   7996         * carry in to the high word when the low word has the msb set.
   7997         */
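        /*
         * Concretely: the high word (the final result) is incremented
         * exactly when bit 31 of the low word is set.
         */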
   7998        tcg_gen_shri_i32(t2, t2, 31);
   7999        tcg_gen_add_i32(t1, t1, t2);
   8000    }
   8001    tcg_temp_free_i32(t2);
   8002    store_reg(s, a->rd, t1);
   8003    return true;
   8004}
   8005
   8006static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
   8007{
   8008    return op_smmla(s, a, false, false);
   8009}
   8010
   8011static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
   8012{
   8013    return op_smmla(s, a, true, false);
   8014}
   8015
   8016static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
   8017{
   8018    return op_smmla(s, a, false, true);
   8019}
   8020
   8021static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
   8022{
   8023    return op_smmla(s, a, true, true);
   8024}
   8025
   8026static bool op_div(DisasContext *s, arg_rrr *a, bool u)
   8027{
   8028    TCGv_i32 t1, t2;
   8029
   8030    if (s->thumb
   8031        ? !dc_isar_feature(aa32_thumb_div, s)
   8032        : !dc_isar_feature(aa32_arm_div, s)) {
   8033        return false;
   8034    }
   8035
   8036    t1 = load_reg(s, a->rn);
   8037    t2 = load_reg(s, a->rm);
   8038    if (u) {
   8039        gen_helper_udiv(t1, cpu_env, t1, t2);
   8040    } else {
   8041        gen_helper_sdiv(t1, cpu_env, t1, t2);
   8042    }
   8043    tcg_temp_free_i32(t2);
   8044    store_reg(s, a->rd, t1);
   8045    return true;
   8046}
   8047
   8048static bool trans_SDIV(DisasContext *s, arg_rrr *a)
   8049{
   8050    return op_div(s, a, false);
   8051}
   8052
   8053static bool trans_UDIV(DisasContext *s, arg_rrr *a)
   8054{
   8055    return op_div(s, a, true);
   8056}
   8057
   8058/*
   8059 * Block data transfer
   8060 */
   8061
   8062static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
   8063{
   8064    TCGv_i32 addr = load_reg(s, a->rn);
   8065
   8066    if (a->b) {
   8067        if (a->i) {
   8068            /* pre increment */
   8069            tcg_gen_addi_i32(addr, addr, 4);
   8070        } else {
   8071            /* pre decrement */
   8072            tcg_gen_addi_i32(addr, addr, -(n * 4));
   8073        }
   8074    } else if (!a->i && n != 1) {
   8075        /* post decrement */
   8076        tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
   8077    }
   8078
   8079    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
   8080        /*
   8081         * If the writeback is incrementing SP rather than
   8082         * decrementing it, and the initial SP is below the
   8083         * stack limit but the final written-back SP would
    8084         * be above, then we must not perform any memory
   8085         * accesses, but it is IMPDEF whether we generate
   8086         * an exception. We choose to do so in this case.
   8087         * At this point 'addr' is the lowest address, so
   8088         * either the original SP (if incrementing) or our
   8089         * final SP (if decrementing), so that's what we check.
   8090         */
   8091        gen_helper_v8m_stackcheck(cpu_env, addr);
   8092    }
   8093
   8094    return addr;
   8095}
   8096
   8097static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
   8098                               TCGv_i32 addr, int n)
   8099{
   8100    if (a->w) {
   8101        /* write back */
   8102        if (!a->b) {
   8103            if (a->i) {
   8104                /* post increment */
   8105                tcg_gen_addi_i32(addr, addr, 4);
   8106            } else {
   8107                /* post decrement */
   8108                tcg_gen_addi_i32(addr, addr, -(n * 4));
   8109            }
   8110        } else if (!a->i && n != 1) {
   8111            /* pre decrement */
   8112            tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
   8113        }
   8114        store_reg(s, a->rn, addr);
   8115    } else {
   8116        tcg_temp_free_i32(addr);
   8117    }
   8118}
   8119
   8120static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
   8121{
   8122    int i, j, n, list, mem_idx;
   8123    bool user = a->u;
   8124    TCGv_i32 addr, tmp, tmp2;
   8125
   8126    if (user) {
   8127        /* STM (user) */
   8128        if (IS_USER(s)) {
   8129            /* Only usable in supervisor mode.  */
   8130            unallocated_encoding(s);
   8131            return true;
   8132        }
   8133    }
   8134
   8135    list = a->list;
   8136    n = ctpop16(list);
   8137    if (n < min_n || a->rn == 15) {
   8138        unallocated_encoding(s);
   8139        return true;
   8140    }
   8141
   8142    s->eci_handled = true;
   8143
   8144    addr = op_addr_block_pre(s, a, n);
   8145    mem_idx = get_mem_index(s);
   8146
   8147    for (i = j = 0; i < 16; i++) {
   8148        if (!(list & (1 << i))) {
   8149            continue;
   8150        }
   8151
   8152        if (user && i != 15) {
   8153            tmp = tcg_temp_new_i32();
   8154            tmp2 = tcg_const_i32(i);
   8155            gen_helper_get_user_reg(tmp, cpu_env, tmp2);
   8156            tcg_temp_free_i32(tmp2);
   8157        } else {
   8158            tmp = load_reg(s, i);
   8159        }
   8160        gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   8161        tcg_temp_free_i32(tmp);
   8162
   8163        /* No need to add after the last transfer.  */
   8164        if (++j != n) {
   8165            tcg_gen_addi_i32(addr, addr, 4);
   8166        }
   8167    }
   8168
   8169    op_addr_block_post(s, a, addr, n);
   8170    clear_eci_state(s);
   8171    return true;
   8172}
   8173
   8174static bool trans_STM(DisasContext *s, arg_ldst_block *a)
   8175{
   8176    /* BitCount(list) < 1 is UNPREDICTABLE */
   8177    return op_stm(s, a, 1);
   8178}
   8179
   8180static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
   8181{
   8182    /* Writeback register in register list is UNPREDICTABLE for T32.  */
   8183    if (a->w && (a->list & (1 << a->rn))) {
   8184        unallocated_encoding(s);
   8185        return true;
   8186    }
   8187    /* BitCount(list) < 2 is UNPREDICTABLE */
   8188    return op_stm(s, a, 2);
   8189}
   8190
   8191static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
   8192{
   8193    int i, j, n, list, mem_idx;
   8194    bool loaded_base;
   8195    bool user = a->u;
   8196    bool exc_return = false;
   8197    TCGv_i32 addr, tmp, tmp2, loaded_var;
   8198
   8199    if (user) {
   8200        /* LDM (user), LDM (exception return) */
   8201        if (IS_USER(s)) {
   8202            /* Only usable in supervisor mode.  */
   8203            unallocated_encoding(s);
   8204            return true;
   8205        }
   8206        if (extract32(a->list, 15, 1)) {
   8207            exc_return = true;
   8208            user = false;
   8209        } else {
   8210            /* LDM (user) does not allow writeback.  */
   8211            if (a->w) {
   8212                unallocated_encoding(s);
   8213                return true;
   8214            }
   8215        }
   8216    }
   8217
   8218    list = a->list;
   8219    n = ctpop16(list);
   8220    if (n < min_n || a->rn == 15) {
   8221        unallocated_encoding(s);
   8222        return true;
   8223    }
   8224
   8225    s->eci_handled = true;
   8226
   8227    addr = op_addr_block_pre(s, a, n);
   8228    mem_idx = get_mem_index(s);
   8229    loaded_base = false;
   8230    loaded_var = NULL;
   8231
   8232    for (i = j = 0; i < 16; i++) {
   8233        if (!(list & (1 << i))) {
   8234            continue;
   8235        }
   8236
   8237        tmp = tcg_temp_new_i32();
   8238        gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   8239        if (user) {
   8240            tmp2 = tcg_const_i32(i);
   8241            gen_helper_set_user_reg(cpu_env, tmp2, tmp);
   8242            tcg_temp_free_i32(tmp2);
   8243            tcg_temp_free_i32(tmp);
   8244        } else if (i == a->rn) {
   8245            loaded_var = tmp;
   8246            loaded_base = true;
   8247        } else if (i == 15 && exc_return) {
   8248            store_pc_exc_ret(s, tmp);
   8249        } else {
   8250            store_reg_from_load(s, i, tmp);
   8251        }
   8252
   8253        /* No need to add after the last transfer.  */
   8254        if (++j != n) {
   8255            tcg_gen_addi_i32(addr, addr, 4);
   8256        }
   8257    }
   8258
   8259    op_addr_block_post(s, a, addr, n);
   8260
   8261    if (loaded_base) {
   8262        /* Note that we reject base == pc above.  */
   8263        store_reg(s, a->rn, loaded_var);
   8264    }
   8265
   8266    if (exc_return) {
   8267        /* Restore CPSR from SPSR.  */
   8268        tmp = load_cpu_field(spsr);
   8269        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
   8270            gen_io_start();
   8271        }
   8272        gen_helper_cpsr_write_eret(cpu_env, tmp);
   8273        tcg_temp_free_i32(tmp);
   8274        /* Must exit loop to check un-masked IRQs */
   8275        s->base.is_jmp = DISAS_EXIT;
   8276    }
   8277    clear_eci_state(s);
   8278    return true;
   8279}
   8280
   8281static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
   8282{
   8283    /*
   8284     * Writeback register in register list is UNPREDICTABLE
   8285     * for ArchVersion() >= 7.  Prior to v7, A32 would write
   8286     * an UNKNOWN value to the base register.
   8287     */
   8288    if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
   8289        unallocated_encoding(s);
   8290        return true;
   8291    }
   8292    /* BitCount(list) < 1 is UNPREDICTABLE */
   8293    return do_ldm(s, a, 1);
   8294}
   8295
   8296static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
   8297{
   8298    /* Writeback register in register list is UNPREDICTABLE for T32. */
   8299    if (a->w && (a->list & (1 << a->rn))) {
   8300        unallocated_encoding(s);
   8301        return true;
   8302    }
   8303    /* BitCount(list) < 2 is UNPREDICTABLE */
   8304    return do_ldm(s, a, 2);
   8305}
   8306
   8307static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
   8308{
   8309    /* Writeback is conditional on the base register not being loaded.  */
   8310    a->w = !(a->list & (1 << a->rn));
   8311    /* BitCount(list) < 1 is UNPREDICTABLE */
   8312    return do_ldm(s, a, 1);
   8313}
   8314
   8315static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
   8316{
   8317    int i;
   8318    TCGv_i32 zero;
   8319
   8320    if (!dc_isar_feature(aa32_m_sec_state, s)) {
   8321        return false;
   8322    }
   8323
   8324    if (extract32(a->list, 13, 1)) {
   8325        return false;
   8326    }
   8327
   8328    if (!a->list) {
   8329        /* UNPREDICTABLE; we choose to UNDEF */
   8330        return false;
   8331    }
   8332
   8333    s->eci_handled = true;
   8334
   8335    zero = tcg_const_i32(0);
   8336    for (i = 0; i < 15; i++) {
   8337        if (extract32(a->list, i, 1)) {
   8338            /* Clear R[i] */
   8339            tcg_gen_mov_i32(cpu_R[i], zero);
   8340        }
   8341    }
   8342    if (extract32(a->list, 15, 1)) {
   8343        /*
   8344         * Clear APSR (by calling the MSR helper with the same argument
   8345         * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
   8346         */
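        /*
         * 0xc << 8 packs that mask value into bits [11:8] of the helper
         * argument, with SYSM = 0 in the low byte.
         */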
   8347        TCGv_i32 maskreg = tcg_const_i32(0xc << 8);
   8348        gen_helper_v7m_msr(cpu_env, maskreg, zero);
   8349        tcg_temp_free_i32(maskreg);
   8350    }
   8351    tcg_temp_free_i32(zero);
   8352    clear_eci_state(s);
   8353    return true;
   8354}
   8355
   8356/*
   8357 * Branch, branch with link
   8358 */
   8359
   8360static bool trans_B(DisasContext *s, arg_i *a)
   8361{
   8362    gen_jmp(s, read_pc(s) + a->imm);
   8363    return true;
   8364}
   8365
   8366static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
   8367{
   8368    /* This has cond from encoding, required to be outside IT block.  */
   8369    if (a->cond >= 0xe) {
   8370        return false;
   8371    }
   8372    if (s->condexec_mask) {
   8373        unallocated_encoding(s);
   8374        return true;
   8375    }
   8376    arm_skip_unless(s, a->cond);
   8377    gen_jmp(s, read_pc(s) + a->imm);
   8378    return true;
   8379}
   8380
   8381static bool trans_BL(DisasContext *s, arg_i *a)
   8382{
   8383    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
   8384    gen_jmp(s, read_pc(s) + a->imm);
   8385    return true;
   8386}
   8387
   8388static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
   8389{
   8390    TCGv_i32 tmp;
   8391
   8392    /*
   8393     * BLX <imm> would be useless on M-profile; the encoding space
   8394     * is used for other insns from v8.1M onward, and UNDEFs before that.
   8395     */
   8396    if (arm_dc_feature(s, ARM_FEATURE_M)) {
   8397        return false;
   8398    }
   8399
   8400    /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
   8401    if (s->thumb && (a->imm & 2)) {
   8402        return false;
   8403    }
   8404    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
   8405    tmp = tcg_const_i32(!s->thumb);
   8406    store_cpu_field(tmp, thumb);
   8407    gen_jmp(s, (read_pc(s) & ~3) + a->imm);
   8408    return true;
   8409}
   8410
   8411static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
   8412{
   8413    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
   8414    tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
   8415    return true;
   8416}
   8417
   8418static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
   8419{
   8420    TCGv_i32 tmp = tcg_temp_new_i32();
   8421
   8422    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
   8423    tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
   8424    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
   8425    gen_bx(s, tmp);
   8426    return true;
   8427}
   8428
   8429static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
   8430{
   8431    TCGv_i32 tmp;
   8432
   8433    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
   8434    if (!ENABLE_ARCH_5) {
   8435        return false;
   8436    }
   8437    tmp = tcg_temp_new_i32();
   8438    tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
   8439    tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
   8440    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
   8441    gen_bx(s, tmp);
   8442    return true;
   8443}
   8444
   8445static bool trans_BF(DisasContext *s, arg_BF *a)
   8446{
   8447    /*
   8448     * M-profile branch future insns. The architecture permits an
   8449     * implementation to implement these as NOPs (equivalent to
   8450     * discarding the LO_BRANCH_INFO cache immediately), and we
   8451     * take that IMPDEF option because for QEMU a "real" implementation
   8452     * would be complicated and wouldn't execute any faster.
   8453     */
   8454    if (!dc_isar_feature(aa32_lob, s)) {
   8455        return false;
   8456    }
   8457    if (a->boff == 0) {
   8458        /* SEE "Related encodings" (loop insns) */
   8459        return false;
   8460    }
   8461    /* Handle as NOP */
   8462    return true;
   8463}
   8464
   8465static bool trans_DLS(DisasContext *s, arg_DLS *a)
   8466{
   8467    /* M-profile low-overhead loop start */
   8468    TCGv_i32 tmp;
   8469
   8470    if (!dc_isar_feature(aa32_lob, s)) {
   8471        return false;
   8472    }
   8473    if (a->rn == 13 || a->rn == 15) {
   8474        /*
   8475         * For DLSTP rn == 15 is a related encoding (LCTP); the
   8476         * other cases caught by this condition are all
   8477         * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
   8478         */
   8479        return false;
   8480    }
   8481
   8482    if (a->size != 4) {
   8483        /* DLSTP */
   8484        if (!dc_isar_feature(aa32_mve, s)) {
   8485            return false;
   8486        }
   8487        if (!vfp_access_check(s)) {
   8488            return true;
   8489        }
   8490    }
   8491
   8492    /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
   8493    tmp = load_reg(s, a->rn);
   8494    store_reg(s, 14, tmp);
   8495    if (a->size != 4) {
   8496        /* DLSTP: set FPSCR.LTPSIZE */
   8497        tmp = tcg_const_i32(a->size);
   8498        store_cpu_field(tmp, v7m.ltpsize);
   8499        s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
   8500    }
   8501    return true;
   8502}
   8503
   8504static bool trans_WLS(DisasContext *s, arg_WLS *a)
   8505{
   8506    /* M-profile low-overhead while-loop start */
   8507    TCGv_i32 tmp;
   8508    TCGLabel *nextlabel;
   8509
   8510    if (!dc_isar_feature(aa32_lob, s)) {
   8511        return false;
   8512    }
   8513    if (a->rn == 13 || a->rn == 15) {
   8514        /*
   8515         * For WLSTP rn == 15 is a related encoding (LE); the
   8516         * other cases caught by this condition are all
   8517         * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
   8518         */
   8519        return false;
   8520    }
   8521    if (s->condexec_mask) {
   8522        /*
   8523         * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
   8524         * we choose to UNDEF, because otherwise our use of
   8525         * gen_goto_tb(1) would clash with the use of TB exit 1
   8526         * in the dc->condjmp condition-failed codepath in
   8527         * arm_tr_tb_stop() and we'd get an assertion.
   8528         */
   8529        return false;
   8530    }
   8531    if (a->size != 4) {
   8532        /* WLSTP */
   8533        if (!dc_isar_feature(aa32_mve, s)) {
   8534            return false;
   8535        }
   8536        /*
   8537         * We need to check that the FPU is enabled here, but mustn't
   8538         * call vfp_access_check() to do that because we don't want to
   8539         * do the lazy state preservation in the "loop count is zero" case.
   8540         * Do the check-and-raise-exception by hand.
   8541         */
   8542        if (s->fp_excp_el) {
   8543            gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
   8544                               syn_uncategorized(), s->fp_excp_el);
   8545            return true;
   8546        }
   8547    }
   8548
   8549    nextlabel = gen_new_label();
   8550    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel);
   8551    tmp = load_reg(s, a->rn);
   8552    store_reg(s, 14, tmp);
   8553    if (a->size != 4) {
   8554        /*
   8555         * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
   8556         * lazy state preservation, new FP context creation, etc,
   8557         * that vfp_access_check() does. We know that the actual
   8558         * access check will succeed (ie it won't generate code that
   8559         * throws an exception) because we did that check by hand earlier.
   8560         */
   8561        bool ok = vfp_access_check(s);
   8562        assert(ok);
   8563        tmp = tcg_const_i32(a->size);
   8564        store_cpu_field(tmp, v7m.ltpsize);
   8565        /*
   8566         * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
   8567         * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
   8568         */
   8569    }
   8570    gen_jmp_tb(s, s->base.pc_next, 1);
   8571
   8572    gen_set_label(nextlabel);
   8573    gen_jmp(s, read_pc(s) + a->imm);
   8574    return true;
   8575}
   8576
   8577static bool trans_LE(DisasContext *s, arg_LE *a)
   8578{
   8579    /*
   8580     * M-profile low-overhead loop end. The architecture permits an
   8581     * implementation to discard the LO_BRANCH_INFO cache at any time,
   8582     * and we take the IMPDEF option to never set it in the first place
   8583     * (equivalent to always discarding it immediately), because for QEMU
   8584     * a "real" implementation would be complicated and wouldn't execute
   8585     * any faster.
   8586     */
   8587    TCGv_i32 tmp;
   8588    TCGLabel *loopend;
   8589    bool fpu_active;
   8590
   8591    if (!dc_isar_feature(aa32_lob, s)) {
   8592        return false;
   8593    }
   8594    if (a->f && a->tp) {
   8595        return false;
   8596    }
   8597    if (s->condexec_mask) {
   8598        /*
   8599         * LE in an IT block is CONSTRAINED UNPREDICTABLE;
   8600         * we choose to UNDEF, because otherwise our use of
   8601         * gen_goto_tb(1) would clash with the use of TB exit 1
   8602         * in the dc->condjmp condition-failed codepath in
   8603         * arm_tr_tb_stop() and we'd get an assertion.
   8604         */
   8605        return false;
   8606    }
   8607    if (a->tp) {
   8608        /* LETP */
   8609        if (!dc_isar_feature(aa32_mve, s)) {
   8610            return false;
   8611        }
   8612        if (!vfp_access_check(s)) {
   8613            s->eci_handled = true;
   8614            return true;
   8615        }
   8616    }
   8617
   8618    /* LE/LETP is OK with ECI set and leaves it untouched */
   8619    s->eci_handled = true;
   8620
   8621    /*
   8622     * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
   8623     * UsageFault exception for the LE insn in that case. Note that we
   8624     * are not directly checking FPSCR.LTPSIZE but instead check the
   8625     * pseudocode LTPSIZE() function, which returns 4 if the FPU is
   8626     * not currently active (ie ActiveFPState() returns false). We
   8627     * can identify not-active purely from our TB state flags, as the
   8628     * FPU is active only if:
   8629     *  the FPU is enabled
   8630     *  AND lazy state preservation is not active
   8631     *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
   8632     *
   8633     * Usually we don't need to care about this distinction between
   8634     * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
   8635     * will either take an exception or clear the conditions that make
   8636     * the FPU not active. But LE is an unusual case of a non-FP insn
   8637     * that looks at LTPSIZE.
   8638     */
   8639    fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
   8640
   8641    if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
   8642        /* Need to do a runtime check for LTPSIZE != 4 */
   8643        TCGLabel *skipexc = gen_new_label();
   8644        tmp = load_cpu_field(v7m.ltpsize);
   8645        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc);
   8646        tcg_temp_free_i32(tmp);
   8647        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
   8648                           default_exception_el(s));
   8649        gen_set_label(skipexc);
   8650    }
   8651
   8652    if (a->f) {
   8653        /* Loop-forever: just jump back to the loop start */
   8654        gen_jmp(s, read_pc(s) - a->imm);
   8655        return true;
   8656    }
   8657
   8658    /*
   8659     * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
   8660     * For LE, we know at this point that LTPSIZE must be 4 and the
   8661     * loop decrement value is 1. For LETP we need to calculate the decrement
   8662     * value from LTPSIZE.
   8663     */
   8664    loopend = gen_new_label();
   8665    if (!a->tp) {
   8666        tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend);
   8667        tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
   8668    } else {
   8669        /*
   8670         * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
   8671         * so that decr stays live after the brcondi.
   8672         */
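        /*
         * For example, byte elements (LTPSIZE == 0) give a decrement of
         * 16, i.e. one 128-bit MVE vector's worth of elements per
         * iteration.
         */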
   8673        TCGv_i32 decr = tcg_temp_local_new_i32();
   8674        TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
   8675        tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
   8676        tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
   8677        tcg_temp_free_i32(ltpsize);
   8678
   8679        tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend);
   8680
   8681        tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
   8682        tcg_temp_free_i32(decr);
   8683    }
   8684    /* Jump back to the loop start */
   8685    gen_jmp(s, read_pc(s) - a->imm);
   8686
   8687    gen_set_label(loopend);
   8688    if (a->tp) {
   8689        /* Exits from tail-pred loops must reset LTPSIZE to 4 */
   8690        tmp = tcg_const_i32(4);
   8691        store_cpu_field(tmp, v7m.ltpsize);
   8692    }
   8693    /* End TB, continuing to following insn */
   8694    gen_jmp_tb(s, s->base.pc_next, 1);
   8695    return true;
   8696}
   8697
   8698static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
   8699{
   8700    /*
   8701     * M-profile Loop Clear with Tail Predication. Since our implementation
   8702     * doesn't cache branch information, all we need to do is reset
   8703     * FPSCR.LTPSIZE to 4.
   8704     */
   8705    TCGv_i32 ltpsize;
   8706
   8707    if (!dc_isar_feature(aa32_lob, s) ||
   8708        !dc_isar_feature(aa32_mve, s)) {
   8709        return false;
   8710    }
   8711
   8712    if (!vfp_access_check(s)) {
   8713        return true;
   8714    }
   8715
   8716    ltpsize = tcg_const_i32(4);
   8717    store_cpu_field(ltpsize, v7m.ltpsize);
   8718    return true;
   8719}
   8720
   8721static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
   8722{
   8723    /*
   8724     * M-profile Create Vector Tail Predicate. This insn is itself
   8725     * predicated and is subject to beatwise execution.
   8726     */
   8727    TCGv_i32 rn_shifted, masklen;
   8728
   8729    if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
   8730        return false;
   8731    }
   8732
   8733    if (!mve_eci_check(s) || !vfp_access_check(s)) {
   8734        return true;
   8735    }
   8736
   8737    /*
    8738     * We pre-calculate the mask length here to avoid needing
    8739     * multiple helpers specialized for size.
   8740     * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
   8741     */
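    /*
     * For example, with halfword elements (size == 1): rn == 5 gives
     * 5 <= 8, so the helper sees 5 << 1 = 10 (a 10-byte mask, i.e. 5
     * active lanes), while any rn above 8 is clamped to 16 (all lanes).
     */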
   8742    rn_shifted = tcg_temp_new_i32();
   8743    masklen = load_reg(s, a->rn);
   8744    tcg_gen_shli_i32(rn_shifted, masklen, a->size);
   8745    tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
   8746                        masklen, tcg_constant_i32(1 << (4 - a->size)),
   8747                        rn_shifted, tcg_constant_i32(16));
   8748    gen_helper_mve_vctp(cpu_env, masklen);
   8749    tcg_temp_free_i32(masklen);
   8750    tcg_temp_free_i32(rn_shifted);
   8751    /* This insn updates predication bits */
   8752    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
   8753    mve_update_eci(s);
   8754    return true;
   8755}
   8756
   8757static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
   8758{
   8759    TCGv_i32 addr, tmp;
   8760
   8761    tmp = load_reg(s, a->rm);
   8762    if (half) {
   8763        tcg_gen_add_i32(tmp, tmp, tmp);
   8764    }
   8765    addr = load_reg(s, a->rn);
   8766    tcg_gen_add_i32(addr, addr, tmp);
   8767
   8768    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
   8769    tcg_temp_free_i32(addr);
   8770
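    /*
     * The loaded byte/halfword is a branch offset measured in halfwords,
     * so double it before adding it to the PC.
     */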
   8771    tcg_gen_add_i32(tmp, tmp, tmp);
   8772    tcg_gen_addi_i32(tmp, tmp, read_pc(s));
   8773    store_reg(s, 15, tmp);
   8774    return true;
   8775}
   8776
   8777static bool trans_TBB(DisasContext *s, arg_tbranch *a)
   8778{
   8779    return op_tbranch(s, a, false);
   8780}
   8781
   8782static bool trans_TBH(DisasContext *s, arg_tbranch *a)
   8783{
   8784    return op_tbranch(s, a, true);
   8785}
   8786
   8787static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
   8788{
   8789    TCGv_i32 tmp = load_reg(s, a->rn);
   8790
   8791    arm_gen_condlabel(s);
   8792    tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
   8793                        tmp, 0, s->condlabel);
   8794    tcg_temp_free_i32(tmp);
   8795    gen_jmp(s, read_pc(s) + a->imm);
   8796    return true;
   8797}
   8798
   8799/*
   8800 * Supervisor call - both T32 & A32 come here so we need to check
   8801 * which mode we are in when checking for semihosting.
   8802 */
   8803
   8804static bool trans_SVC(DisasContext *s, arg_SVC *a)
   8805{
   8806    const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
   8807
   8808    if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
   8809#ifndef CONFIG_USER_ONLY
   8810        !IS_USER(s) &&
   8811#endif
   8812        (a->imm == semihost_imm)) {
   8813        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
   8814    } else {
   8815        gen_set_pc_im(s, s->base.pc_next);
   8816        s->svc_imm = a->imm;
   8817        s->base.is_jmp = DISAS_SWI;
   8818    }
   8819    return true;
   8820}
   8821
   8822/*
   8823 * Unconditional system instructions
   8824 */
   8825
   8826static bool trans_RFE(DisasContext *s, arg_RFE *a)
   8827{
   8828    static const int8_t pre_offset[4] = {
   8829        /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
   8830    };
   8831    static const int8_t post_offset[4] = {
   8832        /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
   8833    };
   8834    TCGv_i32 addr, t1, t2;
   8835
   8836    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
   8837        return false;
   8838    }
   8839    if (IS_USER(s)) {
   8840        unallocated_encoding(s);
   8841        return true;
   8842    }
   8843
   8844    addr = load_reg(s, a->rn);
   8845    tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
   8846
   8847    /* Load PC into tmp and CPSR into tmp2.  */
   8848    t1 = tcg_temp_new_i32();
   8849    gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
   8850    tcg_gen_addi_i32(addr, addr, 4);
   8851    t2 = tcg_temp_new_i32();
   8852    gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
   8853
   8854    if (a->w) {
   8855        /* Base writeback.  */
   8856        tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
   8857        store_reg(s, a->rn, addr);
   8858    } else {
   8859        tcg_temp_free_i32(addr);
   8860    }
   8861    gen_rfe(s, t1, t2);
   8862    return true;
   8863}
   8864
   8865static bool trans_SRS(DisasContext *s, arg_SRS *a)
   8866{
   8867    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
   8868        return false;
   8869    }
   8870    gen_srs(s, a->mode, a->pu, a->w);
   8871    return true;
   8872}
   8873
   8874static bool trans_CPS(DisasContext *s, arg_CPS *a)
   8875{
   8876    uint32_t mask, val;
   8877
   8878    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
   8879        return false;
   8880    }
   8881    if (IS_USER(s)) {
   8882        /* Implemented as NOP in user mode.  */
   8883        return true;
   8884    }
   8885    /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
   8886
   8887    mask = val = 0;
   8888    if (a->imod & 2) {
   8889        if (a->A) {
   8890            mask |= CPSR_A;
   8891        }
   8892        if (a->I) {
   8893            mask |= CPSR_I;
   8894        }
   8895        if (a->F) {
   8896            mask |= CPSR_F;
   8897        }
   8898        if (a->imod & 1) {
   8899            val |= mask;
   8900        }
   8901    }
   8902    if (a->M) {
   8903        mask |= CPSR_M;
   8904        val |= a->mode;
   8905    }
   8906    if (mask) {
   8907        gen_set_psr_im(s, mask, 0, val);
   8908    }
   8909    return true;
   8910}
   8911
   8912static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
   8913{
   8914    TCGv_i32 tmp, addr, el;
   8915
   8916    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
   8917        return false;
   8918    }
   8919    if (IS_USER(s)) {
   8920        /* Implemented as NOP in user mode.  */
   8921        return true;
   8922    }
   8923
   8924    tmp = tcg_const_i32(a->im);
   8925    /* FAULTMASK */
   8926    if (a->F) {
   8927        addr = tcg_const_i32(19);
   8928        gen_helper_v7m_msr(cpu_env, addr, tmp);
   8929        tcg_temp_free_i32(addr);
   8930    }
   8931    /* PRIMASK */
   8932    if (a->I) {
   8933        addr = tcg_const_i32(16);
   8934        gen_helper_v7m_msr(cpu_env, addr, tmp);
   8935        tcg_temp_free_i32(addr);
   8936    }
   8937    el = tcg_const_i32(s->current_el);
   8938    gen_helper_rebuild_hflags_m32(cpu_env, el);
   8939    tcg_temp_free_i32(el);
   8940    tcg_temp_free_i32(tmp);
   8941    gen_lookup_tb(s);
   8942    return true;
   8943}
   8944
   8945/*
   8946 * Clear-Exclusive, Barriers
   8947 */
   8948
   8949static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
   8950{
   8951    if (s->thumb
   8952        ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
   8953        : !ENABLE_ARCH_6K) {
   8954        return false;
   8955    }
   8956    gen_clrex(s);
   8957    return true;
   8958}
   8959
   8960static bool trans_DSB(DisasContext *s, arg_DSB *a)
   8961{
   8962    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
   8963        return false;
   8964    }
   8965    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
   8966    return true;
   8967}
   8968
   8969static bool trans_DMB(DisasContext *s, arg_DMB *a)
   8970{
   8971    return trans_DSB(s, NULL);
   8972}
   8973
   8974static bool trans_ISB(DisasContext *s, arg_ISB *a)
   8975{
   8976    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
   8977        return false;
   8978    }
   8979    /*
   8980     * We need to break the TB after this insn to execute
   8981     * self-modifying code correctly and also to take
   8982     * any pending interrupts immediately.
   8983     */
   8984    s->base.is_jmp = DISAS_TOO_MANY;
   8985    return true;
   8986}
   8987
   8988static bool trans_SB(DisasContext *s, arg_SB *a)
   8989{
   8990    if (!dc_isar_feature(aa32_sb, s)) {
   8991        return false;
   8992    }
   8993    /*
   8994     * TODO: There is no speculation barrier opcode
   8995     * for TCG; MB and end the TB instead.
   8996     */
   8997    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
   8998    s->base.is_jmp = DISAS_TOO_MANY;
   8999    return true;
   9000}
   9001
   9002static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
   9003{
   9004    if (!ENABLE_ARCH_6) {
   9005        return false;
   9006    }
   9007    if (a->E != (s->be_data == MO_BE)) {
   9008        gen_helper_setend(cpu_env);
   9009        s->base.is_jmp = DISAS_UPDATE_EXIT;
   9010    }
   9011    return true;
   9012}
   9013
   9014/*
   9015 * Preload instructions
   9016 * All are nops, contingent on the appropriate arch level.
   9017 */
   9018
   9019static bool trans_PLD(DisasContext *s, arg_PLD *a)
   9020{
   9021    return ENABLE_ARCH_5TE;
   9022}
   9023
   9024static bool trans_PLDW(DisasContext *s, arg_PLD *a)
   9025{
   9026    return arm_dc_feature(s, ARM_FEATURE_V7MP);
   9027}
   9028
   9029static bool trans_PLI(DisasContext *s, arg_PLD *a)
   9030{
   9031    return ENABLE_ARCH_7;
   9032}
   9033
   9034/*
   9035 * If-then
   9036 */
   9037
   9038static bool trans_IT(DisasContext *s, arg_IT *a)
   9039{
   9040    int cond_mask = a->cond_mask;
   9041
   9042    /*
   9043     * No actual code generated for this insn, just setup state.
   9044     *
   9045     * Combinations of firstcond and mask which set up an 0b1111
   9046     * condition are UNPREDICTABLE; we take the CONSTRAINED
   9047     * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
   9048     * i.e. both meaning "execute always".
   9049     */
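    /* The '& 0xe' below folds a firstcond of 0b1111 onto 0b1110. */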
   9050    s->condexec_cond = (cond_mask >> 4) & 0xe;
   9051    s->condexec_mask = cond_mask & 0x1f;
   9052    return true;
   9053}
   9054
   9055/* v8.1M CSEL/CSINC/CSNEG/CSINV */
   9056static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
   9057{
   9058    TCGv_i32 rn, rm, zero;
   9059    DisasCompare c;
   9060
   9061    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
   9062        return false;
   9063    }
   9064
   9065    if (a->rm == 13) {
   9066        /* SEE "Related encodings" (MVE shifts) */
   9067        return false;
   9068    }
   9069
   9070    if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
   9071        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
   9072        return false;
   9073    }
   9074
   9075    /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
   9076    if (a->rn == 15) {
   9077        rn = tcg_const_i32(0);
   9078    } else {
   9079        rn = load_reg(s, a->rn);
   9080    }
   9081    if (a->rm == 15) {
   9082        rm = tcg_const_i32(0);
   9083    } else {
   9084        rm = load_reg(s, a->rm);
   9085    }
   9086
   9087    switch (a->op) {
   9088    case 0: /* CSEL */
   9089        break;
   9090    case 1: /* CSINC */
   9091        tcg_gen_addi_i32(rm, rm, 1);
   9092        break;
   9093    case 2: /* CSINV */
   9094        tcg_gen_not_i32(rm, rm);
   9095        break;
   9096    case 3: /* CSNEG */
   9097        tcg_gen_neg_i32(rm, rm);
   9098        break;
   9099    default:
   9100        g_assert_not_reached();
   9101    }
   9102
   9103    arm_test_cc(&c, a->fcond);
   9104    zero = tcg_const_i32(0);
   9105    tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
   9106    arm_free_cc(&c);
   9107    tcg_temp_free_i32(zero);
   9108
   9109    store_reg(s, a->rd, rn);
   9110    tcg_temp_free_i32(rm);
   9111
   9112    return true;
   9113}
   9114
   9115/*
   9116 * Legacy decoder.
   9117 */
   9118
   9119static void disas_arm_insn(DisasContext *s, unsigned int insn)
   9120{
   9121    unsigned int cond = insn >> 28;
   9122
   9123    /* M variants do not implement ARM mode; this must raise the INVSTATE
   9124     * UsageFault exception.
   9125     */
   9126    if (arm_dc_feature(s, ARM_FEATURE_M)) {
   9127        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
   9128                           default_exception_el(s));
   9129        return;
   9130    }
   9131
   9132    if (s->pstate_il) {
   9133        /*
   9134         * Illegal execution state. This has priority over BTI
   9135         * exceptions, but comes after instruction abort exceptions.
   9136         */
   9137        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
   9138                           syn_illegalstate(), default_exception_el(s));
   9139        return;
   9140    }
   9141
   9142    if (cond == 0xf) {
   9143        /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
   9144         * choose to UNDEF. In ARMv5 and above the space is used
   9145         * for miscellaneous unconditional instructions.
   9146         */
   9147        if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
   9148            unallocated_encoding(s);
   9149            return;
   9150        }
   9151
   9152        /* Unconditional instructions.  */
   9153        /* TODO: Perhaps merge these into one decodetree output file.  */
   9154        if (disas_a32_uncond(s, insn) ||
   9155            disas_vfp_uncond(s, insn) ||
   9156            disas_neon_dp(s, insn) ||
   9157            disas_neon_ls(s, insn) ||
   9158            disas_neon_shared(s, insn)) {
   9159            return;
   9160        }
   9161        /* fall back to legacy decoder */
   9162
   9163        if ((insn & 0x0e000f00) == 0x0c000100) {
   9164            if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
   9165                /* iWMMXt register transfer.  */
   9166                if (extract32(s->c15_cpar, 1, 1)) {
   9167                    if (!disas_iwmmxt_insn(s, insn)) {
   9168                        return;
   9169                    }
   9170                }
   9171            }
   9172        }
   9173        goto illegal_op;
   9174    }
   9175    if (cond != 0xe) {
   9176        /* if not always execute, we generate a conditional jump to
   9177           next instruction */
   9178        arm_skip_unless(s, cond);
   9179    }
   9180
   9181    /* TODO: Perhaps merge these into one decodetree output file.  */
   9182    if (disas_a32(s, insn) ||
   9183        disas_vfp(s, insn)) {
   9184        return;
   9185    }
   9186    /* fall back to legacy decoder */
   9187    /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
   9188    if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
   9189        if (((insn & 0x0c000e00) == 0x0c000000)
   9190            && ((insn & 0x03000000) != 0x03000000)) {
   9191            /* Coprocessor insn, coprocessor 0 or 1 */
   9192            disas_xscale_insn(s, insn);
   9193            return;
   9194        }
   9195    }
   9196
   9197illegal_op:
   9198    unallocated_encoding(s);
   9199}
   9200
   9201static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
   9202{
   9203    /*
   9204     * Return true if this is a 16 bit instruction. We must be precise
   9205     * about this (matching the decode).
   9206     */
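    /*
     * 0x1d is 0b11101: anything whose top five bits sort below that
     * value is one of the 16-bit encodings; 0b11101, 0b11110 and
     * 0b11111 are the 32-bit prefixes handled below.
     */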
   9207    if ((insn >> 11) < 0x1d) {
   9208        /* Definitely a 16-bit instruction */
   9209        return true;
   9210    }
   9211
   9212    /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
   9213     * first half of a 32-bit Thumb insn. Thumb-1 cores might
   9214     * end up actually treating this as two 16-bit insns, though,
   9215     * if it's half of a bl/blx pair that might span a page boundary.
   9216     */
   9217    if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
   9218        arm_dc_feature(s, ARM_FEATURE_M)) {
   9219        /* Thumb2 cores (including all M profile ones) always treat
   9220         * 32-bit insns as 32-bit.
   9221         */
   9222        return false;
   9223    }
   9224
   9225    if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
   9226        /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
   9227         * is not on the next page; we merge this into a 32-bit
   9228         * insn.
   9229         */
   9230        return false;
   9231    }
   9232    /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
   9233     * 0b1111_1xxx_xxxx_xxxx : BL suffix;
   9234     * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
   9235     *  -- handle as single 16 bit insn
   9236     */
   9237    return true;
   9238}
   9239
   9240/* Translate a 32-bit thumb instruction. */
   9241static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
   9242{
   9243    /*
   9244     * ARMv6-M supports a limited subset of Thumb2 instructions.
   9245     * Other Thumb1 architectures allow only 32-bit
   9246     * combined BL/BLX prefix and suffix.
   9247     */
   9248    if (arm_dc_feature(s, ARM_FEATURE_M) &&
   9249        !arm_dc_feature(s, ARM_FEATURE_V7)) {
   9250        int i;
   9251        bool found = false;
   9252        static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
   9253                                               0xf3b08040 /* dsb */,
   9254                                               0xf3b08050 /* dmb */,
   9255                                               0xf3b08060 /* isb */,
   9256                                               0xf3e08000 /* mrs */,
   9257                                               0xf000d000 /* bl */};
   9258        static const uint32_t armv6m_mask[] = {0xffe0d000,
   9259                                               0xfff0d0f0,
   9260                                               0xfff0d0f0,
   9261                                               0xfff0d0f0,
   9262                                               0xffe0d000,
   9263                                               0xf800d000};
   9264
   9265        for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
   9266            if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
   9267                found = true;
   9268                break;
   9269            }
   9270        }
   9271        if (!found) {
   9272            goto illegal_op;
   9273        }
   9274    } else if ((insn & 0xf800e800) != 0xf000e800)  {
   9275        if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
   9276            unallocated_encoding(s);
   9277            return;
   9278        }
   9279    }
   9280
   9281    if (arm_dc_feature(s, ARM_FEATURE_M)) {
   9282        /*
   9283         * NOCP takes precedence over any UNDEF for (almost) the
   9284         * entire wide range of coprocessor-space encodings, so check
   9285         * for it first before proceeding to actually decode eg VFP
   9286         * insns. This decode also handles the few insns which are
   9287         * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
   9288         */
   9289        if (disas_m_nocp(s, insn)) {
   9290            return;
   9291        }
   9292    }
   9293
   9294    if ((insn & 0xef000000) == 0xef000000) {
   9295        /*
   9296         * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
   9297         * transform into
   9298         * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
   9299         */
   9300        uint32_t a32_insn = (insn & 0xe2ffffff) |
   9301            ((insn & (1 << 28)) >> 4) | (1 << 28);
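        /*
         * Here the AND clears bits 28, 27, 26 and 24, the shift moves
         * the T32 'p' bit from bit 28 down to bit 24, and bit 28 is
         * then forced to 1, giving the A32 0b1111_001p top byte.
         */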
   9302
   9303        if (disas_neon_dp(s, a32_insn)) {
   9304            return;
   9305        }
   9306    }
   9307
   9308    if ((insn & 0xff100000) == 0xf9000000) {
   9309        /*
   9310         * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
   9311         * transform into
   9312         * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
   9313         */
   9314        uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
   9315
   9316        if (disas_neon_ls(s, a32_insn)) {
   9317            return;
   9318        }
   9319    }
   9320
   9321    /*
   9322     * TODO: Perhaps merge these into one decodetree output file.
   9323     * Note disas_vfp is written for a32 with cond field in the
   9324     * top nibble.  The t32 encoding requires 0xe in the top nibble.
   9325     */
   9326    if (disas_t32(s, insn) ||
   9327        disas_vfp_uncond(s, insn) ||
   9328        disas_neon_shared(s, insn) ||
   9329        disas_mve(s, insn) ||
   9330        ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
   9331        return;
   9332    }
   9333
   9334illegal_op:
   9335    unallocated_encoding(s);
   9336}
   9337
   9338static void disas_thumb_insn(DisasContext *s, uint32_t insn)
   9339{
   9340    if (!disas_t16(s, insn)) {
   9341        unallocated_encoding(s);
   9342    }
   9343}
   9344
   9345static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
   9346{
   9347    /* Return true if the insn at dc->base.pc_next might cross a page boundary.
   9348     * (False positives are OK, false negatives are not.)
   9349     * We know this is a Thumb insn, and our caller ensures we are
   9350     * only called if dc->base.pc_next is less than 4 bytes from the page
   9351     * boundary, so we cross the page if the first 16 bits indicate
   9352     * that this is a 32 bit insn.
   9353     */
   9354    uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
   9355
   9356    return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
   9357}
   9358
   9359static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
   9360{
   9361    DisasContext *dc = container_of(dcbase, DisasContext, base);
   9362    CPUARMState *env = cs->env_ptr;
   9363    ARMCPU *cpu = env_archcpu(env);
   9364    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
   9365    uint32_t condexec, core_mmu_idx;
   9366
   9367    dc->isar = &cpu->isar;
   9368    dc->condjmp = 0;
   9369
   9370    dc->aarch64 = 0;
   9371    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
   9372     * there is no secure EL1, so we route exceptions to EL3.
   9373     */
   9374    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
   9375                               !arm_el_is_aa64(env, 3);
   9376    dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
   9377    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
   9378    condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
   9379    /*
   9380     * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
   9381     * is always the IT bits. On M-profile, some of the reserved encodings
   9382     * of IT are used instead to indicate either ICI or ECI, which
   9383     * indicate partial progress of a restartable insn that was interrupted
   9384     * partway through by an exception:
   9385     *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
   9386     *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
   9387     * In all cases CONDEXEC == 0 means "not in IT block or restartable
   9388     * insn, behave normally".
   9389     */
   9390    dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
   9391    dc->eci_handled = false;
   9392    dc->insn_eci_rewind = NULL;
   9393    if (condexec & 0xf) {
   9394        dc->condexec_mask = (condexec & 0xf) << 1;
   9395        dc->condexec_cond = condexec >> 4;
   9396    } else {
   9397        if (arm_feature(env, ARM_FEATURE_M)) {
   9398            dc->eci = condexec >> 4;
   9399        }
   9400    }
   9401
   9402    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
   9403    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
   9404    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
   9405#if !defined(CONFIG_USER_ONLY)
   9406    dc->user = (dc->current_el == 0);
   9407#endif
   9408    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
   9409    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
   9410    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
   9411
   9412    if (arm_feature(env, ARM_FEATURE_M)) {
   9413        dc->vfp_enabled = 1;
   9414        dc->be_data = MO_TE;
   9415        dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
   9416        dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
   9417            regime_is_secure(env, dc->mmu_idx);
   9418        dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
   9419        dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
   9420        dc->v7m_new_fp_ctxt_needed =
   9421            EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
   9422        dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
   9423        dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
   9424    } else {
   9425        dc->debug_target_el = EX_TBFLAG_ANY(tb_flags, DEBUG_TARGET_EL);
   9426        dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
   9427        dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
   9428        dc->ns = EX_TBFLAG_A32(tb_flags, NS);
   9429        dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
   9430        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
   9431            dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
   9432        } else {
   9433            dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
   9434            dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
   9435        }
   9436    }
   9437    dc->cp_regs = cpu->cp_regs;
   9438    dc->features = env->features;
   9439
   9440    /* Single step state. The code-generation logic here is:
   9441     *  SS_ACTIVE == 0:
   9442     *   generate code with no special handling for single-stepping (except
   9443     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
   9444     *   this happens anyway because those changes are all system register or
   9445     *   PSTATE writes).
   9446     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
   9447     *   emit code for one insn
   9448     *   emit code to clear PSTATE.SS
   9449     *   emit code to generate software step exception for completed step
   9450     *   end TB (as usual for having generated an exception)
   9451     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
   9452     *   emit code to generate a software step exception
   9453     *   end the TB
   9454     */
   9455    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
   9456    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
   9457    dc->is_ldex = false;
   9458
   9459    dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
   9460
   9461    /* If architectural single step active, limit to 1.  */
   9462    if (is_singlestepping(dc)) {
   9463        dc->base.max_insns = 1;
   9464    }
   9465
   9466    /* ARM is a fixed-length ISA.  Bound the number of insns to execute
   9467       to those left on the page.  */
   9468    if (!dc->thumb) {
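               /*
                * -(pc_first | TARGET_PAGE_MASK) is the number of bytes from
                * pc_first to the end of its page, so dividing by the 4-byte
                * A32 insn size gives how many insns still fit on this page.
                */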
   9469        int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
   9470        dc->base.max_insns = MIN(dc->base.max_insns, bound);
   9471    }
   9472
   9473    cpu_V0 = tcg_temp_new_i64();
   9474    cpu_V1 = tcg_temp_new_i64();
   9475    cpu_M0 = tcg_temp_new_i64();
   9476}
   9477
   9478static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
   9479{
   9480    DisasContext *dc = container_of(dcbase, DisasContext, base);
   9481
   9482    /* A note on handling of the condexec (IT) bits:
   9483     *
   9484     * We want to avoid the overhead of having to write the updated condexec
   9485     * bits back to the CPUARMState for every instruction in an IT block. So:
   9486     * (1) if the condexec bits are not already zero then we write
   9487     * zero back into the CPUARMState now. This avoids complications trying
   9488     * to do it at the end of the block. (For example if we don't do this
   9489     * it's hard to identify whether we can safely skip writing condexec
   9490     * at the end of the TB, which we definitely want to do for the case
   9491     * where a TB doesn't do anything with the IT state at all.)
   9492     * (2) if we are going to leave the TB then we call gen_set_condexec()
   9493     * which will write the correct value into CPUARMState if zero is wrong.
   9494     * This is done both for leaving the TB at the end, and for leaving
   9495     * it because of an exception we know will happen, which is done in
   9496     * gen_exception_insn(). The latter is necessary because we need to
   9497     * leave the TB with the PC/IT state just prior to execution of the
   9498     * instruction which caused the exception.
   9499     * (3) if we leave the TB unexpectedly (eg a data abort on a load)
   9500     * then the CPUARMState will be wrong and we need to reset it.
   9501     * This is handled in the same way as restoration of the
   9502     * PC in these situations; we save the value of the condexec bits
   9503     * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
   9504     * then uses this to restore them after an exception.
   9505     *
   9506     * Note that there are no instructions which can read the condexec
   9507     * bits, and none which can write non-static values to them, so
   9508     * we don't need to care about whether CPUARMState is correct in the
   9509     * middle of a TB.
   9510     */
   9511
   9512    /* Reset the conditional execution bits immediately. This avoids
   9513       complications trying to do it at the end of the block.  */
   9514    if (dc->condexec_mask || dc->condexec_cond) {
   9515        TCGv_i32 tmp = tcg_temp_new_i32();
   9516        tcg_gen_movi_i32(tmp, 0);
   9517        store_cpu_field(tmp, condexec_bits);
   9518    }
   9519}
   9520
   9521static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
   9522{
   9523    DisasContext *dc = container_of(dcbase, DisasContext, base);
   9524    /*
   9525     * The ECI/ICI bits share PSR bits with the IT bits, so we
   9526     * need to reconstitute the bits from the split-out DisasContext
   9527     * fields here.
   9528     */
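           /*
            * The reassembly below is the inverse of the split done in
            * arm_tr_init_disas_context(): ECI goes back into bits [7:4],
            * otherwise the IT condition goes into [7:4] and the mask
            * into [3:0].
            */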
   9529    uint32_t condexec_bits;
   9530
   9531    if (dc->eci) {
   9532        condexec_bits = dc->eci << 4;
   9533    } else {
   9534        condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
   9535    }
   9536    tcg_gen_insn_start(dc->base.pc_next, condexec_bits, 0);
   9537    dc->insn_start = tcg_last_op();
   9538}
   9539
   9540static bool arm_pre_translate_insn(DisasContext *dc)
   9541{
   9542#ifdef CONFIG_USER_ONLY
   9543    /* Intercept jump to the magic kernel page.  */
   9544    if (dc->base.pc_next >= 0xffff0000) {
   9545        /* We always get here via a jump, so we know we are not in a
   9546           conditional execution block.  */
   9547        gen_exception_internal(EXCP_KERNEL_TRAP);
   9548        dc->base.is_jmp = DISAS_NORETURN;
   9549        return true;
   9550    }
   9551#endif
   9552
   9553    if (dc->ss_active && !dc->pstate_ss) {
   9554        /* Singlestep state is Active-pending.
   9555         * If we're in this state at the start of a TB then either
   9556         *  a) we just took an exception to an EL which is being debugged
   9557         *     and this is the first insn in the exception handler
   9558         *  b) debug exceptions were masked and we just unmasked them
   9559         *     without changing EL (eg by clearing PSTATE.D)
   9560         * In either case we're going to take a swstep exception in the
   9561         * "did not step an insn" case, and so the syndrome ISV and EX
   9562         * bits should be zero.
   9563         */
   9564        assert(dc->base.num_insns == 1);
   9565        gen_swstep_exception(dc, 0, 0);
   9566        dc->base.is_jmp = DISAS_NORETURN;
   9567        return true;
   9568    }
   9569
   9570    return false;
   9571}
   9572
   9573static void arm_post_translate_insn(DisasContext *dc)
   9574{
   9575    if (dc->condjmp && !dc->base.is_jmp) {
   9576        gen_set_label(dc->condlabel);
   9577        dc->condjmp = 0;
   9578    }
   9579    translator_loop_temp_check(&dc->base);
   9580}
   9581
   9582static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
   9583{
   9584    DisasContext *dc = container_of(dcbase, DisasContext, base);
   9585    CPUARMState *env = cpu->env_ptr;
   9586    unsigned int insn;
   9587
   9588    if (arm_pre_translate_insn(dc)) {
   9589        dc->base.pc_next += 4;
   9590        return;
   9591    }
   9592
   9593    dc->pc_curr = dc->base.pc_next;
   9594    insn = arm_ldl_code(env, &dc->base, dc->base.pc_next, dc->sctlr_b);
   9595    dc->insn = insn;
   9596    dc->base.pc_next += 4;
   9597    disas_arm_insn(dc, insn);
   9598
   9599    arm_post_translate_insn(dc);
   9600
   9601    /* ARM is a fixed-length ISA.  We performed the cross-page check
   9602       in init_disas_context by adjusting max_insns.  */
   9603}
   9604
   9605static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
   9606{
   9607    /* Return true if this Thumb insn is always unconditional,
   9608     * even inside an IT block. This is true of only a very few
   9609     * instructions: BKPT, HLT, and SG.
   9610     *
   9611     * A larger class of instructions are UNPREDICTABLE if used
   9612     * inside an IT block; we do not need to detect those here, because
   9613     * what we do by default (perform the cc check and update the IT
   9614     * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
   9615     * choice for those situations.
   9616     *
   9617     * insn is either a 16-bit or a 32-bit instruction; the two are
   9618     * distinguishable because for the 16-bit case the top 16 bits
   9619     * are zeroes, and that isn't a valid 32-bit encoding.
   9620     */
   9621    if ((insn & 0xffffff00) == 0xbe00) {
   9622        /* BKPT */
   9623        return true;
   9624    }
   9625
   9626    if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
   9627        !arm_dc_feature(s, ARM_FEATURE_M)) {
   9628        /* HLT: v8A only. This is unconditional even when it is going to
   9629         * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
   9630         * For v7 cores this was a plain old undefined encoding and so
   9631         * honours its cc check. (We might be using the encoding as
   9632         * a semihosting trap, but we don't change the cc check behaviour
   9633         * on that account, because a debugger connected to a real v7A
   9634         * core and emulating semihosting traps by catching the UNDEF
   9635         * exception would also only see cases where the cc check passed.
   9636         * No guest code should be trying to do a HLT semihosting trap
   9637         * in an IT block anyway.
   9638         */
   9639        return true;
   9640    }
   9641
   9642    if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
   9643        arm_dc_feature(s, ARM_FEATURE_M)) {
   9644        /* SG: v8M only */
   9645        return true;
   9646    }
   9647
   9648    return false;
   9649}
   9650
   9651static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
   9652{
   9653    DisasContext *dc = container_of(dcbase, DisasContext, base);
   9654    CPUARMState *env = cpu->env_ptr;
   9655    uint32_t insn;
   9656    bool is_16bit;
   9657
   9658    if (arm_pre_translate_insn(dc)) {
   9659        dc->base.pc_next += 2;
   9660        return;
   9661    }
   9662
   9663    dc->pc_curr = dc->base.pc_next;
   9664    insn = arm_lduw_code(env, &dc->base, dc->base.pc_next, dc->sctlr_b);
   9665    is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
   9666    dc->base.pc_next += 2;
   9667    if (!is_16bit) {
   9668        uint32_t insn2 = arm_lduw_code(env, &dc->base, dc->base.pc_next,
   9669                                       dc->sctlr_b);
   9670
   9671        insn = insn << 16 | insn2;
   9672        dc->base.pc_next += 2;
   9673    }
   9674    dc->insn = insn;
   9675
   9676    if (dc->pstate_il) {
   9677        /*
   9678         * Illegal execution state. This has priority over BTI
   9679         * exceptions, but comes after instruction abort exceptions.
   9680         */
   9681        gen_exception_insn(dc, dc->pc_curr, EXCP_UDEF,
   9682                           syn_illegalstate(), default_exception_el(dc));
   9683        return;
   9684    }
   9685
   9686    if (dc->eci) {
   9687        /*
   9688         * For M-profile continuable instructions, ECI/ICI handling
   9689         * falls into these cases:
   9690         *  - interrupt-continuable instructions
   9691         *     These are the various load/store multiple insns (both
   9692         *     integer and fp). The ICI bits indicate the register
   9693         *     where the load/store can resume. We make the IMPDEF
   9694         *     choice to always do "instruction restart", ie ignore
   9695         *     the ICI value and always execute the ldm/stm from the
   9696         *     start. So all we need to do is zero PSR.ICI if the
   9697         *     insn executes.
   9698         *  - MVE instructions subject to beat-wise execution
   9699         *     Here the ECI bits indicate which beats have already been
   9700         *     executed, and we must honour this. Each insn of this
   9701         *     type will handle it correctly. We will update PSR.ECI
   9702         *     in the helper function for the insn (some ECI values
   9703         *     mean that the following insn also has been partially
   9704         *     executed).
   9705         *  - Special cases which don't advance ECI
   9706         *     The insns LE, LETP and BKPT leave the ECI/ICI state
   9707         *     bits untouched.
   9708         *  - all other insns (the common case)
   9709         *     Non-zero ECI/ICI means an INVSTATE UsageFault.
   9710         *     We place a rewind-marker here. Insns in the previous
   9711         *     three categories will set a flag in the DisasContext.
   9712         *     If the flag isn't set after we call disas_thumb_insn()
   9713         *     or disas_thumb2_insn() then we know we have a "some other
   9714         *     insn" case. We will rewind to the marker (ie throwing away
   9715         *     all the generated code) and instead emit "take exception".
   9716         */
   9717        dc->insn_eci_rewind = tcg_last_op();
   9718    }
   9719
   9720    if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
   9721        uint32_t cond = dc->condexec_cond;
   9722
   9723        /*
   9724         * Conditionally skip the insn. Note that both 0xe and 0xf mean
   9725         * "always"; 0xf is not "never".
   9726         */
   9727        if (cond < 0x0e) {
   9728            arm_skip_unless(dc, cond);
   9729        }
   9730    }
   9731
   9732    if (is_16bit) {
   9733        disas_thumb_insn(dc, insn);
   9734    } else {
   9735        disas_thumb2_insn(dc, insn);
   9736    }
   9737
   9738    /* Advance the Thumb condexec condition.  */
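           /*
            * This mirrors the architectural ITAdvance() pseudocode, applied to
            * the split cond/mask representation used here: the new low
            * condition bit comes from the top of the mask, the mask shifts
            * left one place, and once the mask is empty the IT block is over
            * and the condition is cleared too.
            */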
   9739    if (dc->condexec_mask) {
   9740        dc->condexec_cond = ((dc->condexec_cond & 0xe) |
   9741                             ((dc->condexec_mask >> 4) & 1));
   9742        dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
   9743        if (dc->condexec_mask == 0) {
   9744            dc->condexec_cond = 0;
   9745        }
   9746    }
   9747
   9748    if (dc->eci && !dc->eci_handled) {
   9749        /*
   9750         * Insn wasn't valid for ECI/ICI at all: undo what we
   9751         * just generated and instead emit an exception
   9752         */
   9753        tcg_remove_ops_after(dc->insn_eci_rewind);
   9754        dc->condjmp = 0;
   9755        gen_exception_insn(dc, dc->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
   9756                           default_exception_el(dc));
   9757    }
   9758
   9759    arm_post_translate_insn(dc);
   9760
   9761    /* Thumb is a variable-length ISA.  Stop translation when the next insn
   9762     * will touch a new page.  This ensures that prefetch aborts occur at
   9763     * the right place.
   9764     *
   9765     * We want to stop the TB if the next insn starts in a new page,
   9766     * or if it spans between this page and the next. This means that
   9767     * if we're looking at the last halfword in the page we need to
   9768     * see if it's a 16-bit Thumb insn (which will fit in this TB)
   9769     * or a 32-bit Thumb insn (which won't).
   9770     * This is to avoid generating a silly TB with a single 16-bit insn
   9771     * in it at the end of this page (which would execute correctly
   9772     * but isn't very efficient).
   9773     */
   9774    if (dc->base.is_jmp == DISAS_NEXT
   9775        && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
   9776            || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
   9777                && insn_crosses_page(env, dc)))) {
   9778        dc->base.is_jmp = DISAS_TOO_MANY;
   9779    }
   9780}
   9781
   9782static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
   9783{
   9784    DisasContext *dc = container_of(dcbase, DisasContext, base);
   9785
   9786    /* At this stage dc->condjmp will only be set when the skipped
   9787       instruction was a conditional branch or trap, and the PC has
   9788       already been written.  */
   9789    gen_set_condexec(dc);
   9790    if (dc->base.is_jmp == DISAS_BX_EXCRET) {
   9791        /* Exception return branches need some special case code at the
   9792         * end of the TB, which is complex enough that it has to
   9793         * handle the single-step vs not and the condition-failed
   9794         * insn codepath itself.
   9795         */
   9796        gen_bx_excret_final_code(dc);
   9797    } else if (unlikely(is_singlestepping(dc))) {
   9798        /* Unconditional and "condition passed" instruction codepath. */
   9799        switch (dc->base.is_jmp) {
   9800        case DISAS_SWI:
   9801            gen_ss_advance(dc);
   9802            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
   9803                          default_exception_el(dc));
   9804            break;
   9805        case DISAS_HVC:
   9806            gen_ss_advance(dc);
   9807            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
   9808            break;
   9809        case DISAS_SMC:
   9810            gen_ss_advance(dc);
   9811            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
   9812            break;
   9813        case DISAS_NEXT:
   9814        case DISAS_TOO_MANY:
   9815        case DISAS_UPDATE_EXIT:
   9816        case DISAS_UPDATE_NOCHAIN:
   9817            gen_set_pc_im(dc, dc->base.pc_next);
   9818            /* fall through */
   9819        default:
   9820            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
   9821            gen_singlestep_exception(dc);
   9822            break;
   9823        case DISAS_NORETURN:
   9824            break;
   9825        }
   9826    } else {
   9827        /* While branches must always occur at the end of an IT block,
   9828           there are a few other things that can cause us to terminate
   9829           the TB in the middle of an IT block:
   9830            - Exception generating instructions (bkpt, swi, undefined).
   9831            - Page boundaries.
   9832            - Hardware watchpoints.
   9833           Hardware breakpoints have already been handled and skip this code.
   9834         */
   9835        switch (dc->base.is_jmp) {
   9836        case DISAS_NEXT:
   9837        case DISAS_TOO_MANY:
   9838            gen_goto_tb(dc, 1, dc->base.pc_next);
   9839            break;
   9840        case DISAS_UPDATE_NOCHAIN:
   9841            gen_set_pc_im(dc, dc->base.pc_next);
   9842            /* fall through */
   9843        case DISAS_JUMP:
   9844            gen_goto_ptr();
   9845            break;
   9846        case DISAS_UPDATE_EXIT:
   9847            gen_set_pc_im(dc, dc->base.pc_next);
   9848            /* fall through */
   9849        default:
   9850            /* indicate that the hash table must be used to find the next TB */
   9851            tcg_gen_exit_tb(NULL, 0);
   9852            break;
   9853        case DISAS_NORETURN:
   9854            /* nothing more to generate */
   9855            break;
   9856        case DISAS_WFI:
   9857        {
   9858            TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
   9859                                          !(dc->insn & (1U << 31))) ? 2 : 4);
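                   /*
                    * tmp is the byte length of the WFI insn: 2 for a 16-bit
                    * Thumb encoding (where the top half of dc->insn is zero),
                    * 4 otherwise. The wfi helper needs it so it can wind the
                    * PC back to the WFI if the insn ends up trapping to a
                    * higher exception level.
                    */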
   9860
   9861            gen_helper_wfi(cpu_env, tmp);
   9862            tcg_temp_free_i32(tmp);
   9863            /* The helper doesn't necessarily throw an exception, but we
   9864             * must go back to the main loop to check for interrupts anyway.
   9865             */
   9866            tcg_gen_exit_tb(NULL, 0);
   9867            break;
   9868        }
   9869        case DISAS_WFE:
   9870            gen_helper_wfe(cpu_env);
   9871            break;
   9872        case DISAS_YIELD:
   9873            gen_helper_yield(cpu_env);
   9874            break;
   9875        case DISAS_SWI:
   9876            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
   9877                          default_exception_el(dc));
   9878            break;
   9879        case DISAS_HVC:
   9880            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
   9881            break;
   9882        case DISAS_SMC:
   9883            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
   9884            break;
   9885        }
   9886    }
   9887
   9888    if (dc->condjmp) {
   9889        /* "Condition failed" instruction codepath for the branch/trap insn */
   9890        gen_set_label(dc->condlabel);
   9891        gen_set_condexec(dc);
   9892        if (unlikely(is_singlestepping(dc))) {
   9893            gen_set_pc_im(dc, dc->base.pc_next);
   9894            gen_singlestep_exception(dc);
   9895        } else {
   9896            gen_goto_tb(dc, 1, dc->base.pc_next);
   9897        }
   9898    }
   9899}
   9900
   9901static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
   9902{
   9903    DisasContext *dc = container_of(dcbase, DisasContext, base);
   9904
   9905    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
   9906    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
   9907}
   9908
   9909static const TranslatorOps arm_translator_ops = {
   9910    .init_disas_context = arm_tr_init_disas_context,
   9911    .tb_start           = arm_tr_tb_start,
   9912    .insn_start         = arm_tr_insn_start,
   9913    .translate_insn     = arm_tr_translate_insn,
   9914    .tb_stop            = arm_tr_tb_stop,
   9915    .disas_log          = arm_tr_disas_log,
   9916};
   9917
   9918static const TranslatorOps thumb_translator_ops = {
   9919    .init_disas_context = arm_tr_init_disas_context,
   9920    .tb_start           = arm_tr_tb_start,
   9921    .insn_start         = arm_tr_insn_start,
   9922    .translate_insn     = thumb_tr_translate_insn,
   9923    .tb_stop            = arm_tr_tb_stop,
   9924    .disas_log          = arm_tr_disas_log,
   9925};
   9926
   9927/* generate intermediate code for basic block 'tb'.  */
   9928void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
   9929{
   9930    DisasContext dc = { };
   9931    const TranslatorOps *ops = &arm_translator_ops;
   9932    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
   9933
   9934    if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
   9935        ops = &thumb_translator_ops;
   9936    }
   9937#ifdef TARGET_AARCH64
   9938    if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
   9939        ops = &aarch64_translator_ops;
   9940    }
   9941#endif
   9942
   9943    translator_loop(ops, &dc.base, cpu, tb, max_insns);
   9944}
   9945
   9946void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
   9947                          target_ulong *data)
   9948{
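           /*
            * data[] is the per-insn tuple recorded by tcg_gen_insn_start():
            * data[0] is the PC, data[1] the condexec bits (always zero for
            * AArch64) and data[2] the syndrome information for insn_start
            * word 2, stored shifted down by ARM_INSN_START_WORD2_SHIFT.
            */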
   9949    if (is_a64(env)) {
   9950        env->pc = data[0];
   9951        env->condexec_bits = 0;
   9952        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
   9953    } else {
   9954        env->regs[15] = data[0];
   9955        env->condexec_bits = data[1];
   9956        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
   9957    }
   9958}