cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

tcg-target.c.inc (104089B)


      1/*
      2 * Tiny Code Generator for QEMU
      3 *
      4 * Copyright (c) 2008 Andrzej Zaborowski
      5 *
      6 * Permission is hereby granted, free of charge, to any person obtaining a copy
      7 * of this software and associated documentation files (the "Software"), to deal
      8 * in the Software without restriction, including without limitation the rights
      9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     10 * copies of the Software, and to permit persons to whom the Software is
     11 * furnished to do so, subject to the following conditions:
     12 *
     13 * The above copyright notice and this permission notice shall be included in
     14 * all copies or substantial portions of the Software.
     15 *
     16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     22 * THE SOFTWARE.
     23 */
     24
     25#include "elf.h"
     26#include "../tcg-pool.c.inc"
     27
     28int arm_arch = __ARM_ARCH;
     29
     30#ifndef use_idiv_instructions
     31bool use_idiv_instructions;
     32#endif
     33#ifndef use_neon_instructions
     34bool use_neon_instructions;
     35#endif
     36
     37/* ??? Ought to think about changing CONFIG_SOFTMMU to always be defined.  */
     38#ifdef CONFIG_SOFTMMU
     39# define USING_SOFTMMU 1
     40#else
     41# define USING_SOFTMMU 0
     42#endif
     43
     44#ifdef CONFIG_DEBUG_TCG
     45static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     46    "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
     47    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%sp",  "%r14", "%pc",
     48    "%q0",  "%q1",  "%q2",  "%q3",  "%q4",  "%q5",  "%q6",  "%q7",
     49    "%q8",  "%q9",  "%q10", "%q11", "%q12", "%q13", "%q14", "%q15",
     50};
     51#endif
     52
     53static const int tcg_target_reg_alloc_order[] = {
     54    TCG_REG_R4,
     55    TCG_REG_R5,
     56    TCG_REG_R6,
     57    TCG_REG_R7,
     58    TCG_REG_R8,
     59    TCG_REG_R9,
     60    TCG_REG_R10,
     61    TCG_REG_R11,
     62    TCG_REG_R13,
     63    TCG_REG_R0,
     64    TCG_REG_R1,
     65    TCG_REG_R2,
     66    TCG_REG_R3,
     67    TCG_REG_R12,
     68    TCG_REG_R14,
     69
     70    TCG_REG_Q0,
     71    TCG_REG_Q1,
     72    TCG_REG_Q2,
     73    TCG_REG_Q3,
     74    /* Q4 - Q7 are call-saved, and skipped. */
     75    TCG_REG_Q8,
     76    TCG_REG_Q9,
     77    TCG_REG_Q10,
     78    TCG_REG_Q11,
     79    TCG_REG_Q12,
     80    TCG_REG_Q13,
     81    TCG_REG_Q14,
     82    TCG_REG_Q15,
     83};
     84
     85static const int tcg_target_call_iarg_regs[4] = {
     86    TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
     87};
     88static const int tcg_target_call_oarg_regs[2] = {
     89    TCG_REG_R0, TCG_REG_R1
     90};
     91
     92#define TCG_REG_TMP  TCG_REG_R12
     93#define TCG_VEC_TMP  TCG_REG_Q15
     94
     95typedef enum {
     96    COND_EQ = 0x0,
     97    COND_NE = 0x1,
     98    COND_CS = 0x2,	/* Unsigned greater or equal */
     99    COND_CC = 0x3,	/* Unsigned less than */
    100    COND_MI = 0x4,	/* Negative */
    101    COND_PL = 0x5,	/* Zero or greater */
    102    COND_VS = 0x6,	/* Overflow */
    103    COND_VC = 0x7,	/* No overflow */
    104    COND_HI = 0x8,	/* Unsigned greater than */
    105    COND_LS = 0x9,	/* Unsigned less or equal */
    106    COND_GE = 0xa,
    107    COND_LT = 0xb,
    108    COND_GT = 0xc,
    109    COND_LE = 0xd,
    110    COND_AL = 0xe,
    111} ARMCond;
    112
    113#define TO_CPSR (1 << 20)
    114
    115#define SHIFT_IMM_LSL(im)	(((im) << 7) | 0x00)
    116#define SHIFT_IMM_LSR(im)	(((im) << 7) | 0x20)
    117#define SHIFT_IMM_ASR(im)	(((im) << 7) | 0x40)
    118#define SHIFT_IMM_ROR(im)	(((im) << 7) | 0x60)
    119#define SHIFT_REG_LSL(rs)	(((rs) << 8) | 0x10)
    120#define SHIFT_REG_LSR(rs)	(((rs) << 8) | 0x30)
    121#define SHIFT_REG_ASR(rs)	(((rs) << 8) | 0x50)
    122#define SHIFT_REG_ROR(rs)	(((rs) << 8) | 0x70)
    123
    124typedef enum {
    125    ARITH_AND = 0x0 << 21,
    126    ARITH_EOR = 0x1 << 21,
    127    ARITH_SUB = 0x2 << 21,
    128    ARITH_RSB = 0x3 << 21,
    129    ARITH_ADD = 0x4 << 21,
    130    ARITH_ADC = 0x5 << 21,
    131    ARITH_SBC = 0x6 << 21,
    132    ARITH_RSC = 0x7 << 21,
    133    ARITH_TST = 0x8 << 21 | TO_CPSR,
    134    ARITH_CMP = 0xa << 21 | TO_CPSR,
    135    ARITH_CMN = 0xb << 21 | TO_CPSR,
    136    ARITH_ORR = 0xc << 21,
    137    ARITH_MOV = 0xd << 21,
    138    ARITH_BIC = 0xe << 21,
    139    ARITH_MVN = 0xf << 21,
    140
    141    INSN_CLZ       = 0x016f0f10,
    142    INSN_RBIT      = 0x06ff0f30,
    143
    144    INSN_LDMIA     = 0x08b00000,
    145    INSN_STMDB     = 0x09200000,
    146
    147    INSN_LDR_IMM   = 0x04100000,
    148    INSN_LDR_REG   = 0x06100000,
    149    INSN_STR_IMM   = 0x04000000,
    150    INSN_STR_REG   = 0x06000000,
    151
    152    INSN_LDRH_IMM  = 0x005000b0,
    153    INSN_LDRH_REG  = 0x001000b0,
    154    INSN_LDRSH_IMM = 0x005000f0,
    155    INSN_LDRSH_REG = 0x001000f0,
    156    INSN_STRH_IMM  = 0x004000b0,
    157    INSN_STRH_REG  = 0x000000b0,
    158
    159    INSN_LDRB_IMM  = 0x04500000,
    160    INSN_LDRB_REG  = 0x06500000,
    161    INSN_LDRSB_IMM = 0x005000d0,
    162    INSN_LDRSB_REG = 0x001000d0,
    163    INSN_STRB_IMM  = 0x04400000,
    164    INSN_STRB_REG  = 0x06400000,
    165
    166    INSN_LDRD_IMM  = 0x004000d0,
    167    INSN_LDRD_REG  = 0x000000d0,
    168    INSN_STRD_IMM  = 0x004000f0,
    169    INSN_STRD_REG  = 0x000000f0,
    170
    171    INSN_DMB_ISH   = 0xf57ff05b,
    172    INSN_DMB_MCR   = 0xee070fba,
    173
    174    /* Architected nop introduced in v6k.  */
    175    /* ??? This is an MSR (imm) 0,0,0 insn.  Anyone know if this
    176       also Just So Happened to do nothing on pre-v6k so that we
    177       don't need to conditionalize it?  */
    178    INSN_NOP_v6k   = 0xe320f000,
    179    /* Otherwise the assembler uses mov r0,r0 */
    180    INSN_NOP_v4    = (COND_AL << 28) | ARITH_MOV,
    181
    182    INSN_VADD      = 0xf2000800,
    183    INSN_VAND      = 0xf2000110,
    184    INSN_VBIC      = 0xf2100110,
    185    INSN_VEOR      = 0xf3000110,
    186    INSN_VORN      = 0xf2300110,
    187    INSN_VORR      = 0xf2200110,
    188    INSN_VSUB      = 0xf3000800,
    189    INSN_VMUL      = 0xf2000910,
    190    INSN_VQADD     = 0xf2000010,
    191    INSN_VQADD_U   = 0xf3000010,
    192    INSN_VQSUB     = 0xf2000210,
    193    INSN_VQSUB_U   = 0xf3000210,
    194    INSN_VMAX      = 0xf2000600,
    195    INSN_VMAX_U    = 0xf3000600,
    196    INSN_VMIN      = 0xf2000610,
    197    INSN_VMIN_U    = 0xf3000610,
    198
    199    INSN_VABS      = 0xf3b10300,
    200    INSN_VMVN      = 0xf3b00580,
    201    INSN_VNEG      = 0xf3b10380,
    202
    203    INSN_VCEQ0     = 0xf3b10100,
    204    INSN_VCGT0     = 0xf3b10000,
    205    INSN_VCGE0     = 0xf3b10080,
    206    INSN_VCLE0     = 0xf3b10180,
    207    INSN_VCLT0     = 0xf3b10200,
    208
    209    INSN_VCEQ      = 0xf3000810,
    210    INSN_VCGE      = 0xf2000310,
    211    INSN_VCGT      = 0xf2000300,
    212    INSN_VCGE_U    = 0xf3000310,
    213    INSN_VCGT_U    = 0xf3000300,
    214
    215    INSN_VSHLI     = 0xf2800510,  /* VSHL (immediate) */
    216    INSN_VSARI     = 0xf2800010,  /* VSHR.S */
    217    INSN_VSHRI     = 0xf3800010,  /* VSHR.U */
    218    INSN_VSLI      = 0xf3800510,
    219    INSN_VSHL_S    = 0xf2000400,  /* VSHL.S (register) */
    220    INSN_VSHL_U    = 0xf3000400,  /* VSHL.U (register) */
    221
    222    INSN_VBSL      = 0xf3100110,
    223    INSN_VBIT      = 0xf3200110,
    224    INSN_VBIF      = 0xf3300110,
    225
    226    INSN_VTST      = 0xf2000810,
    227
    228    INSN_VDUP_G    = 0xee800b10,  /* VDUP (ARM core register) */
    229    INSN_VDUP_S    = 0xf3b00c00,  /* VDUP (scalar) */
    230    INSN_VLDR_D    = 0xed100b00,  /* VLDR.64 */
    231    INSN_VLD1      = 0xf4200000,  /* VLD1 (multiple single elements) */
    232    INSN_VLD1R     = 0xf4a00c00,  /* VLD1 (single element to all lanes) */
    233    INSN_VST1      = 0xf4000000,  /* VST1 (multiple single elements) */
    234    INSN_VMOVI     = 0xf2800010,  /* VMOV (immediate) */
    235} ARMInsn;
    236
    237#define INSN_NOP   (use_armv7_instructions ? INSN_NOP_v6k : INSN_NOP_v4)
    238
    239static const uint8_t tcg_cond_to_arm_cond[] = {
    240    [TCG_COND_EQ] = COND_EQ,
    241    [TCG_COND_NE] = COND_NE,
    242    [TCG_COND_LT] = COND_LT,
    243    [TCG_COND_GE] = COND_GE,
    244    [TCG_COND_LE] = COND_LE,
    245    [TCG_COND_GT] = COND_GT,
    246    /* unsigned */
    247    [TCG_COND_LTU] = COND_CC,
    248    [TCG_COND_GEU] = COND_CS,
    249    [TCG_COND_LEU] = COND_LS,
    250    [TCG_COND_GTU] = COND_HI,
    251};
    252
    253static int encode_imm(uint32_t imm);
    254
    255/* TCG private relocation type: add with pc+imm8 */
    256#define R_ARM_PC8  11
    257
    258/* TCG private relocation type: vldr with imm8 << 2 */
    259#define R_ARM_PC11 12
    260
    261static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    262{
    263    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    264    ptrdiff_t offset = (tcg_ptr_byte_diff(target, src_rx) - 8) >> 2;
    265
    266    if (offset == sextract32(offset, 0, 24)) {
    267        *src_rw = deposit32(*src_rw, 0, 24, offset);
    268        return true;
    269    }
    270    return false;
    271}
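/*
 * Editorial note (not part of the original file): the "- 8" above is the ARM
 * pipeline bias.  A B/BL instruction's 24-bit field holds the signed word
 * offset (target - insn_address - 8) >> 2, measured from the PC value the
 * CPU exposes while executing the branch.  For example, a branch at 0x1000
 * targeting 0x1010 gets the field value (0x1010 - 0x1000 - 8) >> 2 == 2.
 */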
    272
    273static bool reloc_pc13(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    274{
    275    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    276    ptrdiff_t offset = tcg_ptr_byte_diff(target, src_rx) - 8;
    277
    278    if (offset >= -0xfff && offset <= 0xfff) {
    279        tcg_insn_unit insn = *src_rw;
    280        bool u = (offset >= 0);
    281        if (!u) {
    282            offset = -offset;
    283        }
    284        insn = deposit32(insn, 23, 1, u);
    285        insn = deposit32(insn, 0, 12, offset);
    286        *src_rw = insn;
    287        return true;
    288    }
    289    return false;
    290}
    291
    292static bool reloc_pc11(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    293{
    294    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    295    ptrdiff_t offset = (tcg_ptr_byte_diff(target, src_rx) - 8) / 4;
    296
    297    if (offset >= -0xff && offset <= 0xff) {
    298        tcg_insn_unit insn = *src_rw;
    299        bool u = (offset >= 0);
    300        if (!u) {
    301            offset = -offset;
    302        }
    303        insn = deposit32(insn, 23, 1, u);
    304        insn = deposit32(insn, 0, 8, offset);
    305        *src_rw = insn;
    306        return true;
    307    }
    308    return false;
    309}
    310
    311static bool reloc_pc8(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    312{
    313    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    314    ptrdiff_t offset = tcg_ptr_byte_diff(target, src_rx) - 8;
    315    int imm12 = encode_imm(offset);
    316
    317    if (imm12 >= 0) {
    318        *src_rw = deposit32(*src_rw, 0, 12, imm12);
    319        return true;
    320    }
    321    return false;
    322}
    323
    324static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
    325                        intptr_t value, intptr_t addend)
    326{
    327    tcg_debug_assert(addend == 0);
    328    switch (type) {
    329    case R_ARM_PC24:
    330        return reloc_pc24(code_ptr, (const tcg_insn_unit *)value);
    331    case R_ARM_PC13:
    332        return reloc_pc13(code_ptr, (const tcg_insn_unit *)value);
    333    case R_ARM_PC11:
    334        return reloc_pc11(code_ptr, (const tcg_insn_unit *)value);
    335    case R_ARM_PC8:
    336        return reloc_pc8(code_ptr, (const tcg_insn_unit *)value);
    337    default:
    338        g_assert_not_reached();
    339    }
    340}
    341
    342#define TCG_CT_CONST_ARM  0x100
    343#define TCG_CT_CONST_INV  0x200
    344#define TCG_CT_CONST_NEG  0x400
    345#define TCG_CT_CONST_ZERO 0x800
    346#define TCG_CT_CONST_ORRI 0x1000
    347#define TCG_CT_CONST_ANDI 0x2000
    348
    349#define ALL_GENERAL_REGS  0xffffu
    350#define ALL_VECTOR_REGS   0xffff0000u
    351
    352/*
    353 * r0-r2 will be overwritten when reading the tlb entry (softmmu only)
    354 * and r0-r1 when doing the byte swapping, so don't use these.
    355 * r3 is removed for softmmu to avoid clashes with helper arguments.
    356 */
    357#ifdef CONFIG_SOFTMMU
    358#define ALL_QLOAD_REGS \
    359    (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
    360                          (1 << TCG_REG_R2) | (1 << TCG_REG_R3) | \
    361                          (1 << TCG_REG_R14)))
    362#define ALL_QSTORE_REGS \
    363    (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
    364                          (1 << TCG_REG_R2) | (1 << TCG_REG_R14) | \
    365                          ((TARGET_LONG_BITS == 64) << TCG_REG_R3)))
    366#else
    367#define ALL_QLOAD_REGS   ALL_GENERAL_REGS
    368#define ALL_QSTORE_REGS \
    369    (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1)))
    370#endif
    371
    372/*
    373 * ARM immediates for ALU instructions are made of an unsigned 8-bit value
    374 * right-rotated by an even amount between 0 and 30.
    375 *
    376 * Return < 0 if @imm cannot be encoded, else the entire imm12 field.
    377 */
    378static int encode_imm(uint32_t imm)
    379{
    380    uint32_t rot, imm8;
    381
    382    /* Simple case, no rotation required. */
    383    if ((imm & ~0xff) == 0) {
    384        return imm;
    385    }
    386
    387    /* Next, try a simple even shift.  */
    388    rot = ctz32(imm) & ~1;
    389    imm8 = imm >> rot;
    390    rot = 32 - rot;
    391    if ((imm8 & ~0xff) == 0) {
    392        goto found;
    393    }
    394
    395    /*
    396     * Finally, try harder with rotations.
    397     * The ctz test above will have taken care of rotates >= 8.
    398     */
    399    for (rot = 2; rot < 8; rot += 2) {
    400        imm8 = rol32(imm, rot);
    401        if ((imm8 & ~0xff) == 0) {
    402            goto found;
    403        }
    404    }
    405    /* Fail: imm cannot be encoded. */
    406    return -1;
    407
    408 found:
    409    /* Note that rot is even, and we discard bit 0 by shifting by 7. */
    410    return rot << 7 | imm8;
    411}
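/*
 * Editorial sketch (not part of the original file): a worked example of the
 * imm12 encoding above.  0x0000ff00 is 0xff rotated right by 24, so
 * encode_imm(0x0000ff00) takes the "simple even shift" path with rot == 24
 * and returns (24 << 7) | 0xff == 0xcff, i.e. rotate field 12 in bits [11:8]
 * and imm8 == 0xff in bits [7:0].  A value such as 0x00000101 spans more
 * than 8 contiguous bits under every even rotation and yields -1.
 */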
    412
    413static int encode_imm_nofail(uint32_t imm)
    414{
    415    int ret = encode_imm(imm);
    416    tcg_debug_assert(ret >= 0);
    417    return ret;
    418}
    419
    420static bool check_fit_imm(uint32_t imm)
    421{
    422    return encode_imm(imm) >= 0;
    423}
    424
    425/* Return true if v16 is a valid 16-bit shifted immediate.  */
    426static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
    427{
    428    if (v16 == (v16 & 0xff)) {
    429        *cmode = 0x8;
    430        *imm8 = v16 & 0xff;
    431        return true;
    432    } else if (v16 == (v16 & 0xff00)) {
    433        *cmode = 0xa;
    434        *imm8 = v16 >> 8;
    435        return true;
    436    }
    437    return false;
    438}
    439
    440/* Return true if v32 is a valid 32-bit shifted immediate.  */
    441static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
    442{
    443    if (v32 == (v32 & 0xff)) {
    444        *cmode = 0x0;
    445        *imm8 = v32 & 0xff;
    446        return true;
    447    } else if (v32 == (v32 & 0xff00)) {
    448        *cmode = 0x2;
    449        *imm8 = (v32 >> 8) & 0xff;
    450        return true;
    451    } else if (v32 == (v32 & 0xff0000)) {
    452        *cmode = 0x4;
    453        *imm8 = (v32 >> 16) & 0xff;
    454        return true;
    455    } else if (v32 == (v32 & 0xff000000)) {
    456        *cmode = 0x6;
    457        *imm8 = v32 >> 24;
    458        return true;
    459    }
    460    return false;
    461}
    462
    463/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
    464static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
    465{
    466    if ((v32 & 0xffff00ff) == 0xff) {
    467        *cmode = 0xc;
    468        *imm8 = (v32 >> 8) & 0xff;
    469        return true;
    470    } else if ((v32 & 0xff00ffff) == 0xffff) {
    471        *cmode = 0xd;
    472        *imm8 = (v32 >> 16) & 0xff;
    473        return true;
    474    }
    475    return false;
    476}
    477
    478/*
    479 * Return non-zero if v32 can be formed by MOVI+ORR.
    480 * Place the parameters for MOVI in (cmode, imm8).
    481 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
    482 */
    483static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
    484{
    485    int i;
    486
    487    for (i = 6; i > 0; i -= 2) {
    488        /* Mask out one byte we can add with ORR.  */
    489        uint32_t tmp = v32 & ~(0xffu << (i * 4));
    490        if (is_shimm32(tmp, cmode, imm8) ||
    491            is_soimm32(tmp, cmode, imm8)) {
    492            break;
    493        }
    494    }
    495    return i;
    496}
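/*
 * Editorial sketch (not part of the original file): for v32 == 0x00120034
 * the i == 4 iteration masks out the byte at bits [23:16], leaving
 * 0x00000034, which is_shimm32() accepts (cmode 0x0, imm8 0x34); the
 * function then returns 4, the cmode for an ORR of the remaining byte 0x12
 * shifted left by 16.
 */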
    497
    498/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
    499static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
    500{
    501    if (v32 == deposit32(v32, 16, 16, v32)) {
    502        return is_shimm16(v32, cmode, imm8);
    503    } else {
    504        return is_shimm32(v32, cmode, imm8);
    505    }
    506}
    507
    508/* Test if a constant matches the constraint.
    509 * TODO: define constraints for:
    510 *
    511 * ldr/str offset:   between -0xfff and 0xfff
    512 * ldrh/strh offset: between -0xff and 0xff
    513 * mov operand2:     values represented with x << (2 * y), x < 0x100
    514 * add, sub, eor...: ditto
    515 */
    516static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
    517{
    518    if (ct & TCG_CT_CONST) {
    519        return 1;
    520    } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
    521        return 1;
    522    } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
    523        return 1;
    524    } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
    525        return 1;
    526    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
    527        return 1;
    528    }
    529
    530    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    531    case 0:
    532        break;
    533    case TCG_CT_CONST_ANDI:
    534        val = ~val;
    535        /* fallthru */
    536    case TCG_CT_CONST_ORRI:
    537        if (val == deposit64(val, 32, 32, val)) {
    538            int cmode, imm8;
    539            return is_shimm1632(val, &cmode, &imm8);
    540        }
    541        break;
    542    default:
    543        /* Both bits should not be set for the same insn.  */
    544        g_assert_not_reached();
    545    }
    546
    547    return 0;
    548}
    549
    550static void tcg_out_b_imm(TCGContext *s, ARMCond cond, int32_t offset)
    551{
    552    tcg_out32(s, (cond << 28) | 0x0a000000 |
    553                    (((offset - 8) >> 2) & 0x00ffffff));
    554}
    555
    556static void tcg_out_bl_imm(TCGContext *s, ARMCond cond, int32_t offset)
    557{
    558    tcg_out32(s, (cond << 28) | 0x0b000000 |
    559                    (((offset - 8) >> 2) & 0x00ffffff));
    560}
    561
    562static void tcg_out_blx_reg(TCGContext *s, ARMCond cond, TCGReg rn)
    563{
    564    tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
    565}
    566
    567static void tcg_out_blx_imm(TCGContext *s, int32_t offset)
    568{
    569    tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
    570                (((offset - 8) >> 2) & 0x00ffffff));
    571}
    572
    573static void tcg_out_dat_reg(TCGContext *s, ARMCond cond, ARMInsn opc,
    574                            TCGReg rd, TCGReg rn, TCGReg rm, int shift)
    575{
    576    tcg_out32(s, (cond << 28) | (0 << 25) | opc |
    577                    (rn << 16) | (rd << 12) | shift | rm);
    578}
    579
    580static void tcg_out_mov_reg(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rm)
    581{
    582    /* Simple reg-reg move, optimising out the 'do nothing' case */
    583    if (rd != rm) {
    584        tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
    585    }
    586}
    587
    588static void tcg_out_bx_reg(TCGContext *s, ARMCond cond, TCGReg rn)
    589{
    590    tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
    591}
    592
    593static void tcg_out_b_reg(TCGContext *s, ARMCond cond, TCGReg rn)
    594{
    595    /*
    596     * Unless the C portion of QEMU is compiled as thumb, we don't need
    597     * true BX semantics; merely a branch to an address held in a register.
    598     */
    599    if (use_armv5t_instructions) {
    600        tcg_out_bx_reg(s, cond, rn);
    601    } else {
    602        tcg_out_mov_reg(s, cond, TCG_REG_PC, rn);
    603    }
    604}
    605
    606static void tcg_out_dat_imm(TCGContext *s, ARMCond cond, ARMInsn opc,
    607                            TCGReg rd, TCGReg rn, int im)
    608{
    609    tcg_out32(s, (cond << 28) | (1 << 25) | opc |
    610                    (rn << 16) | (rd << 12) | im);
    611}
    612
    613static void tcg_out_ldstm(TCGContext *s, ARMCond cond, ARMInsn opc,
    614                          TCGReg rn, uint16_t mask)
    615{
    616    tcg_out32(s, (cond << 28) | opc | (rn << 16) | mask);
    617}
    618
    619/* Note that this routine is used for both LDR and LDRH formats, so we do
    620   not wish to include an immediate shift at this point.  */
    621static void tcg_out_memop_r(TCGContext *s, ARMCond cond, ARMInsn opc, TCGReg rt,
    622                            TCGReg rn, TCGReg rm, bool u, bool p, bool w)
    623{
    624    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
    625              | (w << 21) | (rn << 16) | (rt << 12) | rm);
    626}
    627
    628static void tcg_out_memop_8(TCGContext *s, ARMCond cond, ARMInsn opc, TCGReg rt,
    629                            TCGReg rn, int imm8, bool p, bool w)
    630{
    631    bool u = 1;
    632    if (imm8 < 0) {
    633        imm8 = -imm8;
    634        u = 0;
    635    }
    636    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
    637              (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
    638}
    639
    640static void tcg_out_memop_12(TCGContext *s, ARMCond cond, ARMInsn opc,
    641                             TCGReg rt, TCGReg rn, int imm12, bool p, bool w)
    642{
    643    bool u = 1;
    644    if (imm12 < 0) {
    645        imm12 = -imm12;
    646        u = 0;
    647    }
    648    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
    649              (rn << 16) | (rt << 12) | imm12);
    650}
    651
    652static void tcg_out_ld32_12(TCGContext *s, ARMCond cond, TCGReg rt,
    653                            TCGReg rn, int imm12)
    654{
    655    tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
    656}
    657
    658static void tcg_out_st32_12(TCGContext *s, ARMCond cond, TCGReg rt,
    659                            TCGReg rn, int imm12)
    660{
    661    tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
    662}
    663
    664static void tcg_out_ld32_r(TCGContext *s, ARMCond cond, TCGReg rt,
    665                           TCGReg rn, TCGReg rm)
    666{
    667    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
    668}
    669
    670static void tcg_out_st32_r(TCGContext *s, ARMCond cond, TCGReg rt,
    671                           TCGReg rn, TCGReg rm)
    672{
    673    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
    674}
    675
    676static void tcg_out_ldrd_8(TCGContext *s, ARMCond cond, TCGReg rt,
    677                           TCGReg rn, int imm8)
    678{
    679    tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
    680}
    681
    682static void tcg_out_ldrd_r(TCGContext *s, ARMCond cond, TCGReg rt,
    683                           TCGReg rn, TCGReg rm)
    684{
    685    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
    686}
    687
    688static void __attribute__((unused))
    689tcg_out_ldrd_rwb(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, TCGReg rm)
    690{
    691    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
    692}
    693
    694static void tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt,
    695                           TCGReg rn, int imm8)
    696{
    697    tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
    698}
    699
    700static void tcg_out_strd_r(TCGContext *s, ARMCond cond, TCGReg rt,
    701                           TCGReg rn, TCGReg rm)
    702{
    703    tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
    704}
    705
    706/* Register pre-increment with base writeback.  */
    707static void tcg_out_ld32_rwb(TCGContext *s, ARMCond cond, TCGReg rt,
    708                             TCGReg rn, TCGReg rm)
    709{
    710    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
    711}
    712
    713static void tcg_out_st32_rwb(TCGContext *s, ARMCond cond, TCGReg rt,
    714                             TCGReg rn, TCGReg rm)
    715{
    716    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
    717}
    718
    719static void tcg_out_ld16u_8(TCGContext *s, ARMCond cond, TCGReg rt,
    720                            TCGReg rn, int imm8)
    721{
    722    tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
    723}
    724
    725static void tcg_out_st16_8(TCGContext *s, ARMCond cond, TCGReg rt,
    726                           TCGReg rn, int imm8)
    727{
    728    tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
    729}
    730
    731static void tcg_out_ld16u_r(TCGContext *s, ARMCond cond, TCGReg rt,
    732                            TCGReg rn, TCGReg rm)
    733{
    734    tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
    735}
    736
    737static void tcg_out_st16_r(TCGContext *s, ARMCond cond, TCGReg rt,
    738                           TCGReg rn, TCGReg rm)
    739{
    740    tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
    741}
    742
    743static void tcg_out_ld16s_8(TCGContext *s, ARMCond cond, TCGReg rt,
    744                            TCGReg rn, int imm8)
    745{
    746    tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
    747}
    748
    749static void tcg_out_ld16s_r(TCGContext *s, ARMCond cond, TCGReg rt,
    750                            TCGReg rn, TCGReg rm)
    751{
    752    tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
    753}
    754
    755static void tcg_out_ld8_12(TCGContext *s, ARMCond cond, TCGReg rt,
    756                           TCGReg rn, int imm12)
    757{
    758    tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
    759}
    760
    761static void tcg_out_st8_12(TCGContext *s, ARMCond cond, TCGReg rt,
    762                           TCGReg rn, int imm12)
    763{
    764    tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
    765}
    766
    767static void tcg_out_ld8_r(TCGContext *s, ARMCond cond, TCGReg rt,
    768                          TCGReg rn, TCGReg rm)
    769{
    770    tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
    771}
    772
    773static void tcg_out_st8_r(TCGContext *s, ARMCond cond, TCGReg rt,
    774                          TCGReg rn, TCGReg rm)
    775{
    776    tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
    777}
    778
    779static void tcg_out_ld8s_8(TCGContext *s, ARMCond cond, TCGReg rt,
    780                           TCGReg rn, int imm8)
    781{
    782    tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
    783}
    784
    785static void tcg_out_ld8s_r(TCGContext *s, ARMCond cond, TCGReg rt,
    786                           TCGReg rn, TCGReg rm)
    787{
    788    tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
    789}
    790
    791static void tcg_out_movi_pool(TCGContext *s, ARMCond cond,
    792                              TCGReg rd, uint32_t arg)
    793{
    794    new_pool_label(s, arg, R_ARM_PC13, s->code_ptr, 0);
    795    tcg_out_ld32_12(s, cond, rd, TCG_REG_PC, 0);
    796}
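/*
 * Editorial note (not part of the original file): the ldr emitted here uses
 * a zero displacement as a placeholder; new_pool_label() records an
 * R_ARM_PC13 relocation, and reloc_pc13() later patches the U bit and imm12
 * field once the constant-pool slot holding 'arg' has been laid out.
 */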
    797
    798static void tcg_out_movi32(TCGContext *s, ARMCond cond,
    799                           TCGReg rd, uint32_t arg)
    800{
    801    int imm12, diff, opc, sh1, sh2;
    802    uint32_t tt0, tt1, tt2;
    803
    804    /* Check a single MOV/MVN before anything else.  */
    805    imm12 = encode_imm(arg);
    806    if (imm12 >= 0) {
    807        tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, imm12);
    808        return;
    809    }
    810    imm12 = encode_imm(~arg);
    811    if (imm12 >= 0) {
    812        tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, imm12);
    813        return;
    814    }
    815
    816    /* Check for a pc-relative address.  This will usually be the TB,
    817       or within the TB, which is immediately before the code block.  */
    818    diff = tcg_pcrel_diff(s, (void *)arg) - 8;
    819    if (diff >= 0) {
    820        imm12 = encode_imm(diff);
    821        if (imm12 >= 0) {
    822            tcg_out_dat_imm(s, cond, ARITH_ADD, rd, TCG_REG_PC, imm12);
    823            return;
    824        }
    825    } else {
    826        imm12 = encode_imm(-diff);
    827        if (imm12 >= 0) {
    828            tcg_out_dat_imm(s, cond, ARITH_SUB, rd, TCG_REG_PC, imm12);
    829            return;
    830        }
    831    }
    832
    833    /* Use movw + movt.  */
    834    if (use_armv7_instructions) {
    835        /* movw */
    836        tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
    837                  | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
    838        if (arg & 0xffff0000) {
    839            /* movt */
    840            tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
    841                      | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
    842        }
    843        return;
    844    }
    845
    846    /* Look for sequences of two insns.  If we have lots of 1's, we can
    847       shorten the sequence by beginning with mvn and then clearing
    848       higher bits with eor.  */
    849    tt0 = arg;
    850    opc = ARITH_MOV;
    851    if (ctpop32(arg) > 16) {
    852        tt0 = ~arg;
    853        opc = ARITH_MVN;
    854    }
    855    sh1 = ctz32(tt0) & ~1;
    856    tt1 = tt0 & ~(0xff << sh1);
    857    sh2 = ctz32(tt1) & ~1;
    858    tt2 = tt1 & ~(0xff << sh2);
    859    if (tt2 == 0) {
    860        int rot;
    861
    862        rot = ((32 - sh1) << 7) & 0xf00;
    863        tcg_out_dat_imm(s, cond, opc, rd,  0, ((tt0 >> sh1) & 0xff) | rot);
    864        rot = ((32 - sh2) << 7) & 0xf00;
    865        tcg_out_dat_imm(s, cond, ARITH_EOR, rd, rd,
    866                        ((tt0 >> sh2) & 0xff) | rot);
    867        return;
    868    }
    869
    870    /* Otherwise, drop it into the constant pool.  */
    871    tcg_out_movi_pool(s, cond, rd, arg);
    872}
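/*
 * Editorial sketch (not part of the original file): how the fallback paths
 * above combine for arg == 0x00ff00ff, which neither encode_imm(arg) nor
 * encode_imm(~arg) can handle.  On ARMv7 this becomes movw rd, #0x00ff
 * followed by movt rd, #0x00ff; the pre-v7 two-insn path instead emits
 *     mov rd, #0x000000ff
 *     eor rd, rd, #0x00ff0000
 * and anything matching neither pattern falls through to the constant pool.
 */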
    873
    874/*
    875 * Emit either the reg,imm or reg,reg form of a data-processing insn.
    876 * rhs must satisfy the "rI" constraint.
    877 */
    878static void tcg_out_dat_rI(TCGContext *s, ARMCond cond, ARMInsn opc,
    879                           TCGReg dst, TCGReg lhs, TCGArg rhs, int rhs_is_const)
    880{
    881    if (rhs_is_const) {
    882        tcg_out_dat_imm(s, cond, opc, dst, lhs, encode_imm_nofail(rhs));
    883    } else {
    884        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    885    }
    886}
    887
    888/*
    889 * Emit either the reg,imm or reg,reg form of a data-processing insn.
    890 * rhs must satisfy the "rIK" constraint.
    891 */
    892static void tcg_out_dat_rIK(TCGContext *s, ARMCond cond, ARMInsn opc,
    893                            ARMInsn opinv, TCGReg dst, TCGReg lhs, TCGArg rhs,
    894                            bool rhs_is_const)
    895{
    896    if (rhs_is_const) {
    897        int imm12 = encode_imm(rhs);
    898        if (imm12 < 0) {
    899            imm12 = encode_imm_nofail(~rhs);
    900            opc = opinv;
    901        }
    902        tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12);
    903    } else {
    904        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    905    }
    906}
    907
    908static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc,
    909                            ARMInsn opneg, TCGReg dst, TCGReg lhs, TCGArg rhs,
    910                            bool rhs_is_const)
    911{
    912    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
    913     * rhs must satisfy the "rIN" constraint.
    914     */
    915    if (rhs_is_const) {
    916        int imm12 = encode_imm(rhs);
    917        if (imm12 < 0) {
    918            imm12 = encode_imm_nofail(-rhs);
    919            opc = opneg;
    920        }
    921        tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12);
    922    } else {
    923        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    924    }
    925}
    926
    927static void tcg_out_mul32(TCGContext *s, ARMCond cond, TCGReg rd,
    928                          TCGReg rn, TCGReg rm)
    929{
    930    /* if ArchVersion() < 6 && d == n then UNPREDICTABLE;  */
    931    if (!use_armv6_instructions && rd == rn) {
    932        if (rd == rm) {
    933            /* rd == rn == rm; copy an input to tmp first.  */
    934            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
    935            rm = rn = TCG_REG_TMP;
    936        } else {
    937            rn = rm;
    938            rm = rd;
    939        }
    940    }
    941    /* mul */
    942    tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
    943}
    944
    945static void tcg_out_umull32(TCGContext *s, ARMCond cond, TCGReg rd0,
    946                            TCGReg rd1, TCGReg rn, TCGReg rm)
    947{
    948    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    949    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
    950        if (rd0 == rm || rd1 == rm) {
    951            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
    952            rn = TCG_REG_TMP;
    953        } else {
    954            TCGReg t = rn;
    955            rn = rm;
    956            rm = t;
    957        }
    958    }
    959    /* umull */
    960    tcg_out32(s, (cond << 28) | 0x00800090 |
    961              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
    962}
    963
    964static void tcg_out_smull32(TCGContext *s, ARMCond cond, TCGReg rd0,
    965                            TCGReg rd1, TCGReg rn, TCGReg rm)
    966{
    967    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    968    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
    969        if (rd0 == rm || rd1 == rm) {
    970            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
    971            rn = TCG_REG_TMP;
    972        } else {
    973            TCGReg t = rn;
    974            rn = rm;
    975            rm = t;
    976        }
    977    }
    978    /* smull */
    979    tcg_out32(s, (cond << 28) | 0x00c00090 |
    980              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
    981}
    982
    983static void tcg_out_sdiv(TCGContext *s, ARMCond cond,
    984                         TCGReg rd, TCGReg rn, TCGReg rm)
    985{
    986    tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
    987}
    988
    989static void tcg_out_udiv(TCGContext *s, ARMCond cond,
    990                         TCGReg rd, TCGReg rn, TCGReg rm)
    991{
    992    tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
    993}
    994
    995static void tcg_out_ext8s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
    996{
    997    if (use_armv6_instructions) {
    998        /* sxtb */
    999        tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
   1000    } else {
   1001        tcg_out_dat_reg(s, cond, ARITH_MOV,
   1002                        rd, 0, rn, SHIFT_IMM_LSL(24));
   1003        tcg_out_dat_reg(s, cond, ARITH_MOV,
   1004                        rd, 0, rd, SHIFT_IMM_ASR(24));
   1005    }
   1006}
   1007
   1008static void __attribute__((unused))
   1009tcg_out_ext8u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
   1010{
   1011    tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
   1012}
   1013
   1014static void tcg_out_ext16s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
   1015{
   1016    if (use_armv6_instructions) {
   1017        /* sxth */
   1018        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
   1019    } else {
   1020        tcg_out_dat_reg(s, cond, ARITH_MOV,
   1021                        rd, 0, rn, SHIFT_IMM_LSL(16));
   1022        tcg_out_dat_reg(s, cond, ARITH_MOV,
   1023                        rd, 0, rd, SHIFT_IMM_ASR(16));
   1024    }
   1025}
   1026
   1027static void tcg_out_ext16u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
   1028{
   1029    if (use_armv6_instructions) {
   1030        /* uxth */
   1031        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
   1032    } else {
   1033        tcg_out_dat_reg(s, cond, ARITH_MOV,
   1034                        rd, 0, rn, SHIFT_IMM_LSL(16));
   1035        tcg_out_dat_reg(s, cond, ARITH_MOV,
   1036                        rd, 0, rd, SHIFT_IMM_LSR(16));
   1037    }
   1038}
   1039
   1040static void tcg_out_bswap16(TCGContext *s, ARMCond cond,
   1041                            TCGReg rd, TCGReg rn, int flags)
   1042{
   1043    if (use_armv6_instructions) {
   1044        if (flags & TCG_BSWAP_OS) {
   1045            /* revsh */
   1046            tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
   1047            return;
   1048        }
   1049
   1050        /* rev16 */
   1051        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
   1052        if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
   1053            /* uxth */
   1054            tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd);
   1055        }
   1056        return;
   1057    }
   1058
   1059    if (flags == 0) {
   1060        /*
   1061         * For stores, no input or output extension:
   1062         *                              rn  = xxAB
   1063         * lsr tmp, rn, #8              tmp = 0xxA
   1064         * and tmp, tmp, #0xff          tmp = 000A
   1065         * orr rd, tmp, rn, lsl #8      rd  = xABA
   1066         */
   1067        tcg_out_dat_reg(s, cond, ARITH_MOV,
   1068                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
   1069        tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
   1070        tcg_out_dat_reg(s, cond, ARITH_ORR,
   1071                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
   1072        return;
   1073    }
   1074
   1075    /*
   1076     * Byte swap, leaving the result at the top of the register.
   1077     * We will then shift down, zero or sign-extending.
   1078     */
   1079    if (flags & TCG_BSWAP_IZ) {
   1080        /*
   1081         *                              rn  = 00AB
   1082         * ror tmp, rn, #8              tmp = B00A
   1083         * orr tmp, tmp, tmp, lsl #16   tmp = BA00
   1084         */
   1085        tcg_out_dat_reg(s, cond, ARITH_MOV,
   1086                        TCG_REG_TMP, 0, rn, SHIFT_IMM_ROR(8));
   1087        tcg_out_dat_reg(s, cond, ARITH_ORR,
   1088                        TCG_REG_TMP, TCG_REG_TMP, TCG_REG_TMP,
   1089                        SHIFT_IMM_LSL(16));
   1090    } else {
   1091        /*
   1092         *                              rn  = xxAB
   1093         * and tmp, rn, #0xff00         tmp = 00A0
   1094         * lsl tmp, tmp, #8             tmp = 0A00
   1095         * orr tmp, tmp, rn, lsl #24    tmp = BA00
   1096         */
   1097        tcg_out_dat_rI(s, cond, ARITH_AND, TCG_REG_TMP, rn, 0xff00, 1);
   1098        tcg_out_dat_reg(s, cond, ARITH_MOV,
   1099                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSL(8));
   1100        tcg_out_dat_reg(s, cond, ARITH_ORR,
   1101                        TCG_REG_TMP, TCG_REG_TMP, rn, SHIFT_IMM_LSL(24));
   1102    }
   1103    tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, TCG_REG_TMP,
   1104                    (flags & TCG_BSWAP_OS
   1105                     ? SHIFT_IMM_ASR(8) : SHIFT_IMM_LSR(8)));
   1106}
   1107
   1108static void tcg_out_bswap32(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
   1109{
   1110    if (use_armv6_instructions) {
   1111        /* rev */
   1112        tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
   1113    } else {
   1114        tcg_out_dat_reg(s, cond, ARITH_EOR,
   1115                        TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
   1116        tcg_out_dat_imm(s, cond, ARITH_BIC,
   1117                        TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
   1118        tcg_out_dat_reg(s, cond, ARITH_MOV,
   1119                        rd, 0, rn, SHIFT_IMM_ROR(8));
   1120        tcg_out_dat_reg(s, cond, ARITH_EOR,
   1121                        rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
   1122    }
   1123}
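/*
 * Editorial note (not part of the original file): the pre-v6 fallback above
 * is the classic four-instruction word byte-reverse: tmp = rn ^ ror(rn, 16)
 * with bits [23:16] cleared, then rd = ror(rn, 8) ^ (tmp >> 8).  It needs
 * only TCG_REG_TMP as scratch.
 */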
   1124
   1125static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd,
   1126                            TCGArg a1, int ofs, int len, bool const_a1)
   1127{
   1128    if (const_a1) {
   1129        /* bfi becomes bfc with rn == 15.  */
   1130        a1 = 15;
   1131    }
   1132    /* bfi/bfc */
   1133    tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
   1134              | (ofs << 7) | ((ofs + len - 1) << 16));
   1135}
   1136
   1137static void tcg_out_extract(TCGContext *s, ARMCond cond, TCGReg rd,
   1138                            TCGReg rn, int ofs, int len)
   1139{
   1140    /* ubfx */
   1141    tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn
   1142              | (ofs << 7) | ((len - 1) << 16));
   1143}
   1144
   1145static void tcg_out_sextract(TCGContext *s, ARMCond cond, TCGReg rd,
   1146                             TCGReg rn, int ofs, int len)
   1147{
   1148    /* sbfx */
   1149    tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn
   1150              | (ofs << 7) | ((len - 1) << 16));
   1151}
   1152
   1153static void tcg_out_ld32u(TCGContext *s, ARMCond cond,
   1154                          TCGReg rd, TCGReg rn, int32_t offset)
   1155{
   1156    if (offset > 0xfff || offset < -0xfff) {
   1157        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1158        tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
   1159    } else
   1160        tcg_out_ld32_12(s, cond, rd, rn, offset);
   1161}
   1162
   1163static void tcg_out_st32(TCGContext *s, ARMCond cond,
   1164                         TCGReg rd, TCGReg rn, int32_t offset)
   1165{
   1166    if (offset > 0xfff || offset < -0xfff) {
   1167        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1168        tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
   1169    } else
   1170        tcg_out_st32_12(s, cond, rd, rn, offset);
   1171}
   1172
   1173static void tcg_out_ld16u(TCGContext *s, ARMCond cond,
   1174                          TCGReg rd, TCGReg rn, int32_t offset)
   1175{
   1176    if (offset > 0xff || offset < -0xff) {
   1177        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1178        tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
   1179    } else
   1180        tcg_out_ld16u_8(s, cond, rd, rn, offset);
   1181}
   1182
   1183static void tcg_out_ld16s(TCGContext *s, ARMCond cond,
   1184                          TCGReg rd, TCGReg rn, int32_t offset)
   1185{
   1186    if (offset > 0xff || offset < -0xff) {
   1187        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1188        tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
   1189    } else
   1190        tcg_out_ld16s_8(s, cond, rd, rn, offset);
   1191}
   1192
   1193static void tcg_out_st16(TCGContext *s, ARMCond cond,
   1194                         TCGReg rd, TCGReg rn, int32_t offset)
   1195{
   1196    if (offset > 0xff || offset < -0xff) {
   1197        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1198        tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
   1199    } else
   1200        tcg_out_st16_8(s, cond, rd, rn, offset);
   1201}
   1202
   1203static void tcg_out_ld8u(TCGContext *s, ARMCond cond,
   1204                         TCGReg rd, TCGReg rn, int32_t offset)
   1205{
   1206    if (offset > 0xfff || offset < -0xfff) {
   1207        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1208        tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
   1209    } else
   1210        tcg_out_ld8_12(s, cond, rd, rn, offset);
   1211}
   1212
   1213static void tcg_out_ld8s(TCGContext *s, ARMCond cond,
   1214                         TCGReg rd, TCGReg rn, int32_t offset)
   1215{
   1216    if (offset > 0xff || offset < -0xff) {
   1217        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1218        tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
   1219    } else
   1220        tcg_out_ld8s_8(s, cond, rd, rn, offset);
   1221}
   1222
   1223static void tcg_out_st8(TCGContext *s, ARMCond cond,
   1224                        TCGReg rd, TCGReg rn, int32_t offset)
   1225{
   1226    if (offset > 0xfff || offset < -0xfff) {
   1227        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1228        tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
   1229    } else
   1230        tcg_out_st8_12(s, cond, rd, rn, offset);
   1231}
   1232
   1233/*
   1234 * The _goto case is normally between TBs within the same code buffer, and
   1235 * with the code buffer limited to 16MB we wouldn't need the long case.
   1236 * But we also use it for the tail-call to the qemu_ld/st helpers, which does.
   1237 */
   1238static void tcg_out_goto(TCGContext *s, ARMCond cond, const tcg_insn_unit *addr)
   1239{
   1240    intptr_t addri = (intptr_t)addr;
   1241    ptrdiff_t disp = tcg_pcrel_diff(s, addr);
   1242    bool arm_mode = !(addri & 1);
   1243
   1244    if (arm_mode && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
   1245        tcg_out_b_imm(s, cond, disp);
   1246        return;
   1247    }
   1248
   1249    /* LDR is interworking from v5t. */
   1250    if (arm_mode || use_armv5t_instructions) {
   1251        tcg_out_movi_pool(s, cond, TCG_REG_PC, addri);
   1252        return;
   1253    }
   1254
   1255    /* else v4t */
   1256    tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
   1257    tcg_out_bx_reg(s, COND_AL, TCG_REG_TMP);
   1258}
   1259
   1260/*
   1261 * The call case is mostly used for helpers - so it's not unreasonable
   1262 * for them to be beyond branch range.
   1263 */
   1264static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr)
   1265{
   1266    intptr_t addri = (intptr_t)addr;
   1267    ptrdiff_t disp = tcg_pcrel_diff(s, addr);
   1268    bool arm_mode = !(addri & 1);
   1269
   1270    if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
   1271        if (arm_mode) {
   1272            tcg_out_bl_imm(s, COND_AL, disp);
   1273            return;
   1274        }
   1275        if (use_armv5t_instructions) {
   1276            tcg_out_blx_imm(s, disp);
   1277            return;
   1278        }
   1279    }
   1280
   1281    if (use_armv5t_instructions) {
   1282        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
   1283        tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP);
   1284    } else if (arm_mode) {
   1285        /* ??? Know that movi_pool emits exactly 1 insn.  */
   1286        tcg_out_mov_reg(s, COND_AL, TCG_REG_R14, TCG_REG_PC);
   1287        tcg_out_movi_pool(s, COND_AL, TCG_REG_PC, addri);
   1288    } else {
   1289        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
   1290        tcg_out_mov_reg(s, COND_AL, TCG_REG_R14, TCG_REG_PC);
   1291        tcg_out_bx_reg(s, COND_AL, TCG_REG_TMP);
   1292    }
   1293}
   1294
   1295static void tcg_out_goto_label(TCGContext *s, ARMCond cond, TCGLabel *l)
   1296{
   1297    if (l->has_value) {
   1298        tcg_out_goto(s, cond, l->u.value_ptr);
   1299    } else {
   1300        tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
   1301        tcg_out_b_imm(s, cond, 0);
   1302    }
   1303}
   1304
   1305static void tcg_out_mb(TCGContext *s, TCGArg a0)
   1306{
   1307    if (use_armv7_instructions) {
   1308        tcg_out32(s, INSN_DMB_ISH);
   1309    } else if (use_armv6_instructions) {
   1310        tcg_out32(s, INSN_DMB_MCR);
   1311    }
   1312}
   1313
   1314static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
   1315                            const int *const_args)
   1316{
   1317    TCGReg al = args[0];
   1318    TCGReg ah = args[1];
   1319    TCGArg bl = args[2];
   1320    TCGArg bh = args[3];
   1321    TCGCond cond = args[4];
   1322    int const_bl = const_args[2];
   1323    int const_bh = const_args[3];
   1324
   1325    switch (cond) {
   1326    case TCG_COND_EQ:
   1327    case TCG_COND_NE:
   1328    case TCG_COND_LTU:
   1329    case TCG_COND_LEU:
   1330    case TCG_COND_GTU:
   1331    case TCG_COND_GEU:
   1332        /* We perform a conditional comparison.  If the high half is
   1333           equal, then overwrite the flags with the comparison of the
   1334           low half.  The resulting flags cover the whole.  */
   1335        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, ah, bh, const_bh);
   1336        tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl);
   1337        return cond;
   1338
   1339    case TCG_COND_LT:
   1340    case TCG_COND_GE:
   1341        /* We perform a double-word subtraction and examine the result.
   1342           We do not actually need the result of the subtract, so the
   1343           low part "subtract" is a compare.  For the high half we have
   1344           no choice but to compute into a temporary.  */
   1345        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, al, bl, const_bl);
   1346        tcg_out_dat_rI(s, COND_AL, ARITH_SBC | TO_CPSR,
   1347                       TCG_REG_TMP, ah, bh, const_bh);
   1348        return cond;
   1349
   1350    case TCG_COND_LE:
   1351    case TCG_COND_GT:
   1352        /* Similar, but with swapped arguments, via reversed subtract.  */
   1353        tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR,
   1354                       TCG_REG_TMP, al, bl, const_bl);
   1355        tcg_out_dat_rI(s, COND_AL, ARITH_RSC | TO_CPSR,
   1356                       TCG_REG_TMP, ah, bh, const_bh);
   1357        return tcg_swap_cond(cond);
   1358
   1359    default:
   1360        g_assert_not_reached();
   1361    }
   1362}
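/*
 * Editorial sketch (not part of the original file): for an unsigned 64-bit
 * comparison such as LTU, the first case above emits, in effect,
 *     cmp   ah, bh
 *     cmpeq al, bl
 * so the flags reflect the low halves only when the high halves are equal,
 * and the caller then branches or sets the result on the returned condition
 * (COND_CC via tcg_cond_to_arm_cond).
 */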
   1363
   1364/*
   1365 * Note that TCGReg references Q-registers.
   1366 * D-regno = 2 * Q-regno, so shift left by 1 while inserting.
   1367 */
   1368static uint32_t encode_vd(TCGReg rd)
   1369{
   1370    tcg_debug_assert(rd >= TCG_REG_Q0);
   1371    return (extract32(rd, 3, 1) << 22) | (extract32(rd, 0, 3) << 13);
   1372}
   1373
   1374static uint32_t encode_vn(TCGReg rn)
   1375{
   1376    tcg_debug_assert(rn >= TCG_REG_Q0);
   1377    return (extract32(rn, 3, 1) << 7) | (extract32(rn, 0, 3) << 17);
   1378}
   1379
   1380static uint32_t encode_vm(TCGReg rm)
   1381{
   1382    tcg_debug_assert(rm >= TCG_REG_Q0);
   1383    return (extract32(rm, 3, 1) << 5) | (extract32(rm, 0, 3) << 1);
   1384}
   1385
   1386static void tcg_out_vreg2(TCGContext *s, ARMInsn insn, int q, int vece,
   1387                          TCGReg d, TCGReg m)
   1388{
   1389    tcg_out32(s, insn | (vece << 18) | (q << 6) |
   1390              encode_vd(d) | encode_vm(m));
   1391}
   1392
   1393static void tcg_out_vreg3(TCGContext *s, ARMInsn insn, int q, int vece,
   1394                          TCGReg d, TCGReg n, TCGReg m)
   1395{
   1396    tcg_out32(s, insn | (vece << 20) | (q << 6) |
   1397              encode_vd(d) | encode_vn(n) | encode_vm(m));
   1398}
   1399
   1400static void tcg_out_vmovi(TCGContext *s, TCGReg rd,
   1401                          int q, int op, int cmode, uint8_t imm8)
   1402{
   1403    tcg_out32(s, INSN_VMOVI | encode_vd(rd) | (q << 6) | (op << 5)
   1404              | (cmode << 8) | extract32(imm8, 0, 4)
   1405              | (extract32(imm8, 4, 3) << 16)
   1406              | (extract32(imm8, 7, 1) << 24));
   1407}
   1408
   1409static void tcg_out_vshifti(TCGContext *s, ARMInsn insn, int q,
   1410                            TCGReg rd, TCGReg rm, int l_imm6)
   1411{
   1412    tcg_out32(s, insn | (q << 6) | encode_vd(rd) | encode_vm(rm) |
   1413              (extract32(l_imm6, 6, 1) << 7) |
   1414              (extract32(l_imm6, 0, 6) << 16));
   1415}
   1416
   1417static void tcg_out_vldst(TCGContext *s, ARMInsn insn,
   1418                          TCGReg rd, TCGReg rn, int offset)
   1419{
   1420    if (offset != 0) {
   1421        if (check_fit_imm(offset) || check_fit_imm(-offset)) {
   1422            tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
   1423                            TCG_REG_TMP, rn, offset, true);
   1424        } else {
   1425            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset);
   1426            tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
   1427                            TCG_REG_TMP, TCG_REG_TMP, rn, 0);
   1428        }
   1429        rn = TCG_REG_TMP;
   1430    }
   1431    tcg_out32(s, insn | (rn << 16) | encode_vd(rd) | 0xf);
   1432}
   1433
   1434#ifdef CONFIG_SOFTMMU
   1435#include "../tcg-ldst.c.inc"
   1436
   1437/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
   1438 *                                     int mmu_idx, uintptr_t ra)
   1439 */
   1440static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
   1441    [MO_UB]   = helper_ret_ldub_mmu,
   1442    [MO_SB]   = helper_ret_ldsb_mmu,
   1443#ifdef HOST_WORDS_BIGENDIAN
   1444    [MO_UW] = helper_be_lduw_mmu,
   1445    [MO_UL] = helper_be_ldul_mmu,
   1446    [MO_Q]  = helper_be_ldq_mmu,
   1447    [MO_SW] = helper_be_ldsw_mmu,
   1448    [MO_SL] = helper_be_ldul_mmu,
   1449#else
   1450    [MO_UW] = helper_le_lduw_mmu,
   1451    [MO_UL] = helper_le_ldul_mmu,
   1452    [MO_Q]  = helper_le_ldq_mmu,
   1453    [MO_SW] = helper_le_ldsw_mmu,
   1454    [MO_SL] = helper_le_ldul_mmu,
   1455#endif
   1456};
   1457
   1458/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
   1459 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
   1460 */
   1461static void * const qemu_st_helpers[MO_SIZE + 1] = {
   1462    [MO_8]   = helper_ret_stb_mmu,
   1463#ifdef HOST_WORDS_BIGENDIAN
   1464    [MO_16] = helper_be_stw_mmu,
   1465    [MO_32] = helper_be_stl_mmu,
   1466    [MO_64] = helper_be_stq_mmu,
   1467#else
   1468    [MO_16] = helper_le_stw_mmu,
   1469    [MO_32] = helper_le_stl_mmu,
   1470    [MO_64] = helper_le_stq_mmu,
   1471#endif
   1472};
   1473
   1474/* Helper routines for marshalling helper function arguments into
   1475 * the correct registers and stack.
   1476 * argreg is where we want to put this argument, arg is the argument itself.
   1477 * Return value is the updated argreg ready for the next call.
   1478 * Note that argregs 0..3 are real registers, 4+ go on the stack.
   1479 *
   1480 * We provide routines for arguments which are: immediate, 32 bit
   1481 * value in register, 16 and 8 bit values in register (which must be zero
   1482 * extended before use) and 64 bit value in a lo:hi register pair.
   1483 */
   1484#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
   1485static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
   1486{                                                                          \
   1487    if (argreg < 4) {                                                      \
   1488        MOV_ARG(s, COND_AL, argreg, arg);                                  \
   1489    } else {                                                               \
   1490        int ofs = (argreg - 4) * 4;                                        \
   1491        EXT_ARG;                                                           \
   1492        tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);            \
   1493        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
   1494    }                                                                      \
   1495    return argreg + 1;                                                     \
   1496}
   1497
   1498DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
   1499    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
   1500DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
   1501    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
   1502DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
   1503    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
   1504DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
   1505
   1506static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
   1507                                TCGReg arglo, TCGReg arghi)
   1508{
   1509    /* 64 bit arguments must go in even/odd register pairs
   1510     * and in 8-byte-aligned stack slots.
   1511     */
   1512    if (argreg & 1) {
   1513        argreg++;
   1514    }
   1515    if (use_armv6_instructions && argreg >= 4
   1516        && (arglo & 1) == 0 && arghi == arglo + 1) {
   1517        tcg_out_strd_8(s, COND_AL, arglo,
   1518                       TCG_REG_CALL_STACK, (argreg - 4) * 4);
   1519        return argreg + 2;
   1520    } else {
   1521        argreg = tcg_out_arg_reg32(s, argreg, arglo);
   1522        argreg = tcg_out_arg_reg32(s, argreg, arghi);
   1523        return argreg;
   1524    }
   1525}
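       /*
        * Worked example (illustration only): for a 64-bit guest address, the
        * load slow path below marshals its helper call roughly as
        *
        *   tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);    env  -> r0
        *   tcg_out_arg_reg64(s, argreg, addrlo, addrhi);   addr -> r2:r3
        *   tcg_out_arg_imm32(s, argreg, oi);               oi   -> [sp, #0]
        *   tcg_out_arg_reg32(s, argreg, TCG_REG_R14);      ra   -> [sp, #4]
        *
        * r1 is skipped so the 64-bit value lands in an even/odd register pair.
        */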
   1526
   1527#define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
   1528
   1529/* We expect to use a 9-bit sign-magnitude negative offset from ENV.  */
   1530QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
   1531QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
   1532
   1533/* These offsets are built into the LDRD below.  */
   1534QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
   1535QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
   1536
   1537/* Load and compare a TLB entry, leaving the flags set.  Returns the register
   1538   containing the addend of the tlb entry.  Clobbers R0, R1, R2 (R3 for 64-bit guests), TMP.  */
   1539
   1540static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
   1541                               MemOp opc, int mem_index, bool is_load)
   1542{
   1543    int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
   1544                   : offsetof(CPUTLBEntry, addr_write));
   1545    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
   1546    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
   1547    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
   1548    unsigned s_bits = opc & MO_SIZE;
   1549    unsigned a_bits = get_alignment_bits(opc);
   1550
   1551    /*
   1552     * We don't support inline unaligned accesses, but we can easily
   1553     * support overalignment checks.
   1554     */
   1555    if (a_bits < s_bits) {
   1556        a_bits = s_bits;
   1557    }
   1558
   1559    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}.  */
   1560    if (use_armv6_instructions) {
   1561        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
   1562    } else {
   1563        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R0, TCG_AREG0, mask_off);
   1564        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R1, TCG_AREG0, table_off);
   1565    }
   1566
   1567    /* Extract the tlb index from the address into R0.  */
   1568    tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
   1569                    SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
   1570
   1571    /*
   1572     * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
   1573     * Load the tlb comparator into R2/R3 and the fast path addend into R1.
   1574     */
   1575    if (cmp_off == 0) {
   1576        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
   1577            tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
   1578        } else {
   1579            tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
   1580        }
   1581    } else {
   1582        tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
   1583                        TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
   1584        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
   1585            tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
   1586        } else {
   1587            tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
   1588        }
   1589    }
   1590    if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
   1591        tcg_out_ld32_12(s, COND_AL, TCG_REG_R3, TCG_REG_R1, cmp_off + 4);
   1592    }
   1593
   1594    /* Load the tlb addend.  */
   1595    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
   1596                    offsetof(CPUTLBEntry, addend));
   1597
   1598    /*
   1599     * Check alignment, check comparators.
   1600     * Do this in no more than 3 insns.  Use MOVW for v7, if possible,
   1601     * to reduce the number of sequential conditional instructions.
   1602     * Almost all guests have at least 4k pages, which means that we need
   1603     * to clear at least 9 bits even for an 8-byte memory access, which means it
   1604     * isn't worth checking for an immediate operand for BIC.
   1605     */
   1606    if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
   1607        tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
   1608
   1609        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
   1610        tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
   1611                        addrlo, TCG_REG_TMP, 0);
   1612        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
   1613    } else {
   1614        if (a_bits) {
   1615            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
   1616                            (1 << a_bits) - 1);
   1617        }
   1618        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, addrlo,
   1619                        SHIFT_IMM_LSR(TARGET_PAGE_BITS));
   1620        tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
   1621                        0, TCG_REG_R2, TCG_REG_TMP,
   1622                        SHIFT_IMM_LSL(TARGET_PAGE_BITS));
   1623    }
   1624
   1625    if (TARGET_LONG_BITS == 64) {
   1626        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
   1627    }
   1628
   1629    return TCG_REG_R1;
   1630}
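       /*
        * A rough sketch (illustration only; assumes an ARMv7 host, a 32-bit
        * guest and a load, so that cmp_off == 0) of the fast-path sequence
        * emitted above for mem_index 0:
        *
        *    ldrd  r0, r1, [env, #-off]     @ f[0].{mask,table}
        *    and   r0, r0, addrlo, lsr #(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)
        *    ldr   r2, [r1, r0]!            @ entry->addr_read; r1 = &entry
        *    ldr   r1, [r1, #addend_off]    @ entry->addend
        *    movw/movt tmp, #~(TARGET_PAGE_MASK | ((1 << a_bits) - 1))
        *    bic   tmp, addrlo, tmp
        *    cmp   r2, tmp
        *
        * The callers below then branch to the slow path with COND_NE.
        */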
   1631
   1632/* Record the context of a call to the out-of-line helper code for the slow
   1633   path for a load or store, so that we can later generate the correct
   1634   helper code.  */
   1635static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
   1636                                TCGReg datalo, TCGReg datahi, TCGReg addrlo,
   1637                                TCGReg addrhi, tcg_insn_unit *raddr,
   1638                                tcg_insn_unit *label_ptr)
   1639{
   1640    TCGLabelQemuLdst *label = new_ldst_label(s);
   1641
   1642    label->is_ld = is_ld;
   1643    label->oi = oi;
   1644    label->datalo_reg = datalo;
   1645    label->datahi_reg = datahi;
   1646    label->addrlo_reg = addrlo;
   1647    label->addrhi_reg = addrhi;
   1648    label->raddr = tcg_splitwx_to_rx(raddr);
   1649    label->label_ptr[0] = label_ptr;
   1650}
   1651
   1652static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
   1653{
   1654    TCGReg argreg, datalo, datahi;
   1655    MemOpIdx oi = lb->oi;
   1656    MemOp opc = get_memop(oi);
   1657    void *func;
   1658
   1659    if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
   1660        return false;
   1661    }
   1662
   1663    argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
   1664    if (TARGET_LONG_BITS == 64) {
   1665        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
   1666    } else {
   1667        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
   1668    }
   1669    argreg = tcg_out_arg_imm32(s, argreg, oi);
   1670    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
   1671
   1672    /* For armv6 we can use the canonical unsigned helpers and minimize
   1673       icache usage.  For pre-armv6, use the signed helpers since we do
   1674       not have a single insn sign-extend.  */
   1675    if (use_armv6_instructions) {
   1676        func = qemu_ld_helpers[opc & MO_SIZE];
   1677    } else {
   1678        func = qemu_ld_helpers[opc & MO_SSIZE];
   1679        if (opc & MO_SIGN) {
   1680            opc = MO_UL;
   1681        }
   1682    }
   1683    tcg_out_call(s, func);
   1684
   1685    datalo = lb->datalo_reg;
   1686    datahi = lb->datahi_reg;
   1687    switch (opc & MO_SSIZE) {
   1688    case MO_SB:
   1689        tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
   1690        break;
   1691    case MO_SW:
   1692        tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
   1693        break;
   1694    default:
   1695        tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
   1696        break;
   1697    case MO_Q:
   1698        if (datalo != TCG_REG_R1) {
   1699            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
   1700            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
   1701        } else if (datahi != TCG_REG_R0) {
   1702            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
   1703            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
   1704        } else {
   1705            tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
   1706            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
   1707            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
   1708        }
   1709        break;
   1710    }
   1711
   1712    tcg_out_goto(s, COND_AL, lb->raddr);
   1713    return true;
   1714}
   1715
   1716static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
   1717{
   1718    TCGReg argreg, datalo, datahi;
   1719    MemOpIdx oi = lb->oi;
   1720    MemOp opc = get_memop(oi);
   1721
   1722    if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
   1723        return false;
   1724    }
   1725
   1726    argreg = TCG_REG_R0;
   1727    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
   1728    if (TARGET_LONG_BITS == 64) {
   1729        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
   1730    } else {
   1731        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
   1732    }
   1733
   1734    datalo = lb->datalo_reg;
   1735    datahi = lb->datahi_reg;
   1736    switch (opc & MO_SIZE) {
   1737    case MO_8:
   1738        argreg = tcg_out_arg_reg8(s, argreg, datalo);
   1739        break;
   1740    case MO_16:
   1741        argreg = tcg_out_arg_reg16(s, argreg, datalo);
   1742        break;
   1743    case MO_32:
   1744    default:
   1745        argreg = tcg_out_arg_reg32(s, argreg, datalo);
   1746        break;
   1747    case MO_64:
   1748        argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
   1749        break;
   1750    }
   1751
   1752    argreg = tcg_out_arg_imm32(s, argreg, oi);
   1753    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
   1754
   1755    /* Tail-call to the helper, which will return to the fast path.  */
   1756    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
   1757    return true;
   1758}
   1759#endif /* SOFTMMU */
   1760
   1761static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
   1762                                  TCGReg datalo, TCGReg datahi,
   1763                                  TCGReg addrlo, TCGReg addend)
   1764{
   1765    /* Byte swapping is left to middle-end expansion. */
   1766    tcg_debug_assert((opc & MO_BSWAP) == 0);
   1767
   1768    switch (opc & MO_SSIZE) {
   1769    case MO_UB:
   1770        tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
   1771        break;
   1772    case MO_SB:
   1773        tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
   1774        break;
   1775    case MO_UW:
   1776        tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
   1777        break;
   1778    case MO_SW:
   1779        tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
   1780        break;
   1781    case MO_UL:
   1782        tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
   1783        break;
   1784    case MO_Q:
   1785        /* Avoid ldrd for user-only emulation, to handle unaligned.  */
   1786        if (USING_SOFTMMU && use_armv6_instructions
   1787            && (datalo & 1) == 0 && datahi == datalo + 1) {
   1788            tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);
   1789        } else if (datalo != addend) {
   1790            tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo);
   1791            tcg_out_ld32_12(s, COND_AL, datahi, addend, 4);
   1792        } else {
   1793            tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
   1794                            addend, addrlo, SHIFT_IMM_LSL(0));
   1795            tcg_out_ld32_12(s, COND_AL, datalo, TCG_REG_TMP, 0);
   1796            tcg_out_ld32_12(s, COND_AL, datahi, TCG_REG_TMP, 4);
   1797        }
   1798        break;
   1799    default:
   1800        g_assert_not_reached();
   1801    }
   1802}
   1803
   1804#ifndef CONFIG_SOFTMMU
   1805static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
   1806                                   TCGReg datahi, TCGReg addrlo)
   1807{
   1808    /* Byte swapping is left to middle-end expansion. */
   1809    tcg_debug_assert((opc & MO_BSWAP) == 0);
   1810
   1811    switch (opc & MO_SSIZE) {
   1812    case MO_UB:
   1813        tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
   1814        break;
   1815    case MO_SB:
   1816        tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
   1817        break;
   1818    case MO_UW:
   1819        tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
   1820        break;
   1821    case MO_SW:
   1822        tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
   1823        break;
   1824    case MO_UL:
   1825        tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
   1826        break;
   1827    case MO_Q:
   1828        /* Avoid ldrd for user-only emulation, to handle unaligned.  */
   1829        if (USING_SOFTMMU && use_armv6_instructions
   1830            && (datalo & 1) == 0 && datahi == datalo + 1) {
   1831            tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
   1832        } else if (datalo == addrlo) {
   1833            tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
   1834            tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
   1835        } else {
   1836            tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
   1837            tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
   1838        }
   1839        break;
   1840    default:
   1841        g_assert_not_reached();
   1842    }
   1843}
   1844#endif
   1845
   1846static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
   1847{
   1848    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
   1849    MemOpIdx oi;
   1850    MemOp opc;
   1851#ifdef CONFIG_SOFTMMU
   1852    int mem_index;
   1853    TCGReg addend;
   1854    tcg_insn_unit *label_ptr;
   1855#endif
   1856
   1857    datalo = *args++;
   1858    datahi = (is64 ? *args++ : 0);
   1859    addrlo = *args++;
   1860    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
   1861    oi = *args++;
   1862    opc = get_memop(oi);
   1863
   1864#ifdef CONFIG_SOFTMMU
   1865    mem_index = get_mmuidx(oi);
   1866    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
   1867
   1868    /* This is a conditional BL, used only to load a pointer within this opcode
   1869       into LR for the slow path.  We will not be using the value for a tail call.  */
   1870    label_ptr = s->code_ptr;
   1871    tcg_out_bl_imm(s, COND_NE, 0);
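           /* The zero displacement emitted here is patched by reloc_pc24() in
              tcg_out_qemu_ld_slow_path() so that the BL reaches the slow path. */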
   1872
   1873    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
   1874
   1875    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
   1876                        s->code_ptr, label_ptr);
   1877#else /* !CONFIG_SOFTMMU */
   1878    if (guest_base) {
   1879        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
   1880        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
   1881    } else {
   1882        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
   1883    }
   1884#endif
   1885}
   1886
   1887static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
   1888                                  TCGReg datalo, TCGReg datahi,
   1889                                  TCGReg addrlo, TCGReg addend)
   1890{
   1891    /* Byte swapping is left to middle-end expansion. */
   1892    tcg_debug_assert((opc & MO_BSWAP) == 0);
   1893
   1894    switch (opc & MO_SIZE) {
   1895    case MO_8:
   1896        tcg_out_st8_r(s, cond, datalo, addrlo, addend);
   1897        break;
   1898    case MO_16:
   1899        tcg_out_st16_r(s, cond, datalo, addrlo, addend);
   1900        break;
   1901    case MO_32:
   1902        tcg_out_st32_r(s, cond, datalo, addrlo, addend);
   1903        break;
   1904    case MO_64:
   1905        /* Avoid strd for user-only emulation, to handle unaligned.  */
   1906        if (USING_SOFTMMU && use_armv6_instructions
   1907            && (datalo & 1) == 0 && datahi == datalo + 1) {
   1908            tcg_out_strd_r(s, cond, datalo, addrlo, addend);
   1909        } else {
   1910            tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
   1911            tcg_out_st32_12(s, cond, datahi, addend, 4);
   1912        }
   1913        break;
   1914    default:
   1915        g_assert_not_reached();
   1916    }
   1917}
   1918
   1919#ifndef CONFIG_SOFTMMU
   1920static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
   1921                                   TCGReg datahi, TCGReg addrlo)
   1922{
   1923    /* Byte swapping is left to middle-end expansion. */
   1924    tcg_debug_assert((opc & MO_BSWAP) == 0);
   1925
   1926    switch (opc & MO_SIZE) {
   1927    case MO_8:
   1928        tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
   1929        break;
   1930    case MO_16:
   1931        tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
   1932        break;
   1933    case MO_32:
   1934        tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
   1935        break;
   1936    case MO_64:
   1937        /* Avoid strd for user-only emulation, to handle unaligned.  */
   1938        if (USING_SOFTMMU && use_armv6_instructions
   1939            && (datalo & 1) == 0 && datahi == datalo + 1) {
   1940            tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
   1941        } else {
   1942            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
   1943            tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
   1944        }
   1945        break;
   1946    default:
   1947        g_assert_not_reached();
   1948    }
   1949}
   1950#endif
   1951
   1952static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
   1953{
   1954    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
   1955    MemOpIdx oi;
   1956    MemOp opc;
   1957#ifdef CONFIG_SOFTMMU
   1958    int mem_index;
   1959    TCGReg addend;
   1960    tcg_insn_unit *label_ptr;
   1961#endif
   1962
   1963    datalo = *args++;
   1964    datahi = (is64 ? *args++ : 0);
   1965    addrlo = *args++;
   1966    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
   1967    oi = *args++;
   1968    opc = get_memop(oi);
   1969
   1970#ifdef CONFIG_SOFTMMU
   1971    mem_index = get_mmuidx(oi);
   1972    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
   1973
   1974    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
   1975
   1976    /* The conditional call must come last, as we're going to return here.  */
   1977    label_ptr = s->code_ptr;
   1978    tcg_out_bl_imm(s, COND_NE, 0);
   1979
   1980    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
   1981                        s->code_ptr, label_ptr);
   1982#else /* !CONFIG_SOFTMMU */
   1983    if (guest_base) {
   1984        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
   1985        tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
   1986                              datahi, addrlo, TCG_REG_TMP);
   1987    } else {
   1988        tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
   1989    }
   1990#endif
   1991}
   1992
   1993static void tcg_out_epilogue(TCGContext *s);
   1994
   1995static void tcg_out_op(TCGContext *s, TCGOpcode opc,
   1996                       const TCGArg args[TCG_MAX_OP_ARGS],
   1997                       const int const_args[TCG_MAX_OP_ARGS])
   1998{
   1999    TCGArg a0, a1, a2, a3, a4, a5;
   2000    int c;
   2001
   2002    switch (opc) {
   2003    case INDEX_op_exit_tb:
   2004        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]);
   2005        tcg_out_epilogue(s);
   2006        break;
   2007    case INDEX_op_goto_tb:
   2008        {
   2009            /* Indirect jump method */
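                   /* This loads the target from tb_jmp_target_addr[args[0]]
                      directly into pc: "ldr pc, [pc, #dil]" when the slot is
                      within the 12-bit reach of the load, otherwise via r0. */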
   2010            intptr_t ptr, dif, dil;
   2011            TCGReg base = TCG_REG_PC;
   2012
   2013            tcg_debug_assert(s->tb_jmp_insn_offset == 0);
   2014            ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + args[0]);
   2015            dif = tcg_pcrel_diff(s, (void *)ptr) - 8;
   2016            dil = sextract32(dif, 0, 12);
   2017            if (dif != dil) {
   2018                /* The TB is close, but outside the 12 bits addressable by
   2019                   the load.  We can extend this to 20 bits with a sub of a
   2020                   shifted immediate from pc.  In the vastly unlikely event
   2021                   the code requires more than 1MB, we'll use 2 insns and
   2022                   be no worse off.  */
   2023                base = TCG_REG_R0;
   2024                tcg_out_movi32(s, COND_AL, base, ptr - dil);
   2025            }
   2026            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
   2027            set_jmp_reset_offset(s, args[0]);
   2028        }
   2029        break;
   2030    case INDEX_op_goto_ptr:
   2031        tcg_out_b_reg(s, COND_AL, args[0]);
   2032        break;
   2033    case INDEX_op_br:
   2034        tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
   2035        break;
   2036
   2037    case INDEX_op_ld8u_i32:
   2038        tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
   2039        break;
   2040    case INDEX_op_ld8s_i32:
   2041        tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
   2042        break;
   2043    case INDEX_op_ld16u_i32:
   2044        tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
   2045        break;
   2046    case INDEX_op_ld16s_i32:
   2047        tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
   2048        break;
   2049    case INDEX_op_ld_i32:
   2050        tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
   2051        break;
   2052    case INDEX_op_st8_i32:
   2053        tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
   2054        break;
   2055    case INDEX_op_st16_i32:
   2056        tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
   2057        break;
   2058    case INDEX_op_st_i32:
   2059        tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
   2060        break;
   2061
   2062    case INDEX_op_movcond_i32:
   2063        /* Constraints mean that v2 is always in the same register as dest,
   2064         * so we only need to do "if condition passed, move v1 to dest".
   2065         */
   2066        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
   2067                        args[1], args[2], const_args[2]);
   2068        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
   2069                        ARITH_MVN, args[0], 0, args[3], const_args[3]);
   2070        break;
   2071    case INDEX_op_add_i32:
   2072        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
   2073                        args[0], args[1], args[2], const_args[2]);
   2074        break;
   2075    case INDEX_op_sub_i32:
   2076        if (const_args[1]) {
   2077            if (const_args[2]) {
   2078                tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
   2079            } else {
   2080                tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
   2081                               args[0], args[2], args[1], 1);
   2082            }
   2083        } else {
   2084            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
   2085                            args[0], args[1], args[2], const_args[2]);
   2086        }
   2087        break;
   2088    case INDEX_op_and_i32:
   2089        tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
   2090                        args[0], args[1], args[2], const_args[2]);
   2091        break;
   2092    case INDEX_op_andc_i32:
   2093        tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
   2094                        args[0], args[1], args[2], const_args[2]);
   2095        break;
   2096    case INDEX_op_or_i32:
   2097        c = ARITH_ORR;
   2098        goto gen_arith;
   2099    case INDEX_op_xor_i32:
   2100        c = ARITH_EOR;
   2101        /* Fall through.  */
   2102    gen_arith:
   2103        tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
   2104        break;
   2105    case INDEX_op_add2_i32:
   2106        a0 = args[0], a1 = args[1], a2 = args[2];
   2107        a3 = args[3], a4 = args[4], a5 = args[5];
   2108        if (a0 == a3 || (a0 == a5 && !const_args[5])) {
   2109            a0 = TCG_REG_TMP;
   2110        }
   2111        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
   2112                        a0, a2, a4, const_args[4]);
   2113        tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
   2114                        a1, a3, a5, const_args[5]);
   2115        tcg_out_mov_reg(s, COND_AL, args[0], a0);
   2116        break;
   2117    case INDEX_op_sub2_i32:
   2118        a0 = args[0], a1 = args[1], a2 = args[2];
   2119        a3 = args[3], a4 = args[4], a5 = args[5];
   2120        if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
   2121            a0 = TCG_REG_TMP;
   2122        }
   2123        if (const_args[2]) {
   2124            if (const_args[4]) {
   2125                tcg_out_movi32(s, COND_AL, a0, a4);
   2126                a4 = a0;
   2127            }
   2128            tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
   2129        } else {
   2130            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
   2131                            ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
   2132        }
   2133        if (const_args[3]) {
   2134            if (const_args[5]) {
   2135                tcg_out_movi32(s, COND_AL, a1, a5);
   2136                a5 = a1;
   2137            }
   2138            tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
   2139        } else {
   2140            tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
   2141                            a1, a3, a5, const_args[5]);
   2142        }
   2143        tcg_out_mov_reg(s, COND_AL, args[0], a0);
   2144        break;
   2145    case INDEX_op_neg_i32:
   2146        tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
   2147        break;
   2148    case INDEX_op_not_i32:
   2149        tcg_out_dat_reg(s, COND_AL,
   2150                        ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
   2151        break;
   2152    case INDEX_op_mul_i32:
   2153        tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
   2154        break;
   2155    case INDEX_op_mulu2_i32:
   2156        tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
   2157        break;
   2158    case INDEX_op_muls2_i32:
   2159        tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
   2160        break;
   2161    /* XXX: Perhaps args[2] & 0x1f is wrong */
   2162    case INDEX_op_shl_i32:
   2163        c = const_args[2] ?
   2164                SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
   2165        goto gen_shift32;
   2166    case INDEX_op_shr_i32:
   2167        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
   2168                SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
   2169        goto gen_shift32;
   2170    case INDEX_op_sar_i32:
   2171        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
   2172                SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
   2173        goto gen_shift32;
   2174    case INDEX_op_rotr_i32:
   2175        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
   2176                SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
   2177        /* Fall through.  */
   2178    gen_shift32:
   2179        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
   2180        break;
   2181
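           /* There is no rotate-left instruction: rotl by N is emitted as a
              rotate right by (32 - N), computed with RSB when the amount is
              in a register. */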
   2182    case INDEX_op_rotl_i32:
   2183        if (const_args[2]) {
   2184            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
   2185                            ((0x20 - args[2]) & 0x1f) ?
   2186                            SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
   2187                            SHIFT_IMM_LSL(0));
   2188        } else {
   2189            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
   2190            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
   2191                            SHIFT_REG_ROR(TCG_REG_TMP));
   2192        }
   2193        break;
   2194
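           /* ctz is computed as clz(rbit(x)); both opcodes share the tail
              below, which handles a zero input using the fallback value in
              args[2]. */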
   2195    case INDEX_op_ctz_i32:
   2196        tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0);
   2197        a1 = TCG_REG_TMP;
   2198        goto do_clz;
   2199
   2200    case INDEX_op_clz_i32:
   2201        a1 = args[1];
   2202    do_clz:
   2203        a0 = args[0];
   2204        a2 = args[2];
   2205        c = const_args[2];
   2206        if (c && a2 == 32) {
   2207            tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
   2208            break;
   2209        }
   2210        tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0);
   2211        tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0);
   2212        if (c || a0 != a2) {
   2213            tcg_out_dat_rIK(s, COND_EQ, ARITH_MOV, ARITH_MVN, a0, 0, a2, c);
   2214        }
   2215        break;
   2216
   2217    case INDEX_op_brcond_i32:
   2218        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
   2219                       args[0], args[1], const_args[1]);
   2220        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
   2221                           arg_label(args[3]));
   2222        break;
   2223    case INDEX_op_setcond_i32:
   2224        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
   2225                        args[1], args[2], const_args[2]);
   2226        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
   2227                        ARITH_MOV, args[0], 0, 1);
   2228        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
   2229                        ARITH_MOV, args[0], 0, 0);
   2230        break;
   2231
   2232    case INDEX_op_brcond2_i32:
   2233        c = tcg_out_cmp2(s, args, const_args);
   2234        tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5]));
   2235        break;
   2236    case INDEX_op_setcond2_i32:
   2237        c = tcg_out_cmp2(s, args + 1, const_args + 1);
   2238        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1);
   2239        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
   2240                        ARITH_MOV, args[0], 0, 0);
   2241        break;
   2242
   2243    case INDEX_op_qemu_ld_i32:
   2244        tcg_out_qemu_ld(s, args, 0);
   2245        break;
   2246    case INDEX_op_qemu_ld_i64:
   2247        tcg_out_qemu_ld(s, args, 1);
   2248        break;
   2249    case INDEX_op_qemu_st_i32:
   2250        tcg_out_qemu_st(s, args, 0);
   2251        break;
   2252    case INDEX_op_qemu_st_i64:
   2253        tcg_out_qemu_st(s, args, 1);
   2254        break;
   2255
   2256    case INDEX_op_bswap16_i32:
   2257        tcg_out_bswap16(s, COND_AL, args[0], args[1], args[2]);
   2258        break;
   2259    case INDEX_op_bswap32_i32:
   2260        tcg_out_bswap32(s, COND_AL, args[0], args[1]);
   2261        break;
   2262
   2263    case INDEX_op_ext8s_i32:
   2264        tcg_out_ext8s(s, COND_AL, args[0], args[1]);
   2265        break;
   2266    case INDEX_op_ext16s_i32:
   2267        tcg_out_ext16s(s, COND_AL, args[0], args[1]);
   2268        break;
   2269    case INDEX_op_ext16u_i32:
   2270        tcg_out_ext16u(s, COND_AL, args[0], args[1]);
   2271        break;
   2272
   2273    case INDEX_op_deposit_i32:
   2274        tcg_out_deposit(s, COND_AL, args[0], args[2],
   2275                        args[3], args[4], const_args[2]);
   2276        break;
   2277    case INDEX_op_extract_i32:
   2278        tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]);
   2279        break;
   2280    case INDEX_op_sextract_i32:
   2281        tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]);
   2282        break;
   2283    case INDEX_op_extract2_i32:
   2284        /* ??? These optimizations vs a zero operand should be generic.  */
   2285        /* ??? But we can't substitute 2 for 1 in the opcode stream yet.  */
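               /* extract2 computes (lo >> pos) | (hi << (32 - pos)), i.e. 32
                  contiguous bits taken at bit offset pos from the hi:lo pair;
                  here args[1] = lo, args[2] = hi, args[3] = pos. */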
   2286        if (const_args[1]) {
   2287            if (const_args[2]) {
   2288                tcg_out_movi(s, TCG_TYPE_REG, args[0], 0);
   2289            } else {
   2290                tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0,
   2291                                args[2], SHIFT_IMM_LSL(32 - args[3]));
   2292            }
   2293        } else if (const_args[2]) {
   2294            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0,
   2295                            args[1], SHIFT_IMM_LSR(args[3]));
   2296        } else {
   2297            /* We can do extract2 in 2 insns, vs the 3 required otherwise.  */
   2298            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0,
   2299                            args[2], SHIFT_IMM_LSL(32 - args[3]));
   2300            tcg_out_dat_reg(s, COND_AL, ARITH_ORR, args[0], TCG_REG_TMP,
   2301                            args[1], SHIFT_IMM_LSR(args[3]));
   2302        }
   2303        break;
   2304
   2305    case INDEX_op_div_i32:
   2306        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
   2307        break;
   2308    case INDEX_op_divu_i32:
   2309        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
   2310        break;
   2311
   2312    case INDEX_op_mb:
   2313        tcg_out_mb(s, args[0]);
   2314        break;
   2315
   2316    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
   2317    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
   2318    default:
   2319        tcg_abort();
   2320    }
   2321}
   2322
   2323static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
   2324{
   2325    switch (op) {
   2326    case INDEX_op_goto_ptr:
   2327        return C_O0_I1(r);
   2328
   2329    case INDEX_op_ld8u_i32:
   2330    case INDEX_op_ld8s_i32:
   2331    case INDEX_op_ld16u_i32:
   2332    case INDEX_op_ld16s_i32:
   2333    case INDEX_op_ld_i32:
   2334    case INDEX_op_neg_i32:
   2335    case INDEX_op_not_i32:
   2336    case INDEX_op_bswap16_i32:
   2337    case INDEX_op_bswap32_i32:
   2338    case INDEX_op_ext8s_i32:
   2339    case INDEX_op_ext16s_i32:
   2340    case INDEX_op_ext16u_i32:
   2341    case INDEX_op_extract_i32:
   2342    case INDEX_op_sextract_i32:
   2343        return C_O1_I1(r, r);
   2344
   2345    case INDEX_op_st8_i32:
   2346    case INDEX_op_st16_i32:
   2347    case INDEX_op_st_i32:
   2348        return C_O0_I2(r, r);
   2349
   2350    case INDEX_op_add_i32:
   2351    case INDEX_op_sub_i32:
   2352    case INDEX_op_setcond_i32:
   2353        return C_O1_I2(r, r, rIN);
   2354
   2355    case INDEX_op_and_i32:
   2356    case INDEX_op_andc_i32:
   2357    case INDEX_op_clz_i32:
   2358    case INDEX_op_ctz_i32:
   2359        return C_O1_I2(r, r, rIK);
   2360
   2361    case INDEX_op_mul_i32:
   2362    case INDEX_op_div_i32:
   2363    case INDEX_op_divu_i32:
   2364        return C_O1_I2(r, r, r);
   2365
   2366    case INDEX_op_mulu2_i32:
   2367    case INDEX_op_muls2_i32:
   2368        return C_O2_I2(r, r, r, r);
   2369
   2370    case INDEX_op_or_i32:
   2371    case INDEX_op_xor_i32:
   2372        return C_O1_I2(r, r, rI);
   2373
   2374    case INDEX_op_shl_i32:
   2375    case INDEX_op_shr_i32:
   2376    case INDEX_op_sar_i32:
   2377    case INDEX_op_rotl_i32:
   2378    case INDEX_op_rotr_i32:
   2379        return C_O1_I2(r, r, ri);
   2380
   2381    case INDEX_op_brcond_i32:
   2382        return C_O0_I2(r, rIN);
   2383    case INDEX_op_deposit_i32:
   2384        return C_O1_I2(r, 0, rZ);
   2385    case INDEX_op_extract2_i32:
   2386        return C_O1_I2(r, rZ, rZ);
   2387    case INDEX_op_movcond_i32:
   2388        return C_O1_I4(r, r, rIN, rIK, 0);
   2389    case INDEX_op_add2_i32:
   2390        return C_O2_I4(r, r, r, r, rIN, rIK);
   2391    case INDEX_op_sub2_i32:
   2392        return C_O2_I4(r, r, rI, rI, rIN, rIK);
   2393    case INDEX_op_brcond2_i32:
   2394        return C_O0_I4(r, r, rI, rI);
   2395    case INDEX_op_setcond2_i32:
   2396        return C_O1_I4(r, r, r, rI, rI);
   2397
   2398    case INDEX_op_qemu_ld_i32:
   2399        return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
   2400    case INDEX_op_qemu_ld_i64:
   2401        return TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, l) : C_O2_I2(r, r, l, l);
   2402    case INDEX_op_qemu_st_i32:
   2403        return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
   2404    case INDEX_op_qemu_st_i64:
   2405        return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s);
   2406
   2407    case INDEX_op_st_vec:
   2408        return C_O0_I2(w, r);
   2409    case INDEX_op_ld_vec:
   2410    case INDEX_op_dupm_vec:
   2411        return C_O1_I1(w, r);
   2412    case INDEX_op_dup_vec:
   2413        return C_O1_I1(w, wr);
   2414    case INDEX_op_abs_vec:
   2415    case INDEX_op_neg_vec:
   2416    case INDEX_op_not_vec:
   2417    case INDEX_op_shli_vec:
   2418    case INDEX_op_shri_vec:
   2419    case INDEX_op_sari_vec:
   2420        return C_O1_I1(w, w);
   2421    case INDEX_op_dup2_vec:
   2422    case INDEX_op_add_vec:
   2423    case INDEX_op_mul_vec:
   2424    case INDEX_op_smax_vec:
   2425    case INDEX_op_smin_vec:
   2426    case INDEX_op_ssadd_vec:
   2427    case INDEX_op_sssub_vec:
   2428    case INDEX_op_sub_vec:
   2429    case INDEX_op_umax_vec:
   2430    case INDEX_op_umin_vec:
   2431    case INDEX_op_usadd_vec:
   2432    case INDEX_op_ussub_vec:
   2433    case INDEX_op_xor_vec:
   2434    case INDEX_op_arm_sshl_vec:
   2435    case INDEX_op_arm_ushl_vec:
   2436        return C_O1_I2(w, w, w);
   2437    case INDEX_op_arm_sli_vec:
   2438        return C_O1_I2(w, 0, w);
   2439    case INDEX_op_or_vec:
   2440    case INDEX_op_andc_vec:
   2441        return C_O1_I2(w, w, wO);
   2442    case INDEX_op_and_vec:
   2443    case INDEX_op_orc_vec:
   2444        return C_O1_I2(w, w, wV);
   2445    case INDEX_op_cmp_vec:
   2446        return C_O1_I2(w, w, wZ);
   2447    case INDEX_op_bitsel_vec:
   2448        return C_O1_I3(w, w, w, w);
   2449    default:
   2450        g_assert_not_reached();
   2451    }
   2452}
   2453
   2454static void tcg_target_init(TCGContext *s)
   2455{
   2456    /*
   2457     * Only probe for the platform and capabilities if we haven't already
   2458     * determined maximum values at compile time.
   2459     */
   2460#if !defined(use_idiv_instructions) || !defined(use_neon_instructions)
   2461    {
   2462        unsigned long hwcap = qemu_getauxval(AT_HWCAP);
   2463#ifndef use_idiv_instructions
   2464        use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
   2465#endif
   2466#ifndef use_neon_instructions
   2467        use_neon_instructions = (hwcap & HWCAP_ARM_NEON) != 0;
   2468#endif
   2469    }
   2470#endif
   2471
   2472    if (__ARM_ARCH < 7) {
   2473        const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
   2474        if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
   2475            arm_arch = pl[1] - '0';
   2476        }
   2477    }
   2478
   2479    tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
   2480
   2481    tcg_target_call_clobber_regs = 0;
   2482    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
   2483    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
   2484    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
   2485    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
   2486    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
   2487    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
   2488
   2489    if (use_neon_instructions) {
   2490        tcg_target_available_regs[TCG_TYPE_V64]  = ALL_VECTOR_REGS;
   2491        tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
   2492
   2493        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q0);
   2494        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q1);
   2495        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q2);
   2496        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q3);
   2497        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q8);
   2498        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q9);
   2499        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q10);
   2500        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q11);
   2501        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q12);
   2502        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q13);
   2503        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q14);
   2504        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q15);
   2505    }
   2506
   2507    s->reserved_regs = 0;
   2508    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
   2509    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
   2510    tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
   2511    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
   2512}
   2513
   2514static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
   2515                       TCGReg arg1, intptr_t arg2)
   2516{
   2517    switch (type) {
   2518    case TCG_TYPE_I32:
   2519        tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
   2520        return;
   2521    case TCG_TYPE_V64:
   2522        /* regs 1; size 8; align 8 */
   2523        tcg_out_vldst(s, INSN_VLD1 | 0x7d0, arg, arg1, arg2);
   2524        return;
   2525    case TCG_TYPE_V128:
   2526        /* regs 2; size 8; align 16 */
   2527        tcg_out_vldst(s, INSN_VLD1 | 0xae0, arg, arg1, arg2);
   2528        return;
   2529    default:
   2530        g_assert_not_reached();
   2531    }
   2532}
   2533
   2534static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
   2535                       TCGReg arg1, intptr_t arg2)
   2536{
   2537    switch (type) {
   2538    case TCG_TYPE_I32:
   2539        tcg_out_st32(s, COND_AL, arg, arg1, arg2);
   2540        return;
   2541    case TCG_TYPE_V64:
   2542        /* regs 1; size 8; align 8 */
   2543        tcg_out_vldst(s, INSN_VST1 | 0x7d0, arg, arg1, arg2);
   2544        return;
   2545    case TCG_TYPE_V128:
   2546        /* regs 2; size 8; align 16 */
   2547        tcg_out_vldst(s, INSN_VST1 | 0xae0, arg, arg1, arg2);
   2548        return;
   2549    default:
   2550        g_assert_not_reached();
   2551    }
   2552}
   2553
   2554static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
   2555                        TCGReg base, intptr_t ofs)
   2556{
   2557    return false;
   2558}
   2559
   2560static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
   2561{
   2562    if (ret == arg) {
   2563        return true;
   2564    }
   2565    switch (type) {
   2566    case TCG_TYPE_I32:
   2567        if (ret < TCG_REG_Q0 && arg < TCG_REG_Q0) {
   2568            tcg_out_mov_reg(s, COND_AL, ret, arg);
   2569            return true;
   2570        }
   2571        return false;
   2572
   2573    case TCG_TYPE_V64:
   2574    case TCG_TYPE_V128:
   2575        /* "VMOV D,N" is an alias for "VORR D,N,N". */
   2576        tcg_out_vreg3(s, INSN_VORR, type - TCG_TYPE_V64, 0, ret, arg, arg);
   2577        return true;
   2578
   2579    default:
   2580        g_assert_not_reached();
   2581    }
   2582}
   2583
   2584static void tcg_out_movi(TCGContext *s, TCGType type,
   2585                         TCGReg ret, tcg_target_long arg)
   2586{
   2587    tcg_debug_assert(type == TCG_TYPE_I32);
   2588    tcg_debug_assert(ret < TCG_REG_Q0);
   2589    tcg_out_movi32(s, COND_AL, ret, arg);
   2590}
   2591
   2592/* Type is always V128, with I64 elements.  */
   2593static void tcg_out_dup2_vec(TCGContext *s, TCGReg rd, TCGReg rl, TCGReg rh)
   2594{
   2595    /* Move high element into place first. */
   2596    /* VMOV Dd+1, Ds */
   2597    tcg_out_vreg3(s, INSN_VORR | (1 << 12), 0, 0, rd, rh, rh);
   2598    /* Move low element into place; tcg_out_mov will check for nop. */
   2599    tcg_out_mov(s, TCG_TYPE_V64, rd, rl);
   2600}
   2601
   2602static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
   2603                            TCGReg rd, TCGReg rs)
   2604{
   2605    int q = type - TCG_TYPE_V64;
   2606
   2607    if (vece == MO_64) {
   2608        if (type == TCG_TYPE_V128) {
   2609            tcg_out_dup2_vec(s, rd, rs, rs);
   2610        } else {
   2611            tcg_out_mov(s, TCG_TYPE_V64, rd, rs);
   2612        }
   2613    } else if (rs < TCG_REG_Q0) {
   2614        int b = (vece == MO_8);
   2615        int e = (vece == MO_16);
   2616        tcg_out32(s, INSN_VDUP_G | (b << 22) | (q << 21) | (e << 5) |
   2617                  encode_vn(rd) | (rs << 12));
   2618    } else {
   2619        int imm4 = 1 << vece;
   2620        tcg_out32(s, INSN_VDUP_S | (imm4 << 16) | (q << 6) |
   2621                  encode_vd(rd) | encode_vm(rs));
   2622    }
   2623    return true;
   2624}
   2625
   2626static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
   2627                             TCGReg rd, TCGReg base, intptr_t offset)
   2628{
   2629    if (vece == MO_64) {
   2630        tcg_out_ld(s, TCG_TYPE_V64, rd, base, offset);
   2631        if (type == TCG_TYPE_V128) {
   2632            tcg_out_dup2_vec(s, rd, rd, rd);
   2633        }
   2634    } else {
   2635        int q = type - TCG_TYPE_V64;
   2636        tcg_out_vldst(s, INSN_VLD1R | (vece << 6) | (q << 5),
   2637                      rd, base, offset);
   2638    }
   2639    return true;
   2640}
   2641
   2642static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
   2643                             TCGReg rd, int64_t v64)
   2644{
   2645    int q = type - TCG_TYPE_V64;
   2646    int cmode, imm8, i;
   2647
   2648    /* Test all bytes equal first.  */
   2649    if (vece == MO_8) {
   2650        tcg_out_vmovi(s, rd, q, 0, 0xe, v64);
   2651        return;
   2652    }
   2653
   2654    /*
   2655     * Test all bytes 0x00 or 0xff second.  This can match cases that
   2656     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
   2657     */
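           /* Example (illustration only): v64 = 0x0000ffff00ff0000 yields
              imm8 = 0x34 (bits 2, 4 and 5 set), emitted as a cmode 0xe, op 1
              VMOVI in which each imm8 bit expands to a 0x00 or 0xff byte. */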
   2658    for (i = imm8 = 0; i < 8; i++) {
   2659        uint8_t byte = v64 >> (i * 8);
   2660        if (byte == 0xff) {
   2661            imm8 |= 1 << i;
   2662        } else if (byte != 0) {
   2663            goto fail_bytes;
   2664        }
   2665    }
   2666    tcg_out_vmovi(s, rd, q, 1, 0xe, imm8);
   2667    return;
   2668 fail_bytes:
   2669
   2670    /*
   2671     * Tests for various replications.  For each element width, if we
   2672     * cannot find an expansion there's no point checking a larger
   2673     * width because we already know by replication it cannot match.
   2674     */
   2675    if (vece == MO_16) {
   2676        uint16_t v16 = v64;
   2677
   2678        if (is_shimm16(v16, &cmode, &imm8)) {
   2679            tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
   2680            return;
   2681        }
   2682        if (is_shimm16(~v16, &cmode, &imm8)) {
   2683            tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
   2684            return;
   2685        }
   2686
   2687        /*
   2688         * Otherwise, all remaining constants can be loaded in two insns:
   2689         * rd = v16 & 0xff, rd |= v16 & 0xff00.
   2690         */
   2691        tcg_out_vmovi(s, rd, q, 0, 0x8, v16 & 0xff);
   2692        tcg_out_vmovi(s, rd, q, 0, 0xb, v16 >> 8);   /* VORRI */
   2693        return;
   2694    }
   2695
   2696    if (vece == MO_32) {
   2697        uint32_t v32 = v64;
   2698
   2699        if (is_shimm32(v32, &cmode, &imm8) ||
   2700            is_soimm32(v32, &cmode, &imm8)) {
   2701            tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
   2702            return;
   2703        }
   2704        if (is_shimm32(~v32, &cmode, &imm8) ||
   2705            is_soimm32(~v32, &cmode, &imm8)) {
   2706            tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
   2707            return;
   2708        }
   2709
   2710        /*
   2711         * Restrict the set of constants to those we can load with
   2712         * two instructions.  Others we load from the pool.
   2713         */
   2714        i = is_shimm32_pair(v32, &cmode, &imm8);
   2715        if (i) {
   2716            tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
   2717            tcg_out_vmovi(s, rd, q, 0, i | 1, extract32(v32, i * 4, 8));
   2718            return;
   2719        }
   2720        i = is_shimm32_pair(~v32, &cmode, &imm8);
   2721        if (i) {
   2722            tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
   2723            tcg_out_vmovi(s, rd, q, 1, i | 1, extract32(~v32, i * 4, 8));
   2724            return;
   2725        }
   2726    }
   2727
   2728    /*
   2729     * As a last resort, load from the constant pool.
   2730     */
   2731    if (!q || vece == MO_64) {
   2732        new_pool_l2(s, R_ARM_PC11, s->code_ptr, 0, v64, v64 >> 32);
   2733        /* VLDR Dd, [pc + offset] */
   2734        tcg_out32(s, INSN_VLDR_D | encode_vd(rd) | (0xf << 16));
   2735        if (q) {
   2736            tcg_out_dup2_vec(s, rd, rd, rd);
   2737        }
   2738    } else {
   2739        new_pool_label(s, (uint32_t)v64, R_ARM_PC8, s->code_ptr, 0);
   2740        /* add tmp, pc, offset */
   2741        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_TMP, TCG_REG_PC, 0);
   2742        tcg_out_dupm_vec(s, type, MO_32, rd, TCG_REG_TMP, 0);
   2743    }
   2744}
   2745
   2746static const ARMInsn vec_cmp_insn[16] = {
   2747    [TCG_COND_EQ] = INSN_VCEQ,
   2748    [TCG_COND_GT] = INSN_VCGT,
   2749    [TCG_COND_GE] = INSN_VCGE,
   2750    [TCG_COND_GTU] = INSN_VCGT_U,
   2751    [TCG_COND_GEU] = INSN_VCGE_U,
   2752};
   2753
   2754static const ARMInsn vec_cmp0_insn[16] = {
   2755    [TCG_COND_EQ] = INSN_VCEQ0,
   2756    [TCG_COND_GT] = INSN_VCGT0,
   2757    [TCG_COND_GE] = INSN_VCGE0,
   2758    [TCG_COND_LT] = INSN_VCLT0,
   2759    [TCG_COND_LE] = INSN_VCLE0,
   2760};
   2761
   2762static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
   2763                           unsigned vecl, unsigned vece,
   2764                           const TCGArg args[TCG_MAX_OP_ARGS],
   2765                           const int const_args[TCG_MAX_OP_ARGS])
   2766{
   2767    TCGType type = vecl + TCG_TYPE_V64;
   2768    unsigned q = vecl;
   2769    TCGArg a0, a1, a2, a3;
   2770    int cmode, imm8;
   2771
   2772    a0 = args[0];
   2773    a1 = args[1];
   2774    a2 = args[2];
   2775
   2776    switch (opc) {
   2777    case INDEX_op_ld_vec:
   2778        tcg_out_ld(s, type, a0, a1, a2);
   2779        return;
   2780    case INDEX_op_st_vec:
   2781        tcg_out_st(s, type, a0, a1, a2);
   2782        return;
   2783    case INDEX_op_dupm_vec:
   2784        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
   2785        return;
   2786    case INDEX_op_dup2_vec:
   2787        tcg_out_dup2_vec(s, a0, a1, a2);
   2788        return;
   2789    case INDEX_op_abs_vec:
   2790        tcg_out_vreg2(s, INSN_VABS, q, vece, a0, a1);
   2791        return;
   2792    case INDEX_op_neg_vec:
   2793        tcg_out_vreg2(s, INSN_VNEG, q, vece, a0, a1);
   2794        return;
   2795    case INDEX_op_not_vec:
   2796        tcg_out_vreg2(s, INSN_VMVN, q, 0, a0, a1);
   2797        return;
   2798    case INDEX_op_add_vec:
   2799        tcg_out_vreg3(s, INSN_VADD, q, vece, a0, a1, a2);
   2800        return;
   2801    case INDEX_op_mul_vec:
   2802        tcg_out_vreg3(s, INSN_VMUL, q, vece, a0, a1, a2);
   2803        return;
   2804    case INDEX_op_smax_vec:
   2805        tcg_out_vreg3(s, INSN_VMAX, q, vece, a0, a1, a2);
   2806        return;
   2807    case INDEX_op_smin_vec:
   2808        tcg_out_vreg3(s, INSN_VMIN, q, vece, a0, a1, a2);
   2809        return;
   2810    case INDEX_op_sub_vec:
   2811        tcg_out_vreg3(s, INSN_VSUB, q, vece, a0, a1, a2);
   2812        return;
   2813    case INDEX_op_ssadd_vec:
   2814        tcg_out_vreg3(s, INSN_VQADD, q, vece, a0, a1, a2);
   2815        return;
   2816    case INDEX_op_sssub_vec:
   2817        tcg_out_vreg3(s, INSN_VQSUB, q, vece, a0, a1, a2);
   2818        return;
   2819    case INDEX_op_umax_vec:
   2820        tcg_out_vreg3(s, INSN_VMAX_U, q, vece, a0, a1, a2);
   2821        return;
   2822    case INDEX_op_umin_vec:
   2823        tcg_out_vreg3(s, INSN_VMIN_U, q, vece, a0, a1, a2);
   2824        return;
   2825    case INDEX_op_usadd_vec:
   2826        tcg_out_vreg3(s, INSN_VQADD_U, q, vece, a0, a1, a2);
   2827        return;
   2828    case INDEX_op_ussub_vec:
   2829        tcg_out_vreg3(s, INSN_VQSUB_U, q, vece, a0, a1, a2);
   2830        return;
   2831    case INDEX_op_xor_vec:
   2832        tcg_out_vreg3(s, INSN_VEOR, q, 0, a0, a1, a2);
   2833        return;
   2834    case INDEX_op_arm_sshl_vec:
   2835        /*
   2836         * Note that Vm is the data and Vn is the shift count,
   2837         * therefore the arguments appear reversed.
   2838         */
   2839        tcg_out_vreg3(s, INSN_VSHL_S, q, vece, a0, a2, a1);
   2840        return;
   2841    case INDEX_op_arm_ushl_vec:
   2842        /* See above. */
   2843        tcg_out_vreg3(s, INSN_VSHL_U, q, vece, a0, a2, a1);
   2844        return;
   2845    case INDEX_op_shli_vec:
   2846        tcg_out_vshifti(s, INSN_VSHLI, q, a0, a1, a2 + (8 << vece));
   2847        return;
   2848    case INDEX_op_shri_vec:
   2849        tcg_out_vshifti(s, INSN_VSHRI, q, a0, a1, (16 << vece) - a2);
   2850        return;
   2851    case INDEX_op_sari_vec:
   2852        tcg_out_vshifti(s, INSN_VSARI, q, a0, a1, (16 << vece) - a2);
   2853        return;
   2854    case INDEX_op_arm_sli_vec:
   2855        tcg_out_vshifti(s, INSN_VSLI, q, a0, a2, args[3] + (8 << vece));
   2856        return;
   2857
   2858    case INDEX_op_andc_vec:
   2859        if (!const_args[2]) {
   2860            tcg_out_vreg3(s, INSN_VBIC, q, 0, a0, a1, a2);
   2861            return;
   2862        }
   2863        a2 = ~a2;
   2864        /* fall through */
   2865    case INDEX_op_and_vec:
   2866        if (const_args[2]) {
   2867            is_shimm1632(~a2, &cmode, &imm8);
   2868            if (a0 == a1) {
   2869                tcg_out_vmovi(s, a0, q, 1, cmode | 1, imm8); /* VBICI */
   2870                return;
   2871            }
   2872            tcg_out_vmovi(s, a0, q, 1, cmode, imm8); /* VMVNI */
   2873            a2 = a0;
   2874        }
   2875        tcg_out_vreg3(s, INSN_VAND, q, 0, a0, a1, a2);
   2876        return;
   2877
   2878    case INDEX_op_orc_vec:
   2879        if (!const_args[2]) {
   2880            tcg_out_vreg3(s, INSN_VORN, q, 0, a0, a1, a2);
   2881            return;
   2882        }
   2883        a2 = ~a2;
   2884        /* fall through */
   2885    case INDEX_op_or_vec:
   2886        if (const_args[2]) {
   2887            is_shimm1632(a2, &cmode, &imm8);
   2888            if (a0 == a1) {
   2889                tcg_out_vmovi(s, a0, q, 0, cmode | 1, imm8); /* VORRI */
   2890                return;
   2891            }
   2892            tcg_out_vmovi(s, a0, q, 0, cmode, imm8); /* VMOVI */
   2893            a2 = a0;
   2894        }
   2895        tcg_out_vreg3(s, INSN_VORR, q, 0, a0, a1, a2);
   2896        return;
   2897
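            /*
             * NE against the constant 0 maps onto VTST with both inputs the
             * same, since (x & x) != 0 iff x != 0; NE against a register is
             * VCEQ followed by an inversion.  For the other conditions, a
             * compare lacking a direct encoding is handled by swapping the
             * operands and the condition, and a compare against 0 without a
             * dedicated zero form falls back to a zero vector in TCG_VEC_TMP.
             */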
   2898    case INDEX_op_cmp_vec:
   2899        {
   2900            TCGCond cond = args[3];
   2901
   2902            if (cond == TCG_COND_NE) {
   2903                if (const_args[2]) {
   2904                    tcg_out_vreg3(s, INSN_VTST, q, vece, a0, a1, a1);
   2905                } else {
   2906                    tcg_out_vreg3(s, INSN_VCEQ, q, vece, a0, a1, a2);
   2907                    tcg_out_vreg2(s, INSN_VMVN, q, 0, a0, a0);
   2908                }
   2909            } else {
   2910                ARMInsn insn;
   2911
   2912                if (const_args[2]) {
   2913                    insn = vec_cmp0_insn[cond];
   2914                    if (insn) {
   2915                        tcg_out_vreg2(s, insn, q, vece, a0, a1);
   2916                        return;
   2917                    }
   2918                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
   2919                    a2 = TCG_VEC_TMP;
   2920                }
   2921                insn = vec_cmp_insn[cond];
   2922                if (insn == 0) {
   2923                    TCGArg t;
   2924                    t = a1, a1 = a2, a2 = t;
   2925                    cond = tcg_swap_cond(cond);
   2926                    insn = vec_cmp_insn[cond];
   2927                    tcg_debug_assert(insn != 0);
   2928                }
   2929                tcg_out_vreg3(s, insn, q, vece, a0, a1, a2);
   2930            }
   2931        }
   2932        return;
   2933
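            /*
             * bitsel(d, sel, t, f) computes (sel & t) | (~sel & f).  VBSL,
             * VBIT and VBIF all perform this operation and differ only in
             * which input must already live in the destination register:
             * the selector, the false value or the true value respectively.
             */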
   2934    case INDEX_op_bitsel_vec:
   2935        a3 = args[3];
   2936        if (a0 == a3) {
   2937            tcg_out_vreg3(s, INSN_VBIT, q, 0, a0, a2, a1);
   2938        } else if (a0 == a2) {
   2939            tcg_out_vreg3(s, INSN_VBIF, q, 0, a0, a3, a1);
   2940        } else {
   2941            tcg_out_mov(s, type, a0, a1);
   2942            tcg_out_vreg3(s, INSN_VBSL, q, 0, a0, a2, a3);
   2943        }
   2944        return;
   2945
   2946    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
   2947    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
   2948    default:
   2949        g_assert_not_reached();
   2950    }
   2951}
   2952
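        /*
         * Report which vector opcodes can be emitted for a given element
         * size: 1 means supported directly, -1 means supported via the
         * expansion in tcg_expand_vec_op below, 0 means not supported.
         */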
   2953int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
   2954{
   2955    switch (opc) {
   2956    case INDEX_op_add_vec:
   2957    case INDEX_op_sub_vec:
   2958    case INDEX_op_and_vec:
   2959    case INDEX_op_andc_vec:
   2960    case INDEX_op_or_vec:
   2961    case INDEX_op_orc_vec:
   2962    case INDEX_op_xor_vec:
   2963    case INDEX_op_not_vec:
   2964    case INDEX_op_shli_vec:
   2965    case INDEX_op_shri_vec:
   2966    case INDEX_op_sari_vec:
   2967    case INDEX_op_ssadd_vec:
   2968    case INDEX_op_sssub_vec:
   2969    case INDEX_op_usadd_vec:
   2970    case INDEX_op_ussub_vec:
   2971    case INDEX_op_bitsel_vec:
   2972        return 1;
   2973    case INDEX_op_abs_vec:
   2974    case INDEX_op_cmp_vec:
   2975    case INDEX_op_mul_vec:
   2976    case INDEX_op_neg_vec:
   2977    case INDEX_op_smax_vec:
   2978    case INDEX_op_smin_vec:
   2979    case INDEX_op_umax_vec:
   2980    case INDEX_op_umin_vec:
   2981        return vece < MO_64;
   2982    case INDEX_op_shlv_vec:
   2983    case INDEX_op_shrv_vec:
   2984    case INDEX_op_sarv_vec:
   2985    case INDEX_op_rotli_vec:
   2986    case INDEX_op_rotlv_vec:
   2987    case INDEX_op_rotrv_vec:
   2988        return -1;
   2989    default:
   2990        return 0;
   2991    }
   2992}
   2993
   2994void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
   2995                       TCGArg a0, ...)
   2996{
   2997    va_list va;
   2998    TCGv_vec v0, v1, v2, t1, t2, c1;
   2999    TCGArg a2;
   3000
   3001    va_start(va, a0);
   3002    v0 = temp_tcgv_vec(arg_temp(a0));
   3003    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
   3004    a2 = va_arg(va, TCGArg);
   3005    va_end(va);
   3006
   3007    switch (opc) {
   3008    case INDEX_op_shlv_vec:
   3009        /*
   3010         * Merely propagate shlv_vec to arm_ushl_vec.
    3011         * We do not set TCG_TARGET_HAS_shv_vec, so shifts by a
    3012         * vector operand are always routed through this expansion.
   3013         */
   3014        v2 = temp_tcgv_vec(arg_temp(a2));
   3015        vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0),
   3016                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
   3017        break;
   3018
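            /*
             * VSHL (register) reads the per-lane shift count as a signed
             * value from the low byte of each element, so a right shift by
             * n is emitted as a left shift by -n.
             */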
   3019    case INDEX_op_shrv_vec:
   3020    case INDEX_op_sarv_vec:
   3021        /* Right shifts are negative left shifts for NEON.  */
   3022        v2 = temp_tcgv_vec(arg_temp(a2));
   3023        t1 = tcg_temp_new_vec(type);
   3024        tcg_gen_neg_vec(vece, t1, v2);
   3025        if (opc == INDEX_op_shrv_vec) {
   3026            opc = INDEX_op_arm_ushl_vec;
   3027        } else {
   3028            opc = INDEX_op_arm_sshl_vec;
   3029        }
   3030        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
   3031                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
   3032        tcg_temp_free_vec(t1);
   3033        break;
   3034
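            /*
             * rotli(x, n): compute x >> (esize - n) into a temp, then let
             * VSLI (shift left and insert) deposit x << n on top of it while
             * preserving the low n bits, i.e. exactly the rotated-in bits.
             */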
   3035    case INDEX_op_rotli_vec:
   3036        t1 = tcg_temp_new_vec(type);
   3037        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
   3038        vec_gen_4(INDEX_op_arm_sli_vec, type, vece,
   3039                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
   3040        tcg_temp_free_vec(t1);
   3041        break;
   3042
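            /*
             * rotlv(x, n) = (x << n) | (x >> (esize - n)); the right shift
             * is once more an unsigned left shift by the negative count
             * n - esize.
             */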
   3043    case INDEX_op_rotlv_vec:
   3044        v2 = temp_tcgv_vec(arg_temp(a2));
   3045        t1 = tcg_temp_new_vec(type);
   3046        c1 = tcg_constant_vec(type, vece, 8 << vece);
   3047        tcg_gen_sub_vec(vece, t1, v2, c1);
   3048        /* Right shifts are negative left shifts for NEON.  */
   3049        vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(t1),
   3050                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
   3051        vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0),
   3052                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
   3053        tcg_gen_or_vec(vece, v0, v0, t1);
   3054        tcg_temp_free_vec(t1);
   3055        break;
   3056
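            /*
             * rotrv(x, n) = (x >> n) | (x << (esize - n)), i.e. an unsigned
             * left shift by -n or'ed with one by esize - n.
             */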
   3057    case INDEX_op_rotrv_vec:
   3058        v2 = temp_tcgv_vec(arg_temp(a2));
   3059        t1 = tcg_temp_new_vec(type);
   3060        t2 = tcg_temp_new_vec(type);
   3061        c1 = tcg_constant_vec(type, vece, 8 << vece);
   3062        tcg_gen_neg_vec(vece, t1, v2);
   3063        tcg_gen_sub_vec(vece, t2, c1, v2);
   3064        /* Right shifts are negative left shifts for NEON.  */
   3065        vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(t1),
   3066                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
   3067        vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(t2),
   3068                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
   3069        tcg_gen_or_vec(vece, v0, t1, t2);
   3070        tcg_temp_free_vec(t1);
   3071        tcg_temp_free_vec(t2);
   3072        break;
   3073
   3074    default:
   3075        g_assert_not_reached();
   3076    }
   3077}
   3078
   3079static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
   3080{
   3081    int i;
   3082    for (i = 0; i < count; ++i) {
   3083        p[i] = INSN_NOP;
   3084    }
   3085}
   3086
   3087/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   3088   and tcg_register_jit.  */
   3089
   3090#define PUSH_SIZE  ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))
   3091
   3092#define FRAME_SIZE \
   3093    ((PUSH_SIZE \
   3094      + TCG_STATIC_CALL_ARGS_SIZE \
   3095      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
   3096      + TCG_TARGET_STACK_ALIGN - 1) \
   3097     & -TCG_TARGET_STACK_ALIGN)
   3098
   3099#define STACK_ADDEND  (FRAME_SIZE - PUSH_SIZE)
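        /*
         * For orientation: r4-r11 plus lr is nine 4-byte words, so PUSH_SIZE
         * is 36.  Assuming the usual TCG_STATIC_CALL_ARGS_SIZE of 128 and
         * CPU_TEMP_BUF_NLONGS of 128 longs (512 bytes), FRAME_SIZE rounds
         * 36 + 128 + 512 = 676 up to the 8-byte stack alignment, giving 680,
         * and STACK_ADDEND is then 644; the macros above stay authoritative
         * if any of those constants change.
         */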
   3100
   3101static void tcg_target_qemu_prologue(TCGContext *s)
   3102{
   3103    /* Calling convention requires us to save r4-r11 and lr.  */
   3104    /* stmdb sp!, { r4 - r11, lr } */
   3105    tcg_out_ldstm(s, COND_AL, INSN_STMDB, TCG_REG_CALL_STACK,
   3106                  (1 << TCG_REG_R4) | (1 << TCG_REG_R5) | (1 << TCG_REG_R6) |
   3107                  (1 << TCG_REG_R7) | (1 << TCG_REG_R8) | (1 << TCG_REG_R9) |
   3108                  (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | (1 << TCG_REG_R14));
   3109
   3110    /* Reserve callee argument and tcg temp space.  */
   3111    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
   3112                   TCG_REG_CALL_STACK, STACK_ADDEND, 1);
   3113    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
   3114                  CPU_TEMP_BUF_NLONGS * sizeof(long));
   3115
   3116    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
   3117
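            /*
             * The second argument of tcg_qemu_tb_exec is the address of the
             * translated block; branch straight into it.
             */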
   3118    tcg_out_b_reg(s, COND_AL, tcg_target_call_iarg_regs[1]);
   3119
   3120    /*
    3121     * Return path for goto_ptr. Set the return value to 0, à la exit_tb,
   3122     * and fall through to the rest of the epilogue.
   3123     */
   3124    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
   3125    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
   3126    tcg_out_epilogue(s);
   3127}
   3128
   3129static void tcg_out_epilogue(TCGContext *s)
   3130{
   3131    /* Release local stack frame.  */
   3132    tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
   3133                   TCG_REG_CALL_STACK, STACK_ADDEND, 1);
   3134
   3135    /* ldmia sp!, { r4 - r11, pc } */
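            /* Loading the saved lr into pc performs the return to the caller. */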
   3136    tcg_out_ldstm(s, COND_AL, INSN_LDMIA, TCG_REG_CALL_STACK,
   3137                  (1 << TCG_REG_R4) | (1 << TCG_REG_R5) | (1 << TCG_REG_R6) |
   3138                  (1 << TCG_REG_R7) | (1 << TCG_REG_R8) | (1 << TCG_REG_R9) |
   3139                  (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | (1 << TCG_REG_PC));
   3140}
   3141
   3142typedef struct {
   3143    DebugFrameHeader h;
   3144    uint8_t fde_def_cfa[4];
   3145    uint8_t fde_reg_ofs[18];
   3146} DebugFrame;
   3147
   3148#define ELF_HOST_MACHINE EM_ARM
   3149
    3150/* We're expecting a 2-byte uleb128 encoded value.  */
   3151QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
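        /*
         * fde_def_cfa below stores FRAME_SIZE as two uleb128 bytes: the low
         * seven bits with the continuation bit set, then the remaining bits.
         * Two bytes only cover values below 1 << 14, which the assertion
         * above guarantees.
         */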
   3152
   3153static const DebugFrame debug_frame = {
   3154    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
   3155    .h.cie.id = -1,
   3156    .h.cie.version = 1,
   3157    .h.cie.code_align = 1,
   3158    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
   3159    .h.cie.return_column = 14,
   3160
   3161    /* Total FDE size does not include the "len" member.  */
   3162    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
   3163
   3164    .fde_def_cfa = {
   3165        12, 13,                         /* DW_CFA_def_cfa sp, ... */
   3166        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
   3167        (FRAME_SIZE >> 7)
   3168    },
   3169    .fde_reg_ofs = {
   3170        /* The following must match the stmdb in the prologue.  */
   3171        0x8e, 1,                        /* DW_CFA_offset, lr, -4 */
   3172        0x8b, 2,                        /* DW_CFA_offset, r11, -8 */
   3173        0x8a, 3,                        /* DW_CFA_offset, r10, -12 */
   3174        0x89, 4,                        /* DW_CFA_offset, r9, -16 */
   3175        0x88, 5,                        /* DW_CFA_offset, r8, -20 */
   3176        0x87, 6,                        /* DW_CFA_offset, r7, -24 */
   3177        0x86, 7,                        /* DW_CFA_offset, r6, -28 */
   3178        0x85, 8,                        /* DW_CFA_offset, r5, -32 */
   3179        0x84, 9,                        /* DW_CFA_offset, r4, -36 */
   3180    }
   3181};
   3182
   3183void tcg_register_jit(const void *buf, size_t buf_size)
   3184{
   3185    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
   3186}