cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

mem_helper.c (89882B)


      1/*
      2 *  S/390 memory access helper routines
      3 *
      4 *  Copyright (c) 2009 Ulrich Hecht
      5 *  Copyright (c) 2009 Alexander Graf
      6 *
      7 * This library is free software; you can redistribute it and/or
      8 * modify it under the terms of the GNU Lesser General Public
      9 * License as published by the Free Software Foundation; either
     10 * version 2.1 of the License, or (at your option) any later version.
     11 *
     12 * This library is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15 * Lesser General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU Lesser General Public
     18 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     19 */
     20
     21#include "qemu/osdep.h"
     22#include "cpu.h"
     23#include "s390x-internal.h"
     24#include "tcg_s390x.h"
     25#include "exec/helper-proto.h"
     26#include "exec/exec-all.h"
     27#include "exec/cpu_ldst.h"
     28#include "qemu/int128.h"
     29#include "qemu/atomic128.h"
     30#include "tcg/tcg.h"
     31#include "trace.h"
     32
     33#if !defined(CONFIG_USER_ONLY)
     34#include "hw/s390x/storage-keys.h"
     35#include "hw/boards.h"
     36#endif
     37
     38/*****************************************************************************/
     39/* Softmmu support */
     40
     41/* #define DEBUG_HELPER */
     42#ifdef DEBUG_HELPER
     43#define HELPER_LOG(x...) qemu_log(x)
     44#else
     45#define HELPER_LOG(x...)
     46#endif
     47
     48static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
     49{
     50    uint16_t pkm = env->cregs[3] >> 16;
     51
     52    if (env->psw.mask & PSW_MASK_PSTATE) {
     53        /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
     54        return pkm & (0x80 >> psw_key);
     55    }
     56    return true;
     57}
     58
     59static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
     60                                   uint64_t src, uint32_t len)
     61{
     62    if (!len || src == dest) {
     63        return false;
     64    }
     65    /* Take care of wrapping at the end of address space. */
     66    if (unlikely(wrap_address(env, src + len - 1) < src)) {
     67        return dest > src || dest <= wrap_address(env, src + len - 1);
     68    }
     69    return dest > src && dest <= src + len - 1;
     70}
     71
     72/* Trigger a SPECIFICATION exception if an address or a length is not
     73   naturally aligned.  */
     74static inline void check_alignment(CPUS390XState *env, uint64_t v,
     75                                   int wordsize, uintptr_t ra)
     76{
     77    if (v % wordsize) {
     78        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
     79    }
     80}
     81
     82/* Load a value from memory according to its size.  */
     83static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
     84                                           int wordsize, uintptr_t ra)
     85{
     86    switch (wordsize) {
     87    case 1:
     88        return cpu_ldub_data_ra(env, addr, ra);
     89    case 2:
     90        return cpu_lduw_data_ra(env, addr, ra);
     91    default:
     92        abort();
     93    }
     94}
     95
/* Store a value to memory according to its size.  */
     97static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
     98                                      uint64_t value, int wordsize,
     99                                      uintptr_t ra)
    100{
    101    switch (wordsize) {
    102    case 1:
    103        cpu_stb_data_ra(env, addr, value, ra);
    104        break;
    105    case 2:
    106        cpu_stw_data_ra(env, addr, value, ra);
    107        break;
    108    default:
    109        abort();
    110    }
    111}
    112
/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;   /* guest virtual address of the first fragment */
    target_ulong vaddr2;   /* guest virtual address of the second fragment (valid iff size2 != 0) */
    char *haddr1;          /* host address of the first fragment, or NULL -> use ld/st helpers */
    char *haddr2;          /* host address of the second fragment, or NULL -> use ld/st helpers */
    uint16_t size1;        /* bytes covered on the first page */
    uint16_t size2;        /* bytes covered on the second page (0 if single-page access) */
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers. Maybe, one day we can get rid of ld/st access - once we can
     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
     * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
     * pages, we might trigger a new MMU translation - very unlikely that
     * the mapping changes in between and we would trigger a fault.
     */
    int mmu_idx;
} S390Access;
    133
    134/*
    135 * With nonfault=1, return the PGM_ exception that would have been injected
    136 * into the guest; return 0 if no exception was detected.
    137 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
    139 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
    140 */
static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
                             MMUAccessType access_type, int mmu_idx,
                             bool nonfault, void **phost, uintptr_t ra)
{
    int flags;

#if defined(CONFIG_USER_ONLY)
    flags = page_get_flags(addr);
    if (!(flags & (access_type == MMU_DATA_LOAD ?  PAGE_READ : PAGE_WRITE_ORG))) {
        /* Page unmapped or lacking the required permission. */
        env->__excp_addr = addr;
        /* Mapped-but-protected -> PROTECTION; unmapped -> ADDRESSING. */
        flags = (flags & PAGE_VALID) ? PGM_PROTECTION : PGM_ADDRESSING;
        if (nonfault) {
            return flags;
        }
        /* Raises the program interrupt; does not return. */
        tcg_s390_program_interrupt(env, flags, ra);
    }
    *phost = g2h(env_cpu(env), addr);
#else
    /*
     * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
     * to detect if there was an exception during tlb_fill().
     */
    env->tlb_fill_exc = 0;
    flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
                               ra);
    if (env->tlb_fill_exc) {
        /* tlb_fill() recorded an exception; propagate it to the caller. */
        return env->tlb_fill_exc;
    }

    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif
    return 0;
}
    180
    181static int access_prepare_nf(S390Access *access, CPUS390XState *env,
    182                             bool nonfault, vaddr vaddr1, int size,
    183                             MMUAccessType access_type,
    184                             int mmu_idx, uintptr_t ra)
    185{
    186    void *haddr1, *haddr2 = NULL;
    187    int size1, size2, exc;
    188    vaddr vaddr2 = 0;
    189
    190    assert(size > 0 && size <= 4096);
    191
    192    size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
    193    size2 = size - size1;
    194
    195    exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
    196                            &haddr1, ra);
    197    if (exc) {
    198        return exc;
    199    }
    200    if (unlikely(size2)) {
    201        /* The access crosses page boundaries. */
    202        vaddr2 = wrap_address(env, vaddr1 + size1);
    203        exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
    204                                nonfault, &haddr2, ra);
    205        if (exc) {
    206            return exc;
    207        }
    208    }
    209
    210    *access = (S390Access) {
    211        .vaddr1 = vaddr1,
    212        .vaddr2 = vaddr2,
    213        .haddr1 = haddr1,
    214        .haddr2 = haddr2,
    215        .size1 = size1,
    216        .size2 = size2,
    217        .mmu_idx = mmu_idx
    218    };
    219    return 0;
    220}
    221
    222static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
    223                                 MMUAccessType access_type, int mmu_idx,
    224                                 uintptr_t ra)
    225{
    226    S390Access ret;
    227    int exc = access_prepare_nf(&ret, env, false, vaddr, size,
    228                                access_type, mmu_idx, ra);
    229    assert(!exc);
    230    return ret;
    231}
    232
/* Helper to handle memset on a single page. */
static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
                             uint8_t byte, uint16_t size, int mmu_idx,
                             uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    /* User-only: a direct host mapping always exists. */
    g_assert(haddr);
    memset(haddr, byte, size);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    int i;

    if (likely(haddr)) {
        /* Fast path: fill via the direct host mapping. */
        memset(haddr, byte, size);
    } else {
        /*
         * Do a single access and test if we can then get access to the
         * page. This is especially relevant to speed up TLB_NOTDIRTY.
         */
        g_assert(size > 0);
        helper_ret_stb_mmu(env, vaddr, byte, oi, ra);
        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
        if (likely(haddr)) {
            /* First store made the page accessible: finish with memset(). */
            memset(haddr + 1, byte, size - 1);
        } else {
            /* Still no host access: store the remaining bytes one by one. */
            for (i = 1; i < size; i++) {
                helper_ret_stb_mmu(env, vaddr + i, byte, oi, ra);
            }
        }
    }
#endif
}
    265
    266static void access_memset(CPUS390XState *env, S390Access *desta,
    267                          uint8_t byte, uintptr_t ra)
    268{
    269
    270    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
    271                     desta->mmu_idx, ra);
    272    if (likely(!desta->size2)) {
    273        return;
    274    }
    275    do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
    276                     desta->mmu_idx, ra);
    277}
    278
/*
 * Read one byte at @offset within a single-page fragment, falling back to
 * the ld helper (and caching the host address) when no direct mapping is
 * available yet.
 */
static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                                  int offset, int mmu_idx, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return ldub_p(*haddr + offset);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    uint8_t byte;

    if (likely(*haddr)) {
        return ldub_p(*haddr + offset);
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    byte = helper_ret_ldub_mmu(env, vaddr + offset, oi, ra);
    /* Cache the host address for subsequent byte accesses, if now valid. */
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
    return byte;
#endif
}
    300
    301static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
    302                               int offset, uintptr_t ra)
    303{
    304    if (offset < access->size1) {
    305        return do_access_get_byte(env, access->vaddr1, &access->haddr1,
    306                                  offset, access->mmu_idx, ra);
    307    }
    308    return do_access_get_byte(env, access->vaddr2, &access->haddr2,
    309                              offset - access->size1, access->mmu_idx, ra);
    310}
    311
/*
 * Write one byte at @offset within a single-page fragment, falling back to
 * the st helper (and caching the host address) when no direct mapping is
 * available yet.
 */
static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                               int offset, uint8_t byte, int mmu_idx,
                               uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    stb_p(*haddr + offset, byte);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

    if (likely(*haddr)) {
        stb_p(*haddr + offset, byte);
        return;
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    helper_ret_stb_mmu(env, vaddr + offset, byte, oi, ra);
    /* Cache the host address for subsequent byte accesses, if now valid. */
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
#endif
}
    333
    334static void access_set_byte(CPUS390XState *env, S390Access *access,
    335                            int offset, uint8_t byte, uintptr_t ra)
    336{
    337    if (offset < access->size1) {
    338        do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
    339                           access->mmu_idx, ra);
    340    } else {
    341        do_access_set_byte(env, access->vaddr2, &access->haddr2,
    342                           offset - access->size1, byte, access->mmu_idx, ra);
    343    }
    344}
    345
/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int diff;

    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        /* Byte-wise copy via the ld/st-helper-capable accessors. */
        for (i = 0; i < desta->size1 + desta->size2; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

    if (srca->size1 == desta->size1) {
        /* Source and destination cross their page boundary at the same offset. */
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (srca->size1 < desta->size1) {
        /* The source hits its page boundary first: its second fragment
           supplies the tail of the destination's first fragment. */
        diff = desta->size1 - srca->size1;
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        /* The destination hits its page boundary first: the source's first
           fragment spills into the destination's second fragment. */
        diff = srca->size1 - desta->size1;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}
    391
    392static int mmu_idx_from_as(uint8_t as)
    393{
    394    switch (as) {
    395    case AS_PRIMARY:
    396        return MMU_PRIMARY_IDX;
    397    case AS_SECONDARY:
    398        return MMU_SECONDARY_IDX;
    399    case AS_HOME:
    400        return MMU_HOME_IDX;
    401    default:
    402        /* FIXME AS_ACCREG */
    403        g_assert_not_reached();
    404    }
    405}
    406
    407/* and on array */
    408static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
    409                             uint64_t src, uintptr_t ra)
    410{
    411    const int mmu_idx = cpu_mmu_index(env, false);
    412    S390Access srca1, srca2, desta;
    413    uint32_t i;
    414    uint8_t c = 0;
    415
    416    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
    417               __func__, l, dest, src);
    418
    419    /* NC always processes one more byte than specified - maximum is 256 */
    420    l++;
    421
    422    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    423    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    424    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    425    for (i = 0; i < l; i++) {
    426        const uint8_t x = access_get_byte(env, &srca1, i, ra) &
    427                          access_get_byte(env, &srca2, i, ra);
    428
    429        c |= x;
    430        access_set_byte(env, &desta, i, x, ra);
    431    }
    432    return c != 0;
    433}
    434
/* NC entry point; GETPC() must be evaluated in the helper called from TCG. */
uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}
    440
    441/* xor on array */
    442static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
    443                             uint64_t src, uintptr_t ra)
    444{
    445    const int mmu_idx = cpu_mmu_index(env, false);
    446    S390Access srca1, srca2, desta;
    447    uint32_t i;
    448    uint8_t c = 0;
    449
    450    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
    451               __func__, l, dest, src);
    452
    453    /* XC always processes one more byte than specified - maximum is 256 */
    454    l++;
    455
    456    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    457    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    458    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    459
    460    /* xor with itself is the same as memset(0) */
    461    if (src == dest) {
    462        access_memset(env, &desta, 0, ra);
    463        return 0;
    464    }
    465
    466    for (i = 0; i < l; i++) {
    467        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
    468                          access_get_byte(env, &srca2, i, ra);
    469
    470        c |= x;
    471        access_set_byte(env, &desta, i, x, ra);
    472    }
    473    return c != 0;
    474}
    475
/* XC entry point; GETPC() must be evaluated in the helper called from TCG. */
uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}
    481
    482/* or on array */
    483static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
    484                             uint64_t src, uintptr_t ra)
    485{
    486    const int mmu_idx = cpu_mmu_index(env, false);
    487    S390Access srca1, srca2, desta;
    488    uint32_t i;
    489    uint8_t c = 0;
    490
    491    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
    492               __func__, l, dest, src);
    493
    494    /* OC always processes one more byte than specified - maximum is 256 */
    495    l++;
    496
    497    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    498    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    499    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    500    for (i = 0; i < l; i++) {
    501        const uint8_t x = access_get_byte(env, &srca1, i, ra) |
    502                          access_get_byte(env, &srca2, i, ra);
    503
    504        c |= x;
    505        access_set_byte(env, &desta, i, x, ra);
    506    }
    507    return c != 0;
    508}
    509
/* OC entry point; GETPC() must be evaluated in the helper called from TCG. */
uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}
    515
/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        /* Byte-wise copy with dest == src + 1 propagates the first source
           byte through the whole destination - i.e. a memset. */
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        /* Destructive overlap: copy one byte at a time, left to right. */
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    /* MVC leaves the CC unchanged; return the current value. */
    return env->cc_op;
}
    552
/* MVC entry point; the returned (unchanged) CC value is ignored here. */
void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}
    557
    558/* move inverse  */
    559void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
    560{
    561    const int mmu_idx = cpu_mmu_index(env, false);
    562    S390Access srca, desta;
    563    uintptr_t ra = GETPC();
    564    int i;
    565
    566    /* MVCIN always copies one more byte than specified - maximum is 256 */
    567    l++;
    568
    569    src = wrap_address(env, src - l + 1);
    570    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    571    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    572    for (i = 0; i < l; i++) {
    573        const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
    574
    575        access_set_byte(env, &desta, i, x, ra);
    576    }
    577}
    578
    579/* move numerics  */
    580void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
    581{
    582    const int mmu_idx = cpu_mmu_index(env, false);
    583    S390Access srca1, srca2, desta;
    584    uintptr_t ra = GETPC();
    585    int i;
    586
    587    /* MVN always copies one more byte than specified - maximum is 256 */
    588    l++;
    589
    590    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    591    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    592    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    593    for (i = 0; i < l; i++) {
    594        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
    595                          (access_get_byte(env, &srca2, i, ra) & 0xf0);
    596
    597        access_set_byte(env, &desta, i, x, ra);
    598    }
    599}
    600
/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte: keep the destination's low nibble, put the
       source's low nibble into the high nibble. */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* The previous source byte's high nibble carries into this byte. */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            /* Source exhausted: pad with zero nibbles. */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}
    634
    635/* move zones  */
    636void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
    637{
    638    const int mmu_idx = cpu_mmu_index(env, false);
    639    S390Access srca1, srca2, desta;
    640    uintptr_t ra = GETPC();
    641    int i;
    642
    643    /* MVZ always copies one more byte than specified - maximum is 256 */
    644    l++;
    645
    646    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    647    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    648    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    649    for (i = 0; i < l; i++) {
    650        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
    651                          (access_get_byte(env, &srca2, i, ra) & 0x0f);
    652
    653        access_set_byte(env, &desta, i, x, ra);
    654    }
    655}
    656
    657/* compare unsigned byte arrays */
    658static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
    659                              uint64_t s2, uintptr_t ra)
    660{
    661    uint32_t i;
    662    uint32_t cc = 0;
    663
    664    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
    665               __func__, l, s1, s2);
    666
    667    for (i = 0; i <= l; i++) {
    668        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
    669        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
    670        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
    671        if (x < y) {
    672            cc = 1;
    673            break;
    674        } else if (x > y) {
    675            cc = 2;
    676            break;
    677        }
    678    }
    679
    680    HELPER_LOG("\n");
    681    return cc;
    682}
    683
/* CLC entry point; GETPC() must be evaluated in the helper called from TCG. */
uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}
    688
/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    /* Walk the 4-bit mask left to right; each set bit compares the current
       leftmost byte of r1 against the next byte in storage. */
    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            /* Storage is only consumed for bytes selected by the mask. */
            addr++;
        }
        /* Shift to the next mask bit and the next register byte. */
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
    721
/* Read a general register as an address, wrapped for the addressing mode. */
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}
    726
    727/*
    728 * Store the address to the given register, zeroing out unused leftmost
    729 * bits in bit positions 32-63 (24-bit and 31-bit mode only).
    730 */
    731static inline void set_address_zero(CPUS390XState *env, int reg,
    732                                    uint64_t address)
    733{
    734    if (env->psw.mask & PSW_MASK_64) {
    735        env->regs[reg] = address;
    736    } else {
    737        if (!(env->psw.mask & PSW_MASK_32)) {
    738            address &= 0x00ffffff;
    739        } else {
    740            address &= 0x7fffffff;
    741        }
    742        env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
    743    }
    744}
    745
/*
 * Store an address to the given register according to the current
 * addressing mode, preserving bits the PoO leaves implementation-dependent.
 */
static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}
    768
    769static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
    770{
    771    if (!(env->psw.mask & PSW_MASK_64)) {
    772        return (uint32_t)length;
    773    }
    774    return length;
    775}
    776
    777static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
    778{
    779    if (!(env->psw.mask & PSW_MASK_64)) {
    780        /* 24-Bit and 31-Bit mode */
    781        length &= 0x7fffffff;
    782    }
    783    return length;
    784}
    785
/* Read a general register as a length, capped per the addressing mode. */
static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}
    790
    791static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
    792{
    793    if (env->psw.mask & PSW_MASK_64) {
    794        /* 64-Bit mode */
    795        env->regs[reg] = length;
    796    } else {
    797        /* 24-Bit and 31-Bit mode */
    798        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
    799    }
    800}
    801
/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 must contain all 0.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.
       CC 3 tells the guest to re-execute the instruction.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
    839
    840void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
    841{
    842    uintptr_t ra = GETPC();
    843    uint32_t len;
    844    uint16_t v, c = env->regs[0];
    845    uint64_t end, str, adj_end;
    846
    847    /* Bits 32-47 of R0 must be zero.  */
    848    if (env->regs[0] & 0xffff0000u) {
    849        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    850    }
    851
    852    str = get_address(env, r2);
    853    end = get_address(env, r1);
    854
    855    /* If the LSB of the two addresses differ, use one extra byte.  */
    856    adj_end = end + ((str ^ end) & 1);
    857
    858    /* Lest we fail to service interrupts in a timely manner, limit the
    859       amount of work we're willing to do.  For now, let's cap at 8k.  */
    860    for (len = 0; len < 0x2000; len += 2) {
    861        if (str + len == adj_end) {
    862            /* End of input found.  */
    863            env->cc_op = 2;
    864            return;
    865        }
    866        v = cpu_lduw_data_ra(env, str + len, ra);
    867        if (v == c) {
    868            /* Character found.  Set R1 to the location; R2 is unmodified.  */
    869            env->cc_op = 1;
    870            set_address(env, r1, str + len);
    871            return;
    872        }
    873    }
    874
    875    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    876    env->cc_op = 3;
    877    set_address(env, r2, str + len);
    878}
    879
    880/* unsigned string compare (c is string terminator) */
    881uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
    882{
    883    uintptr_t ra = GETPC();
    884    uint32_t len;
    885
    886    c = c & 0xff;
    887    s1 = wrap_address(env, s1);
    888    s2 = wrap_address(env, s2);
    889
    890    /* Lest we fail to service interrupts in a timely manner, limit the
    891       amount of work we're willing to do.  For now, let's cap at 8k.  */
    892    for (len = 0; len < 0x2000; ++len) {
    893        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
    894        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
    895        if (v1 == v2) {
    896            if (v1 == c) {
    897                /* Equal.  CC=0, and don't advance the registers.  */
    898                env->cc_op = 0;
    899                env->retxl = s2;
    900                return s1;
    901            }
    902        } else {
    903            /* Unequal.  CC={1,2}, and advance the registers.  Note that
    904               the terminator need not be zero, but the string that contains
    905               the terminator is by definition "low".  */
    906            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
    907            env->retxl = s2 + len;
    908            return s1 + len;
    909        }
    910    }
    911
    912    /* CPU-determined bytes equal; advance the registers.  */
    913    env->cc_op = 3;
    914    env->retxl = s2 + len;
    915    return s1 + len;
    916}
    917
    918/* move page */
    919uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
    920{
    921    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    922    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    923    const int mmu_idx = cpu_mmu_index(env, false);
    924    const bool f = extract64(r0, 11, 1);
    925    const bool s = extract64(r0, 10, 1);
    926    const bool cco = extract64(r0, 8, 1);
    927    uintptr_t ra = GETPC();
    928    S390Access srca, desta;
    929    int exc;
    930
    931    if ((f && s) || extract64(r0, 12, 4)) {
    932        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    933    }
    934
    935    /*
    936     * We always manually handle exceptions such that we can properly store
    937     * r1/r2 to the lowcore on page-translation exceptions.
    938     *
    939     * TODO: Access key handling
    940     */
    941    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
    942                            MMU_DATA_LOAD, mmu_idx, ra);
    943    if (exc) {
    944        if (cco) {
    945            return 2;
    946        }
    947        goto inject_exc;
    948    }
    949    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
    950                            MMU_DATA_STORE, mmu_idx, ra);
    951    if (exc) {
    952        if (cco && exc != PGM_PROTECTION) {
    953            return 1;
    954        }
    955        goto inject_exc;
    956    }
    957    access_memmove(env, &desta, &srca, ra);
    958    return 0; /* data moved */
    959inject_exc:
    960#if !defined(CONFIG_USER_ONLY)
    961    if (exc != PGM_ADDRESSING) {
    962        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
    963                 env->tlb_fill_tec);
    964    }
    965    if (exc == PGM_PAGE_TRANS) {
    966        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
    967                 r1 << 4 | r2);
    968    }
    969#endif
    970    tcg_s390_program_interrupt(env, exc, ra);
    971}
    972
    973/* string copy */
    974uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
    975{
    976    const int mmu_idx = cpu_mmu_index(env, false);
    977    const uint64_t d = get_address(env, r1);
    978    const uint64_t s = get_address(env, r2);
    979    const uint8_t c = env->regs[0];
    980    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    981    S390Access srca, desta;
    982    uintptr_t ra = GETPC();
    983    int i;
    984
    985    if (env->regs[0] & 0xffffff00ull) {
    986        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    987    }
    988
    989    /*
    990     * Our access should not exceed single pages, as we must not report access
    991     * exceptions exceeding the actually copied range (which we don't know at
    992     * this point). We might over-indicate watchpoints within the pages
    993     * (if we ever care, we have to limit processing to a single byte).
    994     */
    995    srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    996    desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    997    for (i = 0; i < len; i++) {
    998        const uint8_t v = access_get_byte(env, &srca, i, ra);
    999
   1000        access_set_byte(env, &desta, i, v, ra);
   1001        if (v == c) {
   1002            set_address_zero(env, r1, d + i);
   1003            return 1;
   1004        }
   1005    }
   1006    set_address_zero(env, r1, d + len);
   1007    set_address_zero(env, r2, s + len);
   1008    return 3;
   1009}
   1010
   1011/* load access registers r1 to r3 from memory at a2 */
   1012void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
   1013{
   1014    uintptr_t ra = GETPC();
   1015    int i;
   1016
   1017    if (a2 & 0x3) {
   1018        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
   1019    }
   1020
   1021    for (i = r1;; i = (i + 1) % 16) {
   1022        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
   1023        a2 += 4;
   1024
   1025        if (i == r3) {
   1026            break;
   1027        }
   1028    }
   1029}
   1030
   1031/* store access registers r1 to r3 in memory at a2 */
   1032void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
   1033{
   1034    uintptr_t ra = GETPC();
   1035    int i;
   1036
   1037    if (a2 & 0x3) {
   1038        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
   1039    }
   1040
   1041    for (i = r1;; i = (i + 1) % 16) {
   1042        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
   1043        a2 += 4;
   1044
   1045        if (i == r3) {
   1046            break;
   1047        }
   1048    }
   1049}
   1050
/*
 * move long helper: one CPU-determined step of MVCLE/MVCLU.  Either copies
 * from *src or pads the destination, staying within a single page.  All
 * pointer parameters are in/out and are advanced past the bytes processed.
 * Returns the condition code: 0/1/2 when done, 3 while work remains.
 */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* Bytes remaining on the current destination page. */
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    /* The final CC is fixed by the initial length comparison. */
    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        /* MVCLU: two-byte pad, byte selected by remaining-length parity. */
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}
   1110
/*
 * move long (MVCL).  Interruptible: state is written back to the registers
 * after every page-bounded step so the main loop can deliver interrupts and
 * re-enter this helper to continue.
 */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    /* Lengths live in bits 40-63 of R1+1/R2+1; pad byte in bits 32-39 of R2+1. */
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    /* CC 3 flags destructive overlap; otherwise compare the lengths. */
    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            /* Source exhausted: pad the remaining destination. */
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
                                  ra);
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
   1184
   1185/* move long extended */
   1186uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
   1187                       uint32_t r3)
   1188{
   1189    uintptr_t ra = GETPC();
   1190    uint64_t destlen = get_length(env, r1 + 1);
   1191    uint64_t dest = get_address(env, r1);
   1192    uint64_t srclen = get_length(env, r3 + 1);
   1193    uint64_t src = get_address(env, r3);
   1194    uint8_t pad = a2;
   1195    uint32_t cc;
   1196
   1197    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
   1198
   1199    set_length(env, r1 + 1, destlen);
   1200    set_length(env, r3 + 1, srclen);
   1201    set_address(env, r1, dest);
   1202    set_address(env, r3, src);
   1203
   1204    return cc;
   1205}
   1206
   1207/* move long unicode */
   1208uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
   1209                       uint32_t r3)
   1210{
   1211    uintptr_t ra = GETPC();
   1212    uint64_t destlen = get_length(env, r1 + 1);
   1213    uint64_t dest = get_address(env, r1);
   1214    uint64_t srclen = get_length(env, r3 + 1);
   1215    uint64_t src = get_address(env, r3);
   1216    uint16_t pad = a2;
   1217    uint32_t cc;
   1218
   1219    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
   1220
   1221    set_length(env, r1 + 1, destlen);
   1222    set_length(env, r3 + 1, srclen);
   1223    set_address(env, r1, dest);
   1224    set_address(env, r3, src);
   1225
   1226    return cc;
   1227}
   1228
/*
 * compare logical long helper: common implementation of CLCL/CLCLE/CLCLU.
 * The shorter operand is logically extended with the pad character.  The
 * operand pointers/lengths are in/out and are advanced past the prefix
 * that compared equal.  Returns CC: 0 equal, 1 first operand low, 2 first
 * operand high, 3 when the per-call work limit was reached first.
 */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    /* For wordsize 2 (UTF-16), both lengths must be even. */
    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        /* An exhausted operand keeps comparing as the pad character. */
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            /* Stop without consuming the unequal characters. */
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}
   1280
   1281
   1282/* compare logical long */
   1283uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
   1284{
   1285    uintptr_t ra = GETPC();
   1286    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
   1287    uint64_t src1 = get_address(env, r1);
   1288    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
   1289    uint64_t src3 = get_address(env, r2);
   1290    uint8_t pad = env->regs[r2 + 1] >> 24;
   1291    uint32_t cc;
   1292
   1293    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
   1294
   1295    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
   1296    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
   1297    set_address(env, r1, src1);
   1298    set_address(env, r2, src3);
   1299
   1300    return cc;
   1301}
   1302
   1303/* compare logical long extended memcompare insn with padding */
   1304uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
   1305                       uint32_t r3)
   1306{
   1307    uintptr_t ra = GETPC();
   1308    uint64_t src1len = get_length(env, r1 + 1);
   1309    uint64_t src1 = get_address(env, r1);
   1310    uint64_t src3len = get_length(env, r3 + 1);
   1311    uint64_t src3 = get_address(env, r3);
   1312    uint8_t pad = a2;
   1313    uint32_t cc;
   1314
   1315    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
   1316
   1317    set_length(env, r1 + 1, src1len);
   1318    set_length(env, r3 + 1, src3len);
   1319    set_address(env, r1, src1);
   1320    set_address(env, r3, src3);
   1321
   1322    return cc;
   1323}
   1324
   1325/* compare logical long unicode memcompare insn with padding */
   1326uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
   1327                       uint32_t r3)
   1328{
   1329    uintptr_t ra = GETPC();
   1330    uint64_t src1len = get_length(env, r1 + 1);
   1331    uint64_t src1 = get_address(env, r1);
   1332    uint64_t src3len = get_length(env, r3 + 1);
   1333    uint64_t src3 = get_address(env, r3);
   1334    uint16_t pad = a2;
   1335    uint32_t cc = 0;
   1336
   1337    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
   1338
   1339    set_length(env, r1 + 1, src1len);
   1340    set_length(env, r3 + 1, src3len);
   1341    set_address(env, r1, src1);
   1342    set_address(env, r3, src3);
   1343
   1344    return cc;
   1345}
   1346
   1347/* checksum */
   1348uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
   1349                      uint64_t src, uint64_t src_len)
   1350{
   1351    uintptr_t ra = GETPC();
   1352    uint64_t max_len, len;
   1353    uint64_t cksm = (uint32_t)r1;
   1354
   1355    /* Lest we fail to service interrupts in a timely manner, limit the
   1356       amount of work we're willing to do.  For now, let's cap at 8k.  */
   1357    max_len = (src_len > 0x2000 ? 0x2000 : src_len);
   1358
   1359    /* Process full words as available.  */
   1360    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
   1361        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
   1362    }
   1363
   1364    switch (max_len - len) {
   1365    case 1:
   1366        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
   1367        len += 1;
   1368        break;
   1369    case 2:
   1370        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
   1371        len += 2;
   1372        break;
   1373    case 3:
   1374        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
   1375        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
   1376        len += 3;
   1377        break;
   1378    }
   1379
   1380    /* Fold the carry from the checksum.  Note that we can see carry-out
   1381       during folding more than once (but probably not more than twice).  */
   1382    while (cksm > 0xffffffffull) {
   1383        cksm = (uint32_t)cksm + (cksm >> 32);
   1384    }
   1385
   1386    /* Indicate whether or not we've processed everything.  */
   1387    env->cc_op = (len == src_len ? 0 : 3);
   1388
   1389    /* Return both cksm and processed length.  */
   1390    env->retxl = cksm;
   1391    return len;
   1392}
   1393
/*
 * PACK: convert zoned-decimal at src to packed-decimal at dest, right to
 * left.  len packs both operand lengths: bits 4-7 of len are the source
 * length, bits 0-3 the destination length (both as stored, i.e. L-1 style
 * offsets from the leftmost byte).
 */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    /* Point at the rightmost byte of each operand. */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        /* len_src may already be negative: missing digits pack as zero. */
        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
   1430
/*
 * Common implementation of PKA/PKU: pack ssize-byte characters from src
 * into a 16-byte packed-decimal field at dest, right to left, forcing a
 * positive sign code (0xc) into the rightmost nibble.
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            /*
             * NOTE(review): digits are consumed only while srclen > ssize
             * (strict), so the leftmost character is never read here —
             * srclen appears to be biased by ssize; confirm against the
             * translator's operand setup.
             */
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
   1464
   1465
   1466void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
   1467                 uint32_t srclen)
   1468{
   1469    do_pkau(env, dest, src, srclen, 1, GETPC());
   1470}
   1471
   1472void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
   1473                 uint32_t srclen)
   1474{
   1475    do_pkau(env, dest, src, srclen, 2, GETPC());
   1476}
   1477
/*
 * UNPK: convert packed-decimal at src to zoned-decimal at dest, right to
 * left.  len holds both lengths: bits 4-7 the source length, bits 0-3 the
 * destination length (L-1 style offsets, as for PACK).
 */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    /* Tracks which nibble of the current source byte is being emitted. */
    int second_nibble = 0;

    /* Point at the rightmost byte of each operand. */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        /* An exhausted source supplies zero digits. */
        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
   1524
/*
 * Common implementation of UNPKA/UNPKU: unpack a 16-byte packed-decimal
 * field at src into dsize-byte characters at dest, right to left.
 * Returns the condition code derived from the sign nibble: 0 plus,
 * 1 minus, 3 invalid sign.
 */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            /* Odd character: fetch the next source byte (low nibble first). */
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            /* Even character: use the high nibble of the byte read before. */
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
   1575
   1576uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
   1577                       uint64_t src)
   1578{
   1579    return do_unpkau(env, dest, destlen, 1, src, GETPC());
   1580}
   1581
   1582uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
   1583                       uint64_t src)
   1584{
   1585    return do_unpkau(env, dest, destlen, 2, src, GETPC());
   1586}
   1587
   1588uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
   1589{
   1590    uintptr_t ra = GETPC();
   1591    uint32_t cc = 0;
   1592    int i;
   1593
   1594    for (i = 0; i < destlen; i++) {
   1595        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
   1596        /* digit */
   1597        cc |= (b & 0xf0) > 0x90 ? 2 : 0;
   1598
   1599        if (i == (destlen - 1)) {
   1600            /* sign */
   1601            cc |= (b & 0xf) < 0xa ? 1 : 0;
   1602        } else {
   1603            /* digit */
   1604            cc |= (b & 0xf) > 0x9 ? 2 : 0;
   1605        }
   1606    }
   1607
   1608    return cc;
   1609}
   1610
   1611static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
   1612                             uint64_t trans, uintptr_t ra)
   1613{
   1614    uint32_t i;
   1615
   1616    for (i = 0; i <= len; i++) {
   1617        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
   1618        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
   1619        cpu_stb_data_ra(env, array + i, new_byte, ra);
   1620    }
   1621
   1622    return env->cc_op;
   1623}
   1624
   1625void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
   1626                uint64_t trans)
   1627{
   1628    do_helper_tr(env, len, array, trans, GETPC());
   1629}
   1630
   1631uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
   1632                     uint64_t len, uint64_t trans)
   1633{
   1634    uintptr_t ra = GETPC();
   1635    uint8_t end = env->regs[0] & 0xff;
   1636    uint64_t l = len;
   1637    uint64_t i;
   1638    uint32_t cc = 0;
   1639
   1640    if (!(env->psw.mask & PSW_MASK_64)) {
   1641        array &= 0x7fffffff;
   1642        l = (uint32_t)l;
   1643    }
   1644
   1645    /* Lest we fail to service interrupts in a timely manner, limit the
   1646       amount of work we're willing to do.  For now, let's cap at 8k.  */
   1647    if (l > 0x2000) {
   1648        l = 0x2000;
   1649        cc = 3;
   1650    }
   1651
   1652    for (i = 0; i < l; i++) {
   1653        uint8_t byte, new_byte;
   1654
   1655        byte = cpu_ldub_data_ra(env, array + i, ra);
   1656
   1657        if (byte == end) {
   1658            cc = 1;
   1659            break;
   1660        }
   1661
   1662        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
   1663        cpu_stb_data_ra(env, array + i, new_byte, ra);
   1664    }
   1665
   1666    env->cc_op = cc;
   1667    env->retxl = len - i;
   1668    return array + i;
   1669}
   1670
   1671static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
   1672                                     uint64_t array, uint64_t trans,
   1673                                     int inc, uintptr_t ra)
   1674{
   1675    int i;
   1676
   1677    for (i = 0; i <= len; i++) {
   1678        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
   1679        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
   1680
   1681        if (sbyte != 0) {
   1682            set_address(env, 1, array + i * inc);
   1683            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
   1684            return (i == len) ? 2 : 1;
   1685        }
   1686    }
   1687
   1688    return 0;
   1689}
   1690
/* Forward-scanning TRT wrapper (matches the helper-function signature). */
static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
                                  uint64_t array, uint64_t trans,
                                  uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, 1, ra);
}
   1697
   1698uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
   1699                     uint64_t trans)
   1700{
   1701    return do_helper_trt(env, len, array, trans, 1, GETPC());
   1702}
   1703
/* Backward-scanning TRT wrapper (matches the helper-function signature). */
static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
                                   uint64_t array, uint64_t trans,
                                   uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, -1, ra);
}
   1710
   1711uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
   1712                      uint64_t trans)
   1713{
   1714    return do_helper_trt(env, len, array, trans, -1, GETPC());
   1715}
   1716
/*
 * Translate one/two to one/two: common implementation of TROO/TROT/TRTO/TRTT.
 * sizes bit 0 selects a 1-byte destination, bit 1 a 1-byte source; the
 * table address is in GR1, the test character in tst.  Returns CC: 0 all
 * translated, 1 test character hit, 3 CPU-determined limit reached.
 */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The length must be a multiple of the source character size. */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        if (dval == tst) {
            /* Test character: stop before storing it. */
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            cc = 0;
            break;
        }
    }

    /* Write the (possibly partial) progress back to the registers. */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
   1770
/* Compare double and swap (CDSG), serial-context variant: the 16-byte
   compare-and-swap is emulated with plain loads and stores.  */
void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
                  uint32_t r1, uint32_t r3)
{
    uintptr_t ra = GETPC();
    /* Each even/odd register pair holds the high/low 64-bit halves.  */
    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
    Int128 oldv;
    uint64_t oldh, oldl;
    bool fail;

    /* The operand must be quadword aligned.  */
    check_alignment(env, addr, 16, ra);

    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
    oldl = cpu_ldq_data_ra(env, addr + 8, ra);

    oldv = int128_make128(oldl, oldh);
    fail = !int128_eq(oldv, cmpv);
    if (fail) {
        /* On mismatch, store back the old value unchanged.  */
        newv = oldv;
    }

    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);

    /* cc 0 on swap, cc 1 on mismatch; R1 pair receives the old value.  */
    env->cc_op = fail;
    env->regs[r1] = int128_gethi(oldv);
    env->regs[r1 + 1] = int128_getlo(oldv);
}
   1799
/* CDSG in a parallel context: performed with a host 128-bit cmpxchg.  */
void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
                           uint32_t r1, uint32_t r3)
{
    uintptr_t ra = GETPC();
    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
    int mem_idx;
    MemOpIdx oi;
    Int128 oldv;
    bool fail;

    /* The translator only emits this helper when cmpxchg16 exists.  */
    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    /* MO_ALIGN_16 enforces the architectural quadword alignment.  */
    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
    fail = !int128_eq(oldv, cmpv);

    env->cc_op = fail;
    env->regs[r1] = int128_gethi(oldv);
    env->regs[r1 + 1] = int128_getlo(oldv);
}
   1822
/*
 * Compare and swap and store (CSST) common implementation.
 *
 * FC (GR0 bits 56-63) selects the compare-and-swap width (4, 8 or 16
 * bytes); SC (GR0 bits 48-55) selects the store size (1 << sc bytes).
 * The parameter list addressed by GR1 supplies the swap value (offset 0)
 * and the store value (offset 16).  Returns the condition code:
 * 0 = swap succeeded (and the store was performed), 1 = mismatch.
 */
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                        uint64_t a2, bool parallel)
{
    uint32_t mem_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint32_t fc = extract32(env->regs[0], 0, 8);
    uint32_t sc = extract32(env->regs[0], 8, 8);
    uint64_t pl = get_address(env, 1) & -16;
    uint64_t svh, svl;
    uint32_t cc;

    /* Sanity check the function code and storage characteristic.  */
    if (fc > 1 || sc > 3) {
        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
            goto spec_exception;
        }
        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
            goto spec_exception;
        }
    }

    /* Sanity check the alignments.  */
    if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
        goto spec_exception;
    }

    /* Sanity check writability of the store address.  */
    probe_write(env, a2, 1 << sc, mem_idx, ra);

    /*
     * Note that the compare-and-swap is atomic, and the store is atomic,
     * but the complete operation is not.  Therefore we do not need to
     * assert serial context in order to implement this.  That said,
     * restart early if we can't support either operation that is supposed
     * to be atomic.
     */
    if (parallel) {
        uint32_t max = 2;
#ifdef CONFIG_ATOMIC64
        max = 3;
#endif
        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
            (HAVE_ATOMIC128  ? 0 : sc > max)) {
            cpu_loop_exit_atomic(env_cpu(env), ra);
        }
    }

    /* All loads happen before all stores.  For simplicity, load the entire
       store value area from the parameter list.  */
    svh = cpu_ldq_data_ra(env, pl + 16, ra);
    svl = cpu_ldq_data_ra(env, pl + 24, ra);

    switch (fc) {
    case 0:
        /* 4-byte compare and swap.  */
        {
            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
            uint32_t cv = env->regs[r3];
            uint32_t ov;

            if (parallel) {
#ifdef CONFIG_USER_ONLY
                uint32_t *haddr = g2h(env_cpu(env), a1);
                ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
#else
                MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
                ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
#endif
            } else {
                ov = cpu_ldl_data_ra(env, a1, ra);
                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            /* The old value lands in the high half of R3.  */
            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
        }
        break;

    case 1:
        /* 8-byte compare and swap.  */
        {
            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
            uint64_t cv = env->regs[r3];
            uint64_t ov;

            if (parallel) {
#ifdef CONFIG_ATOMIC64
                MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
                ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
#else
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
#endif
            } else {
                ov = cpu_ldq_data_ra(env, a1, ra);
                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            env->regs[r3] = ov;
        }
        break;

    case 2:
        /* 16-byte compare and swap; R3 must be an even register pair.  */
        {
            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
            Int128 nv = int128_make128(nvl, nvh);
            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
            Int128 ov;

            if (!parallel) {
                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);

                ov = int128_make128(ol, oh);
                cc = !int128_eq(ov, cv);
                if (cc) {
                    nv = ov;
                }

                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
            } else if (HAVE_CMPXCHG128) {
                MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
                ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
                cc = !int128_eq(ov, cv);
            } else {
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
            }

            env->regs[r3 + 0] = int128_gethi(ov);
            env->regs[r3 + 1] = int128_getlo(ov);
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Store only if the comparison succeeded.  Note that above we use a pair
       of 64-bit big-endian loads, so for sc < 3 we must extract the value
       from the most-significant bits of svh.  */
    if (cc == 0) {
        switch (sc) {
        case 0:
            cpu_stb_data_ra(env, a2, svh >> 56, ra);
            break;
        case 1:
            cpu_stw_data_ra(env, a2, svh >> 48, ra);
            break;
        case 2:
            cpu_stl_data_ra(env, a2, svh >> 32, ra);
            break;
        case 3:
            cpu_stq_data_ra(env, a2, svh, ra);
            break;
        case 4:
            /* 16-byte store; atomic only in the parallel path.  */
            if (!parallel) {
                cpu_stq_data_ra(env, a2 + 0, svh, ra);
                cpu_stq_data_ra(env, a2 + 8, svl, ra);
            } else if (HAVE_ATOMIC128) {
                MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
                Int128 sv = int128_make128(svl, svh);
                cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
            } else {
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
            }
            break;
        default:
            g_assert_not_reached();
        }
    }

    return cc;

 spec_exception:
    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
}
   2000
/* CSST in a serial context: no atomicity required.  */
uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
{
    return do_csst(env, r3, a1, a2, false);
}
   2005
/* CSST in a parallel context: atomic operations are required.  */
uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
                               uint64_t a2)
{
    return do_csst(env, r3, a1, a2, true);
}
   2011
   2012#if !defined(CONFIG_USER_ONLY)
/* LCTLG: load control registers r1..r3 (wrapping modulo 16) as 64-bit.  */
void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    /* The operand must be doubleword aligned.  */
    if (src & 0x7) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint64_t val = cpu_ldq_data_ra(env, src, ra);
        /* CR9..CR11 carry the PER controls; track changes to them.  */
        if (env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        env->cregs[i] = val;
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
                   i, src, val);
        src += sizeof(uint64_t);

        if (i == r3) {
            break;
        }
    }

    /* Changed PER controls require recomputing the CPU watchpoints.  */
    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    /* Control registers affect address translation; drop the TLB.  */
    tlb_flush(env_cpu(env));
}
   2045
/* LCTL: load the low 32 bits of control registers r1..r3 (wrapping).  */
void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    /* The operand must be word aligned.  */
    if (src & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint32_t val = cpu_ldl_data_ra(env, src, ra);
        /* CR9..CR11 carry the PER controls; track changes to them.  */
        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        /* Only the low 32 bits of each control register are replaced.  */
        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
        src += sizeof(uint32_t);

        if (i == r3) {
            break;
        }
    }

    /* Changed PER controls require recomputing the CPU watchpoints.  */
    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    /* Control registers affect address translation; drop the TLB.  */
    tlb_flush(env_cpu(env));
}
   2077
   2078void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
   2079{
   2080    uintptr_t ra = GETPC();
   2081    uint64_t dest = a2;
   2082    uint32_t i;
   2083
   2084    if (dest & 0x7) {
   2085        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
   2086    }
   2087
   2088    for (i = r1;; i = (i + 1) % 16) {
   2089        cpu_stq_data_ra(env, dest, env->cregs[i], ra);
   2090        dest += sizeof(uint64_t);
   2091
   2092        if (i == r3) {
   2093            break;
   2094        }
   2095    }
   2096}
   2097
   2098void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
   2099{
   2100    uintptr_t ra = GETPC();
   2101    uint64_t dest = a2;
   2102    uint32_t i;
   2103
   2104    if (dest & 0x3) {
   2105        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
   2106    }
   2107
   2108    for (i = r1;; i = (i + 1) % 16) {
   2109        cpu_stl_data_ra(env, dest, env->cregs[i], ra);
   2110        dest += sizeof(uint32_t);
   2111
   2112        if (i == r3) {
   2113            break;
   2114        }
   2115    }
   2116}
   2117
   2118uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
   2119{
   2120    uintptr_t ra = GETPC();
   2121    int i;
   2122
   2123    real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
   2124
   2125    for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
   2126        cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
   2127    }
   2128
   2129    return 0;
   2130}
   2131
/* TPROT: test fetch/store accessibility of the location at a1.  */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    /* The write check failed and latched a program interrupt code.  */
    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        cs->exception_index = -1;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
   2171
/* insert storage key extended */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    /* The skeys device lookup is cached across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* First use may enable key handling; drop stale translations.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_get_skeys_nonzero(rc);
        /* On backend failure, return a zero key.  */
        return 0;
    }
    return key;
}
   2201
/* set storage key extended */
void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    /* The skeys device lookup is cached across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* First use may enable key handling; drop stale translations.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* The least-significant bit of the key byte is masked off.  */
    key = r1 & 0xfe;
    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
    }
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
   2235
/* reset reference bit extended */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    uint64_t addr = wrap_address(env, r2);
    /* The skeys device lookup is cached across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;
    int rc;

    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* First use may enable key handling; drop stale translations.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_get_skeys_nonzero(rc);
        return 0;
    }

    /* Remember the old R and C bits, then clear R in the stored key.  */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
        return 0;
    }
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    return re >> 1;
}
   2289
/* MVCS: move up to 256 bytes from the primary to the secondary space.  */
uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* DAT must be on with secondary space enabled, and the PSW must not
       be in home-space or access-register mode.  */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* Lengths above 256 are capped and reported with cc 3.  */
    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        return cc;
    }

    /* TODO: Access key handling */
    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
   2320
/* MVCP: move up to 256 bytes from the secondary to the primary space.  */
uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* DAT must be on with secondary space enabled, and the PSW must not
       be in home-space or access-register mode.  */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* Lengths above 256 are capped and reported with cc 3.  */
    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        return cc;
    }

    /* TODO: Access key handling */
    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
   2351
/* IDTE: invalidate DAT table entries and flush matching TLB entries.  */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* r2 bits 12-19 must be zero.  */
    if (r2 & 0xff000) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        /* r2 bits 53-63 give the count of additional entries, minus 1.  */
        entries = (r2 & 0x7ff) + 1;

        /* Select the table-index field matching the designation type.  */
        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
            if (!(entry & REGION_ENTRY_I)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_I;
                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        /* local-clearing variant: only this CPU's TLB is flushed */
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
   2401
/* invalidate pte */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += VADDR_PAGE_TX(vaddr) * 8;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
    pte |= PAGE_ENTRY_I;
    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    if (m4 & 1) {
        /* local-clearing variant: only this CPU's TLB is flushed */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
   2442
   2443/* flush local tlb */
   2444void HELPER(ptlb)(CPUS390XState *env)
   2445{
   2446    tlb_flush(env_cpu(env));
   2447}
   2448
   2449/* flush global tlb */
   2450void HELPER(purge)(CPUS390XState *env)
   2451{
   2452    tlb_flush_all_cpus_synced(env_cpu(env));
   2453}
   2454
/* load real address */
uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
{
    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
    uint64_t ret, tec;
    int flags, exc, cc;

    /* XXX incomplete - has more corner cases */
    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
    }

    exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
    if (exc) {
        /* Translation failed: cc 3 and the exception code with bit 32
           set is returned in place of a real address.  */
        cc = 3;
        ret = exc | 0x80000000;
    } else {
        cc = 0;
        /* Merge the byte offset within the page back into the result.  */
        ret |= addr & ~TARGET_PAGE_MASK;
    }

    env->cc_op = cc;
    return ret;
}
   2479#endif
   2480
   2481/* load pair from quadword */
   2482uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
   2483{
   2484    uintptr_t ra = GETPC();
   2485    uint64_t hi, lo;
   2486
   2487    check_alignment(env, addr, 16, ra);
   2488    hi = cpu_ldq_data_ra(env, addr + 0, ra);
   2489    lo = cpu_ldq_data_ra(env, addr + 8, ra);
   2490
   2491    env->retxl = lo;
   2492    return hi;
   2493}
   2494
   2495uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
   2496{
   2497    uintptr_t ra = GETPC();
   2498    uint64_t hi, lo;
   2499    int mem_idx;
   2500    MemOpIdx oi;
   2501    Int128 v;
   2502
   2503    assert(HAVE_ATOMIC128);
   2504
   2505    mem_idx = cpu_mmu_index(env, false);
   2506    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
   2507    v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
   2508    hi = int128_gethi(v);
   2509    lo = int128_getlo(v);
   2510
   2511    env->retxl = lo;
   2512    return hi;
   2513}
   2514
   2515/* store pair to quadword */
   2516void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
   2517                  uint64_t low, uint64_t high)
   2518{
   2519    uintptr_t ra = GETPC();
   2520
   2521    check_alignment(env, addr, 16, ra);
   2522    cpu_stq_data_ra(env, addr + 0, high, ra);
   2523    cpu_stq_data_ra(env, addr + 8, low, ra);
   2524}
   2525
   2526void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
   2527                           uint64_t low, uint64_t high)
   2528{
   2529    uintptr_t ra = GETPC();
   2530    int mem_idx;
   2531    MemOpIdx oi;
   2532    Int128 v;
   2533
   2534    assert(HAVE_ATOMIC128);
   2535
   2536    mem_idx = cpu_mmu_index(env, false);
   2537    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
   2538    v = int128_make128(low, high);
   2539    cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
   2540}
   2541
/* Execute instruction.  This instruction executes an insn modified with
   the contents of r1.  It does not change the executed instruction in memory;
   it does not change the program counter.

   Perform this by recording the modified instruction in env->ex_value.
   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
*/
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    uint64_t insn = cpu_lduw_code(env, addr);
    uint8_t opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        /* SS-format storage-to-storage insns, dispatched on the low
           nibble of the opcode to the matching helper.  */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Decode the length and base/displacement fields from the
               merged instruction image.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SVC: raise the supervisor-call exception directly.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
}
   2614
/*
 * MOVE WITH OPTIONAL SPECIFICATIONS.  Copy up to 4096 bytes from SRC to
 * DEST, with the access key and address space of each operand optionally
 * overridden by the OAC (operand-access-control) fields in general
 * register 0.  Returns CC 0 when the full length was moved, or CC 3 when
 * LEN exceeded the CPU-determined amount (4096) and was truncated.
 * The various tcg_s390_program_interrupt() calls raise a program
 * exception instead of returning normally.
 */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    /* MVCOS with DAT off is a special-operation exception. */
    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    /* Field layout within the 16-bit OAC: key in bits 15..12, address
       space in bits 7..6, key-override flag in bit 1, AS-override flag
       in bit 0. */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;
    dest_as = (val >> 6) & 0x3;
    dest_k = (val >> 1) & 0x1;
    dest_a = val & 0x1;

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* Operands without an override flag fall back to the PSW key/AS. */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* Explicitly selecting the home space in problem state is invalid. */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    /* Using the secondary space requires the secondary-space control
       to be set in CR0. */
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    /* Both effective keys must be permitted by the PSW-key mask. */
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    /* Cap at the CPU-determined amount; CC 3 reports the truncation. */
    len = wrap_length32(env, len);
    if (len > 4096) {
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
                                         mmu_idx_from_as(src_as), ra);
        S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
                                          mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
   2698
   2699/* Decode a Unicode character.  A return value < 0 indicates success, storing
   2700   the UTF-32 result into OCHAR and the input length into OLEN.  A return
   2701   value >= 0 indicates failure, and the CC value to be returned.  */
   2702typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
   2703                                 uint64_t ilen, bool enh_check, uintptr_t ra,
   2704                                 uint32_t *ochar, uint32_t *olen);
   2705
   2706/* Encode a Unicode character.  A return value < 0 indicates success, storing
   2707   the bytes into ADDR and the output length into OLEN.  A return value >= 0
   2708   indicates failure, and the CC value to be returned.  */
   2709typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
   2710                                 uint64_t ilen, uintptr_t ra, uint32_t c,
   2711                                 uint32_t *olen);
   2712
   2713static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
   2714                       bool enh_check, uintptr_t ra,
   2715                       uint32_t *ochar, uint32_t *olen)
   2716{
   2717    uint8_t s0, s1, s2, s3;
   2718    uint32_t c, l;
   2719
   2720    if (ilen < 1) {
   2721        return 0;
   2722    }
   2723    s0 = cpu_ldub_data_ra(env, addr, ra);
   2724    if (s0 <= 0x7f) {
   2725        /* one byte character */
   2726        l = 1;
   2727        c = s0;
   2728    } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
   2729        /* invalid character */
   2730        return 2;
   2731    } else if (s0 <= 0xdf) {
   2732        /* two byte character */
   2733        l = 2;
   2734        if (ilen < 2) {
   2735            return 0;
   2736        }
   2737        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
   2738        c = s0 & 0x1f;
   2739        c = (c << 6) | (s1 & 0x3f);
   2740        if (enh_check && (s1 & 0xc0) != 0x80) {
   2741            return 2;
   2742        }
   2743    } else if (s0 <= 0xef) {
   2744        /* three byte character */
   2745        l = 3;
   2746        if (ilen < 3) {
   2747            return 0;
   2748        }
   2749        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
   2750        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
   2751        c = s0 & 0x0f;
   2752        c = (c << 6) | (s1 & 0x3f);
   2753        c = (c << 6) | (s2 & 0x3f);
   2754        /* Fold the byte-by-byte range descriptions in the PoO into
   2755           tests against the complete value.  It disallows encodings
   2756           that could be smaller, and the UTF-16 surrogates.  */
   2757        if (enh_check
   2758            && ((s1 & 0xc0) != 0x80
   2759                || (s2 & 0xc0) != 0x80
   2760                || c < 0x1000
   2761                || (c >= 0xd800 && c <= 0xdfff))) {
   2762            return 2;
   2763        }
   2764    } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
   2765        /* four byte character */
   2766        l = 4;
   2767        if (ilen < 4) {
   2768            return 0;
   2769        }
   2770        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
   2771        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
   2772        s3 = cpu_ldub_data_ra(env, addr + 3, ra);
   2773        c = s0 & 0x07;
   2774        c = (c << 6) | (s1 & 0x3f);
   2775        c = (c << 6) | (s2 & 0x3f);
   2776        c = (c << 6) | (s3 & 0x3f);
   2777        /* See above.  */
   2778        if (enh_check
   2779            && ((s1 & 0xc0) != 0x80
   2780                || (s2 & 0xc0) != 0x80
   2781                || (s3 & 0xc0) != 0x80
   2782                || c < 0x010000
   2783                || c > 0x10ffff)) {
   2784            return 2;
   2785        }
   2786    } else {
   2787        /* invalid character */
   2788        return 2;
   2789    }
   2790
   2791    *ochar = c;
   2792    *olen = l;
   2793    return -1;
   2794}
   2795
   2796static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
   2797                        bool enh_check, uintptr_t ra,
   2798                        uint32_t *ochar, uint32_t *olen)
   2799{
   2800    uint16_t s0, s1;
   2801    uint32_t c, l;
   2802
   2803    if (ilen < 2) {
   2804        return 0;
   2805    }
   2806    s0 = cpu_lduw_data_ra(env, addr, ra);
   2807    if ((s0 & 0xfc00) != 0xd800) {
   2808        /* one word character */
   2809        l = 2;
   2810        c = s0;
   2811    } else {
   2812        /* two word character */
   2813        l = 4;
   2814        if (ilen < 4) {
   2815            return 0;
   2816        }
   2817        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
   2818        c = extract32(s0, 6, 4) + 1;
   2819        c = (c << 6) | (s0 & 0x3f);
   2820        c = (c << 10) | (s1 & 0x3ff);
   2821        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
   2822            /* invalid surrogate character */
   2823            return 2;
   2824        }
   2825    }
   2826
   2827    *ochar = c;
   2828    *olen = l;
   2829    return -1;
   2830}
   2831
   2832static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
   2833                        bool enh_check, uintptr_t ra,
   2834                        uint32_t *ochar, uint32_t *olen)
   2835{
   2836    uint32_t c;
   2837
   2838    if (ilen < 4) {
   2839        return 0;
   2840    }
   2841    c = cpu_ldl_data_ra(env, addr, ra);
   2842    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
   2843        /* invalid unicode character */
   2844        return 2;
   2845    }
   2846
   2847    *ochar = c;
   2848    *olen = 4;
   2849    return -1;
   2850}
   2851
   2852static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
   2853                       uintptr_t ra, uint32_t c, uint32_t *olen)
   2854{
   2855    uint8_t d[4];
   2856    uint32_t l, i;
   2857
   2858    if (c <= 0x7f) {
   2859        /* one byte character */
   2860        l = 1;
   2861        d[0] = c;
   2862    } else if (c <= 0x7ff) {
   2863        /* two byte character */
   2864        l = 2;
   2865        d[1] = 0x80 | extract32(c, 0, 6);
   2866        d[0] = 0xc0 | extract32(c, 6, 5);
   2867    } else if (c <= 0xffff) {
   2868        /* three byte character */
   2869        l = 3;
   2870        d[2] = 0x80 | extract32(c, 0, 6);
   2871        d[1] = 0x80 | extract32(c, 6, 6);
   2872        d[0] = 0xe0 | extract32(c, 12, 4);
   2873    } else {
   2874        /* four byte character */
   2875        l = 4;
   2876        d[3] = 0x80 | extract32(c, 0, 6);
   2877        d[2] = 0x80 | extract32(c, 6, 6);
   2878        d[1] = 0x80 | extract32(c, 12, 6);
   2879        d[0] = 0xf0 | extract32(c, 18, 3);
   2880    }
   2881
   2882    if (ilen < l) {
   2883        return 1;
   2884    }
   2885    for (i = 0; i < l; ++i) {
   2886        cpu_stb_data_ra(env, addr + i, d[i], ra);
   2887    }
   2888
   2889    *olen = l;
   2890    return -1;
   2891}
   2892
   2893static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
   2894                        uintptr_t ra, uint32_t c, uint32_t *olen)
   2895{
   2896    uint16_t d0, d1;
   2897
   2898    if (c <= 0xffff) {
   2899        /* one word character */
   2900        if (ilen < 2) {
   2901            return 1;
   2902        }
   2903        cpu_stw_data_ra(env, addr, c, ra);
   2904        *olen = 2;
   2905    } else {
   2906        /* two word character */
   2907        if (ilen < 4) {
   2908            return 1;
   2909        }
   2910        d1 = 0xdc00 | extract32(c, 0, 10);
   2911        d0 = 0xd800 | extract32(c, 10, 6);
   2912        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
   2913        cpu_stw_data_ra(env, addr + 0, d0, ra);
   2914        cpu_stw_data_ra(env, addr + 2, d1, ra);
   2915        *olen = 4;
   2916    }
   2917
   2918    return -1;
   2919}
   2920
   2921static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
   2922                        uintptr_t ra, uint32_t c, uint32_t *olen)
   2923{
   2924    if (ilen < 4) {
   2925        return 1;
   2926    }
   2927    cpu_stl_data_ra(env, addr, c, ra);
   2928    *olen = 4;
   2929    return -1;
   2930}
   2931
   2932static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
   2933                                       uint32_t r2, uint32_t m3, uintptr_t ra,
   2934                                       decode_unicode_fn decode,
   2935                                       encode_unicode_fn encode)
   2936{
   2937    uint64_t dst = get_address(env, r1);
   2938    uint64_t dlen = get_length(env, r1 + 1);
   2939    uint64_t src = get_address(env, r2);
   2940    uint64_t slen = get_length(env, r2 + 1);
   2941    bool enh_check = m3 & 1;
   2942    int cc, i;
   2943
   2944    /* Lest we fail to service interrupts in a timely manner, limit the
   2945       amount of work we're willing to do.  For now, let's cap at 256.  */
   2946    for (i = 0; i < 256; ++i) {
   2947        uint32_t c, ilen, olen;
   2948
   2949        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
   2950        if (unlikely(cc >= 0)) {
   2951            break;
   2952        }
   2953        cc = encode(env, dst, dlen, ra, c, &olen);
   2954        if (unlikely(cc >= 0)) {
   2955            break;
   2956        }
   2957
   2958        src += ilen;
   2959        slen -= ilen;
   2960        dst += olen;
   2961        dlen -= olen;
   2962        cc = 3;
   2963    }
   2964
   2965    set_address(env, r1, dst);
   2966    set_length(env, r1 + 1, dlen);
   2967    set_address(env, r2, src);
   2968    set_length(env, r2 + 1, slen);
   2969
   2970    return cc;
   2971}
   2972
/* CONVERT UTF-8 TO UTF-16 */
uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf16);
}

/* CONVERT UTF-8 TO UTF-32 */
uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf32);
}

/* CONVERT UTF-16 TO UTF-8 */
uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf8);
}

/* CONVERT UTF-16 TO UTF-32 */
uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf32);
}

/* CONVERT UTF-32 TO UTF-8 */
uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf8);
}

/* CONVERT UTF-32 TO UTF-16 */
uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf16);
}
   3008
   3009void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
   3010                        uintptr_t ra)
   3011{
   3012    /* test the actual access, not just any access to the page due to LAP */
   3013    while (len) {
   3014        const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
   3015        const uint64_t curlen = MIN(pagelen, len);
   3016
   3017        probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
   3018        addr = wrap_address(env, addr + curlen);
   3019        len -= curlen;
   3020    }
   3021}
   3022
/* TCG helper entry point for probe_write_access(). */
void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
{
    probe_write_access(env, addr, len, GETPC());
}