cachepc-qemu

Fork of AMDESE/qemu with changes for the cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

int_helper.c (92353B)


/*
 *  PowerPC integer and vector emulation helpers for QEMU.
 *
 *  Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}
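
/*
 * Illustrative example (not part of the original source): divweu computes
 * (RA << 32) / RB. With ra = 1 and rb = 2, the dividend is 0x100000000 and
 * the quotient 0x80000000 still fits in 32 bits, so rt = 0x80000000 and no
 * overflow is flagged. With ra = 1 and rb = 1, the quotient 0x100000000
 * exceeds UINT32_MAX, so the result is treated as undefined (rt = 0,
 * overflow = 1).
 */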

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * Subtract 1 from each byte, AND with the inverse, and check whether the MSB
 * is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR with the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))
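
/*
 * Illustrative example (not part of the original source): on a 64-bit
 * target, pattern(0x01) = 0x0101010101010101 and pattern(0x80) =
 * 0x8080808080808080. For v = 0x1122330044556677, byte 4 is 0x00, so
 * (v - pattern(0x01)) & ~v & pattern(0x80) leaves 0x80 in that byte and
 * haszero(v) is non-zero. hasvalue(x, n) applies the same test to
 * x ^ pattern(n), whose bytes are zero exactly where x holds byte n, e.g.
 * hasvalue(0x0000ABCD, 0xCD) is non-zero because the low byte matches.
 */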

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}
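
/*
 * Illustrative example (not part of the original source): each byte of rs
 * is a big-endian bit index into rb (PPC_BIT(0) is the MSB). If every byte
 * of rs is 0x00, each of the eight lookups tests the MSB of rb, so bpermd
 * returns 0xFF when rb's top bit is set and 0x00 otherwise.
 */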

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}
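
/*
 * Illustrative example (not part of the original source): cmpb sets each
 * result byte to 0xFF where rs and rb agree. For rs = 0x1122 and
 * rb = 0x1133 on a 64-bit target, only the lowest byte differs, so
 * ra = 0xFFFFFFFFFFFFFF00.
 */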

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}
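
/*
 * Illustrative example (not part of the original source): popcntb keeps a
 * separate population count per byte. For val = 0x0102030405060708 the
 * per-byte counts are 1, 1, 2, 1, 2, 2, 3, 1 (MSB to LSB), giving
 * 0x0101020102020301.
 */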

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.  */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    return val;
}
#endif

uint64_t helper_cfuged(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid having to decide
     * between ctz and cto, we negate the mask at the end of each iteration.
     */
    target_ulong m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Process the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extract the 'n' trailing bits of src and put them in the leading
         * 'n' bits of 'right' or 'left', pushing down the previously
         * extracted values.
         */
        m = (1ll << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discard the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is
         * kept the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, 'right' has been rotated right a total of ctpop(mask) bits.
     * To put it back in place, shift it right by the remaining
     * 64 - ctpop(mask) bits.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}
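
/*
 * Illustrative example (not part of the original source): cfuged gathers
 * the src bits at mask-one positions into the low end of the result (order
 * preserved) and the remaining bits into the high end. For src = 0xB2
 * (0b10110010) and mask = 0xCC (0b11001100), the mask-one positions of src
 * hold 1,0,0,0 and the mask-zero positions hold ...1,1,1,0, so the result
 * is 0xE8 (0b11101000).
 */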

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the craziest instruction ever seen, imho! */
/* The real implementation uses a ROM table; do the same. */
/*
 * Extremely decomposed:
 *                      -arg / 256
 * return 256 * log10(10           + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c.inc"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif
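
/*
 * Illustrative example (not part of the original source): for arg = 0 the
 * formula gives 256 * log10(10^0 + 1.0) + 0.5 = 256 * log10(2) + 0.5,
 * about 77.6, which truncates to a table entry of 77.
 */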

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    ppc_store_vscr(env, vscr);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    return ppc_get_vscr(env);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.  */
    env->vscr_sat.u32[0] = 1;
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                           ppc_avr_t *b, ppc_avr_t *c)                  \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            VARITHSAT_CASE(optype, op, cvt, element);                   \
        }                                                               \
        if (sat) {                                                      \
            vscr_sat->u32[0] = 1;                                       \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG
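
/*
 * Illustrative example (not part of the original source): the averages
 * round up because of the +1, e.g. vavgub on elements 1 and 2 computes
 * (1 + 2 + 1) >> 1 = 2. The wider etype keeps the intermediate sum from
 * overflowing.
 */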

#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/*
 * VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP

#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,              \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
{                                                                       \
    etype ones = (etype)-1;                                             \
    etype all = ones;                                                   \
    etype result, none = 0;                                             \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        if (cmpzero) {                                                  \
            result = ((a->element[i] == 0)                              \
                           || (b->element[i] == 0)                      \
                           || (a->element[i] != b->element[i]) ?        \
                           ones : 0x0);                                 \
        } else {                                                        \
            result = (a->element[i] != b->element[i]) ? ones : 0x0;     \
        }                                                               \
        r->element[i] = result;                                         \
        all &= result;                                                  \
        none |= result;                                                 \
    }                                                                   \
    if (record) {                                                       \
        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
    }                                                                   \
}

/*
 * VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            FloatRelation rel =                                         \
                float32_compare_quiet(a->f32[i], b->f32[i],             \
                                      &env->vec_status);                \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                                     &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                                         &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, access, ofs)                                  \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
    {                                                                        \
        ppc_avr_t result;                                                    \
        int i, half = ARRAY_SIZE(r->element) / 2;                            \
                                                                             \
        for (i = 0; i < half; i++) {                                         \
            result.access(i * 2 + 0) = a->access(i + ofs);                   \
            result.access(i * 2 + 1) = b->access(i + ofs);                   \
        }                                                                    \
        *r = result;                                                         \
    }

#define VMRG(suffix, element, access)          \
    VMRG_DO(mrgl##suffix, element, access, half)   \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)       \
    VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast)  \
    VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
VMUL(sb, s8, VsrSB, VsrSH, int16_t)
VMUL(sh, s16, VsrSH, VsrSW, int32_t)
VMUL(sw, s32, VsrSW, VsrSD, int64_t)
VMUL(ub, u8, VsrB, VsrH, uint16_t)
VMUL(uh, u16, VsrH, VsrW, uint32_t)
VMUL(uw, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < 4; i++) {
        r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32);
    }
}

void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < 4; i++) {
        r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] *
                               (uint64_t)b->u32[i]) >> 32);
    }
}

void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    uint64_t discard;

    muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]);
    muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]);
}

void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    uint64_t discard;

    mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]);
    mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]);
}

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
{                                                             \
    int i, j;                                                 \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];    \
                                                              \
    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
        prod[i] = 0;                                          \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
            if (a->srcfld[i] & (1ull << j)) {                 \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
            }                                                 \
        }                                                     \
    }                                                         \
                                                              \
    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];         \
    }                                                         \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)
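
/*
 * Illustrative example (not part of the original source): the products
 * here are carry-less (XOR instead of addition), i.e. polynomial
 * multiplication over GF(2). For vpmsumb, 0x03 * 0x05 is
 * (x + 1)(x^2 + 1) = x^3 + x^2 + x + 1 = 0x0F, and adjacent even/odd
 * products are then combined with XOR rather than +.
 */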

void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.VsrD(0) = 0;
                    bshift.VsrD(1) = b->u64[i];
                } else {
                    bshift.VsrD(0) = b->u64[i] >> (64 - j);
                    bshift.VsrD(1) = b->u64[i] << j;
                }
                prod[i].VsrD(1) ^= bshift.VsrD(1);
                prod[i].VsrD(0) ^= bshift.VsrD(0);
            }
        }
    }

    r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
    r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
#endif
}


#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}
   1367
   1368#define VPK(suffix, from, to, cvt, dosat)                               \
   1369    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
   1370                            ppc_avr_t *a, ppc_avr_t *b)                 \
   1371    {                                                                   \
   1372        int i;                                                          \
   1373        int sat = 0;                                                    \
   1374        ppc_avr_t result;                                               \
   1375        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
   1376        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
   1377                                                                        \
   1378        VECTOR_FOR_INORDER_I(i, from) {                                 \
   1379            result.to[i] = cvt(a0->from[i], &sat);                      \
   1380            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
   1381        }                                                               \
   1382        *r = result;                                                    \
   1383        if (dosat && sat) {                                             \
   1384            set_vscr_sat(env);                                          \
   1385        }                                                               \
   1386    }
   1387#define I(x, y) (x)
   1388VPK(shss, s16, s8, cvtshsb, 1)
   1389VPK(shus, s16, u8, cvtshub, 1)
   1390VPK(swss, s32, s16, cvtswsh, 1)
   1391VPK(swus, s32, u16, cvtswuh, 1)
   1392VPK(sdss, s64, s32, cvtsdsw, 1)
   1393VPK(sdus, s64, u32, cvtsduw, 1)
   1394VPK(uhus, u16, u8, cvtuhub, 1)
   1395VPK(uwus, u32, u16, cvtuwuh, 1)
   1396VPK(udus, u64, u32, cvtuduw, 1)
   1397VPK(uhum, u16, u8, I, 0)
   1398VPK(uwum, u32, u16, I, 0)
   1399VPK(udum, u64, u32, I, 0)
   1400#undef I
   1401#undef VPK
   1402#undef PKBIG
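
        /*
         * In the VPK family above, elements of a fill the most
         * significant half of the result and elements of b the least
         * significant half, each narrowed by cvt().  The saturating
         * forms (dosat = 1) clamp out-of-range values and record the
         * event in VSCR[SAT]; e.g. vpkshss narrows the signed halfword
         * 0x1234 to 0x7f.  The modulo forms (vpk*um) simply truncate.
         */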
   1403
   1404void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
   1405{
   1406    int i;
   1407
   1408    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
   1409        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
   1410    }
   1411}
   1412
   1413#define VRFI(suffix, rounding)                                  \
   1414    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
   1415                             ppc_avr_t *b)                      \
   1416    {                                                           \
   1417        int i;                                                  \
   1418        float_status s = env->vec_status;                       \
   1419                                                                \
   1420        set_float_rounding_mode(rounding, &s);                  \
   1421        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {              \
    1422            r->f32[i] = float32_round_to_int(b->f32[i], &s);    \
   1423        }                                                       \
   1424    }
   1425VRFI(n, float_round_nearest_even)
   1426VRFI(m, float_round_down)
   1427VRFI(p, float_round_up)
   1428VRFI(z, float_round_to_zero)
   1429#undef VRFI
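
        /*
         * The VRFI variants round to integral float values: vrfin
         * rounds to nearest (ties to even), vrfim toward minus
         * infinity, vrfip toward plus infinity and vrfiz toward zero.
         * The rounding mode is set on a local copy of vec_status, so
         * the live rounding mode of env->vec_status is not disturbed.
         */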
   1430
   1431void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
   1432{
   1433    int i;
   1434
   1435    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
   1436        float32 t = float32_sqrt(b->f32[i], &env->vec_status);
   1437
   1438        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
   1439    }
   1440}
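
        /*
         * vrefp and vrsqrtefp above are architecturally only "estimate"
         * instructions, but these helpers compute a correctly rounded
         * 1/x and 1/sqrt(x); an exact result trivially satisfies the
         * architecture's relative-error bound for the estimate.
         */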
   1441
   1442#define VRLMI(name, size, element, insert)                            \
   1443void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
   1444{                                                                     \
   1445    int i;                                                            \
   1446    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
   1447        uint##size##_t src1 = a->element[i];                          \
   1448        uint##size##_t src2 = b->element[i];                          \
   1449        uint##size##_t src3 = r->element[i];                          \
   1450        uint##size##_t begin, end, shift, mask, rot_val;              \
   1451                                                                      \
   1452        shift = extract##size(src2, 0, 6);                            \
   1453        end   = extract##size(src2, 8, 6);                            \
   1454        begin = extract##size(src2, 16, 6);                           \
   1455        rot_val = rol##size(src1, shift);                             \
   1456        mask = mask_u##size(begin, end);                              \
   1457        if (insert) {                                                 \
   1458            r->element[i] = (rot_val & mask) | (src3 & ~mask);        \
   1459        } else {                                                      \
   1460            r->element[i] = (rot_val & mask);                         \
   1461        }                                                             \
   1462    }                                                                 \
   1463}
   1464
   1465VRLMI(vrldmi, 64, u64, 1);
   1466VRLMI(vrlwmi, 32, u32, 1);
   1467VRLMI(vrldnm, 64, u64, 0);
   1468VRLMI(vrlwnm, 32, u32, 0);
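
        /*
         * For the rotate helpers above, each element of b packs three
         * 6-bit control fields: the rotate amount in bits 0-5, the mask
         * end in bits 8-13 and the mask begin in bits 16-21.  src1 is
         * rotated left and then either inserted under the begin..end
         * mask into the previous contents of r (the *mi forms, which
         * read r as src3) or simply ANDed with the mask (the *nm forms).
         */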
   1469
   1470void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
   1471                 ppc_avr_t *c)
   1472{
   1473    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
   1474    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
   1475}
   1476
   1477void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
   1478{
   1479    int i;
   1480
   1481    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
   1482        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
   1483    }
   1484}
   1485
   1486void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
   1487{
   1488    int i;
   1489
   1490    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
   1491        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
   1492    }
   1493}
   1494
   1495#define VEXTU_X_DO(name, size, left)                            \
   1496target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
   1497{                                                               \
   1498    int index = (a & 0xf) * 8;                                  \
   1499    if (left) {                                                 \
   1500        index = 128 - index - size;                             \
   1501    }                                                           \
   1502    return int128_getlo(int128_rshift(b->s128, index)) &        \
   1503        MAKE_64BIT_MASK(0, size);                               \
   1504}
   1505VEXTU_X_DO(vextublx,  8, 1)
   1506VEXTU_X_DO(vextuhlx, 16, 1)
   1507VEXTU_X_DO(vextuwlx, 32, 1)
   1508VEXTU_X_DO(vextubrx,  8, 0)
   1509VEXTU_X_DO(vextuhrx, 16, 0)
   1510VEXTU_X_DO(vextuwrx, 32, 0)
   1511#undef VEXTU_X_DO
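
        /*
         * vextu{b,h,w}{l,r}x extract an unsigned element from the
         * 16-byte register at the byte offset given in the low 4 bits
         * of the GPR operand.  The left-indexed forms count the offset
         * from the most significant byte, the right-indexed forms from
         * the least significant one; e.g. vextublx with an offset of 0
         * returns the most significant byte.
         */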
   1512
   1513void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1514{
   1515    int i;
   1516    unsigned int shift, bytes, size;
   1517
   1518    size = ARRAY_SIZE(r->u8);
   1519    for (i = 0; i < size; i++) {
   1520        shift = b->VsrB(i) & 0x7;             /* extract shift value */
   1521        bytes = (a->VsrB(i) << 8) +           /* extract adjacent bytes */
   1522            (((i + 1) < size) ? a->VsrB(i + 1) : 0);
   1523        r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
   1524    }
   1525}
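
        /*
         * In vslv each result byte is the corresponding byte of a
         * shifted left by the low 3 bits of the matching byte of b,
         * with the vacated bits refilled from the next byte of a to
         * the right: e.g. bytes 0x12 0x34 with shift 4 give
         * (0x1234 << 4) >> 8 == 0x123, stored as the byte 0x23.
         */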
   1526
   1527void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1528{
   1529    int i;
   1530    unsigned int shift, bytes;
   1531
    1532    /*
    1533     * Iterate in reverse order, since destination and source can be
    1534     * the same register; it is modified in place, saving a temporary,
    1535     * and reverse order guarantees no computed byte is fed back.
    1536     */
   1537    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
   1538        shift = b->VsrB(i) & 0x7;               /* extract shift value */
   1539        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
   1540                                                /* extract adjacent bytes */
   1541        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
   1542    }
   1543}
   1544
   1545void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
   1546{
   1547    int sh = shift & 0xf;
   1548    int i;
   1549    ppc_avr_t result;
   1550
   1551    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
   1552        int index = sh + i;
   1553        if (index > 0xf) {
   1554            result.VsrB(i) = b->VsrB(index - 0x10);
   1555        } else {
   1556            result.VsrB(i) = a->VsrB(index);
   1557        }
   1558    }
   1559    *r = result;
   1560}
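
        /*
         * vsldoi concatenates a and b into a 32-byte string and
         * extracts the 16 consecutive bytes starting at byte offset sh,
         * so e.g. sh == 3 yields bytes 3..15 of a followed by bytes
         * 0..2 of b.
         */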
   1561
   1562void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1563{
   1564    int sh = (b->VsrB(0xf) >> 3) & 0xf;
   1565
   1566#if defined(HOST_WORDS_BIGENDIAN)
   1567    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
   1568    memset(&r->u8[16 - sh], 0, sh);
   1569#else
   1570    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
   1571    memset(&r->u8[0], 0, sh);
   1572#endif
   1573}
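
        /*
         * vslo shifts the whole register left by 0-15 whole bytes; the
         * byte count is taken from bits 3 to 6 of the least significant
         * byte of b.  vsro further below is the right-shift counterpart.
         */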
   1574
   1575#if defined(HOST_WORDS_BIGENDIAN)
   1576#define VINSERT(suffix, element)                                            \
   1577    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
   1578    {                                                                       \
   1579        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
   1580               sizeof(r->element[0]));                                      \
   1581    }
   1582#else
   1583#define VINSERT(suffix, element)                                            \
   1584    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
   1585    {                                                                       \
   1586        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
   1587        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
   1588    }
   1589#endif
   1590VINSERT(b, u8)
   1591VINSERT(h, u16)
   1592VINSERT(w, u32)
   1593VINSERT(d, u64)
   1594#undef VINSERT
   1595#if defined(HOST_WORDS_BIGENDIAN)
   1596#define VEXTRACT(suffix, element)                                            \
   1597    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
   1598    {                                                                        \
   1599        uint32_t es = sizeof(r->element[0]);                                 \
   1600        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
   1601        memset(&r->u8[8], 0, 8);                                             \
   1602        memset(&r->u8[0], 0, 8 - es);                                        \
   1603    }
   1604#else
   1605#define VEXTRACT(suffix, element)                                            \
   1606    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
   1607    {                                                                        \
   1608        uint32_t es = sizeof(r->element[0]);                                 \
   1609        uint32_t s = (16 - index) - es;                                      \
   1610        memmove(&r->u8[8], &b->u8[s], es);                                   \
   1611        memset(&r->u8[0], 0, 8);                                             \
   1612        memset(&r->u8[8 + es], 0, 8 - es);                                   \
   1613    }
   1614#endif
   1615VEXTRACT(ub, u8)
   1616VEXTRACT(uh, u16)
   1617VEXTRACT(uw, u32)
   1618VEXTRACT(d, u64)
   1619#undef VEXTRACT
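
        /*
         * vinsert* copy the element that ends at the 64-bit midpoint of
         * b (byte 7 for vinsertb, halfword 3 for vinserth, word 1 for
         * vinsertw, doubleword 0 for vinsertd) into r at the given byte
         * offset from the most significant end.  vextractu* do the
         * reverse: the element at that byte offset of b lands
         * right-justified in the most significant doubleword of r, with
         * everything else zeroed.
         */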
   1620
   1621void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
   1622                        ppc_vsr_t *xb, uint32_t index)
   1623{
   1624    ppc_vsr_t t = { };
   1625    size_t es = sizeof(uint32_t);
   1626    uint32_t ext_index;
   1627    int i;
   1628
   1629    ext_index = index;
   1630    for (i = 0; i < es; i++, ext_index++) {
   1631        t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
   1632    }
   1633
   1634    *xt = t;
   1635}
   1636
   1637void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
   1638                      ppc_vsr_t *xb, uint32_t index)
   1639{
   1640    ppc_vsr_t t = *xt;
   1641    size_t es = sizeof(uint32_t);
   1642    int ins_index, i = 0;
   1643
   1644    ins_index = index;
   1645    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
   1646        t.VsrB(ins_index) = xb->VsrB(8 - es + i);
   1647    }
   1648
   1649    *xt = t;
   1650}
   1651
   1652#define VEXT_SIGNED(name, element, cast)                            \
   1653void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
   1654{                                                                   \
   1655    int i;                                                          \
   1656    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
   1657        r->element[i] = (cast)b->element[i];                        \
   1658    }                                                               \
   1659}
   1660VEXT_SIGNED(vextsb2w, s32, int8_t)
   1661VEXT_SIGNED(vextsb2d, s64, int8_t)
   1662VEXT_SIGNED(vextsh2w, s32, int16_t)
   1663VEXT_SIGNED(vextsh2d, s64, int16_t)
   1664VEXT_SIGNED(vextsw2d, s64, int32_t)
   1665#undef VEXT_SIGNED
   1666
   1667#define VNEG(name, element)                                         \
   1668void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
   1669{                                                                   \
   1670    int i;                                                          \
   1671    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
   1672        r->element[i] = -b->element[i];                             \
   1673    }                                                               \
   1674}
   1675VNEG(vnegw, s32)
   1676VNEG(vnegd, s64)
   1677#undef VNEG
   1678
   1679void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1680{
   1681    int sh = (b->VsrB(0xf) >> 3) & 0xf;
   1682
   1683#if defined(HOST_WORDS_BIGENDIAN)
   1684    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
   1685    memset(&r->u8[0], 0, sh);
   1686#else
   1687    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
   1688    memset(&r->u8[16 - sh], 0, sh);
   1689#endif
   1690}
   1691
   1692void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1693{
   1694    int i;
   1695
   1696    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
   1697        r->u32[i] = a->u32[i] >= b->u32[i];
   1698    }
   1699}
   1700
   1701void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1702{
   1703    int64_t t;
   1704    int i, upper;
   1705    ppc_avr_t result;
   1706    int sat = 0;
   1707
   1708    upper = ARRAY_SIZE(r->s32) - 1;
   1709    t = (int64_t)b->VsrSW(upper);
   1710    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
   1711        t += a->VsrSW(i);
   1712        result.VsrSW(i) = 0;
   1713    }
   1714    result.VsrSW(upper) = cvtsdsw(t, &sat);
   1715    *r = result;
   1716
   1717    if (sat) {
   1718        set_vscr_sat(env);
   1719    }
   1720}
   1721
   1722void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1723{
   1724    int i, j, upper;
   1725    ppc_avr_t result;
   1726    int sat = 0;
   1727
   1728    upper = 1;
   1729    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
   1730        int64_t t = (int64_t)b->VsrSW(upper + i * 2);
   1731
   1732        result.VsrD(i) = 0;
   1733        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
   1734            t += a->VsrSW(2 * i + j);
   1735        }
   1736        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
   1737    }
   1738
   1739    *r = result;
   1740    if (sat) {
   1741        set_vscr_sat(env);
   1742    }
   1743}
   1744
   1745void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1746{
   1747    int i, j;
   1748    int sat = 0;
   1749
   1750    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
   1751        int64_t t = (int64_t)b->s32[i];
   1752
   1753        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
   1754            t += a->s8[4 * i + j];
   1755        }
   1756        r->s32[i] = cvtsdsw(t, &sat);
   1757    }
   1758
   1759    if (sat) {
   1760        set_vscr_sat(env);
   1761    }
   1762}
   1763
   1764void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1765{
   1766    int sat = 0;
   1767    int i;
   1768
   1769    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
   1770        int64_t t = (int64_t)b->s32[i];
   1771
   1772        t += a->s16[2 * i] + a->s16[2 * i + 1];
   1773        r->s32[i] = cvtsdsw(t, &sat);
   1774    }
   1775
   1776    if (sat) {
   1777        set_vscr_sat(env);
   1778    }
   1779}
   1780
   1781void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1782{
   1783    int i, j;
   1784    int sat = 0;
   1785
   1786    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
   1787        uint64_t t = (uint64_t)b->u32[i];
   1788
   1789        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
   1790            t += a->u8[4 * i + j];
   1791        }
   1792        r->u32[i] = cvtuduw(t, &sat);
   1793    }
   1794
   1795    if (sat) {
   1796        set_vscr_sat(env);
   1797    }
   1798}
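
        /*
         * The helpers above are the saturating sum-across operations:
         * vsumsws adds all four signed words of a plus the last word of
         * b into the last word of r (clearing the rest); vsum2sws forms
         * one such sum per doubleword; vsum4sbs, vsum4shs and vsum4ubs
         * add the four bytes or two halfwords under each word of a to
         * the corresponding word of b.  All of them saturate to 32 bits
         * and set VSCR[SAT] on overflow.
         */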
   1799
   1800#if defined(HOST_WORDS_BIGENDIAN)
   1801#define UPKHI 1
   1802#define UPKLO 0
   1803#else
   1804#define UPKHI 0
   1805#define UPKLO 1
   1806#endif
   1807#define VUPKPX(suffix, hi)                                              \
   1808    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
   1809    {                                                                   \
   1810        int i;                                                          \
   1811        ppc_avr_t result;                                               \
   1812                                                                        \
   1813        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
   1814            uint16_t e = b->u16[hi ? i : i + 4];                        \
   1815            uint8_t a = (e >> 15) ? 0xff : 0;                           \
   1816            uint8_t r = (e >> 10) & 0x1f;                               \
   1817            uint8_t g = (e >> 5) & 0x1f;                                \
   1818            uint8_t b = e & 0x1f;                                       \
   1819                                                                        \
   1820            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
   1821        }                                                               \
   1822        *r = result;                                                    \
   1823    }
   1824VUPKPX(lpx, UPKLO)
   1825VUPKPX(hpx, UPKHI)
   1826#undef VUPKPX
   1827
   1828#define VUPK(suffix, unpacked, packee, hi)                              \
   1829    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
   1830    {                                                                   \
   1831        int i;                                                          \
   1832        ppc_avr_t result;                                               \
   1833                                                                        \
   1834        if (hi) {                                                       \
   1835            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
   1836                result.unpacked[i] = b->packee[i];                      \
   1837            }                                                           \
   1838        } else {                                                        \
   1839            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
   1840                 i++) {                                                 \
   1841                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
   1842            }                                                           \
   1843        }                                                               \
   1844        *r = result;                                                    \
   1845    }
   1846VUPK(hsb, s16, s8, UPKHI)
   1847VUPK(hsh, s32, s16, UPKHI)
   1848VUPK(hsw, s64, s32, UPKHI)
   1849VUPK(lsb, s16, s8, UPKLO)
   1850VUPK(lsh, s32, s16, UPKLO)
   1851VUPK(lsw, s64, s32, UPKLO)
   1852#undef VUPK
   1853#undef UPKHI
   1854#undef UPKLO
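
        /*
         * vupkh* sign-extend the elements of the most significant half
         * of b, vupkl* those of the least significant half.  The pixel
         * forms expand 1:5:5:5 halfwords back to 8:8:8:8, replicating
         * the single leading bit across the whole top byte (0x00 or
         * 0xff); e.g. 0x7c1f unpacks to 0x001f001f.
         */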
   1855
   1856#define VGENERIC_DO(name, element)                                      \
   1857    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
   1858    {                                                                   \
   1859        int i;                                                          \
   1860                                                                        \
   1861        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
   1862            r->element[i] = name(b->element[i]);                        \
   1863        }                                                               \
   1864    }
   1865
   1866#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
   1867#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
   1868
   1869VGENERIC_DO(clzb, u8)
   1870VGENERIC_DO(clzh, u16)
   1871
   1872#undef clzb
   1873#undef clzh
   1874
   1875#define ctzb(v) ((v) ? ctz32(v) : 8)
   1876#define ctzh(v) ((v) ? ctz32(v) : 16)
   1877#define ctzw(v) ctz32((v))
   1878#define ctzd(v) ctz64((v))
   1879
   1880VGENERIC_DO(ctzb, u8)
   1881VGENERIC_DO(ctzh, u16)
   1882VGENERIC_DO(ctzw, u32)
   1883VGENERIC_DO(ctzd, u64)
   1884
   1885#undef ctzb
   1886#undef ctzh
   1887#undef ctzw
   1888#undef ctzd
   1889
   1890#define popcntb(v) ctpop8(v)
   1891#define popcnth(v) ctpop16(v)
   1892#define popcntw(v) ctpop32(v)
   1893#define popcntd(v) ctpop64(v)
   1894
   1895VGENERIC_DO(popcntb, u8)
   1896VGENERIC_DO(popcnth, u16)
   1897VGENERIC_DO(popcntw, u32)
   1898VGENERIC_DO(popcntd, u64)
   1899
   1900#undef popcntb
   1901#undef popcnth
   1902#undef popcntw
   1903#undef popcntd
   1904
   1905#undef VGENERIC_DO
   1906
   1907#if defined(HOST_WORDS_BIGENDIAN)
   1908#define QW_ONE { .u64 = { 0, 1 } }
   1909#else
   1910#define QW_ONE { .u64 = { 1, 0 } }
   1911#endif
   1912
   1913#ifndef CONFIG_INT128
   1914
   1915static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
   1916{
   1917    t->u64[0] = ~a.u64[0];
   1918    t->u64[1] = ~a.u64[1];
   1919}
   1920
   1921static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
   1922{
   1923    if (a.VsrD(0) < b.VsrD(0)) {
   1924        return -1;
   1925    } else if (a.VsrD(0) > b.VsrD(0)) {
   1926        return 1;
   1927    } else if (a.VsrD(1) < b.VsrD(1)) {
   1928        return -1;
   1929    } else if (a.VsrD(1) > b.VsrD(1)) {
   1930        return 1;
   1931    } else {
   1932        return 0;
   1933    }
   1934}
   1935
   1936static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
   1937{
   1938    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
   1939    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
   1940                     (~a.VsrD(1) < b.VsrD(1));
   1941}
   1942
   1943static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
   1944{
   1945    ppc_avr_t not_a;
   1946    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
   1947    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
   1948                     (~a.VsrD(1) < b.VsrD(1));
   1949    avr_qw_not(&not_a, a);
   1950    return avr_qw_cmpu(not_a, b) < 0;
   1951}
   1952
   1953#endif
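
        /*
         * Without CONFIG_INT128 the quadword helpers above build
         * 128-bit arithmetic from 64-bit halves.  Carry detection
         * relies on the identity
         *
         *     a + b > UINT64_MAX  <=>  b > UINT64_MAX - a  <=>  ~a < b
         *
         * (since ~a == UINT64_MAX - a for unsigned a), which is why
         * avr_qw_add folds (~a.VsrD(1) < b.VsrD(1)) into the high half.
         */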
   1954
   1955void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1956{
   1957#ifdef CONFIG_INT128
   1958    r->u128 = a->u128 + b->u128;
   1959#else
   1960    avr_qw_add(r, *a, *b);
   1961#endif
   1962}
   1963
   1964void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   1965{
   1966#ifdef CONFIG_INT128
   1967    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
   1968#else
   1969
   1970    if (c->VsrD(1) & 1) {
   1971        ppc_avr_t tmp;
   1972
   1973        tmp.VsrD(0) = 0;
   1974        tmp.VsrD(1) = c->VsrD(1) & 1;
   1975        avr_qw_add(&tmp, *a, tmp);
   1976        avr_qw_add(r, tmp, *b);
   1977    } else {
   1978        avr_qw_add(r, *a, *b);
   1979    }
   1980#endif
   1981}
   1982
   1983void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1984{
   1985#ifdef CONFIG_INT128
   1986    r->u128 = (~a->u128 < b->u128);
   1987#else
   1988    ppc_avr_t not_a;
   1989
   1990    avr_qw_not(&not_a, *a);
   1991
   1992    r->VsrD(0) = 0;
   1993    r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
   1994#endif
   1995}
   1996
   1997void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   1998{
   1999#ifdef CONFIG_INT128
   2000    int carry_out = (~a->u128 < b->u128);
   2001    if (!carry_out && (c->u128 & 1)) {
   2002        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
   2003                    ((a->u128 != 0) || (b->u128 != 0));
   2004    }
   2005    r->u128 = carry_out;
   2006#else
   2007
   2008    int carry_in = c->VsrD(1) & 1;
   2009    int carry_out = 0;
   2010    ppc_avr_t tmp;
   2011
   2012    carry_out = avr_qw_addc(&tmp, *a, *b);
   2013
   2014    if (!carry_out && carry_in) {
   2015        ppc_avr_t one = QW_ONE;
   2016        carry_out = avr_qw_addc(&tmp, tmp, one);
   2017    }
   2018    r->VsrD(0) = 0;
   2019    r->VsrD(1) = carry_out;
   2020#endif
   2021}
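
        /*
         * For the extended (carry-in) forms, the carry out of a + b +
         * cin is 1 iff a + b itself carries, or cin is set and a + b is
         * the all-ones value 2^128 - 1, in which case the extra 1 wraps
         * the sum to zero.
         */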
   2022
   2023void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2024{
   2025#ifdef CONFIG_INT128
   2026    r->u128 = a->u128 - b->u128;
   2027#else
   2028    ppc_avr_t tmp;
   2029    ppc_avr_t one = QW_ONE;
   2030
   2031    avr_qw_not(&tmp, *b);
   2032    avr_qw_add(&tmp, *a, tmp);
   2033    avr_qw_add(r, tmp, one);
   2034#endif
   2035}
   2036
   2037void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   2038{
   2039#ifdef CONFIG_INT128
   2040    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
   2041#else
   2042    ppc_avr_t tmp, sum;
   2043
   2044    avr_qw_not(&tmp, *b);
   2045    avr_qw_add(&sum, *a, tmp);
   2046
   2047    tmp.VsrD(0) = 0;
   2048    tmp.VsrD(1) = c->VsrD(1) & 1;
   2049    avr_qw_add(r, sum, tmp);
   2050#endif
   2051}
   2052
   2053void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2054{
   2055#ifdef CONFIG_INT128
   2056    r->u128 = (~a->u128 < ~b->u128) ||
   2057                 (a->u128 + ~b->u128 == (__uint128_t)-1);
   2058#else
   2059    int carry = (avr_qw_cmpu(*a, *b) > 0);
   2060    if (!carry) {
   2061        ppc_avr_t tmp;
   2062        avr_qw_not(&tmp, *b);
   2063        avr_qw_add(&tmp, *a, tmp);
   2064        carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
   2065    }
   2066    r->VsrD(0) = 0;
   2067    r->VsrD(1) = carry;
   2068#endif
   2069}
   2070
   2071void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   2072{
   2073#ifdef CONFIG_INT128
   2074    r->u128 =
   2075        (~a->u128 < ~b->u128) ||
   2076        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
   2077#else
   2078    int carry_in = c->VsrD(1) & 1;
   2079    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
   2080    if (!carry_out && carry_in) {
   2081        ppc_avr_t tmp;
   2082        avr_qw_not(&tmp, *b);
   2083        avr_qw_add(&tmp, *a, tmp);
   2084        carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
   2085    }
   2086
   2087    r->VsrD(0) = 0;
   2088    r->VsrD(1) = carry_out;
   2089#endif
   2090}
   2091
   2092#define BCD_PLUS_PREF_1 0xC
   2093#define BCD_PLUS_PREF_2 0xF
   2094#define BCD_PLUS_ALT_1  0xA
   2095#define BCD_NEG_PREF    0xD
   2096#define BCD_NEG_ALT     0xB
   2097#define BCD_PLUS_ALT_2  0xE
   2098#define NATIONAL_PLUS   0x2B
   2099#define NATIONAL_NEG    0x2D
   2100
   2101#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
   2102
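        /*
         * Signed BCD values occupy the whole 128-bit register: a 4-bit
         * sign code in the least significant nibble (digit 0) and 31
         * decimal digits of 4 bits each.  Digit n (n = 1 being least
         * significant) lives in byte BCD_DIG_BYTE(n) = 15 - n/2, with
         * odd-numbered digits in the high nibble and even-numbered ones
         * (including the sign) in the low nibble.
         */
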
   2103static int bcd_get_sgn(ppc_avr_t *bcd)
   2104{
   2105    switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
   2106    case BCD_PLUS_PREF_1:
   2107    case BCD_PLUS_PREF_2:
   2108    case BCD_PLUS_ALT_1:
   2109    case BCD_PLUS_ALT_2:
   2110    {
   2111        return 1;
   2112    }
   2113
   2114    case BCD_NEG_PREF:
   2115    case BCD_NEG_ALT:
   2116    {
   2117        return -1;
   2118    }
   2119
   2120    default:
   2121    {
   2122        return 0;
   2123    }
   2124    }
   2125}
   2126
   2127static int bcd_preferred_sgn(int sgn, int ps)
   2128{
   2129    if (sgn >= 0) {
   2130        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
   2131    } else {
   2132        return BCD_NEG_PREF;
   2133    }
   2134}
   2135
   2136static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
   2137{
   2138    uint8_t result;
   2139    if (n & 1) {
   2140        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
   2141    } else {
    2142        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
   2143    }
   2144
   2145    if (unlikely(result > 9)) {
   2146        *invalid = true;
   2147    }
   2148    return result;
   2149}
   2150
   2151static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
   2152{
   2153    if (n & 1) {
   2154        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
   2155        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
   2156    } else {
   2157        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
   2158        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
   2159    }
   2160}
   2161
   2162static bool bcd_is_valid(ppc_avr_t *bcd)
   2163{
   2164    int i;
   2165    int invalid = 0;
   2166
   2167    if (bcd_get_sgn(bcd) == 0) {
   2168        return false;
   2169    }
   2170
   2171    for (i = 1; i < 32; i++) {
   2172        bcd_get_digit(bcd, i, &invalid);
   2173        if (unlikely(invalid)) {
   2174            return false;
   2175        }
   2176    }
   2177    return true;
   2178}
   2179
   2180static int bcd_cmp_zero(ppc_avr_t *bcd)
   2181{
   2182    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
   2183        return CRF_EQ;
   2184    } else {
   2185        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
   2186    }
   2187}
   2188
   2189static uint16_t get_national_digit(ppc_avr_t *reg, int n)
   2190{
   2191    return reg->VsrH(7 - n);
   2192}
   2193
   2194static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
   2195{
   2196    reg->VsrH(7 - n) = val;
   2197}
   2198
   2199static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
   2200{
   2201    int i;
   2202    int invalid = 0;
   2203    for (i = 31; i > 0; i--) {
   2204        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
   2205        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
   2206        if (unlikely(invalid)) {
   2207            return 0; /* doesn't matter */
   2208        } else if (dig_a > dig_b) {
   2209            return 1;
   2210        } else if (dig_a < dig_b) {
   2211            return -1;
   2212        }
   2213    }
   2214
   2215    return 0;
   2216}
   2217
   2218static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
   2219                       int *overflow)
   2220{
   2221    int carry = 0;
   2222    int i;
   2223    int is_zero = 1;
   2224
   2225    for (i = 1; i <= 31; i++) {
   2226        uint8_t digit = bcd_get_digit(a, i, invalid) +
   2227                        bcd_get_digit(b, i, invalid) + carry;
   2228        is_zero &= (digit == 0);
   2229        if (digit > 9) {
   2230            carry = 1;
   2231            digit -= 10;
   2232        } else {
   2233            carry = 0;
   2234        }
   2235
   2236        bcd_put_digit(t, digit, i);
   2237    }
   2238
   2239    *overflow = carry;
   2240    return is_zero;
   2241}
   2242
   2243static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
   2244                       int *overflow)
   2245{
   2246    int carry = 0;
   2247    int i;
   2248
   2249    for (i = 1; i <= 31; i++) {
   2250        uint8_t digit = bcd_get_digit(a, i, invalid) -
   2251                        bcd_get_digit(b, i, invalid) + carry;
   2252        if (digit & 0x80) {
   2253            carry = -1;
   2254            digit += 10;
   2255        } else {
   2256            carry = 0;
   2257        }
   2258
   2259        bcd_put_digit(t, digit, i);
   2260    }
   2261
   2262    *overflow = carry;
   2263}
   2264
   2265uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2266{
   2267
   2268    int sgna = bcd_get_sgn(a);
   2269    int sgnb = bcd_get_sgn(b);
   2270    int invalid = (sgna == 0) || (sgnb == 0);
   2271    int overflow = 0;
   2272    int zero = 0;
   2273    uint32_t cr = 0;
   2274    ppc_avr_t result = { .u64 = { 0, 0 } };
   2275
   2276    if (!invalid) {
   2277        if (sgna == sgnb) {
   2278            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
   2279            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
   2280            cr = (sgna > 0) ? CRF_GT : CRF_LT;
   2281        } else {
   2282            int magnitude = bcd_cmp_mag(a, b);
   2283            if (magnitude > 0) {
   2284                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
   2285                bcd_sub_mag(&result, a, b, &invalid, &overflow);
   2286                cr = (sgna > 0) ? CRF_GT : CRF_LT;
   2287            } else if (magnitude < 0) {
   2288                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
   2289                bcd_sub_mag(&result, b, a, &invalid, &overflow);
   2290                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
   2291            } else {
   2292                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
   2293                cr = CRF_EQ;
   2294            }
   2295        }
   2296    }
   2297
   2298    if (unlikely(invalid)) {
   2299        result.VsrD(0) = result.VsrD(1) = -1;
   2300        cr = CRF_SO;
   2301    } else if (overflow) {
   2302        cr |= CRF_SO;
   2303    } else if (zero) {
   2304        cr |= CRF_EQ;
   2305    }
   2306
   2307    *r = result;
   2308
   2309    return cr;
   2310}
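
        /*
         * bcdadd implements classic sign-magnitude decimal addition:
         * equal signs add the magnitudes and keep the common sign,
         * while opposite signs subtract the smaller magnitude from the
         * larger and take the sign of the larger (e.g. 5 + (-3):
         * |5| > |3|, so r = 5 - 3 = +2).  The returned CR field encodes
         * LT/GT for the sign of the result, EQ for zero, and SO for
         * overflow or an invalid encoding.
         */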
   2311
   2312uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2313{
   2314    ppc_avr_t bcopy = *b;
   2315    int sgnb = bcd_get_sgn(b);
   2316    if (sgnb < 0) {
   2317        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
   2318    } else if (sgnb > 0) {
   2319        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
   2320    }
   2321    /* else invalid ... defer to bcdadd code for proper handling */
   2322
   2323    return helper_bcdadd(r, a, &bcopy, ps);
   2324}
   2325
   2326uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2327{
   2328    int i;
   2329    int cr = 0;
   2330    uint16_t national = 0;
   2331    uint16_t sgnb = get_national_digit(b, 0);
   2332    ppc_avr_t ret = { .u64 = { 0, 0 } };
   2333    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
   2334
   2335    for (i = 1; i < 8; i++) {
   2336        national = get_national_digit(b, i);
   2337        if (unlikely(national < 0x30 || national > 0x39)) {
   2338            invalid = 1;
   2339            break;
   2340        }
   2341
   2342        bcd_put_digit(&ret, national & 0xf, i);
   2343    }
   2344
   2345    if (sgnb == NATIONAL_PLUS) {
   2346        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
   2347    } else {
   2348        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
   2349    }
   2350
   2351    cr = bcd_cmp_zero(&ret);
   2352
   2353    if (unlikely(invalid)) {
   2354        cr = CRF_SO;
   2355    }
   2356
   2357    *r = ret;
   2358
   2359    return cr;
   2360}
   2361
   2362uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2363{
   2364    int i;
   2365    int cr = 0;
   2366    int sgnb = bcd_get_sgn(b);
   2367    int invalid = (sgnb == 0);
   2368    ppc_avr_t ret = { .u64 = { 0, 0 } };
   2369
   2370    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
   2371
   2372    for (i = 1; i < 8; i++) {
   2373        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
   2374
   2375        if (unlikely(invalid)) {
   2376            break;
   2377        }
   2378    }
   2379    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
   2380
   2381    cr = bcd_cmp_zero(b);
   2382
   2383    if (ox_flag) {
   2384        cr |= CRF_SO;
   2385    }
   2386
   2387    if (unlikely(invalid)) {
   2388        cr = CRF_SO;
   2389    }
   2390
   2391    *r = ret;
   2392
   2393    return cr;
   2394}
   2395
   2396uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2397{
   2398    int i;
   2399    int cr = 0;
   2400    int invalid = 0;
   2401    int zone_digit = 0;
   2402    int zone_lead = ps ? 0xF : 0x3;
   2403    int digit = 0;
   2404    ppc_avr_t ret = { .u64 = { 0, 0 } };
   2405    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
   2406
   2407    if (unlikely((sgnb < 0xA) && ps)) {
   2408        invalid = 1;
   2409    }
   2410
   2411    for (i = 0; i < 16; i++) {
   2412        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
   2413        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
   2414        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
   2415            invalid = 1;
   2416            break;
   2417        }
   2418
   2419        bcd_put_digit(&ret, digit, i + 1);
   2420    }
   2421
   2422    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
   2423            (!ps && (sgnb & 0x4))) {
   2424        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
   2425    } else {
   2426        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
   2427    }
   2428
   2429    cr = bcd_cmp_zero(&ret);
   2430
   2431    if (unlikely(invalid)) {
   2432        cr = CRF_SO;
   2433    }
   2434
   2435    *r = ret;
   2436
   2437    return cr;
   2438}
   2439
   2440uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2441{
   2442    int i;
   2443    int cr = 0;
   2444    uint8_t digit = 0;
   2445    int sgnb = bcd_get_sgn(b);
   2446    int zone_lead = (ps) ? 0xF0 : 0x30;
   2447    int invalid = (sgnb == 0);
   2448    ppc_avr_t ret = { .u64 = { 0, 0 } };
   2449
   2450    int ox_flag = ((b->VsrD(0) >> 4) != 0);
   2451
   2452    for (i = 0; i < 16; i++) {
   2453        digit = bcd_get_digit(b, i + 1, &invalid);
   2454
   2455        if (unlikely(invalid)) {
   2456            break;
   2457        }
   2458
   2459        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
   2460    }
   2461
   2462    if (ps) {
   2463        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
   2464    } else {
   2465        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
   2466    }
   2467
   2468    cr = bcd_cmp_zero(b);
   2469
   2470    if (ox_flag) {
   2471        cr |= CRF_SO;
   2472    }
   2473
   2474    if (unlikely(invalid)) {
   2475        cr = CRF_SO;
   2476    }
   2477
   2478    *r = ret;
   2479
   2480    return cr;
   2481}
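
        /*
         * bcdcfn/bcdctn and bcdcfz/bcdctz convert packed BCD from/to
         * the national format (seven UTF-16 digits 0x30..0x39 with the
         * sign character '+'/'-' in the least significant halfword) and
         * the zoned format (sixteen zone:digit bytes, with the sign
         * carried in the zone nibble of the least significant byte).
         */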
   2482
   2483/**
   2484 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
   2485 *
   2486 * Returns:
   2487 * > 0 if ahi|alo > bhi|blo,
   2488 * 0 if ahi|alo == bhi|blo,
   2489 * < 0 if ahi|alo < bhi|blo
   2490 */
   2491static inline int ucmp128(uint64_t alo, uint64_t ahi,
   2492                          uint64_t blo, uint64_t bhi)
   2493{
   2494    return (ahi == bhi) ?
   2495        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
   2496        (ahi > bhi ? 1 : -1);
   2497}
   2498
   2499uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2500{
   2501    int i;
   2502    int cr;
   2503    uint64_t lo_value;
   2504    uint64_t hi_value;
   2505    ppc_avr_t ret = { .u64 = { 0, 0 } };
   2506
   2507    if (b->VsrSD(0) < 0) {
   2508        lo_value = -b->VsrSD(1);
   2509        hi_value = ~b->VsrD(0) + !lo_value;
   2510        bcd_put_digit(&ret, 0xD, 0);
   2511
   2512        cr = CRF_LT;
   2513    } else {
   2514        lo_value = b->VsrD(1);
   2515        hi_value = b->VsrD(0);
   2516        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
   2517
   2518        if (hi_value == 0 && lo_value == 0) {
   2519            cr = CRF_EQ;
   2520        } else {
   2521            cr = CRF_GT;
   2522        }
   2523    }
   2524
   2525    /*
   2526     * Check src limits: abs(src) <= 10^31 - 1
   2527     *
   2528     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
   2529     */
   2530    if (ucmp128(lo_value, hi_value,
   2531                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
   2532        cr |= CRF_SO;
   2533
   2534        /*
   2535         * According to the ISA, if src wouldn't fit in the destination
   2536         * register, the result is undefined.
   2537         * In that case, we leave r unchanged.
   2538         */
   2539    } else {
   2540        divu128(&lo_value, &hi_value, 1000000000000000ULL);
   2541
   2542        for (i = 1; i < 16; hi_value /= 10, i++) {
   2543            bcd_put_digit(&ret, hi_value % 10, i);
   2544        }
   2545
   2546        for (; i < 32; lo_value /= 10, i++) {
   2547            bcd_put_digit(&ret, lo_value % 10, i);
   2548        }
   2549
   2550        *r = ret;
   2551    }
   2552
   2553    return cr;
   2554}
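
        /*
         * In the conversion above, the 128-bit magnitude is split once
         * by 10^15: the remainder (which divu128 leaves in hi_value)
         * supplies the 15 least significant digits and the quotient (in
         * lo_value) the upper 16, so every subsequent step is a cheap
         * 64-bit divide by 10.
         */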
   2555
   2556uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2557{
   2558    uint8_t i;
   2559    int cr;
   2560    uint64_t carry;
   2561    uint64_t unused;
   2562    uint64_t lo_value;
   2563    uint64_t hi_value = 0;
   2564    int sgnb = bcd_get_sgn(b);
   2565    int invalid = (sgnb == 0);
   2566
   2567    lo_value = bcd_get_digit(b, 31, &invalid);
   2568    for (i = 30; i > 0; i--) {
   2569        mulu64(&lo_value, &carry, lo_value, 10ULL);
   2570        mulu64(&hi_value, &unused, hi_value, 10ULL);
   2571        lo_value += bcd_get_digit(b, i, &invalid);
   2572        hi_value += carry;
   2573
   2574        if (unlikely(invalid)) {
   2575            break;
   2576        }
   2577    }
   2578
   2579    if (sgnb == -1) {
   2580        r->VsrSD(1) = -lo_value;
   2581        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
   2582    } else {
   2583        r->VsrSD(1) = lo_value;
   2584        r->VsrSD(0) = hi_value;
   2585    }
   2586
   2587    cr = bcd_cmp_zero(b);
   2588
   2589    if (unlikely(invalid)) {
   2590        cr = CRF_SO;
   2591    }
   2592
   2593    return cr;
   2594}
   2595
   2596uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2597{
   2598    int i;
   2599    int invalid = 0;
   2600
   2601    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
   2602        return CRF_SO;
   2603    }
   2604
   2605    *r = *a;
   2606    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
   2607
   2608    for (i = 1; i < 32; i++) {
   2609        bcd_get_digit(a, i, &invalid);
   2610        bcd_get_digit(b, i, &invalid);
   2611        if (unlikely(invalid)) {
   2612            return CRF_SO;
   2613        }
   2614    }
   2615
   2616    return bcd_cmp_zero(r);
   2617}
   2618
   2619uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2620{
   2621    int sgnb = bcd_get_sgn(b);
   2622
   2623    *r = *b;
   2624    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
   2625
   2626    if (bcd_is_valid(b) == false) {
   2627        return CRF_SO;
   2628    }
   2629
   2630    return bcd_cmp_zero(r);
   2631}
   2632
   2633uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2634{
   2635    int cr;
   2636    int i = a->VsrSB(7);
   2637    bool ox_flag = false;
   2638    int sgnb = bcd_get_sgn(b);
   2639    ppc_avr_t ret = *b;
   2640    ret.VsrD(1) &= ~0xf;
   2641
   2642    if (bcd_is_valid(b) == false) {
   2643        return CRF_SO;
   2644    }
   2645
   2646    if (unlikely(i > 31)) {
   2647        i = 31;
   2648    } else if (unlikely(i < -31)) {
   2649        i = -31;
   2650    }
   2651
   2652    if (i > 0) {
   2653        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
   2654    } else {
   2655        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
   2656    }
   2657    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
   2658
   2659    *r = ret;
   2660
   2661    cr = bcd_cmp_zero(r);
   2662    if (ox_flag) {
   2663        cr |= CRF_SO;
   2664    }
   2665
   2666    return cr;
   2667}
   2668
   2669uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2670{
   2671    int cr;
   2672    int i;
   2673    int invalid = 0;
   2674    bool ox_flag = false;
   2675    ppc_avr_t ret = *b;
   2676
   2677    for (i = 0; i < 32; i++) {
   2678        bcd_get_digit(b, i, &invalid);
   2679
   2680        if (unlikely(invalid)) {
   2681            return CRF_SO;
   2682        }
   2683    }
   2684
   2685    i = a->VsrSB(7);
   2686    if (i >= 32) {
   2687        ox_flag = true;
   2688        ret.VsrD(1) = ret.VsrD(0) = 0;
   2689    } else if (i <= -32) {
   2690        ret.VsrD(1) = ret.VsrD(0) = 0;
   2691    } else if (i > 0) {
   2692        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
   2693    } else {
   2694        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
   2695    }
   2696    *r = ret;
   2697
   2698    cr = bcd_cmp_zero(r);
   2699    if (ox_flag) {
   2700        cr |= CRF_SO;
   2701    }
   2702
   2703    return cr;
   2704}
   2705
   2706uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2707{
   2708    int cr;
   2709    int unused = 0;
   2710    int invalid = 0;
   2711    bool ox_flag = false;
   2712    int sgnb = bcd_get_sgn(b);
   2713    ppc_avr_t ret = *b;
   2714    ret.VsrD(1) &= ~0xf;
   2715
   2716    int i = a->VsrSB(7);
   2717    ppc_avr_t bcd_one;
   2718
   2719    bcd_one.VsrD(0) = 0;
   2720    bcd_one.VsrD(1) = 0x10;
   2721
   2722    if (bcd_is_valid(b) == false) {
   2723        return CRF_SO;
   2724    }
   2725
   2726    if (unlikely(i > 31)) {
   2727        i = 31;
   2728    } else if (unlikely(i < -31)) {
   2729        i = -31;
   2730    }
   2731
   2732    if (i > 0) {
   2733        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
   2734    } else {
   2735        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
   2736
   2737        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
   2738            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
   2739        }
   2740    }
   2741    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
   2742
   2743    cr = bcd_cmp_zero(&ret);
   2744    if (ox_flag) {
   2745        cr |= CRF_SO;
   2746    }
   2747    *r = ret;
   2748
   2749    return cr;
   2750}
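
        /*
         * bcds, bcdus and bcdsr above shift the digit string by the
         * signed count in byte 7 of a -- left for positive counts,
         * right for negative ones -- in whole digits (multiples of 4
         * bits).  bcdsr additionally rounds a right-shifted result up
         * when the last digit shifted out is 5 or greater; bcdtrunc and
         * bcdutrunc below clamp the value to a digit count instead.
         */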
   2751
   2752uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2753{
   2754    uint64_t mask;
   2755    uint32_t ox_flag = 0;
   2756    int i = a->VsrSH(3) + 1;
   2757    ppc_avr_t ret = *b;
   2758
   2759    if (bcd_is_valid(b) == false) {
   2760        return CRF_SO;
   2761    }
   2762
   2763    if (i > 16 && i < 32) {
   2764        mask = (uint64_t)-1 >> (128 - i * 4);
   2765        if (ret.VsrD(0) & ~mask) {
   2766            ox_flag = CRF_SO;
   2767        }
   2768
   2769        ret.VsrD(0) &= mask;
   2770    } else if (i >= 0 && i <= 16) {
   2771        mask = (uint64_t)-1 >> (64 - i * 4);
   2772        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
   2773            ox_flag = CRF_SO;
   2774        }
   2775
   2776        ret.VsrD(1) &= mask;
   2777        ret.VsrD(0) = 0;
   2778    }
   2779    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
   2780    *r = ret;
   2781
   2782    return bcd_cmp_zero(&ret) | ox_flag;
   2783}
   2784
   2785uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2786{
   2787    int i;
   2788    uint64_t mask;
   2789    uint32_t ox_flag = 0;
   2790    int invalid = 0;
   2791    ppc_avr_t ret = *b;
   2792
   2793    for (i = 0; i < 32; i++) {
   2794        bcd_get_digit(b, i, &invalid);
   2795
   2796        if (unlikely(invalid)) {
   2797            return CRF_SO;
   2798        }
   2799    }
   2800
   2801    i = a->VsrSH(3);
   2802    if (i > 16 && i < 33) {
   2803        mask = (uint64_t)-1 >> (128 - i * 4);
   2804        if (ret.VsrD(0) & ~mask) {
   2805            ox_flag = CRF_SO;
   2806        }
   2807
   2808        ret.VsrD(0) &= mask;
   2809    } else if (i > 0 && i <= 16) {
   2810        mask = (uint64_t)-1 >> (64 - i * 4);
   2811        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
   2812            ox_flag = CRF_SO;
   2813        }
   2814
   2815        ret.VsrD(1) &= mask;
   2816        ret.VsrD(0) = 0;
   2817    } else if (i == 0) {
   2818        if (ret.VsrD(0) || ret.VsrD(1)) {
   2819            ox_flag = CRF_SO;
   2820        }
   2821        ret.VsrD(0) = ret.VsrD(1) = 0;
   2822    }
   2823
   2824    *r = ret;
   2825    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
   2826        return ox_flag | CRF_EQ;
   2827    }
   2828
   2829    return ox_flag | CRF_GT;
   2830}
   2831
   2832void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
   2833{
   2834    int i;
   2835    VECTOR_FOR_INORDER_I(i, u8) {
   2836        r->u8[i] = AES_sbox[a->u8[i]];
   2837    }
   2838}
   2839
   2840void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2841{
   2842    ppc_avr_t result;
   2843    int i;
   2844
   2845    VECTOR_FOR_INORDER_I(i, u32) {
   2846        result.VsrW(i) = b->VsrW(i) ^
   2847            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
   2848             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
   2849             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
   2850             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
   2851    }
   2852    *r = result;
   2853}
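
        /*
         * vcipher is one forward AES round: indexing a through
         * AES_shifts applies ShiftRows, the AES_Te0..AES_Te3 tables
         * from crypto/aes.h combine SubBytes with MixColumns, and the
         * XOR with b adds the round key.  vcipherlast below is the
         * final round, which uses the plain S-box and omits MixColumns.
         */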
   2854
   2855void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2856{
   2857    ppc_avr_t result;
   2858    int i;
   2859
   2860    VECTOR_FOR_INORDER_I(i, u8) {
   2861        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
   2862    }
   2863    *r = result;
   2864}
   2865
   2866void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2867{
   2868    /* This differs from what is written in ISA V2.07.  The RTL is */
   2869    /* incorrect and will be fixed in V2.07B.                      */
   2870    int i;
   2871    ppc_avr_t tmp;
   2872
   2873    VECTOR_FOR_INORDER_I(i, u8) {
   2874        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
   2875    }
   2876
   2877    VECTOR_FOR_INORDER_I(i, u32) {
   2878        r->VsrW(i) =
   2879            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
   2880            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
   2881            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
   2882            AES_imc[tmp.VsrB(4 * i + 3)][3];
   2883    }
   2884}
   2885
   2886void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2887{
   2888    ppc_avr_t result;
   2889    int i;
   2890
   2891    VECTOR_FOR_INORDER_I(i, u8) {
   2892        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
   2893    }
   2894    *r = result;
   2895}
   2896
   2897void helper_vshasigmaw(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
   2898{
   2899    int st = (st_six & 0x10) != 0;
   2900    int six = st_six & 0xF;
   2901    int i;
   2902
   2903    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
   2904        if (st == 0) {
   2905            if ((six & (0x8 >> i)) == 0) {
   2906                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
   2907                             ror32(a->VsrW(i), 18) ^
   2908                             (a->VsrW(i) >> 3);
   2909            } else { /* six.bit[i] == 1 */
   2910                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
   2911                             ror32(a->VsrW(i), 19) ^
   2912                             (a->VsrW(i) >> 10);
   2913            }
   2914        } else { /* st == 1 */
   2915            if ((six & (0x8 >> i)) == 0) {
   2916                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
   2917                             ror32(a->VsrW(i), 13) ^
   2918                             ror32(a->VsrW(i), 22);
   2919            } else { /* six.bit[i] == 1 */
   2920                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
   2921                             ror32(a->VsrW(i), 11) ^
   2922                             ror32(a->VsrW(i), 25);
   2923            }
   2924        }
   2925    }
   2926}
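
        /*
         * vshasigmaw computes the four SHA-256 sigma functions: st == 0
         * selects the message-schedule sigmas (sigma0 = ROTR7 ^ ROTR18
         * ^ SHR3, sigma1 = ROTR17 ^ ROTR19 ^ SHR10) and st == 1 the
         * compression Sigmas (Sigma0 = ROTR2 ^ ROTR13 ^ ROTR22, Sigma1
         * = ROTR6 ^ ROTR11 ^ ROTR25); one bit of the six field picks
         * the variant for each word.
         */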
   2927
   2928void helper_vshasigmad(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
   2929{
   2930    int st = (st_six & 0x10) != 0;
   2931    int six = st_six & 0xF;
   2932    int i;
   2933
   2934    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
   2935        if (st == 0) {
   2936            if ((six & (0x8 >> (2 * i))) == 0) {
   2937                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
   2938                             ror64(a->VsrD(i), 8) ^
   2939                             (a->VsrD(i) >> 7);
   2940            } else { /* six.bit[2*i] == 1 */
   2941                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
   2942                             ror64(a->VsrD(i), 61) ^
   2943                             (a->VsrD(i) >> 6);
   2944            }
   2945        } else { /* st == 1 */
   2946            if ((six & (0x8 >> (2 * i))) == 0) {
   2947                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
   2948                             ror64(a->VsrD(i), 34) ^
   2949                             ror64(a->VsrD(i), 39);
   2950            } else { /* six.bit[2*i] == 1 */
   2951                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
   2952                             ror64(a->VsrD(i), 18) ^
   2953                             ror64(a->VsrD(i), 41);
   2954            }
   2955        }
   2956    }
   2957}
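
        /*
         * vshasigmad is the SHA-512 counterpart: sigma0 = ROTR1 ^ ROTR8
         * ^ SHR7, sigma1 = ROTR19 ^ ROTR61 ^ SHR6, Sigma0 = ROTR28 ^
         * ROTR34 ^ ROTR39, Sigma1 = ROTR14 ^ ROTR18 ^ ROTR41; every
         * other bit of the six field selects the variant for one of the
         * two doublewords.
         */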
   2958
   2959void helper_vpermxor(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   2960{
   2961    ppc_avr_t result;
   2962    int i;
   2963
   2964    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
   2965        int indexA = c->VsrB(i) >> 4;
   2966        int indexB = c->VsrB(i) & 0xF;
   2967
   2968        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
   2969    }
   2970    *r = result;
   2971}
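
        /*
         * vpermxor: the high and low nibbles of each byte of c select
         * one byte of a and one byte of b respectively, and the two
         * selected bytes are XORed into the corresponding result byte.
         */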
   2972
   2973#undef VECTOR_FOR_INORDER_I
   2974
   2975/*****************************************************************************/
   2976/* SPE extension helpers */
   2977/* Use a table to make this quicker */
   2978static const uint8_t hbrev[16] = {
   2979    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
   2980    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
   2981};
   2982
   2983static inline uint8_t byte_reverse(uint8_t val)
   2984{
   2985    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
   2986}
   2987
   2988static inline uint32_t word_reverse(uint32_t val)
   2989{
   2990    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
   2991        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
   2992}
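
        /*
         * byte_reverse mirrors the 8 bits of a byte by swapping
         * reversed nibbles: byte_reverse(0x12) == hbrev[0x1] |
         * (hbrev[0x2] << 4) == 0x48.  word_reverse extends this to all
         * 32 bits.
         */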
   2993
    2994#define MASKBITS 16 /* Arbitrary value - implementation dependent, to be fixed */
   2995target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
   2996{
   2997    uint32_t a, b, d, mask;
   2998
   2999    mask = UINT32_MAX >> (32 - MASKBITS);
   3000    a = arg1 & mask;
   3001    b = arg2 & mask;
   3002    d = word_reverse(1 + word_reverse(a | ~b));
   3003    return (arg1 & ~mask) | (d & b);
   3004}
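
        /*
         * brinc implements the SPE bit-reversed increment used for FFT
         * buffer addressing: the low MASKBITS bits selected by the mask
         * in arg2 are incremented in bit-reversed order (reverse, add
         * one, reverse back), while the bits of arg1 outside the mask
         * pass through unchanged.
         */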
   3005
   3006uint32_t helper_cntlsw32(uint32_t val)
   3007{
   3008    if (val & 0x80000000) {
   3009        return clz32(~val);
   3010    } else {
   3011        return clz32(val);
   3012    }
   3013}
   3014
   3015uint32_t helper_cntlzw32(uint32_t val)
   3016{
   3017    return clz32(val);
   3018}
   3019
   3020/* 440 specific */
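        /*
         * dlmzb: determine leftmost zero byte.  Scans the 8-byte string
         * high:low and returns the 1-based index of the first zero byte
         * (8 if there is none), mirroring the index into the low bits
         * of XER.  With update_Rc, CR0 records whether the zero byte
         * was found in the high word (0b0100), the low word (0b1000) or
         * not at all (0b0010), together with the summary-overflow bit.
         */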
   3021target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
   3022                          target_ulong low, uint32_t update_Rc)
   3023{
   3024    target_ulong mask;
   3025    int i;
   3026
   3027    i = 1;
   3028    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
   3029        if ((high & mask) == 0) {
   3030            if (update_Rc) {
   3031                env->crf[0] = 0x4;
   3032            }
   3033            goto done;
   3034        }
   3035        i++;
   3036    }
   3037    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
   3038        if ((low & mask) == 0) {
   3039            if (update_Rc) {
   3040                env->crf[0] = 0x8;
   3041            }
   3042            goto done;
   3043        }
   3044        i++;
   3045    }
   3046    i = 8;
   3047    if (update_Rc) {
   3048        env->crf[0] = 0x2;
   3049    }
   3050 done:
   3051    env->xer = (env->xer & ~0x7F) | i;
   3052    if (update_Rc) {
   3053        env->crf[0] |= xer_so;
   3054    }
   3055    return i;
   3056}