cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

msa_helper.c (337553B)


      1/*
      2 * MIPS SIMD Architecture Module Instruction emulation helpers for QEMU.
      3 *
      4 * Copyright (c) 2014 Imagination Technologies
      5 *
      6 * This library is free software; you can redistribute it and/or
      7 * modify it under the terms of the GNU Lesser General Public
      8 * License as published by the Free Software Foundation; either
      9 * version 2.1 of the License, or (at your option) any later version.
     10 *
     11 * This library is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14 * Lesser General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU Lesser General Public
     17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18 */
     19
     20#include "qemu/osdep.h"
     21#include "cpu.h"
     22#include "internal.h"
     23#include "tcg/tcg.h"
     24#include "exec/exec-all.h"
     25#include "exec/helper-proto.h"
     26#include "exec/memop.h"
     27#include "fpu/softfloat.h"
     28#include "fpu_helper.h"
     29
     30/* Data format min and max values */
     31#define DF_BITS(df) (1 << ((df) + 3))
     32
     33#define DF_MAX_INT(df)  (int64_t)((1LL << (DF_BITS(df) - 1)) - 1)
     34#define M_MAX_INT(m)    (int64_t)((1LL << ((m)         - 1)) - 1)
     35
     36#define DF_MIN_INT(df)  (int64_t)(-(1LL << (DF_BITS(df) - 1)))
     37#define M_MIN_INT(m)    (int64_t)(-(1LL << ((m)         - 1)))
     38
     39#define DF_MAX_UINT(df) (uint64_t)(-1ULL >> (64 - DF_BITS(df)))
     40#define M_MAX_UINT(m)   (uint64_t)(-1ULL >> (64 - (m)))
     41
     42#define UNSIGNED(x, df) ((x) & DF_MAX_UINT(df))
     43#define SIGNED(x, df)                                                   \
     44    ((((int64_t)x) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)))
     45
     46/* Element-by-element access macros */
     47#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
     48
     49
     50
     51/*
     52 * Bit Count
     53 * ---------
     54 *
     55 * +---------------+----------------------------------------------------------+
     56 * | NLOC.B        | Vector Leading Ones Count (byte)                         |
     57 * | NLOC.H        | Vector Leading Ones Count (halfword)                     |
     58 * | NLOC.W        | Vector Leading Ones Count (word)                         |
     59 * | NLOC.D        | Vector Leading Ones Count (doubleword)                   |
     60 * | NLZC.B        | Vector Leading Zeros Count (byte)                        |
     61 * | NLZC.H        | Vector Leading Zeros Count (halfword)                    |
     62 * | NLZC.W        | Vector Leading Zeros Count (word)                        |
     63 * | NLZC.D        | Vector Leading Zeros Count (doubleword)                  |
     64 * | PCNT.B        | Vector Population Count (byte)                           |
     65 * | PCNT.H        | Vector Population Count (halfword)                       |
     66 * | PCNT.W        | Vector Population Count (word)                           |
     67 * | PCNT.D        | Vector Population Count (doubleword)                     |
     68 * +---------------+----------------------------------------------------------+
     69 */
     70
     71static inline int64_t msa_nlzc_df(uint32_t df, int64_t arg)
     72{
     73    uint64_t x, y;
     74    int n, c;
     75
     76    x = UNSIGNED(arg, df);
     77    n = DF_BITS(df);
     78    c = DF_BITS(df) / 2;
     79
     80    do {
     81        y = x >> c;
     82        if (y != 0) {
     83            n = n - c;
     84            x = y;
     85        }
     86        c = c >> 1;
     87    } while (c != 0);
     88
     89    return n - x;
     90}
     91
     92static inline int64_t msa_nloc_df(uint32_t df, int64_t arg)
     93{
     94    return msa_nlzc_df(df, UNSIGNED((~arg), df));
     95}
     96
     97void helper_msa_nloc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
     98{
     99    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    100    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    101
    102    pwd->b[0]  = msa_nloc_df(DF_BYTE, pws->b[0]);
    103    pwd->b[1]  = msa_nloc_df(DF_BYTE, pws->b[1]);
    104    pwd->b[2]  = msa_nloc_df(DF_BYTE, pws->b[2]);
    105    pwd->b[3]  = msa_nloc_df(DF_BYTE, pws->b[3]);
    106    pwd->b[4]  = msa_nloc_df(DF_BYTE, pws->b[4]);
    107    pwd->b[5]  = msa_nloc_df(DF_BYTE, pws->b[5]);
    108    pwd->b[6]  = msa_nloc_df(DF_BYTE, pws->b[6]);
    109    pwd->b[7]  = msa_nloc_df(DF_BYTE, pws->b[7]);
    110    pwd->b[8]  = msa_nloc_df(DF_BYTE, pws->b[8]);
    111    pwd->b[9]  = msa_nloc_df(DF_BYTE, pws->b[9]);
    112    pwd->b[10] = msa_nloc_df(DF_BYTE, pws->b[10]);
    113    pwd->b[11] = msa_nloc_df(DF_BYTE, pws->b[11]);
    114    pwd->b[12] = msa_nloc_df(DF_BYTE, pws->b[12]);
    115    pwd->b[13] = msa_nloc_df(DF_BYTE, pws->b[13]);
    116    pwd->b[14] = msa_nloc_df(DF_BYTE, pws->b[14]);
    117    pwd->b[15] = msa_nloc_df(DF_BYTE, pws->b[15]);
    118}
    119
    120void helper_msa_nloc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    121{
    122    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    123    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    124
    125    pwd->h[0]  = msa_nloc_df(DF_HALF, pws->h[0]);
    126    pwd->h[1]  = msa_nloc_df(DF_HALF, pws->h[1]);
    127    pwd->h[2]  = msa_nloc_df(DF_HALF, pws->h[2]);
    128    pwd->h[3]  = msa_nloc_df(DF_HALF, pws->h[3]);
    129    pwd->h[4]  = msa_nloc_df(DF_HALF, pws->h[4]);
    130    pwd->h[5]  = msa_nloc_df(DF_HALF, pws->h[5]);
    131    pwd->h[6]  = msa_nloc_df(DF_HALF, pws->h[6]);
    132    pwd->h[7]  = msa_nloc_df(DF_HALF, pws->h[7]);
    133}
    134
    135void helper_msa_nloc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    136{
    137    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    138    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    139
    140    pwd->w[0]  = msa_nloc_df(DF_WORD, pws->w[0]);
    141    pwd->w[1]  = msa_nloc_df(DF_WORD, pws->w[1]);
    142    pwd->w[2]  = msa_nloc_df(DF_WORD, pws->w[2]);
    143    pwd->w[3]  = msa_nloc_df(DF_WORD, pws->w[3]);
    144}
    145
    146void helper_msa_nloc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    147{
    148    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    149    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    150
    151    pwd->d[0]  = msa_nloc_df(DF_DOUBLE, pws->d[0]);
    152    pwd->d[1]  = msa_nloc_df(DF_DOUBLE, pws->d[1]);
    153}
    154
    155void helper_msa_nlzc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    156{
    157    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    158    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    159
    160    pwd->b[0]  = msa_nlzc_df(DF_BYTE, pws->b[0]);
    161    pwd->b[1]  = msa_nlzc_df(DF_BYTE, pws->b[1]);
    162    pwd->b[2]  = msa_nlzc_df(DF_BYTE, pws->b[2]);
    163    pwd->b[3]  = msa_nlzc_df(DF_BYTE, pws->b[3]);
    164    pwd->b[4]  = msa_nlzc_df(DF_BYTE, pws->b[4]);
    165    pwd->b[5]  = msa_nlzc_df(DF_BYTE, pws->b[5]);
    166    pwd->b[6]  = msa_nlzc_df(DF_BYTE, pws->b[6]);
    167    pwd->b[7]  = msa_nlzc_df(DF_BYTE, pws->b[7]);
    168    pwd->b[8]  = msa_nlzc_df(DF_BYTE, pws->b[8]);
    169    pwd->b[9]  = msa_nlzc_df(DF_BYTE, pws->b[9]);
    170    pwd->b[10] = msa_nlzc_df(DF_BYTE, pws->b[10]);
    171    pwd->b[11] = msa_nlzc_df(DF_BYTE, pws->b[11]);
    172    pwd->b[12] = msa_nlzc_df(DF_BYTE, pws->b[12]);
    173    pwd->b[13] = msa_nlzc_df(DF_BYTE, pws->b[13]);
    174    pwd->b[14] = msa_nlzc_df(DF_BYTE, pws->b[14]);
    175    pwd->b[15] = msa_nlzc_df(DF_BYTE, pws->b[15]);
    176}
    177
    178void helper_msa_nlzc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    179{
    180    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    181    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    182
    183    pwd->h[0]  = msa_nlzc_df(DF_HALF, pws->h[0]);
    184    pwd->h[1]  = msa_nlzc_df(DF_HALF, pws->h[1]);
    185    pwd->h[2]  = msa_nlzc_df(DF_HALF, pws->h[2]);
    186    pwd->h[3]  = msa_nlzc_df(DF_HALF, pws->h[3]);
    187    pwd->h[4]  = msa_nlzc_df(DF_HALF, pws->h[4]);
    188    pwd->h[5]  = msa_nlzc_df(DF_HALF, pws->h[5]);
    189    pwd->h[6]  = msa_nlzc_df(DF_HALF, pws->h[6]);
    190    pwd->h[7]  = msa_nlzc_df(DF_HALF, pws->h[7]);
    191}
    192
    193void helper_msa_nlzc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    194{
    195    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    196    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    197
    198    pwd->w[0]  = msa_nlzc_df(DF_WORD, pws->w[0]);
    199    pwd->w[1]  = msa_nlzc_df(DF_WORD, pws->w[1]);
    200    pwd->w[2]  = msa_nlzc_df(DF_WORD, pws->w[2]);
    201    pwd->w[3]  = msa_nlzc_df(DF_WORD, pws->w[3]);
    202}
    203
    204void helper_msa_nlzc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    205{
    206    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    207    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    208
    209    pwd->d[0]  = msa_nlzc_df(DF_DOUBLE, pws->d[0]);
    210    pwd->d[1]  = msa_nlzc_df(DF_DOUBLE, pws->d[1]);
    211}
    212
    213static inline int64_t msa_pcnt_df(uint32_t df, int64_t arg)
    214{
    215    uint64_t x;
    216
    217    x = UNSIGNED(arg, df);
    218
    219    x = (x & 0x5555555555555555ULL) + ((x >>  1) & 0x5555555555555555ULL);
    220    x = (x & 0x3333333333333333ULL) + ((x >>  2) & 0x3333333333333333ULL);
    221    x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >>  4) & 0x0F0F0F0F0F0F0F0FULL);
    222    x = (x & 0x00FF00FF00FF00FFULL) + ((x >>  8) & 0x00FF00FF00FF00FFULL);
    223    x = (x & 0x0000FFFF0000FFFFULL) + ((x >> 16) & 0x0000FFFF0000FFFFULL);
    224    x = (x & 0x00000000FFFFFFFFULL) + ((x >> 32));
    225
    226    return x;
    227}
    228
    229void helper_msa_pcnt_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    230{
    231    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    232    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    233
    234    pwd->b[0]  = msa_pcnt_df(DF_BYTE, pws->b[0]);
    235    pwd->b[1]  = msa_pcnt_df(DF_BYTE, pws->b[1]);
    236    pwd->b[2]  = msa_pcnt_df(DF_BYTE, pws->b[2]);
    237    pwd->b[3]  = msa_pcnt_df(DF_BYTE, pws->b[3]);
    238    pwd->b[4]  = msa_pcnt_df(DF_BYTE, pws->b[4]);
    239    pwd->b[5]  = msa_pcnt_df(DF_BYTE, pws->b[5]);
    240    pwd->b[6]  = msa_pcnt_df(DF_BYTE, pws->b[6]);
    241    pwd->b[7]  = msa_pcnt_df(DF_BYTE, pws->b[7]);
    242    pwd->b[8]  = msa_pcnt_df(DF_BYTE, pws->b[8]);
    243    pwd->b[9]  = msa_pcnt_df(DF_BYTE, pws->b[9]);
    244    pwd->b[10] = msa_pcnt_df(DF_BYTE, pws->b[10]);
    245    pwd->b[11] = msa_pcnt_df(DF_BYTE, pws->b[11]);
    246    pwd->b[12] = msa_pcnt_df(DF_BYTE, pws->b[12]);
    247    pwd->b[13] = msa_pcnt_df(DF_BYTE, pws->b[13]);
    248    pwd->b[14] = msa_pcnt_df(DF_BYTE, pws->b[14]);
    249    pwd->b[15] = msa_pcnt_df(DF_BYTE, pws->b[15]);
    250}
    251
    252void helper_msa_pcnt_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    253{
    254    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    255    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    256
    257    pwd->h[0]  = msa_pcnt_df(DF_HALF, pws->h[0]);
    258    pwd->h[1]  = msa_pcnt_df(DF_HALF, pws->h[1]);
    259    pwd->h[2]  = msa_pcnt_df(DF_HALF, pws->h[2]);
    260    pwd->h[3]  = msa_pcnt_df(DF_HALF, pws->h[3]);
    261    pwd->h[4]  = msa_pcnt_df(DF_HALF, pws->h[4]);
    262    pwd->h[5]  = msa_pcnt_df(DF_HALF, pws->h[5]);
    263    pwd->h[6]  = msa_pcnt_df(DF_HALF, pws->h[6]);
    264    pwd->h[7]  = msa_pcnt_df(DF_HALF, pws->h[7]);
    265}
    266
    267void helper_msa_pcnt_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    268{
    269    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    270    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    271
    272    pwd->w[0]  = msa_pcnt_df(DF_WORD, pws->w[0]);
    273    pwd->w[1]  = msa_pcnt_df(DF_WORD, pws->w[1]);
    274    pwd->w[2]  = msa_pcnt_df(DF_WORD, pws->w[2]);
    275    pwd->w[3]  = msa_pcnt_df(DF_WORD, pws->w[3]);
    276}
    277
    278void helper_msa_pcnt_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    279{
    280    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    281    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    282
    283    pwd->d[0]  = msa_pcnt_df(DF_DOUBLE, pws->d[0]);
    284    pwd->d[1]  = msa_pcnt_df(DF_DOUBLE, pws->d[1]);
    285}
    286
    287
    288/*
    289 * Bit Move
    290 * --------
    291 *
    292 * +---------------+----------------------------------------------------------+
    293 * | BINSL.B       | Vector Bit Insert Left (byte)                            |
    294 * | BINSL.H       | Vector Bit Insert Left (halfword)                        |
    295 * | BINSL.W       | Vector Bit Insert Left (word)                            |
    296 * | BINSL.D       | Vector Bit Insert Left (doubleword)                      |
    297 * | BINSR.B       | Vector Bit Insert Right (byte)                           |
    298 * | BINSR.H       | Vector Bit Insert Right (halfword)                       |
    299 * | BINSR.W       | Vector Bit Insert Right (word)                           |
    300 * | BINSR.D       | Vector Bit Insert Right (doubleword)                     |
    301 * | BMNZ.V        | Vector Bit Move If Not Zero                              |
    302 * | BMZ.V         | Vector Bit Move If Zero                                  |
    303 * | BSEL.V        | Vector Bit Select                                        |
    304 * +---------------+----------------------------------------------------------+
    305 */
    306
    307/* Data format bit position and unsigned values */
    308#define BIT_POSITION(x, df) ((uint64_t)(x) % DF_BITS(df))
    309
    310static inline int64_t msa_binsl_df(uint32_t df,
    311                                   int64_t dest, int64_t arg1, int64_t arg2)
    312{
    313    uint64_t u_arg1 = UNSIGNED(arg1, df);
    314    uint64_t u_dest = UNSIGNED(dest, df);
    315    int32_t sh_d = BIT_POSITION(arg2, df) + 1;
    316    int32_t sh_a = DF_BITS(df) - sh_d;
    317    if (sh_d == DF_BITS(df)) {
    318        return u_arg1;
    319    } else {
    320        return UNSIGNED(UNSIGNED(u_dest << sh_d, df) >> sh_d, df) |
    321               UNSIGNED(UNSIGNED(u_arg1 >> sh_a, df) << sh_a, df);
    322    }
    323}
    324
    325void helper_msa_binsl_b(CPUMIPSState *env,
    326                        uint32_t wd, uint32_t ws, uint32_t wt)
    327{
    328    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    329    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    330    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    331
    332    pwd->b[0]  = msa_binsl_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
    333    pwd->b[1]  = msa_binsl_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
    334    pwd->b[2]  = msa_binsl_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
    335    pwd->b[3]  = msa_binsl_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
    336    pwd->b[4]  = msa_binsl_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
    337    pwd->b[5]  = msa_binsl_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
    338    pwd->b[6]  = msa_binsl_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
    339    pwd->b[7]  = msa_binsl_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
    340    pwd->b[8]  = msa_binsl_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
    341    pwd->b[9]  = msa_binsl_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
    342    pwd->b[10] = msa_binsl_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
    343    pwd->b[11] = msa_binsl_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
    344    pwd->b[12] = msa_binsl_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
    345    pwd->b[13] = msa_binsl_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
    346    pwd->b[14] = msa_binsl_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
    347    pwd->b[15] = msa_binsl_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
    348}
    349
    350void helper_msa_binsl_h(CPUMIPSState *env,
    351                        uint32_t wd, uint32_t ws, uint32_t wt)
    352{
    353    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    354    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    355    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    356
    357    pwd->h[0]  = msa_binsl_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
    358    pwd->h[1]  = msa_binsl_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
    359    pwd->h[2]  = msa_binsl_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
    360    pwd->h[3]  = msa_binsl_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
    361    pwd->h[4]  = msa_binsl_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
    362    pwd->h[5]  = msa_binsl_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
    363    pwd->h[6]  = msa_binsl_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
    364    pwd->h[7]  = msa_binsl_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
    365}
    366
    367void helper_msa_binsl_w(CPUMIPSState *env,
    368                        uint32_t wd, uint32_t ws, uint32_t wt)
    369{
    370    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    371    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    372    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    373
    374    pwd->w[0]  = msa_binsl_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
    375    pwd->w[1]  = msa_binsl_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
    376    pwd->w[2]  = msa_binsl_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
    377    pwd->w[3]  = msa_binsl_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
    378}
    379
    380void helper_msa_binsl_d(CPUMIPSState *env,
    381                        uint32_t wd, uint32_t ws, uint32_t wt)
    382{
    383    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    384    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    385    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    386
    387    pwd->d[0]  = msa_binsl_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
    388    pwd->d[1]  = msa_binsl_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
    389}
    390
    391static inline int64_t msa_binsr_df(uint32_t df,
    392                                   int64_t dest, int64_t arg1, int64_t arg2)
    393{
    394    uint64_t u_arg1 = UNSIGNED(arg1, df);
    395    uint64_t u_dest = UNSIGNED(dest, df);
    396    int32_t sh_d = BIT_POSITION(arg2, df) + 1;
    397    int32_t sh_a = DF_BITS(df) - sh_d;
    398    if (sh_d == DF_BITS(df)) {
    399        return u_arg1;
    400    } else {
    401        return UNSIGNED(UNSIGNED(u_dest >> sh_d, df) << sh_d, df) |
    402               UNSIGNED(UNSIGNED(u_arg1 << sh_a, df) >> sh_a, df);
    403    }
    404}
    405
    406void helper_msa_binsr_b(CPUMIPSState *env,
    407                        uint32_t wd, uint32_t ws, uint32_t wt)
    408{
    409    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    410    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    411    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    412
    413    pwd->b[0]  = msa_binsr_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
    414    pwd->b[1]  = msa_binsr_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
    415    pwd->b[2]  = msa_binsr_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
    416    pwd->b[3]  = msa_binsr_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
    417    pwd->b[4]  = msa_binsr_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
    418    pwd->b[5]  = msa_binsr_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
    419    pwd->b[6]  = msa_binsr_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
    420    pwd->b[7]  = msa_binsr_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
    421    pwd->b[8]  = msa_binsr_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
    422    pwd->b[9]  = msa_binsr_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
    423    pwd->b[10] = msa_binsr_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
    424    pwd->b[11] = msa_binsr_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
    425    pwd->b[12] = msa_binsr_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
    426    pwd->b[13] = msa_binsr_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
    427    pwd->b[14] = msa_binsr_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
    428    pwd->b[15] = msa_binsr_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
    429}
    430
    431void helper_msa_binsr_h(CPUMIPSState *env,
    432                        uint32_t wd, uint32_t ws, uint32_t wt)
    433{
    434    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    435    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    436    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    437
    438    pwd->h[0]  = msa_binsr_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
    439    pwd->h[1]  = msa_binsr_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
    440    pwd->h[2]  = msa_binsr_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
    441    pwd->h[3]  = msa_binsr_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
    442    pwd->h[4]  = msa_binsr_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
    443    pwd->h[5]  = msa_binsr_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
    444    pwd->h[6]  = msa_binsr_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
    445    pwd->h[7]  = msa_binsr_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
    446}
    447
    448void helper_msa_binsr_w(CPUMIPSState *env,
    449                        uint32_t wd, uint32_t ws, uint32_t wt)
    450{
    451    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    452    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    453    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    454
    455    pwd->w[0]  = msa_binsr_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
    456    pwd->w[1]  = msa_binsr_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
    457    pwd->w[2]  = msa_binsr_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
    458    pwd->w[3]  = msa_binsr_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
    459}
    460
    461void helper_msa_binsr_d(CPUMIPSState *env,
    462                        uint32_t wd, uint32_t ws, uint32_t wt)
    463{
    464    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    465    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    466    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    467
    468    pwd->d[0]  = msa_binsr_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
    469    pwd->d[1]  = msa_binsr_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
    470}
    471
    472void helper_msa_bmnz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    473{
    474    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    475    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    476    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    477
    478    pwd->d[0] = UNSIGNED(                                                     \
    479        ((pwd->d[0] & (~pwt->d[0])) | (pws->d[0] & pwt->d[0])), DF_DOUBLE);
    480    pwd->d[1] = UNSIGNED(                                                     \
    481        ((pwd->d[1] & (~pwt->d[1])) | (pws->d[1] & pwt->d[1])), DF_DOUBLE);
    482}
    483
    484void helper_msa_bmz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    485{
    486    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    487    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    488    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    489
    490    pwd->d[0] = UNSIGNED(                                                     \
    491        ((pwd->d[0] & pwt->d[0]) | (pws->d[0] & (~pwt->d[0]))), DF_DOUBLE);
    492    pwd->d[1] = UNSIGNED(                                                     \
    493        ((pwd->d[1] & pwt->d[1]) | (pws->d[1] & (~pwt->d[1]))), DF_DOUBLE);
    494}
    495
    496void helper_msa_bsel_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    497{
    498    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    499    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    500    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    501
    502    pwd->d[0] = UNSIGNED(                                                     \
    503        (pws->d[0] & (~pwd->d[0])) | (pwt->d[0] & pwd->d[0]), DF_DOUBLE);
    504    pwd->d[1] = UNSIGNED(                                                     \
    505        (pws->d[1] & (~pwd->d[1])) | (pwt->d[1] & pwd->d[1]), DF_DOUBLE);
    506}
    507
    508
    509/*
    510 * Bit Set
    511 * -------
    512 *
    513 * +---------------+----------------------------------------------------------+
    514 * | BCLR.B        | Vector Bit Clear (byte)                                  |
    515 * | BCLR.H        | Vector Bit Clear (halfword)                              |
    516 * | BCLR.W        | Vector Bit Clear (word)                                  |
    517 * | BCLR.D        | Vector Bit Clear (doubleword)                            |
    518 * | BNEG.B        | Vector Bit Negate (byte)                                 |
    519 * | BNEG.H        | Vector Bit Negate (halfword)                             |
    520 * | BNEG.W        | Vector Bit Negate (word)                                 |
    521 * | BNEG.D        | Vector Bit Negate (doubleword)                           |
    522 * | BSET.B        | Vector Bit Set (byte)                                    |
    523 * | BSET.H        | Vector Bit Set (halfword)                                |
    524 * | BSET.W        | Vector Bit Set (word)                                    |
    525 * | BSET.D        | Vector Bit Set (doubleword)                              |
    526 * +---------------+----------------------------------------------------------+
    527 */
    528
    529static inline int64_t msa_bclr_df(uint32_t df, int64_t arg1, int64_t arg2)
    530{
    531    int32_t b_arg2 = BIT_POSITION(arg2, df);
    532    return UNSIGNED(arg1 & (~(1LL << b_arg2)), df);
    533}
    534
    535void helper_msa_bclr_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    536{
    537    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    538    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    539    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    540
    541    pwd->b[0]  = msa_bclr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
    542    pwd->b[1]  = msa_bclr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
    543    pwd->b[2]  = msa_bclr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
    544    pwd->b[3]  = msa_bclr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
    545    pwd->b[4]  = msa_bclr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
    546    pwd->b[5]  = msa_bclr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
    547    pwd->b[6]  = msa_bclr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
    548    pwd->b[7]  = msa_bclr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
    549    pwd->b[8]  = msa_bclr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
    550    pwd->b[9]  = msa_bclr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
    551    pwd->b[10] = msa_bclr_df(DF_BYTE, pws->b[10], pwt->b[10]);
    552    pwd->b[11] = msa_bclr_df(DF_BYTE, pws->b[11], pwt->b[11]);
    553    pwd->b[12] = msa_bclr_df(DF_BYTE, pws->b[12], pwt->b[12]);
    554    pwd->b[13] = msa_bclr_df(DF_BYTE, pws->b[13], pwt->b[13]);
    555    pwd->b[14] = msa_bclr_df(DF_BYTE, pws->b[14], pwt->b[14]);
    556    pwd->b[15] = msa_bclr_df(DF_BYTE, pws->b[15], pwt->b[15]);
    557}
    558
    559void helper_msa_bclr_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    560{
    561    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    562    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    563    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    564
    565    pwd->h[0]  = msa_bclr_df(DF_HALF, pws->h[0],  pwt->h[0]);
    566    pwd->h[1]  = msa_bclr_df(DF_HALF, pws->h[1],  pwt->h[1]);
    567    pwd->h[2]  = msa_bclr_df(DF_HALF, pws->h[2],  pwt->h[2]);
    568    pwd->h[3]  = msa_bclr_df(DF_HALF, pws->h[3],  pwt->h[3]);
    569    pwd->h[4]  = msa_bclr_df(DF_HALF, pws->h[4],  pwt->h[4]);
    570    pwd->h[5]  = msa_bclr_df(DF_HALF, pws->h[5],  pwt->h[5]);
    571    pwd->h[6]  = msa_bclr_df(DF_HALF, pws->h[6],  pwt->h[6]);
    572    pwd->h[7]  = msa_bclr_df(DF_HALF, pws->h[7],  pwt->h[7]);
    573}
    574
    575void helper_msa_bclr_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    576{
    577    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    578    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    579    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    580
    581    pwd->w[0]  = msa_bclr_df(DF_WORD, pws->w[0],  pwt->w[0]);
    582    pwd->w[1]  = msa_bclr_df(DF_WORD, pws->w[1],  pwt->w[1]);
    583    pwd->w[2]  = msa_bclr_df(DF_WORD, pws->w[2],  pwt->w[2]);
    584    pwd->w[3]  = msa_bclr_df(DF_WORD, pws->w[3],  pwt->w[3]);
    585}
    586
    587void helper_msa_bclr_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    588{
    589    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    590    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    591    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    592
    593    pwd->d[0]  = msa_bclr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
    594    pwd->d[1]  = msa_bclr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
    595}
    596
    597static inline int64_t msa_bneg_df(uint32_t df, int64_t arg1, int64_t arg2)
    598{
    599    int32_t b_arg2 = BIT_POSITION(arg2, df);
    600    return UNSIGNED(arg1 ^ (1LL << b_arg2), df);
    601}
    602
    603void helper_msa_bneg_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    604{
    605    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    606    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    607    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    608
    609    pwd->b[0]  = msa_bneg_df(DF_BYTE, pws->b[0],  pwt->b[0]);
    610    pwd->b[1]  = msa_bneg_df(DF_BYTE, pws->b[1],  pwt->b[1]);
    611    pwd->b[2]  = msa_bneg_df(DF_BYTE, pws->b[2],  pwt->b[2]);
    612    pwd->b[3]  = msa_bneg_df(DF_BYTE, pws->b[3],  pwt->b[3]);
    613    pwd->b[4]  = msa_bneg_df(DF_BYTE, pws->b[4],  pwt->b[4]);
    614    pwd->b[5]  = msa_bneg_df(DF_BYTE, pws->b[5],  pwt->b[5]);
    615    pwd->b[6]  = msa_bneg_df(DF_BYTE, pws->b[6],  pwt->b[6]);
    616    pwd->b[7]  = msa_bneg_df(DF_BYTE, pws->b[7],  pwt->b[7]);
    617    pwd->b[8]  = msa_bneg_df(DF_BYTE, pws->b[8],  pwt->b[8]);
    618    pwd->b[9]  = msa_bneg_df(DF_BYTE, pws->b[9],  pwt->b[9]);
    619    pwd->b[10] = msa_bneg_df(DF_BYTE, pws->b[10], pwt->b[10]);
    620    pwd->b[11] = msa_bneg_df(DF_BYTE, pws->b[11], pwt->b[11]);
    621    pwd->b[12] = msa_bneg_df(DF_BYTE, pws->b[12], pwt->b[12]);
    622    pwd->b[13] = msa_bneg_df(DF_BYTE, pws->b[13], pwt->b[13]);
    623    pwd->b[14] = msa_bneg_df(DF_BYTE, pws->b[14], pwt->b[14]);
    624    pwd->b[15] = msa_bneg_df(DF_BYTE, pws->b[15], pwt->b[15]);
    625}
    626
    627void helper_msa_bneg_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    628{
    629    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    630    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    631    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    632
    633    pwd->h[0]  = msa_bneg_df(DF_HALF, pws->h[0],  pwt->h[0]);
    634    pwd->h[1]  = msa_bneg_df(DF_HALF, pws->h[1],  pwt->h[1]);
    635    pwd->h[2]  = msa_bneg_df(DF_HALF, pws->h[2],  pwt->h[2]);
    636    pwd->h[3]  = msa_bneg_df(DF_HALF, pws->h[3],  pwt->h[3]);
    637    pwd->h[4]  = msa_bneg_df(DF_HALF, pws->h[4],  pwt->h[4]);
    638    pwd->h[5]  = msa_bneg_df(DF_HALF, pws->h[5],  pwt->h[5]);
    639    pwd->h[6]  = msa_bneg_df(DF_HALF, pws->h[6],  pwt->h[6]);
    640    pwd->h[7]  = msa_bneg_df(DF_HALF, pws->h[7],  pwt->h[7]);
    641}
    642
    643void helper_msa_bneg_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    644{
    645    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    646    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    647    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    648
    649    pwd->w[0]  = msa_bneg_df(DF_WORD, pws->w[0],  pwt->w[0]);
    650    pwd->w[1]  = msa_bneg_df(DF_WORD, pws->w[1],  pwt->w[1]);
    651    pwd->w[2]  = msa_bneg_df(DF_WORD, pws->w[2],  pwt->w[2]);
    652    pwd->w[3]  = msa_bneg_df(DF_WORD, pws->w[3],  pwt->w[3]);
    653}
    654
    655void helper_msa_bneg_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    656{
    657    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    658    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    659    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    660
    661    pwd->d[0]  = msa_bneg_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
    662    pwd->d[1]  = msa_bneg_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
    663}
    664
    665static inline int64_t msa_bset_df(uint32_t df, int64_t arg1,
    666        int64_t arg2)
    667{
    668    int32_t b_arg2 = BIT_POSITION(arg2, df);
    669    return UNSIGNED(arg1 | (1LL << b_arg2), df);
    670}
    671
    672void helper_msa_bset_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    673{
    674    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    675    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    676    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    677
    678    pwd->b[0]  = msa_bset_df(DF_BYTE, pws->b[0],  pwt->b[0]);
    679    pwd->b[1]  = msa_bset_df(DF_BYTE, pws->b[1],  pwt->b[1]);
    680    pwd->b[2]  = msa_bset_df(DF_BYTE, pws->b[2],  pwt->b[2]);
    681    pwd->b[3]  = msa_bset_df(DF_BYTE, pws->b[3],  pwt->b[3]);
    682    pwd->b[4]  = msa_bset_df(DF_BYTE, pws->b[4],  pwt->b[4]);
    683    pwd->b[5]  = msa_bset_df(DF_BYTE, pws->b[5],  pwt->b[5]);
    684    pwd->b[6]  = msa_bset_df(DF_BYTE, pws->b[6],  pwt->b[6]);
    685    pwd->b[7]  = msa_bset_df(DF_BYTE, pws->b[7],  pwt->b[7]);
    686    pwd->b[8]  = msa_bset_df(DF_BYTE, pws->b[8],  pwt->b[8]);
    687    pwd->b[9]  = msa_bset_df(DF_BYTE, pws->b[9],  pwt->b[9]);
    688    pwd->b[10] = msa_bset_df(DF_BYTE, pws->b[10], pwt->b[10]);
    689    pwd->b[11] = msa_bset_df(DF_BYTE, pws->b[11], pwt->b[11]);
    690    pwd->b[12] = msa_bset_df(DF_BYTE, pws->b[12], pwt->b[12]);
    691    pwd->b[13] = msa_bset_df(DF_BYTE, pws->b[13], pwt->b[13]);
    692    pwd->b[14] = msa_bset_df(DF_BYTE, pws->b[14], pwt->b[14]);
    693    pwd->b[15] = msa_bset_df(DF_BYTE, pws->b[15], pwt->b[15]);
    694}
    695
    696void helper_msa_bset_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    697{
    698    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    699    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    700    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    701
    702    pwd->h[0]  = msa_bset_df(DF_HALF, pws->h[0],  pwt->h[0]);
    703    pwd->h[1]  = msa_bset_df(DF_HALF, pws->h[1],  pwt->h[1]);
    704    pwd->h[2]  = msa_bset_df(DF_HALF, pws->h[2],  pwt->h[2]);
    705    pwd->h[3]  = msa_bset_df(DF_HALF, pws->h[3],  pwt->h[3]);
    706    pwd->h[4]  = msa_bset_df(DF_HALF, pws->h[4],  pwt->h[4]);
    707    pwd->h[5]  = msa_bset_df(DF_HALF, pws->h[5],  pwt->h[5]);
    708    pwd->h[6]  = msa_bset_df(DF_HALF, pws->h[6],  pwt->h[6]);
    709    pwd->h[7]  = msa_bset_df(DF_HALF, pws->h[7],  pwt->h[7]);
    710}
    711
    712void helper_msa_bset_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    713{
    714    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    715    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    716    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    717
    718    pwd->w[0]  = msa_bset_df(DF_WORD, pws->w[0],  pwt->w[0]);
    719    pwd->w[1]  = msa_bset_df(DF_WORD, pws->w[1],  pwt->w[1]);
    720    pwd->w[2]  = msa_bset_df(DF_WORD, pws->w[2],  pwt->w[2]);
    721    pwd->w[3]  = msa_bset_df(DF_WORD, pws->w[3],  pwt->w[3]);
    722}
    723
    724void helper_msa_bset_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    725{
    726    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    727    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    728    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    729
    730    pwd->d[0]  = msa_bset_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
    731    pwd->d[1]  = msa_bset_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
    732}
    733
    734
    735/*
    736 * Fixed Multiply
    737 * --------------
    738 *
    739 * +---------------+----------------------------------------------------------+
    740 * | MADD_Q.H      | Vector Fixed-Point Multiply and Add (halfword)           |
    741 * | MADD_Q.W      | Vector Fixed-Point Multiply and Add (word)               |
    742 * | MADDR_Q.H     | Vector Fixed-Point Multiply and Add Rounded (halfword)   |
    743 * | MADDR_Q.W     | Vector Fixed-Point Multiply and Add Rounded (word)       |
    744 * | MSUB_Q.H      | Vector Fixed-Point Multiply and Subtr. (halfword)        |
    745 * | MSUB_Q.W      | Vector Fixed-Point Multiply and Subtr. (word)            |
    746 * | MSUBR_Q.H     | Vector Fixed-Point Multiply and Subtr. Rounded (halfword)|
    747 * | MSUBR_Q.W     | Vector Fixed-Point Multiply and Subtr. Rounded (word)    |
    748 * | MUL_Q.H       | Vector Fixed-Point Multiply (halfword)                   |
    749 * | MUL_Q.W       | Vector Fixed-Point Multiply (word)                       |
    750 * | MULR_Q.H      | Vector Fixed-Point Multiply Rounded (halfword)           |
    751 * | MULR_Q.W      | Vector Fixed-Point Multiply Rounded (word)               |
    752 * +---------------+----------------------------------------------------------+
    753 */
    754
    755/* TODO: insert Fixed Multiply group helpers here */
    756
    757
    758/*
    759 * Float Max Min
    760 * -------------
    761 *
    762 * +---------------+----------------------------------------------------------+
    763 * | FMAX_A.W      | Vector Floating-Point Maximum (Absolute) (word)          |
    764 * | FMAX_A.D      | Vector Floating-Point Maximum (Absolute) (doubleword)    |
    765 * | FMAX.W        | Vector Floating-Point Maximum (word)                     |
    766 * | FMAX.D        | Vector Floating-Point Maximum (doubleword)               |
    767 * | FMIN_A.W      | Vector Floating-Point Minimum (Absolute) (word)          |
    768 * | FMIN_A.D      | Vector Floating-Point Minimum (Absolute) (doubleword)    |
    769 * | FMIN.W        | Vector Floating-Point Minimum (word)                     |
    770 * | FMIN.D        | Vector Floating-Point Minimum (doubleword)               |
    771 * +---------------+----------------------------------------------------------+
    772 */
    773
    774/* TODO: insert Float Max Min group helpers here */
    775
    776
    777/*
    778 * Int Add
    779 * -------
    780 *
    781 * +---------------+----------------------------------------------------------+
    782 * | ADD_A.B       | Vector Add Absolute Values (byte)                        |
    783 * | ADD_A.H       | Vector Add Absolute Values (halfword)                    |
    784 * | ADD_A.W       | Vector Add Absolute Values (word)                        |
    785 * | ADD_A.D       | Vector Add Absolute Values (doubleword)                  |
    786 * | ADDS_A.B      | Vector Signed Saturated Add (of Absolute) (byte)         |
    787 * | ADDS_A.H      | Vector Signed Saturated Add (of Absolute) (halfword)     |
    788 * | ADDS_A.W      | Vector Signed Saturated Add (of Absolute) (word)         |
    789 * | ADDS_A.D      | Vector Signed Saturated Add (of Absolute) (doubleword)   |
    790 * | ADDS_S.B      | Vector Signed Saturated Add (of Signed) (byte)           |
    791 * | ADDS_S.H      | Vector Signed Saturated Add (of Signed) (halfword)       |
    792 * | ADDS_S.W      | Vector Signed Saturated Add (of Signed) (word)           |
    793 * | ADDS_S.D      | Vector Signed Saturated Add (of Signed) (doubleword)     |
    794 * | ADDS_U.B      | Vector Unsigned Saturated Add (of Unsigned) (byte)       |
    795 * | ADDS_U.H      | Vector Unsigned Saturated Add (of Unsigned) (halfword)   |
    796 * | ADDS_U.W      | Vector Unsigned Saturated Add (of Unsigned) (word)       |
    797 * | ADDS_U.D      | Vector Unsigned Saturated Add (of Unsigned) (doubleword) |
    798 * | ADDV.B        | Vector Add (byte)                                        |
    799 * | ADDV.H        | Vector Add (halfword)                                    |
    800 * | ADDV.W        | Vector Add (word)                                        |
    801 * | ADDV.D        | Vector Add (doubleword)                                  |
    802 * | HADD_S.H      | Vector Signed Horizontal Add (halfword)                  |
    803 * | HADD_S.W      | Vector Signed Horizontal Add (word)                      |
    804 * | HADD_S.D      | Vector Signed Horizontal Add (doubleword)                |
 * | HADD_U.H      | Vector Unsigned Horizontal Add (halfword)                |
 * | HADD_U.W      | Vector Unsigned Horizontal Add (word)                    |
 * | HADD_U.D      | Vector Unsigned Horizontal Add (doubleword)              |
    808 * +---------------+----------------------------------------------------------+
    809 */
    810
    811
    812static inline int64_t msa_add_a_df(uint32_t df, int64_t arg1, int64_t arg2)
    813{
    814    uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
    815    uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
    816    return abs_arg1 + abs_arg2;
    817}
    818
    819void helper_msa_add_a_b(CPUMIPSState *env,
    820                        uint32_t wd, uint32_t ws, uint32_t wt)
    821{
    822    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    823    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    824    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    825
    826    pwd->b[0]  = msa_add_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
    827    pwd->b[1]  = msa_add_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
    828    pwd->b[2]  = msa_add_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
    829    pwd->b[3]  = msa_add_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
    830    pwd->b[4]  = msa_add_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
    831    pwd->b[5]  = msa_add_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
    832    pwd->b[6]  = msa_add_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
    833    pwd->b[7]  = msa_add_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
    834    pwd->b[8]  = msa_add_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
    835    pwd->b[9]  = msa_add_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
    836    pwd->b[10] = msa_add_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
    837    pwd->b[11] = msa_add_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
    838    pwd->b[12] = msa_add_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
    839    pwd->b[13] = msa_add_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
    840    pwd->b[14] = msa_add_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
    841    pwd->b[15] = msa_add_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
    842}
    843
    844void helper_msa_add_a_h(CPUMIPSState *env,
    845                        uint32_t wd, uint32_t ws, uint32_t wt)
    846{
    847    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    848    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    849    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    850
    851    pwd->h[0]  = msa_add_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
    852    pwd->h[1]  = msa_add_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
    853    pwd->h[2]  = msa_add_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
    854    pwd->h[3]  = msa_add_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
    855    pwd->h[4]  = msa_add_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
    856    pwd->h[5]  = msa_add_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
    857    pwd->h[6]  = msa_add_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
    858    pwd->h[7]  = msa_add_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
    859}
    860
    861void helper_msa_add_a_w(CPUMIPSState *env,
    862                        uint32_t wd, uint32_t ws, uint32_t wt)
    863{
    864    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    865    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    866    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    867
    868    pwd->w[0]  = msa_add_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
    869    pwd->w[1]  = msa_add_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
    870    pwd->w[2]  = msa_add_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
    871    pwd->w[3]  = msa_add_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
    872}
    873
    874void helper_msa_add_a_d(CPUMIPSState *env,
    875                        uint32_t wd, uint32_t ws, uint32_t wt)
    876{
    877    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    878    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    879    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    880
    881    pwd->d[0]  = msa_add_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
    882    pwd->d[1]  = msa_add_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
    883}
    884
    885
    886static inline int64_t msa_adds_a_df(uint32_t df, int64_t arg1, int64_t arg2)
    887{
    888    uint64_t max_int = (uint64_t)DF_MAX_INT(df);
    889    uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
    890    uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
    891    if (abs_arg1 > max_int || abs_arg2 > max_int) {
    892        return (int64_t)max_int;
    893    } else {
    894        return (abs_arg1 < max_int - abs_arg2) ? abs_arg1 + abs_arg2 : max_int;
    895    }
    896}
    897
    898void helper_msa_adds_a_b(CPUMIPSState *env,
    899                         uint32_t wd, uint32_t ws, uint32_t wt)
    900{
    901    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    902    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    903    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    904
    905    pwd->b[0]  = msa_adds_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
    906    pwd->b[1]  = msa_adds_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
    907    pwd->b[2]  = msa_adds_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
    908    pwd->b[3]  = msa_adds_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
    909    pwd->b[4]  = msa_adds_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
    910    pwd->b[5]  = msa_adds_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
    911    pwd->b[6]  = msa_adds_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
    912    pwd->b[7]  = msa_adds_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
    913    pwd->b[8]  = msa_adds_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
    914    pwd->b[9]  = msa_adds_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
    915    pwd->b[10] = msa_adds_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
    916    pwd->b[11] = msa_adds_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
    917    pwd->b[12] = msa_adds_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
    918    pwd->b[13] = msa_adds_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
    919    pwd->b[14] = msa_adds_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
    920    pwd->b[15] = msa_adds_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
    921}
    922
    923void helper_msa_adds_a_h(CPUMIPSState *env,
    924                         uint32_t wd, uint32_t ws, uint32_t wt)
    925{
    926    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    927    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    928    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    929
    930    pwd->h[0]  = msa_adds_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
    931    pwd->h[1]  = msa_adds_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
    932    pwd->h[2]  = msa_adds_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
    933    pwd->h[3]  = msa_adds_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
    934    pwd->h[4]  = msa_adds_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
    935    pwd->h[5]  = msa_adds_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
    936    pwd->h[6]  = msa_adds_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
    937    pwd->h[7]  = msa_adds_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
    938}
    939
    940void helper_msa_adds_a_w(CPUMIPSState *env,
    941                         uint32_t wd, uint32_t ws, uint32_t wt)
    942{
    943    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    944    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    945    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    946
    947    pwd->w[0]  = msa_adds_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
    948    pwd->w[1]  = msa_adds_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
    949    pwd->w[2]  = msa_adds_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
    950    pwd->w[3]  = msa_adds_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
    951}
    952
    953void helper_msa_adds_a_d(CPUMIPSState *env,
    954                         uint32_t wd, uint32_t ws, uint32_t wt)
    955{
    956    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    957    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    958    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    959
    960    pwd->d[0]  = msa_adds_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
    961    pwd->d[1]  = msa_adds_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
    962}
    963
    964
    965static inline int64_t msa_adds_s_df(uint32_t df, int64_t arg1, int64_t arg2)
    966{
    967    int64_t max_int = DF_MAX_INT(df);
    968    int64_t min_int = DF_MIN_INT(df);
    969    if (arg1 < 0) {
    970        return (min_int - arg1 < arg2) ? arg1 + arg2 : min_int;
    971    } else {
    972        return (arg2 < max_int - arg1) ? arg1 + arg2 : max_int;
    973    }
    974}
    975
    976void helper_msa_adds_s_b(CPUMIPSState *env,
    977                         uint32_t wd, uint32_t ws, uint32_t wt)
    978{
    979    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    980    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    981    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    982
    983    pwd->b[0]  = msa_adds_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
    984    pwd->b[1]  = msa_adds_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
    985    pwd->b[2]  = msa_adds_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
    986    pwd->b[3]  = msa_adds_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
    987    pwd->b[4]  = msa_adds_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
    988    pwd->b[5]  = msa_adds_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
    989    pwd->b[6]  = msa_adds_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
    990    pwd->b[7]  = msa_adds_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
    991    pwd->b[8]  = msa_adds_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
    992    pwd->b[9]  = msa_adds_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
    993    pwd->b[10] = msa_adds_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
    994    pwd->b[11] = msa_adds_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
    995    pwd->b[12] = msa_adds_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
    996    pwd->b[13] = msa_adds_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
    997    pwd->b[14] = msa_adds_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
    998    pwd->b[15] = msa_adds_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
    999}
   1000
   1001void helper_msa_adds_s_h(CPUMIPSState *env,
   1002                         uint32_t wd, uint32_t ws, uint32_t wt)
   1003{
   1004    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1005    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1006    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1007
   1008    pwd->h[0]  = msa_adds_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1009    pwd->h[1]  = msa_adds_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1010    pwd->h[2]  = msa_adds_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1011    pwd->h[3]  = msa_adds_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1012    pwd->h[4]  = msa_adds_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1013    pwd->h[5]  = msa_adds_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1014    pwd->h[6]  = msa_adds_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1015    pwd->h[7]  = msa_adds_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1016}
   1017
   1018void helper_msa_adds_s_w(CPUMIPSState *env,
   1019                         uint32_t wd, uint32_t ws, uint32_t wt)
   1020{
   1021    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1022    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1023    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1024
   1025    pwd->w[0]  = msa_adds_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1026    pwd->w[1]  = msa_adds_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1027    pwd->w[2]  = msa_adds_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1028    pwd->w[3]  = msa_adds_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1029}
   1030
   1031void helper_msa_adds_s_d(CPUMIPSState *env,
   1032                         uint32_t wd, uint32_t ws, uint32_t wt)
   1033{
   1034    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1035    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1036    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1037
   1038    pwd->d[0]  = msa_adds_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1039    pwd->d[1]  = msa_adds_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1040}
   1041
   1042
   1043static inline uint64_t msa_adds_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
   1044{
   1045    uint64_t max_uint = DF_MAX_UINT(df);
   1046    uint64_t u_arg1 = UNSIGNED(arg1, df);
   1047    uint64_t u_arg2 = UNSIGNED(arg2, df);
   1048    return (u_arg1 < max_uint - u_arg2) ? u_arg1 + u_arg2 : max_uint;
   1049}
   1050
   1051void helper_msa_adds_u_b(CPUMIPSState *env,
   1052                         uint32_t wd, uint32_t ws, uint32_t wt)
   1053{
   1054    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1055    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1056    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1057
   1058    pwd->b[0]  = msa_adds_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1059    pwd->b[1]  = msa_adds_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1060    pwd->b[2]  = msa_adds_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1061    pwd->b[3]  = msa_adds_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1062    pwd->b[4]  = msa_adds_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1063    pwd->b[5]  = msa_adds_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1064    pwd->b[6]  = msa_adds_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1065    pwd->b[7]  = msa_adds_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1066    pwd->b[8]  = msa_adds_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1067    pwd->b[9]  = msa_adds_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1068    pwd->b[10] = msa_adds_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1069    pwd->b[11] = msa_adds_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1070    pwd->b[12] = msa_adds_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1071    pwd->b[13] = msa_adds_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1072    pwd->b[14] = msa_adds_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1073    pwd->b[15] = msa_adds_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1074}
   1075
   1076void helper_msa_adds_u_h(CPUMIPSState *env,
   1077                         uint32_t wd, uint32_t ws, uint32_t wt)
   1078{
   1079    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1080    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1081    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1082
   1083    pwd->h[0]  = msa_adds_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1084    pwd->h[1]  = msa_adds_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1085    pwd->h[2]  = msa_adds_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1086    pwd->h[3]  = msa_adds_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1087    pwd->h[4]  = msa_adds_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1088    pwd->h[5]  = msa_adds_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1089    pwd->h[6]  = msa_adds_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1090    pwd->h[7]  = msa_adds_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1091}
   1092
   1093void helper_msa_adds_u_w(CPUMIPSState *env,
   1094                         uint32_t wd, uint32_t ws, uint32_t wt)
   1095{
   1096    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1097    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1098    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1099
   1100    pwd->w[0]  = msa_adds_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1101    pwd->w[1]  = msa_adds_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1102    pwd->w[2]  = msa_adds_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1103    pwd->w[3]  = msa_adds_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1104}
   1105
   1106void helper_msa_adds_u_d(CPUMIPSState *env,
   1107                         uint32_t wd, uint32_t ws, uint32_t wt)
   1108{
   1109    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1110    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1111    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1112
   1113    pwd->d[0]  = msa_adds_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1114    pwd->d[1]  = msa_adds_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1115}
   1116
   1117
   1118static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
   1119{
   1120    return arg1 + arg2;
   1121}
   1122
   1123void helper_msa_addv_b(CPUMIPSState *env,
   1124                       uint32_t wd, uint32_t ws, uint32_t wt)
   1125{
   1126    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1127    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1128    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1129
   1130    pwd->b[0]  = msa_addv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1131    pwd->b[1]  = msa_addv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1132    pwd->b[2]  = msa_addv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1133    pwd->b[3]  = msa_addv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1134    pwd->b[4]  = msa_addv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1135    pwd->b[5]  = msa_addv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1136    pwd->b[6]  = msa_addv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1137    pwd->b[7]  = msa_addv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1138    pwd->b[8]  = msa_addv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1139    pwd->b[9]  = msa_addv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1140    pwd->b[10] = msa_addv_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1141    pwd->b[11] = msa_addv_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1142    pwd->b[12] = msa_addv_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1143    pwd->b[13] = msa_addv_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1144    pwd->b[14] = msa_addv_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1145    pwd->b[15] = msa_addv_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1146}
   1147
   1148void helper_msa_addv_h(CPUMIPSState *env,
   1149                       uint32_t wd, uint32_t ws, uint32_t wt)
   1150{
   1151    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1152    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1153    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1154
   1155    pwd->h[0]  = msa_addv_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1156    pwd->h[1]  = msa_addv_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1157    pwd->h[2]  = msa_addv_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1158    pwd->h[3]  = msa_addv_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1159    pwd->h[4]  = msa_addv_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1160    pwd->h[5]  = msa_addv_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1161    pwd->h[6]  = msa_addv_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1162    pwd->h[7]  = msa_addv_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1163}
   1164
   1165void helper_msa_addv_w(CPUMIPSState *env,
   1166                       uint32_t wd, uint32_t ws, uint32_t wt)
   1167{
   1168    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1169    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1170    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1171
   1172    pwd->w[0]  = msa_addv_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1173    pwd->w[1]  = msa_addv_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1174    pwd->w[2]  = msa_addv_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1175    pwd->w[3]  = msa_addv_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1176}
   1177
   1178void helper_msa_addv_d(CPUMIPSState *env,
   1179                       uint32_t wd, uint32_t ws, uint32_t wt)
   1180{
   1181    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1182    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1183    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1184
   1185    pwd->d[0]  = msa_addv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1186    pwd->d[1]  = msa_addv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1187}
   1188
   1189
   1190#define SIGNED_EVEN(a, df) \
   1191        ((((int64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))
   1192
   1193#define UNSIGNED_EVEN(a, df) \
   1194        ((((uint64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))
   1195
   1196#define SIGNED_ODD(a, df) \
   1197        ((((int64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
   1198
   1199#define UNSIGNED_ODD(a, df) \
   1200        ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
   1201
   1202
   1203static inline int64_t msa_hadd_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   1204{
   1205    return SIGNED_ODD(arg1, df) + SIGNED_EVEN(arg2, df);
   1206}
   1207
   1208void helper_msa_hadd_s_h(CPUMIPSState *env,
   1209                         uint32_t wd, uint32_t ws, uint32_t wt)
   1210{
   1211    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1212    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1213    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1214
   1215    pwd->h[0]  = msa_hadd_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1216    pwd->h[1]  = msa_hadd_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1217    pwd->h[2]  = msa_hadd_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1218    pwd->h[3]  = msa_hadd_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1219    pwd->h[4]  = msa_hadd_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1220    pwd->h[5]  = msa_hadd_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1221    pwd->h[6]  = msa_hadd_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1222    pwd->h[7]  = msa_hadd_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1223}
   1224
   1225void helper_msa_hadd_s_w(CPUMIPSState *env,
   1226                         uint32_t wd, uint32_t ws, uint32_t wt)
   1227{
   1228    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1229    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1230    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1231
   1232    pwd->w[0]  = msa_hadd_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1233    pwd->w[1]  = msa_hadd_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1234    pwd->w[2]  = msa_hadd_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1235    pwd->w[3]  = msa_hadd_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1236}
   1237
   1238void helper_msa_hadd_s_d(CPUMIPSState *env,
   1239                         uint32_t wd, uint32_t ws, uint32_t wt)
   1240{
   1241    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1242    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1243    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1244
   1245    pwd->d[0]  = msa_hadd_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1246    pwd->d[1]  = msa_hadd_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1247}
   1248
   1249
   1250static inline int64_t msa_hadd_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   1251{
   1252    return UNSIGNED_ODD(arg1, df) + UNSIGNED_EVEN(arg2, df);
   1253}
   1254
   1255void helper_msa_hadd_u_h(CPUMIPSState *env,
   1256                         uint32_t wd, uint32_t ws, uint32_t wt)
   1257{
   1258    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1259    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1260    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1261
   1262    pwd->h[0]  = msa_hadd_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1263    pwd->h[1]  = msa_hadd_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1264    pwd->h[2]  = msa_hadd_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1265    pwd->h[3]  = msa_hadd_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1266    pwd->h[4]  = msa_hadd_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1267    pwd->h[5]  = msa_hadd_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1268    pwd->h[6]  = msa_hadd_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1269    pwd->h[7]  = msa_hadd_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1270}
   1271
   1272void helper_msa_hadd_u_w(CPUMIPSState *env,
   1273                         uint32_t wd, uint32_t ws, uint32_t wt)
   1274{
   1275    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1276    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1277    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1278
   1279    pwd->w[0]  = msa_hadd_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1280    pwd->w[1]  = msa_hadd_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1281    pwd->w[2]  = msa_hadd_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1282    pwd->w[3]  = msa_hadd_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1283}
   1284
   1285void helper_msa_hadd_u_d(CPUMIPSState *env,
   1286                         uint32_t wd, uint32_t ws, uint32_t wt)
   1287{
   1288    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1289    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1290    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1291
   1292    pwd->d[0]  = msa_hadd_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1293    pwd->d[1]  = msa_hadd_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1294}
   1295
   1296
   1297/*
   1298 * Int Average
   1299 * -----------
   1300 *
   1301 * +---------------+----------------------------------------------------------+
   1302 * | AVE_S.B       | Vector Signed Average (byte)                             |
   1303 * | AVE_S.H       | Vector Signed Average (halfword)                         |
   1304 * | AVE_S.W       | Vector Signed Average (word)                             |
   1305 * | AVE_S.D       | Vector Signed Average (doubleword)                       |
   1306 * | AVE_U.B       | Vector Unsigned Average (byte)                           |
   1307 * | AVE_U.H       | Vector Unsigned Average (halfword)                       |
   1308 * | AVE_U.W       | Vector Unsigned Average (word)                           |
   1309 * | AVE_U.D       | Vector Unsigned Average (doubleword)                     |
   1310 * | AVER_S.B      | Vector Signed Average Rounded (byte)                     |
   1311 * | AVER_S.H      | Vector Signed Average Rounded (halfword)                 |
   1312 * | AVER_S.W      | Vector Signed Average Rounded (word)                     |
   1313 * | AVER_S.D      | Vector Signed Average Rounded (doubleword)               |
   1314 * | AVER_U.B      | Vector Unsigned Average Rounded (byte)                   |
   1315 * | AVER_U.H      | Vector Unsigned Average Rounded (halfword)               |
   1316 * | AVER_U.W      | Vector Unsigned Average Rounded (word)                   |
   1317 * | AVER_U.D      | Vector Unsigned Average Rounded (doubleword)             |
   1318 * +---------------+----------------------------------------------------------+
   1319 */
   1320
   1321static inline int64_t msa_ave_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   1322{
   1323    /* signed shift */
   1324    return (arg1 >> 1) + (arg2 >> 1) + (arg1 & arg2 & 1);
   1325}
   1326
   1327void helper_msa_ave_s_b(CPUMIPSState *env,
   1328                        uint32_t wd, uint32_t ws, uint32_t wt)
   1329{
   1330    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1331    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1332    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1333
   1334    pwd->b[0]  = msa_ave_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1335    pwd->b[1]  = msa_ave_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1336    pwd->b[2]  = msa_ave_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1337    pwd->b[3]  = msa_ave_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1338    pwd->b[4]  = msa_ave_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1339    pwd->b[5]  = msa_ave_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1340    pwd->b[6]  = msa_ave_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1341    pwd->b[7]  = msa_ave_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1342    pwd->b[8]  = msa_ave_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1343    pwd->b[9]  = msa_ave_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1344    pwd->b[10] = msa_ave_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1345    pwd->b[11] = msa_ave_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1346    pwd->b[12] = msa_ave_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1347    pwd->b[13] = msa_ave_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1348    pwd->b[14] = msa_ave_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1349    pwd->b[15] = msa_ave_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1350}
   1351
   1352void helper_msa_ave_s_h(CPUMIPSState *env,
   1353                        uint32_t wd, uint32_t ws, uint32_t wt)
   1354{
   1355    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1356    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1357    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1358
   1359    pwd->h[0]  = msa_ave_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1360    pwd->h[1]  = msa_ave_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1361    pwd->h[2]  = msa_ave_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1362    pwd->h[3]  = msa_ave_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1363    pwd->h[4]  = msa_ave_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1364    pwd->h[5]  = msa_ave_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1365    pwd->h[6]  = msa_ave_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1366    pwd->h[7]  = msa_ave_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1367}
   1368
   1369void helper_msa_ave_s_w(CPUMIPSState *env,
   1370                        uint32_t wd, uint32_t ws, uint32_t wt)
   1371{
   1372    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1373    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1374    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1375
   1376    pwd->w[0]  = msa_ave_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1377    pwd->w[1]  = msa_ave_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1378    pwd->w[2]  = msa_ave_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1379    pwd->w[3]  = msa_ave_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1380}
   1381
   1382void helper_msa_ave_s_d(CPUMIPSState *env,
   1383                        uint32_t wd, uint32_t ws, uint32_t wt)
   1384{
   1385    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1386    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1387    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1388
   1389    pwd->d[0]  = msa_ave_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1390    pwd->d[1]  = msa_ave_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1391}
   1392
   1393static inline uint64_t msa_ave_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
   1394{
   1395    uint64_t u_arg1 = UNSIGNED(arg1, df);
   1396    uint64_t u_arg2 = UNSIGNED(arg2, df);
   1397    /* unsigned shift */
   1398    return (u_arg1 >> 1) + (u_arg2 >> 1) + (u_arg1 & u_arg2 & 1);
   1399}
   1400
   1401void helper_msa_ave_u_b(CPUMIPSState *env,
   1402                        uint32_t wd, uint32_t ws, uint32_t wt)
   1403{
   1404    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1405    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1406    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1407
   1408    pwd->b[0]  = msa_ave_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1409    pwd->b[1]  = msa_ave_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1410    pwd->b[2]  = msa_ave_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1411    pwd->b[3]  = msa_ave_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1412    pwd->b[4]  = msa_ave_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1413    pwd->b[5]  = msa_ave_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1414    pwd->b[6]  = msa_ave_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1415    pwd->b[7]  = msa_ave_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1416    pwd->b[8]  = msa_ave_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1417    pwd->b[9]  = msa_ave_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1418    pwd->b[10] = msa_ave_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1419    pwd->b[11] = msa_ave_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1420    pwd->b[12] = msa_ave_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1421    pwd->b[13] = msa_ave_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1422    pwd->b[14] = msa_ave_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1423    pwd->b[15] = msa_ave_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1424}
   1425
   1426void helper_msa_ave_u_h(CPUMIPSState *env,
   1427                        uint32_t wd, uint32_t ws, uint32_t wt)
   1428{
   1429    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1430    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1431    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1432
   1433    pwd->h[0]  = msa_ave_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1434    pwd->h[1]  = msa_ave_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1435    pwd->h[2]  = msa_ave_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1436    pwd->h[3]  = msa_ave_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1437    pwd->h[4]  = msa_ave_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1438    pwd->h[5]  = msa_ave_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1439    pwd->h[6]  = msa_ave_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1440    pwd->h[7]  = msa_ave_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1441}
   1442
   1443void helper_msa_ave_u_w(CPUMIPSState *env,
   1444                        uint32_t wd, uint32_t ws, uint32_t wt)
   1445{
   1446    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1447    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1448    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1449
   1450    pwd->w[0]  = msa_ave_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1451    pwd->w[1]  = msa_ave_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1452    pwd->w[2]  = msa_ave_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1453    pwd->w[3]  = msa_ave_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1454}
   1455
   1456void helper_msa_ave_u_d(CPUMIPSState *env,
   1457                        uint32_t wd, uint32_t ws, uint32_t wt)
   1458{
   1459    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1460    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1461    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1462
   1463    pwd->d[0]  = msa_ave_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1464    pwd->d[1]  = msa_ave_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1465}
   1466
   1467static inline int64_t msa_aver_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   1468{
   1469    /* signed shift */
   1470    return (arg1 >> 1) + (arg2 >> 1) + ((arg1 | arg2) & 1);
   1471}
   1472
   1473void helper_msa_aver_s_b(CPUMIPSState *env,
   1474                         uint32_t wd, uint32_t ws, uint32_t wt)
   1475{
   1476    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1477    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1478    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1479
   1480    pwd->b[0]  = msa_aver_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1481    pwd->b[1]  = msa_aver_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1482    pwd->b[2]  = msa_aver_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1483    pwd->b[3]  = msa_aver_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1484    pwd->b[4]  = msa_aver_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1485    pwd->b[5]  = msa_aver_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1486    pwd->b[6]  = msa_aver_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1487    pwd->b[7]  = msa_aver_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1488    pwd->b[8]  = msa_aver_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1489    pwd->b[9]  = msa_aver_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1490    pwd->b[10] = msa_aver_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1491    pwd->b[11] = msa_aver_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1492    pwd->b[12] = msa_aver_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1493    pwd->b[13] = msa_aver_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1494    pwd->b[14] = msa_aver_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1495    pwd->b[15] = msa_aver_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1496}
   1497
   1498void helper_msa_aver_s_h(CPUMIPSState *env,
   1499                         uint32_t wd, uint32_t ws, uint32_t wt)
   1500{
   1501    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1502    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1503    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1504
   1505    pwd->h[0]  = msa_aver_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1506    pwd->h[1]  = msa_aver_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1507    pwd->h[2]  = msa_aver_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1508    pwd->h[3]  = msa_aver_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1509    pwd->h[4]  = msa_aver_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1510    pwd->h[5]  = msa_aver_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1511    pwd->h[6]  = msa_aver_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1512    pwd->h[7]  = msa_aver_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1513}
   1514
   1515void helper_msa_aver_s_w(CPUMIPSState *env,
   1516                         uint32_t wd, uint32_t ws, uint32_t wt)
   1517{
   1518    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1519    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1520    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1521
   1522    pwd->w[0]  = msa_aver_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1523    pwd->w[1]  = msa_aver_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1524    pwd->w[2]  = msa_aver_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1525    pwd->w[3]  = msa_aver_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1526}
   1527
   1528void helper_msa_aver_s_d(CPUMIPSState *env,
   1529                         uint32_t wd, uint32_t ws, uint32_t wt)
   1530{
   1531    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1532    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1533    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1534
   1535    pwd->d[0]  = msa_aver_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1536    pwd->d[1]  = msa_aver_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1537}
   1538
   1539static inline uint64_t msa_aver_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
   1540{
   1541    uint64_t u_arg1 = UNSIGNED(arg1, df);
   1542    uint64_t u_arg2 = UNSIGNED(arg2, df);
   1543    /* unsigned shift */
   1544    return (u_arg1 >> 1) + (u_arg2 >> 1) + ((u_arg1 | u_arg2) & 1);
   1545}
   1546
   1547void helper_msa_aver_u_b(CPUMIPSState *env,
   1548                         uint32_t wd, uint32_t ws, uint32_t wt)
   1549{
   1550    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1551    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1552    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1553
   1554    pwd->b[0]  = msa_aver_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1555    pwd->b[1]  = msa_aver_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1556    pwd->b[2]  = msa_aver_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1557    pwd->b[3]  = msa_aver_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1558    pwd->b[4]  = msa_aver_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1559    pwd->b[5]  = msa_aver_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1560    pwd->b[6]  = msa_aver_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1561    pwd->b[7]  = msa_aver_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1562    pwd->b[8]  = msa_aver_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1563    pwd->b[9]  = msa_aver_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1564    pwd->b[10] = msa_aver_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1565    pwd->b[11] = msa_aver_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1566    pwd->b[12] = msa_aver_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1567    pwd->b[13] = msa_aver_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1568    pwd->b[14] = msa_aver_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1569    pwd->b[15] = msa_aver_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1570}
   1571
   1572void helper_msa_aver_u_h(CPUMIPSState *env,
   1573                         uint32_t wd, uint32_t ws, uint32_t wt)
   1574{
   1575    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1576    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1577    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1578
   1579    pwd->h[0]  = msa_aver_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1580    pwd->h[1]  = msa_aver_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1581    pwd->h[2]  = msa_aver_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1582    pwd->h[3]  = msa_aver_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1583    pwd->h[4]  = msa_aver_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1584    pwd->h[5]  = msa_aver_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1585    pwd->h[6]  = msa_aver_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1586    pwd->h[7]  = msa_aver_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1587}
   1588
   1589void helper_msa_aver_u_w(CPUMIPSState *env,
   1590                         uint32_t wd, uint32_t ws, uint32_t wt)
   1591{
   1592    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1593    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1594    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1595
   1596    pwd->w[0]  = msa_aver_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1597    pwd->w[1]  = msa_aver_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1598    pwd->w[2]  = msa_aver_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1599    pwd->w[3]  = msa_aver_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1600}
   1601
   1602void helper_msa_aver_u_d(CPUMIPSState *env,
   1603                         uint32_t wd, uint32_t ws, uint32_t wt)
   1604{
   1605    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1606    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1607    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1608
   1609    pwd->d[0]  = msa_aver_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1610    pwd->d[1]  = msa_aver_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1611}
   1612
   1613
   1614/*
   1615 * Int Compare
   1616 * -----------
   1617 *
   1618 * +---------------+----------------------------------------------------------+
   1619 * | CEQ.B         | Vector Compare Equal (byte)                              |
   1620 * | CEQ.H         | Vector Compare Equal (halfword)                          |
   1621 * | CEQ.W         | Vector Compare Equal (word)                              |
   1622 * | CEQ.D         | Vector Compare Equal (doubleword)                        |
   1623 * | CLE_S.B       | Vector Compare Signed Less Than or Equal (byte)          |
   1624 * | CLE_S.H       | Vector Compare Signed Less Than or Equal (halfword)      |
   1625 * | CLE_S.W       | Vector Compare Signed Less Than or Equal (word)          |
   1626 * | CLE_S.D       | Vector Compare Signed Less Than or Equal (doubleword)    |
   1627 * | CLE_U.B       | Vector Compare Unsigned Less Than or Equal (byte)        |
   1628 * | CLE_U.H       | Vector Compare Unsigned Less Than or Equal (halfword)    |
   1629 * | CLE_U.W       | Vector Compare Unsigned Less Than or Equal (word)        |
   1630 * | CLE_U.D       | Vector Compare Unsigned Less Than or Equal (doubleword)  |
   1631 * | CLT_S.B       | Vector Compare Signed Less Than (byte)                   |
   1632 * | CLT_S.H       | Vector Compare Signed Less Than (halfword)               |
   1633 * | CLT_S.W       | Vector Compare Signed Less Than (word)                   |
   1634 * | CLT_S.D       | Vector Compare Signed Less Than (doubleword)             |
   1635 * | CLT_U.B       | Vector Compare Unsigned Less Than (byte)                 |
   1636 * | CLT_U.H       | Vector Compare Unsigned Less Than (halfword)             |
   1637 * | CLT_U.W       | Vector Compare Unsigned Less Than (word)                 |
   1638 * | CLT_U.D       | Vector Compare Unsigned Less Than (doubleword)           |
   1639 * +---------------+----------------------------------------------------------+
   1640 */
   1641
   1642static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2)
   1643{
   1644    return arg1 == arg2 ? -1 : 0;
   1645}
   1646
   1647static inline int8_t msa_ceq_b(int8_t arg1, int8_t arg2)
   1648{
   1649    return arg1 == arg2 ? -1 : 0;
   1650}
   1651
   1652void helper_msa_ceq_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   1653{
   1654    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1655    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1656    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1657
   1658    pwd->b[0]  = msa_ceq_b(pws->b[0],  pwt->b[0]);
   1659    pwd->b[1]  = msa_ceq_b(pws->b[1],  pwt->b[1]);
   1660    pwd->b[2]  = msa_ceq_b(pws->b[2],  pwt->b[2]);
   1661    pwd->b[3]  = msa_ceq_b(pws->b[3],  pwt->b[3]);
   1662    pwd->b[4]  = msa_ceq_b(pws->b[4],  pwt->b[4]);
   1663    pwd->b[5]  = msa_ceq_b(pws->b[5],  pwt->b[5]);
   1664    pwd->b[6]  = msa_ceq_b(pws->b[6],  pwt->b[6]);
   1665    pwd->b[7]  = msa_ceq_b(pws->b[7],  pwt->b[7]);
   1666    pwd->b[8]  = msa_ceq_b(pws->b[8],  pwt->b[8]);
   1667    pwd->b[9]  = msa_ceq_b(pws->b[9],  pwt->b[9]);
   1668    pwd->b[10] = msa_ceq_b(pws->b[10], pwt->b[10]);
   1669    pwd->b[11] = msa_ceq_b(pws->b[11], pwt->b[11]);
   1670    pwd->b[12] = msa_ceq_b(pws->b[12], pwt->b[12]);
   1671    pwd->b[13] = msa_ceq_b(pws->b[13], pwt->b[13]);
   1672    pwd->b[14] = msa_ceq_b(pws->b[14], pwt->b[14]);
   1673    pwd->b[15] = msa_ceq_b(pws->b[15], pwt->b[15]);
   1674}
   1675
   1676static inline int16_t msa_ceq_h(int16_t arg1, int16_t arg2)
   1677{
   1678    return arg1 == arg2 ? -1 : 0;
   1679}
   1680
   1681void helper_msa_ceq_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   1682{
   1683    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1684    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1685    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1686
   1687    pwd->h[0]  = msa_ceq_h(pws->h[0],  pwt->h[0]);
   1688    pwd->h[1]  = msa_ceq_h(pws->h[1],  pwt->h[1]);
   1689    pwd->h[2]  = msa_ceq_h(pws->h[2],  pwt->h[2]);
   1690    pwd->h[3]  = msa_ceq_h(pws->h[3],  pwt->h[3]);
   1691    pwd->h[4]  = msa_ceq_h(pws->h[4],  pwt->h[4]);
   1692    pwd->h[5]  = msa_ceq_h(pws->h[5],  pwt->h[5]);
   1693    pwd->h[6]  = msa_ceq_h(pws->h[6],  pwt->h[6]);
   1694    pwd->h[7]  = msa_ceq_h(pws->h[7],  pwt->h[7]);
   1695}
   1696
   1697static inline int32_t msa_ceq_w(int32_t arg1, int32_t arg2)
   1698{
   1699    return arg1 == arg2 ? -1 : 0;
   1700}
   1701
   1702void helper_msa_ceq_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   1703{
   1704    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1705    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1706    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1707
   1708    pwd->w[0]  = msa_ceq_w(pws->w[0],  pwt->w[0]);
   1709    pwd->w[1]  = msa_ceq_w(pws->w[1],  pwt->w[1]);
   1710    pwd->w[2]  = msa_ceq_w(pws->w[2],  pwt->w[2]);
   1711    pwd->w[3]  = msa_ceq_w(pws->w[3],  pwt->w[3]);
   1712}
   1713
   1714static inline int64_t msa_ceq_d(int64_t arg1, int64_t arg2)
   1715{
   1716    return arg1 == arg2 ? -1 : 0;
   1717}
   1718
   1719void helper_msa_ceq_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   1720{
   1721    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1722    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1723    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1724
   1725    pwd->d[0]  = msa_ceq_d(pws->d[0],  pwt->d[0]);
   1726    pwd->d[1]  = msa_ceq_d(pws->d[1],  pwt->d[1]);
   1727}
   1728
   1729static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   1730{
   1731    return arg1 <= arg2 ? -1 : 0;
   1732}
   1733
   1734void helper_msa_cle_s_b(CPUMIPSState *env,
   1735                        uint32_t wd, uint32_t ws, uint32_t wt)
   1736{
   1737    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1738    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1739    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1740
   1741    pwd->b[0]  = msa_cle_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1742    pwd->b[1]  = msa_cle_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1743    pwd->b[2]  = msa_cle_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1744    pwd->b[3]  = msa_cle_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1745    pwd->b[4]  = msa_cle_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1746    pwd->b[5]  = msa_cle_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1747    pwd->b[6]  = msa_cle_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1748    pwd->b[7]  = msa_cle_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1749    pwd->b[8]  = msa_cle_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1750    pwd->b[9]  = msa_cle_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1751    pwd->b[10] = msa_cle_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1752    pwd->b[11] = msa_cle_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1753    pwd->b[12] = msa_cle_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1754    pwd->b[13] = msa_cle_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1755    pwd->b[14] = msa_cle_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1756    pwd->b[15] = msa_cle_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1757}
   1758
   1759void helper_msa_cle_s_h(CPUMIPSState *env,
   1760                        uint32_t wd, uint32_t ws, uint32_t wt)
   1761{
   1762    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1763    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1764    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1765
   1766    pwd->h[0]  = msa_cle_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1767    pwd->h[1]  = msa_cle_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1768    pwd->h[2]  = msa_cle_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1769    pwd->h[3]  = msa_cle_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1770    pwd->h[4]  = msa_cle_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1771    pwd->h[5]  = msa_cle_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1772    pwd->h[6]  = msa_cle_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1773    pwd->h[7]  = msa_cle_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1774}
   1775
   1776void helper_msa_cle_s_w(CPUMIPSState *env,
   1777                        uint32_t wd, uint32_t ws, uint32_t wt)
   1778{
   1779    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1780    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1781    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1782
   1783    pwd->w[0]  = msa_cle_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1784    pwd->w[1]  = msa_cle_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1785    pwd->w[2]  = msa_cle_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1786    pwd->w[3]  = msa_cle_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1787}
   1788
   1789void helper_msa_cle_s_d(CPUMIPSState *env,
   1790                        uint32_t wd, uint32_t ws, uint32_t wt)
   1791{
   1792    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1793    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1794    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1795
   1796    pwd->d[0]  = msa_cle_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1797    pwd->d[1]  = msa_cle_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1798}
   1799
   1800static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   1801{
   1802    uint64_t u_arg1 = UNSIGNED(arg1, df);
   1803    uint64_t u_arg2 = UNSIGNED(arg2, df);
   1804    return u_arg1 <= u_arg2 ? -1 : 0;
   1805}
   1806
   1807void helper_msa_cle_u_b(CPUMIPSState *env,
   1808                        uint32_t wd, uint32_t ws, uint32_t wt)
   1809{
   1810    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1811    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1812    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1813
   1814    pwd->b[0]  = msa_cle_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1815    pwd->b[1]  = msa_cle_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1816    pwd->b[2]  = msa_cle_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1817    pwd->b[3]  = msa_cle_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1818    pwd->b[4]  = msa_cle_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1819    pwd->b[5]  = msa_cle_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1820    pwd->b[6]  = msa_cle_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1821    pwd->b[7]  = msa_cle_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1822    pwd->b[8]  = msa_cle_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1823    pwd->b[9]  = msa_cle_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1824    pwd->b[10] = msa_cle_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1825    pwd->b[11] = msa_cle_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1826    pwd->b[12] = msa_cle_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1827    pwd->b[13] = msa_cle_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1828    pwd->b[14] = msa_cle_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1829    pwd->b[15] = msa_cle_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1830}
   1831
   1832void helper_msa_cle_u_h(CPUMIPSState *env,
   1833                        uint32_t wd, uint32_t ws, uint32_t wt)
   1834{
   1835    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1836    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1837    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1838
   1839    pwd->h[0]  = msa_cle_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1840    pwd->h[1]  = msa_cle_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1841    pwd->h[2]  = msa_cle_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1842    pwd->h[3]  = msa_cle_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1843    pwd->h[4]  = msa_cle_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1844    pwd->h[5]  = msa_cle_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1845    pwd->h[6]  = msa_cle_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1846    pwd->h[7]  = msa_cle_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1847}
   1848
   1849void helper_msa_cle_u_w(CPUMIPSState *env,
   1850                        uint32_t wd, uint32_t ws, uint32_t wt)
   1851{
   1852    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1853    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1854    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1855
   1856    pwd->w[0]  = msa_cle_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1857    pwd->w[1]  = msa_cle_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1858    pwd->w[2]  = msa_cle_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1859    pwd->w[3]  = msa_cle_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1860}
   1861
   1862void helper_msa_cle_u_d(CPUMIPSState *env,
   1863                        uint32_t wd, uint32_t ws, uint32_t wt)
   1864{
   1865    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1866    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1867    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1868
   1869    pwd->d[0]  = msa_cle_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1870    pwd->d[1]  = msa_cle_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1871}
   1872
   1873static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   1874{
   1875    return arg1 < arg2 ? -1 : 0;
   1876}
   1877
   1878static inline int8_t msa_clt_s_b(int8_t arg1, int8_t arg2)
   1879{
   1880    return arg1 < arg2 ? -1 : 0;
   1881}
   1882
   1883void helper_msa_clt_s_b(CPUMIPSState *env,
   1884                        uint32_t wd, uint32_t ws, uint32_t wt)
   1885{
   1886    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1887    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1888    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1889
   1890    pwd->b[0]  = msa_clt_s_b(pws->b[0],  pwt->b[0]);
   1891    pwd->b[1]  = msa_clt_s_b(pws->b[1],  pwt->b[1]);
   1892    pwd->b[2]  = msa_clt_s_b(pws->b[2],  pwt->b[2]);
   1893    pwd->b[3]  = msa_clt_s_b(pws->b[3],  pwt->b[3]);
   1894    pwd->b[4]  = msa_clt_s_b(pws->b[4],  pwt->b[4]);
   1895    pwd->b[5]  = msa_clt_s_b(pws->b[5],  pwt->b[5]);
   1896    pwd->b[6]  = msa_clt_s_b(pws->b[6],  pwt->b[6]);
   1897    pwd->b[7]  = msa_clt_s_b(pws->b[7],  pwt->b[7]);
   1898    pwd->b[8]  = msa_clt_s_b(pws->b[8],  pwt->b[8]);
   1899    pwd->b[9]  = msa_clt_s_b(pws->b[9],  pwt->b[9]);
   1900    pwd->b[10] = msa_clt_s_b(pws->b[10], pwt->b[10]);
   1901    pwd->b[11] = msa_clt_s_b(pws->b[11], pwt->b[11]);
   1902    pwd->b[12] = msa_clt_s_b(pws->b[12], pwt->b[12]);
   1903    pwd->b[13] = msa_clt_s_b(pws->b[13], pwt->b[13]);
   1904    pwd->b[14] = msa_clt_s_b(pws->b[14], pwt->b[14]);
   1905    pwd->b[15] = msa_clt_s_b(pws->b[15], pwt->b[15]);
   1906}
   1907
   1908static inline int16_t msa_clt_s_h(int16_t arg1, int16_t arg2)
   1909{
   1910    return arg1 < arg2 ? -1 : 0;
   1911}
   1912
   1913void helper_msa_clt_s_h(CPUMIPSState *env,
   1914                        uint32_t wd, uint32_t ws, uint32_t wt)
   1915{
   1916    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1917    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1918    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1919
   1920    pwd->h[0]  = msa_clt_s_h(pws->h[0],  pwt->h[0]);
   1921    pwd->h[1]  = msa_clt_s_h(pws->h[1],  pwt->h[1]);
   1922    pwd->h[2]  = msa_clt_s_h(pws->h[2],  pwt->h[2]);
   1923    pwd->h[3]  = msa_clt_s_h(pws->h[3],  pwt->h[3]);
   1924    pwd->h[4]  = msa_clt_s_h(pws->h[4],  pwt->h[4]);
   1925    pwd->h[5]  = msa_clt_s_h(pws->h[5],  pwt->h[5]);
   1926    pwd->h[6]  = msa_clt_s_h(pws->h[6],  pwt->h[6]);
   1927    pwd->h[7]  = msa_clt_s_h(pws->h[7],  pwt->h[7]);
   1928}
   1929
   1930static inline int32_t msa_clt_s_w(int32_t arg1, int32_t arg2)
   1931{
   1932    return arg1 < arg2 ? -1 : 0;
   1933}
   1934
   1935void helper_msa_clt_s_w(CPUMIPSState *env,
   1936                        uint32_t wd, uint32_t ws, uint32_t wt)
   1937{
   1938    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1939    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1940    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1941
   1942    pwd->w[0]  = msa_clt_s_w(pws->w[0],  pwt->w[0]);
   1943    pwd->w[1]  = msa_clt_s_w(pws->w[1],  pwt->w[1]);
   1944    pwd->w[2]  = msa_clt_s_w(pws->w[2],  pwt->w[2]);
   1945    pwd->w[3]  = msa_clt_s_w(pws->w[3],  pwt->w[3]);
   1946}
   1947
   1948static inline int64_t msa_clt_s_d(int64_t arg1, int64_t arg2)
   1949{
   1950    return arg1 < arg2 ? -1 : 0;
   1951}
   1952
   1953void helper_msa_clt_s_d(CPUMIPSState *env,
   1954                        uint32_t wd, uint32_t ws, uint32_t wt)
   1955{
   1956    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1957    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1958    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1959
   1960    pwd->d[0]  = msa_clt_s_d(pws->d[0],  pwt->d[0]);
   1961    pwd->d[1]  = msa_clt_s_d(pws->d[1],  pwt->d[1]);
   1962}
   1963
   1964static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   1965{
   1966    uint64_t u_arg1 = UNSIGNED(arg1, df);
   1967    uint64_t u_arg2 = UNSIGNED(arg2, df);
   1968    return u_arg1 < u_arg2 ? -1 : 0;
   1969}
   1970
   1971void helper_msa_clt_u_b(CPUMIPSState *env,
   1972                        uint32_t wd, uint32_t ws, uint32_t wt)
   1973{
   1974    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1975    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1976    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1977
   1978    pwd->b[0]  = msa_clt_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1979    pwd->b[1]  = msa_clt_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1980    pwd->b[2]  = msa_clt_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1981    pwd->b[3]  = msa_clt_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1982    pwd->b[4]  = msa_clt_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1983    pwd->b[5]  = msa_clt_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1984    pwd->b[6]  = msa_clt_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1985    pwd->b[7]  = msa_clt_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1986    pwd->b[8]  = msa_clt_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1987    pwd->b[9]  = msa_clt_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1988    pwd->b[10] = msa_clt_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1989    pwd->b[11] = msa_clt_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1990    pwd->b[12] = msa_clt_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1991    pwd->b[13] = msa_clt_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1992    pwd->b[14] = msa_clt_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1993    pwd->b[15] = msa_clt_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1994}
   1995
   1996void helper_msa_clt_u_h(CPUMIPSState *env,
   1997                        uint32_t wd, uint32_t ws, uint32_t wt)
   1998{
   1999    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2000    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2001    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2002
   2003    pwd->h[0]  = msa_clt_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2004    pwd->h[1]  = msa_clt_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2005    pwd->h[2]  = msa_clt_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2006    pwd->h[3]  = msa_clt_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2007    pwd->h[4]  = msa_clt_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2008    pwd->h[5]  = msa_clt_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2009    pwd->h[6]  = msa_clt_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2010    pwd->h[7]  = msa_clt_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2011}
   2012
   2013void helper_msa_clt_u_w(CPUMIPSState *env,
   2014                        uint32_t wd, uint32_t ws, uint32_t wt)
   2015{
   2016    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2017    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2018    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2019
   2020    pwd->w[0]  = msa_clt_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2021    pwd->w[1]  = msa_clt_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2022    pwd->w[2]  = msa_clt_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2023    pwd->w[3]  = msa_clt_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2024}
   2025
   2026void helper_msa_clt_u_d(CPUMIPSState *env,
   2027                        uint32_t wd, uint32_t ws, uint32_t wt)
   2028{
   2029    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2030    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2031    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2032
   2033    pwd->d[0]  = msa_clt_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2034    pwd->d[1]  = msa_clt_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2035}
   2036
   2037
   2038/*
   2039 * Int Divide
   2040 * ----------
   2041 *
   2042 * +---------------+----------------------------------------------------------+
   2043 * | DIV_S.B       | Vector Signed Divide (byte)                              |
   2044 * | DIV_S.H       | Vector Signed Divide (halfword)                          |
   2045 * | DIV_S.W       | Vector Signed Divide (word)                              |
   2046 * | DIV_S.D       | Vector Signed Divide (doubleword)                        |
   2047 * | DIV_U.B       | Vector Unsigned Divide (byte)                            |
   2048 * | DIV_U.H       | Vector Unsigned Divide (halfword)                        |
   2049 * | DIV_U.W       | Vector Unsigned Divide (word)                            |
   2050 * | DIV_U.D       | Vector Unsigned Divide (doubleword)                      |
   2051 * +---------------+----------------------------------------------------------+
   2052 */
   2053
   2054
   2055static inline int64_t msa_div_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   2056{
   2057    if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
   2058        return DF_MIN_INT(df);
   2059    }
   2060    return arg2 ? arg1 / arg2
   2061                : arg1 >= 0 ? -1 : 1;
   2062}
   2063
   2064void helper_msa_div_s_b(CPUMIPSState *env,
   2065                        uint32_t wd, uint32_t ws, uint32_t wt)
   2066{
   2067    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2068    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2069    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2070
   2071    pwd->b[0]  = msa_div_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2072    pwd->b[1]  = msa_div_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2073    pwd->b[2]  = msa_div_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2074    pwd->b[3]  = msa_div_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2075    pwd->b[4]  = msa_div_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2076    pwd->b[5]  = msa_div_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2077    pwd->b[6]  = msa_div_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2078    pwd->b[7]  = msa_div_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2079    pwd->b[8]  = msa_div_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2080    pwd->b[9]  = msa_div_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2081    pwd->b[10] = msa_div_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2082    pwd->b[11] = msa_div_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2083    pwd->b[12] = msa_div_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2084    pwd->b[13] = msa_div_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2085    pwd->b[14] = msa_div_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2086    pwd->b[15] = msa_div_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2087}
   2088
   2089void helper_msa_div_s_h(CPUMIPSState *env,
   2090                        uint32_t wd, uint32_t ws, uint32_t wt)
   2091{
   2092    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2093    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2094    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2095
   2096    pwd->h[0]  = msa_div_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2097    pwd->h[1]  = msa_div_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2098    pwd->h[2]  = msa_div_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2099    pwd->h[3]  = msa_div_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2100    pwd->h[4]  = msa_div_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2101    pwd->h[5]  = msa_div_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2102    pwd->h[6]  = msa_div_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2103    pwd->h[7]  = msa_div_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2104}
   2105
   2106void helper_msa_div_s_w(CPUMIPSState *env,
   2107                        uint32_t wd, uint32_t ws, uint32_t wt)
   2108{
   2109    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2110    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2111    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2112
   2113    pwd->w[0]  = msa_div_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2114    pwd->w[1]  = msa_div_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2115    pwd->w[2]  = msa_div_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2116    pwd->w[3]  = msa_div_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2117}
   2118
   2119void helper_msa_div_s_d(CPUMIPSState *env,
   2120                        uint32_t wd, uint32_t ws, uint32_t wt)
   2121{
   2122    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2123    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2124    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2125
   2126    pwd->d[0]  = msa_div_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2127    pwd->d[1]  = msa_div_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2128}
   2129
   2130static inline int64_t msa_div_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   2131{
   2132    uint64_t u_arg1 = UNSIGNED(arg1, df);
   2133    uint64_t u_arg2 = UNSIGNED(arg2, df);
   2134    return arg2 ? u_arg1 / u_arg2 : -1;
   2135}
   2136
   2137void helper_msa_div_u_b(CPUMIPSState *env,
   2138                        uint32_t wd, uint32_t ws, uint32_t wt)
   2139{
   2140    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2141    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2142    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2143
   2144    pwd->b[0]  = msa_div_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2145    pwd->b[1]  = msa_div_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2146    pwd->b[2]  = msa_div_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2147    pwd->b[3]  = msa_div_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2148    pwd->b[4]  = msa_div_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2149    pwd->b[5]  = msa_div_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2150    pwd->b[6]  = msa_div_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2151    pwd->b[7]  = msa_div_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2152    pwd->b[8]  = msa_div_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2153    pwd->b[9]  = msa_div_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2154    pwd->b[10] = msa_div_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2155    pwd->b[11] = msa_div_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2156    pwd->b[12] = msa_div_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2157    pwd->b[13] = msa_div_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2158    pwd->b[14] = msa_div_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2159    pwd->b[15] = msa_div_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2160}
   2161
   2162void helper_msa_div_u_h(CPUMIPSState *env,
   2163                        uint32_t wd, uint32_t ws, uint32_t wt)
   2164{
   2165    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2166    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2167    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2168
   2169    pwd->h[0]  = msa_div_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2170    pwd->h[1]  = msa_div_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2171    pwd->h[2]  = msa_div_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2172    pwd->h[3]  = msa_div_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2173    pwd->h[4]  = msa_div_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2174    pwd->h[5]  = msa_div_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2175    pwd->h[6]  = msa_div_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2176    pwd->h[7]  = msa_div_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2177}
   2178
   2179void helper_msa_div_u_w(CPUMIPSState *env,
   2180                        uint32_t wd, uint32_t ws, uint32_t wt)
   2181{
   2182    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2183    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2184    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2185
   2186    pwd->w[0]  = msa_div_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2187    pwd->w[1]  = msa_div_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2188    pwd->w[2]  = msa_div_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2189    pwd->w[3]  = msa_div_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2190}
   2191
   2192void helper_msa_div_u_d(CPUMIPSState *env,
   2193                        uint32_t wd, uint32_t ws, uint32_t wt)
   2194{
   2195    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2196    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2197    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2198
   2199    pwd->d[0]  = msa_div_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2200    pwd->d[1]  = msa_div_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2201}
   2202
   2203
   2204/*
   2205 * Int Dot Product
   2206 * ---------------
   2207 *
   2208 * +---------------+----------------------------------------------------------+
   2209 * | DOTP_S.H      | Vector Signed Dot Product (halfword)                     |
   2210 * | DOTP_S.W      | Vector Signed Dot Product (word)                         |
   2211 * | DOTP_S.D      | Vector Signed Dot Product (doubleword)                   |
   2212 * | DOTP_U.H      | Vector Unsigned Dot Product (halfword)                   |
   2213 * | DOTP_U.W      | Vector Unsigned Dot Product (word)                       |
   2214 * | DOTP_U.D      | Vector Unsigned Dot Product (doubleword)                 |
 * | DPADD_S.H     | Vector Signed Dot Product and Add (halfword)             |
 * | DPADD_S.W     | Vector Signed Dot Product and Add (word)                 |
 * | DPADD_S.D     | Vector Signed Dot Product and Add (doubleword)           |
 * | DPADD_U.H     | Vector Unsigned Dot Product and Add (halfword)           |
 * | DPADD_U.W     | Vector Unsigned Dot Product and Add (word)               |
 * | DPADD_U.D     | Vector Unsigned Dot Product and Add (doubleword)         |
 * | DPSUB_S.H     | Vector Signed Dot Product and Subtract (halfword)        |
 * | DPSUB_S.W     | Vector Signed Dot Product and Subtract (word)            |
 * | DPSUB_S.D     | Vector Signed Dot Product and Subtract (doubleword)      |
 * | DPSUB_U.H     | Vector Unsigned Dot Product and Subtract (halfword)      |
 * | DPSUB_U.W     | Vector Unsigned Dot Product and Subtract (word)          |
 * | DPSUB_U.D     | Vector Unsigned Dot Product and Subtract (doubleword)    |
   2227 * +---------------+----------------------------------------------------------+
   2228 */
   2229
   2230#define SIGNED_EXTRACT(e, o, a, df)     \
   2231    do {                                \
   2232        e = SIGNED_EVEN(a, df);         \
   2233        o = SIGNED_ODD(a, df);          \
   2234    } while (0)
   2235
   2236#define UNSIGNED_EXTRACT(e, o, a, df)   \
   2237    do {                                \
   2238        e = UNSIGNED_EVEN(a, df);       \
   2239        o = UNSIGNED_ODD(a, df);        \
   2240    } while (0)
   2241
   2242
   2243static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   2244{
   2245    int64_t even_arg1;
   2246    int64_t even_arg2;
   2247    int64_t odd_arg1;
   2248    int64_t odd_arg2;
   2249    SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
   2250    SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
   2251    return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
   2252}
   2253
   2254void helper_msa_dotp_s_h(CPUMIPSState *env,
   2255                         uint32_t wd, uint32_t ws, uint32_t wt)
   2256{
   2257    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2258    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2259    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2260
   2261    pwd->h[0]  = msa_dotp_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2262    pwd->h[1]  = msa_dotp_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2263    pwd->h[2]  = msa_dotp_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2264    pwd->h[3]  = msa_dotp_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2265    pwd->h[4]  = msa_dotp_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2266    pwd->h[5]  = msa_dotp_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2267    pwd->h[6]  = msa_dotp_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2268    pwd->h[7]  = msa_dotp_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2269}
   2270
   2271void helper_msa_dotp_s_w(CPUMIPSState *env,
   2272                         uint32_t wd, uint32_t ws, uint32_t wt)
   2273{
   2274    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2275    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2276    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2277
   2278    pwd->w[0]  = msa_dotp_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2279    pwd->w[1]  = msa_dotp_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2280    pwd->w[2]  = msa_dotp_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2281    pwd->w[3]  = msa_dotp_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2282}
   2283
   2284void helper_msa_dotp_s_d(CPUMIPSState *env,
   2285                         uint32_t wd, uint32_t ws, uint32_t wt)
   2286{
   2287    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2288    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2289    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2290
   2291    pwd->d[0]  = msa_dotp_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2292    pwd->d[1]  = msa_dotp_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2293}
   2294
   2295
   2296static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   2297{
   2298    int64_t even_arg1;
   2299    int64_t even_arg2;
   2300    int64_t odd_arg1;
   2301    int64_t odd_arg2;
   2302    UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
   2303    UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
   2304    return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
   2305}
   2306
   2307void helper_msa_dotp_u_h(CPUMIPSState *env,
   2308                         uint32_t wd, uint32_t ws, uint32_t wt)
   2309{
   2310    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2311    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2312    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2313
   2314    pwd->h[0]  = msa_dotp_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2315    pwd->h[1]  = msa_dotp_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2316    pwd->h[2]  = msa_dotp_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2317    pwd->h[3]  = msa_dotp_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2318    pwd->h[4]  = msa_dotp_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2319    pwd->h[5]  = msa_dotp_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2320    pwd->h[6]  = msa_dotp_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2321    pwd->h[7]  = msa_dotp_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2322}
   2323
   2324void helper_msa_dotp_u_w(CPUMIPSState *env,
   2325                         uint32_t wd, uint32_t ws, uint32_t wt)
   2326{
   2327    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2328    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2329    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2330
   2331    pwd->w[0]  = msa_dotp_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2332    pwd->w[1]  = msa_dotp_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2333    pwd->w[2]  = msa_dotp_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2334    pwd->w[3]  = msa_dotp_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2335}
   2336
   2337void helper_msa_dotp_u_d(CPUMIPSState *env,
   2338                         uint32_t wd, uint32_t ws, uint32_t wt)
   2339{
   2340    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2341    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2342    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2343
   2344    pwd->d[0]  = msa_dotp_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2345    pwd->d[1]  = msa_dotp_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2346}
   2347
   2348
   2349static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1,
   2350                                     int64_t arg2)
   2351{
   2352    int64_t even_arg1;
   2353    int64_t even_arg2;
   2354    int64_t odd_arg1;
   2355    int64_t odd_arg2;
   2356    SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
   2357    SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
   2358    return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
   2359}
   2360
   2361void helper_msa_dpadd_s_h(CPUMIPSState *env,
   2362                          uint32_t wd, uint32_t ws, uint32_t wt)
   2363{
   2364    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2365    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2366    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2367
   2368    pwd->h[0]  = msa_dpadd_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
   2369    pwd->h[1]  = msa_dpadd_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
   2370    pwd->h[2]  = msa_dpadd_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
   2371    pwd->h[3]  = msa_dpadd_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
   2372    pwd->h[4]  = msa_dpadd_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
   2373    pwd->h[5]  = msa_dpadd_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
   2374    pwd->h[6]  = msa_dpadd_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
   2375    pwd->h[7]  = msa_dpadd_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
   2376}
   2377
   2378void helper_msa_dpadd_s_w(CPUMIPSState *env,
   2379                          uint32_t wd, uint32_t ws, uint32_t wt)
   2380{
   2381    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2382    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2383    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2384
   2385    pwd->w[0]  = msa_dpadd_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
   2386    pwd->w[1]  = msa_dpadd_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
   2387    pwd->w[2]  = msa_dpadd_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
   2388    pwd->w[3]  = msa_dpadd_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
   2389}
   2390
   2391void helper_msa_dpadd_s_d(CPUMIPSState *env,
   2392                          uint32_t wd, uint32_t ws, uint32_t wt)
   2393{
   2394    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2395    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2396    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2397
   2398    pwd->d[0]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
   2399    pwd->d[1]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
   2400}
   2401
   2402
   2403static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1,
   2404                                     int64_t arg2)
   2405{
   2406    int64_t even_arg1;
   2407    int64_t even_arg2;
   2408    int64_t odd_arg1;
   2409    int64_t odd_arg2;
   2410    UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
   2411    UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
   2412    return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
   2413}
   2414
   2415void helper_msa_dpadd_u_h(CPUMIPSState *env,
   2416                          uint32_t wd, uint32_t ws, uint32_t wt)
   2417{
   2418    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2419    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2420    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2421
   2422    pwd->h[0]  = msa_dpadd_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
   2423    pwd->h[1]  = msa_dpadd_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
   2424    pwd->h[2]  = msa_dpadd_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
   2425    pwd->h[3]  = msa_dpadd_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
   2426    pwd->h[4]  = msa_dpadd_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
   2427    pwd->h[5]  = msa_dpadd_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
   2428    pwd->h[6]  = msa_dpadd_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
   2429    pwd->h[7]  = msa_dpadd_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
   2430}
   2431
   2432void helper_msa_dpadd_u_w(CPUMIPSState *env,
   2433                          uint32_t wd, uint32_t ws, uint32_t wt)
   2434{
   2435    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2436    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2437    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2438
   2439    pwd->w[0]  = msa_dpadd_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
   2440    pwd->w[1]  = msa_dpadd_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
   2441    pwd->w[2]  = msa_dpadd_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
   2442    pwd->w[3]  = msa_dpadd_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
   2443}
   2444
   2445void helper_msa_dpadd_u_d(CPUMIPSState *env,
   2446                          uint32_t wd, uint32_t ws, uint32_t wt)
   2447{
   2448    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2449    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2450    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2451
   2452    pwd->d[0]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
   2453    pwd->d[1]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
   2454}
   2455
   2456
   2457static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1,
   2458                                     int64_t arg2)
   2459{
   2460    int64_t even_arg1;
   2461    int64_t even_arg2;
   2462    int64_t odd_arg1;
   2463    int64_t odd_arg2;
   2464    SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
   2465    SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
   2466    return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2));
   2467}
   2468
   2469void helper_msa_dpsub_s_h(CPUMIPSState *env,
   2470                          uint32_t wd, uint32_t ws, uint32_t wt)
   2471{
   2472    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2473    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2474    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2475
   2476    pwd->h[0]  = msa_dpsub_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
   2477    pwd->h[1]  = msa_dpsub_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
   2478    pwd->h[2]  = msa_dpsub_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
   2479    pwd->h[3]  = msa_dpsub_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
   2480    pwd->h[4]  = msa_dpsub_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
   2481    pwd->h[5]  = msa_dpsub_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
   2482    pwd->h[6]  = msa_dpsub_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
   2483    pwd->h[7]  = msa_dpsub_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
   2484}
   2485
   2486void helper_msa_dpsub_s_w(CPUMIPSState *env,
   2487                          uint32_t wd, uint32_t ws, uint32_t wt)
   2488{
   2489    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2490    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2491    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2492
   2493    pwd->w[0]  = msa_dpsub_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
   2494    pwd->w[1]  = msa_dpsub_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
   2495    pwd->w[2]  = msa_dpsub_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
   2496    pwd->w[3]  = msa_dpsub_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
   2497}
   2498
   2499void helper_msa_dpsub_s_d(CPUMIPSState *env,
   2500                          uint32_t wd, uint32_t ws, uint32_t wt)
   2501{
   2502    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2503    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2504    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2505
   2506    pwd->d[0]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
   2507    pwd->d[1]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
   2508}
   2509
   2510
   2511static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1,
   2512                                     int64_t arg2)
   2513{
   2514    int64_t even_arg1;
   2515    int64_t even_arg2;
   2516    int64_t odd_arg1;
   2517    int64_t odd_arg2;
   2518    UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
   2519    UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
   2520    return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2));
   2521}
   2522
   2523void helper_msa_dpsub_u_h(CPUMIPSState *env,
   2524                          uint32_t wd, uint32_t ws, uint32_t wt)
   2525{
   2526    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2527    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2528    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2529
   2530    pwd->h[0]  = msa_dpsub_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
   2531    pwd->h[1]  = msa_dpsub_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
   2532    pwd->h[2]  = msa_dpsub_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
   2533    pwd->h[3]  = msa_dpsub_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
   2534    pwd->h[4]  = msa_dpsub_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
   2535    pwd->h[5]  = msa_dpsub_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
   2536    pwd->h[6]  = msa_dpsub_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
   2537    pwd->h[7]  = msa_dpsub_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
   2538}
   2539
   2540void helper_msa_dpsub_u_w(CPUMIPSState *env,
   2541                          uint32_t wd, uint32_t ws, uint32_t wt)
   2542{
   2543    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2544    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2545    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2546
   2547    pwd->w[0]  = msa_dpsub_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
   2548    pwd->w[1]  = msa_dpsub_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
   2549    pwd->w[2]  = msa_dpsub_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
   2550    pwd->w[3]  = msa_dpsub_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
   2551}
   2552
   2553void helper_msa_dpsub_u_d(CPUMIPSState *env,
   2554                          uint32_t wd, uint32_t ws, uint32_t wt)
   2555{
   2556    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2557    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2558    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2559
   2560    pwd->d[0]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
   2561    pwd->d[1]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
   2562}
   2563
   2564
   2565/*
   2566 * Int Max Min
   2567 * -----------
   2568 *
   2569 * +---------------+----------------------------------------------------------+
   2570 * | MAX_A.B       | Vector Maximum Based on Absolute Value (byte)            |
   2571 * | MAX_A.H       | Vector Maximum Based on Absolute Value (halfword)        |
   2572 * | MAX_A.W       | Vector Maximum Based on Absolute Value (word)            |
   2573 * | MAX_A.D       | Vector Maximum Based on Absolute Value (doubleword)      |
   2574 * | MAX_S.B       | Vector Signed Maximum (byte)                             |
   2575 * | MAX_S.H       | Vector Signed Maximum (halfword)                         |
   2576 * | MAX_S.W       | Vector Signed Maximum (word)                             |
   2577 * | MAX_S.D       | Vector Signed Maximum (doubleword)                       |
   2578 * | MAX_U.B       | Vector Unsigned Maximum (byte)                           |
   2579 * | MAX_U.H       | Vector Unsigned Maximum (halfword)                       |
   2580 * | MAX_U.W       | Vector Unsigned Maximum (word)                           |
   2581 * | MAX_U.D       | Vector Unsigned Maximum (doubleword)                     |
   2582 * | MIN_A.B       | Vector Minimum Based on Absolute Value (byte)            |
   2583 * | MIN_A.H       | Vector Minimum Based on Absolute Value (halfword)        |
   2584 * | MIN_A.W       | Vector Minimum Based on Absolute Value (word)            |
   2585 * | MIN_A.D       | Vector Minimum Based on Absolute Value (doubleword)      |
   2586 * | MIN_S.B       | Vector Signed Minimum (byte)                             |
   2587 * | MIN_S.H       | Vector Signed Minimum (halfword)                         |
   2588 * | MIN_S.W       | Vector Signed Minimum (word)                             |
   2589 * | MIN_S.D       | Vector Signed Minimum (doubleword)                       |
   2590 * | MIN_U.B       | Vector Unsigned Minimum (byte)                           |
   2591 * | MIN_U.H       | Vector Unsigned Minimum (halfword)                       |
   2592 * | MIN_U.W       | Vector Unsigned Minimum (word)                           |
   2593 * | MIN_U.D       | Vector Unsigned Minimum (doubleword)                     |
   2594 * +---------------+----------------------------------------------------------+
   2595 */
   2596
   2597static inline int64_t msa_max_a_df(uint32_t df, int64_t arg1, int64_t arg2)
   2598{
   2599    uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
   2600    uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
   2601    return abs_arg1 > abs_arg2 ? arg1 : arg2;
   2602}
   2603
   2604void helper_msa_max_a_b(CPUMIPSState *env,
   2605                        uint32_t wd, uint32_t ws, uint32_t wt)
   2606{
   2607    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2608    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2609    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2610
   2611    pwd->b[0]  = msa_max_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2612    pwd->b[1]  = msa_max_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2613    pwd->b[2]  = msa_max_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2614    pwd->b[3]  = msa_max_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2615    pwd->b[4]  = msa_max_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2616    pwd->b[5]  = msa_max_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2617    pwd->b[6]  = msa_max_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2618    pwd->b[7]  = msa_max_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2619    pwd->b[8]  = msa_max_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2620    pwd->b[9]  = msa_max_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2621    pwd->b[10] = msa_max_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2622    pwd->b[11] = msa_max_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2623    pwd->b[12] = msa_max_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2624    pwd->b[13] = msa_max_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2625    pwd->b[14] = msa_max_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2626    pwd->b[15] = msa_max_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2627}
   2628
   2629void helper_msa_max_a_h(CPUMIPSState *env,
   2630                        uint32_t wd, uint32_t ws, uint32_t wt)
   2631{
   2632    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2633    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2634    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2635
   2636    pwd->h[0]  = msa_max_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2637    pwd->h[1]  = msa_max_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2638    pwd->h[2]  = msa_max_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2639    pwd->h[3]  = msa_max_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2640    pwd->h[4]  = msa_max_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2641    pwd->h[5]  = msa_max_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2642    pwd->h[6]  = msa_max_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2643    pwd->h[7]  = msa_max_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2644}
   2645
   2646void helper_msa_max_a_w(CPUMIPSState *env,
   2647                        uint32_t wd, uint32_t ws, uint32_t wt)
   2648{
   2649    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2650    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2651    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2652
   2653    pwd->w[0]  = msa_max_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2654    pwd->w[1]  = msa_max_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2655    pwd->w[2]  = msa_max_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2656    pwd->w[3]  = msa_max_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2657}
   2658
   2659void helper_msa_max_a_d(CPUMIPSState *env,
   2660                        uint32_t wd, uint32_t ws, uint32_t wt)
   2661{
   2662    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2663    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2664    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2665
   2666    pwd->d[0]  = msa_max_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2667    pwd->d[1]  = msa_max_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2668}
   2669
   2670
   2671static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   2672{
   2673    return arg1 > arg2 ? arg1 : arg2;
   2674}
   2675
   2676void helper_msa_max_s_b(CPUMIPSState *env,
   2677                        uint32_t wd, uint32_t ws, uint32_t wt)
   2678{
   2679    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2680    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2681    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2682
   2683    pwd->b[0]  = msa_max_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2684    pwd->b[1]  = msa_max_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2685    pwd->b[2]  = msa_max_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2686    pwd->b[3]  = msa_max_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2687    pwd->b[4]  = msa_max_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2688    pwd->b[5]  = msa_max_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2689    pwd->b[6]  = msa_max_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2690    pwd->b[7]  = msa_max_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2691    pwd->b[8]  = msa_max_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2692    pwd->b[9]  = msa_max_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2693    pwd->b[10] = msa_max_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2694    pwd->b[11] = msa_max_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2695    pwd->b[12] = msa_max_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2696    pwd->b[13] = msa_max_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2697    pwd->b[14] = msa_max_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2698    pwd->b[15] = msa_max_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2699}
   2700
   2701void helper_msa_max_s_h(CPUMIPSState *env,
   2702                        uint32_t wd, uint32_t ws, uint32_t wt)
   2703{
   2704    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2705    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2706    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2707
   2708    pwd->h[0]  = msa_max_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2709    pwd->h[1]  = msa_max_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2710    pwd->h[2]  = msa_max_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2711    pwd->h[3]  = msa_max_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2712    pwd->h[4]  = msa_max_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2713    pwd->h[5]  = msa_max_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2714    pwd->h[6]  = msa_max_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2715    pwd->h[7]  = msa_max_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2716}
   2717
   2718void helper_msa_max_s_w(CPUMIPSState *env,
   2719                        uint32_t wd, uint32_t ws, uint32_t wt)
   2720{
   2721    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2722    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2723    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2724
   2725    pwd->w[0]  = msa_max_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2726    pwd->w[1]  = msa_max_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2727    pwd->w[2]  = msa_max_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2728    pwd->w[3]  = msa_max_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2729}
   2730
   2731void helper_msa_max_s_d(CPUMIPSState *env,
   2732                        uint32_t wd, uint32_t ws, uint32_t wt)
   2733{
   2734    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2735    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2736    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2737
   2738    pwd->d[0]  = msa_max_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2739    pwd->d[1]  = msa_max_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2740}
   2741
   2742
   2743static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   2744{
   2745    uint64_t u_arg1 = UNSIGNED(arg1, df);
   2746    uint64_t u_arg2 = UNSIGNED(arg2, df);
   2747    return u_arg1 > u_arg2 ? arg1 : arg2;
   2748}
   2749
   2750void helper_msa_max_u_b(CPUMIPSState *env,
   2751                        uint32_t wd, uint32_t ws, uint32_t wt)
   2752{
   2753    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2754    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2755    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2756
   2757    pwd->b[0]  = msa_max_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2758    pwd->b[1]  = msa_max_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2759    pwd->b[2]  = msa_max_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2760    pwd->b[3]  = msa_max_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2761    pwd->b[4]  = msa_max_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2762    pwd->b[5]  = msa_max_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2763    pwd->b[6]  = msa_max_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2764    pwd->b[7]  = msa_max_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2765    pwd->b[8]  = msa_max_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2766    pwd->b[9]  = msa_max_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2767    pwd->b[10] = msa_max_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2768    pwd->b[11] = msa_max_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2769    pwd->b[12] = msa_max_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2770    pwd->b[13] = msa_max_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2771    pwd->b[14] = msa_max_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2772    pwd->b[15] = msa_max_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2773}
   2774
   2775void helper_msa_max_u_h(CPUMIPSState *env,
   2776                        uint32_t wd, uint32_t ws, uint32_t wt)
   2777{
   2778    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2779    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2780    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2781
   2782    pwd->h[0]  = msa_max_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2783    pwd->h[1]  = msa_max_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2784    pwd->h[2]  = msa_max_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2785    pwd->h[3]  = msa_max_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2786    pwd->h[4]  = msa_max_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2787    pwd->h[5]  = msa_max_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2788    pwd->h[6]  = msa_max_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2789    pwd->h[7]  = msa_max_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2790}
   2791
   2792void helper_msa_max_u_w(CPUMIPSState *env,
   2793                        uint32_t wd, uint32_t ws, uint32_t wt)
   2794{
   2795    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2796    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2797    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2798
   2799    pwd->w[0]  = msa_max_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2800    pwd->w[1]  = msa_max_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2801    pwd->w[2]  = msa_max_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2802    pwd->w[3]  = msa_max_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2803}
   2804
   2805void helper_msa_max_u_d(CPUMIPSState *env,
   2806                        uint32_t wd, uint32_t ws, uint32_t wt)
   2807{
   2808    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2809    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2810    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2811
   2812    pwd->d[0]  = msa_max_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2813    pwd->d[1]  = msa_max_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2814}
   2815
   2816
   2817static inline int64_t msa_min_a_df(uint32_t df, int64_t arg1, int64_t arg2)
   2818{
   2819    uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
   2820    uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
   2821    return abs_arg1 < abs_arg2 ? arg1 : arg2;
   2822}
   2823
   2824void helper_msa_min_a_b(CPUMIPSState *env,
   2825                        uint32_t wd, uint32_t ws, uint32_t wt)
   2826{
   2827    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2828    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2829    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2830
   2831    pwd->b[0]  = msa_min_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2832    pwd->b[1]  = msa_min_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2833    pwd->b[2]  = msa_min_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2834    pwd->b[3]  = msa_min_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2835    pwd->b[4]  = msa_min_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2836    pwd->b[5]  = msa_min_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2837    pwd->b[6]  = msa_min_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2838    pwd->b[7]  = msa_min_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2839    pwd->b[8]  = msa_min_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2840    pwd->b[9]  = msa_min_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2841    pwd->b[10] = msa_min_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2842    pwd->b[11] = msa_min_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2843    pwd->b[12] = msa_min_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2844    pwd->b[13] = msa_min_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2845    pwd->b[14] = msa_min_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2846    pwd->b[15] = msa_min_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2847}
   2848
   2849void helper_msa_min_a_h(CPUMIPSState *env,
   2850                        uint32_t wd, uint32_t ws, uint32_t wt)
   2851{
   2852    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2853    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2854    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2855
   2856    pwd->h[0]  = msa_min_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2857    pwd->h[1]  = msa_min_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2858    pwd->h[2]  = msa_min_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2859    pwd->h[3]  = msa_min_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2860    pwd->h[4]  = msa_min_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2861    pwd->h[5]  = msa_min_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2862    pwd->h[6]  = msa_min_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2863    pwd->h[7]  = msa_min_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2864}
   2865
   2866void helper_msa_min_a_w(CPUMIPSState *env,
   2867                        uint32_t wd, uint32_t ws, uint32_t wt)
   2868{
   2869    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2870    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2871    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2872
   2873    pwd->w[0]  = msa_min_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2874    pwd->w[1]  = msa_min_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2875    pwd->w[2]  = msa_min_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2876    pwd->w[3]  = msa_min_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2877}
   2878
   2879void helper_msa_min_a_d(CPUMIPSState *env,
   2880                        uint32_t wd, uint32_t ws, uint32_t wt)
   2881{
   2882    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2883    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2884    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2885
   2886    pwd->d[0]  = msa_min_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2887    pwd->d[1]  = msa_min_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2888}
   2889
   2890
   2891static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   2892{
   2893    return arg1 < arg2 ? arg1 : arg2;
   2894}
   2895
   2896void helper_msa_min_s_b(CPUMIPSState *env,
   2897                        uint32_t wd, uint32_t ws, uint32_t wt)
   2898{
   2899    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2900    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2901    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2902
   2903    pwd->b[0]  = msa_min_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2904    pwd->b[1]  = msa_min_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2905    pwd->b[2]  = msa_min_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2906    pwd->b[3]  = msa_min_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2907    pwd->b[4]  = msa_min_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2908    pwd->b[5]  = msa_min_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2909    pwd->b[6]  = msa_min_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2910    pwd->b[7]  = msa_min_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2911    pwd->b[8]  = msa_min_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2912    pwd->b[9]  = msa_min_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2913    pwd->b[10] = msa_min_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2914    pwd->b[11] = msa_min_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2915    pwd->b[12] = msa_min_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2916    pwd->b[13] = msa_min_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2917    pwd->b[14] = msa_min_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2918    pwd->b[15] = msa_min_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2919}
   2920
   2921void helper_msa_min_s_h(CPUMIPSState *env,
   2922                        uint32_t wd, uint32_t ws, uint32_t wt)
   2923{
   2924    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2925    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2926    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2927
   2928    pwd->h[0]  = msa_min_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2929    pwd->h[1]  = msa_min_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2930    pwd->h[2]  = msa_min_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2931    pwd->h[3]  = msa_min_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2932    pwd->h[4]  = msa_min_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2933    pwd->h[5]  = msa_min_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2934    pwd->h[6]  = msa_min_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2935    pwd->h[7]  = msa_min_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2936}
   2937
   2938void helper_msa_min_s_w(CPUMIPSState *env,
   2939                        uint32_t wd, uint32_t ws, uint32_t wt)
   2940{
   2941    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2942    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2943    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2944
   2945    pwd->w[0]  = msa_min_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2946    pwd->w[1]  = msa_min_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2947    pwd->w[2]  = msa_min_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2948    pwd->w[3]  = msa_min_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2949}
   2950
   2951void helper_msa_min_s_d(CPUMIPSState *env,
   2952                        uint32_t wd, uint32_t ws, uint32_t wt)
   2953{
   2954    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2955    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2956    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2957
   2958    pwd->d[0]  = msa_min_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2959    pwd->d[1]  = msa_min_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2960}
   2961
   2962
   2963static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   2964{
   2965    uint64_t u_arg1 = UNSIGNED(arg1, df);
   2966    uint64_t u_arg2 = UNSIGNED(arg2, df);
   2967    return u_arg1 < u_arg2 ? arg1 : arg2;
   2968}
   2969
   2970void helper_msa_min_u_b(CPUMIPSState *env,
   2971                        uint32_t wd, uint32_t ws, uint32_t wt)
   2972{
   2973    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2974    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2975    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2976
   2977    pwd->b[0]  = msa_min_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2978    pwd->b[1]  = msa_min_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2979    pwd->b[2]  = msa_min_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2980    pwd->b[3]  = msa_min_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2981    pwd->b[4]  = msa_min_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2982    pwd->b[5]  = msa_min_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2983    pwd->b[6]  = msa_min_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2984    pwd->b[7]  = msa_min_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2985    pwd->b[8]  = msa_min_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2986    pwd->b[9]  = msa_min_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2987    pwd->b[10] = msa_min_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2988    pwd->b[11] = msa_min_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2989    pwd->b[12] = msa_min_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2990    pwd->b[13] = msa_min_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2991    pwd->b[14] = msa_min_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2992    pwd->b[15] = msa_min_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2993}
   2994
   2995void helper_msa_min_u_h(CPUMIPSState *env,
   2996                        uint32_t wd, uint32_t ws, uint32_t wt)
   2997{
   2998    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2999    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3000    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3001
   3002    pwd->h[0]  = msa_min_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3003    pwd->h[1]  = msa_min_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3004    pwd->h[2]  = msa_min_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3005    pwd->h[3]  = msa_min_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3006    pwd->h[4]  = msa_min_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3007    pwd->h[5]  = msa_min_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3008    pwd->h[6]  = msa_min_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3009    pwd->h[7]  = msa_min_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3010}
   3011
   3012void helper_msa_min_u_w(CPUMIPSState *env,
   3013                        uint32_t wd, uint32_t ws, uint32_t wt)
   3014{
   3015    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3016    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3017    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3018
   3019    pwd->w[0]  = msa_min_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3020    pwd->w[1]  = msa_min_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3021    pwd->w[2]  = msa_min_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3022    pwd->w[3]  = msa_min_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3023}
   3024
   3025void helper_msa_min_u_d(CPUMIPSState *env,
   3026                        uint32_t wd, uint32_t ws, uint32_t wt)
   3027{
   3028    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3029    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3030    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3031
   3032    pwd->d[0]  = msa_min_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3033    pwd->d[1]  = msa_min_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3034}
   3035
   3036
   3037/*
   3038 * Int Modulo
   3039 * ----------
   3040 *
   3041 * +---------------+----------------------------------------------------------+
   3042 * | MOD_S.B       | Vector Signed Modulo (byte)                              |
   3043 * | MOD_S.H       | Vector Signed Modulo (halfword)                          |
   3044 * | MOD_S.W       | Vector Signed Modulo (word)                              |
   3045 * | MOD_S.D       | Vector Signed Modulo (doubleword)                        |
   3046 * | MOD_U.B       | Vector Unsigned Modulo (byte)                            |
   3047 * | MOD_U.H       | Vector Unsigned Modulo (halfword)                        |
   3048 * | MOD_U.W       | Vector Unsigned Modulo (word)                            |
   3049 * | MOD_U.D       | Vector Unsigned Modulo (doubleword)                      |
   3050 * +---------------+----------------------------------------------------------+
   3051 */
   3052
   3053static inline int64_t msa_mod_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   3054{
   3055    if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
   3056        return 0;
   3057    }
   3058    return arg2 ? arg1 % arg2 : arg1;
   3059}
   3060
   3061void helper_msa_mod_s_b(CPUMIPSState *env,
   3062                        uint32_t wd, uint32_t ws, uint32_t wt)
   3063{
   3064    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3065    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3066    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3067
   3068    pwd->b[0]  = msa_mod_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3069    pwd->b[1]  = msa_mod_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3070    pwd->b[2]  = msa_mod_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3071    pwd->b[3]  = msa_mod_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3072    pwd->b[4]  = msa_mod_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3073    pwd->b[5]  = msa_mod_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3074    pwd->b[6]  = msa_mod_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3075    pwd->b[7]  = msa_mod_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3076    pwd->b[8]  = msa_mod_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3077    pwd->b[9]  = msa_mod_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3078    pwd->b[10] = msa_mod_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3079    pwd->b[11] = msa_mod_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3080    pwd->b[12] = msa_mod_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3081    pwd->b[13] = msa_mod_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3082    pwd->b[14] = msa_mod_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3083    pwd->b[15] = msa_mod_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3084}
   3085
   3086void helper_msa_mod_s_h(CPUMIPSState *env,
   3087                        uint32_t wd, uint32_t ws, uint32_t wt)
   3088{
   3089    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3090    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3091    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3092
   3093    pwd->h[0]  = msa_mod_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3094    pwd->h[1]  = msa_mod_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3095    pwd->h[2]  = msa_mod_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3096    pwd->h[3]  = msa_mod_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3097    pwd->h[4]  = msa_mod_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3098    pwd->h[5]  = msa_mod_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3099    pwd->h[6]  = msa_mod_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3100    pwd->h[7]  = msa_mod_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3101}
   3102
   3103void helper_msa_mod_s_w(CPUMIPSState *env,
   3104                        uint32_t wd, uint32_t ws, uint32_t wt)
   3105{
   3106    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3107    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3108    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3109
   3110    pwd->w[0]  = msa_mod_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3111    pwd->w[1]  = msa_mod_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3112    pwd->w[2]  = msa_mod_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3113    pwd->w[3]  = msa_mod_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3114}
   3115
   3116void helper_msa_mod_s_d(CPUMIPSState *env,
   3117                        uint32_t wd, uint32_t ws, uint32_t wt)
   3118{
   3119    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3120    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3121    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3122
   3123    pwd->d[0]  = msa_mod_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3124    pwd->d[1]  = msa_mod_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3125}
   3126
   3127static inline int64_t msa_mod_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   3128{
   3129    uint64_t u_arg1 = UNSIGNED(arg1, df);
   3130    uint64_t u_arg2 = UNSIGNED(arg2, df);
   3131    return u_arg2 ? u_arg1 % u_arg2 : u_arg1;
   3132}
   3133
   3134void helper_msa_mod_u_b(CPUMIPSState *env,
   3135                        uint32_t wd, uint32_t ws, uint32_t wt)
   3136{
   3137    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3138    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3139    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3140
   3141    pwd->b[0]  = msa_mod_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3142    pwd->b[1]  = msa_mod_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3143    pwd->b[2]  = msa_mod_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3144    pwd->b[3]  = msa_mod_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3145    pwd->b[4]  = msa_mod_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3146    pwd->b[5]  = msa_mod_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3147    pwd->b[6]  = msa_mod_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3148    pwd->b[7]  = msa_mod_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3149    pwd->b[8]  = msa_mod_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3150    pwd->b[9]  = msa_mod_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3151    pwd->b[10] = msa_mod_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3152    pwd->b[11] = msa_mod_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3153    pwd->b[12] = msa_mod_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3154    pwd->b[13] = msa_mod_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3155    pwd->b[14] = msa_mod_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3156    pwd->b[15] = msa_mod_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3157}
   3158
   3159void helper_msa_mod_u_h(CPUMIPSState *env,
   3160                        uint32_t wd, uint32_t ws, uint32_t wt)
   3161{
   3162    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3163    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3164    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3165
   3166    pwd->h[0]  = msa_mod_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3167    pwd->h[1]  = msa_mod_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3168    pwd->h[2]  = msa_mod_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3169    pwd->h[3]  = msa_mod_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3170    pwd->h[4]  = msa_mod_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3171    pwd->h[5]  = msa_mod_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3172    pwd->h[6]  = msa_mod_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3173    pwd->h[7]  = msa_mod_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3174}
   3175
   3176void helper_msa_mod_u_w(CPUMIPSState *env,
   3177                        uint32_t wd, uint32_t ws, uint32_t wt)
   3178{
   3179    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3180    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3181    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3182
   3183    pwd->w[0]  = msa_mod_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3184    pwd->w[1]  = msa_mod_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3185    pwd->w[2]  = msa_mod_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3186    pwd->w[3]  = msa_mod_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3187}
   3188
   3189void helper_msa_mod_u_d(CPUMIPSState *env,
   3190                        uint32_t wd, uint32_t ws, uint32_t wt)
   3191{
   3192    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3193    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3194    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3195
   3196    pwd->d[0]  = msa_mod_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3197    pwd->d[1]  = msa_mod_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3198}
   3199
   3200
   3201/*
   3202 * Int Multiply
   3203 * ------------
   3204 *
   3205 * +---------------+----------------------------------------------------------+
   3206 * | MADDV.B       | Vector Multiply and Add (byte)                           |
   3207 * | MADDV.H       | Vector Multiply and Add (halfword)                       |
   3208 * | MADDV.W       | Vector Multiply and Add (word)                           |
   3209 * | MADDV.D       | Vector Multiply and Add (doubleword)                     |
   3210 * | MSUBV.B       | Vector Multiply and Subtract (byte)                      |
   3211 * | MSUBV.H       | Vector Multiply and Subtract (halfword)                  |
   3212 * | MSUBV.W       | Vector Multiply and Subtract (word)                      |
   3213 * | MSUBV.D       | Vector Multiply and Subtract (doubleword)                |
   3214 * | MULV.B        | Vector Multiply (byte)                                   |
   3215 * | MULV.H        | Vector Multiply (halfword)                               |
   3216 * | MULV.W        | Vector Multiply (word)                                   |
   3217 * | MULV.D        | Vector Multiply (doubleword)                             |
   3218 * +---------------+----------------------------------------------------------+
   3219 */
   3220
   3221static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1,
   3222                                   int64_t arg2)
   3223{
   3224    return dest + arg1 * arg2;
   3225}
   3226
   3227void helper_msa_maddv_b(CPUMIPSState *env,
   3228                        uint32_t wd, uint32_t ws, uint32_t wt)
   3229{
   3230    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3231    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3232    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3233
   3234    pwd->b[0]  = msa_maddv_df(DF_BYTE, pwt->b[0],  pws->b[0],  pwt->b[0]);
   3235    pwd->b[1]  = msa_maddv_df(DF_BYTE, pwt->b[1],  pws->b[1],  pwt->b[1]);
   3236    pwd->b[2]  = msa_maddv_df(DF_BYTE, pwt->b[2],  pws->b[2],  pwt->b[2]);
   3237    pwd->b[3]  = msa_maddv_df(DF_BYTE, pwt->b[3],  pws->b[3],  pwt->b[3]);
   3238    pwd->b[4]  = msa_maddv_df(DF_BYTE, pwt->b[4],  pws->b[4],  pwt->b[4]);
   3239    pwd->b[5]  = msa_maddv_df(DF_BYTE, pwt->b[5],  pws->b[5],  pwt->b[5]);
   3240    pwd->b[6]  = msa_maddv_df(DF_BYTE, pwt->b[6],  pws->b[6],  pwt->b[6]);
   3241    pwd->b[7]  = msa_maddv_df(DF_BYTE, pwt->b[7],  pws->b[7],  pwt->b[7]);
   3242    pwd->b[8]  = msa_maddv_df(DF_BYTE, pwt->b[8],  pws->b[8],  pwt->b[8]);
   3243    pwd->b[9]  = msa_maddv_df(DF_BYTE, pwt->b[9],  pws->b[9],  pwt->b[9]);
   3244    pwd->b[10] = msa_maddv_df(DF_BYTE, pwt->b[10], pws->b[10], pwt->b[10]);
   3245    pwd->b[11] = msa_maddv_df(DF_BYTE, pwt->b[11], pws->b[11], pwt->b[11]);
   3246    pwd->b[12] = msa_maddv_df(DF_BYTE, pwt->b[12], pws->b[12], pwt->b[12]);
   3247    pwd->b[13] = msa_maddv_df(DF_BYTE, pwt->b[13], pws->b[13], pwt->b[13]);
   3248    pwd->b[14] = msa_maddv_df(DF_BYTE, pwt->b[14], pws->b[14], pwt->b[14]);
   3249    pwd->b[15] = msa_maddv_df(DF_BYTE, pwt->b[15], pws->b[15], pwt->b[15]);
   3250}
   3251
   3252void helper_msa_maddv_h(CPUMIPSState *env,
   3253                        uint32_t wd, uint32_t ws, uint32_t wt)
   3254{
   3255    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3256    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3257    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3258
   3259    pwd->h[0]  = msa_maddv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
   3260    pwd->h[1]  = msa_maddv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
   3261    pwd->h[2]  = msa_maddv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
   3262    pwd->h[3]  = msa_maddv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
   3263    pwd->h[4]  = msa_maddv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
   3264    pwd->h[5]  = msa_maddv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
   3265    pwd->h[6]  = msa_maddv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
   3266    pwd->h[7]  = msa_maddv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
   3267}
   3268
   3269void helper_msa_maddv_w(CPUMIPSState *env,
   3270                        uint32_t wd, uint32_t ws, uint32_t wt)
   3271{
   3272    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3273    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3274    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3275
   3276    pwd->w[0]  = msa_maddv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
   3277    pwd->w[1]  = msa_maddv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
   3278    pwd->w[2]  = msa_maddv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
   3279    pwd->w[3]  = msa_maddv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
   3280}
   3281
   3282void helper_msa_maddv_d(CPUMIPSState *env,
   3283                        uint32_t wd, uint32_t ws, uint32_t wt)
   3284{
   3285    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3286    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3287    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3288
   3289    pwd->d[0]  = msa_maddv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
   3290    pwd->d[1]  = msa_maddv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
   3291}
   3292
   3293static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1,
   3294                                   int64_t arg2)
   3295{
   3296    return dest - arg1 * arg2;
   3297}
   3298
   3299void helper_msa_msubv_b(CPUMIPSState *env,
   3300                        uint32_t wd, uint32_t ws, uint32_t wt)
   3301{
   3302    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3303    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3304    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3305
   3306    pwd->b[0]  = msa_msubv_df(DF_BYTE, pwt->b[0],  pws->b[0],  pwt->b[0]);
   3307    pwd->b[1]  = msa_msubv_df(DF_BYTE, pwt->b[1],  pws->b[1],  pwt->b[1]);
   3308    pwd->b[2]  = msa_msubv_df(DF_BYTE, pwt->b[2],  pws->b[2],  pwt->b[2]);
   3309    pwd->b[3]  = msa_msubv_df(DF_BYTE, pwt->b[3],  pws->b[3],  pwt->b[3]);
   3310    pwd->b[4]  = msa_msubv_df(DF_BYTE, pwt->b[4],  pws->b[4],  pwt->b[4]);
   3311    pwd->b[5]  = msa_msubv_df(DF_BYTE, pwt->b[5],  pws->b[5],  pwt->b[5]);
   3312    pwd->b[6]  = msa_msubv_df(DF_BYTE, pwt->b[6],  pws->b[6],  pwt->b[6]);
   3313    pwd->b[7]  = msa_msubv_df(DF_BYTE, pwt->b[7],  pws->b[7],  pwt->b[7]);
   3314    pwd->b[8]  = msa_msubv_df(DF_BYTE, pwt->b[8],  pws->b[8],  pwt->b[8]);
   3315    pwd->b[9]  = msa_msubv_df(DF_BYTE, pwt->b[9],  pws->b[9],  pwt->b[9]);
   3316    pwd->b[10] = msa_msubv_df(DF_BYTE, pwt->b[10], pws->b[10], pwt->b[10]);
   3317    pwd->b[11] = msa_msubv_df(DF_BYTE, pwt->b[11], pws->b[11], pwt->b[11]);
   3318    pwd->b[12] = msa_msubv_df(DF_BYTE, pwt->b[12], pws->b[12], pwt->b[12]);
   3319    pwd->b[13] = msa_msubv_df(DF_BYTE, pwt->b[13], pws->b[13], pwt->b[13]);
   3320    pwd->b[14] = msa_msubv_df(DF_BYTE, pwt->b[14], pws->b[14], pwt->b[14]);
   3321    pwd->b[15] = msa_msubv_df(DF_BYTE, pwt->b[15], pws->b[15], pwt->b[15]);
   3322}
   3323
   3324void helper_msa_msubv_h(CPUMIPSState *env,
   3325                        uint32_t wd, uint32_t ws, uint32_t wt)
   3326{
   3327    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3328    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3329    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3330
   3331    pwd->h[0]  = msa_msubv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
   3332    pwd->h[1]  = msa_msubv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
   3333    pwd->h[2]  = msa_msubv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
   3334    pwd->h[3]  = msa_msubv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
   3335    pwd->h[4]  = msa_msubv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
   3336    pwd->h[5]  = msa_msubv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
   3337    pwd->h[6]  = msa_msubv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
   3338    pwd->h[7]  = msa_msubv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
   3339}
   3340
   3341void helper_msa_msubv_w(CPUMIPSState *env,
   3342                        uint32_t wd, uint32_t ws, uint32_t wt)
   3343{
   3344    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3345    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3346    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3347
   3348    pwd->w[0]  = msa_msubv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
   3349    pwd->w[1]  = msa_msubv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
   3350    pwd->w[2]  = msa_msubv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
   3351    pwd->w[3]  = msa_msubv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
   3352}
   3353
   3354void helper_msa_msubv_d(CPUMIPSState *env,
   3355                        uint32_t wd, uint32_t ws, uint32_t wt)
   3356{
   3357    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3358    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3359    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3360
   3361    pwd->d[0]  = msa_msubv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
   3362    pwd->d[1]  = msa_msubv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
   3363}
   3364
   3365
   3366static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2)
   3367{
   3368    return arg1 * arg2;
   3369}
   3370
   3371void helper_msa_mulv_b(CPUMIPSState *env,
   3372                       uint32_t wd, uint32_t ws, uint32_t wt)
   3373{
   3374    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3375    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3376    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3377
   3378    pwd->b[0]  = msa_mulv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3379    pwd->b[1]  = msa_mulv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3380    pwd->b[2]  = msa_mulv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3381    pwd->b[3]  = msa_mulv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3382    pwd->b[4]  = msa_mulv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3383    pwd->b[5]  = msa_mulv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3384    pwd->b[6]  = msa_mulv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3385    pwd->b[7]  = msa_mulv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3386    pwd->b[8]  = msa_mulv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3387    pwd->b[9]  = msa_mulv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3388    pwd->b[10] = msa_mulv_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3389    pwd->b[11] = msa_mulv_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3390    pwd->b[12] = msa_mulv_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3391    pwd->b[13] = msa_mulv_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3392    pwd->b[14] = msa_mulv_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3393    pwd->b[15] = msa_mulv_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3394}
   3395
   3396void helper_msa_mulv_h(CPUMIPSState *env,
   3397                       uint32_t wd, uint32_t ws, uint32_t wt)
   3398{
   3399    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3400    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3401    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3402
   3403    pwd->h[0]  = msa_mulv_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3404    pwd->h[1]  = msa_mulv_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3405    pwd->h[2]  = msa_mulv_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3406    pwd->h[3]  = msa_mulv_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3407    pwd->h[4]  = msa_mulv_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3408    pwd->h[5]  = msa_mulv_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3409    pwd->h[6]  = msa_mulv_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3410    pwd->h[7]  = msa_mulv_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3411}
   3412
   3413void helper_msa_mulv_w(CPUMIPSState *env,
   3414                       uint32_t wd, uint32_t ws, uint32_t wt)
   3415{
   3416    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3417    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3418    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3419
   3420    pwd->w[0]  = msa_mulv_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3421    pwd->w[1]  = msa_mulv_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3422    pwd->w[2]  = msa_mulv_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3423    pwd->w[3]  = msa_mulv_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3424}
   3425
   3426void helper_msa_mulv_d(CPUMIPSState *env,
   3427                       uint32_t wd, uint32_t ws, uint32_t wt)
   3428{
   3429    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3430    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3431    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3432
   3433    pwd->d[0]  = msa_mulv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3434    pwd->d[1]  = msa_mulv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3435}
   3436
   3437
   3438/*
   3439 * Int Subtract
   3440 * ------------
   3441 *
   3442 * +---------------+----------------------------------------------------------+
   3443 * | ASUB_S.B      | Vector Absolute Values of Signed Subtract (byte)         |
   3444 * | ASUB_S.H      | Vector Absolute Values of Signed Subtract (halfword)     |
   3445 * | ASUB_S.W      | Vector Absolute Values of Signed Subtract (word)         |
   3446 * | ASUB_S.D      | Vector Absolute Values of Signed Subtract (doubleword)   |
   3447 * | ASUB_U.B      | Vector Absolute Values of Unsigned Subtract (byte)       |
   3448 * | ASUB_U.H      | Vector Absolute Values of Unsigned Subtract (halfword)   |
   3449 * | ASUB_U.W      | Vector Absolute Values of Unsigned Subtract (word)       |
   3450 * | ASUB_U.D      | Vector Absolute Values of Unsigned Subtract (doubleword) |
   3451 * | HSUB_S.H      | Vector Signed Horizontal Subtract (halfword)             |
   3452 * | HSUB_S.W      | Vector Signed Horizontal Subtract (word)                 |
   3453 * | HSUB_S.D      | Vector Signed Horizontal Subtract (doubleword)           |
 * | HSUB_U.H      | Vector Unsigned Horizontal Subtract (halfword)           |
 * | HSUB_U.W      | Vector Unsigned Horizontal Subtract (word)               |
 * | HSUB_U.D      | Vector Unsigned Horizontal Subtract (doubleword)         |
   3457 * | SUBS_S.B      | Vector Signed Saturated Subtract (of Signed) (byte)      |
   3458 * | SUBS_S.H      | Vector Signed Saturated Subtract (of Signed) (halfword)  |
   3459 * | SUBS_S.W      | Vector Signed Saturated Subtract (of Signed) (word)      |
   3460 * | SUBS_S.D      | Vector Signed Saturated Subtract (of Signed) (doubleword)|
   3461 * | SUBS_U.B      | Vector Unsigned Saturated Subtract (of Uns.) (byte)      |
   3462 * | SUBS_U.H      | Vector Unsigned Saturated Subtract (of Uns.) (halfword)  |
   3463 * | SUBS_U.W      | Vector Unsigned Saturated Subtract (of Uns.) (word)      |
   3464 * | SUBS_U.D      | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)|
   3465 * | SUBSUS_U.B    | Vector Uns. Sat. Subtract (of S. from Uns.) (byte)       |
   3466 * | SUBSUS_U.H    | Vector Uns. Sat. Subtract (of S. from Uns.) (halfword)   |
   3467 * | SUBSUS_U.W    | Vector Uns. Sat. Subtract (of S. from Uns.) (word)       |
   3468 * | SUBSUS_U.D    | Vector Uns. Sat. Subtract (of S. from Uns.) (doubleword) |
   3469 * | SUBSUU_S.B    | Vector Signed Saturated Subtract (of Uns.) (byte)        |
   3470 * | SUBSUU_S.H    | Vector Signed Saturated Subtract (of Uns.) (halfword)    |
   3471 * | SUBSUU_S.W    | Vector Signed Saturated Subtract (of Uns.) (word)        |
   3472 * | SUBSUU_S.D    | Vector Signed Saturated Subtract (of Uns.) (doubleword)  |
   3473 * | SUBV.B        | Vector Subtract (byte)                                   |
   3474 * | SUBV.H        | Vector Subtract (halfword)                               |
   3475 * | SUBV.W        | Vector Subtract (word)                                   |
   3476 * | SUBV.D        | Vector Subtract (doubleword)                             |
   3477 * +---------------+----------------------------------------------------------+
   3478 */
   3479
   3480
   3481static inline int64_t msa_asub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   3482{
   3483    /* signed compare */
   3484    return (arg1 < arg2) ?
   3485        (uint64_t)(arg2 - arg1) : (uint64_t)(arg1 - arg2);
   3486}
   3487
   3488void helper_msa_asub_s_b(CPUMIPSState *env,
   3489                         uint32_t wd, uint32_t ws, uint32_t wt)
   3490{
   3491    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3492    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3493    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3494
   3495    pwd->b[0]  = msa_asub_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3496    pwd->b[1]  = msa_asub_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3497    pwd->b[2]  = msa_asub_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3498    pwd->b[3]  = msa_asub_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3499    pwd->b[4]  = msa_asub_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3500    pwd->b[5]  = msa_asub_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3501    pwd->b[6]  = msa_asub_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3502    pwd->b[7]  = msa_asub_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3503    pwd->b[8]  = msa_asub_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3504    pwd->b[9]  = msa_asub_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3505    pwd->b[10] = msa_asub_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3506    pwd->b[11] = msa_asub_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3507    pwd->b[12] = msa_asub_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3508    pwd->b[13] = msa_asub_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3509    pwd->b[14] = msa_asub_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3510    pwd->b[15] = msa_asub_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3511}
   3512
   3513void helper_msa_asub_s_h(CPUMIPSState *env,
   3514                         uint32_t wd, uint32_t ws, uint32_t wt)
   3515{
   3516    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3517    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3518    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3519
   3520    pwd->h[0]  = msa_asub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3521    pwd->h[1]  = msa_asub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3522    pwd->h[2]  = msa_asub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3523    pwd->h[3]  = msa_asub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3524    pwd->h[4]  = msa_asub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3525    pwd->h[5]  = msa_asub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3526    pwd->h[6]  = msa_asub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3527    pwd->h[7]  = msa_asub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3528}
   3529
   3530void helper_msa_asub_s_w(CPUMIPSState *env,
   3531                         uint32_t wd, uint32_t ws, uint32_t wt)
   3532{
   3533    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3534    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3535    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3536
   3537    pwd->w[0]  = msa_asub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3538    pwd->w[1]  = msa_asub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3539    pwd->w[2]  = msa_asub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3540    pwd->w[3]  = msa_asub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3541}
   3542
   3543void helper_msa_asub_s_d(CPUMIPSState *env,
   3544                         uint32_t wd, uint32_t ws, uint32_t wt)
   3545{
   3546    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3547    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3548    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3549
   3550    pwd->d[0]  = msa_asub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3551    pwd->d[1]  = msa_asub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3552}
   3553
   3554
   3555static inline uint64_t msa_asub_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
   3556{
   3557    uint64_t u_arg1 = UNSIGNED(arg1, df);
   3558    uint64_t u_arg2 = UNSIGNED(arg2, df);
   3559    /* unsigned compare */
   3560    return (u_arg1 < u_arg2) ?
   3561        (uint64_t)(u_arg2 - u_arg1) : (uint64_t)(u_arg1 - u_arg2);
   3562}
   3563
   3564void helper_msa_asub_u_b(CPUMIPSState *env,
   3565                         uint32_t wd, uint32_t ws, uint32_t wt)
   3566{
   3567    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3568    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3569    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3570
   3571    pwd->b[0]  = msa_asub_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3572    pwd->b[1]  = msa_asub_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3573    pwd->b[2]  = msa_asub_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3574    pwd->b[3]  = msa_asub_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3575    pwd->b[4]  = msa_asub_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3576    pwd->b[5]  = msa_asub_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3577    pwd->b[6]  = msa_asub_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3578    pwd->b[7]  = msa_asub_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3579    pwd->b[8]  = msa_asub_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3580    pwd->b[9]  = msa_asub_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3581    pwd->b[10] = msa_asub_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3582    pwd->b[11] = msa_asub_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3583    pwd->b[12] = msa_asub_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3584    pwd->b[13] = msa_asub_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3585    pwd->b[14] = msa_asub_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3586    pwd->b[15] = msa_asub_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3587}
   3588
   3589void helper_msa_asub_u_h(CPUMIPSState *env,
   3590                         uint32_t wd, uint32_t ws, uint32_t wt)
   3591{
   3592    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3593    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3594    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3595
   3596    pwd->h[0]  = msa_asub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3597    pwd->h[1]  = msa_asub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3598    pwd->h[2]  = msa_asub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3599    pwd->h[3]  = msa_asub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3600    pwd->h[4]  = msa_asub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3601    pwd->h[5]  = msa_asub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3602    pwd->h[6]  = msa_asub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3603    pwd->h[7]  = msa_asub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3604}
   3605
   3606void helper_msa_asub_u_w(CPUMIPSState *env,
   3607                         uint32_t wd, uint32_t ws, uint32_t wt)
   3608{
   3609    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3610    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3611    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3612
   3613    pwd->w[0]  = msa_asub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3614    pwd->w[1]  = msa_asub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3615    pwd->w[2]  = msa_asub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3616    pwd->w[3]  = msa_asub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3617}
   3618
   3619void helper_msa_asub_u_d(CPUMIPSState *env,
   3620                         uint32_t wd, uint32_t ws, uint32_t wt)
   3621{
   3622    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3623    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3624    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3625
   3626    pwd->d[0]  = msa_asub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3627    pwd->d[1]  = msa_asub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3628}
   3629
   3630
   3631static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   3632{
   3633    return SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df);
   3634}
   3635
   3636void helper_msa_hsub_s_h(CPUMIPSState *env,
   3637                         uint32_t wd, uint32_t ws, uint32_t wt)
   3638{
   3639    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3640    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3641    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3642
   3643    pwd->h[0]  = msa_hsub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3644    pwd->h[1]  = msa_hsub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3645    pwd->h[2]  = msa_hsub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3646    pwd->h[3]  = msa_hsub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3647    pwd->h[4]  = msa_hsub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3648    pwd->h[5]  = msa_hsub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3649    pwd->h[6]  = msa_hsub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3650    pwd->h[7]  = msa_hsub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3651}
   3652
   3653void helper_msa_hsub_s_w(CPUMIPSState *env,
   3654                         uint32_t wd, uint32_t ws, uint32_t wt)
   3655{
   3656    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3657    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3658    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3659
   3660    pwd->w[0]  = msa_hsub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3661    pwd->w[1]  = msa_hsub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3662    pwd->w[2]  = msa_hsub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3663    pwd->w[3]  = msa_hsub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3664}
   3665
   3666void helper_msa_hsub_s_d(CPUMIPSState *env,
   3667                         uint32_t wd, uint32_t ws, uint32_t wt)
   3668{
   3669    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3670    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3671    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3672
   3673    pwd->d[0]  = msa_hsub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3674    pwd->d[1]  = msa_hsub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3675}
   3676
   3677
   3678static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   3679{
   3680    return UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df);
   3681}
   3682
   3683void helper_msa_hsub_u_h(CPUMIPSState *env,
   3684                         uint32_t wd, uint32_t ws, uint32_t wt)
   3685{
   3686    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3687    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3688    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3689
   3690    pwd->h[0]  = msa_hsub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3691    pwd->h[1]  = msa_hsub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3692    pwd->h[2]  = msa_hsub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3693    pwd->h[3]  = msa_hsub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3694    pwd->h[4]  = msa_hsub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3695    pwd->h[5]  = msa_hsub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3696    pwd->h[6]  = msa_hsub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3697    pwd->h[7]  = msa_hsub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3698}
   3699
   3700void helper_msa_hsub_u_w(CPUMIPSState *env,
   3701                         uint32_t wd, uint32_t ws, uint32_t wt)
   3702{
   3703    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3704    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3705    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3706
   3707    pwd->w[0]  = msa_hsub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3708    pwd->w[1]  = msa_hsub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3709    pwd->w[2]  = msa_hsub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3710    pwd->w[3]  = msa_hsub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3711}
   3712
   3713void helper_msa_hsub_u_d(CPUMIPSState *env,
   3714                         uint32_t wd, uint32_t ws, uint32_t wt)
   3715{
   3716    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3717    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3718    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3719
   3720    pwd->d[0]  = msa_hsub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3721    pwd->d[1]  = msa_hsub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3722}
   3723
   3724
   3725static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   3726{
   3727    int64_t max_int = DF_MAX_INT(df);
   3728    int64_t min_int = DF_MIN_INT(df);
   3729    if (arg2 > 0) {
   3730        return (min_int + arg2 < arg1) ? arg1 - arg2 : min_int;
   3731    } else {
   3732        return (arg1 < max_int + arg2) ? arg1 - arg2 : max_int;
   3733    }
   3734}
   3735
   3736void helper_msa_subs_s_b(CPUMIPSState *env,
   3737                         uint32_t wd, uint32_t ws, uint32_t wt)
   3738{
   3739    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3740    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3741    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3742
   3743    pwd->b[0]  = msa_subs_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3744    pwd->b[1]  = msa_subs_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3745    pwd->b[2]  = msa_subs_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3746    pwd->b[3]  = msa_subs_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3747    pwd->b[4]  = msa_subs_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3748    pwd->b[5]  = msa_subs_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3749    pwd->b[6]  = msa_subs_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3750    pwd->b[7]  = msa_subs_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3751    pwd->b[8]  = msa_subs_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3752    pwd->b[9]  = msa_subs_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3753    pwd->b[10] = msa_subs_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3754    pwd->b[11] = msa_subs_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3755    pwd->b[12] = msa_subs_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3756    pwd->b[13] = msa_subs_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3757    pwd->b[14] = msa_subs_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3758    pwd->b[15] = msa_subs_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3759}
   3760
   3761void helper_msa_subs_s_h(CPUMIPSState *env,
   3762                         uint32_t wd, uint32_t ws, uint32_t wt)
   3763{
   3764    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3765    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3766    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3767
   3768    pwd->h[0]  = msa_subs_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3769    pwd->h[1]  = msa_subs_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3770    pwd->h[2]  = msa_subs_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3771    pwd->h[3]  = msa_subs_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3772    pwd->h[4]  = msa_subs_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3773    pwd->h[5]  = msa_subs_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3774    pwd->h[6]  = msa_subs_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3775    pwd->h[7]  = msa_subs_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3776}
   3777
   3778void helper_msa_subs_s_w(CPUMIPSState *env,
   3779                         uint32_t wd, uint32_t ws, uint32_t wt)
   3780{
   3781    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3782    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3783    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3784
   3785    pwd->w[0]  = msa_subs_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3786    pwd->w[1]  = msa_subs_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3787    pwd->w[2]  = msa_subs_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3788    pwd->w[3]  = msa_subs_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3789}
   3790
   3791void helper_msa_subs_s_d(CPUMIPSState *env,
   3792                         uint32_t wd, uint32_t ws, uint32_t wt)
   3793{
   3794    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3795    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3796    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3797
   3798    pwd->d[0]  = msa_subs_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3799    pwd->d[1]  = msa_subs_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3800}
   3801
   3802
   3803static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   3804{
   3805    uint64_t u_arg1 = UNSIGNED(arg1, df);
   3806    uint64_t u_arg2 = UNSIGNED(arg2, df);
   3807    return (u_arg1 > u_arg2) ? u_arg1 - u_arg2 : 0;
   3808}
   3809
   3810void helper_msa_subs_u_b(CPUMIPSState *env,
   3811                         uint32_t wd, uint32_t ws, uint32_t wt)
   3812{
   3813    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3814    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3815    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3816
   3817    pwd->b[0]  = msa_subs_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3818    pwd->b[1]  = msa_subs_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3819    pwd->b[2]  = msa_subs_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3820    pwd->b[3]  = msa_subs_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3821    pwd->b[4]  = msa_subs_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3822    pwd->b[5]  = msa_subs_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3823    pwd->b[6]  = msa_subs_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3824    pwd->b[7]  = msa_subs_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3825    pwd->b[8]  = msa_subs_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3826    pwd->b[9]  = msa_subs_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3827    pwd->b[10] = msa_subs_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3828    pwd->b[11] = msa_subs_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3829    pwd->b[12] = msa_subs_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3830    pwd->b[13] = msa_subs_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3831    pwd->b[14] = msa_subs_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3832    pwd->b[15] = msa_subs_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3833}
   3834
   3835void helper_msa_subs_u_h(CPUMIPSState *env,
   3836                         uint32_t wd, uint32_t ws, uint32_t wt)
   3837{
   3838    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3839    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3840    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3841
   3842    pwd->h[0]  = msa_subs_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3843    pwd->h[1]  = msa_subs_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3844    pwd->h[2]  = msa_subs_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3845    pwd->h[3]  = msa_subs_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3846    pwd->h[4]  = msa_subs_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3847    pwd->h[5]  = msa_subs_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3848    pwd->h[6]  = msa_subs_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3849    pwd->h[7]  = msa_subs_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3850}
   3851
   3852void helper_msa_subs_u_w(CPUMIPSState *env,
   3853                         uint32_t wd, uint32_t ws, uint32_t wt)
   3854{
   3855    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3856    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3857    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3858
   3859    pwd->w[0]  = msa_subs_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3860    pwd->w[1]  = msa_subs_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3861    pwd->w[2]  = msa_subs_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3862    pwd->w[3]  = msa_subs_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3863}
   3864
   3865void helper_msa_subs_u_d(CPUMIPSState *env,
   3866                         uint32_t wd, uint32_t ws, uint32_t wt)
   3867{
   3868    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3869    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3870    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3871
   3872    pwd->d[0]  = msa_subs_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3873    pwd->d[1]  = msa_subs_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3874}
   3875
   3876
   3877static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   3878{
   3879    uint64_t u_arg1 = UNSIGNED(arg1, df);
   3880    uint64_t max_uint = DF_MAX_UINT(df);
   3881    if (arg2 >= 0) {
   3882        uint64_t u_arg2 = (uint64_t)arg2;
   3883        return (u_arg1 > u_arg2) ?
   3884            (int64_t)(u_arg1 - u_arg2) :
   3885            0;
   3886    } else {
   3887        uint64_t u_arg2 = (uint64_t)(-arg2);
   3888        return (u_arg1 < max_uint - u_arg2) ?
   3889            (int64_t)(u_arg1 + u_arg2) :
   3890            (int64_t)max_uint;
   3891    }
   3892}
   3893
   3894void helper_msa_subsus_u_b(CPUMIPSState *env,
   3895                           uint32_t wd, uint32_t ws, uint32_t wt)
   3896{
   3897    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3898    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3899    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3900
   3901    pwd->b[0]  = msa_subsus_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3902    pwd->b[1]  = msa_subsus_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3903    pwd->b[2]  = msa_subsus_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3904    pwd->b[3]  = msa_subsus_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3905    pwd->b[4]  = msa_subsus_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3906    pwd->b[5]  = msa_subsus_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3907    pwd->b[6]  = msa_subsus_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3908    pwd->b[7]  = msa_subsus_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3909    pwd->b[8]  = msa_subsus_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3910    pwd->b[9]  = msa_subsus_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3911    pwd->b[10] = msa_subsus_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3912    pwd->b[11] = msa_subsus_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3913    pwd->b[12] = msa_subsus_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3914    pwd->b[13] = msa_subsus_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3915    pwd->b[14] = msa_subsus_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3916    pwd->b[15] = msa_subsus_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3917}
   3918
   3919void helper_msa_subsus_u_h(CPUMIPSState *env,
   3920                           uint32_t wd, uint32_t ws, uint32_t wt)
   3921{
   3922    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3923    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3924    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3925
   3926    pwd->h[0]  = msa_subsus_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3927    pwd->h[1]  = msa_subsus_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3928    pwd->h[2]  = msa_subsus_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3929    pwd->h[3]  = msa_subsus_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3930    pwd->h[4]  = msa_subsus_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3931    pwd->h[5]  = msa_subsus_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3932    pwd->h[6]  = msa_subsus_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3933    pwd->h[7]  = msa_subsus_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3934}
   3935
   3936void helper_msa_subsus_u_w(CPUMIPSState *env,
   3937                           uint32_t wd, uint32_t ws, uint32_t wt)
   3938{
   3939    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3940    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3941    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3942
   3943    pwd->w[0]  = msa_subsus_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3944    pwd->w[1]  = msa_subsus_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3945    pwd->w[2]  = msa_subsus_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3946    pwd->w[3]  = msa_subsus_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3947}
   3948
   3949void helper_msa_subsus_u_d(CPUMIPSState *env,
   3950                           uint32_t wd, uint32_t ws, uint32_t wt)
   3951{
   3952    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3953    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3954    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3955
   3956    pwd->d[0]  = msa_subsus_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3957    pwd->d[1]  = msa_subsus_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3958}
   3959
   3960
   3961static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   3962{
   3963    uint64_t u_arg1 = UNSIGNED(arg1, df);
   3964    uint64_t u_arg2 = UNSIGNED(arg2, df);
   3965    int64_t max_int = DF_MAX_INT(df);
   3966    int64_t min_int = DF_MIN_INT(df);
   3967    if (u_arg1 > u_arg2) {
   3968        return u_arg1 - u_arg2 < (uint64_t)max_int ?
   3969            (int64_t)(u_arg1 - u_arg2) :
   3970            max_int;
   3971    } else {
   3972        return u_arg2 - u_arg1 < (uint64_t)(-min_int) ?
   3973            (int64_t)(u_arg1 - u_arg2) :
   3974            min_int;
   3975    }
   3976}
   3977
   3978void helper_msa_subsuu_s_b(CPUMIPSState *env,
   3979                           uint32_t wd, uint32_t ws, uint32_t wt)
   3980{
   3981    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3982    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3983    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3984
   3985    pwd->b[0]  = msa_subsuu_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3986    pwd->b[1]  = msa_subsuu_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3987    pwd->b[2]  = msa_subsuu_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3988    pwd->b[3]  = msa_subsuu_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3989    pwd->b[4]  = msa_subsuu_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3990    pwd->b[5]  = msa_subsuu_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3991    pwd->b[6]  = msa_subsuu_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3992    pwd->b[7]  = msa_subsuu_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3993    pwd->b[8]  = msa_subsuu_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3994    pwd->b[9]  = msa_subsuu_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3995    pwd->b[10] = msa_subsuu_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3996    pwd->b[11] = msa_subsuu_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3997    pwd->b[12] = msa_subsuu_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3998    pwd->b[13] = msa_subsuu_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3999    pwd->b[14] = msa_subsuu_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   4000    pwd->b[15] = msa_subsuu_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   4001}
   4002
   4003void helper_msa_subsuu_s_h(CPUMIPSState *env,
   4004                           uint32_t wd, uint32_t ws, uint32_t wt)
   4005{
   4006    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4007    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4008    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4009
   4010    pwd->h[0]  = msa_subsuu_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   4011    pwd->h[1]  = msa_subsuu_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   4012    pwd->h[2]  = msa_subsuu_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   4013    pwd->h[3]  = msa_subsuu_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   4014    pwd->h[4]  = msa_subsuu_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   4015    pwd->h[5]  = msa_subsuu_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   4016    pwd->h[6]  = msa_subsuu_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   4017    pwd->h[7]  = msa_subsuu_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   4018}
   4019
   4020void helper_msa_subsuu_s_w(CPUMIPSState *env,
   4021                           uint32_t wd, uint32_t ws, uint32_t wt)
   4022{
   4023    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4024    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4025    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4026
   4027    pwd->w[0]  = msa_subsuu_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   4028    pwd->w[1]  = msa_subsuu_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   4029    pwd->w[2]  = msa_subsuu_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   4030    pwd->w[3]  = msa_subsuu_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   4031}
   4032
   4033void helper_msa_subsuu_s_d(CPUMIPSState *env,
   4034                           uint32_t wd, uint32_t ws, uint32_t wt)
   4035{
   4036    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4037    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4038    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4039
   4040    pwd->d[0]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   4041    pwd->d[1]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   4042}
   4043
   4044
   4045static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2)
   4046{
   4047    return arg1 - arg2;
   4048}
   4049
   4050void helper_msa_subv_b(CPUMIPSState *env,
   4051                       uint32_t wd, uint32_t ws, uint32_t wt)
   4052{
   4053    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4054    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4055    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4056
   4057    pwd->b[0]  = msa_subv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   4058    pwd->b[1]  = msa_subv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   4059    pwd->b[2]  = msa_subv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   4060    pwd->b[3]  = msa_subv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   4061    pwd->b[4]  = msa_subv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   4062    pwd->b[5]  = msa_subv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   4063    pwd->b[6]  = msa_subv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   4064    pwd->b[7]  = msa_subv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   4065    pwd->b[8]  = msa_subv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   4066    pwd->b[9]  = msa_subv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   4067    pwd->b[10] = msa_subv_df(DF_BYTE, pws->b[10], pwt->b[10]);
   4068    pwd->b[11] = msa_subv_df(DF_BYTE, pws->b[11], pwt->b[11]);
   4069    pwd->b[12] = msa_subv_df(DF_BYTE, pws->b[12], pwt->b[12]);
   4070    pwd->b[13] = msa_subv_df(DF_BYTE, pws->b[13], pwt->b[13]);
   4071    pwd->b[14] = msa_subv_df(DF_BYTE, pws->b[14], pwt->b[14]);
   4072    pwd->b[15] = msa_subv_df(DF_BYTE, pws->b[15], pwt->b[15]);
   4073}
   4074
   4075void helper_msa_subv_h(CPUMIPSState *env,
   4076                       uint32_t wd, uint32_t ws, uint32_t wt)
   4077{
   4078    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4079    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4080    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4081
   4082    pwd->h[0]  = msa_subv_df(DF_HALF, pws->h[0],  pwt->h[0]);
   4083    pwd->h[1]  = msa_subv_df(DF_HALF, pws->h[1],  pwt->h[1]);
   4084    pwd->h[2]  = msa_subv_df(DF_HALF, pws->h[2],  pwt->h[2]);
   4085    pwd->h[3]  = msa_subv_df(DF_HALF, pws->h[3],  pwt->h[3]);
   4086    pwd->h[4]  = msa_subv_df(DF_HALF, pws->h[4],  pwt->h[4]);
   4087    pwd->h[5]  = msa_subv_df(DF_HALF, pws->h[5],  pwt->h[5]);
   4088    pwd->h[6]  = msa_subv_df(DF_HALF, pws->h[6],  pwt->h[6]);
   4089    pwd->h[7]  = msa_subv_df(DF_HALF, pws->h[7],  pwt->h[7]);
   4090}
   4091
   4092void helper_msa_subv_w(CPUMIPSState *env,
   4093                       uint32_t wd, uint32_t ws, uint32_t wt)
   4094{
   4095    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4096    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4097    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4098
   4099    pwd->w[0]  = msa_subv_df(DF_WORD, pws->w[0],  pwt->w[0]);
   4100    pwd->w[1]  = msa_subv_df(DF_WORD, pws->w[1],  pwt->w[1]);
   4101    pwd->w[2]  = msa_subv_df(DF_WORD, pws->w[2],  pwt->w[2]);
   4102    pwd->w[3]  = msa_subv_df(DF_WORD, pws->w[3],  pwt->w[3]);
   4103}
   4104
   4105void helper_msa_subv_d(CPUMIPSState *env,
   4106                       uint32_t wd, uint32_t ws, uint32_t wt)
   4107{
   4108    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4109    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4110    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4111
   4112    pwd->d[0]  = msa_subv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   4113    pwd->d[1]  = msa_subv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   4114}
   4115
   4116
   4117/*
   4118 * Interleave
   4119 * ----------
   4120 *
   4121 * +---------------+----------------------------------------------------------+
   4122 * | ILVEV.B       | Vector Interleave Even (byte)                            |
   4123 * | ILVEV.H       | Vector Interleave Even (halfword)                        |
   4124 * | ILVEV.W       | Vector Interleave Even (word)                            |
   4125 * | ILVEV.D       | Vector Interleave Even (doubleword)                      |
   4126 * | ILVOD.B       | Vector Interleave Odd (byte)                             |
   4127 * | ILVOD.H       | Vector Interleave Odd (halfword)                         |
   4128 * | ILVOD.W       | Vector Interleave Odd (word)                             |
   4129 * | ILVOD.D       | Vector Interleave Odd (doubleword)                       |
   4130 * | ILVL.B        | Vector Interleave Left (byte)                            |
   4131 * | ILVL.H        | Vector Interleave Left (halfword)                        |
   4132 * | ILVL.W        | Vector Interleave Left (word)                            |
   4133 * | ILVL.D        | Vector Interleave Left (doubleword)                      |
   4134 * | ILVR.B        | Vector Interleave Right (byte)                           |
   4135 * | ILVR.H        | Vector Interleave Right (halfword)                       |
   4136 * | ILVR.W        | Vector Interleave Right (word)                           |
   4137 * | ILVR.D        | Vector Interleave Right (doubleword)                     |
   4138 * +---------------+----------------------------------------------------------+
   4139 */
   4140
   4141
   4142void helper_msa_ilvev_b(CPUMIPSState *env,
   4143                        uint32_t wd, uint32_t ws, uint32_t wt)
   4144{
   4145    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4146    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4147    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4148
   4149#if defined(HOST_WORDS_BIGENDIAN)
   4150    pwd->b[8]  = pws->b[9];
   4151    pwd->b[9]  = pwt->b[9];
   4152    pwd->b[10] = pws->b[11];
   4153    pwd->b[11] = pwt->b[11];
   4154    pwd->b[12] = pws->b[13];
   4155    pwd->b[13] = pwt->b[13];
   4156    pwd->b[14] = pws->b[15];
   4157    pwd->b[15] = pwt->b[15];
   4158    pwd->b[0]  = pws->b[1];
   4159    pwd->b[1]  = pwt->b[1];
   4160    pwd->b[2]  = pws->b[3];
   4161    pwd->b[3]  = pwt->b[3];
   4162    pwd->b[4]  = pws->b[5];
   4163    pwd->b[5]  = pwt->b[5];
   4164    pwd->b[6]  = pws->b[7];
   4165    pwd->b[7]  = pwt->b[7];
   4166#else
   4167    pwd->b[15] = pws->b[14];
   4168    pwd->b[14] = pwt->b[14];
   4169    pwd->b[13] = pws->b[12];
   4170    pwd->b[12] = pwt->b[12];
   4171    pwd->b[11] = pws->b[10];
   4172    pwd->b[10] = pwt->b[10];
   4173    pwd->b[9]  = pws->b[8];
   4174    pwd->b[8]  = pwt->b[8];
   4175    pwd->b[7]  = pws->b[6];
   4176    pwd->b[6]  = pwt->b[6];
   4177    pwd->b[5]  = pws->b[4];
   4178    pwd->b[4]  = pwt->b[4];
   4179    pwd->b[3]  = pws->b[2];
   4180    pwd->b[2]  = pwt->b[2];
   4181    pwd->b[1]  = pws->b[0];
   4182    pwd->b[0]  = pwt->b[0];
   4183#endif
   4184}
   4185
   4186void helper_msa_ilvev_h(CPUMIPSState *env,
   4187                        uint32_t wd, uint32_t ws, uint32_t wt)
   4188{
   4189    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4190    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4191    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4192
   4193#if defined(HOST_WORDS_BIGENDIAN)
   4194    pwd->h[4] = pws->h[5];
   4195    pwd->h[5] = pwt->h[5];
   4196    pwd->h[6] = pws->h[7];
   4197    pwd->h[7] = pwt->h[7];
   4198    pwd->h[0] = pws->h[1];
   4199    pwd->h[1] = pwt->h[1];
   4200    pwd->h[2] = pws->h[3];
   4201    pwd->h[3] = pwt->h[3];
   4202#else
   4203    pwd->h[7] = pws->h[6];
   4204    pwd->h[6] = pwt->h[6];
   4205    pwd->h[5] = pws->h[4];
   4206    pwd->h[4] = pwt->h[4];
   4207    pwd->h[3] = pws->h[2];
   4208    pwd->h[2] = pwt->h[2];
   4209    pwd->h[1] = pws->h[0];
   4210    pwd->h[0] = pwt->h[0];
   4211#endif
   4212}
   4213
   4214void helper_msa_ilvev_w(CPUMIPSState *env,
   4215                        uint32_t wd, uint32_t ws, uint32_t wt)
   4216{
   4217    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4218    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4219    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4220
   4221#if defined(HOST_WORDS_BIGENDIAN)
   4222    pwd->w[2] = pws->w[3];
   4223    pwd->w[3] = pwt->w[3];
   4224    pwd->w[0] = pws->w[1];
   4225    pwd->w[1] = pwt->w[1];
   4226#else
   4227    pwd->w[3] = pws->w[2];
   4228    pwd->w[2] = pwt->w[2];
   4229    pwd->w[1] = pws->w[0];
   4230    pwd->w[0] = pwt->w[0];
   4231#endif
   4232}
   4233
   4234void helper_msa_ilvev_d(CPUMIPSState *env,
   4235                        uint32_t wd, uint32_t ws, uint32_t wt)
   4236{
   4237    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4238    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4239    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4240
   4241    pwd->d[1] = pws->d[0];
   4242    pwd->d[0] = pwt->d[0];
   4243}
   4244
   4245
   4246void helper_msa_ilvod_b(CPUMIPSState *env,
   4247                        uint32_t wd, uint32_t ws, uint32_t wt)
   4248{
   4249    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4250    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4251    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4252
   4253#if defined(HOST_WORDS_BIGENDIAN)
   4254    pwd->b[7]  = pwt->b[6];
   4255    pwd->b[6]  = pws->b[6];
   4256    pwd->b[5]  = pwt->b[4];
   4257    pwd->b[4]  = pws->b[4];
   4258    pwd->b[3]  = pwt->b[2];
   4259    pwd->b[2]  = pws->b[2];
   4260    pwd->b[1]  = pwt->b[0];
   4261    pwd->b[0]  = pws->b[0];
   4262    pwd->b[15] = pwt->b[14];
   4263    pwd->b[14] = pws->b[14];
   4264    pwd->b[13] = pwt->b[12];
   4265    pwd->b[12] = pws->b[12];
   4266    pwd->b[11] = pwt->b[10];
   4267    pwd->b[10] = pws->b[10];
   4268    pwd->b[9]  = pwt->b[8];
   4269    pwd->b[8]  = pws->b[8];
   4270#else
   4271    pwd->b[0]  = pwt->b[1];
   4272    pwd->b[1]  = pws->b[1];
   4273    pwd->b[2]  = pwt->b[3];
   4274    pwd->b[3]  = pws->b[3];
   4275    pwd->b[4]  = pwt->b[5];
   4276    pwd->b[5]  = pws->b[5];
   4277    pwd->b[6]  = pwt->b[7];
   4278    pwd->b[7]  = pws->b[7];
   4279    pwd->b[8]  = pwt->b[9];
   4280    pwd->b[9]  = pws->b[9];
   4281    pwd->b[10] = pwt->b[11];
   4282    pwd->b[11] = pws->b[11];
   4283    pwd->b[12] = pwt->b[13];
   4284    pwd->b[13] = pws->b[13];
   4285    pwd->b[14] = pwt->b[15];
   4286    pwd->b[15] = pws->b[15];
   4287#endif
   4288}
   4289
   4290void helper_msa_ilvod_h(CPUMIPSState *env,
   4291                        uint32_t wd, uint32_t ws, uint32_t wt)
   4292{
   4293    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4294    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4295    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4296
   4297#if defined(HOST_WORDS_BIGENDIAN)
   4298    pwd->h[3] = pwt->h[2];
   4299    pwd->h[2] = pws->h[2];
   4300    pwd->h[1] = pwt->h[0];
   4301    pwd->h[0] = pws->h[0];
   4302    pwd->h[7] = pwt->h[6];
   4303    pwd->h[6] = pws->h[6];
   4304    pwd->h[5] = pwt->h[4];
   4305    pwd->h[4] = pws->h[4];
   4306#else
   4307    pwd->h[0] = pwt->h[1];
   4308    pwd->h[1] = pws->h[1];
   4309    pwd->h[2] = pwt->h[3];
   4310    pwd->h[3] = pws->h[3];
   4311    pwd->h[4] = pwt->h[5];
   4312    pwd->h[5] = pws->h[5];
   4313    pwd->h[6] = pwt->h[7];
   4314    pwd->h[7] = pws->h[7];
   4315#endif
   4316}
   4317
   4318void helper_msa_ilvod_w(CPUMIPSState *env,
   4319                        uint32_t wd, uint32_t ws, uint32_t wt)
   4320{
   4321    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4322    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4323    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4324
   4325#if defined(HOST_WORDS_BIGENDIAN)
   4326    pwd->w[1] = pwt->w[0];
   4327    pwd->w[0] = pws->w[0];
   4328    pwd->w[3] = pwt->w[2];
   4329    pwd->w[2] = pws->w[2];
   4330#else
   4331    pwd->w[0] = pwt->w[1];
   4332    pwd->w[1] = pws->w[1];
   4333    pwd->w[2] = pwt->w[3];
   4334    pwd->w[3] = pws->w[3];
   4335#endif
   4336}
   4337
   4338void helper_msa_ilvod_d(CPUMIPSState *env,
   4339                        uint32_t wd, uint32_t ws, uint32_t wt)
   4340{
   4341    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4342    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4343    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4344
   4345    pwd->d[0] = pwt->d[1];
   4346    pwd->d[1] = pws->d[1];
   4347}
   4348
   4349
   4350void helper_msa_ilvl_b(CPUMIPSState *env,
   4351                       uint32_t wd, uint32_t ws, uint32_t wt)
   4352{
   4353    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4354    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4355    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4356
   4357#if defined(HOST_WORDS_BIGENDIAN)
   4358    pwd->b[7]  = pwt->b[15];
   4359    pwd->b[6]  = pws->b[15];
   4360    pwd->b[5]  = pwt->b[14];
   4361    pwd->b[4]  = pws->b[14];
   4362    pwd->b[3]  = pwt->b[13];
   4363    pwd->b[2]  = pws->b[13];
   4364    pwd->b[1]  = pwt->b[12];
   4365    pwd->b[0]  = pws->b[12];
   4366    pwd->b[15] = pwt->b[11];
   4367    pwd->b[14] = pws->b[11];
   4368    pwd->b[13] = pwt->b[10];
   4369    pwd->b[12] = pws->b[10];
   4370    pwd->b[11] = pwt->b[9];
   4371    pwd->b[10] = pws->b[9];
   4372    pwd->b[9]  = pwt->b[8];
   4373    pwd->b[8]  = pws->b[8];
   4374#else
   4375    pwd->b[0]  = pwt->b[8];
   4376    pwd->b[1]  = pws->b[8];
   4377    pwd->b[2]  = pwt->b[9];
   4378    pwd->b[3]  = pws->b[9];
   4379    pwd->b[4]  = pwt->b[10];
   4380    pwd->b[5]  = pws->b[10];
   4381    pwd->b[6]  = pwt->b[11];
   4382    pwd->b[7]  = pws->b[11];
   4383    pwd->b[8]  = pwt->b[12];
   4384    pwd->b[9]  = pws->b[12];
   4385    pwd->b[10] = pwt->b[13];
   4386    pwd->b[11] = pws->b[13];
   4387    pwd->b[12] = pwt->b[14];
   4388    pwd->b[13] = pws->b[14];
   4389    pwd->b[14] = pwt->b[15];
   4390    pwd->b[15] = pws->b[15];
   4391#endif
   4392}
   4393
   4394void helper_msa_ilvl_h(CPUMIPSState *env,
   4395                       uint32_t wd, uint32_t ws, uint32_t wt)
   4396{
   4397    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4398    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4399    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4400
   4401#if defined(HOST_WORDS_BIGENDIAN)
   4402    pwd->h[3] = pwt->h[7];
   4403    pwd->h[2] = pws->h[7];
   4404    pwd->h[1] = pwt->h[6];
   4405    pwd->h[0] = pws->h[6];
   4406    pwd->h[7] = pwt->h[5];
   4407    pwd->h[6] = pws->h[5];
   4408    pwd->h[5] = pwt->h[4];
   4409    pwd->h[4] = pws->h[4];
   4410#else
   4411    pwd->h[0] = pwt->h[4];
   4412    pwd->h[1] = pws->h[4];
   4413    pwd->h[2] = pwt->h[5];
   4414    pwd->h[3] = pws->h[5];
   4415    pwd->h[4] = pwt->h[6];
   4416    pwd->h[5] = pws->h[6];
   4417    pwd->h[6] = pwt->h[7];
   4418    pwd->h[7] = pws->h[7];
   4419#endif
   4420}
   4421
   4422void helper_msa_ilvl_w(CPUMIPSState *env,
   4423                       uint32_t wd, uint32_t ws, uint32_t wt)
   4424{
   4425    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4426    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4427    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4428
   4429#if defined(HOST_WORDS_BIGENDIAN)
   4430    pwd->w[1] = pwt->w[3];
   4431    pwd->w[0] = pws->w[3];
   4432    pwd->w[3] = pwt->w[2];
   4433    pwd->w[2] = pws->w[2];
   4434#else
   4435    pwd->w[0] = pwt->w[2];
   4436    pwd->w[1] = pws->w[2];
   4437    pwd->w[2] = pwt->w[3];
   4438    pwd->w[3] = pws->w[3];
   4439#endif
   4440}
   4441
   4442void helper_msa_ilvl_d(CPUMIPSState *env,
   4443                       uint32_t wd, uint32_t ws, uint32_t wt)
   4444{
   4445    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4446    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4447    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4448
   4449    pwd->d[0] = pwt->d[1];
   4450    pwd->d[1] = pws->d[1];
   4451}
   4452
   4453
   4454void helper_msa_ilvr_b(CPUMIPSState *env,
   4455                       uint32_t wd, uint32_t ws, uint32_t wt)
   4456{
   4457    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4458    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4459    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4460
   4461#if defined(HOST_WORDS_BIGENDIAN)
   4462    pwd->b[8]  = pws->b[0];
   4463    pwd->b[9]  = pwt->b[0];
   4464    pwd->b[10] = pws->b[1];
   4465    pwd->b[11] = pwt->b[1];
   4466    pwd->b[12] = pws->b[2];
   4467    pwd->b[13] = pwt->b[2];
   4468    pwd->b[14] = pws->b[3];
   4469    pwd->b[15] = pwt->b[3];
   4470    pwd->b[0]  = pws->b[4];
   4471    pwd->b[1]  = pwt->b[4];
   4472    pwd->b[2]  = pws->b[5];
   4473    pwd->b[3]  = pwt->b[5];
   4474    pwd->b[4]  = pws->b[6];
   4475    pwd->b[5]  = pwt->b[6];
   4476    pwd->b[6]  = pws->b[7];
   4477    pwd->b[7]  = pwt->b[7];
   4478#else
   4479    pwd->b[15] = pws->b[7];
   4480    pwd->b[14] = pwt->b[7];
   4481    pwd->b[13] = pws->b[6];
   4482    pwd->b[12] = pwt->b[6];
   4483    pwd->b[11] = pws->b[5];
   4484    pwd->b[10] = pwt->b[5];
   4485    pwd->b[9]  = pws->b[4];
   4486    pwd->b[8]  = pwt->b[4];
   4487    pwd->b[7]  = pws->b[3];
   4488    pwd->b[6]  = pwt->b[3];
   4489    pwd->b[5]  = pws->b[2];
   4490    pwd->b[4]  = pwt->b[2];
   4491    pwd->b[3]  = pws->b[1];
   4492    pwd->b[2]  = pwt->b[1];
   4493    pwd->b[1]  = pws->b[0];
   4494    pwd->b[0]  = pwt->b[0];
   4495#endif
   4496}
   4497
   4498void helper_msa_ilvr_h(CPUMIPSState *env,
   4499                       uint32_t wd, uint32_t ws, uint32_t wt)
   4500{
   4501    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4502    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4503    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4504
   4505#if defined(HOST_WORDS_BIGENDIAN)
   4506    pwd->h[4] = pws->h[0];
   4507    pwd->h[5] = pwt->h[0];
   4508    pwd->h[6] = pws->h[1];
   4509    pwd->h[7] = pwt->h[1];
   4510    pwd->h[0] = pws->h[2];
   4511    pwd->h[1] = pwt->h[2];
   4512    pwd->h[2] = pws->h[3];
   4513    pwd->h[3] = pwt->h[3];
   4514#else
   4515    pwd->h[7] = pws->h[3];
   4516    pwd->h[6] = pwt->h[3];
   4517    pwd->h[5] = pws->h[2];
   4518    pwd->h[4] = pwt->h[2];
   4519    pwd->h[3] = pws->h[1];
   4520    pwd->h[2] = pwt->h[1];
   4521    pwd->h[1] = pws->h[0];
   4522    pwd->h[0] = pwt->h[0];
   4523#endif
   4524}
   4525
   4526void helper_msa_ilvr_w(CPUMIPSState *env,
   4527                       uint32_t wd, uint32_t ws, uint32_t wt)
   4528{
   4529    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4530    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4531    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4532
   4533#if defined(HOST_WORDS_BIGENDIAN)
   4534    pwd->w[2] = pws->w[0];
   4535    pwd->w[3] = pwt->w[0];
   4536    pwd->w[0] = pws->w[1];
   4537    pwd->w[1] = pwt->w[1];
   4538#else
   4539    pwd->w[3] = pws->w[1];
   4540    pwd->w[2] = pwt->w[1];
   4541    pwd->w[1] = pws->w[0];
   4542    pwd->w[0] = pwt->w[0];
   4543#endif
   4544}
   4545
   4546void helper_msa_ilvr_d(CPUMIPSState *env,
   4547                       uint32_t wd, uint32_t ws, uint32_t wt)
   4548{
   4549    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4550    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4551    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4552
   4553    pwd->d[1] = pws->d[0];
   4554    pwd->d[0] = pwt->d[0];
   4555}
   4556
   4557
   4558/*
   4559 * Logic
   4560 * -----
   4561 *
   4562 * +---------------+----------------------------------------------------------+
   4563 * | AND.V         | Vector Logical And                                       |
   4564 * | NOR.V         | Vector Logical Negated Or                                |
   4565 * | OR.V          | Vector Logical Or                                        |
   4566 * | XOR.V         | Vector Logical Exclusive Or                              |
   4567 * +---------------+----------------------------------------------------------+
   4568 */
   4569
   4570
   4571void helper_msa_and_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   4572{
   4573    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4574    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4575    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4576
   4577    pwd->d[0] = pws->d[0] & pwt->d[0];
   4578    pwd->d[1] = pws->d[1] & pwt->d[1];
   4579}
   4580
   4581void helper_msa_nor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   4582{
   4583    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4584    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4585    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4586
   4587    pwd->d[0] = ~(pws->d[0] | pwt->d[0]);
   4588    pwd->d[1] = ~(pws->d[1] | pwt->d[1]);
   4589}
   4590
   4591void helper_msa_or_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   4592{
   4593    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4594    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4595    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4596
   4597    pwd->d[0] = pws->d[0] | pwt->d[0];
   4598    pwd->d[1] = pws->d[1] | pwt->d[1];
   4599}
   4600
   4601void helper_msa_xor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   4602{
   4603    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4604    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4605    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4606
   4607    pwd->d[0] = pws->d[0] ^ pwt->d[0];
   4608    pwd->d[1] = pws->d[1] ^ pwt->d[1];
   4609}
   4610
   4611
   4612/*
   4613 * Move
   4614 * ----
   4615 *
   4616 * +---------------+----------------------------------------------------------+
   4617 * | MOVE.V        | Vector Move                                              |
   4618 * +---------------+----------------------------------------------------------+
   4619 */
   4620
   4621static inline void msa_move_v(wr_t *pwd, wr_t *pws)
   4622{
   4623    pwd->d[0] = pws->d[0];
   4624    pwd->d[1] = pws->d[1];
   4625}
   4626
   4627void helper_msa_move_v(CPUMIPSState *env, uint32_t wd, uint32_t ws)
   4628{
   4629    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4630    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4631
   4632    msa_move_v(pwd, pws);
   4633}
   4634
   4635
   4636/*
   4637 * Pack
   4638 * ----
   4639 *
   4640 * +---------------+----------------------------------------------------------+
   4641 * | PCKEV.B       | Vector Pack Even (byte)                                  |
   4642 * | PCKEV.H       | Vector Pack Even (halfword)                              |
   4643 * | PCKEV.W       | Vector Pack Even (word)                                  |
   4644 * | PCKEV.D       | Vector Pack Even (doubleword)                            |
   4645 * | PCKOD.B       | Vector Pack Odd (byte)                                   |
   4646 * | PCKOD.H       | Vector Pack Odd (halfword)                               |
   4647 * | PCKOD.W       | Vector Pack Odd (word)                                   |
   4648 * | PCKOD.D       | Vector Pack Odd (doubleword)                             |
   4649 * | VSHF.B        | Vector Data Preserving Shuffle (byte)                    |
   4650 * | VSHF.H        | Vector Data Preserving Shuffle (halfword)                |
   4651 * | VSHF.W        | Vector Data Preserving Shuffle (word)                    |
   4652 * | VSHF.D        | Vector Data Preserving Shuffle (doubleword)              |
   4653 * +---------------+----------------------------------------------------------+
   4654 */
   4655
   4656
/*
 * PCKEV.B helper: pack the even-indexed bytes of wt and ws into wd.
 *
 * In the little-endian branch, bytes 0..7 of wd receive bytes 0, 2, ..., 14
 * of wt and bytes 8..15 of wd receive bytes 0, 2, ..., 14 of ws.  The
 * big-endian branch performs the same kind of pick with permuted array
 * indices (presumably to account for host byte order inside each 64-bit
 * half -- confirm against the wr_t layout).
 *
 * Statement order matters: every source index is read before the same index
 * of wd is written, so the result stays correct when wd is the same register
 * as ws or wt.  Do not reorder the assignments.
 */
void helper_msa_pckev_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

#if defined(HOST_WORDS_BIGENDIAN)
    pwd->b[8]  = pws->b[9];
    pwd->b[10] = pws->b[13];
    pwd->b[12] = pws->b[1];
    pwd->b[14] = pws->b[5];
    pwd->b[0]  = pwt->b[9];
    pwd->b[2]  = pwt->b[13];
    pwd->b[4]  = pwt->b[1];
    pwd->b[6]  = pwt->b[5];
    pwd->b[9]  = pws->b[11];
    pwd->b[13] = pws->b[3];
    pwd->b[1]  = pwt->b[11];
    pwd->b[5]  = pwt->b[3];
    pwd->b[11] = pws->b[15];
    pwd->b[3]  = pwt->b[15];
    pwd->b[15] = pws->b[7];
    pwd->b[7]  = pwt->b[7];
#else
    pwd->b[15] = pws->b[14];
    pwd->b[13] = pws->b[10];
    pwd->b[11] = pws->b[6];
    pwd->b[9]  = pws->b[2];
    pwd->b[7]  = pwt->b[14];
    pwd->b[5]  = pwt->b[10];
    pwd->b[3]  = pwt->b[6];
    pwd->b[1]  = pwt->b[2];
    pwd->b[14] = pws->b[12];
    pwd->b[10] = pws->b[4];
    pwd->b[6]  = pwt->b[12];
    pwd->b[2]  = pwt->b[4];
    pwd->b[12] = pws->b[8];
    pwd->b[4]  = pwt->b[8];
    pwd->b[8]  = pws->b[0];
    pwd->b[0]  = pwt->b[0];
#endif
}
   4700
/*
 * PCKEV.H helper: pack the even-indexed halfwords of wt and ws into wd.
 *
 * In the little-endian branch, halfwords 0..3 of wd receive halfwords
 * 0, 2, 4, 6 of wt and halfwords 4..7 of wd receive halfwords 0, 2, 4, 6 of
 * ws.  The big-endian branch uses permuted array indices (presumably to
 * account for host element order -- confirm against the wr_t layout).
 *
 * Statement order matters: every source index is read before the same index
 * of wd is written, so the result stays correct when wd is the same register
 * as ws or wt.  Do not reorder the assignments.
 */
void helper_msa_pckev_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

#if defined(HOST_WORDS_BIGENDIAN)
    pwd->h[4] = pws->h[5];
    pwd->h[6] = pws->h[1];
    pwd->h[0] = pwt->h[5];
    pwd->h[2] = pwt->h[1];
    pwd->h[5] = pws->h[7];
    pwd->h[1] = pwt->h[7];
    pwd->h[7] = pws->h[3];
    pwd->h[3] = pwt->h[3];
#else
    pwd->h[7] = pws->h[6];
    pwd->h[5] = pws->h[2];
    pwd->h[3] = pwt->h[6];
    pwd->h[1] = pwt->h[2];
    pwd->h[6] = pws->h[4];
    pwd->h[2] = pwt->h[4];
    pwd->h[4] = pws->h[0];
    pwd->h[0] = pwt->h[0];
#endif
}
   4728
/*
 * PCKEV.W helper: pack the even-indexed words of wt and ws into wd.
 *
 * In the little-endian branch, words 0..1 of wd receive words 0 and 2 of wt
 * and words 2..3 of wd receive words 0 and 2 of ws.  The big-endian branch
 * uses permuted array indices (presumably to account for host element
 * order -- confirm against the wr_t layout).
 *
 * Statement order matters: every source index is read before the same index
 * of wd is written, so the result stays correct when wd is the same register
 * as ws or wt.  Do not reorder the assignments.
 */
void helper_msa_pckev_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

#if defined(HOST_WORDS_BIGENDIAN)
    pwd->w[2] = pws->w[3];
    pwd->w[0] = pwt->w[3];
    pwd->w[3] = pws->w[1];
    pwd->w[1] = pwt->w[1];
#else
    pwd->w[3] = pws->w[2];
    pwd->w[1] = pwt->w[2];
    pwd->w[2] = pws->w[0];
    pwd->w[0] = pwt->w[0];
#endif
}
   4748
   4749void helper_msa_pckev_d(CPUMIPSState *env,
   4750                        uint32_t wd, uint32_t ws, uint32_t wt)
   4751{
   4752    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4753    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4754    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4755
   4756    pwd->d[1] = pws->d[0];
   4757    pwd->d[0] = pwt->d[0];
   4758}
   4759
   4760
/*
 * PCKOD.B helper: pack the odd-indexed bytes of wt and ws into wd.
 *
 * In the little-endian branch, bytes 0..7 of wd receive bytes 1, 3, ..., 15
 * of wt and bytes 8..15 of wd receive bytes 1, 3, ..., 15 of ws.  The
 * big-endian branch performs the same kind of pick with permuted array
 * indices (presumably to account for host byte order inside each 64-bit
 * half -- confirm against the wr_t layout).
 *
 * Statement order matters: every source index is read before the same index
 * of wd is written, so the result stays correct when wd is the same register
 * as ws or wt.  Do not reorder the assignments.
 */
void helper_msa_pckod_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

#if defined(HOST_WORDS_BIGENDIAN)
    pwd->b[7]  = pwt->b[6];
    pwd->b[5]  = pwt->b[2];
    pwd->b[3]  = pwt->b[14];
    pwd->b[1]  = pwt->b[10];
    pwd->b[15] = pws->b[6];
    pwd->b[13] = pws->b[2];
    pwd->b[11] = pws->b[14];
    pwd->b[9]  = pws->b[10];
    pwd->b[6]  = pwt->b[4];
    pwd->b[2]  = pwt->b[12];
    pwd->b[14] = pws->b[4];
    pwd->b[10] = pws->b[12];
    pwd->b[4]  = pwt->b[0];
    pwd->b[12] = pws->b[0];
    pwd->b[0]  = pwt->b[8];
    pwd->b[8]  = pws->b[8];
#else
    pwd->b[0]  = pwt->b[1];
    pwd->b[2]  = pwt->b[5];
    pwd->b[4]  = pwt->b[9];
    pwd->b[6]  = pwt->b[13];
    pwd->b[8]  = pws->b[1];
    pwd->b[10] = pws->b[5];
    pwd->b[12] = pws->b[9];
    pwd->b[14] = pws->b[13];
    pwd->b[1]  = pwt->b[3];
    pwd->b[5]  = pwt->b[11];
    pwd->b[9]  = pws->b[3];
    pwd->b[13] = pws->b[11];
    pwd->b[3]  = pwt->b[7];
    pwd->b[11] = pws->b[7];
    pwd->b[7]  = pwt->b[15];
    pwd->b[15] = pws->b[15];
#endif

}
   4805
/*
 * PCKOD.H helper: pack the odd-indexed halfwords of wt and ws into wd.
 *
 * In the little-endian branch, halfwords 0..3 of wd receive halfwords
 * 1, 3, 5, 7 of wt and halfwords 4..7 of wd receive halfwords 1, 3, 5, 7 of
 * ws.  The big-endian branch uses permuted array indices (presumably to
 * account for host element order -- confirm against the wr_t layout).
 *
 * Statement order matters: every source index is read before the same index
 * of wd is written, so the result stays correct when wd is the same register
 * as ws or wt.  Do not reorder the assignments.
 */
void helper_msa_pckod_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

#if defined(HOST_WORDS_BIGENDIAN)
    pwd->h[3] = pwt->h[2];
    pwd->h[1] = pwt->h[6];
    pwd->h[7] = pws->h[2];
    pwd->h[5] = pws->h[6];
    pwd->h[2] = pwt->h[0];
    pwd->h[6] = pws->h[0];
    pwd->h[0] = pwt->h[4];
    pwd->h[4] = pws->h[4];
#else
    pwd->h[0] = pwt->h[1];
    pwd->h[2] = pwt->h[5];
    pwd->h[4] = pws->h[1];
    pwd->h[6] = pws->h[5];
    pwd->h[1] = pwt->h[3];
    pwd->h[5] = pws->h[3];
    pwd->h[3] = pwt->h[7];
    pwd->h[7] = pws->h[7];
#endif
}
   4833
/*
 * PCKOD.W helper: pack the odd-indexed words of wt and ws into wd.
 *
 * In the little-endian branch, words 0..1 of wd receive words 1 and 3 of wt
 * and words 2..3 of wd receive words 1 and 3 of ws.  The big-endian branch
 * uses permuted array indices (presumably to account for host element
 * order -- confirm against the wr_t layout).
 *
 * Statement order matters: every source index is read before the same index
 * of wd is written, so the result stays correct when wd is the same register
 * as ws or wt.  Do not reorder the assignments.
 */
void helper_msa_pckod_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);

#if defined(HOST_WORDS_BIGENDIAN)
    pwd->w[1] = pwt->w[0];
    pwd->w[3] = pws->w[0];
    pwd->w[0] = pwt->w[2];
    pwd->w[2] = pws->w[2];
#else
    pwd->w[0] = pwt->w[1];
    pwd->w[2] = pws->w[1];
    pwd->w[1] = pwt->w[3];
    pwd->w[3] = pws->w[3];
#endif
}
   4853
   4854void helper_msa_pckod_d(CPUMIPSState *env,
   4855                        uint32_t wd, uint32_t ws, uint32_t wt)
   4856{
   4857    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4858    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4859    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4860
   4861    pwd->d[0] = pwt->d[1];
   4862    pwd->d[1] = pws->d[1];
   4863}
   4864
   4865
   4866/*
   4867 * Shift
   4868 * -----
   4869 *
   4870 * +---------------+----------------------------------------------------------+
   4871 * | SLL.B         | Vector Shift Left (byte)                                 |
   4872 * | SLL.H         | Vector Shift Left (halfword)                             |
   4873 * | SLL.W         | Vector Shift Left (word)                                 |
   4874 * | SLL.D         | Vector Shift Left (doubleword)                           |
   4875 * | SRA.B         | Vector Shift Right Arithmetic (byte)                     |
   4876 * | SRA.H         | Vector Shift Right Arithmetic (halfword)                 |
   4877 * | SRA.W         | Vector Shift Right Arithmetic (word)                     |
   4878 * | SRA.D         | Vector Shift Right Arithmetic (doubleword)               |
   4879 * | SRAR.B        | Vector Shift Right Arithmetic Rounded (byte)             |
   4880 * | SRAR.H        | Vector Shift Right Arithmetic Rounded (halfword)         |
   4881 * | SRAR.W        | Vector Shift Right Arithmetic Rounded (word)             |
   4882 * | SRAR.D        | Vector Shift Right Arithmetic Rounded (doubleword)       |
   4883 * | SRL.B         | Vector Shift Right Logical (byte)                        |
   4884 * | SRL.H         | Vector Shift Right Logical (halfword)                    |
   4885 * | SRL.W         | Vector Shift Right Logical (word)                        |
   4886 * | SRL.D         | Vector Shift Right Logical (doubleword)                  |
   4887 * | SRLR.B        | Vector Shift Right Logical Rounded (byte)                |
   4888 * | SRLR.H        | Vector Shift Right Logical Rounded (halfword)            |
   4889 * | SRLR.W        | Vector Shift Right Logical Rounded (word)                |
   4890 * | SRLR.D        | Vector Shift Right Logical Rounded (doubleword)          |
   4891 * +---------------+----------------------------------------------------------+
   4892 */
   4893
   4894
   4895static inline int64_t msa_sll_df(uint32_t df, int64_t arg1, int64_t arg2)
   4896{
   4897    int32_t b_arg2 = BIT_POSITION(arg2, df);
   4898    return arg1 << b_arg2;
   4899}
   4900
   4901void helper_msa_sll_b(CPUMIPSState *env,
   4902                      uint32_t wd, uint32_t ws, uint32_t wt)
   4903{
   4904    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4905    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4906    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4907
   4908    pwd->b[0]  = msa_sll_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   4909    pwd->b[1]  = msa_sll_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   4910    pwd->b[2]  = msa_sll_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   4911    pwd->b[3]  = msa_sll_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   4912    pwd->b[4]  = msa_sll_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   4913    pwd->b[5]  = msa_sll_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   4914    pwd->b[6]  = msa_sll_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   4915    pwd->b[7]  = msa_sll_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   4916    pwd->b[8]  = msa_sll_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   4917    pwd->b[9]  = msa_sll_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   4918    pwd->b[10] = msa_sll_df(DF_BYTE, pws->b[10], pwt->b[10]);
   4919    pwd->b[11] = msa_sll_df(DF_BYTE, pws->b[11], pwt->b[11]);
   4920    pwd->b[12] = msa_sll_df(DF_BYTE, pws->b[12], pwt->b[12]);
   4921    pwd->b[13] = msa_sll_df(DF_BYTE, pws->b[13], pwt->b[13]);
   4922    pwd->b[14] = msa_sll_df(DF_BYTE, pws->b[14], pwt->b[14]);
   4923    pwd->b[15] = msa_sll_df(DF_BYTE, pws->b[15], pwt->b[15]);
   4924}
   4925
   4926void helper_msa_sll_h(CPUMIPSState *env,
   4927                      uint32_t wd, uint32_t ws, uint32_t wt)
   4928{
   4929    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4930    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4931    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4932
   4933    pwd->h[0]  = msa_sll_df(DF_HALF, pws->h[0],  pwt->h[0]);
   4934    pwd->h[1]  = msa_sll_df(DF_HALF, pws->h[1],  pwt->h[1]);
   4935    pwd->h[2]  = msa_sll_df(DF_HALF, pws->h[2],  pwt->h[2]);
   4936    pwd->h[3]  = msa_sll_df(DF_HALF, pws->h[3],  pwt->h[3]);
   4937    pwd->h[4]  = msa_sll_df(DF_HALF, pws->h[4],  pwt->h[4]);
   4938    pwd->h[5]  = msa_sll_df(DF_HALF, pws->h[5],  pwt->h[5]);
   4939    pwd->h[6]  = msa_sll_df(DF_HALF, pws->h[6],  pwt->h[6]);
   4940    pwd->h[7]  = msa_sll_df(DF_HALF, pws->h[7],  pwt->h[7]);
   4941}
   4942
   4943void helper_msa_sll_w(CPUMIPSState *env,
   4944                      uint32_t wd, uint32_t ws, uint32_t wt)
   4945{
   4946    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4947    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4948    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4949
   4950    pwd->w[0]  = msa_sll_df(DF_WORD, pws->w[0],  pwt->w[0]);
   4951    pwd->w[1]  = msa_sll_df(DF_WORD, pws->w[1],  pwt->w[1]);
   4952    pwd->w[2]  = msa_sll_df(DF_WORD, pws->w[2],  pwt->w[2]);
   4953    pwd->w[3]  = msa_sll_df(DF_WORD, pws->w[3],  pwt->w[3]);
   4954}
   4955
   4956void helper_msa_sll_d(CPUMIPSState *env,
   4957                      uint32_t wd, uint32_t ws, uint32_t wt)
   4958{
   4959    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4960    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4961    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4962
   4963    pwd->d[0]  = msa_sll_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   4964    pwd->d[1]  = msa_sll_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   4965}
   4966
   4967
   4968static inline int64_t msa_sra_df(uint32_t df, int64_t arg1, int64_t arg2)
   4969{
   4970    int32_t b_arg2 = BIT_POSITION(arg2, df);
   4971    return arg1 >> b_arg2;
   4972}
   4973
   4974void helper_msa_sra_b(CPUMIPSState *env,
   4975                      uint32_t wd, uint32_t ws, uint32_t wt)
   4976{
   4977    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4978    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4979    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4980
   4981    pwd->b[0]  = msa_sra_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   4982    pwd->b[1]  = msa_sra_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   4983    pwd->b[2]  = msa_sra_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   4984    pwd->b[3]  = msa_sra_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   4985    pwd->b[4]  = msa_sra_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   4986    pwd->b[5]  = msa_sra_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   4987    pwd->b[6]  = msa_sra_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   4988    pwd->b[7]  = msa_sra_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   4989    pwd->b[8]  = msa_sra_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   4990    pwd->b[9]  = msa_sra_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   4991    pwd->b[10] = msa_sra_df(DF_BYTE, pws->b[10], pwt->b[10]);
   4992    pwd->b[11] = msa_sra_df(DF_BYTE, pws->b[11], pwt->b[11]);
   4993    pwd->b[12] = msa_sra_df(DF_BYTE, pws->b[12], pwt->b[12]);
   4994    pwd->b[13] = msa_sra_df(DF_BYTE, pws->b[13], pwt->b[13]);
   4995    pwd->b[14] = msa_sra_df(DF_BYTE, pws->b[14], pwt->b[14]);
   4996    pwd->b[15] = msa_sra_df(DF_BYTE, pws->b[15], pwt->b[15]);
   4997}
   4998
   4999void helper_msa_sra_h(CPUMIPSState *env,
   5000                      uint32_t wd, uint32_t ws, uint32_t wt)
   5001{
   5002    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5003    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5004    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5005
   5006    pwd->h[0]  = msa_sra_df(DF_HALF, pws->h[0],  pwt->h[0]);
   5007    pwd->h[1]  = msa_sra_df(DF_HALF, pws->h[1],  pwt->h[1]);
   5008    pwd->h[2]  = msa_sra_df(DF_HALF, pws->h[2],  pwt->h[2]);
   5009    pwd->h[3]  = msa_sra_df(DF_HALF, pws->h[3],  pwt->h[3]);
   5010    pwd->h[4]  = msa_sra_df(DF_HALF, pws->h[4],  pwt->h[4]);
   5011    pwd->h[5]  = msa_sra_df(DF_HALF, pws->h[5],  pwt->h[5]);
   5012    pwd->h[6]  = msa_sra_df(DF_HALF, pws->h[6],  pwt->h[6]);
   5013    pwd->h[7]  = msa_sra_df(DF_HALF, pws->h[7],  pwt->h[7]);
   5014}
   5015
   5016void helper_msa_sra_w(CPUMIPSState *env,
   5017                      uint32_t wd, uint32_t ws, uint32_t wt)
   5018{
   5019    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5020    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5021    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5022
   5023    pwd->w[0]  = msa_sra_df(DF_WORD, pws->w[0],  pwt->w[0]);
   5024    pwd->w[1]  = msa_sra_df(DF_WORD, pws->w[1],  pwt->w[1]);
   5025    pwd->w[2]  = msa_sra_df(DF_WORD, pws->w[2],  pwt->w[2]);
   5026    pwd->w[3]  = msa_sra_df(DF_WORD, pws->w[3],  pwt->w[3]);
   5027}
   5028
   5029void helper_msa_sra_d(CPUMIPSState *env,
   5030                      uint32_t wd, uint32_t ws, uint32_t wt)
   5031{
   5032    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5033    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5034    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5035
   5036    pwd->d[0]  = msa_sra_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   5037    pwd->d[1]  = msa_sra_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   5038}
   5039
   5040
   5041static inline int64_t msa_srar_df(uint32_t df, int64_t arg1, int64_t arg2)
   5042{
   5043    int32_t b_arg2 = BIT_POSITION(arg2, df);
   5044    if (b_arg2 == 0) {
   5045        return arg1;
   5046    } else {
   5047        int64_t r_bit = (arg1 >> (b_arg2 - 1)) & 1;
   5048        return (arg1 >> b_arg2) + r_bit;
   5049    }
   5050}
   5051
   5052void helper_msa_srar_b(CPUMIPSState *env,
   5053                       uint32_t wd, uint32_t ws, uint32_t wt)
   5054{
   5055    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5056    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5057    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5058
   5059    pwd->b[0]  = msa_srar_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   5060    pwd->b[1]  = msa_srar_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   5061    pwd->b[2]  = msa_srar_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   5062    pwd->b[3]  = msa_srar_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   5063    pwd->b[4]  = msa_srar_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   5064    pwd->b[5]  = msa_srar_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   5065    pwd->b[6]  = msa_srar_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   5066    pwd->b[7]  = msa_srar_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   5067    pwd->b[8]  = msa_srar_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   5068    pwd->b[9]  = msa_srar_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   5069    pwd->b[10] = msa_srar_df(DF_BYTE, pws->b[10], pwt->b[10]);
   5070    pwd->b[11] = msa_srar_df(DF_BYTE, pws->b[11], pwt->b[11]);
   5071    pwd->b[12] = msa_srar_df(DF_BYTE, pws->b[12], pwt->b[12]);
   5072    pwd->b[13] = msa_srar_df(DF_BYTE, pws->b[13], pwt->b[13]);
   5073    pwd->b[14] = msa_srar_df(DF_BYTE, pws->b[14], pwt->b[14]);
   5074    pwd->b[15] = msa_srar_df(DF_BYTE, pws->b[15], pwt->b[15]);
   5075}
   5076
   5077void helper_msa_srar_h(CPUMIPSState *env,
   5078                       uint32_t wd, uint32_t ws, uint32_t wt)
   5079{
   5080    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5081    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5082    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5083
   5084    pwd->h[0]  = msa_srar_df(DF_HALF, pws->h[0],  pwt->h[0]);
   5085    pwd->h[1]  = msa_srar_df(DF_HALF, pws->h[1],  pwt->h[1]);
   5086    pwd->h[2]  = msa_srar_df(DF_HALF, pws->h[2],  pwt->h[2]);
   5087    pwd->h[3]  = msa_srar_df(DF_HALF, pws->h[3],  pwt->h[3]);
   5088    pwd->h[4]  = msa_srar_df(DF_HALF, pws->h[4],  pwt->h[4]);
   5089    pwd->h[5]  = msa_srar_df(DF_HALF, pws->h[5],  pwt->h[5]);
   5090    pwd->h[6]  = msa_srar_df(DF_HALF, pws->h[6],  pwt->h[6]);
   5091    pwd->h[7]  = msa_srar_df(DF_HALF, pws->h[7],  pwt->h[7]);
   5092}
   5093
   5094void helper_msa_srar_w(CPUMIPSState *env,
   5095                       uint32_t wd, uint32_t ws, uint32_t wt)
   5096{
   5097    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5098    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5099    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5100
   5101    pwd->w[0]  = msa_srar_df(DF_WORD, pws->w[0],  pwt->w[0]);
   5102    pwd->w[1]  = msa_srar_df(DF_WORD, pws->w[1],  pwt->w[1]);
   5103    pwd->w[2]  = msa_srar_df(DF_WORD, pws->w[2],  pwt->w[2]);
   5104    pwd->w[3]  = msa_srar_df(DF_WORD, pws->w[3],  pwt->w[3]);
   5105}
   5106
   5107void helper_msa_srar_d(CPUMIPSState *env,
   5108                       uint32_t wd, uint32_t ws, uint32_t wt)
   5109{
   5110    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5111    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5112    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5113
   5114    pwd->d[0]  = msa_srar_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   5115    pwd->d[1]  = msa_srar_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   5116}
   5117
   5118
   5119static inline int64_t msa_srl_df(uint32_t df, int64_t arg1, int64_t arg2)
   5120{
   5121    uint64_t u_arg1 = UNSIGNED(arg1, df);
   5122    int32_t b_arg2 = BIT_POSITION(arg2, df);
   5123    return u_arg1 >> b_arg2;
   5124}
   5125
   5126void helper_msa_srl_b(CPUMIPSState *env,
   5127                      uint32_t wd, uint32_t ws, uint32_t wt)
   5128{
   5129    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5130    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5131    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5132
   5133    pwd->b[0]  = msa_srl_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   5134    pwd->b[1]  = msa_srl_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   5135    pwd->b[2]  = msa_srl_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   5136    pwd->b[3]  = msa_srl_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   5137    pwd->b[4]  = msa_srl_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   5138    pwd->b[5]  = msa_srl_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   5139    pwd->b[6]  = msa_srl_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   5140    pwd->b[7]  = msa_srl_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   5141    pwd->b[8]  = msa_srl_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   5142    pwd->b[9]  = msa_srl_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   5143    pwd->b[10] = msa_srl_df(DF_BYTE, pws->b[10], pwt->b[10]);
   5144    pwd->b[11] = msa_srl_df(DF_BYTE, pws->b[11], pwt->b[11]);
   5145    pwd->b[12] = msa_srl_df(DF_BYTE, pws->b[12], pwt->b[12]);
   5146    pwd->b[13] = msa_srl_df(DF_BYTE, pws->b[13], pwt->b[13]);
   5147    pwd->b[14] = msa_srl_df(DF_BYTE, pws->b[14], pwt->b[14]);
   5148    pwd->b[15] = msa_srl_df(DF_BYTE, pws->b[15], pwt->b[15]);
   5149}
   5150
   5151void helper_msa_srl_h(CPUMIPSState *env,
   5152                      uint32_t wd, uint32_t ws, uint32_t wt)
   5153{
   5154    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5155    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5156    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5157
   5158    pwd->h[0]  = msa_srl_df(DF_HALF, pws->h[0],  pwt->h[0]);
   5159    pwd->h[1]  = msa_srl_df(DF_HALF, pws->h[1],  pwt->h[1]);
   5160    pwd->h[2]  = msa_srl_df(DF_HALF, pws->h[2],  pwt->h[2]);
   5161    pwd->h[3]  = msa_srl_df(DF_HALF, pws->h[3],  pwt->h[3]);
   5162    pwd->h[4]  = msa_srl_df(DF_HALF, pws->h[4],  pwt->h[4]);
   5163    pwd->h[5]  = msa_srl_df(DF_HALF, pws->h[5],  pwt->h[5]);
   5164    pwd->h[6]  = msa_srl_df(DF_HALF, pws->h[6],  pwt->h[6]);
   5165    pwd->h[7]  = msa_srl_df(DF_HALF, pws->h[7],  pwt->h[7]);
   5166}
   5167
   5168void helper_msa_srl_w(CPUMIPSState *env,
   5169                      uint32_t wd, uint32_t ws, uint32_t wt)
   5170{
   5171    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5172    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5173    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5174
   5175    pwd->w[0]  = msa_srl_df(DF_WORD, pws->w[0],  pwt->w[0]);
   5176    pwd->w[1]  = msa_srl_df(DF_WORD, pws->w[1],  pwt->w[1]);
   5177    pwd->w[2]  = msa_srl_df(DF_WORD, pws->w[2],  pwt->w[2]);
   5178    pwd->w[3]  = msa_srl_df(DF_WORD, pws->w[3],  pwt->w[3]);
   5179}
   5180
   5181void helper_msa_srl_d(CPUMIPSState *env,
   5182                      uint32_t wd, uint32_t ws, uint32_t wt)
   5183{
   5184    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5185    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5186    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5187
   5188    pwd->d[0]  = msa_srl_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   5189    pwd->d[1]  = msa_srl_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   5190}
   5191
   5192
   5193static inline int64_t msa_srlr_df(uint32_t df, int64_t arg1, int64_t arg2)
   5194{
   5195    uint64_t u_arg1 = UNSIGNED(arg1, df);
   5196    int32_t b_arg2 = BIT_POSITION(arg2, df);
   5197    if (b_arg2 == 0) {
   5198        return u_arg1;
   5199    } else {
   5200        uint64_t r_bit = (u_arg1 >> (b_arg2 - 1)) & 1;
   5201        return (u_arg1 >> b_arg2) + r_bit;
   5202    }
   5203}
   5204
   5205void helper_msa_srlr_b(CPUMIPSState *env,
   5206                       uint32_t wd, uint32_t ws, uint32_t wt)
   5207{
   5208    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5209    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5210    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5211
   5212    pwd->b[0]  = msa_srlr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   5213    pwd->b[1]  = msa_srlr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   5214    pwd->b[2]  = msa_srlr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   5215    pwd->b[3]  = msa_srlr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   5216    pwd->b[4]  = msa_srlr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   5217    pwd->b[5]  = msa_srlr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   5218    pwd->b[6]  = msa_srlr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   5219    pwd->b[7]  = msa_srlr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   5220    pwd->b[8]  = msa_srlr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   5221    pwd->b[9]  = msa_srlr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   5222    pwd->b[10] = msa_srlr_df(DF_BYTE, pws->b[10], pwt->b[10]);
   5223    pwd->b[11] = msa_srlr_df(DF_BYTE, pws->b[11], pwt->b[11]);
   5224    pwd->b[12] = msa_srlr_df(DF_BYTE, pws->b[12], pwt->b[12]);
   5225    pwd->b[13] = msa_srlr_df(DF_BYTE, pws->b[13], pwt->b[13]);
   5226    pwd->b[14] = msa_srlr_df(DF_BYTE, pws->b[14], pwt->b[14]);
   5227    pwd->b[15] = msa_srlr_df(DF_BYTE, pws->b[15], pwt->b[15]);
   5228}
   5229
   5230void helper_msa_srlr_h(CPUMIPSState *env,
   5231                       uint32_t wd, uint32_t ws, uint32_t wt)
   5232{
   5233    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5234    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5235    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5236
   5237    pwd->h[0]  = msa_srlr_df(DF_HALF, pws->h[0],  pwt->h[0]);
   5238    pwd->h[1]  = msa_srlr_df(DF_HALF, pws->h[1],  pwt->h[1]);
   5239    pwd->h[2]  = msa_srlr_df(DF_HALF, pws->h[2],  pwt->h[2]);
   5240    pwd->h[3]  = msa_srlr_df(DF_HALF, pws->h[3],  pwt->h[3]);
   5241    pwd->h[4]  = msa_srlr_df(DF_HALF, pws->h[4],  pwt->h[4]);
   5242    pwd->h[5]  = msa_srlr_df(DF_HALF, pws->h[5],  pwt->h[5]);
   5243    pwd->h[6]  = msa_srlr_df(DF_HALF, pws->h[6],  pwt->h[6]);
   5244    pwd->h[7]  = msa_srlr_df(DF_HALF, pws->h[7],  pwt->h[7]);
   5245}
   5246
   5247void helper_msa_srlr_w(CPUMIPSState *env,
   5248                       uint32_t wd, uint32_t ws, uint32_t wt)
   5249{
   5250    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5251    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5252    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5253
   5254    pwd->w[0]  = msa_srlr_df(DF_WORD, pws->w[0],  pwt->w[0]);
   5255    pwd->w[1]  = msa_srlr_df(DF_WORD, pws->w[1],  pwt->w[1]);
   5256    pwd->w[2]  = msa_srlr_df(DF_WORD, pws->w[2],  pwt->w[2]);
   5257    pwd->w[3]  = msa_srlr_df(DF_WORD, pws->w[3],  pwt->w[3]);
   5258}
   5259
   5260void helper_msa_srlr_d(CPUMIPSState *env,
   5261                       uint32_t wd, uint32_t ws, uint32_t wt)
   5262{
   5263    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5264    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5265    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5266
   5267    pwd->d[0]  = msa_srlr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   5268    pwd->d[1]  = msa_srlr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   5269}
   5270
   5271
/*
 * Generate helper_msa_<FUNC>(env, wd, ws, i8).
 *
 * The generated function loops i over DF_ELEMENTS(DF_BYTE) elements and
 * executes "DEST = OPERATION" on each pass.  DEST and OPERATION are pasted
 * textually, so they may refer to the locals pwd, pws, i and to the
 * immediate i8.
 */
#define MSA_FN_IMM8(FUNC, DEST, OPERATION)                              \
void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws,   \
        uint32_t i8)                                                    \
{                                                                       \
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
    uint32_t i;                                                         \
    for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                        \
        DEST = OPERATION;                                               \
    }                                                                   \
}
   5283
/*
 * Immediate logical helpers: each byte of wd becomes the corresponding byte
 * of ws combined with i8 by AND, OR, NOR and XOR respectively.
 */
MSA_FN_IMM8(andi_b, pwd->b[i], pws->b[i] & i8)
MSA_FN_IMM8(ori_b, pwd->b[i], pws->b[i] | i8)
MSA_FN_IMM8(nori_b, pwd->b[i], ~(pws->b[i] | i8))
MSA_FN_IMM8(xori_b, pwd->b[i], pws->b[i] ^ i8)
   5288
   5289#define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
   5290            UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
   5291MSA_FN_IMM8(bmnzi_b, pwd->b[i],
   5292        BIT_MOVE_IF_NOT_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
   5293
   5294#define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
   5295            UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
   5296MSA_FN_IMM8(bmzi_b, pwd->b[i],
   5297        BIT_MOVE_IF_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
   5298
   5299#define BIT_SELECT(dest, arg1, arg2, df) \
   5300            UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
   5301MSA_FN_IMM8(bseli_b, pwd->b[i],
   5302        BIT_SELECT(pwd->b[i], pws->b[i], i8, DF_BYTE))
   5303
/* The bit-manipulation macros and the IMM8 generator are not used below. */
#undef BIT_SELECT
#undef BIT_MOVE_IF_ZERO
#undef BIT_MOVE_IF_NOT_ZERO
#undef MSA_FN_IMM8
   5308
   5309#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
   5310
   5311void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   5312                       uint32_t ws, uint32_t imm)
   5313{
   5314    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5315    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5316    wr_t wx, *pwx = &wx;
   5317    uint32_t i;
   5318
   5319    switch (df) {
   5320    case DF_BYTE:
   5321        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
   5322            pwx->b[i] = pws->b[SHF_POS(i, imm)];
   5323        }
   5324        break;
   5325    case DF_HALF:
   5326        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
   5327            pwx->h[i] = pws->h[SHF_POS(i, imm)];
   5328        }
   5329        break;
   5330    case DF_WORD:
   5331        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   5332            pwx->w[i] = pws->w[SHF_POS(i, imm)];
   5333        }
   5334        break;
   5335    default:
   5336        assert(0);
   5337    }
   5338    msa_move_v(pwd, pwx);
   5339}
   5340
/*
 * Generate helper_msa_<helper>_df(env, df, wd, ws, u5).
 *
 * The generated function selects the element type from df and, for every
 * element of ws, stores msa_<func>_df(df, element, u5) into the matching
 * element of wd.  The same immediate u5 is passed for every element.  A df
 * other than DF_BYTE, DF_HALF, DF_WORD or DF_DOUBLE trips the assertion.
 */
#define MSA_BINOP_IMM_DF(helper, func)                                  \
void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
                        uint32_t wd, uint32_t ws, int32_t u5)           \
{                                                                       \
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
    uint32_t i;                                                         \
                                                                        \
    switch (df) {                                                       \
    case DF_BYTE:                                                       \
        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
            pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
        }                                                               \
        break;                                                          \
    case DF_HALF:                                                       \
        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
            pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
        }                                                               \
        break;                                                          \
    case DF_WORD:                                                       \
        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
            pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
        }                                                               \
        break;                                                          \
    case DF_DOUBLE:                                                     \
        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
            pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
        }                                                               \
        break;                                                          \
    default:                                                            \
        assert(0);                                                      \
    }                                                                   \
}
   5374
   5375MSA_BINOP_IMM_DF(addvi, addv)
   5376MSA_BINOP_IMM_DF(subvi, subv)
   5377MSA_BINOP_IMM_DF(ceqi, ceq)
   5378MSA_BINOP_IMM_DF(clei_s, cle_s)
   5379MSA_BINOP_IMM_DF(clei_u, cle_u)
   5380MSA_BINOP_IMM_DF(clti_s, clt_s)
   5381MSA_BINOP_IMM_DF(clti_u, clt_u)
   5382MSA_BINOP_IMM_DF(maxi_s, max_s)
   5383MSA_BINOP_IMM_DF(maxi_u, max_u)
   5384MSA_BINOP_IMM_DF(mini_s, min_s)
   5385MSA_BINOP_IMM_DF(mini_u, min_u)
   5386#undef MSA_BINOP_IMM_DF
   5387
   5388void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   5389                       int32_t s10)
   5390{
   5391    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5392    uint32_t i;
   5393
   5394    switch (df) {
   5395    case DF_BYTE:
   5396        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
   5397            pwd->b[i] = (int8_t)s10;
   5398        }
   5399        break;
   5400    case DF_HALF:
   5401        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
   5402            pwd->h[i] = (int16_t)s10;
   5403        }
   5404        break;
   5405    case DF_WORD:
   5406        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   5407            pwd->w[i] = (int32_t)s10;
   5408        }
   5409        break;
   5410    case DF_DOUBLE:
   5411        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   5412            pwd->d[i] = (int64_t)s10;
   5413        }
   5414       break;
   5415    default:
   5416        assert(0);
   5417    }
   5418}
   5419
   5420static inline int64_t msa_sat_s_df(uint32_t df, int64_t arg, uint32_t m)
   5421{
   5422    return arg < M_MIN_INT(m + 1) ? M_MIN_INT(m + 1) :
   5423                                    arg > M_MAX_INT(m + 1) ? M_MAX_INT(m + 1) :
   5424                                                             arg;
   5425}
   5426
   5427static inline int64_t msa_sat_u_df(uint32_t df, int64_t arg, uint32_t m)
   5428{
   5429    uint64_t u_arg = UNSIGNED(arg, df);
   5430    return  u_arg < M_MAX_UINT(m + 1) ? u_arg :
   5431                                        M_MAX_UINT(m + 1);
   5432}
   5433
   5434#define MSA_BINOP_IMMU_DF(helper, func)                                  \
   5435void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
   5436                       uint32_t ws, uint32_t u5)                        \
   5437{                                                                       \
   5438    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
   5439    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
   5440    uint32_t i;                                                         \
   5441                                                                        \
   5442    switch (df) {                                                       \
   5443    case DF_BYTE:                                                       \
   5444        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
   5445            pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
   5446        }                                                               \
   5447        break;                                                          \
   5448    case DF_HALF:                                                       \
   5449        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
   5450            pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
   5451        }                                                               \
   5452        break;                                                          \
   5453    case DF_WORD:                                                       \
   5454        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
   5455            pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
   5456        }                                                               \
   5457        break;                                                          \
   5458    case DF_DOUBLE:                                                     \
   5459        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
   5460            pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
   5461        }                                                               \
   5462        break;                                                          \
   5463    default:                                                            \
   5464        assert(0);                                                      \
   5465    }                                                                   \
   5466}
   5467
   5468MSA_BINOP_IMMU_DF(slli, sll)
   5469MSA_BINOP_IMMU_DF(srai, sra)
   5470MSA_BINOP_IMMU_DF(srli, srl)
   5471MSA_BINOP_IMMU_DF(bclri, bclr)
   5472MSA_BINOP_IMMU_DF(bseti, bset)
   5473MSA_BINOP_IMMU_DF(bnegi, bneg)
   5474MSA_BINOP_IMMU_DF(sat_s, sat_s)
   5475MSA_BINOP_IMMU_DF(sat_u, sat_u)
   5476MSA_BINOP_IMMU_DF(srari, srar)
   5477MSA_BINOP_IMMU_DF(srlri, srlr)
   5478#undef MSA_BINOP_IMMU_DF
   5479
   5480#define MSA_TEROP_IMMU_DF(helper, func)                                  \
   5481void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
   5482                                  uint32_t wd, uint32_t ws, uint32_t u5) \
   5483{                                                                       \
   5484    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
   5485    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
   5486    uint32_t i;                                                         \
   5487                                                                        \
   5488    switch (df) {                                                       \
   5489    case DF_BYTE:                                                       \
   5490        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
   5491            pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i],   \
   5492                                            u5);                        \
   5493        }                                                               \
   5494        break;                                                          \
   5495    case DF_HALF:                                                       \
   5496        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
   5497            pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i],   \
   5498                                            u5);                        \
   5499        }                                                               \
   5500        break;                                                          \
   5501    case DF_WORD:                                                       \
   5502        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
   5503            pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i],   \
   5504                                            u5);                        \
   5505        }                                                               \
   5506        break;                                                          \
   5507    case DF_DOUBLE:                                                     \
   5508        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
   5509            pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i],   \
   5510                                            u5);                        \
   5511        }                                                               \
   5512        break;                                                          \
   5513    default:                                                            \
   5514        assert(0);                                                      \
   5515    }                                                                   \
   5516}
   5517
   5518MSA_TEROP_IMMU_DF(binsli, binsl)
   5519MSA_TEROP_IMMU_DF(binsri, binsr)
   5520#undef MSA_TEROP_IMMU_DF
   5521
   5522#define CONCATENATE_AND_SLIDE(s, k)             \
   5523    do {                                        \
   5524        for (i = 0; i < s; i++) {               \
   5525            v[i]     = pws->b[s * k + i];       \
   5526            v[i + s] = pwd->b[s * k + i];       \
   5527        }                                       \
   5528        for (i = 0; i < s; i++) {               \
   5529            pwd->b[s * k + i] = v[i + n];       \
   5530        }                                       \
   5531    } while (0)
   5532
   5533static inline void msa_sld_df(uint32_t df, wr_t *pwd,
   5534                              wr_t *pws, target_ulong rt)
   5535{
   5536    uint32_t n = rt % DF_ELEMENTS(df);
   5537    uint8_t v[64];
   5538    uint32_t i, k;
   5539
   5540    switch (df) {
   5541    case DF_BYTE:
   5542        CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_BYTE), 0);
   5543        break;
   5544    case DF_HALF:
   5545        for (k = 0; k < 2; k++) {
   5546            CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_HALF), k);
   5547        }
   5548        break;
   5549    case DF_WORD:
   5550        for (k = 0; k < 4; k++) {
   5551            CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_WORD), k);
   5552        }
   5553        break;
   5554    case DF_DOUBLE:
   5555        for (k = 0; k < 8; k++) {
   5556            CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_DOUBLE), k);
   5557        }
   5558        break;
   5559    default:
   5560        assert(0);
   5561    }
   5562}
   5563
   5564static inline int64_t msa_mul_q_df(uint32_t df, int64_t arg1, int64_t arg2)
   5565{
   5566    int64_t q_min = DF_MIN_INT(df);
   5567    int64_t q_max = DF_MAX_INT(df);
   5568
   5569    if (arg1 == q_min && arg2 == q_min) {
   5570        return q_max;
   5571    }
   5572    return (arg1 * arg2) >> (DF_BITS(df) - 1);
   5573}
   5574
   5575static inline int64_t msa_mulr_q_df(uint32_t df, int64_t arg1, int64_t arg2)
   5576{
   5577    int64_t q_min = DF_MIN_INT(df);
   5578    int64_t q_max = DF_MAX_INT(df);
   5579    int64_t r_bit = 1 << (DF_BITS(df) - 2);
   5580
   5581    if (arg1 == q_min && arg2 == q_min) {
   5582        return q_max;
   5583    }
   5584    return (arg1 * arg2 + r_bit) >> (DF_BITS(df) - 1);
   5585}
   5586
   5587#define MSA_BINOP_DF(func) \
   5588void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
   5589                                uint32_t wd, uint32_t ws, uint32_t wt)  \
   5590{                                                                       \
   5591    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
   5592    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
   5593    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
   5594                                                                        \
   5595    switch (df) {                                                       \
   5596    case DF_BYTE:                                                       \
   5597        pwd->b[0]  = msa_ ## func ## _df(df, pws->b[0],  pwt->b[0]);    \
   5598        pwd->b[1]  = msa_ ## func ## _df(df, pws->b[1],  pwt->b[1]);    \
   5599        pwd->b[2]  = msa_ ## func ## _df(df, pws->b[2],  pwt->b[2]);    \
   5600        pwd->b[3]  = msa_ ## func ## _df(df, pws->b[3],  pwt->b[3]);    \
   5601        pwd->b[4]  = msa_ ## func ## _df(df, pws->b[4],  pwt->b[4]);    \
   5602        pwd->b[5]  = msa_ ## func ## _df(df, pws->b[5],  pwt->b[5]);    \
   5603        pwd->b[6]  = msa_ ## func ## _df(df, pws->b[6],  pwt->b[6]);    \
   5604        pwd->b[7]  = msa_ ## func ## _df(df, pws->b[7],  pwt->b[7]);    \
   5605        pwd->b[8]  = msa_ ## func ## _df(df, pws->b[8],  pwt->b[8]);    \
   5606        pwd->b[9]  = msa_ ## func ## _df(df, pws->b[9],  pwt->b[9]);    \
   5607        pwd->b[10] = msa_ ## func ## _df(df, pws->b[10], pwt->b[10]);   \
   5608        pwd->b[11] = msa_ ## func ## _df(df, pws->b[11], pwt->b[11]);   \
   5609        pwd->b[12] = msa_ ## func ## _df(df, pws->b[12], pwt->b[12]);   \
   5610        pwd->b[13] = msa_ ## func ## _df(df, pws->b[13], pwt->b[13]);   \
   5611        pwd->b[14] = msa_ ## func ## _df(df, pws->b[14], pwt->b[14]);   \
   5612        pwd->b[15] = msa_ ## func ## _df(df, pws->b[15], pwt->b[15]);   \
   5613        break;                                                          \
   5614    case DF_HALF:                                                       \
   5615        pwd->h[0] = msa_ ## func ## _df(df, pws->h[0], pwt->h[0]);      \
   5616        pwd->h[1] = msa_ ## func ## _df(df, pws->h[1], pwt->h[1]);      \
   5617        pwd->h[2] = msa_ ## func ## _df(df, pws->h[2], pwt->h[2]);      \
   5618        pwd->h[3] = msa_ ## func ## _df(df, pws->h[3], pwt->h[3]);      \
   5619        pwd->h[4] = msa_ ## func ## _df(df, pws->h[4], pwt->h[4]);      \
   5620        pwd->h[5] = msa_ ## func ## _df(df, pws->h[5], pwt->h[5]);      \
   5621        pwd->h[6] = msa_ ## func ## _df(df, pws->h[6], pwt->h[6]);      \
   5622        pwd->h[7] = msa_ ## func ## _df(df, pws->h[7], pwt->h[7]);      \
   5623        break;                                                          \
   5624    case DF_WORD:                                                       \
   5625        pwd->w[0] = msa_ ## func ## _df(df, pws->w[0], pwt->w[0]);      \
   5626        pwd->w[1] = msa_ ## func ## _df(df, pws->w[1], pwt->w[1]);      \
   5627        pwd->w[2] = msa_ ## func ## _df(df, pws->w[2], pwt->w[2]);      \
   5628        pwd->w[3] = msa_ ## func ## _df(df, pws->w[3], pwt->w[3]);      \
   5629        break;                                                          \
   5630    case DF_DOUBLE:                                                     \
   5631        pwd->d[0] = msa_ ## func ## _df(df, pws->d[0], pwt->d[0]);      \
   5632        pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]);      \
   5633        break;                                                          \
   5634    default:                                                            \
   5635        assert(0);                                                      \
   5636    }                                                                   \
   5637}
   5638
   5639MSA_BINOP_DF(mul_q)
   5640MSA_BINOP_DF(mulr_q)
   5641#undef MSA_BINOP_DF
   5642
   5643void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   5644                       uint32_t ws, uint32_t rt)
   5645{
   5646    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5647    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5648
   5649    msa_sld_df(df, pwd, pws, env->active_tc.gpr[rt]);
   5650}
   5651
   5652static inline int64_t msa_madd_q_df(uint32_t df, int64_t dest, int64_t arg1,
   5653                                    int64_t arg2)
   5654{
   5655    int64_t q_prod, q_ret;
   5656
   5657    int64_t q_max = DF_MAX_INT(df);
   5658    int64_t q_min = DF_MIN_INT(df);
   5659
   5660    q_prod = arg1 * arg2;
   5661    q_ret = ((dest << (DF_BITS(df) - 1)) + q_prod) >> (DF_BITS(df) - 1);
   5662
   5663    return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
   5664}
   5665
   5666static inline int64_t msa_msub_q_df(uint32_t df, int64_t dest, int64_t arg1,
   5667                                    int64_t arg2)
   5668{
   5669    int64_t q_prod, q_ret;
   5670
   5671    int64_t q_max = DF_MAX_INT(df);
   5672    int64_t q_min = DF_MIN_INT(df);
   5673
   5674    q_prod = arg1 * arg2;
   5675    q_ret = ((dest << (DF_BITS(df) - 1)) - q_prod) >> (DF_BITS(df) - 1);
   5676
   5677    return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
   5678}
   5679
   5680static inline int64_t msa_maddr_q_df(uint32_t df, int64_t dest, int64_t arg1,
   5681                                     int64_t arg2)
   5682{
   5683    int64_t q_prod, q_ret;
   5684
   5685    int64_t q_max = DF_MAX_INT(df);
   5686    int64_t q_min = DF_MIN_INT(df);
   5687    int64_t r_bit = 1 << (DF_BITS(df) - 2);
   5688
   5689    q_prod = arg1 * arg2;
   5690    q_ret = ((dest << (DF_BITS(df) - 1)) + q_prod + r_bit) >> (DF_BITS(df) - 1);
   5691
   5692    return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
   5693}
   5694
   5695static inline int64_t msa_msubr_q_df(uint32_t df, int64_t dest, int64_t arg1,
   5696                                     int64_t arg2)
   5697{
   5698    int64_t q_prod, q_ret;
   5699
   5700    int64_t q_max = DF_MAX_INT(df);
   5701    int64_t q_min = DF_MIN_INT(df);
   5702    int64_t r_bit = 1 << (DF_BITS(df) - 2);
   5703
   5704    q_prod = arg1 * arg2;
   5705    q_ret = ((dest << (DF_BITS(df) - 1)) - q_prod + r_bit) >> (DF_BITS(df) - 1);
   5706
   5707    return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
   5708}
   5709
   5710#define MSA_TEROP_DF(func) \
   5711void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,  \
   5712                                uint32_t ws, uint32_t wt)                     \
   5713{                                                                             \
   5714    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                                \
   5715    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                                \
   5716    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                                \
   5717                                                                              \
   5718    switch (df) {                                                             \
   5719    case DF_BYTE:                                                             \
   5720        pwd->b[0]  = msa_ ## func ## _df(df, pwd->b[0],  pws->b[0],           \
   5721                                             pwt->b[0]);                      \
   5722        pwd->b[1]  = msa_ ## func ## _df(df, pwd->b[1],  pws->b[1],           \
   5723                                             pwt->b[1]);                      \
   5724        pwd->b[2]  = msa_ ## func ## _df(df, pwd->b[2],  pws->b[2],           \
   5725                                             pwt->b[2]);                      \
   5726        pwd->b[3]  = msa_ ## func ## _df(df, pwd->b[3],  pws->b[3],           \
   5727                                             pwt->b[3]);                      \
   5728        pwd->b[4]  = msa_ ## func ## _df(df, pwd->b[4],  pws->b[4],           \
   5729                                             pwt->b[4]);                      \
   5730        pwd->b[5]  = msa_ ## func ## _df(df, pwd->b[5],  pws->b[5],           \
   5731                                             pwt->b[5]);                      \
   5732        pwd->b[6]  = msa_ ## func ## _df(df, pwd->b[6],  pws->b[6],           \
   5733                                             pwt->b[6]);                      \
   5734        pwd->b[7]  = msa_ ## func ## _df(df, pwd->b[7],  pws->b[7],           \
   5735                                             pwt->b[7]);                      \
   5736        pwd->b[8]  = msa_ ## func ## _df(df, pwd->b[8],  pws->b[8],           \
   5737                                             pwt->b[8]);                      \
   5738        pwd->b[9]  = msa_ ## func ## _df(df, pwd->b[9],  pws->b[9],           \
   5739                                             pwt->b[9]);                      \
   5740        pwd->b[10] = msa_ ## func ## _df(df, pwd->b[10], pws->b[10],          \
   5741                                             pwt->b[10]);                     \
   5742        pwd->b[11] = msa_ ## func ## _df(df, pwd->b[11], pws->b[11],          \
   5743                                             pwt->b[11]);                     \
   5744        pwd->b[12] = msa_ ## func ## _df(df, pwd->b[12], pws->b[12],          \
   5745                                             pwt->b[12]);                     \
   5746        pwd->b[13] = msa_ ## func ## _df(df, pwd->b[13], pws->b[13],          \
   5747                                             pwt->b[13]);                     \
   5748        pwd->b[14] = msa_ ## func ## _df(df, pwd->b[14], pws->b[14],          \
   5749                                             pwt->b[14]);                     \
   5750        pwd->b[15] = msa_ ## func ## _df(df, pwd->b[15], pws->b[15],          \
   5751                                             pwt->b[15]);                     \
   5752        break;                                                                \
   5753    case DF_HALF:                                                             \
   5754        pwd->h[0] = msa_ ## func ## _df(df, pwd->h[0], pws->h[0], pwt->h[0]); \
   5755        pwd->h[1] = msa_ ## func ## _df(df, pwd->h[1], pws->h[1], pwt->h[1]); \
   5756        pwd->h[2] = msa_ ## func ## _df(df, pwd->h[2], pws->h[2], pwt->h[2]); \
   5757        pwd->h[3] = msa_ ## func ## _df(df, pwd->h[3], pws->h[3], pwt->h[3]); \
   5758        pwd->h[4] = msa_ ## func ## _df(df, pwd->h[4], pws->h[4], pwt->h[4]); \
   5759        pwd->h[5] = msa_ ## func ## _df(df, pwd->h[5], pws->h[5], pwt->h[5]); \
   5760        pwd->h[6] = msa_ ## func ## _df(df, pwd->h[6], pws->h[6], pwt->h[6]); \
   5761        pwd->h[7] = msa_ ## func ## _df(df, pwd->h[7], pws->h[7], pwt->h[7]); \
   5762        break;                                                                \
   5763    case DF_WORD:                                                             \
   5764        pwd->w[0] = msa_ ## func ## _df(df, pwd->w[0], pws->w[0], pwt->w[0]); \
   5765        pwd->w[1] = msa_ ## func ## _df(df, pwd->w[1], pws->w[1], pwt->w[1]); \
   5766        pwd->w[2] = msa_ ## func ## _df(df, pwd->w[2], pws->w[2], pwt->w[2]); \
   5767        pwd->w[3] = msa_ ## func ## _df(df, pwd->w[3], pws->w[3], pwt->w[3]); \
   5768        break;                                                                \
   5769    case DF_DOUBLE:                                                           \
   5770        pwd->d[0] = msa_ ## func ## _df(df, pwd->d[0], pws->d[0], pwt->d[0]); \
   5771        pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], pws->d[1], pwt->d[1]); \
   5772        break;                                                                \
   5773    default:                                                                  \
   5774        assert(0);                                                            \
   5775    }                                                                         \
   5776}
   5777
   5778MSA_TEROP_DF(binsl)
   5779MSA_TEROP_DF(binsr)
   5780MSA_TEROP_DF(madd_q)
   5781MSA_TEROP_DF(msub_q)
   5782MSA_TEROP_DF(maddr_q)
   5783MSA_TEROP_DF(msubr_q)
   5784#undef MSA_TEROP_DF
   5785
   5786static inline void msa_splat_df(uint32_t df, wr_t *pwd,
   5787                                wr_t *pws, target_ulong rt)
   5788{
   5789    uint32_t n = rt % DF_ELEMENTS(df);
   5790    uint32_t i;
   5791
   5792    switch (df) {
   5793    case DF_BYTE:
   5794        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
   5795            pwd->b[i] = pws->b[n];
   5796        }
   5797        break;
   5798    case DF_HALF:
   5799        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
   5800            pwd->h[i] = pws->h[n];
   5801        }
   5802        break;
   5803    case DF_WORD:
   5804        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   5805            pwd->w[i] = pws->w[n];
   5806        }
   5807        break;
   5808    case DF_DOUBLE:
   5809        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   5810            pwd->d[i] = pws->d[n];
   5811        }
   5812       break;
   5813    default:
   5814        assert(0);
   5815    }
   5816}
   5817
   5818void helper_msa_splat_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   5819                         uint32_t ws, uint32_t rt)
   5820{
   5821    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5822    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5823
   5824    msa_splat_df(df, pwd, pws, env->active_tc.gpr[rt]);
   5825}
   5826
   5827#define MSA_DO_B MSA_DO(b)
   5828#define MSA_DO_H MSA_DO(h)
   5829#define MSA_DO_W MSA_DO(w)
   5830#define MSA_DO_D MSA_DO(d)
   5831
   5832#define MSA_LOOP_B MSA_LOOP(B)
   5833#define MSA_LOOP_H MSA_LOOP(H)
   5834#define MSA_LOOP_W MSA_LOOP(W)
   5835#define MSA_LOOP_D MSA_LOOP(D)
   5836
   5837#define MSA_LOOP_COND_B MSA_LOOP_COND(DF_BYTE)
   5838#define MSA_LOOP_COND_H MSA_LOOP_COND(DF_HALF)
   5839#define MSA_LOOP_COND_W MSA_LOOP_COND(DF_WORD)
   5840#define MSA_LOOP_COND_D MSA_LOOP_COND(DF_DOUBLE)
   5841
   5842#define MSA_LOOP(DF) \
   5843    do { \
   5844        for (i = 0; i < (MSA_LOOP_COND_ ## DF) ; i++) { \
   5845            MSA_DO_ ## DF; \
   5846        } \
   5847    } while (0)
   5848
   5849#define MSA_FN_DF(FUNC)                                             \
   5850void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
   5851        uint32_t ws, uint32_t wt)                                   \
   5852{                                                                   \
   5853    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                      \
   5854    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                      \
   5855    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                      \
   5856    wr_t wx, *pwx = &wx;                                            \
   5857    uint32_t i;                                                     \
   5858    switch (df) {                                                   \
   5859    case DF_BYTE:                                                   \
   5860        MSA_LOOP_B;                                                 \
   5861        break;                                                      \
   5862    case DF_HALF:                                                   \
   5863        MSA_LOOP_H;                                                 \
   5864        break;                                                      \
   5865    case DF_WORD:                                                   \
   5866        MSA_LOOP_W;                                                 \
   5867        break;                                                      \
   5868    case DF_DOUBLE:                                                 \
   5869        MSA_LOOP_D;                                                 \
   5870        break;                                                      \
   5871    default:                                                        \
   5872        assert(0);                                                  \
   5873    }                                                               \
   5874    msa_move_v(pwd, pwx);                                           \
   5875}
   5876
   5877#define MSA_LOOP_COND(DF) \
   5878            (DF_ELEMENTS(DF) / 2)
   5879
   5880#define Rb(pwr, i) (pwr->b[i])
   5881#define Lb(pwr, i) (pwr->b[i + DF_ELEMENTS(DF_BYTE) / 2])
   5882#define Rh(pwr, i) (pwr->h[i])
   5883#define Lh(pwr, i) (pwr->h[i + DF_ELEMENTS(DF_HALF) / 2])
   5884#define Rw(pwr, i) (pwr->w[i])
   5885#define Lw(pwr, i) (pwr->w[i + DF_ELEMENTS(DF_WORD) / 2])
   5886#define Rd(pwr, i) (pwr->d[i])
   5887#define Ld(pwr, i) (pwr->d[i + DF_ELEMENTS(DF_DOUBLE) / 2])
   5888
   5889#undef MSA_LOOP_COND
   5890
   5891#define MSA_LOOP_COND(DF) \
   5892            (DF_ELEMENTS(DF))
   5893
   5894#define MSA_DO(DF)                                                          \
   5895    do {                                                                    \
   5896        uint32_t n = DF_ELEMENTS(df);                                       \
   5897        uint32_t k = (pwd->DF[i] & 0x3f) % (2 * n);                         \
   5898        pwx->DF[i] =                                                        \
   5899            (pwd->DF[i] & 0xc0) ? 0 : k < n ? pwt->DF[k] : pws->DF[k - n];  \
   5900    } while (0)
   5901MSA_FN_DF(vshf_df)
   5902#undef MSA_DO
   5903#undef MSA_LOOP_COND
   5904#undef MSA_FN_DF
   5905
   5906
   5907void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   5908                        uint32_t ws, uint32_t n)
   5909{
   5910    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5911    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5912
   5913    msa_sld_df(df, pwd, pws, n);
   5914}
   5915
   5916void helper_msa_splati_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   5917                          uint32_t ws, uint32_t n)
   5918{
   5919    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5920    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5921
   5922    msa_splat_df(df, pwd, pws, n);
   5923}
   5924
   5925void helper_msa_copy_s_b(CPUMIPSState *env, uint32_t rd,
   5926                         uint32_t ws, uint32_t n)
   5927{
   5928    n %= 16;
   5929#if defined(HOST_WORDS_BIGENDIAN)
   5930    if (n < 8) {
   5931        n = 8 - n - 1;
   5932    } else {
   5933        n = 24 - n - 1;
   5934    }
   5935#endif
   5936    env->active_tc.gpr[rd] = (int8_t)env->active_fpu.fpr[ws].wr.b[n];
   5937}
   5938
   5939void helper_msa_copy_s_h(CPUMIPSState *env, uint32_t rd,
   5940                         uint32_t ws, uint32_t n)
   5941{
   5942    n %= 8;
   5943#if defined(HOST_WORDS_BIGENDIAN)
   5944    if (n < 4) {
   5945        n = 4 - n - 1;
   5946    } else {
   5947        n = 12 - n - 1;
   5948    }
   5949#endif
   5950    env->active_tc.gpr[rd] = (int16_t)env->active_fpu.fpr[ws].wr.h[n];
   5951}
   5952
   5953void helper_msa_copy_s_w(CPUMIPSState *env, uint32_t rd,
   5954                         uint32_t ws, uint32_t n)
   5955{
   5956    n %= 4;
   5957#if defined(HOST_WORDS_BIGENDIAN)
   5958    if (n < 2) {
   5959        n = 2 - n - 1;
   5960    } else {
   5961        n = 6 - n - 1;
   5962    }
   5963#endif
   5964    env->active_tc.gpr[rd] = (int32_t)env->active_fpu.fpr[ws].wr.w[n];
   5965}
   5966
   5967void helper_msa_copy_s_d(CPUMIPSState *env, uint32_t rd,
   5968                         uint32_t ws, uint32_t n)
   5969{
   5970    n %= 2;
   5971    env->active_tc.gpr[rd] = (int64_t)env->active_fpu.fpr[ws].wr.d[n];
   5972}
   5973
   5974void helper_msa_copy_u_b(CPUMIPSState *env, uint32_t rd,
   5975                         uint32_t ws, uint32_t n)
   5976{
   5977    n %= 16;
   5978#if defined(HOST_WORDS_BIGENDIAN)
   5979    if (n < 8) {
   5980        n = 8 - n - 1;
   5981    } else {
   5982        n = 24 - n - 1;
   5983    }
   5984#endif
   5985    env->active_tc.gpr[rd] = (uint8_t)env->active_fpu.fpr[ws].wr.b[n];
   5986}
   5987
   5988void helper_msa_copy_u_h(CPUMIPSState *env, uint32_t rd,
   5989                         uint32_t ws, uint32_t n)
   5990{
   5991    n %= 8;
   5992#if defined(HOST_WORDS_BIGENDIAN)
   5993    if (n < 4) {
   5994        n = 4 - n - 1;
   5995    } else {
   5996        n = 12 - n - 1;
   5997    }
   5998#endif
   5999    env->active_tc.gpr[rd] = (uint16_t)env->active_fpu.fpr[ws].wr.h[n];
   6000}
   6001
   6002void helper_msa_copy_u_w(CPUMIPSState *env, uint32_t rd,
   6003                         uint32_t ws, uint32_t n)
   6004{
   6005    n %= 4;
   6006#if defined(HOST_WORDS_BIGENDIAN)
   6007    if (n < 2) {
   6008        n = 2 - n - 1;
   6009    } else {
   6010        n = 6 - n - 1;
   6011    }
   6012#endif
   6013    env->active_tc.gpr[rd] = (uint32_t)env->active_fpu.fpr[ws].wr.w[n];
   6014}
   6015
   6016void helper_msa_insert_b(CPUMIPSState *env, uint32_t wd,
   6017                          uint32_t rs_num, uint32_t n)
   6018{
   6019    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6020    target_ulong rs = env->active_tc.gpr[rs_num];
   6021    n %= 16;
   6022#if defined(HOST_WORDS_BIGENDIAN)
   6023    if (n < 8) {
   6024        n = 8 - n - 1;
   6025    } else {
   6026        n = 24 - n - 1;
   6027    }
   6028#endif
   6029    pwd->b[n] = (int8_t)rs;
   6030}
   6031
   6032void helper_msa_insert_h(CPUMIPSState *env, uint32_t wd,
   6033                          uint32_t rs_num, uint32_t n)
   6034{
   6035    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6036    target_ulong rs = env->active_tc.gpr[rs_num];
   6037    n %= 8;
   6038#if defined(HOST_WORDS_BIGENDIAN)
   6039    if (n < 4) {
   6040        n = 4 - n - 1;
   6041    } else {
   6042        n = 12 - n - 1;
   6043    }
   6044#endif
   6045    pwd->h[n] = (int16_t)rs;
   6046}
   6047
   6048void helper_msa_insert_w(CPUMIPSState *env, uint32_t wd,
   6049                          uint32_t rs_num, uint32_t n)
   6050{
   6051    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6052    target_ulong rs = env->active_tc.gpr[rs_num];
   6053    n %= 4;
   6054#if defined(HOST_WORDS_BIGENDIAN)
   6055    if (n < 2) {
   6056        n = 2 - n - 1;
   6057    } else {
   6058        n = 6 - n - 1;
   6059    }
   6060#endif
   6061    pwd->w[n] = (int32_t)rs;
   6062}
   6063
   6064void helper_msa_insert_d(CPUMIPSState *env, uint32_t wd,
   6065                          uint32_t rs_num, uint32_t n)
   6066{
   6067    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6068    target_ulong rs = env->active_tc.gpr[rs_num];
   6069    n %= 2;
   6070    pwd->d[n] = (int64_t)rs;
   6071}
   6072
   6073void helper_msa_insve_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6074                         uint32_t ws, uint32_t n)
   6075{
   6076    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6077    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6078
   6079    switch (df) {
   6080    case DF_BYTE:
   6081        pwd->b[n] = (int8_t)pws->b[0];
   6082        break;
   6083    case DF_HALF:
   6084        pwd->h[n] = (int16_t)pws->h[0];
   6085        break;
   6086    case DF_WORD:
   6087        pwd->w[n] = (int32_t)pws->w[0];
   6088        break;
   6089    case DF_DOUBLE:
   6090        pwd->d[n] = (int64_t)pws->d[0];
   6091        break;
   6092    default:
   6093        assert(0);
   6094    }
   6095}
   6096
   6097void helper_msa_ctcmsa(CPUMIPSState *env, target_ulong elm, uint32_t cd)
   6098{
   6099    switch (cd) {
   6100    case 0:
   6101        break;
   6102    case 1:
   6103        env->active_tc.msacsr = (int32_t)elm & MSACSR_MASK;
   6104        restore_msa_fp_status(env);
   6105        /* check exception */
   6106        if ((GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)
   6107            & GET_FP_CAUSE(env->active_tc.msacsr)) {
   6108            do_raise_exception(env, EXCP_MSAFPE, GETPC());
   6109        }
   6110        break;
   6111    }
   6112}
   6113
   6114target_ulong helper_msa_cfcmsa(CPUMIPSState *env, uint32_t cs)
   6115{
   6116    switch (cs) {
   6117    case 0:
   6118        return env->msair;
   6119    case 1:
   6120        return env->active_tc.msacsr & MSACSR_MASK;
   6121    }
   6122    return 0;
   6123}
   6124
   6125void helper_msa_fill_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6126                        uint32_t rs)
   6127{
   6128    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6129    uint32_t i;
   6130
   6131    switch (df) {
   6132    case DF_BYTE:
   6133        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
   6134            pwd->b[i] = (int8_t)env->active_tc.gpr[rs];
   6135        }
   6136        break;
   6137    case DF_HALF:
   6138        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
   6139            pwd->h[i] = (int16_t)env->active_tc.gpr[rs];
   6140        }
   6141        break;
   6142    case DF_WORD:
   6143        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6144            pwd->w[i] = (int32_t)env->active_tc.gpr[rs];
   6145        }
   6146        break;
   6147    case DF_DOUBLE:
   6148        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6149            pwd->d[i] = (int64_t)env->active_tc.gpr[rs];
   6150        }
   6151       break;
   6152    default:
   6153        assert(0);
   6154    }
   6155}
   6156
   6157
/* Bit patterns 0x3f800000 and 0x3ff0000000000000 (1.0 in IEEE 754) */
#define FLOAT_ONE32 make_float32(0x3f8 << 20)
#define FLOAT_ONE64 make_float64(0x3ffULL << 52)

/*
 * NaN bit patterns built by XOR-ing a constant into the default NaN that
 * float_status s produces.  The value noted under each macro is the result
 * for one particular default NaN (presumably the MIPS one; confirm).
 */
#define FLOAT_SNAN16(s) (float16_default_nan(s) ^ 0x0220)
        /* 0x7c20 */
#define FLOAT_SNAN32(s) (float32_default_nan(s) ^ 0x00400020)
        /* 0x7f800020 */
#define FLOAT_SNAN64(s) (float64_default_nan(s) ^ 0x0008000000000020ULL)
        /* 0x7ff0000000000020 */
   6167
/* Reset the Cause field of MSACSR to 0. */
static inline void clear_msacsr_cause(CPUMIPSState *env)
{
    SET_FP_CAUSE(env->active_tc.msacsr, 0);
}
   6172
   6173static inline void check_msacsr_cause(CPUMIPSState *env, uintptr_t retaddr)
   6174{
   6175    if ((GET_FP_CAUSE(env->active_tc.msacsr) &
   6176            (GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)) == 0) {
   6177        UPDATE_FP_FLAGS(env->active_tc.msacsr,
   6178                GET_FP_CAUSE(env->active_tc.msacsr));
   6179    } else {
   6180        do_raise_exception(env, EXCP_MSAFPE, retaddr);
   6181    }
   6182}
   6183
/*
 * Flush-to-zero use cases for update_msacsr()
 *
 * Bit flags for its 'action' argument:
 *   CLEAR_FS_UNDERFLOW - drop Underflow when an output was flushed to zero
 *   CLEAR_IS_INEXACT   - drop Inexact when an input was flushed to zero
 *   RECIPROCAL_INEXACT - report only Inexact unless Invalid or Div0 is set
 */
#define CLEAR_FS_UNDERFLOW 1
#define CLEAR_IS_INEXACT   2
#define RECIPROCAL_INEXACT 4
   6188
   6189
   6190static inline int ieee_to_mips_xcpt_msa(int ieee_xcpt)
   6191{
   6192    int mips_xcpt = 0;
   6193
   6194    if (ieee_xcpt & float_flag_invalid) {
   6195        mips_xcpt |= FP_INVALID;
   6196    }
   6197    if (ieee_xcpt & float_flag_overflow) {
   6198        mips_xcpt |= FP_OVERFLOW;
   6199    }
   6200    if (ieee_xcpt & float_flag_underflow) {
   6201        mips_xcpt |= FP_UNDERFLOW;
   6202    }
   6203    if (ieee_xcpt & float_flag_divbyzero) {
   6204        mips_xcpt |= FP_DIV0;
   6205    }
   6206    if (ieee_xcpt & float_flag_inexact) {
   6207        mips_xcpt |= FP_INEXACT;
   6208    }
   6209
   6210    return mips_xcpt;
   6211}
   6212
   6213static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
   6214{
   6215    int ieee_exception_flags;
   6216    int mips_exception_flags = 0;
   6217    int cause;
   6218    int enable;
   6219
   6220    ieee_exception_flags = get_float_exception_flags(
   6221                               &env->active_tc.msa_fp_status);
   6222
   6223    /* QEMU softfloat does not signal all underflow cases */
   6224    if (denormal) {
   6225        ieee_exception_flags |= float_flag_underflow;
   6226    }
   6227    if (ieee_exception_flags) {
   6228        mips_exception_flags = ieee_to_mips_xcpt_msa(ieee_exception_flags);
   6229    }
   6230    enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
   6231
   6232    /* Set Inexact (I) when flushing inputs to zero */
   6233    if ((ieee_exception_flags & float_flag_input_denormal) &&
   6234            (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
   6235        if (action & CLEAR_IS_INEXACT) {
   6236            mips_exception_flags &= ~FP_INEXACT;
   6237        } else {
   6238            mips_exception_flags |= FP_INEXACT;
   6239        }
   6240    }
   6241
   6242    /* Set Inexact (I) and Underflow (U) when flushing outputs to zero */
   6243    if ((ieee_exception_flags & float_flag_output_denormal) &&
   6244            (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
   6245        mips_exception_flags |= FP_INEXACT;
   6246        if (action & CLEAR_FS_UNDERFLOW) {
   6247            mips_exception_flags &= ~FP_UNDERFLOW;
   6248        } else {
   6249            mips_exception_flags |= FP_UNDERFLOW;
   6250        }
   6251    }
   6252
   6253    /* Set Inexact (I) when Overflow (O) is not enabled */
   6254    if ((mips_exception_flags & FP_OVERFLOW) != 0 &&
   6255           (enable & FP_OVERFLOW) == 0) {
   6256        mips_exception_flags |= FP_INEXACT;
   6257    }
   6258
   6259    /* Clear Exact Underflow when Underflow (U) is not enabled */
   6260    if ((mips_exception_flags & FP_UNDERFLOW) != 0 &&
   6261           (enable & FP_UNDERFLOW) == 0 &&
   6262           (mips_exception_flags & FP_INEXACT) == 0) {
   6263        mips_exception_flags &= ~FP_UNDERFLOW;
   6264    }
   6265
   6266    /*
   6267     * Reciprocal operations set only Inexact when valid and not
   6268     * divide by zero
   6269     */
   6270    if ((action & RECIPROCAL_INEXACT) &&
   6271            (mips_exception_flags & (FP_INVALID | FP_DIV0)) == 0) {
   6272        mips_exception_flags = FP_INEXACT;
   6273    }
   6274
   6275    cause = mips_exception_flags & enable; /* all current enabled exceptions */
   6276
   6277    if (cause == 0) {
   6278        /*
   6279         * No enabled exception, update the MSACSR Cause
   6280         * with all current exceptions
   6281         */
   6282        SET_FP_CAUSE(env->active_tc.msacsr,
   6283            (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
   6284    } else {
   6285        /* Current exceptions are enabled */
   6286        if ((env->active_tc.msacsr & MSACSR_NX_MASK) == 0) {
   6287            /*
   6288             * Exception(s) will trap, update MSACSR Cause
   6289             * with all enabled exceptions
   6290             */
   6291            SET_FP_CAUSE(env->active_tc.msacsr,
   6292                (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
   6293        }
   6294    }
   6295
   6296    return mips_exception_flags;
   6297}
   6298
   6299static inline int get_enabled_exceptions(const CPUMIPSState *env, int c)
   6300{
   6301    int enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
   6302    return c & enable;
   6303}
   6304
   6305static inline float16 float16_from_float32(int32_t a, bool ieee,
   6306                                           float_status *status)
   6307{
   6308      float16 f_val;
   6309
   6310      f_val = float32_to_float16((float32)a, ieee, status);
   6311
   6312      return a < 0 ? (f_val | (1 << 15)) : f_val;
   6313}
   6314
   6315static inline float32 float32_from_float64(int64_t a, float_status *status)
   6316{
   6317      float32 f_val;
   6318
   6319      f_val = float64_to_float32((float64)a, status);
   6320
   6321      return a < 0 ? (f_val | (1 << 31)) : f_val;
   6322}
   6323
   6324static inline float32 float32_from_float16(int16_t a, bool ieee,
   6325                                           float_status *status)
   6326{
   6327      float32 f_val;
   6328
   6329      f_val = float16_to_float32((float16)a, ieee, status);
   6330
   6331      return a < 0 ? (f_val | (1 << 31)) : f_val;
   6332}
   6333
   6334static inline float64 float64_from_float32(int32_t a, float_status *status)
   6335{
   6336      float64 f_val;
   6337
   6338      f_val = float32_to_float64((float64)a, status);
   6339
   6340      return a < 0 ? (f_val | (1ULL << 63)) : f_val;
   6341}
   6342
   6343static inline float32 float32_from_q16(int16_t a, float_status *status)
   6344{
   6345    float32 f_val;
   6346
   6347    /* conversion as integer and scaling */
   6348    f_val = int32_to_float32(a, status);
   6349    f_val = float32_scalbn(f_val, -15, status);
   6350
   6351    return f_val;
   6352}
   6353
   6354static inline float64 float64_from_q32(int32_t a, float_status *status)
   6355{
   6356    float64 f_val;
   6357
   6358    /* conversion as integer and scaling */
   6359    f_val = int32_to_float64(a, status);
   6360    f_val = float64_scalbn(f_val, -31, status);
   6361
   6362    return f_val;
   6363}
   6364
   6365static inline int16_t float32_to_q16(float32 a, float_status *status)
   6366{
   6367    int32_t q_val;
   6368    int32_t q_min = 0xffff8000;
   6369    int32_t q_max = 0x00007fff;
   6370
   6371    int ieee_ex;
   6372
   6373    if (float32_is_any_nan(a)) {
   6374        float_raise(float_flag_invalid, status);
   6375        return 0;
   6376    }
   6377
   6378    /* scaling */
   6379    a = float32_scalbn(a, 15, status);
   6380
   6381    ieee_ex = get_float_exception_flags(status);
   6382    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
   6383                             , status);
   6384
   6385    if (ieee_ex & float_flag_overflow) {
   6386        float_raise(float_flag_inexact, status);
   6387        return (int32_t)a < 0 ? q_min : q_max;
   6388    }
   6389
   6390    /* conversion to int */
   6391    q_val = float32_to_int32(a, status);
   6392
   6393    ieee_ex = get_float_exception_flags(status);
   6394    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
   6395                             , status);
   6396
   6397    if (ieee_ex & float_flag_invalid) {
   6398        set_float_exception_flags(ieee_ex & (~float_flag_invalid)
   6399                               , status);
   6400        float_raise(float_flag_overflow | float_flag_inexact, status);
   6401        return (int32_t)a < 0 ? q_min : q_max;
   6402    }
   6403
   6404    if (q_val < q_min) {
   6405        float_raise(float_flag_overflow | float_flag_inexact, status);
   6406        return (int16_t)q_min;
   6407    }
   6408
   6409    if (q_max < q_val) {
   6410        float_raise(float_flag_overflow | float_flag_inexact, status);
   6411        return (int16_t)q_max;
   6412    }
   6413
   6414    return (int16_t)q_val;
   6415}
   6416
   6417static inline int32_t float64_to_q32(float64 a, float_status *status)
   6418{
   6419    int64_t q_val;
   6420    int64_t q_min = 0xffffffff80000000LL;
   6421    int64_t q_max = 0x000000007fffffffLL;
   6422
   6423    int ieee_ex;
   6424
   6425    if (float64_is_any_nan(a)) {
   6426        float_raise(float_flag_invalid, status);
   6427        return 0;
   6428    }
   6429
   6430    /* scaling */
   6431    a = float64_scalbn(a, 31, status);
   6432
   6433    ieee_ex = get_float_exception_flags(status);
   6434    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
   6435           , status);
   6436
   6437    if (ieee_ex & float_flag_overflow) {
   6438        float_raise(float_flag_inexact, status);
   6439        return (int64_t)a < 0 ? q_min : q_max;
   6440    }
   6441
   6442    /* conversion to integer */
   6443    q_val = float64_to_int64(a, status);
   6444
   6445    ieee_ex = get_float_exception_flags(status);
   6446    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
   6447           , status);
   6448
   6449    if (ieee_ex & float_flag_invalid) {
   6450        set_float_exception_flags(ieee_ex & (~float_flag_invalid)
   6451               , status);
   6452        float_raise(float_flag_overflow | float_flag_inexact, status);
   6453        return (int64_t)a < 0 ? q_min : q_max;
   6454    }
   6455
   6456    if (q_val < q_min) {
   6457        float_raise(float_flag_overflow | float_flag_inexact, status);
   6458        return (int32_t)q_min;
   6459    }
   6460
   6461    if (q_max < q_val) {
   6462        float_raise(float_flag_overflow | float_flag_inexact, status);
   6463        return (int32_t)q_max;
   6464    }
   6465
   6466    return (int32_t)q_val;
   6467}
   6468
/*
 * Evaluate one floating-point comparison and store a mask in DEST.
 *
 * The softfloat exception flags are cleared, then float<BITS>_<OP> (or its
 * _quiet variant when QUIET is non-zero) is applied to ARG1 and ARG2.  DEST
 * becomes M_MAX_UINT(BITS) if the comparison holds and 0 otherwise.  The
 * flags are then folded into MSACSR with update_msacsr(); if any resulting
 * exception is enabled, DEST is overwritten with FLOAT_SNAN<BITS> whose low
 * six bits are replaced by the exception bits.
 *
 * Requires a variable named 'env' in scope at the expansion site.
 */
#define MSA_FLOAT_COND(DEST, OP, ARG1, ARG2, BITS, QUIET)                   \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
        int64_t cond;                                                       \
        set_float_exception_flags(0, status);                               \
        if (!QUIET) {                                                       \
            cond = float ## BITS ## _ ## OP(ARG1, ARG2, status);            \
        } else {                                                            \
            cond = float ## BITS ## _ ## OP ## _quiet(ARG1, ARG2, status);  \
        }                                                                   \
        DEST = cond ? M_MAX_UINT(BITS) : 0;                                 \
        c = update_msacsr(env, CLEAR_IS_INEXACT, 0);                        \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
   6487
/*
 * Runs the 'eq' comparison through MSA_FLOAT_COND for its exception
 * handling, then turns an all-ones (true) result back into 0.  DEST thus
 * ends up 0 unless MSA_FLOAT_COND stored its exception pattern.
 */
#define MSA_FLOAT_AF(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);          \
        if ((DEST & M_MAX_UINT(BITS)) == M_MAX_UINT(BITS)) {        \
            DEST = 0;                                               \
        }                                                           \
    } while (0)
   6495
/*
 * Unordered-or-equal: runs the 'unordered' comparison and, only if that
 * left DEST at 0, runs 'eq' on the same operands.  Each step is a full
 * MSA_FLOAT_COND, so the arguments may be evaluated more than once.
 */
#define MSA_FLOAT_UEQ(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
   6503
/*
 * Not-equal: runs 'lt' on (ARG1, ARG2) and, only if that left DEST at 0,
 * runs 'lt' on (ARG2, ARG1).  Each step is a full MSA_FLOAT_COND, so the
 * arguments may be evaluated more than once.
 */
#define MSA_FLOAT_NE(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);          \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)
   6511
/*
 * Unordered-or-not-equal: runs 'unordered', then 'lt' on (ARG1, ARG2),
 * then 'lt' on (ARG2, ARG1), stopping at the first step that leaves DEST
 * non-zero.  Each step is a full MSA_FLOAT_COND, so the arguments may be
 * evaluated more than once.
 */
#define MSA_FLOAT_UNE(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
            if (DEST == 0) {                                        \
                MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);  \
            }                                                       \
        }                                                           \
    } while (0)
   6522
/*
 * Unordered-or-less-or-equal: runs 'unordered' and, only if that left DEST
 * at 0, runs 'le' on the same operands.  Each step is a full
 * MSA_FLOAT_COND, so the arguments may be evaluated more than once.
 */
#define MSA_FLOAT_ULE(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
   6530
/*
 * Unordered-or-less-than: runs 'unordered' and, only if that left DEST at
 * 0, runs 'lt' on the same operands.  Each step is a full MSA_FLOAT_COND,
 * so the arguments may be evaluated more than once.
 */
#define MSA_FLOAT_ULT(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
   6538
/*
 * Ordered: runs 'le' on (ARG1, ARG2) and, only if that left DEST at 0,
 * runs 'le' on (ARG2, ARG1).  Each step is a full MSA_FLOAT_COND, so the
 * arguments may be evaluated more than once.
 */
#define MSA_FLOAT_OR(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);          \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, le, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)
   6546
   6547static inline void compare_af(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6548                              wr_t *pwt, uint32_t df, int quiet,
   6549                              uintptr_t retaddr)
   6550{
   6551    wr_t wx, *pwx = &wx;
   6552    uint32_t i;
   6553
   6554    clear_msacsr_cause(env);
   6555
   6556    switch (df) {
   6557    case DF_WORD:
   6558        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6559            MSA_FLOAT_AF(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6560        }
   6561        break;
   6562    case DF_DOUBLE:
   6563        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6564            MSA_FLOAT_AF(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6565        }
   6566        break;
   6567    default:
   6568        assert(0);
   6569    }
   6570
   6571    check_msacsr_cause(env, retaddr);
   6572
   6573    msa_move_v(pwd, pwx);
   6574}
   6575
   6576static inline void compare_un(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6577                              wr_t *pwt, uint32_t df, int quiet,
   6578                              uintptr_t retaddr)
   6579{
   6580    wr_t wx, *pwx = &wx;
   6581    uint32_t i;
   6582
   6583    clear_msacsr_cause(env);
   6584
   6585    switch (df) {
   6586    case DF_WORD:
   6587        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6588            MSA_FLOAT_COND(pwx->w[i], unordered, pws->w[i], pwt->w[i], 32,
   6589                    quiet);
   6590        }
   6591        break;
   6592    case DF_DOUBLE:
   6593        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6594            MSA_FLOAT_COND(pwx->d[i], unordered, pws->d[i], pwt->d[i], 64,
   6595                    quiet);
   6596        }
   6597        break;
   6598    default:
   6599        assert(0);
   6600    }
   6601
   6602    check_msacsr_cause(env, retaddr);
   6603
   6604    msa_move_v(pwd, pwx);
   6605}
   6606
   6607static inline void compare_eq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6608                              wr_t *pwt, uint32_t df, int quiet,
   6609                              uintptr_t retaddr)
   6610{
   6611    wr_t wx, *pwx = &wx;
   6612    uint32_t i;
   6613
   6614    clear_msacsr_cause(env);
   6615
   6616    switch (df) {
   6617    case DF_WORD:
   6618        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6619            MSA_FLOAT_COND(pwx->w[i], eq, pws->w[i], pwt->w[i], 32, quiet);
   6620        }
   6621        break;
   6622    case DF_DOUBLE:
   6623        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6624            MSA_FLOAT_COND(pwx->d[i], eq, pws->d[i], pwt->d[i], 64, quiet);
   6625        }
   6626        break;
   6627    default:
   6628        assert(0);
   6629    }
   6630
   6631    check_msacsr_cause(env, retaddr);
   6632
   6633    msa_move_v(pwd, pwx);
   6634}
   6635
   6636static inline void compare_ueq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6637                               wr_t *pwt, uint32_t df, int quiet,
   6638                               uintptr_t retaddr)
   6639{
   6640    wr_t wx, *pwx = &wx;
   6641    uint32_t i;
   6642
   6643    clear_msacsr_cause(env);
   6644
   6645    switch (df) {
   6646    case DF_WORD:
   6647        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6648            MSA_FLOAT_UEQ(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6649        }
   6650        break;
   6651    case DF_DOUBLE:
   6652        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6653            MSA_FLOAT_UEQ(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6654        }
   6655        break;
   6656    default:
   6657        assert(0);
   6658    }
   6659
   6660    check_msacsr_cause(env, retaddr);
   6661
   6662    msa_move_v(pwd, pwx);
   6663}
   6664
   6665static inline void compare_lt(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6666                              wr_t *pwt, uint32_t df, int quiet,
   6667                              uintptr_t retaddr)
   6668{
   6669    wr_t wx, *pwx = &wx;
   6670    uint32_t i;
   6671
   6672    clear_msacsr_cause(env);
   6673
   6674    switch (df) {
   6675    case DF_WORD:
   6676        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6677            MSA_FLOAT_COND(pwx->w[i], lt, pws->w[i], pwt->w[i], 32, quiet);
   6678        }
   6679        break;
   6680    case DF_DOUBLE:
   6681        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6682            MSA_FLOAT_COND(pwx->d[i], lt, pws->d[i], pwt->d[i], 64, quiet);
   6683        }
   6684        break;
   6685    default:
   6686        assert(0);
   6687    }
   6688
   6689    check_msacsr_cause(env, retaddr);
   6690
   6691    msa_move_v(pwd, pwx);
   6692}
   6693
   6694static inline void compare_ult(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6695                               wr_t *pwt, uint32_t df, int quiet,
   6696                               uintptr_t retaddr)
   6697{
   6698    wr_t wx, *pwx = &wx;
   6699    uint32_t i;
   6700
   6701    clear_msacsr_cause(env);
   6702
   6703    switch (df) {
   6704    case DF_WORD:
   6705        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6706            MSA_FLOAT_ULT(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6707        }
   6708        break;
   6709    case DF_DOUBLE:
   6710        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6711            MSA_FLOAT_ULT(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6712        }
   6713        break;
   6714    default:
   6715        assert(0);
   6716    }
   6717
   6718    check_msacsr_cause(env, retaddr);
   6719
   6720    msa_move_v(pwd, pwx);
   6721}
   6722
   6723static inline void compare_le(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6724                              wr_t *pwt, uint32_t df, int quiet,
   6725                              uintptr_t retaddr)
   6726{
   6727    wr_t wx, *pwx = &wx;
   6728    uint32_t i;
   6729
   6730    clear_msacsr_cause(env);
   6731
   6732    switch (df) {
   6733    case DF_WORD:
   6734        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6735            MSA_FLOAT_COND(pwx->w[i], le, pws->w[i], pwt->w[i], 32, quiet);
   6736        }
   6737        break;
   6738    case DF_DOUBLE:
   6739        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6740            MSA_FLOAT_COND(pwx->d[i], le, pws->d[i], pwt->d[i], 64, quiet);
   6741        }
   6742        break;
   6743    default:
   6744        assert(0);
   6745    }
   6746
   6747    check_msacsr_cause(env, retaddr);
   6748
   6749    msa_move_v(pwd, pwx);
   6750}
   6751
   6752static inline void compare_ule(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6753                               wr_t *pwt, uint32_t df, int quiet,
   6754                               uintptr_t retaddr)
   6755{
   6756    wr_t wx, *pwx = &wx;
   6757    uint32_t i;
   6758
   6759    clear_msacsr_cause(env);
   6760
   6761    switch (df) {
   6762    case DF_WORD:
   6763        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6764            MSA_FLOAT_ULE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6765        }
   6766        break;
   6767    case DF_DOUBLE:
   6768        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6769            MSA_FLOAT_ULE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6770        }
   6771        break;
   6772    default:
   6773        assert(0);
   6774    }
   6775
   6776    check_msacsr_cause(env, retaddr);
   6777
   6778    msa_move_v(pwd, pwx);
   6779}
   6780
   6781static inline void compare_or(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6782                              wr_t *pwt, uint32_t df, int quiet,
   6783                              uintptr_t retaddr)
   6784{
   6785    wr_t wx, *pwx = &wx;
   6786    uint32_t i;
   6787
   6788    clear_msacsr_cause(env);
   6789
   6790    switch (df) {
   6791    case DF_WORD:
   6792        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6793            MSA_FLOAT_OR(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6794        }
   6795        break;
   6796    case DF_DOUBLE:
   6797        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6798            MSA_FLOAT_OR(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6799        }
   6800        break;
   6801    default:
   6802        assert(0);
   6803    }
   6804
   6805    check_msacsr_cause(env, retaddr);
   6806
   6807    msa_move_v(pwd, pwx);
   6808}
   6809
   6810static inline void compare_une(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6811                               wr_t *pwt, uint32_t df, int quiet,
   6812                               uintptr_t retaddr)
   6813{
   6814    wr_t wx, *pwx = &wx;
   6815    uint32_t i;
   6816
   6817    clear_msacsr_cause(env);
   6818
   6819    switch (df) {
   6820    case DF_WORD:
   6821        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6822            MSA_FLOAT_UNE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6823        }
   6824        break;
   6825    case DF_DOUBLE:
   6826        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6827            MSA_FLOAT_UNE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6828        }
   6829        break;
   6830    default:
   6831        assert(0);
   6832    }
   6833
   6834    check_msacsr_cause(env, retaddr);
   6835
   6836    msa_move_v(pwd, pwx);
   6837}
   6838
   6839static inline void compare_ne(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6840                              wr_t *pwt, uint32_t df, int quiet,
   6841                              uintptr_t retaddr)
   6842{
   6843    wr_t wx, *pwx = &wx;
   6844    uint32_t i;
   6845
   6846    clear_msacsr_cause(env);
   6847
   6848    switch (df) {
   6849    case DF_WORD:
   6850        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6851            MSA_FLOAT_NE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6852        }
   6853        break;
   6854    case DF_DOUBLE:
   6855        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6856            MSA_FLOAT_NE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6857        }
   6858        break;
   6859    default:
   6860        assert(0);
   6861    }
   6862
   6863    check_msacsr_cause(env, retaddr);
   6864
   6865    msa_move_v(pwd, pwx);
   6866}
   6867
   6868void helper_msa_fcaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6869                        uint32_t ws, uint32_t wt)
   6870{
   6871    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6872    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6873    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6874    compare_af(env, pwd, pws, pwt, df, 1, GETPC());
   6875}
   6876
   6877void helper_msa_fcun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6878                        uint32_t ws, uint32_t wt)
   6879{
   6880    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6881    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6882    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6883    compare_un(env, pwd, pws, pwt, df, 1, GETPC());
   6884}
   6885
   6886void helper_msa_fceq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6887                        uint32_t ws, uint32_t wt)
   6888{
   6889    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6890    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6891    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6892    compare_eq(env, pwd, pws, pwt, df, 1, GETPC());
   6893}
   6894
   6895void helper_msa_fcueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6896                         uint32_t ws, uint32_t wt)
   6897{
   6898    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6899    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6900    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6901    compare_ueq(env, pwd, pws, pwt, df, 1, GETPC());
   6902}
   6903
   6904void helper_msa_fclt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6905                        uint32_t ws, uint32_t wt)
   6906{
   6907    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6908    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6909    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6910    compare_lt(env, pwd, pws, pwt, df, 1, GETPC());
   6911}
   6912
   6913void helper_msa_fcult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6914                         uint32_t ws, uint32_t wt)
   6915{
   6916    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6917    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6918    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6919    compare_ult(env, pwd, pws, pwt, df, 1, GETPC());
   6920}
   6921
   6922void helper_msa_fcle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6923                        uint32_t ws, uint32_t wt)
   6924{
   6925    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6926    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6927    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6928    compare_le(env, pwd, pws, pwt, df, 1, GETPC());
   6929}
   6930
   6931void helper_msa_fcule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6932                         uint32_t ws, uint32_t wt)
   6933{
   6934    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6935    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6936    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6937    compare_ule(env, pwd, pws, pwt, df, 1, GETPC());
   6938}
   6939
   6940void helper_msa_fsaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6941                        uint32_t ws, uint32_t wt)
   6942{
   6943    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6944    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6945    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6946    compare_af(env, pwd, pws, pwt, df, 0, GETPC());
   6947}
   6948
   6949void helper_msa_fsun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6950                        uint32_t ws, uint32_t wt)
   6951{
   6952    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6953    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6954    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6955    compare_un(env, pwd, pws, pwt, df, 0, GETPC());
   6956}
   6957
   6958void helper_msa_fseq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6959                        uint32_t ws, uint32_t wt)
   6960{
   6961    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6962    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6963    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6964    compare_eq(env, pwd, pws, pwt, df, 0, GETPC());
   6965}
   6966
   6967void helper_msa_fsueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6968                         uint32_t ws, uint32_t wt)
   6969{
   6970    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6971    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6972    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6973    compare_ueq(env, pwd, pws, pwt, df, 0, GETPC());
   6974}
   6975
   6976void helper_msa_fslt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6977                        uint32_t ws, uint32_t wt)
   6978{
   6979    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6980    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6981    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6982    compare_lt(env, pwd, pws, pwt, df, 0, GETPC());
   6983}
   6984
   6985void helper_msa_fsult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6986                         uint32_t ws, uint32_t wt)
   6987{
   6988    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6989    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6990    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6991    compare_ult(env, pwd, pws, pwt, df, 0, GETPC());
   6992}
   6993
   6994void helper_msa_fsle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6995                        uint32_t ws, uint32_t wt)
   6996{
   6997    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6998    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6999    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7000    compare_le(env, pwd, pws, pwt, df, 0, GETPC());
   7001}
   7002
   7003void helper_msa_fsule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7004                         uint32_t ws, uint32_t wt)
   7005{
   7006    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7007    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7008    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7009    compare_ule(env, pwd, pws, pwt, df, 0, GETPC());
   7010}
   7011
   7012void helper_msa_fcor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7013                        uint32_t ws, uint32_t wt)
   7014{
   7015    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7016    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7017    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7018    compare_or(env, pwd, pws, pwt, df, 1, GETPC());
   7019}
   7020
   7021void helper_msa_fcune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7022                         uint32_t ws, uint32_t wt)
   7023{
   7024    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7025    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7026    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7027    compare_une(env, pwd, pws, pwt, df, 1, GETPC());
   7028}
   7029
   7030void helper_msa_fcne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7031                        uint32_t ws, uint32_t wt)
   7032{
   7033    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7034    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7035    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7036    compare_ne(env, pwd, pws, pwt, df, 1, GETPC());
   7037}
   7038
   7039void helper_msa_fsor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7040                        uint32_t ws, uint32_t wt)
   7041{
   7042    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7043    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7044    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7045    compare_or(env, pwd, pws, pwt, df, 0, GETPC());
   7046}
   7047
   7048void helper_msa_fsune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7049                         uint32_t ws, uint32_t wt)
   7050{
   7051    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7052    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7053    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7054    compare_une(env, pwd, pws, pwt, df, 0, GETPC());
   7055}
   7056
   7057void helper_msa_fsne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7058                        uint32_t ws, uint32_t wt)
   7059{
   7060    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7061    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7062    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7063    compare_ne(env, pwd, pws, pwt, df, 0, GETPC());
   7064}
   7065
/*
 * 16-bit stand-ins for the zero/denormal predicates: both always evaluate
 * to 0, so IS_DENORMAL(x, 16) is always 0.
 */
#define float16_is_zero(ARG) 0
#define float16_is_zero_or_denormal(ARG) 0

/*
 * Non-zero when ARG is reported as "zero or denormal" but not as "zero" by
 * the float<BITS>_* predicates, i.e. when it is a denormal.  ARG is
 * evaluated twice.
 */
#define IS_DENORMAL(ARG, BITS)                      \
    (!float ## BITS ## _is_zero(ARG)                \
    && float ## BITS ## _is_zero_or_denormal(ARG))
   7072
/*
 * Two-operand element operation: DEST = float<BITS>_<OP>(ARG1, ARG2, status).
 *
 * Expects a variable named 'env' in scope at the expansion site.  The
 * softfloat exception flags in env->active_tc.msa_fp_status are cleared
 * before the operation; afterwards update_msacsr() is called with
 * IS_DENORMAL(DEST, BITS) as its last argument.  If get_enabled_exceptions()
 * is non-zero for the value it returned (c), DEST is overwritten with
 * FLOAT_SNAN<BITS>(status) whose low six bits are cleared and OR-ed with c.
 *
 * DEST is evaluated more than once; the macro declares locals named
 * 'status' and 'c', so arguments must not refer to such names.
 */
#define MSA_FLOAT_BINOP(DEST, OP, ARG1, ARG2, BITS)                         \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status);                \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
   7086
   7087void helper_msa_fadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7088        uint32_t ws, uint32_t wt)
   7089{
   7090    wr_t wx, *pwx = &wx;
   7091    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7092    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7093    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7094    uint32_t i;
   7095
   7096    clear_msacsr_cause(env);
   7097
   7098    switch (df) {
   7099    case DF_WORD:
   7100        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7101            MSA_FLOAT_BINOP(pwx->w[i], add, pws->w[i], pwt->w[i], 32);
   7102        }
   7103        break;
   7104    case DF_DOUBLE:
   7105        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7106            MSA_FLOAT_BINOP(pwx->d[i], add, pws->d[i], pwt->d[i], 64);
   7107        }
   7108        break;
   7109    default:
   7110        assert(0);
   7111    }
   7112
   7113    check_msacsr_cause(env, GETPC());
   7114    msa_move_v(pwd, pwx);
   7115}
   7116
   7117void helper_msa_fsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7118        uint32_t ws, uint32_t wt)
   7119{
   7120    wr_t wx, *pwx = &wx;
   7121    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7122    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7123    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7124    uint32_t i;
   7125
   7126    clear_msacsr_cause(env);
   7127
   7128    switch (df) {
   7129    case DF_WORD:
   7130        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7131            MSA_FLOAT_BINOP(pwx->w[i], sub, pws->w[i], pwt->w[i], 32);
   7132        }
   7133        break;
   7134    case DF_DOUBLE:
   7135        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7136            MSA_FLOAT_BINOP(pwx->d[i], sub, pws->d[i], pwt->d[i], 64);
   7137        }
   7138        break;
   7139    default:
   7140        assert(0);
   7141    }
   7142
   7143    check_msacsr_cause(env, GETPC());
   7144    msa_move_v(pwd, pwx);
   7145}
   7146
   7147void helper_msa_fmul_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7148        uint32_t ws, uint32_t wt)
   7149{
   7150    wr_t wx, *pwx = &wx;
   7151    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7152    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7153    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7154    uint32_t i;
   7155
   7156    clear_msacsr_cause(env);
   7157
   7158    switch (df) {
   7159    case DF_WORD:
   7160        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7161            MSA_FLOAT_BINOP(pwx->w[i], mul, pws->w[i], pwt->w[i], 32);
   7162        }
   7163        break;
   7164    case DF_DOUBLE:
   7165        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7166            MSA_FLOAT_BINOP(pwx->d[i], mul, pws->d[i], pwt->d[i], 64);
   7167        }
   7168        break;
   7169    default:
   7170        assert(0);
   7171    }
   7172
   7173    check_msacsr_cause(env, GETPC());
   7174
   7175    msa_move_v(pwd, pwx);
   7176}
   7177
   7178void helper_msa_fdiv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7179        uint32_t ws, uint32_t wt)
   7180{
   7181    wr_t wx, *pwx = &wx;
   7182    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7183    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7184    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7185    uint32_t i;
   7186
   7187    clear_msacsr_cause(env);
   7188
   7189    switch (df) {
   7190    case DF_WORD:
   7191        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7192            MSA_FLOAT_BINOP(pwx->w[i], div, pws->w[i], pwt->w[i], 32);
   7193        }
   7194        break;
   7195    case DF_DOUBLE:
   7196        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7197            MSA_FLOAT_BINOP(pwx->d[i], div, pws->d[i], pwt->d[i], 64);
   7198        }
   7199        break;
   7200    default:
   7201        assert(0);
   7202    }
   7203
   7204    check_msacsr_cause(env, GETPC());
   7205
   7206    msa_move_v(pwd, pwx);
   7207}
   7208
/*
 * Fused multiply-add element operation:
 *   DEST = float<BITS>_muladd(ARG2, ARG3, ARG1, NEGATE, status)
 * Note the argument order: ARG2 and ARG3 are passed first, ARG1 third;
 * NEGATE is forwarded as the flags argument.
 *
 * Expects a variable named 'env' in scope at the expansion site.  The
 * softfloat exception flags in env->active_tc.msa_fp_status are cleared
 * first; afterwards update_msacsr() is called with IS_DENORMAL(DEST, BITS)
 * as its last argument.  If get_enabled_exceptions() is non-zero for the
 * value it returned (c), DEST is overwritten with FLOAT_SNAN<BITS>(status)
 * whose low six bits are cleared and OR-ed with c.
 *
 * DEST is evaluated more than once; the macro declares locals named
 * 'status' and 'c'.
 */
#define MSA_FLOAT_MULADD(DEST, ARG1, ARG2, ARG3, NEGATE, BITS)              \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _muladd(ARG2, ARG3, ARG1, NEGATE, status);  \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
   7222
   7223void helper_msa_fmadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7224        uint32_t ws, uint32_t wt)
   7225{
   7226    wr_t wx, *pwx = &wx;
   7227    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7228    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7229    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7230    uint32_t i;
   7231
   7232    clear_msacsr_cause(env);
   7233
   7234    switch (df) {
   7235    case DF_WORD:
   7236        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7237            MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
   7238                           pws->w[i], pwt->w[i], 0, 32);
   7239        }
   7240        break;
   7241    case DF_DOUBLE:
   7242        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7243            MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
   7244                           pws->d[i], pwt->d[i], 0, 64);
   7245        }
   7246        break;
   7247    default:
   7248        assert(0);
   7249    }
   7250
   7251    check_msacsr_cause(env, GETPC());
   7252
   7253    msa_move_v(pwd, pwx);
   7254}
   7255
   7256void helper_msa_fmsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7257        uint32_t ws, uint32_t wt)
   7258{
   7259    wr_t wx, *pwx = &wx;
   7260    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7261    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7262    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7263    uint32_t i;
   7264
   7265    clear_msacsr_cause(env);
   7266
   7267    switch (df) {
   7268    case DF_WORD:
   7269        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7270            MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
   7271                           pws->w[i], pwt->w[i],
   7272                           float_muladd_negate_product, 32);
   7273      }
   7274      break;
   7275    case DF_DOUBLE:
   7276        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7277            MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
   7278                           pws->d[i], pwt->d[i],
   7279                           float_muladd_negate_product, 64);
   7280        }
   7281        break;
   7282    default:
   7283        assert(0);
   7284    }
   7285
   7286    check_msacsr_cause(env, GETPC());
   7287
   7288    msa_move_v(pwd, pwx);
   7289}
   7290
   7291void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7292        uint32_t ws, uint32_t wt)
   7293{
   7294    wr_t wx, *pwx = &wx;
   7295    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7296    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7297    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7298    uint32_t i;
   7299
   7300    clear_msacsr_cause(env);
   7301
   7302    switch (df) {
   7303    case DF_WORD:
   7304        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7305            MSA_FLOAT_BINOP(pwx->w[i], scalbn, pws->w[i],
   7306                            pwt->w[i] >  0x200 ?  0x200 :
   7307                            pwt->w[i] < -0x200 ? -0x200 : pwt->w[i],
   7308                            32);
   7309        }
   7310        break;
   7311    case DF_DOUBLE:
   7312        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7313            MSA_FLOAT_BINOP(pwx->d[i], scalbn, pws->d[i],
   7314                            pwt->d[i] >  0x1000 ?  0x1000 :
   7315                            pwt->d[i] < -0x1000 ? -0x1000 : pwt->d[i],
   7316                            64);
   7317        }
   7318        break;
   7319    default:
   7320        assert(0);
   7321    }
   7322
   7323    check_msacsr_cause(env, GETPC());
   7324
   7325    msa_move_v(pwd, pwx);
   7326}
   7327
/*
 * One-operand element operation: DEST = float<BITS>_<OP>(ARG, status).
 *
 * Expects a variable named 'env' in scope at the expansion site.  The
 * softfloat exception flags in env->active_tc.msa_fp_status are cleared
 * first; afterwards update_msacsr() is called with IS_DENORMAL(DEST, BITS)
 * as its last argument.  If get_enabled_exceptions() is non-zero for the
 * value it returned (c), DEST is overwritten with FLOAT_SNAN<BITS>(status)
 * whose low six bits are cleared and OR-ed with c.
 *
 * DEST is evaluated more than once; the macro declares locals named
 * 'status' and 'c'.
 */
#define MSA_FLOAT_UNOP(DEST, OP, ARG, BITS)                                 \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG, status);                       \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
   7341
   7342void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7343                         uint32_t ws, uint32_t wt)
   7344{
   7345    wr_t wx, *pwx = &wx;
   7346    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7347    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7348    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7349    uint32_t i;
   7350
   7351    clear_msacsr_cause(env);
   7352
   7353    switch (df) {
   7354    case DF_WORD:
   7355        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7356            /*
   7357             * Half precision floats come in two formats: standard
   7358             * IEEE and "ARM" format.  The latter gains extra exponent
   7359             * range by omitting the NaN/Inf encodings.
   7360             */
   7361            bool ieee = true;
   7362
   7363            MSA_FLOAT_BINOP(Lh(pwx, i), from_float32, pws->w[i], ieee, 16);
   7364            MSA_FLOAT_BINOP(Rh(pwx, i), from_float32, pwt->w[i], ieee, 16);
   7365        }
   7366        break;
   7367    case DF_DOUBLE:
   7368        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7369            MSA_FLOAT_UNOP(Lw(pwx, i), from_float64, pws->d[i], 32);
   7370            MSA_FLOAT_UNOP(Rw(pwx, i), from_float64, pwt->d[i], 32);
   7371        }
   7372        break;
   7373    default:
   7374        assert(0);
   7375    }
   7376
   7377    check_msacsr_cause(env, GETPC());
   7378    msa_move_v(pwd, pwx);
   7379}
   7380
/*
 * One-operand element operation whose result width differs from the source
 * width: DEST = float<BITS>_<OP>(ARG, status), where BITS names the source
 * format and XBITS the width used for the signalling-NaN replacement value.
 *
 * Expects a variable named 'env' in scope at the expansion site.  The
 * softfloat exception flags in env->active_tc.msa_fp_status are cleared
 * first; afterwards update_msacsr(env, CLEAR_FS_UNDERFLOW, 0) is called.
 * If get_enabled_exceptions() is non-zero for the value it returned (c),
 * DEST is overwritten with FLOAT_SNAN<XBITS>(status) whose low six bits are
 * cleared and OR-ed with c.
 *
 * DEST is evaluated more than once; the macro declares locals named
 * 'status' and 'c'.
 */
#define MSA_FLOAT_UNOP_XD(DEST, OP, ARG, BITS, XBITS)                       \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG, status);                       \
        c = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                      \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## XBITS(status) >> 6) << 6) | c;           \
        }                                                                   \
    } while (0)
   7394
   7395void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7396                       uint32_t ws, uint32_t wt)
   7397{
   7398    wr_t wx, *pwx = &wx;
   7399    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7400    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7401    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7402    uint32_t i;
   7403
   7404    clear_msacsr_cause(env);
   7405
   7406    switch (df) {
   7407    case DF_WORD:
   7408        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7409            MSA_FLOAT_UNOP_XD(Lh(pwx, i), to_q16, pws->w[i], 32, 16);
   7410            MSA_FLOAT_UNOP_XD(Rh(pwx, i), to_q16, pwt->w[i], 32, 16);
   7411        }
   7412        break;
   7413    case DF_DOUBLE:
   7414        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7415            MSA_FLOAT_UNOP_XD(Lw(pwx, i), to_q32, pws->d[i], 64, 32);
   7416            MSA_FLOAT_UNOP_XD(Rw(pwx, i), to_q32, pwt->d[i], 64, 32);
   7417        }
   7418        break;
   7419    default:
   7420        assert(0);
   7421    }
   7422
   7423    check_msacsr_cause(env, GETPC());
   7424
   7425    msa_move_v(pwd, pwx);
   7426}
   7427
   7428#define NUMBER_QNAN_PAIR(ARG1, ARG2, BITS, STATUS)      \
   7429    !float ## BITS ## _is_any_nan(ARG1)                 \
   7430    && float ## BITS ## _is_quiet_nan(ARG2, STATUS)
   7431
/*
 * Two-operand min/max style element operation:
 *   DEST = float<BITS>_<OP>(ARG1, ARG2, status)
 *
 * Expects a variable named 'env' in scope at the expansion site.  The
 * softfloat exception flags in env->active_tc.msa_fp_status are cleared
 * first; afterwards update_msacsr(env, 0, 0) is called (unlike
 * MSA_FLOAT_BINOP, no denormal indication is passed).  If
 * get_enabled_exceptions() is non-zero for the value it returned (c), DEST
 * is overwritten with FLOAT_SNAN<BITS>(status) whose low six bits are
 * cleared and OR-ed with c.
 *
 * DEST is evaluated more than once; the macro declares locals named
 * 'status' and 'c'.
 */
#define MSA_FLOAT_MAXOP(DEST, OP, ARG1, ARG2, BITS)                         \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status);                \
        c = update_msacsr(env, 0, 0);                                       \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
   7445
/*
 * Select between two operands based on their absolute values.
 *
 * F and G are the operation suffixes handed to MSA_FLOAT_MAXOP (the callers
 * in this file pass min/max); X is the destination lvalue, _S and _T the
 * operands, BITS the element width and STATUS the float_status pointer used
 * for the quiet-NaN test.
 *
 * Steps:
 *  - _S and _T are copied into locals S and T.  If exactly one of them is a
 *    quiet NaN while the other is not a NaN (NUMBER_QNAN_PAIR), the NaN is
 *    replaced by the other operand.
 *  - xs = F(S, T), xt = G(S, T), xd = F(|S|, |T|).
 *  - X = xs when |S| and |T| have identical bit patterns or when xd equals
 *    |xs|; otherwise X = xt.
 *
 * Expects 'env' in scope (through MSA_FLOAT_MAXOP) and declares locals S, T,
 * as, at, xs, xt and xd, so arguments must not refer to such names.
 */
#define FMAXMIN_A(F, G, X, _S, _T, BITS, STATUS)                    \
    do {                                                            \
        uint## BITS ##_t S = _S, T = _T;                            \
        uint## BITS ##_t as, at, xs, xt, xd;                        \
        if (NUMBER_QNAN_PAIR(S, T, BITS, STATUS)) {                 \
            T = S;                                                  \
        }                                                           \
        else if (NUMBER_QNAN_PAIR(T, S, BITS, STATUS)) {            \
            S = T;                                                  \
        }                                                           \
        as = float## BITS ##_abs(S);                                \
        at = float## BITS ##_abs(T);                                \
        MSA_FLOAT_MAXOP(xs, F,  S,  T, BITS);                       \
        MSA_FLOAT_MAXOP(xt, G,  S,  T, BITS);                       \
        MSA_FLOAT_MAXOP(xd, F, as, at, BITS);                       \
        X = (as == at || xd == float## BITS ##_abs(xs)) ? xs : xt;  \
    } while (0)
   7463
   7464void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7465        uint32_t ws, uint32_t wt)
   7466{
   7467    float_status *status = &env->active_tc.msa_fp_status;
   7468    wr_t wx, *pwx = &wx;
   7469    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7470    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7471    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7472
   7473    clear_msacsr_cause(env);
   7474
   7475    if (df == DF_WORD) {
   7476
   7477        if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
   7478            MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pws->w[0], 32);
   7479        } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
   7480            MSA_FLOAT_MAXOP(pwx->w[0], min, pwt->w[0], pwt->w[0], 32);
   7481        } else {
   7482            MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pwt->w[0], 32);
   7483        }
   7484
   7485        if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
   7486            MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pws->w[1], 32);
   7487        } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
   7488            MSA_FLOAT_MAXOP(pwx->w[1], min, pwt->w[1], pwt->w[1], 32);
   7489        } else {
   7490            MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pwt->w[1], 32);
   7491        }
   7492
   7493        if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
   7494            MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pws->w[2], 32);
   7495        } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
   7496            MSA_FLOAT_MAXOP(pwx->w[2], min, pwt->w[2], pwt->w[2], 32);
   7497        } else {
   7498            MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pwt->w[2], 32);
   7499        }
   7500
   7501        if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
   7502            MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pws->w[3], 32);
   7503        } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
   7504            MSA_FLOAT_MAXOP(pwx->w[3], min, pwt->w[3], pwt->w[3], 32);
   7505        } else {
   7506            MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pwt->w[3], 32);
   7507        }
   7508
   7509    } else if (df == DF_DOUBLE) {
   7510
   7511        if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
   7512            MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pws->d[0], 64);
   7513        } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
   7514            MSA_FLOAT_MAXOP(pwx->d[0], min, pwt->d[0], pwt->d[0], 64);
   7515        } else {
   7516            MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pwt->d[0], 64);
   7517        }
   7518
   7519        if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
   7520            MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pws->d[1], 64);
   7521        } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
   7522            MSA_FLOAT_MAXOP(pwx->d[1], min, pwt->d[1], pwt->d[1], 64);
   7523        } else {
   7524            MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pwt->d[1], 64);
   7525        }
   7526
   7527    } else {
   7528
   7529        assert(0);
   7530
   7531    }
   7532
   7533    check_msacsr_cause(env, GETPC());
   7534
   7535    msa_move_v(pwd, pwx);
   7536}
   7537
   7538void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7539        uint32_t ws, uint32_t wt)
   7540{
   7541    float_status *status = &env->active_tc.msa_fp_status;
   7542    wr_t wx, *pwx = &wx;
   7543    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7544    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7545    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7546
   7547    clear_msacsr_cause(env);
   7548
   7549    if (df == DF_WORD) {
   7550        FMAXMIN_A(min, max, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
   7551        FMAXMIN_A(min, max, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
   7552        FMAXMIN_A(min, max, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
   7553        FMAXMIN_A(min, max, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
   7554    } else if (df == DF_DOUBLE) {
   7555        FMAXMIN_A(min, max, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
   7556        FMAXMIN_A(min, max, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
   7557    } else {
   7558        assert(0);
   7559    }
   7560
   7561    check_msacsr_cause(env, GETPC());
   7562
   7563    msa_move_v(pwd, pwx);
   7564}
   7565
   7566void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7567        uint32_t ws, uint32_t wt)
   7568{
   7569     float_status *status = &env->active_tc.msa_fp_status;
   7570    wr_t wx, *pwx = &wx;
   7571    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7572    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7573    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7574
   7575    clear_msacsr_cause(env);
   7576
   7577    if (df == DF_WORD) {
   7578
   7579        if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
   7580            MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pws->w[0], 32);
   7581        } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
   7582            MSA_FLOAT_MAXOP(pwx->w[0], max, pwt->w[0], pwt->w[0], 32);
   7583        } else {
   7584            MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pwt->w[0], 32);
   7585        }
   7586
   7587        if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
   7588            MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pws->w[1], 32);
   7589        } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
   7590            MSA_FLOAT_MAXOP(pwx->w[1], max, pwt->w[1], pwt->w[1], 32);
   7591        } else {
   7592            MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pwt->w[1], 32);
   7593        }
   7594
   7595        if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
   7596            MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pws->w[2], 32);
   7597        } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
   7598            MSA_FLOAT_MAXOP(pwx->w[2], max, pwt->w[2], pwt->w[2], 32);
   7599        } else {
   7600            MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pwt->w[2], 32);
   7601        }
   7602
   7603        if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
   7604            MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pws->w[3], 32);
   7605        } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
   7606            MSA_FLOAT_MAXOP(pwx->w[3], max, pwt->w[3], pwt->w[3], 32);
   7607        } else {
   7608            MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pwt->w[3], 32);
   7609        }
   7610
   7611    } else if (df == DF_DOUBLE) {
   7612
   7613        if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
   7614            MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pws->d[0], 64);
   7615        } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
   7616            MSA_FLOAT_MAXOP(pwx->d[0], max, pwt->d[0], pwt->d[0], 64);
   7617        } else {
   7618            MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pwt->d[0], 64);
   7619        }
   7620
   7621        if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
   7622            MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pws->d[1], 64);
   7623        } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
   7624            MSA_FLOAT_MAXOP(pwx->d[1], max, pwt->d[1], pwt->d[1], 64);
   7625        } else {
   7626            MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pwt->d[1], 64);
   7627        }
   7628
   7629    } else {
   7630
   7631        assert(0);
   7632
   7633    }
   7634
   7635    check_msacsr_cause(env, GETPC());
   7636
   7637    msa_move_v(pwd, pwx);
   7638}
   7639
   7640void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7641        uint32_t ws, uint32_t wt)
   7642{
   7643    float_status *status = &env->active_tc.msa_fp_status;
   7644    wr_t wx, *pwx = &wx;
   7645    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7646    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7647    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7648
   7649    clear_msacsr_cause(env);
   7650
   7651    if (df == DF_WORD) {
   7652        FMAXMIN_A(max, min, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
   7653        FMAXMIN_A(max, min, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
   7654        FMAXMIN_A(max, min, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
   7655        FMAXMIN_A(max, min, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
   7656    } else if (df == DF_DOUBLE) {
   7657        FMAXMIN_A(max, min, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
   7658        FMAXMIN_A(max, min, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
   7659    } else {
   7660        assert(0);
   7661    }
   7662
   7663    check_msacsr_cause(env, GETPC());
   7664
   7665    msa_move_v(pwd, pwx);
   7666}
   7667
   7668void helper_msa_fclass_df(CPUMIPSState *env, uint32_t df,
   7669        uint32_t wd, uint32_t ws)
   7670{
   7671    float_status *status = &env->active_tc.msa_fp_status;
   7672
   7673    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7674    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7675    if (df == DF_WORD) {
   7676        pwd->w[0] = float_class_s(pws->w[0], status);
   7677        pwd->w[1] = float_class_s(pws->w[1], status);
   7678        pwd->w[2] = float_class_s(pws->w[2], status);
   7679        pwd->w[3] = float_class_s(pws->w[3], status);
   7680    } else if (df == DF_DOUBLE) {
   7681        pwd->d[0] = float_class_d(pws->d[0], status);
   7682        pwd->d[1] = float_class_d(pws->d[1], status);
   7683    } else {
   7684        assert(0);
   7685    }
   7686}
   7687
/*
 * MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)
 *
 * Computes DEST = float<BITS>_<OP>(ARG, status) and maps NaN inputs to 0.
 * Expects a variable named `env` in the enclosing scope.
 *
 * Steps, in order:
 *   1. the softfloat exception flags are reset to 0;
 *   2. the operation is performed;
 *   3. update_msacsr() is called with CLEAR_FS_UNDERFLOW and returns the
 *      cause value `c`;
 *   4. if get_enabled_exceptions() is non-zero for `c`, DEST is replaced
 *      by FLOAT_SNAN<BITS> with its low six bits replaced by `c`;
 *      otherwise, if ARG is any NaN, DEST is forced to 0.
 *
 * ARG is expanded twice (operation and NaN test).
 */
#define MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)                                \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG, status);                       \
        c = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                      \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        } else if (float ## BITS ## _is_any_nan(ARG)) {                     \
            DEST = 0;                                                       \
        }                                                                   \
    } while (0)
   7703
   7704void helper_msa_ftrunc_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7705                            uint32_t ws)
   7706{
   7707    wr_t wx, *pwx = &wx;
   7708    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7709    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7710    uint32_t i;
   7711
   7712    clear_msacsr_cause(env);
   7713
   7714    switch (df) {
   7715    case DF_WORD:
   7716        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7717            MSA_FLOAT_UNOP0(pwx->w[i], to_int32_round_to_zero, pws->w[i], 32);
   7718        }
   7719        break;
   7720    case DF_DOUBLE:
   7721        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7722            MSA_FLOAT_UNOP0(pwx->d[i], to_int64_round_to_zero, pws->d[i], 64);
   7723        }
   7724        break;
   7725    default:
   7726        assert(0);
   7727    }
   7728
   7729    check_msacsr_cause(env, GETPC());
   7730
   7731    msa_move_v(pwd, pwx);
   7732}
   7733
   7734void helper_msa_ftrunc_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7735                            uint32_t ws)
   7736{
   7737    wr_t wx, *pwx = &wx;
   7738    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7739    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7740    uint32_t i;
   7741
   7742    clear_msacsr_cause(env);
   7743
   7744    switch (df) {
   7745    case DF_WORD:
   7746        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7747            MSA_FLOAT_UNOP0(pwx->w[i], to_uint32_round_to_zero, pws->w[i], 32);
   7748        }
   7749        break;
   7750    case DF_DOUBLE:
   7751        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7752            MSA_FLOAT_UNOP0(pwx->d[i], to_uint64_round_to_zero, pws->d[i], 64);
   7753        }
   7754        break;
   7755    default:
   7756        assert(0);
   7757    }
   7758
   7759    check_msacsr_cause(env, GETPC());
   7760
   7761    msa_move_v(pwd, pwx);
   7762}
   7763
   7764void helper_msa_fsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7765                         uint32_t ws)
   7766{
   7767    wr_t wx, *pwx = &wx;
   7768    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7769    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7770    uint32_t i;
   7771
   7772    clear_msacsr_cause(env);
   7773
   7774    switch (df) {
   7775    case DF_WORD:
   7776        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7777            MSA_FLOAT_UNOP(pwx->w[i], sqrt, pws->w[i], 32);
   7778        }
   7779        break;
   7780    case DF_DOUBLE:
   7781        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7782            MSA_FLOAT_UNOP(pwx->d[i], sqrt, pws->d[i], 64);
   7783        }
   7784        break;
   7785    default:
   7786        assert(0);
   7787    }
   7788
   7789    check_msacsr_cause(env, GETPC());
   7790
   7791    msa_move_v(pwd, pwx);
   7792}
   7793
/*
 * MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)
 *
 * Computes DEST = FLOAT_ONE<BITS> / ARG with float<BITS>_div.
 * Expects a variable named `env` in the enclosing scope.
 *
 * The softfloat exception flags are reset to 0 before the division.
 * update_msacsr() then receives 0 as its second argument when ARG is an
 * infinity or the quotient is a quiet NaN, and RECIPROCAL_INEXACT
 * otherwise; its third argument is IS_DENORMAL(DEST, BITS).  If
 * get_enabled_exceptions() is non-zero for the returned cause `c`, DEST is
 * replaced by FLOAT_SNAN<BITS> with its low six bits replaced by `c`.
 *
 * ARG is expanded twice (division and infinity test), so an argument that
 * is itself a function call is evaluated twice.
 */
#define MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)                               \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## div(FLOAT_ONE ## BITS, ARG, status);   \
        c = update_msacsr(env, float ## BITS ## _is_infinity(ARG) ||        \
                          float ## BITS ## _is_quiet_nan(DEST, status) ?    \
                          0 : RECIPROCAL_INEXACT,                           \
                          IS_DENORMAL(DEST, BITS));                         \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
   7810
   7811void helper_msa_frsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7812                          uint32_t ws)
   7813{
   7814    wr_t wx, *pwx = &wx;
   7815    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7816    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7817    uint32_t i;
   7818
   7819    clear_msacsr_cause(env);
   7820
   7821    switch (df) {
   7822    case DF_WORD:
   7823        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7824            MSA_FLOAT_RECIPROCAL(pwx->w[i], float32_sqrt(pws->w[i],
   7825                    &env->active_tc.msa_fp_status), 32);
   7826        }
   7827        break;
   7828    case DF_DOUBLE:
   7829        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7830            MSA_FLOAT_RECIPROCAL(pwx->d[i], float64_sqrt(pws->d[i],
   7831                    &env->active_tc.msa_fp_status), 64);
   7832        }
   7833        break;
   7834    default:
   7835        assert(0);
   7836    }
   7837
   7838    check_msacsr_cause(env, GETPC());
   7839
   7840    msa_move_v(pwd, pwx);
   7841}
   7842
   7843void helper_msa_frcp_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7844                        uint32_t ws)
   7845{
   7846    wr_t wx, *pwx = &wx;
   7847    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7848    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7849    uint32_t i;
   7850
   7851    clear_msacsr_cause(env);
   7852
   7853    switch (df) {
   7854    case DF_WORD:
   7855        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7856            MSA_FLOAT_RECIPROCAL(pwx->w[i], pws->w[i], 32);
   7857        }
   7858        break;
   7859    case DF_DOUBLE:
   7860        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7861            MSA_FLOAT_RECIPROCAL(pwx->d[i], pws->d[i], 64);
   7862        }
   7863        break;
   7864    default:
   7865        assert(0);
   7866    }
   7867
   7868    check_msacsr_cause(env, GETPC());
   7869
   7870    msa_move_v(pwd, pwx);
   7871}
   7872
   7873void helper_msa_frint_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7874                         uint32_t ws)
   7875{
   7876    wr_t wx, *pwx = &wx;
   7877    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7878    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7879    uint32_t i;
   7880
   7881    clear_msacsr_cause(env);
   7882
   7883    switch (df) {
   7884    case DF_WORD:
   7885        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7886            MSA_FLOAT_UNOP(pwx->w[i], round_to_int, pws->w[i], 32);
   7887        }
   7888        break;
   7889    case DF_DOUBLE:
   7890        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7891            MSA_FLOAT_UNOP(pwx->d[i], round_to_int, pws->d[i], 64);
   7892        }
   7893        break;
   7894    default:
   7895        assert(0);
   7896    }
   7897
   7898    check_msacsr_cause(env, GETPC());
   7899
   7900    msa_move_v(pwd, pwx);
   7901}
   7902
/*
 * MSA_FLOAT_LOGB(DEST, ARG, BITS)
 *
 * Computes DEST = round_to_int(log2(ARG)) with the rounding mode forced to
 * float_round_down for the duration of the two operations.
 * Expects a variable named `env` in the enclosing scope.
 *
 * Steps, in order:
 *   1. the softfloat exception flags are reset to 0;
 *   2. the rounding mode is switched to float_round_down;
 *   3. float<BITS>_log2 and then float<BITS>_round_to_int are applied;
 *   4. the rounding mode is restored from the RM field of MSACSR through
 *      the ieee_rm[] table;
 *   5. float_flag_inexact is removed from the accumulated flags;
 *   6. update_msacsr() is called with IS_DENORMAL(DEST, BITS) and, if
 *      get_enabled_exceptions() is non-zero for the returned cause `c`,
 *      DEST is replaced by FLOAT_SNAN<BITS> with its low six bits
 *      replaced by `c`.
 */
#define MSA_FLOAT_LOGB(DEST, ARG, BITS)                                     \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        set_float_rounding_mode(float_round_down, status);                  \
        DEST = float ## BITS ## _ ## log2(ARG, status);                     \
        DEST = float ## BITS ## _ ## round_to_int(DEST, status);            \
        set_float_rounding_mode(ieee_rm[(env->active_tc.msacsr &            \
                                         MSACSR_RM_MASK) >> MSACSR_RM],     \
                                status);                                    \
                                                                            \
        set_float_exception_flags(get_float_exception_flags(status) &       \
                                  (~float_flag_inexact),                    \
                                  status);                                  \
                                                                            \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
   7926
   7927void helper_msa_flog2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7928                         uint32_t ws)
   7929{
   7930    wr_t wx, *pwx = &wx;
   7931    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7932    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7933    uint32_t i;
   7934
   7935    clear_msacsr_cause(env);
   7936
   7937    switch (df) {
   7938    case DF_WORD:
   7939        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7940            MSA_FLOAT_LOGB(pwx->w[i], pws->w[i], 32);
   7941        }
   7942        break;
   7943    case DF_DOUBLE:
   7944        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7945            MSA_FLOAT_LOGB(pwx->d[i], pws->d[i], 64);
   7946        }
   7947        break;
   7948    default:
   7949        assert(0);
   7950    }
   7951
   7952    check_msacsr_cause(env, GETPC());
   7953
   7954    msa_move_v(pwd, pwx);
   7955}
   7956
   7957void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7958                          uint32_t ws)
   7959{
   7960    wr_t wx, *pwx = &wx;
   7961    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7962    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7963    uint32_t i;
   7964
   7965    clear_msacsr_cause(env);
   7966
   7967    switch (df) {
   7968    case DF_WORD:
   7969        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7970            /*
   7971             * Half precision floats come in two formats: standard
   7972             * IEEE and "ARM" format.  The latter gains extra exponent
   7973             * range by omitting the NaN/Inf encodings.
   7974             */
   7975            bool ieee = true;
   7976
   7977            MSA_FLOAT_BINOP(pwx->w[i], from_float16, Lh(pws, i), ieee, 32);
   7978        }
   7979        break;
   7980    case DF_DOUBLE:
   7981        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7982            MSA_FLOAT_UNOP(pwx->d[i], from_float32, Lw(pws, i), 64);
   7983        }
   7984        break;
   7985    default:
   7986        assert(0);
   7987    }
   7988
   7989    check_msacsr_cause(env, GETPC());
   7990    msa_move_v(pwd, pwx);
   7991}
   7992
   7993void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7994                          uint32_t ws)
   7995{
   7996    wr_t wx, *pwx = &wx;
   7997    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7998    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7999    uint32_t i;
   8000
   8001    clear_msacsr_cause(env);
   8002
   8003    switch (df) {
   8004    case DF_WORD:
   8005        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8006            /*
   8007             * Half precision floats come in two formats: standard
   8008             * IEEE and "ARM" format.  The latter gains extra exponent
   8009             * range by omitting the NaN/Inf encodings.
   8010             */
   8011            bool ieee = true;
   8012
   8013            MSA_FLOAT_BINOP(pwx->w[i], from_float16, Rh(pws, i), ieee, 32);
   8014        }
   8015        break;
   8016    case DF_DOUBLE:
   8017        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8018            MSA_FLOAT_UNOP(pwx->d[i], from_float32, Rw(pws, i), 64);
   8019        }
   8020        break;
   8021    default:
   8022        assert(0);
   8023    }
   8024
   8025    check_msacsr_cause(env, GETPC());
   8026    msa_move_v(pwd, pwx);
   8027}
   8028
   8029void helper_msa_ffql_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   8030                        uint32_t ws)
   8031{
   8032    wr_t wx, *pwx = &wx;
   8033    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8034    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   8035    uint32_t i;
   8036
   8037    switch (df) {
   8038    case DF_WORD:
   8039        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8040            MSA_FLOAT_UNOP(pwx->w[i], from_q16, Lh(pws, i), 32);
   8041        }
   8042        break;
   8043    case DF_DOUBLE:
   8044        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8045            MSA_FLOAT_UNOP(pwx->d[i], from_q32, Lw(pws, i), 64);
   8046        }
   8047        break;
   8048    default:
   8049        assert(0);
   8050    }
   8051
   8052    msa_move_v(pwd, pwx);
   8053}
   8054
   8055void helper_msa_ffqr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   8056                        uint32_t ws)
   8057{
   8058    wr_t wx, *pwx = &wx;
   8059    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8060    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   8061    uint32_t i;
   8062
   8063    switch (df) {
   8064    case DF_WORD:
   8065        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8066            MSA_FLOAT_UNOP(pwx->w[i], from_q16, Rh(pws, i), 32);
   8067        }
   8068        break;
   8069    case DF_DOUBLE:
   8070        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8071            MSA_FLOAT_UNOP(pwx->d[i], from_q32, Rw(pws, i), 64);
   8072        }
   8073        break;
   8074    default:
   8075        assert(0);
   8076    }
   8077
   8078    msa_move_v(pwd, pwx);
   8079}
   8080
   8081void helper_msa_ftint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   8082                           uint32_t ws)
   8083{
   8084    wr_t wx, *pwx = &wx;
   8085    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8086    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   8087    uint32_t i;
   8088
   8089    clear_msacsr_cause(env);
   8090
   8091    switch (df) {
   8092    case DF_WORD:
   8093        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8094            MSA_FLOAT_UNOP0(pwx->w[i], to_int32, pws->w[i], 32);
   8095        }
   8096        break;
   8097    case DF_DOUBLE:
   8098        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8099            MSA_FLOAT_UNOP0(pwx->d[i], to_int64, pws->d[i], 64);
   8100        }
   8101        break;
   8102    default:
   8103        assert(0);
   8104    }
   8105
   8106    check_msacsr_cause(env, GETPC());
   8107
   8108    msa_move_v(pwd, pwx);
   8109}
   8110
   8111void helper_msa_ftint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   8112                           uint32_t ws)
   8113{
   8114    wr_t wx, *pwx = &wx;
   8115    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8116    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   8117    uint32_t i;
   8118
   8119    clear_msacsr_cause(env);
   8120
   8121    switch (df) {
   8122    case DF_WORD:
   8123        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8124            MSA_FLOAT_UNOP0(pwx->w[i], to_uint32, pws->w[i], 32);
   8125        }
   8126        break;
   8127    case DF_DOUBLE:
   8128        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8129            MSA_FLOAT_UNOP0(pwx->d[i], to_uint64, pws->d[i], 64);
   8130        }
   8131        break;
   8132    default:
   8133        assert(0);
   8134    }
   8135
   8136    check_msacsr_cause(env, GETPC());
   8137
   8138    msa_move_v(pwd, pwx);
   8139}
   8140
/*
 * Aliases that give the softfloat integer-to-float conversions names of
 * the form float<BITS>_<OP>, so they can be selected by macros that build
 * the function name by pasting "float ## BITS ## _ ## OP" (as
 * MSA_FLOAT_UNOP0 does; presumably MSA_FLOAT_UNOP builds names the same
 * way -- confirm at its definition).
 */
#define float32_from_int32 int32_to_float32
#define float32_from_uint32 uint32_to_float32

#define float64_from_int64 int64_to_float64
#define float64_from_uint64 uint64_to_float64
   8146
   8147void helper_msa_ffint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   8148                           uint32_t ws)
   8149{
   8150    wr_t wx, *pwx = &wx;
   8151    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8152    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   8153    uint32_t i;
   8154
   8155    clear_msacsr_cause(env);
   8156
   8157    switch (df) {
   8158    case DF_WORD:
   8159        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8160            MSA_FLOAT_UNOP(pwx->w[i], from_int32, pws->w[i], 32);
   8161        }
   8162        break;
   8163    case DF_DOUBLE:
   8164        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8165            MSA_FLOAT_UNOP(pwx->d[i], from_int64, pws->d[i], 64);
   8166        }
   8167        break;
   8168    default:
   8169        assert(0);
   8170    }
   8171
   8172    check_msacsr_cause(env, GETPC());
   8173
   8174    msa_move_v(pwd, pwx);
   8175}
   8176
   8177void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   8178                           uint32_t ws)
   8179{
   8180    wr_t wx, *pwx = &wx;
   8181    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8182    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   8183    uint32_t i;
   8184
   8185    clear_msacsr_cause(env);
   8186
   8187    switch (df) {
   8188    case DF_WORD:
   8189        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8190            MSA_FLOAT_UNOP(pwx->w[i], from_uint32, pws->w[i], 32);
   8191        }
   8192        break;
   8193    case DF_DOUBLE:
   8194        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8195            MSA_FLOAT_UNOP(pwx->d[i], from_uint64, pws->d[i], 64);
   8196        }
   8197        break;
   8198    default:
   8199        assert(0);
   8200    }
   8201
   8202    check_msacsr_cause(env, GETPC());
   8203
   8204    msa_move_v(pwd, pwx);
   8205}
   8206
/*
 * Width in bits of one element of data format df.  This repeats, token for
 * token, the DF_BITS definition at the top of this file.
 */
#define DF_BITS(df) (1 << ((df) + 3))

/* Element-by-element access macros */
/* Number of elements of data format df that fit in MSA_WRLEN. */
#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))

/*
 * MEMOP_IDX(DF): in builds without CONFIG_USER_ONLY this declares a local
 * `MemOpIdx oi`, built by OR-ing MO_TE, DF and MO_UNALN and combining the
 * result with cpu_mmu_index(env, false).  It expects `env` in the enclosing
 * scope.  With CONFIG_USER_ONLY it expands to nothing.
 *
 * The expansion already ends with a semicolon, so call sites do not add one.
 */
#if !defined(CONFIG_USER_ONLY)
#define MEMOP_IDX(DF)                                                   \
    MemOpIdx oi = make_memop_idx(MO_TE | DF | MO_UNALN,                 \
                                 cpu_mmu_index(env, false));
#else
#define MEMOP_IDX(DF)
#endif
   8220
   8221void helper_msa_ld_b(CPUMIPSState *env, uint32_t wd,
   8222                     target_ulong addr)
   8223{
   8224    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8225    MEMOP_IDX(DF_BYTE)
   8226#if !defined(CONFIG_USER_ONLY)
   8227#if !defined(HOST_WORDS_BIGENDIAN)
   8228    pwd->b[0]  = helper_ret_ldub_mmu(env, addr + (0  << DF_BYTE), oi, GETPC());
   8229    pwd->b[1]  = helper_ret_ldub_mmu(env, addr + (1  << DF_BYTE), oi, GETPC());
   8230    pwd->b[2]  = helper_ret_ldub_mmu(env, addr + (2  << DF_BYTE), oi, GETPC());
   8231    pwd->b[3]  = helper_ret_ldub_mmu(env, addr + (3  << DF_BYTE), oi, GETPC());
   8232    pwd->b[4]  = helper_ret_ldub_mmu(env, addr + (4  << DF_BYTE), oi, GETPC());
   8233    pwd->b[5]  = helper_ret_ldub_mmu(env, addr + (5  << DF_BYTE), oi, GETPC());
   8234    pwd->b[6]  = helper_ret_ldub_mmu(env, addr + (6  << DF_BYTE), oi, GETPC());
   8235    pwd->b[7]  = helper_ret_ldub_mmu(env, addr + (7  << DF_BYTE), oi, GETPC());
   8236    pwd->b[8]  = helper_ret_ldub_mmu(env, addr + (8  << DF_BYTE), oi, GETPC());
   8237    pwd->b[9]  = helper_ret_ldub_mmu(env, addr + (9  << DF_BYTE), oi, GETPC());
   8238    pwd->b[10] = helper_ret_ldub_mmu(env, addr + (10 << DF_BYTE), oi, GETPC());
   8239    pwd->b[11] = helper_ret_ldub_mmu(env, addr + (11 << DF_BYTE), oi, GETPC());
   8240    pwd->b[12] = helper_ret_ldub_mmu(env, addr + (12 << DF_BYTE), oi, GETPC());
   8241    pwd->b[13] = helper_ret_ldub_mmu(env, addr + (13 << DF_BYTE), oi, GETPC());
   8242    pwd->b[14] = helper_ret_ldub_mmu(env, addr + (14 << DF_BYTE), oi, GETPC());
   8243    pwd->b[15] = helper_ret_ldub_mmu(env, addr + (15 << DF_BYTE), oi, GETPC());
   8244#else
   8245    pwd->b[0]  = helper_ret_ldub_mmu(env, addr + (7  << DF_BYTE), oi, GETPC());
   8246    pwd->b[1]  = helper_ret_ldub_mmu(env, addr + (6  << DF_BYTE), oi, GETPC());
   8247    pwd->b[2]  = helper_ret_ldub_mmu(env, addr + (5  << DF_BYTE), oi, GETPC());
   8248    pwd->b[3]  = helper_ret_ldub_mmu(env, addr + (4  << DF_BYTE), oi, GETPC());
   8249    pwd->b[4]  = helper_ret_ldub_mmu(env, addr + (3  << DF_BYTE), oi, GETPC());
   8250    pwd->b[5]  = helper_ret_ldub_mmu(env, addr + (2  << DF_BYTE), oi, GETPC());
   8251    pwd->b[6]  = helper_ret_ldub_mmu(env, addr + (1  << DF_BYTE), oi, GETPC());
   8252    pwd->b[7]  = helper_ret_ldub_mmu(env, addr + (0  << DF_BYTE), oi, GETPC());
   8253    pwd->b[8]  = helper_ret_ldub_mmu(env, addr + (15 << DF_BYTE), oi, GETPC());
   8254    pwd->b[9]  = helper_ret_ldub_mmu(env, addr + (14 << DF_BYTE), oi, GETPC());
   8255    pwd->b[10] = helper_ret_ldub_mmu(env, addr + (13 << DF_BYTE), oi, GETPC());
   8256    pwd->b[11] = helper_ret_ldub_mmu(env, addr + (12 << DF_BYTE), oi, GETPC());
   8257    pwd->b[12] = helper_ret_ldub_mmu(env, addr + (11 << DF_BYTE), oi, GETPC());
   8258    pwd->b[13] = helper_ret_ldub_mmu(env, addr + (10 << DF_BYTE), oi, GETPC());
   8259    pwd->b[14] = helper_ret_ldub_mmu(env, addr + (9  << DF_BYTE), oi, GETPC());
   8260    pwd->b[15] = helper_ret_ldub_mmu(env, addr + (8  << DF_BYTE), oi, GETPC());
   8261#endif
   8262#else
   8263#if !defined(HOST_WORDS_BIGENDIAN)
   8264    pwd->b[0]  = cpu_ldub_data(env, addr + (0  << DF_BYTE));
   8265    pwd->b[1]  = cpu_ldub_data(env, addr + (1  << DF_BYTE));
   8266    pwd->b[2]  = cpu_ldub_data(env, addr + (2  << DF_BYTE));
   8267    pwd->b[3]  = cpu_ldub_data(env, addr + (3  << DF_BYTE));
   8268    pwd->b[4]  = cpu_ldub_data(env, addr + (4  << DF_BYTE));
   8269    pwd->b[5]  = cpu_ldub_data(env, addr + (5  << DF_BYTE));
   8270    pwd->b[6]  = cpu_ldub_data(env, addr + (6  << DF_BYTE));
   8271    pwd->b[7]  = cpu_ldub_data(env, addr + (7  << DF_BYTE));
   8272    pwd->b[8]  = cpu_ldub_data(env, addr + (8  << DF_BYTE));
   8273    pwd->b[9]  = cpu_ldub_data(env, addr + (9  << DF_BYTE));
   8274    pwd->b[10] = cpu_ldub_data(env, addr + (10 << DF_BYTE));
   8275    pwd->b[11] = cpu_ldub_data(env, addr + (11 << DF_BYTE));
   8276    pwd->b[12] = cpu_ldub_data(env, addr + (12 << DF_BYTE));
   8277    pwd->b[13] = cpu_ldub_data(env, addr + (13 << DF_BYTE));
   8278    pwd->b[14] = cpu_ldub_data(env, addr + (14 << DF_BYTE));
   8279    pwd->b[15] = cpu_ldub_data(env, addr + (15 << DF_BYTE));
   8280#else
   8281    pwd->b[0]  = cpu_ldub_data(env, addr + (7  << DF_BYTE));
   8282    pwd->b[1]  = cpu_ldub_data(env, addr + (6  << DF_BYTE));
   8283    pwd->b[2]  = cpu_ldub_data(env, addr + (5  << DF_BYTE));
   8284    pwd->b[3]  = cpu_ldub_data(env, addr + (4  << DF_BYTE));
   8285    pwd->b[4]  = cpu_ldub_data(env, addr + (3  << DF_BYTE));
   8286    pwd->b[5]  = cpu_ldub_data(env, addr + (2  << DF_BYTE));
   8287    pwd->b[6]  = cpu_ldub_data(env, addr + (1  << DF_BYTE));
   8288    pwd->b[7]  = cpu_ldub_data(env, addr + (0  << DF_BYTE));
   8289    pwd->b[8]  = cpu_ldub_data(env, addr + (15 << DF_BYTE));
   8290    pwd->b[9]  = cpu_ldub_data(env, addr + (14 << DF_BYTE));
   8291    pwd->b[10] = cpu_ldub_data(env, addr + (13 << DF_BYTE));
   8292    pwd->b[11] = cpu_ldub_data(env, addr + (12 << DF_BYTE));
   8293    pwd->b[12] = cpu_ldub_data(env, addr + (11 << DF_BYTE));
   8294    pwd->b[13] = cpu_ldub_data(env, addr + (10 << DF_BYTE));
   8295    pwd->b[14] = cpu_ldub_data(env, addr + (9 << DF_BYTE));
   8296    pwd->b[15] = cpu_ldub_data(env, addr + (8 << DF_BYTE));
   8297#endif
   8298#endif
   8299}
   8300
   8301void helper_msa_ld_h(CPUMIPSState *env, uint32_t wd,
   8302                     target_ulong addr)
   8303{
   8304    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8305    MEMOP_IDX(DF_HALF)
   8306#if !defined(CONFIG_USER_ONLY)
   8307#if !defined(HOST_WORDS_BIGENDIAN)
   8308    pwd->h[0] = helper_ret_lduw_mmu(env, addr + (0 << DF_HALF), oi, GETPC());
   8309    pwd->h[1] = helper_ret_lduw_mmu(env, addr + (1 << DF_HALF), oi, GETPC());
   8310    pwd->h[2] = helper_ret_lduw_mmu(env, addr + (2 << DF_HALF), oi, GETPC());
   8311    pwd->h[3] = helper_ret_lduw_mmu(env, addr + (3 << DF_HALF), oi, GETPC());
   8312    pwd->h[4] = helper_ret_lduw_mmu(env, addr + (4 << DF_HALF), oi, GETPC());
   8313    pwd->h[5] = helper_ret_lduw_mmu(env, addr + (5 << DF_HALF), oi, GETPC());
   8314    pwd->h[6] = helper_ret_lduw_mmu(env, addr + (6 << DF_HALF), oi, GETPC());
   8315    pwd->h[7] = helper_ret_lduw_mmu(env, addr + (7 << DF_HALF), oi, GETPC());
   8316#else
   8317    pwd->h[0] = helper_ret_lduw_mmu(env, addr + (3 << DF_HALF), oi, GETPC());
   8318    pwd->h[1] = helper_ret_lduw_mmu(env, addr + (2 << DF_HALF), oi, GETPC());
   8319    pwd->h[2] = helper_ret_lduw_mmu(env, addr + (1 << DF_HALF), oi, GETPC());
   8320    pwd->h[3] = helper_ret_lduw_mmu(env, addr + (0 << DF_HALF), oi, GETPC());
   8321    pwd->h[4] = helper_ret_lduw_mmu(env, addr + (7 << DF_HALF), oi, GETPC());
   8322    pwd->h[5] = helper_ret_lduw_mmu(env, addr + (6 << DF_HALF), oi, GETPC());
   8323    pwd->h[6] = helper_ret_lduw_mmu(env, addr + (5 << DF_HALF), oi, GETPC());
   8324    pwd->h[7] = helper_ret_lduw_mmu(env, addr + (4 << DF_HALF), oi, GETPC());
   8325#endif
   8326#else
   8327#if !defined(HOST_WORDS_BIGENDIAN)
   8328    pwd->h[0] = cpu_lduw_data(env, addr + (0 << DF_HALF));
   8329    pwd->h[1] = cpu_lduw_data(env, addr + (1 << DF_HALF));
   8330    pwd->h[2] = cpu_lduw_data(env, addr + (2 << DF_HALF));
   8331    pwd->h[3] = cpu_lduw_data(env, addr + (3 << DF_HALF));
   8332    pwd->h[4] = cpu_lduw_data(env, addr + (4 << DF_HALF));
   8333    pwd->h[5] = cpu_lduw_data(env, addr + (5 << DF_HALF));
   8334    pwd->h[6] = cpu_lduw_data(env, addr + (6 << DF_HALF));
   8335    pwd->h[7] = cpu_lduw_data(env, addr + (7 << DF_HALF));
   8336#else
   8337    pwd->h[0] = cpu_lduw_data(env, addr + (3 << DF_HALF));
   8338    pwd->h[1] = cpu_lduw_data(env, addr + (2 << DF_HALF));
   8339    pwd->h[2] = cpu_lduw_data(env, addr + (1 << DF_HALF));
   8340    pwd->h[3] = cpu_lduw_data(env, addr + (0 << DF_HALF));
   8341    pwd->h[4] = cpu_lduw_data(env, addr + (7 << DF_HALF));
   8342    pwd->h[5] = cpu_lduw_data(env, addr + (6 << DF_HALF));
   8343    pwd->h[6] = cpu_lduw_data(env, addr + (5 << DF_HALF));
   8344    pwd->h[7] = cpu_lduw_data(env, addr + (4 << DF_HALF));
   8345#endif
   8346#endif
   8347}
   8348
   8349void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd,
   8350                     target_ulong addr)
   8351{
   8352    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8353    MEMOP_IDX(DF_WORD)
   8354#if !defined(CONFIG_USER_ONLY)
   8355#if !defined(HOST_WORDS_BIGENDIAN)
   8356    pwd->w[0] = helper_ret_ldul_mmu(env, addr + (0 << DF_WORD), oi, GETPC());
   8357    pwd->w[1] = helper_ret_ldul_mmu(env, addr + (1 << DF_WORD), oi, GETPC());
   8358    pwd->w[2] = helper_ret_ldul_mmu(env, addr + (2 << DF_WORD), oi, GETPC());
   8359    pwd->w[3] = helper_ret_ldul_mmu(env, addr + (3 << DF_WORD), oi, GETPC());
   8360#else
   8361    pwd->w[0] = helper_ret_ldul_mmu(env, addr + (1 << DF_WORD), oi, GETPC());
   8362    pwd->w[1] = helper_ret_ldul_mmu(env, addr + (0 << DF_WORD), oi, GETPC());
   8363    pwd->w[2] = helper_ret_ldul_mmu(env, addr + (3 << DF_WORD), oi, GETPC());
   8364    pwd->w[3] = helper_ret_ldul_mmu(env, addr + (2 << DF_WORD), oi, GETPC());
   8365#endif
   8366#else
   8367#if !defined(HOST_WORDS_BIGENDIAN)
   8368    pwd->w[0] = cpu_ldl_data(env, addr + (0 << DF_WORD));
   8369    pwd->w[1] = cpu_ldl_data(env, addr + (1 << DF_WORD));
   8370    pwd->w[2] = cpu_ldl_data(env, addr + (2 << DF_WORD));
   8371    pwd->w[3] = cpu_ldl_data(env, addr + (3 << DF_WORD));
   8372#else
   8373    pwd->w[0] = cpu_ldl_data(env, addr + (1 << DF_WORD));
   8374    pwd->w[1] = cpu_ldl_data(env, addr + (0 << DF_WORD));
   8375    pwd->w[2] = cpu_ldl_data(env, addr + (3 << DF_WORD));
   8376    pwd->w[3] = cpu_ldl_data(env, addr + (2 << DF_WORD));
   8377#endif
   8378#endif
   8379}
   8380
   8381void helper_msa_ld_d(CPUMIPSState *env, uint32_t wd,
   8382                     target_ulong addr)
   8383{
   8384    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8385    MEMOP_IDX(DF_DOUBLE)
   8386#if !defined(CONFIG_USER_ONLY)
   8387    pwd->d[0] = helper_ret_ldq_mmu(env, addr + (0 << DF_DOUBLE), oi, GETPC());
   8388    pwd->d[1] = helper_ret_ldq_mmu(env, addr + (1 << DF_DOUBLE), oi, GETPC());
   8389#else
   8390    pwd->d[0] = cpu_ldq_data(env, addr + (0 << DF_DOUBLE));
   8391    pwd->d[1] = cpu_ldq_data(env, addr + (1 << DF_DOUBLE));
   8392#endif
   8393}
   8394
/*
 * Evaluates to non-zero when a whole-vector access (MSA_WRLEN / 8 bytes)
 * starting at address x extends past the end of the target page that
 * contains x: the offset of x within its page plus the index of the last
 * byte of the vector reaches or exceeds TARGET_PAGE_SIZE.
 */
#define MSA_PAGESPAN(x) \
        ((((x) & ~TARGET_PAGE_MASK) + MSA_WRLEN / 8 - 1) >= TARGET_PAGE_SIZE)
   8397
   8398static inline void ensure_writable_pages(CPUMIPSState *env,
   8399                                         target_ulong addr,
   8400                                         int mmu_idx,
   8401                                         uintptr_t retaddr)
   8402{
   8403    /* FIXME: Probe the actual accesses (pass and use a size) */
   8404    if (unlikely(MSA_PAGESPAN(addr))) {
   8405        /* first page */
   8406        probe_write(env, addr, 0, mmu_idx, retaddr);
   8407        /* second page */
   8408        addr = (addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
   8409        probe_write(env, addr, 0, mmu_idx, retaddr);
   8410    }
   8411}
   8412
   8413void helper_msa_st_b(CPUMIPSState *env, uint32_t wd,
   8414                     target_ulong addr)
   8415{
   8416    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8417    int mmu_idx = cpu_mmu_index(env, false);
   8418
   8419    MEMOP_IDX(DF_BYTE)
   8420    ensure_writable_pages(env, addr, mmu_idx, GETPC());
   8421#if !defined(CONFIG_USER_ONLY)
   8422#if !defined(HOST_WORDS_BIGENDIAN)
   8423    helper_ret_stb_mmu(env, addr + (0  << DF_BYTE), pwd->b[0],  oi, GETPC());
   8424    helper_ret_stb_mmu(env, addr + (1  << DF_BYTE), pwd->b[1],  oi, GETPC());
   8425    helper_ret_stb_mmu(env, addr + (2  << DF_BYTE), pwd->b[2],  oi, GETPC());
   8426    helper_ret_stb_mmu(env, addr + (3  << DF_BYTE), pwd->b[3],  oi, GETPC());
   8427    helper_ret_stb_mmu(env, addr + (4  << DF_BYTE), pwd->b[4],  oi, GETPC());
   8428    helper_ret_stb_mmu(env, addr + (5  << DF_BYTE), pwd->b[5],  oi, GETPC());
   8429    helper_ret_stb_mmu(env, addr + (6  << DF_BYTE), pwd->b[6],  oi, GETPC());
   8430    helper_ret_stb_mmu(env, addr + (7  << DF_BYTE), pwd->b[7],  oi, GETPC());
   8431    helper_ret_stb_mmu(env, addr + (8  << DF_BYTE), pwd->b[8],  oi, GETPC());
   8432    helper_ret_stb_mmu(env, addr + (9  << DF_BYTE), pwd->b[9],  oi, GETPC());
   8433    helper_ret_stb_mmu(env, addr + (10 << DF_BYTE), pwd->b[10], oi, GETPC());
   8434    helper_ret_stb_mmu(env, addr + (11 << DF_BYTE), pwd->b[11], oi, GETPC());
   8435    helper_ret_stb_mmu(env, addr + (12 << DF_BYTE), pwd->b[12], oi, GETPC());
   8436    helper_ret_stb_mmu(env, addr + (13 << DF_BYTE), pwd->b[13], oi, GETPC());
   8437    helper_ret_stb_mmu(env, addr + (14 << DF_BYTE), pwd->b[14], oi, GETPC());
   8438    helper_ret_stb_mmu(env, addr + (15 << DF_BYTE), pwd->b[15], oi, GETPC());
   8439#else
   8440    helper_ret_stb_mmu(env, addr + (7  << DF_BYTE), pwd->b[0],  oi, GETPC());
   8441    helper_ret_stb_mmu(env, addr + (6  << DF_BYTE), pwd->b[1],  oi, GETPC());
   8442    helper_ret_stb_mmu(env, addr + (5  << DF_BYTE), pwd->b[2],  oi, GETPC());
   8443    helper_ret_stb_mmu(env, addr + (4  << DF_BYTE), pwd->b[3],  oi, GETPC());
   8444    helper_ret_stb_mmu(env, addr + (3  << DF_BYTE), pwd->b[4],  oi, GETPC());
   8445    helper_ret_stb_mmu(env, addr + (2  << DF_BYTE), pwd->b[5],  oi, GETPC());
   8446    helper_ret_stb_mmu(env, addr + (1  << DF_BYTE), pwd->b[6],  oi, GETPC());
   8447    helper_ret_stb_mmu(env, addr + (0  << DF_BYTE), pwd->b[7],  oi, GETPC());
   8448    helper_ret_stb_mmu(env, addr + (15 << DF_BYTE), pwd->b[8],  oi, GETPC());
   8449    helper_ret_stb_mmu(env, addr + (14 << DF_BYTE), pwd->b[9],  oi, GETPC());
   8450    helper_ret_stb_mmu(env, addr + (13 << DF_BYTE), pwd->b[10], oi, GETPC());
   8451    helper_ret_stb_mmu(env, addr + (12 << DF_BYTE), pwd->b[11], oi, GETPC());
   8452    helper_ret_stb_mmu(env, addr + (11 << DF_BYTE), pwd->b[12], oi, GETPC());
   8453    helper_ret_stb_mmu(env, addr + (10 << DF_BYTE), pwd->b[13], oi, GETPC());
   8454    helper_ret_stb_mmu(env, addr + (9  << DF_BYTE), pwd->b[14], oi, GETPC());
   8455    helper_ret_stb_mmu(env, addr + (8  << DF_BYTE), pwd->b[15], oi, GETPC());
   8456#endif
   8457#else
   8458#if !defined(HOST_WORDS_BIGENDIAN)
   8459    cpu_stb_data(env, addr + (0  << DF_BYTE), pwd->b[0]);
   8460    cpu_stb_data(env, addr + (1  << DF_BYTE), pwd->b[1]);
   8461    cpu_stb_data(env, addr + (2  << DF_BYTE), pwd->b[2]);
   8462    cpu_stb_data(env, addr + (3  << DF_BYTE), pwd->b[3]);
   8463    cpu_stb_data(env, addr + (4  << DF_BYTE), pwd->b[4]);
   8464    cpu_stb_data(env, addr + (5  << DF_BYTE), pwd->b[5]);
   8465    cpu_stb_data(env, addr + (6  << DF_BYTE), pwd->b[6]);
   8466    cpu_stb_data(env, addr + (7  << DF_BYTE), pwd->b[7]);
   8467    cpu_stb_data(env, addr + (8  << DF_BYTE), pwd->b[8]);
   8468    cpu_stb_data(env, addr + (9  << DF_BYTE), pwd->b[9]);
   8469    cpu_stb_data(env, addr + (10 << DF_BYTE), pwd->b[10]);
   8470    cpu_stb_data(env, addr + (11 << DF_BYTE), pwd->b[11]);
   8471    cpu_stb_data(env, addr + (12 << DF_BYTE), pwd->b[12]);
   8472    cpu_stb_data(env, addr + (13 << DF_BYTE), pwd->b[13]);
   8473    cpu_stb_data(env, addr + (14 << DF_BYTE), pwd->b[14]);
   8474    cpu_stb_data(env, addr + (15 << DF_BYTE), pwd->b[15]);
   8475#else
   8476    cpu_stb_data(env, addr + (7  << DF_BYTE), pwd->b[0]);
   8477    cpu_stb_data(env, addr + (6  << DF_BYTE), pwd->b[1]);
   8478    cpu_stb_data(env, addr + (5  << DF_BYTE), pwd->b[2]);
   8479    cpu_stb_data(env, addr + (4  << DF_BYTE), pwd->b[3]);
   8480    cpu_stb_data(env, addr + (3  << DF_BYTE), pwd->b[4]);
   8481    cpu_stb_data(env, addr + (2  << DF_BYTE), pwd->b[5]);
   8482    cpu_stb_data(env, addr + (1  << DF_BYTE), pwd->b[6]);
   8483    cpu_stb_data(env, addr + (0  << DF_BYTE), pwd->b[7]);
   8484    cpu_stb_data(env, addr + (15 << DF_BYTE), pwd->b[8]);
   8485    cpu_stb_data(env, addr + (14 << DF_BYTE), pwd->b[9]);
   8486    cpu_stb_data(env, addr + (13 << DF_BYTE), pwd->b[10]);
   8487    cpu_stb_data(env, addr + (12 << DF_BYTE), pwd->b[11]);
   8488    cpu_stb_data(env, addr + (11 << DF_BYTE), pwd->b[12]);
   8489    cpu_stb_data(env, addr + (10 << DF_BYTE), pwd->b[13]);
   8490    cpu_stb_data(env, addr + (9  << DF_BYTE), pwd->b[14]);
   8491    cpu_stb_data(env, addr + (8  << DF_BYTE), pwd->b[15]);
   8492#endif
   8493#endif
   8494}
   8495
   8496void helper_msa_st_h(CPUMIPSState *env, uint32_t wd,
   8497                     target_ulong addr)
   8498{
   8499    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8500    int mmu_idx = cpu_mmu_index(env, false);
   8501
   8502    MEMOP_IDX(DF_HALF)
   8503    ensure_writable_pages(env, addr, mmu_idx, GETPC());
   8504#if !defined(CONFIG_USER_ONLY)
   8505#if !defined(HOST_WORDS_BIGENDIAN)
   8506    helper_ret_stw_mmu(env, addr + (0 << DF_HALF), pwd->h[0], oi, GETPC());
   8507    helper_ret_stw_mmu(env, addr + (1 << DF_HALF), pwd->h[1], oi, GETPC());
   8508    helper_ret_stw_mmu(env, addr + (2 << DF_HALF), pwd->h[2], oi, GETPC());
   8509    helper_ret_stw_mmu(env, addr + (3 << DF_HALF), pwd->h[3], oi, GETPC());
   8510    helper_ret_stw_mmu(env, addr + (4 << DF_HALF), pwd->h[4], oi, GETPC());
   8511    helper_ret_stw_mmu(env, addr + (5 << DF_HALF), pwd->h[5], oi, GETPC());
   8512    helper_ret_stw_mmu(env, addr + (6 << DF_HALF), pwd->h[6], oi, GETPC());
   8513    helper_ret_stw_mmu(env, addr + (7 << DF_HALF), pwd->h[7], oi, GETPC());
   8514#else
   8515    helper_ret_stw_mmu(env, addr + (3 << DF_HALF), pwd->h[0], oi, GETPC());
   8516    helper_ret_stw_mmu(env, addr + (2 << DF_HALF), pwd->h[1], oi, GETPC());
   8517    helper_ret_stw_mmu(env, addr + (1 << DF_HALF), pwd->h[2], oi, GETPC());
   8518    helper_ret_stw_mmu(env, addr + (0 << DF_HALF), pwd->h[3], oi, GETPC());
   8519    helper_ret_stw_mmu(env, addr + (7 << DF_HALF), pwd->h[4], oi, GETPC());
   8520    helper_ret_stw_mmu(env, addr + (6 << DF_HALF), pwd->h[5], oi, GETPC());
   8521    helper_ret_stw_mmu(env, addr + (5 << DF_HALF), pwd->h[6], oi, GETPC());
   8522    helper_ret_stw_mmu(env, addr + (4 << DF_HALF), pwd->h[7], oi, GETPC());
   8523#endif
   8524#else
   8525#if !defined(HOST_WORDS_BIGENDIAN)
   8526    cpu_stw_data(env, addr + (0 << DF_HALF), pwd->h[0]);
   8527    cpu_stw_data(env, addr + (1 << DF_HALF), pwd->h[1]);
   8528    cpu_stw_data(env, addr + (2 << DF_HALF), pwd->h[2]);
   8529    cpu_stw_data(env, addr + (3 << DF_HALF), pwd->h[3]);
   8530    cpu_stw_data(env, addr + (4 << DF_HALF), pwd->h[4]);
   8531    cpu_stw_data(env, addr + (5 << DF_HALF), pwd->h[5]);
   8532    cpu_stw_data(env, addr + (6 << DF_HALF), pwd->h[6]);
   8533    cpu_stw_data(env, addr + (7 << DF_HALF), pwd->h[7]);
   8534#else
   8535    cpu_stw_data(env, addr + (3 << DF_HALF), pwd->h[0]);
   8536    cpu_stw_data(env, addr + (2 << DF_HALF), pwd->h[1]);
   8537    cpu_stw_data(env, addr + (1 << DF_HALF), pwd->h[2]);
   8538    cpu_stw_data(env, addr + (0 << DF_HALF), pwd->h[3]);
   8539    cpu_stw_data(env, addr + (7 << DF_HALF), pwd->h[4]);
   8540    cpu_stw_data(env, addr + (6 << DF_HALF), pwd->h[5]);
   8541    cpu_stw_data(env, addr + (5 << DF_HALF), pwd->h[6]);
   8542    cpu_stw_data(env, addr + (4 << DF_HALF), pwd->h[7]);
   8543#endif
   8544#endif
   8545}
   8546
   8547void helper_msa_st_w(CPUMIPSState *env, uint32_t wd,
   8548                     target_ulong addr)
   8549{
   8550    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8551    int mmu_idx = cpu_mmu_index(env, false);
   8552
   8553    MEMOP_IDX(DF_WORD)
   8554    ensure_writable_pages(env, addr, mmu_idx, GETPC());
   8555#if !defined(CONFIG_USER_ONLY)
   8556#if !defined(HOST_WORDS_BIGENDIAN)
   8557    helper_ret_stl_mmu(env, addr + (0 << DF_WORD), pwd->w[0], oi, GETPC());
   8558    helper_ret_stl_mmu(env, addr + (1 << DF_WORD), pwd->w[1], oi, GETPC());
   8559    helper_ret_stl_mmu(env, addr + (2 << DF_WORD), pwd->w[2], oi, GETPC());
   8560    helper_ret_stl_mmu(env, addr + (3 << DF_WORD), pwd->w[3], oi, GETPC());
   8561#else
   8562    helper_ret_stl_mmu(env, addr + (1 << DF_WORD), pwd->w[0], oi, GETPC());
   8563    helper_ret_stl_mmu(env, addr + (0 << DF_WORD), pwd->w[1], oi, GETPC());
   8564    helper_ret_stl_mmu(env, addr + (3 << DF_WORD), pwd->w[2], oi, GETPC());
   8565    helper_ret_stl_mmu(env, addr + (2 << DF_WORD), pwd->w[3], oi, GETPC());
   8566#endif
   8567#else
   8568#if !defined(HOST_WORDS_BIGENDIAN)
   8569    cpu_stl_data(env, addr + (0 << DF_WORD), pwd->w[0]);
   8570    cpu_stl_data(env, addr + (1 << DF_WORD), pwd->w[1]);
   8571    cpu_stl_data(env, addr + (2 << DF_WORD), pwd->w[2]);
   8572    cpu_stl_data(env, addr + (3 << DF_WORD), pwd->w[3]);
   8573#else
   8574    cpu_stl_data(env, addr + (1 << DF_WORD), pwd->w[0]);
   8575    cpu_stl_data(env, addr + (0 << DF_WORD), pwd->w[1]);
   8576    cpu_stl_data(env, addr + (3 << DF_WORD), pwd->w[2]);
   8577    cpu_stl_data(env, addr + (2 << DF_WORD), pwd->w[3]);
   8578#endif
   8579#endif
   8580}
   8581
   8582void helper_msa_st_d(CPUMIPSState *env, uint32_t wd,
   8583                     target_ulong addr)
   8584{
   8585    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8586    int mmu_idx = cpu_mmu_index(env, false);
   8587
   8588    MEMOP_IDX(DF_DOUBLE)
   8589    ensure_writable_pages(env, addr, mmu_idx, GETPC());
   8590#if !defined(CONFIG_USER_ONLY)
   8591    helper_ret_stq_mmu(env, addr + (0 << DF_DOUBLE), pwd->d[0], oi, GETPC());
   8592    helper_ret_stq_mmu(env, addr + (1 << DF_DOUBLE), pwd->d[1], oi, GETPC());
   8593#else
   8594    cpu_stq_data(env, addr + (0 << DF_DOUBLE), pwd->d[0]);
   8595    cpu_stq_data(env, addr + (1 << DF_DOUBLE), pwd->d[1]);
   8596#endif
   8597}