cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

tcg-runtime-gvec.c (40491B)


      1/*
      2 * Generic vectorized operation runtime
      3 *
      4 * Copyright (c) 2018 Linaro
      5 *
      6 * This library is free software; you can redistribute it and/or
      7 * modify it under the terms of the GNU Lesser General Public
      8 * License as published by the Free Software Foundation; either
      9 * version 2.1 of the License, or (at your option) any later version.
     10 *
     11 * This library is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14 * Lesser General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU Lesser General Public
     17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18 */
     19
     20#include "qemu/osdep.h"
     21#include "qemu/host-utils.h"
     22#include "cpu.h"
     23#include "exec/helper-proto.h"
     24#include "tcg/tcg-gvec-desc.h"
     25
     26
     27static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
     28{
     29    intptr_t maxsz = simd_maxsz(desc);
     30    intptr_t i;
     31
     32    if (unlikely(maxsz > oprsz)) {
     33        for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) {
     34            *(uint64_t *)(d + i) = 0;
     35        }
     36    }
     37}
     38
     39void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
     40{
     41    intptr_t oprsz = simd_oprsz(desc);
     42    intptr_t i;
     43
     44    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
     45        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
     46    }
     47    clear_high(d, oprsz, desc);
     48}
     49
     50void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
     51{
     52    intptr_t oprsz = simd_oprsz(desc);
     53    intptr_t i;
     54
     55    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
     56        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
     57    }
     58    clear_high(d, oprsz, desc);
     59}
     60
     61void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
     62{
     63    intptr_t oprsz = simd_oprsz(desc);
     64    intptr_t i;
     65
     66    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
     67        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
     68    }
     69    clear_high(d, oprsz, desc);
     70}
     71
     72void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
     73{
     74    intptr_t oprsz = simd_oprsz(desc);
     75    intptr_t i;
     76
     77    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
     78        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
     79    }
     80    clear_high(d, oprsz, desc);
     81}
     82
     83void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
     84{
     85    intptr_t oprsz = simd_oprsz(desc);
     86    intptr_t i;
     87
     88    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
     89        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
     90    }
     91    clear_high(d, oprsz, desc);
     92}
     93
     94void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
     95{
     96    intptr_t oprsz = simd_oprsz(desc);
     97    intptr_t i;
     98
     99    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    100        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
    101    }
    102    clear_high(d, oprsz, desc);
    103}
    104
    105void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
    106{
    107    intptr_t oprsz = simd_oprsz(desc);
    108    intptr_t i;
    109
    110    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    111        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
    112    }
    113    clear_high(d, oprsz, desc);
    114}
    115
    116void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
    117{
    118    intptr_t oprsz = simd_oprsz(desc);
    119    intptr_t i;
    120
    121    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    122        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
    123    }
    124    clear_high(d, oprsz, desc);
    125}
    126
    127void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
    128{
    129    intptr_t oprsz = simd_oprsz(desc);
    130    intptr_t i;
    131
    132    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    133        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
    134    }
    135    clear_high(d, oprsz, desc);
    136}
    137
    138void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
    139{
    140    intptr_t oprsz = simd_oprsz(desc);
    141    intptr_t i;
    142
    143    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    144        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
    145    }
    146    clear_high(d, oprsz, desc);
    147}
    148
    149void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
    150{
    151    intptr_t oprsz = simd_oprsz(desc);
    152    intptr_t i;
    153
    154    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    155        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
    156    }
    157    clear_high(d, oprsz, desc);
    158}
    159
    160void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
    161{
    162    intptr_t oprsz = simd_oprsz(desc);
    163    intptr_t i;
    164
    165    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    166        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
    167    }
    168    clear_high(d, oprsz, desc);
    169}
    170
    171void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
    172{
    173    intptr_t oprsz = simd_oprsz(desc);
    174    intptr_t i;
    175
    176    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    177        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
    178    }
    179    clear_high(d, oprsz, desc);
    180}
    181
    182void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
    183{
    184    intptr_t oprsz = simd_oprsz(desc);
    185    intptr_t i;
    186
    187    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    188        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
    189    }
    190    clear_high(d, oprsz, desc);
    191}
    192
    193void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
    194{
    195    intptr_t oprsz = simd_oprsz(desc);
    196    intptr_t i;
    197
    198    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    199        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
    200    }
    201    clear_high(d, oprsz, desc);
    202}
    203
    204void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
    205{
    206    intptr_t oprsz = simd_oprsz(desc);
    207    intptr_t i;
    208
    209    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    210        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
    211    }
    212    clear_high(d, oprsz, desc);
    213}
    214
    215void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
    216{
    217    intptr_t oprsz = simd_oprsz(desc);
    218    intptr_t i;
    219
    220    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    221        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
    222    }
    223    clear_high(d, oprsz, desc);
    224}
    225
    226void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
    227{
    228    intptr_t oprsz = simd_oprsz(desc);
    229    intptr_t i;
    230
    231    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    232        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
    233    }
    234    clear_high(d, oprsz, desc);
    235}
    236
    237void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
    238{
    239    intptr_t oprsz = simd_oprsz(desc);
    240    intptr_t i;
    241
    242    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    243        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
    244    }
    245    clear_high(d, oprsz, desc);
    246}
    247
    248void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
    249{
    250    intptr_t oprsz = simd_oprsz(desc);
    251    intptr_t i;
    252
    253    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    254        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
    255    }
    256    clear_high(d, oprsz, desc);
    257}
    258
    259void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
    260{
    261    intptr_t oprsz = simd_oprsz(desc);
    262    intptr_t i;
    263
    264    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    265        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
    266    }
    267    clear_high(d, oprsz, desc);
    268}
    269
    270void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
    271{
    272    intptr_t oprsz = simd_oprsz(desc);
    273    intptr_t i;
    274
    275    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    276        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
    277    }
    278    clear_high(d, oprsz, desc);
    279}
    280
    281void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
    282{
    283    intptr_t oprsz = simd_oprsz(desc);
    284    intptr_t i;
    285
    286    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    287        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
    288    }
    289    clear_high(d, oprsz, desc);
    290}
    291
    292void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
    293{
    294    intptr_t oprsz = simd_oprsz(desc);
    295    intptr_t i;
    296
    297    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    298        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
    299    }
    300    clear_high(d, oprsz, desc);
    301}
    302
    303void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
    304{
    305    intptr_t oprsz = simd_oprsz(desc);
    306    intptr_t i;
    307
    308    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    309        *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
    310    }
    311    clear_high(d, oprsz, desc);
    312}
    313
    314void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
    315{
    316    intptr_t oprsz = simd_oprsz(desc);
    317    intptr_t i;
    318
    319    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    320        *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
    321    }
    322    clear_high(d, oprsz, desc);
    323}
    324
    325void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
    326{
    327    intptr_t oprsz = simd_oprsz(desc);
    328    intptr_t i;
    329
    330    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    331        *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
    332    }
    333    clear_high(d, oprsz, desc);
    334}
    335
    336void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
    337{
    338    intptr_t oprsz = simd_oprsz(desc);
    339    intptr_t i;
    340
    341    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    342        *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
    343    }
    344    clear_high(d, oprsz, desc);
    345}
    346
    347void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
    348{
    349    intptr_t oprsz = simd_oprsz(desc);
    350    intptr_t i;
    351
    352    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
    353        int8_t aa = *(int8_t *)(a + i);
    354        *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
    355    }
    356    clear_high(d, oprsz, desc);
    357}
    358
    359void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
    360{
    361    intptr_t oprsz = simd_oprsz(desc);
    362    intptr_t i;
    363
    364    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
    365        int16_t aa = *(int16_t *)(a + i);
    366        *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
    367    }
    368    clear_high(d, oprsz, desc);
    369}
    370
    371void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
    372{
    373    intptr_t oprsz = simd_oprsz(desc);
    374    intptr_t i;
    375
    376    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
    377        int32_t aa = *(int32_t *)(a + i);
    378        *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
    379    }
    380    clear_high(d, oprsz, desc);
    381}
    382
    383void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
    384{
    385    intptr_t oprsz = simd_oprsz(desc);
    386    intptr_t i;
    387
    388    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
    389        int64_t aa = *(int64_t *)(a + i);
    390        *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
    391    }
    392    clear_high(d, oprsz, desc);
    393}
    394
    395void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
    396{
    397    intptr_t oprsz = simd_oprsz(desc);
    398
    399    memcpy(d, a, oprsz);
    400    clear_high(d, oprsz, desc);
    401}
    402
    403void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
    404{
    405    intptr_t oprsz = simd_oprsz(desc);
    406    intptr_t i;
    407
    408    if (c == 0) {
    409        oprsz = 0;
    410    } else {
    411        for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    412            *(uint64_t *)(d + i) = c;
    413        }
    414    }
    415    clear_high(d, oprsz, desc);
    416}
    417
    418void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
    419{
    420    intptr_t oprsz = simd_oprsz(desc);
    421    intptr_t i;
    422
    423    if (c == 0) {
    424        oprsz = 0;
    425    } else {
    426        for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    427            *(uint32_t *)(d + i) = c;
    428        }
    429    }
    430    clear_high(d, oprsz, desc);
    431}
    432
    433void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
    434{
    435    HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
    436}
    437
    438void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
    439{
    440    HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
    441}
    442
    443void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
    444{
    445    intptr_t oprsz = simd_oprsz(desc);
    446    intptr_t i;
    447
    448    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    449        *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
    450    }
    451    clear_high(d, oprsz, desc);
    452}
    453
    454void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
    455{
    456    intptr_t oprsz = simd_oprsz(desc);
    457    intptr_t i;
    458
    459    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    460        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
    461    }
    462    clear_high(d, oprsz, desc);
    463}
    464
    465void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
    466{
    467    intptr_t oprsz = simd_oprsz(desc);
    468    intptr_t i;
    469
    470    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    471        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
    472    }
    473    clear_high(d, oprsz, desc);
    474}
    475
    476void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
    477{
    478    intptr_t oprsz = simd_oprsz(desc);
    479    intptr_t i;
    480
    481    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    482        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
    483    }
    484    clear_high(d, oprsz, desc);
    485}
    486
    487void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
    488{
    489    intptr_t oprsz = simd_oprsz(desc);
    490    intptr_t i;
    491
    492    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    493        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
    494    }
    495    clear_high(d, oprsz, desc);
    496}
    497
    498void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
    499{
    500    intptr_t oprsz = simd_oprsz(desc);
    501    intptr_t i;
    502
    503    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    504        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
    505    }
    506    clear_high(d, oprsz, desc);
    507}
    508
    509void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
    510{
    511    intptr_t oprsz = simd_oprsz(desc);
    512    intptr_t i;
    513
    514    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    515        *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
    516    }
    517    clear_high(d, oprsz, desc);
    518}
    519
    520void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
    521{
    522    intptr_t oprsz = simd_oprsz(desc);
    523    intptr_t i;
    524
    525    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    526        *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
    527    }
    528    clear_high(d, oprsz, desc);
    529}
    530
    531void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
    532{
    533    intptr_t oprsz = simd_oprsz(desc);
    534    intptr_t i;
    535
    536    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    537        *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
    538    }
    539    clear_high(d, oprsz, desc);
    540}
    541
    542void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
    543{
    544    intptr_t oprsz = simd_oprsz(desc);
    545    intptr_t i;
    546
    547    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    548        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
    549    }
    550    clear_high(d, oprsz, desc);
    551}
    552
    553void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
    554{
    555    intptr_t oprsz = simd_oprsz(desc);
    556    intptr_t i;
    557
    558    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    559        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
    560    }
    561    clear_high(d, oprsz, desc);
    562}
    563
    564void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
    565{
    566    intptr_t oprsz = simd_oprsz(desc);
    567    intptr_t i;
    568
    569    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    570        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
    571    }
    572    clear_high(d, oprsz, desc);
    573}
    574
    575void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
    576{
    577    intptr_t oprsz = simd_oprsz(desc);
    578    int shift = simd_data(desc);
    579    intptr_t i;
    580
    581    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    582        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
    583    }
    584    clear_high(d, oprsz, desc);
    585}
    586
    587void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
    588{
    589    intptr_t oprsz = simd_oprsz(desc);
    590    int shift = simd_data(desc);
    591    intptr_t i;
    592
    593    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    594        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
    595    }
    596    clear_high(d, oprsz, desc);
    597}
    598
    599void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
    600{
    601    intptr_t oprsz = simd_oprsz(desc);
    602    int shift = simd_data(desc);
    603    intptr_t i;
    604
    605    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    606        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
    607    }
    608    clear_high(d, oprsz, desc);
    609}
    610
    611void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
    612{
    613    intptr_t oprsz = simd_oprsz(desc);
    614    int shift = simd_data(desc);
    615    intptr_t i;
    616
    617    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    618        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
    619    }
    620    clear_high(d, oprsz, desc);
    621}
    622
    623void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
    624{
    625    intptr_t oprsz = simd_oprsz(desc);
    626    int shift = simd_data(desc);
    627    intptr_t i;
    628
    629    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    630        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
    631    }
    632    clear_high(d, oprsz, desc);
    633}
    634
    635void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
    636{
    637    intptr_t oprsz = simd_oprsz(desc);
    638    int shift = simd_data(desc);
    639    intptr_t i;
    640
    641    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    642        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
    643    }
    644    clear_high(d, oprsz, desc);
    645}
    646
    647void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
    648{
    649    intptr_t oprsz = simd_oprsz(desc);
    650    int shift = simd_data(desc);
    651    intptr_t i;
    652
    653    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    654        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
    655    }
    656    clear_high(d, oprsz, desc);
    657}
    658
    659void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
    660{
    661    intptr_t oprsz = simd_oprsz(desc);
    662    int shift = simd_data(desc);
    663    intptr_t i;
    664
    665    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    666        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
    667    }
    668    clear_high(d, oprsz, desc);
    669}
    670
    671void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
    672{
    673    intptr_t oprsz = simd_oprsz(desc);
    674    int shift = simd_data(desc);
    675    intptr_t i;
    676
    677    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    678        *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
    679    }
    680    clear_high(d, oprsz, desc);
    681}
    682
    683void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
    684{
    685    intptr_t oprsz = simd_oprsz(desc);
    686    int shift = simd_data(desc);
    687    intptr_t i;
    688
    689    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    690        *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
    691    }
    692    clear_high(d, oprsz, desc);
    693}
    694
    695void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
    696{
    697    intptr_t oprsz = simd_oprsz(desc);
    698    int shift = simd_data(desc);
    699    intptr_t i;
    700
    701    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    702        *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
    703    }
    704    clear_high(d, oprsz, desc);
    705}
    706
    707void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
    708{
    709    intptr_t oprsz = simd_oprsz(desc);
    710    int shift = simd_data(desc);
    711    intptr_t i;
    712
    713    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    714        *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
    715    }
    716    clear_high(d, oprsz, desc);
    717}
    718
    719void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
    720{
    721    intptr_t oprsz = simd_oprsz(desc);
    722    int shift = simd_data(desc);
    723    intptr_t i;
    724
    725    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    726        *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
    727    }
    728    clear_high(d, oprsz, desc);
    729}
    730
    731void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
    732{
    733    intptr_t oprsz = simd_oprsz(desc);
    734    int shift = simd_data(desc);
    735    intptr_t i;
    736
    737    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    738        *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
    739    }
    740    clear_high(d, oprsz, desc);
    741}
    742
    743void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
    744{
    745    intptr_t oprsz = simd_oprsz(desc);
    746    int shift = simd_data(desc);
    747    intptr_t i;
    748
    749    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    750        *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
    751    }
    752    clear_high(d, oprsz, desc);
    753}
    754
    755void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
    756{
    757    intptr_t oprsz = simd_oprsz(desc);
    758    int shift = simd_data(desc);
    759    intptr_t i;
    760
    761    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    762        *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
    763    }
    764    clear_high(d, oprsz, desc);
    765}
    766
    767void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
    768{
    769    intptr_t oprsz = simd_oprsz(desc);
    770    intptr_t i;
    771
    772    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    773        uint8_t sh = *(uint8_t *)(b + i) & 7;
    774        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
    775    }
    776    clear_high(d, oprsz, desc);
    777}
    778
    779void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
    780{
    781    intptr_t oprsz = simd_oprsz(desc);
    782    intptr_t i;
    783
    784    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    785        uint8_t sh = *(uint16_t *)(b + i) & 15;
    786        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
    787    }
    788    clear_high(d, oprsz, desc);
    789}
    790
    791void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
    792{
    793    intptr_t oprsz = simd_oprsz(desc);
    794    intptr_t i;
    795
    796    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    797        uint8_t sh = *(uint32_t *)(b + i) & 31;
    798        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
    799    }
    800    clear_high(d, oprsz, desc);
    801}
    802
    803void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
    804{
    805    intptr_t oprsz = simd_oprsz(desc);
    806    intptr_t i;
    807
    808    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    809        uint8_t sh = *(uint64_t *)(b + i) & 63;
    810        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
    811    }
    812    clear_high(d, oprsz, desc);
    813}
    814
    815void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
    816{
    817    intptr_t oprsz = simd_oprsz(desc);
    818    intptr_t i;
    819
    820    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    821        uint8_t sh = *(uint8_t *)(b + i) & 7;
    822        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
    823    }
    824    clear_high(d, oprsz, desc);
    825}
    826
    827void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
    828{
    829    intptr_t oprsz = simd_oprsz(desc);
    830    intptr_t i;
    831
    832    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    833        uint8_t sh = *(uint16_t *)(b + i) & 15;
    834        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
    835    }
    836    clear_high(d, oprsz, desc);
    837}
    838
    839void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
    840{
    841    intptr_t oprsz = simd_oprsz(desc);
    842    intptr_t i;
    843
    844    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    845        uint8_t sh = *(uint32_t *)(b + i) & 31;
    846        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
    847    }
    848    clear_high(d, oprsz, desc);
    849}
    850
    851void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
    852{
    853    intptr_t oprsz = simd_oprsz(desc);
    854    intptr_t i;
    855
    856    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    857        uint8_t sh = *(uint64_t *)(b + i) & 63;
    858        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
    859    }
    860    clear_high(d, oprsz, desc);
    861}
    862
    863void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
    864{
    865    intptr_t oprsz = simd_oprsz(desc);
    866    intptr_t i;
    867
    868    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
    869        uint8_t sh = *(uint8_t *)(b + i) & 7;
    870        *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
    871    }
    872    clear_high(d, oprsz, desc);
    873}
    874
    875void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
    876{
    877    intptr_t oprsz = simd_oprsz(desc);
    878    intptr_t i;
    879
    880    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
    881        uint8_t sh = *(uint16_t *)(b + i) & 15;
    882        *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
    883    }
    884    clear_high(d, oprsz, desc);
    885}
    886
    887void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
    888{
    889    intptr_t oprsz = simd_oprsz(desc);
    890    intptr_t i;
    891
    892    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
    893        uint8_t sh = *(uint32_t *)(b + i) & 31;
    894        *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
    895    }
    896    clear_high(d, oprsz, desc);
    897}
    898
    899void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
    900{
    901    intptr_t oprsz = simd_oprsz(desc);
    902    intptr_t i;
    903
    904    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
    905        uint8_t sh = *(uint64_t *)(b + i) & 63;
    906        *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
    907    }
    908    clear_high(d, oprsz, desc);
    909}
    910
    911void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
    912{
    913    intptr_t oprsz = simd_oprsz(desc);
    914    intptr_t i;
    915
    916    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    917        uint8_t sh = *(uint8_t *)(b + i) & 7;
    918        *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
    919    }
    920    clear_high(d, oprsz, desc);
    921}
    922
    923void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
    924{
    925    intptr_t oprsz = simd_oprsz(desc);
    926    intptr_t i;
    927
    928    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    929        uint8_t sh = *(uint16_t *)(b + i) & 15;
    930        *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
    931    }
    932    clear_high(d, oprsz, desc);
    933}
    934
    935void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
    936{
    937    intptr_t oprsz = simd_oprsz(desc);
    938    intptr_t i;
    939
    940    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    941        uint8_t sh = *(uint32_t *)(b + i) & 31;
    942        *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
    943    }
    944    clear_high(d, oprsz, desc);
    945}
    946
    947void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
    948{
    949    intptr_t oprsz = simd_oprsz(desc);
    950    intptr_t i;
    951
    952    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    953        uint8_t sh = *(uint64_t *)(b + i) & 63;
    954        *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
    955    }
    956    clear_high(d, oprsz, desc);
    957}
    958
    959void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
    960{
    961    intptr_t oprsz = simd_oprsz(desc);
    962    intptr_t i;
    963
    964    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    965        uint8_t sh = *(uint8_t *)(b + i) & 7;
    966        *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
    967    }
    968    clear_high(d, oprsz, desc);
    969}
    970
    971void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
    972{
    973    intptr_t oprsz = simd_oprsz(desc);
    974    intptr_t i;
    975
    976    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    977        uint8_t sh = *(uint16_t *)(b + i) & 15;
    978        *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
    979    }
    980    clear_high(d, oprsz, desc);
    981}
    982
    983void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
    984{
    985    intptr_t oprsz = simd_oprsz(desc);
    986    intptr_t i;
    987
    988    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    989        uint8_t sh = *(uint32_t *)(b + i) & 31;
    990        *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
    991    }
    992    clear_high(d, oprsz, desc);
    993}
    994
    995void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
    996{
    997    intptr_t oprsz = simd_oprsz(desc);
    998    intptr_t i;
    999
   1000    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
   1001        uint8_t sh = *(uint64_t *)(b + i) & 63;
   1002        *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
   1003    }
   1004    clear_high(d, oprsz, desc);
   1005}
   1006
   1007#define DO_CMP1(NAME, TYPE, OP)                                            \
   1008void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
   1009{                                                                          \
   1010    intptr_t oprsz = simd_oprsz(desc);                                     \
   1011    intptr_t i;                                                            \
   1012    for (i = 0; i < oprsz; i += sizeof(TYPE)) {                            \
   1013        *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i));        \
   1014    }                                                                      \
   1015    clear_high(d, oprsz, desc);                                            \
   1016}
   1017
   1018#define DO_CMP2(SZ) \
   1019    DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==)    \
   1020    DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=)    \
   1021    DO_CMP1(gvec_lt##SZ, int##SZ##_t, <)      \
   1022    DO_CMP1(gvec_le##SZ, int##SZ##_t, <=)     \
   1023    DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <)    \
   1024    DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
   1025
   1026DO_CMP2(8)
   1027DO_CMP2(16)
   1028DO_CMP2(32)
   1029DO_CMP2(64)
   1030
   1031#undef DO_CMP1
   1032#undef DO_CMP2
   1033
   1034void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
   1035{
   1036    intptr_t oprsz = simd_oprsz(desc);
   1037    intptr_t i;
   1038
   1039    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
   1040        int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
   1041        if (r > INT8_MAX) {
   1042            r = INT8_MAX;
   1043        } else if (r < INT8_MIN) {
   1044            r = INT8_MIN;
   1045        }
   1046        *(int8_t *)(d + i) = r;
   1047    }
   1048    clear_high(d, oprsz, desc);
   1049}
   1050
   1051void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
   1052{
   1053    intptr_t oprsz = simd_oprsz(desc);
   1054    intptr_t i;
   1055
   1056    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
   1057        int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
   1058        if (r > INT16_MAX) {
   1059            r = INT16_MAX;
   1060        } else if (r < INT16_MIN) {
   1061            r = INT16_MIN;
   1062        }
   1063        *(int16_t *)(d + i) = r;
   1064    }
   1065    clear_high(d, oprsz, desc);
   1066}
   1067
   1068void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
   1069{
   1070    intptr_t oprsz = simd_oprsz(desc);
   1071    intptr_t i;
   1072
   1073    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
   1074        int32_t ai = *(int32_t *)(a + i);
   1075        int32_t bi = *(int32_t *)(b + i);
   1076        int32_t di;
   1077        if (sadd32_overflow(ai, bi, &di)) {
   1078            di = (di < 0 ? INT32_MAX : INT32_MIN);
   1079        }
   1080        *(int32_t *)(d + i) = di;
   1081    }
   1082    clear_high(d, oprsz, desc);
   1083}
   1084
   1085void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
   1086{
   1087    intptr_t oprsz = simd_oprsz(desc);
   1088    intptr_t i;
   1089
   1090    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
   1091        int64_t ai = *(int64_t *)(a + i);
   1092        int64_t bi = *(int64_t *)(b + i);
   1093        int64_t di;
   1094        if (sadd64_overflow(ai, bi, &di)) {
   1095            di = (di < 0 ? INT64_MAX : INT64_MIN);
   1096        }
   1097        *(int64_t *)(d + i) = di;
   1098    }
   1099    clear_high(d, oprsz, desc);
   1100}
   1101
   1102void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
   1103{
   1104    intptr_t oprsz = simd_oprsz(desc);
   1105    intptr_t i;
   1106
   1107    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
   1108        int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
   1109        if (r > INT8_MAX) {
   1110            r = INT8_MAX;
   1111        } else if (r < INT8_MIN) {
   1112            r = INT8_MIN;
   1113        }
   1114        *(uint8_t *)(d + i) = r;
   1115    }
   1116    clear_high(d, oprsz, desc);
   1117}
   1118
   1119void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
   1120{
   1121    intptr_t oprsz = simd_oprsz(desc);
   1122    intptr_t i;
   1123
   1124    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
   1125        int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
   1126        if (r > INT16_MAX) {
   1127            r = INT16_MAX;
   1128        } else if (r < INT16_MIN) {
   1129            r = INT16_MIN;
   1130        }
   1131        *(int16_t *)(d + i) = r;
   1132    }
   1133    clear_high(d, oprsz, desc);
   1134}
   1135
   1136void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
   1137{
   1138    intptr_t oprsz = simd_oprsz(desc);
   1139    intptr_t i;
   1140
   1141    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
   1142        int32_t ai = *(int32_t *)(a + i);
   1143        int32_t bi = *(int32_t *)(b + i);
   1144        int32_t di;
   1145        if (ssub32_overflow(ai, bi, &di)) {
   1146            di = (di < 0 ? INT32_MAX : INT32_MIN);
   1147        }
   1148        *(int32_t *)(d + i) = di;
   1149    }
   1150    clear_high(d, oprsz, desc);
   1151}
   1152
   1153void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
   1154{
   1155    intptr_t oprsz = simd_oprsz(desc);
   1156    intptr_t i;
   1157
   1158    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
   1159        int64_t ai = *(int64_t *)(a + i);
   1160        int64_t bi = *(int64_t *)(b + i);
   1161        int64_t di;
   1162        if (ssub64_overflow(ai, bi, &di)) {
   1163            di = (di < 0 ? INT64_MAX : INT64_MIN);
   1164        }
   1165        *(int64_t *)(d + i) = di;
   1166    }
   1167    clear_high(d, oprsz, desc);
   1168}
   1169
   1170void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
   1171{
   1172    intptr_t oprsz = simd_oprsz(desc);
   1173    intptr_t i;
   1174
   1175    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
   1176        unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
   1177        if (r > UINT8_MAX) {
   1178            r = UINT8_MAX;
   1179        }
   1180        *(uint8_t *)(d + i) = r;
   1181    }
   1182    clear_high(d, oprsz, desc);
   1183}
   1184
   1185void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
   1186{
   1187    intptr_t oprsz = simd_oprsz(desc);
   1188    intptr_t i;
   1189
   1190    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
   1191        unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
   1192        if (r > UINT16_MAX) {
   1193            r = UINT16_MAX;
   1194        }
   1195        *(uint16_t *)(d + i) = r;
   1196    }
   1197    clear_high(d, oprsz, desc);
   1198}
   1199
   1200void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
   1201{
   1202    intptr_t oprsz = simd_oprsz(desc);
   1203    intptr_t i;
   1204
   1205    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
   1206        uint32_t ai = *(uint32_t *)(a + i);
   1207        uint32_t bi = *(uint32_t *)(b + i);
   1208        uint32_t di;
   1209        if (uadd32_overflow(ai, bi, &di)) {
   1210            di = UINT32_MAX;
   1211        }
   1212        *(uint32_t *)(d + i) = di;
   1213    }
   1214    clear_high(d, oprsz, desc);
   1215}
   1216
   1217void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
   1218{
   1219    intptr_t oprsz = simd_oprsz(desc);
   1220    intptr_t i;
   1221
   1222    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
   1223        uint64_t ai = *(uint64_t *)(a + i);
   1224        uint64_t bi = *(uint64_t *)(b + i);
   1225        uint64_t di;
   1226        if (uadd64_overflow(ai, bi, &di)) {
   1227            di = UINT64_MAX;
   1228        }
   1229        *(uint64_t *)(d + i) = di;
   1230    }
   1231    clear_high(d, oprsz, desc);
   1232}
   1233
   1234void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
   1235{
   1236    intptr_t oprsz = simd_oprsz(desc);
   1237    intptr_t i;
   1238
   1239    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
   1240        int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
   1241        if (r < 0) {
   1242            r = 0;
   1243        }
   1244        *(uint8_t *)(d + i) = r;
   1245    }
   1246    clear_high(d, oprsz, desc);
   1247}
   1248
   1249void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
   1250{
   1251    intptr_t oprsz = simd_oprsz(desc);
   1252    intptr_t i;
   1253
   1254    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
   1255        int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
   1256        if (r < 0) {
   1257            r = 0;
   1258        }
   1259        *(uint16_t *)(d + i) = r;
   1260    }
   1261    clear_high(d, oprsz, desc);
   1262}
   1263
   1264void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
   1265{
   1266    intptr_t oprsz = simd_oprsz(desc);
   1267    intptr_t i;
   1268
   1269    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
   1270        uint32_t ai = *(uint32_t *)(a + i);
   1271        uint32_t bi = *(uint32_t *)(b + i);
   1272        uint32_t di;
   1273        if (usub32_overflow(ai, bi, &di)) {
   1274            di = 0;
   1275        }
   1276        *(uint32_t *)(d + i) = di;
   1277    }
   1278    clear_high(d, oprsz, desc);
   1279}
   1280
   1281void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
   1282{
   1283    intptr_t oprsz = simd_oprsz(desc);
   1284    intptr_t i;
   1285
   1286    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
   1287        uint64_t ai = *(uint64_t *)(a + i);
   1288        uint64_t bi = *(uint64_t *)(b + i);
   1289        uint64_t di;
   1290        if (usub64_overflow(ai, bi, &di)) {
   1291            di = 0;
   1292        }
   1293        *(uint64_t *)(d + i) = di;
   1294    }
   1295    clear_high(d, oprsz, desc);
   1296}
   1297
   1298void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
   1299{
   1300    intptr_t oprsz = simd_oprsz(desc);
   1301    intptr_t i;
   1302
   1303    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
   1304        int8_t aa = *(int8_t *)(a + i);
   1305        int8_t bb = *(int8_t *)(b + i);
   1306        int8_t dd = aa < bb ? aa : bb;
   1307        *(int8_t *)(d + i) = dd;
   1308    }
   1309    clear_high(d, oprsz, desc);
   1310}
   1311
   1312void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
   1313{
   1314    intptr_t oprsz = simd_oprsz(desc);
   1315    intptr_t i;
   1316
   1317    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
   1318        int16_t aa = *(int16_t *)(a + i);
   1319        int16_t bb = *(int16_t *)(b + i);
   1320        int16_t dd = aa < bb ? aa : bb;
   1321        *(int16_t *)(d + i) = dd;
   1322    }
   1323    clear_high(d, oprsz, desc);
   1324}
   1325
   1326void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
   1327{
   1328    intptr_t oprsz = simd_oprsz(desc);
   1329    intptr_t i;
   1330
   1331    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
   1332        int32_t aa = *(int32_t *)(a + i);
   1333        int32_t bb = *(int32_t *)(b + i);
   1334        int32_t dd = aa < bb ? aa : bb;
   1335        *(int32_t *)(d + i) = dd;
   1336    }
   1337    clear_high(d, oprsz, desc);
   1338}
   1339
   1340void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
   1341{
   1342    intptr_t oprsz = simd_oprsz(desc);
   1343    intptr_t i;
   1344
   1345    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
   1346        int64_t aa = *(int64_t *)(a + i);
   1347        int64_t bb = *(int64_t *)(b + i);
   1348        int64_t dd = aa < bb ? aa : bb;
   1349        *(int64_t *)(d + i) = dd;
   1350    }
   1351    clear_high(d, oprsz, desc);
   1352}
   1353
   1354void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
   1355{
   1356    intptr_t oprsz = simd_oprsz(desc);
   1357    intptr_t i;
   1358
   1359    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
   1360        int8_t aa = *(int8_t *)(a + i);
   1361        int8_t bb = *(int8_t *)(b + i);
   1362        int8_t dd = aa > bb ? aa : bb;
   1363        *(int8_t *)(d + i) = dd;
   1364    }
   1365    clear_high(d, oprsz, desc);
   1366}
   1367
   1368void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
   1369{
   1370    intptr_t oprsz = simd_oprsz(desc);
   1371    intptr_t i;
   1372
   1373    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
   1374        int16_t aa = *(int16_t *)(a + i);
   1375        int16_t bb = *(int16_t *)(b + i);
   1376        int16_t dd = aa > bb ? aa : bb;
   1377        *(int16_t *)(d + i) = dd;
   1378    }
   1379    clear_high(d, oprsz, desc);
   1380}
   1381
   1382void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
   1383{
   1384    intptr_t oprsz = simd_oprsz(desc);
   1385    intptr_t i;
   1386
   1387    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
   1388        int32_t aa = *(int32_t *)(a + i);
   1389        int32_t bb = *(int32_t *)(b + i);
   1390        int32_t dd = aa > bb ? aa : bb;
   1391        *(int32_t *)(d + i) = dd;
   1392    }
   1393    clear_high(d, oprsz, desc);
   1394}
   1395
   1396void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
   1397{
   1398    intptr_t oprsz = simd_oprsz(desc);
   1399    intptr_t i;
   1400
   1401    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
   1402        int64_t aa = *(int64_t *)(a + i);
   1403        int64_t bb = *(int64_t *)(b + i);
   1404        int64_t dd = aa > bb ? aa : bb;
   1405        *(int64_t *)(d + i) = dd;
   1406    }
   1407    clear_high(d, oprsz, desc);
   1408}
   1409
   1410void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
   1411{
   1412    intptr_t oprsz = simd_oprsz(desc);
   1413    intptr_t i;
   1414
   1415    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
   1416        uint8_t aa = *(uint8_t *)(a + i);
   1417        uint8_t bb = *(uint8_t *)(b + i);
   1418        uint8_t dd = aa < bb ? aa : bb;
   1419        *(uint8_t *)(d + i) = dd;
   1420    }
   1421    clear_high(d, oprsz, desc);
   1422}
   1423
   1424void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
   1425{
   1426    intptr_t oprsz = simd_oprsz(desc);
   1427    intptr_t i;
   1428
   1429    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
   1430        uint16_t aa = *(uint16_t *)(a + i);
   1431        uint16_t bb = *(uint16_t *)(b + i);
   1432        uint16_t dd = aa < bb ? aa : bb;
   1433        *(uint16_t *)(d + i) = dd;
   1434    }
   1435    clear_high(d, oprsz, desc);
   1436}
   1437
   1438void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
   1439{
   1440    intptr_t oprsz = simd_oprsz(desc);
   1441    intptr_t i;
   1442
   1443    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
   1444        uint32_t aa = *(uint32_t *)(a + i);
   1445        uint32_t bb = *(uint32_t *)(b + i);
   1446        uint32_t dd = aa < bb ? aa : bb;
   1447        *(uint32_t *)(d + i) = dd;
   1448    }
   1449    clear_high(d, oprsz, desc);
   1450}
   1451
   1452void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
   1453{
   1454    intptr_t oprsz = simd_oprsz(desc);
   1455    intptr_t i;
   1456
   1457    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
   1458        uint64_t aa = *(uint64_t *)(a + i);
   1459        uint64_t bb = *(uint64_t *)(b + i);
   1460        uint64_t dd = aa < bb ? aa : bb;
   1461        *(uint64_t *)(d + i) = dd;
   1462    }
   1463    clear_high(d, oprsz, desc);
   1464}
   1465
   1466void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
   1467{
   1468    intptr_t oprsz = simd_oprsz(desc);
   1469    intptr_t i;
   1470
   1471    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
   1472        uint8_t aa = *(uint8_t *)(a + i);
   1473        uint8_t bb = *(uint8_t *)(b + i);
   1474        uint8_t dd = aa > bb ? aa : bb;
   1475        *(uint8_t *)(d + i) = dd;
   1476    }
   1477    clear_high(d, oprsz, desc);
   1478}
   1479
   1480void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
   1481{
   1482    intptr_t oprsz = simd_oprsz(desc);
   1483    intptr_t i;
   1484
   1485    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
   1486        uint16_t aa = *(uint16_t *)(a + i);
   1487        uint16_t bb = *(uint16_t *)(b + i);
   1488        uint16_t dd = aa > bb ? aa : bb;
   1489        *(uint16_t *)(d + i) = dd;
   1490    }
   1491    clear_high(d, oprsz, desc);
   1492}
   1493
   1494void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
   1495{
   1496    intptr_t oprsz = simd_oprsz(desc);
   1497    intptr_t i;
   1498
   1499    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
   1500        uint32_t aa = *(uint32_t *)(a + i);
   1501        uint32_t bb = *(uint32_t *)(b + i);
   1502        uint32_t dd = aa > bb ? aa : bb;
   1503        *(uint32_t *)(d + i) = dd;
   1504    }
   1505    clear_high(d, oprsz, desc);
   1506}
   1507
   1508void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
   1509{
   1510    intptr_t oprsz = simd_oprsz(desc);
   1511    intptr_t i;
   1512
   1513    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
   1514        uint64_t aa = *(uint64_t *)(a + i);
   1515        uint64_t bb = *(uint64_t *)(b + i);
   1516        uint64_t dd = aa > bb ? aa : bb;
   1517        *(uint64_t *)(d + i) = dd;
   1518    }
   1519    clear_high(d, oprsz, desc);
   1520}
   1521
   1522void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
   1523{
   1524    intptr_t oprsz = simd_oprsz(desc);
   1525    intptr_t i;
   1526
   1527    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
   1528        uint64_t aa = *(uint64_t *)(a + i);
   1529        uint64_t bb = *(uint64_t *)(b + i);
   1530        uint64_t cc = *(uint64_t *)(c + i);
   1531        *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
   1532    }
   1533    clear_high(d, oprsz, desc);
   1534}