cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

crypto_helper.c (26097B)


      1/*
      2 * crypto_helper.c - emulate v8 Crypto Extensions instructions
      3 *
      4 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
      5 *
      6 * This library is free software; you can redistribute it and/or
      7 * modify it under the terms of the GNU Lesser General Public
      8 * License as published by the Free Software Foundation; either
      9 * version 2.1 of the License, or (at your option) any later version.
     10 */
     11
     12#include "qemu/osdep.h"
     13
     14#include "cpu.h"
     15#include "exec/helper-proto.h"
     16#include "tcg/tcg-gvec-desc.h"
     17#include "crypto/aes.h"
     18#include "vec_internal.h"
     19
/*
 * 128-bit working state shared by the helpers in this file.  The same
 * 16 bytes can be viewed as bytes, as four 32-bit words, or as two 64-bit
 * halves.  Helpers fill and drain the state through .l and address
 * individual bytes/words through CR_ST_BYTE()/CR_ST_WORD().
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];
    uint32_t   words[4];
    uint64_t   l[2];
};
     25
/*
 * Element accessors for union CRYPTO_STATE.  When HOST_WORDS_BIGENDIAN is
 * defined the index is remapped ((15 - i) ^ 8 for bytes, (3 - i) ^ 2 for
 * words); otherwise the index is used as-is.
 * NOTE(review): the remapping presumably makes element i name the same
 * lane on either host byte order, given that the state is loaded through
 * the 64-bit .l members -- confirm against the register layout.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[i])
#define CR_ST_WORD(state, i)   ((state).words[i])
#endif
     33
     34/*
     35 * The caller has not been converted to full gvec, and so only
     36 * modifies the low 16 bytes of the vector register.
     37 */
     38static void clear_tail_16(void *vd, uint32_t desc)
     39{
     40    int opr_sz = simd_oprsz(desc);
     41    int max_sz = simd_maxsz(desc);
     42
     43    assert(opr_sz == 16);
     44    clear_tail(vd, opr_sz, max_sz);
     45}
     46
     47static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
     48                           uint64_t *rm, bool decrypt)
     49{
     50    static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
     51    static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
     52    union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
     53    union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
     54    int i;
     55
     56    /* xor state vector with round key */
     57    rk.l[0] ^= st.l[0];
     58    rk.l[1] ^= st.l[1];
     59
     60    /* combine ShiftRows operation and sbox substitution */
     61    for (i = 0; i < 16; i++) {
     62        CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
     63    }
     64
     65    rd[0] = st.l[0];
     66    rd[1] = st.l[1];
     67}
     68
     69void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
     70{
     71    intptr_t i, opr_sz = simd_oprsz(desc);
     72    bool decrypt = simd_data(desc);
     73
     74    for (i = 0; i < opr_sz; i += 16) {
     75        do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
     76    }
     77    clear_tail(vd, opr_sz, simd_maxsz(desc));
     78}
     79
     80static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
     81{
     82    static uint32_t const mc[][256] = { {
     83        /* MixColumns lookup table */
     84        0x00000000, 0x03010102, 0x06020204, 0x05030306,
     85        0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
     86        0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
     87        0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
     88        0x30101020, 0x33111122, 0x36121224, 0x35131326,
     89        0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
     90        0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
     91        0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
     92        0x60202040, 0x63212142, 0x66222244, 0x65232346,
     93        0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
     94        0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
     95        0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
     96        0x50303060, 0x53313162, 0x56323264, 0x55333366,
     97        0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
     98        0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
     99        0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
    100        0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
    101        0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
    102        0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
    103        0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
    104        0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
    105        0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
    106        0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
    107        0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
    108        0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
    109        0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
    110        0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
    111        0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
    112        0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
    113        0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
    114        0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
    115        0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
    116        0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
    117        0x97848413, 0x94858511, 0x91868617, 0x92878715,
    118        0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
    119        0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
    120        0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
    121        0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
    122        0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
    123        0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
    124        0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
    125        0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
    126        0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
    127        0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
    128        0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
    129        0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
    130        0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
    131        0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
    132        0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
    133        0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
    134        0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
    135        0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
    136        0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
    137        0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
    138        0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
    139        0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
    140        0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
    141        0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
    142        0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
    143        0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
    144        0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
    145        0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
    146        0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
    147        0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
    148    }, {
    149        /* Inverse MixColumns lookup table */
    150        0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
    151        0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
    152        0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
    153        0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
    154        0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
    155        0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
    156        0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
    157        0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
    158        0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
    159        0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
    160        0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
    161        0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
    162        0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
    163        0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
    164        0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
    165        0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
    166        0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
    167        0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
    168        0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
    169        0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
    170        0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
    171        0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
    172        0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
    173        0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
    174        0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
    175        0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
    176        0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
    177        0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
    178        0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
    179        0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
    180        0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
    181        0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
    182        0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
    183        0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
    184        0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
    185        0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
    186        0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
    187        0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
    188        0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
    189        0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
    190        0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
    191        0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
    192        0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
    193        0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
    194        0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
    195        0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
    196        0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
    197        0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
    198        0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
    199        0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
    200        0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
    201        0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
    202        0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
    203        0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
    204        0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
    205        0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
    206        0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
    207        0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
    208        0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
    209        0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
    210        0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
    211        0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
    212        0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
    213        0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
    214    } };
    215
    216    union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
    217    int i;
    218
    219    for (i = 0; i < 16; i += 4) {
    220        CR_ST_WORD(st, i >> 2) =
    221            mc[decrypt][CR_ST_BYTE(st, i)] ^
    222            rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
    223            rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
    224            rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
    225    }
    226
    227    rd[0] = st.l[0];
    228    rd[1] = st.l[1];
    229}
    230
    231void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
    232{
    233    intptr_t i, opr_sz = simd_oprsz(desc);
    234    bool decrypt = simd_data(desc);
    235
    236    for (i = 0; i < opr_sz; i += 16) {
    237        do_crypto_aesmc(vd + i, vm + i, decrypt);
    238    }
    239    clear_tail(vd, opr_sz, simd_maxsz(desc));
    240}
    241
    242/*
    243 * SHA-1 logical functions
    244 */
    245
    246static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
    247{
    248    return (x & (y ^ z)) ^ z;
    249}
    250
    251static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
    252{
    253    return x ^ y ^ z;
    254}
    255
    256static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
    257{
    258    return (x & y) | ((x | y) & z);
    259}
    260
    261void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
    262{
    263    uint64_t *d = vd, *n = vn, *m = vm;
    264    uint64_t d0, d1;
    265
    266    d0 = d[1] ^ d[0] ^ m[0];
    267    d1 = n[0] ^ d[1] ^ m[1];
    268    d[0] = d0;
    269    d[1] = d1;
    270
    271    clear_tail_16(vd, desc);
    272}
    273
    274static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
    275                                    uint64_t *rm, uint32_t desc,
    276                                    uint32_t (*fn)(union CRYPTO_STATE *d))
    277{
    278    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    279    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    280    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    281    int i;
    282
    283    for (i = 0; i < 4; i++) {
    284        uint32_t t = fn(&d);
    285
    286        t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
    287             + CR_ST_WORD(m, i);
    288
    289        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
    290        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
    291        CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
    292        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
    293        CR_ST_WORD(d, 0) = t;
    294    }
    295    rd[0] = d.l[0];
    296    rd[1] = d.l[1];
    297
    298    clear_tail_16(rd, desc);
    299}
    300
    301static uint32_t do_sha1c(union CRYPTO_STATE *d)
    302{
    303    return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
    304}
    305
/* Helper entry point: crypto_sha1_3reg() with do_sha1c() as the round function. */
void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
}
    310
    311static uint32_t do_sha1p(union CRYPTO_STATE *d)
    312{
    313    return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
    314}
    315
/* Helper entry point: crypto_sha1_3reg() with do_sha1p() as the round function. */
void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
}
    320
    321static uint32_t do_sha1m(union CRYPTO_STATE *d)
    322{
    323    return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
    324}
    325
/* Helper entry point: crypto_sha1_3reg() with do_sha1m() as the round function. */
void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
}
    330
    331void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
    332{
    333    uint64_t *rd = vd;
    334    uint64_t *rm = vm;
    335    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    336
    337    CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
    338    CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
    339
    340    rd[0] = m.l[0];
    341    rd[1] = m.l[1];
    342
    343    clear_tail_16(vd, desc);
    344}
    345
    346void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
    347{
    348    uint64_t *rd = vd;
    349    uint64_t *rm = vm;
    350    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    351    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    352
    353    CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
    354    CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
    355    CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
    356    CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
    357
    358    rd[0] = d.l[0];
    359    rd[1] = d.l[1];
    360
    361    clear_tail_16(vd, desc);
    362}
    363
    364/*
    365 * The SHA-256 logical functions, according to
    366 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
    367 */
    368
    369static uint32_t S0(uint32_t x)
    370{
    371    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
    372}
    373
    374static uint32_t S1(uint32_t x)
    375{
    376    return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
    377}
    378
    379static uint32_t s0(uint32_t x)
    380{
    381    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
    382}
    383
    384static uint32_t s1(uint32_t x)
    385{
    386    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
    387}
    388
    389void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
    390{
    391    uint64_t *rd = vd;
    392    uint64_t *rn = vn;
    393    uint64_t *rm = vm;
    394    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    395    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    396    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    397    int i;
    398
    399    for (i = 0; i < 4; i++) {
    400        uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
    401                     + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
    402                     + CR_ST_WORD(m, i);
    403
    404        CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
    405        CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
    406        CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
    407        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
    408
    409        t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
    410             + S0(CR_ST_WORD(d, 0));
    411
    412        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
    413        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
    414        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
    415        CR_ST_WORD(d, 0) = t;
    416    }
    417
    418    rd[0] = d.l[0];
    419    rd[1] = d.l[1];
    420
    421    clear_tail_16(vd, desc);
    422}
    423
    424void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
    425{
    426    uint64_t *rd = vd;
    427    uint64_t *rn = vn;
    428    uint64_t *rm = vm;
    429    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    430    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    431    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    432    int i;
    433
    434    for (i = 0; i < 4; i++) {
    435        uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
    436                     + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
    437                     + CR_ST_WORD(m, i);
    438
    439        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
    440        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
    441        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
    442        CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
    443    }
    444
    445    rd[0] = d.l[0];
    446    rd[1] = d.l[1];
    447
    448    clear_tail_16(vd, desc);
    449}
    450
    451void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
    452{
    453    uint64_t *rd = vd;
    454    uint64_t *rm = vm;
    455    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    456    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    457
    458    CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
    459    CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
    460    CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
    461    CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
    462
    463    rd[0] = d.l[0];
    464    rd[1] = d.l[1];
    465
    466    clear_tail_16(vd, desc);
    467}
    468
    469void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
    470{
    471    uint64_t *rd = vd;
    472    uint64_t *rn = vn;
    473    uint64_t *rm = vm;
    474    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    475    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    476    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    477
    478    CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
    479    CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
    480    CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
    481    CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
    482
    483    rd[0] = d.l[0];
    484    rd[1] = d.l[1];
    485
    486    clear_tail_16(vd, desc);
    487}
    488
    489/*
    490 * The SHA-512 logical functions (same as above but using 64-bit operands)
    491 */
    492
    493static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
    494{
    495    return (x & (y ^ z)) ^ z;
    496}
    497
    498static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
    499{
    500    return (x & y) | ((x | y) & z);
    501}
    502
    503static uint64_t S0_512(uint64_t x)
    504{
    505    return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
    506}
    507
    508static uint64_t S1_512(uint64_t x)
    509{
    510    return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
    511}
    512
    513static uint64_t s0_512(uint64_t x)
    514{
    515    return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
    516}
    517
    518static uint64_t s1_512(uint64_t x)
    519{
    520    return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
    521}
    522
    523void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
    524{
    525    uint64_t *rd = vd;
    526    uint64_t *rn = vn;
    527    uint64_t *rm = vm;
    528    uint64_t d0 = rd[0];
    529    uint64_t d1 = rd[1];
    530
    531    d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
    532    d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
    533
    534    rd[0] = d0;
    535    rd[1] = d1;
    536
    537    clear_tail_16(vd, desc);
    538}
    539
    540void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
    541{
    542    uint64_t *rd = vd;
    543    uint64_t *rn = vn;
    544    uint64_t *rm = vm;
    545    uint64_t d0 = rd[0];
    546    uint64_t d1 = rd[1];
    547
    548    d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
    549    d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
    550
    551    rd[0] = d0;
    552    rd[1] = d1;
    553
    554    clear_tail_16(vd, desc);
    555}
    556
    557void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
    558{
    559    uint64_t *rd = vd;
    560    uint64_t *rn = vn;
    561    uint64_t d0 = rd[0];
    562    uint64_t d1 = rd[1];
    563
    564    d0 += s0_512(rd[1]);
    565    d1 += s0_512(rn[0]);
    566
    567    rd[0] = d0;
    568    rd[1] = d1;
    569
    570    clear_tail_16(vd, desc);
    571}
    572
    573void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
    574{
    575    uint64_t *rd = vd;
    576    uint64_t *rn = vn;
    577    uint64_t *rm = vm;
    578
    579    rd[0] += s1_512(rn[0]) + rm[0];
    580    rd[1] += s1_512(rn[1]) + rm[1];
    581
    582    clear_tail_16(vd, desc);
    583}
    584
    585void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
    586{
    587    uint64_t *rd = vd;
    588    uint64_t *rn = vn;
    589    uint64_t *rm = vm;
    590    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    591    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    592    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    593    uint32_t t;
    594
    595    t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
    596    CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
    597
    598    t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
    599    CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
    600
    601    t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
    602    CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
    603
    604    t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
    605    CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
    606
    607    rd[0] = d.l[0];
    608    rd[1] = d.l[1];
    609
    610    clear_tail_16(vd, desc);
    611}
    612
    613void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
    614{
    615    uint64_t *rd = vd;
    616    uint64_t *rn = vn;
    617    uint64_t *rm = vm;
    618    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    619    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    620    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    621    uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
    622
    623    CR_ST_WORD(d, 0) ^= t;
    624    CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
    625    CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
    626    CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
    627                        ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
    628
    629    rd[0] = d.l[0];
    630    rd[1] = d.l[1];
    631
    632    clear_tail_16(vd, desc);
    633}
    634
/*
 * Common body of the four SM3TT* helpers.
 *
 * @rd:     state vector, read and written (low 16 bytes)
 * @rn:     only word 3 is used
 * @rm:     word simd_data(desc) (asserted to be < 4) is used
 * @desc:   simd descriptor; carries the word index in its data field
 * @opcode: 0 = SM3TT1A, 1 = SM3TT1B, 2 = SM3TT2A, 3 = SM3TT2B
 *
 * The function is forced inline and @opcode is always passed as a literal
 * by the DO_SM3TT() wrappers, which is what allows the final else branch
 * to contain qemu_build_not_reached().
 */
static inline void QEMU_ALWAYS_INLINE
crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
             uint32_t desc, uint32_t opcode)
{
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    uint32_t imm2 = simd_data(desc);
    uint32_t t;

    assert(imm2 < 4);

    /* Select the boolean function applied to words 3, 2, 1 of d. */
    if (opcode == 0 || opcode == 2) {
        /* SM3TT1A, SM3TT2A */
        t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
    } else if (opcode == 1) {
        /* SM3TT1B */
        t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
    } else if (opcode == 3) {
        /* SM3TT2B */
        t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
    } else {
        qemu_build_not_reached();
    }

    t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);

    /* Start shifting the state down: old word 1 becomes word 0. */
    CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);

    if (opcode < 2) {
        /* SM3TT1A, SM3TT1B */
        t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);

        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
    } else {
        /* SM3TT2A, SM3TT2B */
        t += CR_ST_WORD(n, 3);
        t ^= rol32(t, 9) ^ rol32(t, 17);

        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
    }

    /* Finish the shift; the newly computed value enters at word 3. */
    CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
    CR_ST_WORD(d, 3) = t;

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(rd, desc);
}
    685
/*
 * Stamp out one helper per SM3TT variant.  Each generated helper simply
 * forwards to crypto_sm3tt() with OPCODE passed as a literal.
 */
#define DO_SM3TT(NAME, OPCODE) \
    void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
    { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }

DO_SM3TT(crypto_sm3tt1a, 0)
DO_SM3TT(crypto_sm3tt1b, 1)
DO_SM3TT(crypto_sm3tt2a, 2)
DO_SM3TT(crypto_sm3tt2b, 3)

#undef DO_SM3TT
    696
/*
 * 256-entry byte substitution table, indexed by the input byte.  Used by
 * do_crypto_sm4e() and do_crypto_sm4ekey() to substitute each byte of a
 * 32-bit word.
 */
static uint8_t const sm4_sbox[] = {
    0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
    0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
    0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
    0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
    0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
    0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
    0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
    0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
    0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
    0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
    0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
    0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
    0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
    0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
    0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
    0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
    0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
    0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
    0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
    0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
    0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
    0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
    0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
    0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
    0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
    0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
    0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
    0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
    0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
    0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
    0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
    0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
};
    731
    732static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
    733{
    734    union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
    735    union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
    736    uint32_t t, i;
    737
    738    for (i = 0; i < 4; i++) {
    739        t = CR_ST_WORD(d, (i + 1) % 4) ^
    740            CR_ST_WORD(d, (i + 2) % 4) ^
    741            CR_ST_WORD(d, (i + 3) % 4) ^
    742            CR_ST_WORD(n, i);
    743
    744        t = sm4_sbox[t & 0xff] |
    745            sm4_sbox[(t >> 8) & 0xff] << 8 |
    746            sm4_sbox[(t >> 16) & 0xff] << 16 |
    747            sm4_sbox[(t >> 24) & 0xff] << 24;
    748
    749        CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
    750                            rol32(t, 24);
    751    }
    752
    753    rd[0] = d.l[0];
    754    rd[1] = d.l[1];
    755}
    756
    757void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
    758{
    759    intptr_t i, opr_sz = simd_oprsz(desc);
    760
    761    for (i = 0; i < opr_sz; i += 16) {
    762        do_crypto_sm4e(vd + i, vn + i, vm + i);
    763    }
    764    clear_tail(vd, opr_sz, simd_maxsz(desc));
    765}
    766
    767static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
    768{
    769    union CRYPTO_STATE d;
    770    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    771    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    772    uint32_t t, i;
    773
    774    d = n;
    775    for (i = 0; i < 4; i++) {
    776        t = CR_ST_WORD(d, (i + 1) % 4) ^
    777            CR_ST_WORD(d, (i + 2) % 4) ^
    778            CR_ST_WORD(d, (i + 3) % 4) ^
    779            CR_ST_WORD(m, i);
    780
    781        t = sm4_sbox[t & 0xff] |
    782            sm4_sbox[(t >> 8) & 0xff] << 8 |
    783            sm4_sbox[(t >> 16) & 0xff] << 16 |
    784            sm4_sbox[(t >> 24) & 0xff] << 24;
    785
    786        CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
    787    }
    788
    789    rd[0] = d.l[0];
    790    rd[1] = d.l[1];
    791}
    792
    793void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
    794{
    795    intptr_t i, opr_sz = simd_oprsz(desc);
    796
    797    for (i = 0; i < opr_sz; i += 16) {
    798        do_crypto_sm4ekey(vd + i, vn + i, vm + i);
    799    }
    800    clear_tail(vd, opr_sz, simd_maxsz(desc));
    801}
    802
    803void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
    804{
    805    intptr_t i, opr_sz = simd_oprsz(desc);
    806    uint64_t *d = vd, *n = vn, *m = vm;
    807
    808    for (i = 0; i < opr_sz / 8; ++i) {
    809        d[i] = n[i] ^ rol64(m[i], 1);
    810    }
    811    clear_tail(vd, opr_sz, simd_maxsz(desc));
    812}