cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

poly1305_glue.c (9183B)


// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <asm/intel-family.h>
#include <asm/simd.h>

asmlinkage void poly1305_init_x86_64(void *ctx,
				     const u8 key[POLY1305_BLOCK_SIZE]);
asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
				       const size_t len, const u32 padbit);
asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
				     const u32 nonce[4]);
asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
				  const u32 nonce[4]);
asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
				    const u32 padbit);
asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
				     const u32 padbit);
asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
				       const size_t len, const u32 padbit);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512);

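/*
 * State layout expected by the x86_64 assembly routines declared above: the
 * accumulator lives either as five base 2^26 limbs (h[], used by the AVX
 * paths) or as three base 2^64 limbs (hs[], used by the scalar path), with
 * is_base2_26 recording which form is current. r[] holds the clamped key and
 * rn[] caches key powers set up by the vectorized block functions.
 */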
struct poly1305_arch_internal {
	union {
		struct {
			u32 h[5];
			u32 is_base2_26;
		};
		u64 hs[3];
	};
	u64 r[2];
	u64 pad;
	struct { u32 r2, r1, r4, r3; } rn[9];
};

/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
 * the unfortunate situation of using AVX and then having to go back to scalar
 * -- because the user is silly and has called the update function from two
 * separate contexts -- then we need to convert back to the original base before
 * proceeding. It is possible to reason that the initial reduction below is
 * sufficient given the implementation invariants. However, for an avoidance of
 * doubt and because this is not performance critical, we do the full reduction
 * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py
 */
static void convert_to_base2_64(void *ctx)
{
	struct poly1305_arch_internal *state = ctx;
	u32 cy;

	if (!state->is_base2_26)
		return;

	cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
	cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
	cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
	cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
	state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
	state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
	state->hs[2] = state->h[4] >> 24;
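/* ULT(a, b) is a branchless unsigned "a < b": it yields the borrow bit of
 * a - b, which below serves as the carry out of each 64-bit addition. */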
#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
	cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
	state->hs[2] &= 3;
	state->hs[0] += cy;
	state->hs[1] += (cy = ULT(state->hs[0], cy));
	state->hs[2] += ULT(state->hs[1], cy);
#undef ULT
	state->is_base2_26 = 0;
}

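/* There is no separate vectorized init: the scalar routine sets up the
 * accumulator and key for every code path. */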
static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_BLOCK_SIZE])
{
	poly1305_init_x86_64(ctx, key);
}

static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
				 const u32 padbit)
{
	struct poly1305_arch_internal *state = ctx;

	/* SIMD disables preemption, so relax after processing each page. */
	BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
		     SZ_4K % POLY1305_BLOCK_SIZE);

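	/* Use the scalar code when SIMD is unavailable, or for short inputs
	 * where the kernel_fpu_begin()/end() overhead is not worth it --
	 * unless the state is already in base 2^26, in which case staying on
	 * the vector path avoids a conversion. */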
	if (!static_branch_likely(&poly1305_use_avx) ||
	    (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
	    !crypto_simd_usable()) {
		convert_to_base2_64(ctx);
		poly1305_blocks_x86_64(ctx, inp, len, padbit);
		return;
	}

	do {
		const size_t bytes = min_t(size_t, len, SZ_4K);

		kernel_fpu_begin();
		if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
			poly1305_blocks_avx512(ctx, inp, bytes, padbit);
		else if (static_branch_likely(&poly1305_use_avx2))
			poly1305_blocks_avx2(ctx, inp, bytes, padbit);
		else
			poly1305_blocks_avx(ctx, inp, bytes, padbit);
		kernel_fpu_end();

		len -= bytes;
		inp += bytes;
	} while (len);
}

static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
			       const u32 nonce[4])
{
	if (!static_branch_likely(&poly1305_use_avx))
		poly1305_emit_x86_64(ctx, mac, nonce);
	else
		poly1305_emit_avx(ctx, mac, nonce);
}

void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
{
	poly1305_simd_init(&dctx->h, key);
	dctx->s[0] = get_unaligned_le32(&key[16]);
	dctx->s[1] = get_unaligned_le32(&key[20]);
	dctx->s[2] = get_unaligned_le32(&key[24]);
	dctx->s[3] = get_unaligned_le32(&key[28]);
	dctx->buflen = 0;
	dctx->sset = true;
}
EXPORT_SYMBOL(poly1305_init_arch);

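/* For the shash interface the 32-byte one-time key is carried in-band at the
 * front of the data stream: the first 16 bytes become r (via the init
 * routine) and the next 16 bytes become the nonce s. Returns how many key
 * bytes were consumed from @inp. */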
static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
					       const u8 *inp, unsigned int len)
{
	unsigned int acc = 0;
	if (unlikely(!dctx->sset)) {
		if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) {
			poly1305_simd_init(&dctx->h, inp);
			inp += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			acc += POLY1305_BLOCK_SIZE;
			dctx->rset = 1;
		}
		if (len >= POLY1305_BLOCK_SIZE) {
			dctx->s[0] = get_unaligned_le32(&inp[0]);
			dctx->s[1] = get_unaligned_le32(&inp[4]);
			dctx->s[2] = get_unaligned_le32(&inp[8]);
			dctx->s[3] = get_unaligned_le32(&inp[12]);
			acc += POLY1305_BLOCK_SIZE;
			dctx->sset = true;
		}
	}
	return acc;
}

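/* Three phases: top up and flush any partially buffered block, feed all
 * remaining whole blocks to the block functions, then stash the tail in the
 * context buffer for the next call. */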
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
			  unsigned int srclen)
{
	unsigned int bytes, used;

	if (unlikely(dctx->buflen)) {
		bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		srclen -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf, POLY1305_BLOCK_SIZE)))
				poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
			dctx->buflen = 0;
		}
	}

	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
		bytes = round_down(srclen, POLY1305_BLOCK_SIZE);
		srclen -= bytes;
		used = crypto_poly1305_setdctxkey(dctx, src, bytes);
		if (likely(bytes - used))
			poly1305_simd_blocks(&dctx->h, src + used, bytes - used, 1);
		src += bytes;
	}

	if (unlikely(srclen)) {
		dctx->buflen = srclen;
		memcpy(dctx->buf, src, srclen);
	}
}
EXPORT_SYMBOL(poly1305_update_arch);

void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	if (unlikely(dctx->buflen)) {
		dctx->buf[dctx->buflen++] = 1;
		memset(dctx->buf + dctx->buflen, 0,
		       POLY1305_BLOCK_SIZE - dctx->buflen);
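		/* padbit is 0 here: the 0x01 terminator was just written into
		 * the buffer, so the block function must not add the usual
		 * implicit high bit. */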
		poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_simd_emit(&dctx->h, dst, dctx->s);
	memzero_explicit(dctx, sizeof(*dctx));
}
EXPORT_SYMBOL(poly1305_final_arch);

static int crypto_poly1305_init(struct shash_desc *desc)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	*dctx = (struct poly1305_desc_ctx){};
	return 0;
}

static int crypto_poly1305_update(struct shash_desc *desc,
				  const u8 *src, unsigned int srclen)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	poly1305_update_arch(dctx, src, srclen);
	return 0;
}

static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (unlikely(!dctx->sset))
		return -ENOKEY;

	poly1305_final_arch(dctx, dst);
	return 0;
}

static struct shash_alg alg = {
	.digestsize	= POLY1305_DIGEST_SIZE,
	.init		= crypto_poly1305_init,
	.update		= crypto_poly1305_update,
	.final		= crypto_poly1305_final,
	.descsize	= sizeof(struct poly1305_desc_ctx),
	.base		= {
		.cra_name		= "poly1305",
		.cra_driver_name	= "poly1305-simd",
		.cra_priority		= 300,
		.cra_blocksize		= POLY1305_BLOCK_SIZE,
		.cra_module		= THIS_MODULE,
	},
};

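/* Probe CPU features once at load time and flip the static branches
 * accordingly; the shash is only registered when the generic crypto hash API
 * is reachable, since the exported library interface above works without it. */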
static int __init poly1305_simd_mod_init(void)
{
	if (boot_cpu_has(X86_FEATURE_AVX) &&
	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
		static_branch_enable(&poly1305_use_avx);
	if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) &&
	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
		static_branch_enable(&poly1305_use_avx2);
	if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) &&
	    boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) &&
	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) &&
	    /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */
	    boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X)
		static_branch_enable(&poly1305_use_avx512);
	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0;
}

static void __exit poly1305_simd_mod_exit(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
		crypto_unregister_shash(&alg);
}

module_init(poly1305_simd_mod_init);
module_exit(poly1305_simd_mod_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
MODULE_DESCRIPTION("Poly1305 authenticator");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-simd");