/* sha1-ce-core.S (3189B) */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto

	/*
	 * Register aliases.
	 *
	 * k0-k3: the four SHA-1 round constants, one per group of 20
	 *        rounds, replicated across all four 32-bit lanes.
	 * t0/t1: temporaries holding "schedule word + round constant";
	 *        two of them so even and odd rounds can ping-pong.
	 */
	k0		.req	v0
	k1		.req	v1
	k2		.req	v2
	k3		.req	v3

	t0		.req	v4
	t1		.req	v5

	/*
	 * dga/dgav: state words a-d (one 128-bit vector, several views).
	 * dgb/dgbv: state word e (scalar 32-bit view + vector view).
	 */
	dga		.req	q6
	dgav		.req	v6
	dgb		.req	s7
	dgbv		.req	v7

	/*
	 * dg0/dg1/dg2: working copies of the hash state while the 80
	 * rounds run; dg1s/dg2s alternately receive the rotated 'e'
	 * produced by sha1h for odd/even rounds.
	 */
	dg0q		.req	q12
	dg0s		.req	s12
	dg0v		.req	v12
	dg1s		.req	s13
	dg1v		.req	v13
	dg2s		.req	s14

	/*
	 * Perform one quad-round without updating the message schedule.
	 *
	 * \op:  selects the round function: c, p or m -> sha1c/sha1p/sha1m.
	 * \ev:  'ev' for an even quad-round, anything else for odd; even
	 *       and odd variants use t1/t0 and dg2s/dg1s alternately so
	 *       the "add next schedule word" can overlap the current round.
	 * \rc:  round-constant register (k0-k3).
	 * \s0:  number of the vector register holding the next schedule
	 *       words (may be blank for the final rounds).
	 * \dg1: optional override for the 'e' operand (used once, with
	 *       dgb, for the very first round of a block).
	 */
	.macro		add_only, op, ev, rc, s0, dg1
	.ifc		\ev, ev
	add		t1.4s, v\s0\().4s, \rc\().4s
	sha1h		dg2s, dg0s
	.ifnb		\dg1
	sha1\op		dg0q, \dg1, t0.4s
	.else
	sha1\op		dg0q, dg1s, t0.4s
	.endif
	.else
	.ifnb		\s0
	add		t0.4s, v\s0\().4s, \rc\().4s
	.endif
	sha1h		dg1s, dg0s
	sha1\op		dg0q, dg2s, t1.4s
	.endif
	.endm

	/*
	 * Perform one quad-round and advance the message schedule: the
	 * sha1su0/sha1su1 pair derives the next four schedule words in
	 * v\s0 from v\s0..v\s3 while add_only consumes v\s1.
	 */
	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
	add_only	\op, \ev, \rc, \s1, \dg1
	sha1su1		v\s0\().4s, v\s3\().4s
	.endm

	/*
	 * Load a 32-bit constant \val into scalar \tmp (in two 16-bit
	 * movz/movk halves) and broadcast it to all lanes of \k.
	 */
	.macro		loadrc, k, val, tmp
	movz		\tmp, :abs_g0_nc:\val
	movk		\tmp, :abs_g1:\val
	dup		\k, \tmp
	.endm

	/*
	 * int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
	 *			 int blocks)
	 *
	 * x0: sst  - hash state; state words at offset 0, plus
	 *            'finalize' and 'count' fields located via the
	 *            ldr_l-loaded offsets below
	 * x1: src  - input data, consumed in 64-byte blocks
	 * w2: blocks - number of blocks remaining; returned in w0 so the
	 *            caller can resume after a cond_yield preemption
	 */
SYM_FUNC_START(sha1_ce_transform)
	/* load round constants */
	loadrc		k0.4s, 0x5a827999, w6
	loadrc		k1.4s, 0x6ed9eba1, w6
	loadrc		k2.4s, 0x8f1bbcdc, w6
	loadrc		k3.4s, 0xca62c1d6, w6

	/* load state: a-d into dgav, e into dgb */
	ld1		{dgav.4s}, [x0]
	ldr		dgb, [x0, #16]

	/* load sha1_ce_state::finalize into w4 */
	ldr_l		w4, sha1_ce_offsetof_finalize, x4
	ldr		w4, [x0, x4]

	/* load input: one 64-byte block into v8-v11, post-increment src */
0:	ld1		{v8.4s-v11.4s}, [x1], #64
	sub		w2, w2, #1

	/* SHA-1 schedule words are big-endian; byte-swap on LE kernels */
CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)

	/* start of round processing; the finalize path re-enters here */
1:	add		t0.4s, v8.4s, k0.4s
	mov		dg0v.16b, dgav.16b

	/* rounds 0-19: Ch function (sha1c), constant k0/k1 */
	add_update	c, ev, k0,  8,  9, 10, 11, dgb
	add_update	c, od, k0,  9, 10, 11,  8
	add_update	c, ev, k0, 10, 11,  8,  9
	add_update	c, od, k0, 11,  8,  9, 10
	add_update	c, ev, k1,  8,  9, 10, 11

	/* rounds 20-39: Parity function (sha1p), constant k1/k2 */
	add_update	p, od, k1,  9, 10, 11,  8
	add_update	p, ev, k1, 10, 11,  8,  9
	add_update	p, od, k1, 11,  8,  9, 10
	add_update	p, ev, k1,  8,  9, 10, 11
	add_update	p, od, k2,  9, 10, 11,  8

	/* rounds 40-59: Maj function (sha1m), constant k2/k3 */
	add_update	m, ev, k2, 10, 11,  8,  9
	add_update	m, od, k2, 11,  8,  9, 10
	add_update	m, ev, k2,  8,  9, 10, 11
	add_update	m, od, k2,  9, 10, 11,  8
	add_update	m, ev, k3, 10, 11,  8,  9

	/* rounds 60-79: Parity again; no more schedule updates needed */
	add_update	p, od, k3, 11,  8,  9, 10
	add_only	p, ev, k3,  9
	add_only	p, od, k3, 10
	add_only	p, ev, k3, 11
	add_only	p, od

	/* update state: fold the working copies back into dgav/dgbv */
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s

	/* done with all blocks? otherwise offer to reschedule, then loop */
	cbz		w2, 2f
	cond_yield	3f, x5, x6
	b		0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block size,
	 * the padding is handled by the C code in that case.
	 */
2:	cbz		x4, 3f
	ldr_l		w4, sha1_ce_offsetof_count, x4
	ldr		x4, [x0, x4]
	movi		v9.2d, #0		// schedule words 4-7 = 0
	mov		x8, #0x80000000		// 0x80 terminator byte (word 1)
	movi		v10.2d, #0		// schedule words 8-11 = 0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d8, x8			// words 0-1: terminator, rest 0
	mov		x4, #0			// clear finalize: pad only once
	mov		v11.d[0], xzr		// words 12-13 = 0
	mov		v11.d[1], x7		// words 14-15 = bit count
	b		1b

	/* store new state */
3:	st1		{dgav.4s}, [x0]
	str		dgb, [x0, #16]
	mov		w0, w2			// return blocks not yet processed
	ret
SYM_FUNC_END(sha1_ce_transform)