sha3-ce-core.S (6227B)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * Define one assembler symbol per vector register operand spelling
	 * (.Lv<N>.2d and .Lv<N>.16b), each set to the register number N.
	 * The instruction macros below paste ".L" in front of their operands
	 * to turn e.g. "v29.16b" into the 5-bit register field value 29.
	 */
	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr

	/*
	 * ARMv8.2 Crypto Extensions instructions
	 *
	 * These are emitted as raw opcodes via .inst, so the assembler does
	 * not need to know the mnemonics. Field layout used by all four:
	 * Rd at bit 0, Rn at bit 5, Ra (or imm6 for xar) at bit 10, Rm at
	 * bit 16.
	 *
	 * Semantics per the Arm Architecture Reference Manual:
	 *   eor3 rd, rn, rm, ra   : rd = rn ^ rm ^ ra
	 *   rax1 rd, rn, rm       : rd = rn ^ rol64(rm, 1)
	 *   bcax rd, rn, rm, ra   : rd = rn ^ (rm & ~ra)
	 *   xar  rd, rn, rm, imm6 : rd = ror64(rn ^ rm, imm6)
	 */
	.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
	.endm

	/*
	 * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
	 *
	 * Absorb input blocks into the 25-lane (25 x 64-bit) state at st and
	 * run 24 permutation rounds after each block.
	 *
	 * In:   x0 = st       state, read on entry and written back on exit
	 *       x1 = data     input, advanced by one block per iteration
	 *       w2 = blocks   number of blocks; must be >= 1, because the
	 *                     counter is decremented before it is tested, so
	 *                     a value of 0 would wrap around
	 *       x3 = dg_size  only bits 6, 4 and 2 are examined; going by the
	 *                     SHA3-xxx comments on the branches below this is
	 *                     presumably the digest size in bytes
	 *                     (64/48/32/28) - confirm against the caller
	 * Out:  w0 = value of the block counter on exit, i.e. the number of
	 *            blocks that were not processed
	 * Regs: v0-v24 hold the state lanes (low 64 bits of each register),
	 *       v25-v31 are temporaries, w8 is the round counter, x9 walks
	 *       the round constant table.
	 *
	 * NOTE(review): v8-v15 are overwritten without being saved here; the
	 * caller is presumably responsible for preserving the FP/SIMD
	 * register state around this call - confirm against the caller.
	 */
	.text
SYM_FUNC_START(sha3_ce_transform)
	/* load state */
	add	x8, x0, #32
	ld1	{ v0.1d- v3.1d}, [x0]
	ld1	{ v4.1d- v7.1d}, [x8], #32
	ld1	{ v8.1d-v11.1d}, [x8], #32
	ld1	{v12.1d-v15.1d}, [x8], #32
	ld1	{v16.1d-v19.1d}, [x8], #32
	ld1	{v20.1d-v23.1d}, [x8], #32
	ld1	{v24.1d}, [x8]

	/* per-block loop: w8 = 24 rounds, x9 = start of round constants */
0:	sub	w2, w2, #1
	mov	w8, #24
	adr_l	x9, .Lsha3_rcon

	/* load input */
	/* the first 56 bytes (lanes 0-6) are absorbed for every variant */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v31.8b}, [x1], #24
	eor	v0.8b, v0.8b, v25.8b
	eor	v1.8b, v1.8b, v26.8b
	eor	v2.8b, v2.8b, v27.8b
	eor	v3.8b, v3.8b, v28.8b
	eor	v4.8b, v4.8b, v29.8b
	eor	v5.8b, v5.8b, v30.8b
	eor	v6.8b, v6.8b, v31.8b

	tbnz	x3, #6, 2f		// SHA3-512

	/* not SHA3-512: 48 more bytes (lanes 7-12), 104 bytes so far */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v30.8b}, [x1], #16
	eor	v7.8b, v7.8b, v25.8b
	eor	v8.8b, v8.8b, v26.8b
	eor	v9.8b, v9.8b, v27.8b
	eor	v10.8b, v10.8b, v28.8b
	eor	v11.8b, v11.8b, v29.8b
	eor	v12.8b, v12.8b, v30.8b

	tbnz	x3, #4, 1f		// SHA3-384 or SHA3-224

	// SHA3-256
	/* 32 more bytes (lanes 13-16), 136 bytes per block in total */
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	b	3f

1:	tbz	x3, #2, 3f		// bit 2 cleared? SHA-384

	// SHA3-224
	/* 40 more bytes (lanes 13-17), 144 bytes per block in total */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b}, [x1], #8
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	eor	v17.8b, v17.8b, v29.8b
	b	3f

	// SHA3-512
	/* 16 more bytes (lanes 7-8), 72 bytes per block in total */
2:	ld1	{v25.8b-v26.8b}, [x1], #16
	eor	v7.8b, v7.8b, v25.8b
	eor	v8.8b, v8.8b, v26.8b

	/* round loop: executed 24 times per block */
3:	sub	w8, w8, #1

	/*
	 * Column parities: v25..v29 = XOR of the five lanes in columns
	 * 0..4 respectively (lane index modulo 5).
	 */
	eor3	v29.16b, v4.16b, v9.16b, v14.16b
	eor3	v26.16b, v1.16b, v6.16b, v11.16b
	eor3	v28.16b, v3.16b, v8.16b, v13.16b
	eor3	v25.16b, v0.16b, v5.16b, v10.16b
	eor3	v27.16b, v2.16b, v7.16b, v12.16b
	eor3	v29.16b, v29.16b, v19.16b, v24.16b
	eor3	v26.16b, v26.16b, v16.16b, v21.16b
	eor3	v28.16b, v28.16b, v18.16b, v23.16b
	eor3	v25.16b, v25.16b, v15.16b, v20.16b
	eor3	v27.16b, v27.16b, v17.16b, v22.16b

	/*
	 * Combine each column parity with the next-but-one parity rotated
	 * left by one bit. The order matters: every source register is
	 * read before the instruction that overwrites it.
	 */
	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
	rax1	v27.2d, v27.2d, v29.2d	// bc[3]

	/*
	 * XOR each lane with the bc[] term of its column, rotate it and
	 * move it to its new position in one step. xar rotates right, so
	 * (64 - n) yields a left rotation by n bits. Lane 0 is not rotated
	 * and therefore uses a plain eor. v29 and v31 serve as temporaries,
	 * and the bc[] registers v25-v28 and v30 are each consumed for the
	 * last time by the xar that overwrites them; do not reorder.
	 */
	eor	v0.16b, v0.16b, v30.16b
	xar	v29.2d, v1.2d, v25.2d, (64 - 1)
	xar	v1.2d, v6.2d, v25.2d, (64 - 44)
	xar	v6.2d, v9.2d, v28.2d, (64 - 20)
	xar	v9.2d, v22.2d, v26.2d, (64 - 61)
	xar	v22.2d, v14.2d, v28.2d, (64 - 39)
	xar	v14.2d, v20.2d, v30.2d, (64 - 18)
	xar	v31.2d, v2.2d, v26.2d, (64 - 62)
	xar	v2.2d, v12.2d, v26.2d, (64 - 43)
	xar	v12.2d, v13.2d, v27.2d, (64 - 25)
	xar	v13.2d, v19.2d, v28.2d, (64 - 8)
	xar	v19.2d, v23.2d, v27.2d, (64 - 56)
	xar	v23.2d, v15.2d, v30.2d, (64 - 41)
	xar	v15.2d, v4.2d, v28.2d, (64 - 27)
	xar	v28.2d, v24.2d, v28.2d, (64 - 14)
	xar	v24.2d, v21.2d, v25.2d, (64 - 2)
	xar	v8.2d, v8.2d, v27.2d, (64 - 55)
	xar	v4.2d, v16.2d, v25.2d, (64 - 45)
	xar	v16.2d, v5.2d, v30.2d, (64 - 36)
	xar	v5.2d, v3.2d, v27.2d, (64 - 28)
	xar	v27.2d, v18.2d, v27.2d, (64 - 21)
	xar	v3.2d, v17.2d, v26.2d, (64 - 15)
	xar	v25.2d, v11.2d, v25.2d, (64 - 10)
	xar	v26.2d, v7.2d, v26.2d, (64 - 6)
	xar	v30.2d, v10.2d, v30.2d, (64 - 3)

	/*
	 * Non-linear step, one group of five bcax per row of the state:
	 * out = a ^ (b & ~c). Rows are written from lanes 20-24 down to
	 * lanes 0-4.
	 */
	bcax	v20.16b, v31.16b, v22.16b, v8.16b
	bcax	v21.16b, v8.16b, v23.16b, v22.16b
	bcax	v22.16b, v22.16b, v24.16b, v23.16b
	bcax	v23.16b, v23.16b, v31.16b, v24.16b
	bcax	v24.16b, v24.16b, v8.16b, v31.16b

	/*
	 * v31 is free from here on: fetch the round constant for this
	 * round and advance x9 to the next one.
	 */
	ld1r	{v31.2d}, [x9], #8

	bcax	v17.16b, v25.16b, v19.16b, v3.16b
	bcax	v18.16b, v3.16b, v15.16b, v19.16b
	bcax	v19.16b, v19.16b, v16.16b, v15.16b
	bcax	v15.16b, v15.16b, v25.16b, v16.16b
	bcax	v16.16b, v16.16b, v3.16b, v25.16b

	bcax	v10.16b, v29.16b, v12.16b, v26.16b
	bcax	v11.16b, v26.16b, v13.16b, v12.16b
	bcax	v12.16b, v12.16b, v14.16b, v13.16b
	bcax	v13.16b, v13.16b, v29.16b, v14.16b
	bcax	v14.16b, v14.16b, v26.16b, v29.16b

	bcax	v7.16b, v30.16b, v9.16b, v4.16b
	bcax	v8.16b, v4.16b, v5.16b, v9.16b
	bcax	v9.16b, v9.16b, v6.16b, v5.16b
	bcax	v5.16b, v5.16b, v30.16b, v6.16b
	bcax	v6.16b, v6.16b, v4.16b, v30.16b

	bcax	v3.16b, v27.16b, v0.16b, v28.16b
	bcax	v4.16b, v28.16b, v1.16b, v0.16b
	bcax	v0.16b, v0.16b, v2.16b, v1.16b
	bcax	v1.16b, v1.16b, v27.16b, v2.16b
	bcax	v2.16b, v2.16b, v28.16b, v27.16b

	/* XOR the round constant into lane 0 */
	eor	v0.16b, v0.16b, v31.16b

	cbnz	w8, 3b
	/*
	 * cond_yield comes from <asm/assembler.h>. Presumably it branches
	 * to 4f when the task should give up the CPU, using x8 and x9 as
	 * scratch - confirm against the macro definition. Both registers
	 * are reinitialised at label 0 before they are used again.
	 */
	cond_yield 4f, x8, x9
	cbnz	w2, 0b

	/* save state */
4:	st1	{ v0.1d- v3.1d}, [x0], #32
	st1	{ v4.1d- v7.1d}, [x0], #32
	st1	{ v8.1d-v11.1d}, [x0], #32
	st1	{v12.1d-v15.1d}, [x0], #32
	st1	{v16.1d-v19.1d}, [x0], #32
	st1	{v20.1d-v23.1d}, [x0], #32
	st1	{v24.1d}, [x0]
	/* return the number of blocks left unprocessed */
	mov	w0, w2
	ret
SYM_FUNC_END(sha3_ce_transform)

/*
 * Round constants: 24 64-bit values, one per permutation round, in round
 * order. The transform loads one entry per round with a post-incremented
 * pointer and XORs it into lane 0 of the state.
 *
 * NOTE(review): GNU as treats the .align operand as a power of two on this
 * target, so ".align 8" requests 256-byte alignment, which is far more than
 * 64-bit loads need. Left unchanged to keep the object layout identical.
 */
	.section	".rodata", "a"
	.align		8
.Lsha3_rcon:
	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008