aes-ce-ccm-core.S (5964B)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

	/*
	 * u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
	 *			    u32 macp, u8 const rk[], u32 rounds);
	 *
	 * Fold abytes bytes of input into the 16-byte MAC, encrypting the MAC
	 * each time a full 16-byte block has been XORed into it.
	 *
	 * In:	x0 = mac	(16 bytes, read and written)
	 *	x1 = in		(input bytes)
	 *	w2 = abytes	(number of input bytes; not checked for zero
	 *			 on entry)
	 *	w3 = macp	(bytes of the current block already XORed
	 *			 into mac[] by a previous call; 0 means the
	 *			 MAC is encrypted before any input is used)
	 *	x4 = rk		(round keys, 16 bytes each)
	 *	w5 = rounds	(compared against 12 to pick the entry point
	 *			 into the 3-round loop; presumably 10, 12 or
	 *			 14 for the three AES key sizes)
	 * Out:	w0 = updated macp (0 if the input ended on a block boundary)
	 *
	 * Scratch: x1-x3, x6, x7, v0, v1, v3-v5, flags; x0 is advanced only
	 * on the byte-wise tail path (label 7).
	 */
SYM_FUNC_START(ce_aes_ccm_auth_data)
	ld1	{v0.16b}, [x0]			/* load mac */
	cbz	w3, 1f				/* no partial block pending */
	sub	w3, w3, #16			/* w3 = -(bytes still missing) */
	eor	v1.16b, v1.16b, v1.16b		/* v1 = 0: collects the bytes */
0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
	subs	w2, w2, #1
	add	w3, w3, #1
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	8f				/* out of input? */
	cbnz	w3, 0b				/* block not complete yet */
	eor	v0.16b, v0.16b, v1.16b		/* block complete: fold it in */

	/*
	 * Encrypt v0.  Round keys rotate through v3/v4/v5 and the loop at
	 * 3: performs three rounds per pass.  w7 starts at rounds - 2 and
	 * drops by 3 per pass; entering at 3:, 4: or 5: (rounds < 12,
	 * == 12, > 12) makes it go negative when only the last aese and
	 * the final round key XOR are left.
	 */
1:	ld1	{v3.4s}, [x4]			/* load first round key */
	prfm	pldl1strm, [x1]
	cmp	w5, #12				/* which key size? */
	add	x6, x4, #16
	sub	w7, w5, #2			/* modified # of rounds */
	bmi	2f
	bne	5f
	mov	v5.16b, v3.16b
	b	4f
2:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
3:	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v5.4s}, [x6], #16		/* load next round key */
	bpl	3b
	aese	v0.16b, v4.16b
	subs	w2, w2, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	6f				/* fewer than 16 bytes left */
	ld1	{v1.16b}, [x1], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	bne	1b				/* flags still from subs w2 */
6:	st1	{v0.16b}, [x0]			/* store mac */
	beq	10f				/* ended on a block boundary */
	adds	w2, w2, #16			/* undo the subs: tail length */
	beq	10f
	mov	w3, w2				/* new macp = tail length */
	/* XOR the tail into the stored mac, one byte at a time */
7:	ldrb	w7, [x1], #1
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	strb	w6, [x0], #1
	subs	w2, w2, #1
	beq	10f
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	7b

	/* input ran out while completing a previously started block */
8:	cbz	w3, 91f				/* block happens to be complete */
	mov	w7, w3				/* w7 = -(bytes still missing) */
	add	w3, w3, #16			/* back to a positive macp */
9:	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate bytes into position */
	adds	w7, w7, #1
	bne	9b
91:	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.16b}, [x0]
10:	mov	w0, w3				/* return updated macp */
	ret
SYM_FUNC_END(ce_aes_ccm_auth_data)

	/*
	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
	 *			 u32 rounds);
	 *
	 * Encrypt mac[] and ctr[] with the same key schedule, XOR the two
	 * results and store the outcome back into mac[].
	 *
	 * In:	x0 = mac	(16 bytes, read and written)
	 *	x1 = ctr	(16 bytes, read only)
	 *	x2 = rk		(round keys, 16 bytes each)
	 *	w3 = rounds	(same convention as ce_aes_ccm_auth_data)
	 *
	 * Scratch: x2, x3, v0, v1, v3-v5, flags.
	 */
SYM_FUNC_START(ce_aes_ccm_final)
	ld1	{v3.4s}, [x2], #16		/* load first round key */
	ld1	{v0.16b}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
	bmi	0f				/* flags still from the cmp */
	bne	3f
	mov	v5.16b, v3.16b
	b	2f
0:	mov	v4.16b, v3.16b
1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
	subs	w3, w3, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.16b}, [x0]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_final)

	/*
	 * aes_ccm_do_crypt enc - body shared by ce_aes_ccm_encrypt (enc == 1)
	 * and ce_aes_ccm_decrypt (enc == 0).
	 *
	 * In:	x0 = out, x1 = in, w2 = cbytes, x3 = rk, w4 = rounds,
	 *	x5 = mac, x6 = ctr
	 *
	 * Per 16-byte block the MAC (v0) and the counter block (v1) are
	 * encrypted in parallel; the data block is XORed with the encrypted
	 * counter to produce the output, and the plaintext side of that XOR
	 * is folded into the MAC.
	 *
	 * x8 holds the low 64 bits of the counter in CPU byte order; it is
	 * incremented once per block without carrying into the upper half.
	 * It is written back to ctr[8..15] only when the input ends on a
	 * block boundary: the partial-block path (label 6) returns without
	 * storing it and reuses w6/w7 as byte scratch.
	 *
	 * Scratch: x0-x2, x5-x10, v0-v5, flags.
	 */
	.macro	aes_ccm_do_crypt,enc
	/*
	 * cbytes is a u32, so only w2 is defined on entry; testing x2 here
	 * would depend on whatever the caller left in bits [63:32].
	 */
	cbz	w2, 5f
	ldr	x8, [x6, #8]			/* load lower ctr */
	ld1	{v0.16b}, [x5]			/* load mac */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	cmp	w4, #12				/* which key size? */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.4s}, [x3]			/* load first round key */
	add	x10, x3, #16
	bmi	1f				/* flags still from the cmp */
	bne	4f
	mov	v5.16b, v3.16b
	b	3f
1:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.4s}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	subs	w2, w2, #16
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v1.16b}, [x0], #16		/* write output block */
	bne	0b				/* flags still from subs w2 */
CPU_LE(	rev	x8, x8			)
	st1	{v0.16b}, [x5]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
5:	ret

6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.16b}, [x5]			/* store mac */
	add	w2, w2, #16			/* process partial tail block */
7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	.if	\enc == 1
	eor	w7, w7, w9			/* mac ^= plaintext (input) */
	eor	w9, w9, w6			/* output = input ^ ctr */
	.else
	eor	w9, w9, w6			/* output = input ^ ctr */
	eor	w7, w7, w9			/* mac ^= plaintext (output) */
	.endif
	strb	w9, [x0], #1			/* store out byte */
	strb	w7, [x5], #1			/* store mac byte */
	subs	w2, w2, #1
	beq	5b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	7b
	.endm

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[]);
	 *
	 * Both are instances of aes_ccm_do_crypt; they return immediately
	 * when cbytes is zero.
	 */
SYM_FUNC_START(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)

SYM_FUNC_START(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)