aes-modes.S (17382B)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */

/*
 * Names used below that are NOT defined in this file. They are expected to
 * be provided by the including file or by headers it pulls in:
 *
 *   macros:   enc_prepare, dec_prepare, enc_switch_key,
 *             encrypt_block, decrypt_block,
 *             encrypt_block4x, decrypt_block4x,
 *             encrypt_block5x, decrypt_block5x,
 *             xts_cts_skip_tw, xts_reload_mask,
 *             AES_FUNC_START, AES_FUNC_END,
 *             SYM_FUNC_START_LOCAL, SYM_FUNC_END, adr_l, cond_yield
 *   register aliases: cbciv, vctr, xtsmask
 *
 * Throughout this file the *_block macros are invoked with the pattern
 * "data reg(s), rounds, round key pointer, scratch x-reg, scratch w-reg".
 * NOTE(review): the two trailing registers are presumably clobbered by the
 * macro - confirm against the macro definitions in the includer.
 *
 * Argument registers follow AAPCS64: the C prototypes quoted above each
 * routine map their parameters, in order, onto x0/w0 ... x7/w7.
 */

	.text
	.align		4

/*
 * MAX_STRIDE is the number of AES blocks processed per iteration of the
 * interleaved loops. The including file may predefine it; the default is 4.
 * ST4()/ST5() emit their argument only for the matching stride (any value
 * other than 4 selects the ST5 variants).
 */
#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif

/*
 * Out-of-line bodies for the N-way interleaved block macros, reached with
 * 'bl' from the mode routines below.
 *   v0-v3 (v4 for the 5x variants) = blocks, transformed in place
 *   w3 = rounds, x2 = round keys, x8/w7 = scratch
 * Callers that 'bl' here save x29/x30 themselves.
 */
SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block4x)

SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block4x)

#if MAX_STRIDE == 5
SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block5x)

SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block5x)
#endif

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 *
	 * Registers: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks.
	 * Whole groups of MAX_STRIDE blocks go through the interleaved helper;
	 * any remaining blocks are handled one at a time.
	 * Scratch: x5, w6 here; x8, w7 inside the helper.
	 */

AES_FUNC_START(aes_ecb_encrypt)
	stp		x29, x30, [sp, #-16]!		/* frame needed: we 'bl' below */
	mov		x29, sp

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbenc1x			/* fewer than MAX_STRIDE left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_encrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #MAX_STRIDE		/* undo the speculative subtract */
	beq		.Lecbencout
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	ldp		x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_ecb_encrypt)


AES_FUNC_START(aes_ecb_decrypt)
	stp		x29, x30, [sp, #-16]!		/* frame needed: we 'bl' below */
	mov		x29, sp

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbdec1x			/* fewer than MAX_STRIDE left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl		aes_decrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_decrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #MAX_STRIDE		/* undo the speculative subtract */
	beq		.Lecbdecout
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	ldp		x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_ecb_decrypt)


	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 *
	 * Registers: x0 = out, x1 = in, x2 = rk/rk1, w3 = rounds, w4 = blocks,
	 *	      x5 = iv (read on entry, updated on exit), x6 = rk2 (ESSIV).
	 *
	 * The ESSIV entry points first encrypt the IV with rk2 using a fixed
	 * 14 rounds, then branch into the plain CBC code, which therefore has
	 * two entry labels and two matching AES_FUNC_END markers.
	 */

AES_FUNC_START(aes_essiv_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	v4, w8, x6, x7, w9		/* iv = E_rk2(iv) */
	enc_switch_key	w3, x2, x6			/* continue with rk1 */
	b		.Lcbcencloop4x

AES_FUNC_START(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	/*
	 * CBC encryption is inherently serial, so the 4x loop only batches
	 * the loads and stores; each block is still encrypted in turn. There
	 * is no 'bl' on this path, hence no stack frame. v4 always carries
	 * the chaining value.
	 */
.Lcbcencloop4x:
	subs		w4, w4, #4
	bmi		.Lcbcenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v3.16b			/* last ct is next iv */
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w4, w4, #4
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x5]			/* return iv */
	ret
AES_FUNC_END(aes_cbc_encrypt)
AES_FUNC_END(aes_essiv_cbc_encrypt)

AES_FUNC_START(aes_essiv_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9		/* iv = E_rk2(iv) */
	b		.Lessivcbcdecstart

AES_FUNC_START(aes_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */
.Lessivcbcdecstart:
	dec_prepare	w3, x2, x6

	/*
	 * Decryption of the blocks is independent, so MAX_STRIDE blocks are
	 * decrypted together. The ciphertext is needed afterwards as the
	 * chaining value: copies of the leading blocks are kept in spare
	 * vector registers and the trailing one(s) are re-read from memory
	 * after rewinding x1.
	 */
.LcbcdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lcbcdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1		{v4.16b}, [x1], #16		/* get 1 ct block */
	mov		v5.16b, v0.16b			/* keep ct0..ct2 */
	mov		v6.16b, v1.16b
	mov		v7.16b, v2.16b
	bl		aes_decrypt_block5x
	sub		x1, x1, #32			/* back to ct3 */
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v5.16b
#else
	mov		v4.16b, v0.16b			/* keep ct0..ct2 */
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x1, x1, #16			/* back to ct3 */
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
#endif
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov		cbciv.16b, v1.16b		/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{cbciv.16b}, [x5]		/* return iv */
	ldp		x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_cbc_decrypt)
AES_FUNC_END(aes_essiv_cbc_decrypt)


	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 *
	 * Registers: x0 = out, x1 = in, x2 = rk, w3 = rounds, x4 = bytes,
	 *	      x5 = iv (read only; not written back).
	 *
	 * Handles the final two blocks of a CBC message with ciphertext
	 * stealing. With n = bytes - 16, the code reads 16 bytes at in and
	 * 16 bytes at in + n, and writes 16 bytes at out and at out + n.
	 * NOTE(review): this presumably requires 16 < bytes <= 32 so that the
	 * accesses stay inside the buffers and inside the 48-byte permute
	 * table - confirm against the callers.
	 */

AES_FUNC_START(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16			/* n = bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4			/* &table[n] */
	sub		x9, x9, x4			/* &table[32 - n] */
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b	/* 0xff indices yield zero bytes */
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b
	tbl		v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4
	st1		{v0.16b}, [x4]			/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_encrypt)

AES_FUNC_START(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16			/* n = bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4			/* &table[n] */
	sub		x9, x9, x4			/* &table[32 - n] */
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b

	/* tbx leaves destination bytes untouched where the index is 0xff */
	tbx		v0.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_decrypt)

	/*
	 * 48-byte table: 16 x 0xff, the identity permutation 0..15, 16 x 0xff.
	 * A 16-byte load at offset k (0 <= k <= 32) gives a tbl/tbx index
	 * vector that shifts a block by (16 - k) bytes, with 0xff marking the
	 * out-of-range lanes.
	 */
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous


	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int bytes, u8 ctr[])
	 *
	 * Registers: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = bytes,
	 *	      x5 = ctr (16-byte big-endian counter, updated on exit).
	 *
	 * x12 holds the low 64 bits of the counter in CPU byte order so it
	 * can be advanced with ordinary integer adds; vctr holds the counter
	 * block in memory byte order.
	 * Uses v0-v11 and x6-x16 as working registers.
	 */

AES_FUNC_START(aes_ctr_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	enc_prepare	w3, x2, x12
	ld1		{vctr.16b}, [x5]

	umov		x12, vctr.d[1]		/* keep swabbed ctr in reg */
	rev		x12, x12

.LctrloopNx:
	add		w7, w4, #15
	sub		w4, w4, #MAX_STRIDE << 4	/* w4 < 0 => partial batch */
	lsr		w7, w7, #4			/* blocks left, rounded up */
	mov		w8, #MAX_STRIDE
	cmp		w7, w8
	csel		w7, w7, w8, lt			/* w7 = blocks in this batch */
	adds		x12, x12, x7			/* advance ctr; C = low half wrapped */

	mov		v0.16b, vctr.16b
	mov		v1.16b, vctr.16b
	mov		v2.16b, vctr.16b
	mov		v3.16b, vctr.16b
ST5(	mov		v4.16b, vctr.16b		)
	bcs		0f				/* rare: carry into the upper half */

	/* the carry handling is emitted out of line, in subsection 1 */
	.subsection	1
	/* apply carry to outgoing counter */
0:	umov		x8, vctr.d[0]
	rev		x8, x8
	add		x8, x8, #1
	rev		x8, x8
	ins		vctr.d[0], x8

	/* apply carry to N counter blocks for N := x12 */
	cbz		x12, 2f
	/*
	 * Computed branch into the list below: each entry is two 4-byte
	 * instructions (bti c + mov), so jumping to 1f - 8 * N executes
	 * exactly the last N entries.
	 */
	adr		x16, 1f
	sub		x16, x16, x12, lsl #3
	br		x16
	bti		c
	mov		v0.d[0], vctr.d[0]
	bti		c
	mov		v1.d[0], vctr.d[0]
	bti		c
	mov		v2.d[0], vctr.d[0]
	bti		c
	mov		v3.d[0], vctr.d[0]
ST5(	bti		c				)
ST5(	mov		v4.d[0], vctr.d[0]		)
1:	b		2f
	.previous

	/*
	 * v0 keeps the incoming counter. The remaining blocks receive
	 * x12 - (MAX_STRIDE - 1) ... x12 - 1 in their low halves, so the
	 * highest counter value of the batch always lands in v3 (v4 for the
	 * 5-way variant).
	 */
2:	rev		x7, x12
	ins		vctr.d[1], x7			/* outgoing counter */
	sub		x7, x12, #MAX_STRIDE - 1
	sub		x8, x12, #MAX_STRIDE - 2
	sub		x9, x12, #MAX_STRIDE - 3
	rev		x7, x7
	rev		x8, x8
	mov		v1.d[1], x7
	rev		x9, x9
ST5(	sub		x10, x12, #MAX_STRIDE - 4	)
	mov		v2.d[1], x8
ST5(	rev		x10, x10			)
	mov		v3.d[1], x9
ST5(	mov		v4.d[1], x10			)
	tbnz		w4, #31, .Lctrtail		/* w4 negative: partial batch */
	ld1		{v5.16b-v7.16b}, [x1], #48
ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)
	eor		v0.16b, v5.16b, v0.16b
ST4(	ld1		{v5.16b}, [x1], #16		)
	eor		v1.16b, v6.16b, v1.16b
ST5(	ld1		{v5.16b-v6.16b}, [x1], #32	)
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	cbz		w4, .Lctrout
	b		.LctrloopNx

.Lctrout:
	st1		{vctr.16b}, [x5]	/* return next CTR value */
	ldp		x29, x30, [sp], #16
	ret

.Lctrtail:
	/* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */
	/*
	 * x13 = size of the final block (1..16). x14/x15/x16 are the pointer
	 * increments for the leading blocks: 16 when that block exists, 0
	 * otherwise, so absent blocks simply alias the first one.
	 */
	mov		x16, #16
	ands		x6, x4, #0xf
	csel		x13, x6, x16, ne

ST5(	cmp		w4, #64 - (MAX_STRIDE << 4)	)
ST5(	csel		x14, x16, xzr, gt		)
	cmp		w4, #48 - (MAX_STRIDE << 4)
	csel		x15, x16, xzr, gt
	cmp		w4, #32 - (MAX_STRIDE << 4)
	csel		x16, x16, xzr, gt
	cmp		w4, #16 - (MAX_STRIDE << 4)

	/* neither adr_l nor add touches the flags tested by ble below */
	adr_l		x12, .Lcts_permute_table
	add		x12, x12, x13
	ble		.Lctrtail1x			/* at most 16 bytes left */

ST5(	ld1		{v5.16b}, [x1], x14		)
	ld1		{v6.16b}, [x1], x15
	ld1		{v7.16b}, [x1], x16

ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)

	ld1		{v8.16b}, [x1], x13
	ld1		{v9.16b}, [x1]			/* final 16 bytes, overlapping */
	ld1		{v10.16b}, [x12]

ST4(	eor		v6.16b, v6.16b, v0.16b		)
ST4(	eor		v7.16b, v7.16b, v1.16b		)
ST4(	tbl		v3.16b, {v3.16b}, v10.16b	)
ST4(	eor		v8.16b, v8.16b, v2.16b		)
ST4(	eor		v9.16b, v9.16b, v3.16b		)

ST5(	eor		v5.16b, v5.16b, v0.16b		)
ST5(	eor		v6.16b, v6.16b, v1.16b		)
ST5(	tbl		v4.16b, {v4.16b}, v10.16b	)
ST5(	eor		v7.16b, v7.16b, v2.16b		)
ST5(	eor		v8.16b, v8.16b, v3.16b		)
ST5(	eor		v9.16b, v9.16b, v4.16b		)

ST5(	st1		{v5.16b}, [x0], x14		)
	st1		{v6.16b}, [x0], x15
	st1		{v7.16b}, [x0], x16
	add		x13, x13, x0
	st1		{v9.16b}, [x13]		// overlapping stores
	st1		{v8.16b}, [x0]
	b		.Lctrout

.Lctrtail1x:
	/*
	 * At most 16 bytes remain. x6 = bytes & 15; the flags still come
	 * from the cmp against 16 above, so 'eq' means exactly 16 bytes.
	 * Otherwise both pointers are moved back by 16 - x6 so that the
	 * 16-byte access ends at the end of the buffer; the bytes in front
	 * of the data are read from the output buffer and written back
	 * unchanged.
	 * NOTE(review): when fewer than 16 bytes were passed in total this
	 * touches memory below in/out - presumably the callers guarantee
	 * that is safe; confirm.
	 */
	sub		x7, x6, #16
	csel		x6, x6, x7, eq
	add		x1, x1, x6
	add		x0, x0, x6
	ld1		{v5.16b}, [x1]
	ld1		{v6.16b}, [x0]
ST5(	mov		v3.16b, v4.16b			)
	encrypt_block	v3, w3, x2, x8, w7
	ld1		{v10.16b-v11.16b}, [x12]
	tbl		v3.16b, {v3.16b}, v10.16b
	sshr		v11.16b, v11.16b, #7		/* 0xff entries -> all-ones bytes */
	eor		v5.16b, v5.16b, v3.16b
	bif		v5.16b, v6.16b, v11.16b		/* take v6 where the mask is clear */
	st1		{v5.16b}, [x0]
	b		.Lctrout
AES_FUNC_END(aes_ctr_encrypt)


	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 *
	 * Registers: x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = bytes,
	 *	      x5 = rk2, x6 = iv (tweak; updated on exit), w7 = first.
	 * v4 holds the current tweak and v8 is passed as scratch to the
	 * tweak macros.
	 */

	/*
	 * next_tweak: \out = \in multiplied by x in GF(2^128), computed on
	 * two 64-bit lanes. Each lane is doubled; the bit shifted out of the
	 * low lane is carried into the high lane, and the bit shifted out of
	 * the high lane is folded back into the low lane as 0x87.
	 * \tmp is clobbered. Relies on xtsmask as built by xts_load_mask.
	 */
	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	/*
	 * xts_load_mask: set xtsmask to the 64-bit lanes { 0x1, 0x87 }.
	 * \tmp is clobbered.
	 */
	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm

AES_FUNC_START(aes_xts_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	/* first call: the initial tweak is the IV encrypted with rk2 */
	enc_prepare	w3, x5, x8
	xts_cts_skip_tw	w7, .LxtsencNx
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #64
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b			/* last tweak used */
	cbz		w4, .Lxtsencret
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #64
	beq		.Lxtsencout
	subs		w4, w4, #16
	bmi		.LxtsencctsNx			/* only a partial block is left */
.Lxtsencloop:
	ld1		{v0.16b}, [x1], #16
.Lxtsencctsout:
	eor		v0.16b, v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	cbz		w4, .Lxtsencout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8			/* vector only: flags stay live */
	bmi		.Lxtsenccts
	st1		{v0.16b}, [x0], #16
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v0.16b}, [x0]
.Lxtsencret:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret

.LxtsencctsNx:
	/*
	 * The last full block was produced by the 4-way loop: take it back
	 * from v3 and rewind the output pointer to where it was stored.
	 */
	mov		v0.16b, v3.16b
	sub		x0, x0, #16
.Lxtsenccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw	/* rewind input pointer */
	add		w4, w4, #16		/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4		/* output address of final block */

	ld1		{v1.16b}, [x1]		/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b
	st1		{v2.16b}, [x4]		/* overlapping stores */
	mov		w4, wzr			/* make the re-entered path exit */
	b		.Lxtsencctsout
AES_FUNC_END(aes_xts_encrypt)

AES_FUNC_START(aes_xts_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	/* subtract 16 bytes if we are doing CTS */
	sub		w8, w4, #0x10
	tst		w4, #0xf
	csel		w4, w4, w8, eq

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	xts_cts_skip_tw	w7, .Lxtsdecskiptw
	cbz		w7, .Lxtsdecnotfirst

	/* the tweak is always generated with the encryption direction */
	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
.Lxtsdecskiptw:
	dec_prepare	w3, x2, x8
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #64
	bmi		.Lxtsdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b			/* last tweak used */
	cbz		w4, .Lxtsdecout
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #64
	beq		.Lxtsdecout
	subs		w4, w4, #16
.Lxtsdecloop:
	ld1		{v0.16b}, [x1], #16
	bmi		.Lxtsdeccts			/* tests the subs before the load */
.Lxtsdecctsout:
	eor		v0.16b, v0.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	cbz		w4, .Lxtsdecout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8			/* vector only: flags stay live */
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret

.Lxtsdeccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw	/* rewind input pointer */
	add		w4, w4, #16		/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4		/* output address of final block */

	/*
	 * This block is decrypted with the NEXT tweak (v5); the current
	 * tweak (v4) is used afterwards, at .Lxtsdecctsout, for the block
	 * assembled from the stolen bytes.
	 */
	next_tweak	v5, v4, v8

	ld1		{v1.16b}, [x1]		/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	eor		v0.16b, v0.16b, v5.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v5.16b

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b

	st1		{v2.16b}, [x4]		/* overlapping stores */
	mov		w4, wzr			/* make the re-entered path exit */
	b		.Lxtsdecctsout
AES_FUNC_END(aes_xts_decrypt)

	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 *
	 * Registers: x0 = in, x1 = rk, w2 = rounds, w3 = blocks, x4 = dg
	 *	      (running digest, read on entry and written back),
	 *	      w5 = enc_before, w6 = enc_after.
	 * Returns in w0 the value of w3 at exit, i.e. the number of blocks
	 * that were not consumed (non-zero only when leaving through the
	 * cond_yield path).
	 */
AES_FUNC_START(aes_mac_update)
	ld1		{v0.16b}, [x4]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x

	encrypt_block	v0, w2, x1, x7, w8		/* enc_before != 0 */

.Lmacloop4x:
	subs		w3, w3, #4
	bmi		.Lmac1x
	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	/*
	 * x5 = enc_after if this was the last block, otherwise all ones:
	 * the final encryption is skipped only for the very last block and
	 * only when enc_after is zero. w5 (enc_before) is dead by now.
	 */
	cmp		w3, wzr
	csinv		x5, x6, xzr, eq
	cbz		w5, .Lmacout
	encrypt_block	v0, w2, x1, x7, w8
	st1		{v0.16b}, [x4]			/* return dg */
	/* NOTE(review): presumably leaves via .Lmacout when a reschedule is due */
	cond_yield	.Lmacout, x7, x8
	b		.Lmacloop4x
.Lmac1x:
	add		w3, w3, #4
.Lmacloop:
	cbz		w3, .Lmacout
	ld1		{v1.16b}, [x0], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs		w3, w3, #1
	csinv		x5, x6, xzr, eq			/* same test as in the 4x loop */
	cbz		w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w2, x1, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x4]			/* return dg */
	mov		w0, w3
	ret
AES_FUNC_END(aes_mac_update)