sm3-ce-core.S (3276B)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * The SM3 instructions below are emitted as raw .inst words rather
	 * than by mnemonic: each macro ORs register numbers into a fixed
	 * opcode.  The .Lv<N>.4s symbols translate an operand spelled
	 * "vN.4s" into its register number N.  Only v0-v12 are defined, so
	 * the SM3 macros accept nothing but those registers.
	 */
	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
	.set		.Lv\b\().4s, \b
	.endr

	/*
	 * Encoder for the three-register form: Rd at bit 0, Rn at bit 5,
	 * Rm at bit 16.
	 */
	.macro		__sm3_rrr, opc, rd, rn, rm
	.inst		\opc | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	/*
	 * Encoder for the indexed form: as above, plus a 2-bit lane index
	 * at bit 12.
	 */
	.macro		__sm3_rrr_idx, opc, rd, rn, rm, imm2
	.inst		\opc | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	/* message expansion, first half */
	.macro		sm3partw1, rd, rn, rm
	__sm3_rrr	0xce60c000, \rd, \rn, \rm
	.endm

	/* message expansion, second half */
	.macro		sm3partw2, rd, rn, rm
	__sm3_rrr	0xce60c400, \rd, \rn, \rm
	.endm

	/* four-register form: the extra operand Ra sits at bit 10 */
	.macro		sm3ss1, rd, rn, rm, ra
	.inst		0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro		sm3tt1a, rd, rn, rm, imm2
	__sm3_rrr_idx	0xce408000, \rd, \rn, \rm, \imm2
	.endm

	.macro		sm3tt1b, rd, rn, rm, imm2
	__sm3_rrr_idx	0xce408400, \rd, \rn, \rm, \imm2
	.endm

	.macro		sm3tt2a, rd, rn, rm, imm2
	__sm3_rrr_idx	0xce408800, \rd, \rn, \rm, \imm2
	.endm

	.macro		sm3tt2b, rd, rn, rm, imm2
	__sm3_rrr_idx	0xce408c00, \rd, \rn, \rm, \imm2
	.endm

	/*
	 * One SM3 round.
	 *
	 *   ab   - 'a' or 'b', selects the sm3tt1a/sm3tt2a or the
	 *          sm3tt1b/sm3tt2b flavour
	 *   s0   - message vector handed to sm3tt2
	 *   kin  - vector holding the round constant for this round
	 *   kout - receives \kin with every 32-bit lane rotated left by one
	 *          bit (shl #1 followed by sri #31), ready for the next round
	 *   idx  - lane index (0-3) passed on to sm3tt1/sm3tt2
	 *
	 * Operates on v8/v9 in place, reads v10 and uses v5 as scratch.
	 */
	.macro		round, ab, s0, kin, kout, idx
	sm3ss1		v5.4s, v8.4s, \kin\().4s, v9.4s
	shl		\kout\().4s, \kin\().4s, #1
	sri		\kout\().4s, \kin\().4s, #31
	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \idx
	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \idx
	.endm

	/*
	 * Four consecutive rounds, one per lane of \s0.
	 *
	 * When \s4 is supplied, the vector \s4 is additionally derived from
	 * \s0-\s3 (sm3partw1 before the rounds, sm3partw2 after them); when
	 * it is left blank that part is omitted.  v10 is set to \s0 ^ \s1,
	 * v6/v7 are scratch, and the round constant ping-pongs between v11
	 * and v12, ending up back in v11.
	 */
	.macro		qround, ab, s0, s1, s2, s3, s4
	.ifnb		\s4
	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
	ext		v6.16b, \s0\().16b, \s1\().16b, #12
	ext		v7.16b, \s2\().16b, \s3\().16b, #8
	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
	.endif

	eor		v10.16b, \s0\().16b, \s1\().16b

	round		\ab, \s0, v11, v12, 0
	round		\ab, \s0, v12, v11, 1
	round		\ab, \s0, v11, v12, 2
	round		\ab, \s0, v12, v11, 3

	.ifnb		\s4
	sm3partw2	\s4\().4s, v7.4s, v6.4s
	.endif
	.endm

	/*
	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
	 *                       int blocks)
	 */
	.text
SYM_FUNC_START(sm3_ce_transform)
	/*
	 * x0: sst    - hash state; 32 bytes are loaded on entry and stored
	 *              back on exit
	 * x1: src    - input data, consumed 64 bytes per block
	 * w2: blocks - number of 64-byte blocks to process
	 *
	 * Uses x8, v0-v16 and the condition flags as scratch.
	 *
	 * NOTE(review): v8-v15 are callee-saved (low 64 bits) under AAPCS64
	 * and are not preserved here; presumably every caller brackets this
	 * routine with kernel_neon_begin()/kernel_neon_end() - confirm.
	 */

	/*
	 * The block loop below tests its counter only after a block has been
	 * processed, so a count of zero (or less) would read past the end of
	 * src.  Return with the state untouched in that case.
	 */
	cmp		w2, #0
	b.le		1f

	/* load state, reversing the order of the four words in each vector */
	ld1		{v8.4s-v9.4s}, [x0]
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8

	/* s13/s14 = the two round constants at .Lt (upper lanes zeroed) */
	adr_l		x8, .Lt
	ldp		s13, s14, [x8]

	/* load input */
0:	ld1		{v0.16b-v3.16b}, [x1], #64
	sub		w2, w2, #1

	/* keep a copy of the incoming state for the final XOR */
	mov		v15.16b, v8.16b
	mov		v16.16b, v9.16b

	/* reverse the bytes of every 32-bit input word (CPU_LE builds) */
CPU_LE(	rev32		v0.16b, v0.16b		)
CPU_LE(	rev32		v1.16b, v1.16b		)
CPU_LE(	rev32		v2.16b, v2.16b		)
CPU_LE(	rev32		v3.16b, v3.16b		)

	/* move the first constant from lane 0 of v13 into lane 3 of v11 */
	ext		v11.16b, v13.16b, v13.16b, #4

	/* rounds 0-15: 'a' flavour */
	qround		a, v0, v1, v2, v3, v4
	qround		a, v1, v2, v3, v4, v0
	qround		a, v2, v3, v4, v0, v1
	qround		a, v3, v4, v0, v1, v2

	/* switch to the second constant, again placed in lane 3 */
	ext		v11.16b, v14.16b, v14.16b, #4

	/* rounds 16-63: 'b' flavour; the last three need no new vector */
	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	qround		b, v3, v4, v0, v1, v2
	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	qround		b, v3, v4
	qround		b, v4, v0
	qround		b, v0, v1

	/* fold the saved input state back in */
	eor		v8.16b, v8.16b, v15.16b
	eor		v9.16b, v9.16b, v16.16b

	/* handled all input blocks? */
	cbnz		w2, 0b

	/* save state, undoing the word reversal applied on load */
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8
	st1		{v8.4s-v9.4s}, [x0]
1:	ret
SYM_FUNC_END(sm3_ce_transform)

	/*
	 * SM3 round constants: T for rounds 0-15, followed by T for rounds
	 * 16-63 in the form needed at round 16, i.e. 0x7a879d8a rotated left
	 * by 16 bits.
	 */
	.section	".rodata", "a"
	.align		3
.Lt:	.word		0x79cc4519, 0x9d8a7a87