sha256_ni_asm.S (10619B)
/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 * 	Sean Gulley <sean.m.gulley@intel.com>
 * 	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 	* Redistributions of source code must retain the above copyright
 * 	  notice, this list of conditions and the following disclaimer.
 * 	* Redistributions in binary form must reproduce the above copyright
 * 	  notice, this list of conditions and the following disclaimer in
 * 	  the documentation and/or other materials provided with the
 * 	  distribution.
 * 	* Neither the name of Intel Corporation nor the names of its
 * 	  contributors may be used to endorse or promote products derived
 * 	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>

/*
 * Syntax: GNU as, AT&T operand order (source first, destination last),
 * passed through the C preprocessor so the register aliases below expand
 * before assembly.
 */

/* Argument registers. */
#define DIGEST_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */

/* Base address of the K256 round-constant table. */
#define SHA256CONSTANTS	%rax

/*
 * MSG has to be %xmm0: sha256rnds2 reads its message-plus-constant input
 * implicitly from %xmm0, which is why it is written with only two operands.
 */
#define MSG		%xmm0
#define STATE0		%xmm1	/* ABEF half of the working state */
#define STATE1		%xmm2	/* CDGH half of the working state */
#define MSGTMP0		%xmm3	/* rolling four-register window of schedule words */
#define MSGTMP1		%xmm4
#define MSGTMP2		%xmm5
#define MSGTMP3		%xmm6
#define MSGTMP4		%xmm7	/* scratch for state reordering and palignr */

/* Byte-flip mask, loaded once before the block loop. */
#define SHUF_MASK	%xmm8

/* Copy of the state at the start of a block, added back after round 63. */
#define ABEF_SAVE	%xmm9
#define CDGH_SAVE	%xmm10

/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * The function takes a pointer to the current hash values, a pointer to the
 * input data, and a number of 64 byte blocks to process.  Once all blocks have
 * been processed, the memory at the digest pointer is overwritten with the
 * resulting hash value.  The function only processes complete blocks, there is
 * no functionality to store partial blocks.  All message padding and hash value
 * initialization must be done outside the update function.
 *
 * The indented lines in the loop are instructions related to rounds processing.
 * The non-indented lines are instructions related to the message schedule.
 *
 * void sha256_ni_transform(uint32_t *digest, const void *data,
 *			    uint32_t numBlocks);
 * digest : pointer to digest (8 x 32-bit words, read and written with
 *	    unaligned moves)
 * data: pointer to input data (read with unaligned moves)
 * numBlocks: Number of blocks to process; when it is 0 the function returns
 *	      without reading or writing the digest
 *
 * Clobbers: %rax, %rdx, %rsi, %xmm0-%xmm10, flags.  %rdi is preserved.
 * No stack is used.
 *
 * NOTE(review): the prototype above documents numBlocks as a 32-bit value but
 * the byte count and end pointer are computed from the full 64-bit %rdx.
 * This presumes the caller passes the count with clean upper bits; confirm
 * against the C declaration.
 *
 * NOTE(review): the function overwrites SIMD registers without saving them;
 * presumably the caller is responsible for preserving SIMD state where that
 * is required; confirm at the call sites.
 */

.text
.align 32
SYM_FUNC_START(sha256_ni_transform)

	shl		$6, NUM_BLKS		/*  convert to bytes */
	jz		.Ldone_hash		/* nothing to do for zero blocks */
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 * Need to reorder these appropriately
	 * DCBA, HGFE -> ABEF, CDGH
	 */
	movdqu		0*16(DIGEST_PTR), STATE0
	movdqu		1*16(DIGEST_PTR), STATE1

	pshufd		$0xB1, STATE0, STATE0		/* CDAB */
	pshufd		$0x1B, STATE1, STATE1		/* EFGH */
	movdqa		STATE0, MSGTMP4
	palignr		$8, STATE1, STATE0		/* ABEF */
	pblendw		$0xF0, MSGTMP4, STATE1		/* CDGH */

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	lea		K256(%rip), SHA256CONSTANTS

	/*
	 * One iteration per 64-byte block.  Loop invariants: NUM_BLKS holds
	 * the end-of-data pointer, DATA_PTR the current block, STATE0/STATE1
	 * the running hash in ABEF/CDGH order.
	 *
	 * Within each group of four rounds, MSG holds four schedule words
	 * plus their round constants.  The first sha256rnds2 consumes the low
	 * two dwords; "pshufd $0x0E" then moves the high two dwords down for
	 * the second sha256rnds2.
	 */
.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		STATE0, ABEF_SAVE
	movdqa		STATE1, CDGH_SAVE

	/* Rounds 0-3 */
	movdqu		0*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG		/* byte-swap each 32-bit word */
	movdqa		MSG, MSGTMP0
		paddd		0*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 4-7 */
	movdqu		1*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP1
		paddd		1*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 8-11 */
	movdqu		2*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP2
		paddd		2*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 12-15 */
	movdqu		3*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP3
		paddd		3*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP3, MSGTMP4
	palignr		$4, MSGTMP2, MSGTMP4
	paddd		MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 16-19 */
	movdqa		MSGTMP0, MSG
		paddd		4*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP0, MSGTMP4
	palignr		$4, MSGTMP3, MSGTMP4
	paddd		MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 20-23 */
	movdqa		MSGTMP1, MSG
		paddd		5*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP1, MSGTMP4
	palignr		$4, MSGTMP0, MSGTMP4
	paddd		MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 24-27 */
	movdqa		MSGTMP2, MSG
		paddd		6*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP2, MSGTMP4
	palignr		$4, MSGTMP1, MSGTMP4
	paddd		MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 28-31 */
	movdqa		MSGTMP3, MSG
		paddd		7*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP3, MSGTMP4
	palignr		$4, MSGTMP2, MSGTMP4
	paddd		MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 32-35 */
	movdqa		MSGTMP0, MSG
		paddd		8*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP0, MSGTMP4
	palignr		$4, MSGTMP3, MSGTMP4
	paddd		MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 36-39 */
	movdqa		MSGTMP1, MSG
		paddd		9*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP1, MSGTMP4
	palignr		$4, MSGTMP0, MSGTMP4
	paddd		MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 40-43 */
	movdqa		MSGTMP2, MSG
		paddd		10*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP2, MSGTMP4
	palignr		$4, MSGTMP1, MSGTMP4
	paddd		MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 44-47 */
	movdqa		MSGTMP3, MSG
		paddd		11*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP3, MSGTMP4
	palignr		$4, MSGTMP2, MSGTMP4
	paddd		MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 48-51 */
	movdqa		MSGTMP0, MSG
		paddd		12*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP0, MSGTMP4
	palignr		$4, MSGTMP3, MSGTMP4
	paddd		MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 52-55 (no sha256msg1 from here on) */
	movdqa		MSGTMP1, MSG
		paddd		13*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP1, MSGTMP4
	palignr		$4, MSGTMP0, MSGTMP4
	paddd		MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 56-59 */
	movdqa		MSGTMP2, MSG
		paddd		14*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP2, MSGTMP4
	palignr		$4, MSGTMP1, MSGTMP4
	paddd		MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 60-63 (no message-schedule instructions in this group) */
	movdqa		MSGTMP3, MSG
		paddd		15*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Add current hash values with previously saved */
	paddd		ABEF_SAVE, STATE0
	paddd		CDGH_SAVE, STATE1

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	pshufd		$0x1B, STATE0, STATE0		/* FEBA */
	pshufd		$0xB1, STATE1, STATE1		/* DCHG */
	movdqa		STATE0, MSGTMP4
	pblendw		$0xF0, STATE1, STATE0		/* DCBA */
	palignr		$8, MSGTMP4, STATE1		/* HGFE */

	movdqu		STATE0, 0*16(DIGEST_PTR)
	movdqu		STATE1, 1*16(DIGEST_PTR)

.Ldone_hash:

	RET
SYM_FUNC_END(sha256_ni_transform)

/*
 * Round constants: 64 dwords, four per row.  Row n is read as
 * n*16(SHA256CONSTANTS) by the round group covering rounds 4n..4n+3.
 * The rows are used as paddd memory operands, so the table must stay
 * at least 16-byte aligned.
 */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

/*
 * pshufb control that reverses the byte order inside each 32-bit lane.
 * It is loaded with movdqa, so it must stay 16-byte aligned.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203