cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

sha256_ni_asm.S (10619B)


      1/*
      2 * Intel SHA Extensions optimized implementation of a SHA-256 update function
      3 *
      4 * This file is provided under a dual BSD/GPLv2 license.  When using or
      5 * redistributing this file, you may do so under either license.
      6 *
      7 * GPL LICENSE SUMMARY
      8 *
      9 * Copyright(c) 2015 Intel Corporation.
     10 *
     11 * This program is free software; you can redistribute it and/or modify
     12 * it under the terms of version 2 of the GNU General Public License as
     13 * published by the Free Software Foundation.
     14 *
     15 * This program is distributed in the hope that it will be useful, but
     16 * WITHOUT ANY WARRANTY; without even the implied warranty of
     17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     18 * General Public License for more details.
     19 *
     20 * Contact Information:
     21 * 	Sean Gulley <sean.m.gulley@intel.com>
     22 * 	Tim Chen <tim.c.chen@linux.intel.com>
     23 *
     24 * BSD LICENSE
     25 *
     26 * Copyright(c) 2015 Intel Corporation.
     27 *
     28 * Redistribution and use in source and binary forms, with or without
     29 * modification, are permitted provided that the following conditions
     30 * are met:
     31 *
     32 * 	* Redistributions of source code must retain the above copyright
     33 * 	  notice, this list of conditions and the following disclaimer.
     34 * 	* Redistributions in binary form must reproduce the above copyright
     35 * 	  notice, this list of conditions and the following disclaimer in
     36 * 	  the documentation and/or other materials provided with the
     37 * 	  distribution.
     38 * 	* Neither the name of Intel Corporation nor the names of its
     39 * 	  contributors may be used to endorse or promote products derived
     40 * 	  from this software without specific prior written permission.
     41 *
     42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     43 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     44 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     45 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     46 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     50 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     51 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     53 *
     54 */
     55
     56#include <linux/linkage.h>
     57
     58#define DIGEST_PTR	%rdi	/* 1st arg: digest, loaded at entry and stored back at exit */
     59#define DATA_PTR	%rsi	/* 2nd arg: input data, advanced by 64 bytes per block */
     60#define NUM_BLKS	%rdx	/* 3rd arg: block count; converted in place to the end-of-data pointer */
     61
     62#define SHA256CONSTANTS	%rax	/* address of the K256 table */
     63
     64#define MSG		%xmm0	/* message dwords plus round constants, prepared before each sha256rnds2 */
     65#define STATE0		%xmm1	/* working state; holds ABEF at block boundaries */
     66#define STATE1		%xmm2	/* working state; holds CDGH at block boundaries */
     67#define MSGTMP0		%xmm3	/* MSGTMP0-MSGTMP3: rolling window of message schedule dwords */
     68#define MSGTMP1		%xmm4
     69#define MSGTMP2		%xmm5
     70#define MSGTMP3		%xmm6
     71#define MSGTMP4		%xmm7	/* scratch: palignr results and state reordering */
     72
     73#define SHUF_MASK	%xmm8	/* pshufb mask loaded from PSHUFFLE_BYTE_FLIP_MASK */
     74
     75#define ABEF_SAVE	%xmm9	/* copy of STATE0 taken at the start of each block */
     76#define CDGH_SAVE	%xmm10	/* copy of STATE1 taken at the start of each block */
     77
     78/*
     79 * Intel SHA Extensions optimized implementation of a SHA-256 update function
     80 *
     81 * The function takes a pointer to the current hash values, a pointer to the
     82 * input data, and a number of 64 byte blocks to process.  Once all blocks have
     83 * been processed, the digest buffer is updated with the resulting hash value.
     84 * The function only processes complete blocks, there is no functionality to
     85 * store partial blocks.  All message padding and hash value initialization must
     86 * be done outside the update function.
     87 *
     88 * The indented lines in the loop are instructions related to rounds processing.
     89 * The non-indented lines are instructions related to the message schedule.
     90 *
     91 * void sha256_ni_transform(uint32_t *digest, const void *data,
     92 *		uint32_t numBlocks);
     93 * digest : pointer to digest
     94 * data: pointer to input data
     95 * numBlocks: Number of blocks to process
     96 */
     97
     98.text
     99.align 32
    100SYM_FUNC_START(sha256_ni_transform)
    101	/* NOTE(review): the shift below uses all 64 bits of %rdx while the header comment declares numBlocks as uint32_t -- confirm callers pass a clean 64-bit value */
    102	shl		$6, NUM_BLKS		/* convert block count to bytes (64 bytes per block) */
    103	jz		.Ldone_hash		/* byte count is zero: return without touching the digest */
    104	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */
    105
    106	/*
    107	 * load initial hash values
    108	 * Need to reorder these appropriately
    109	 * DCBA, HGFE -> ABEF, CDGH
    110	 */
    111	movdqu		0*16(DIGEST_PTR), STATE0
    112	movdqu		1*16(DIGEST_PTR), STATE1
    113
    114	pshufd		$0xB1, STATE0,  STATE0		/* CDAB */
    115	pshufd		$0x1B, STATE1,  STATE1		/* EFGH */
    116	movdqa		STATE0, MSGTMP4		/* keep CDAB: the palignr below overwrites STATE0 */
    117	palignr		$8, STATE1,  STATE0		/* ABEF */
    118	pblendw		$0xF0, MSGTMP4, STATE1		/* CDGH */
    119
    120	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK	/* loaded once, reused for every block */
    121	lea		K256(%rip), SHA256CONSTANTS	/* base address of the round-constant table */
    122
    123.Lloop0:	/* one iteration per 64-byte block */
    124	/* Save hash values for addition after rounds */
    125	movdqa		STATE0, ABEF_SAVE
    126	movdqa		STATE1, CDGH_SAVE
    127
    128	/* Rounds 0-3 */
    129	movdqu		0*16(DATA_PTR), MSG	/* unaligned load of 16 input bytes */
    130	pshufb		SHUF_MASK, MSG	/* reverse the byte order within each dword */
    131	movdqa		MSG, MSGTMP0	/* keep the byte-swapped dwords for the message schedule */
    132		paddd		0*16(SHA256CONSTANTS), MSG	/* add the four matching K256 constants */
    133		sha256rnds2	STATE0, STATE1	/* two rounds; result is written to STATE1 */
    134		pshufd 		$0x0E, MSG, MSG	/* move the upper two dwords of MSG into the lower half */
    135		sha256rnds2	STATE1, STATE0	/* two more rounds; result is written to STATE0 */
    136
    137	/* Rounds 4-7 */
    138	movdqu		1*16(DATA_PTR), MSG
    139	pshufb		SHUF_MASK, MSG
    140	movdqa		MSG, MSGTMP1
    141		paddd		1*16(SHA256CONSTANTS), MSG
    142		sha256rnds2	STATE0, STATE1
    143		pshufd 		$0x0E, MSG, MSG
    144		sha256rnds2	STATE1, STATE0
    145	sha256msg1	MSGTMP1, MSGTMP0	/* begin computing the next four schedule dwords in MSGTMP0 */
    146
    147	/* Rounds 8-11 */
    148	movdqu		2*16(DATA_PTR), MSG
    149	pshufb		SHUF_MASK, MSG
    150	movdqa		MSG, MSGTMP2
    151		paddd		2*16(SHA256CONSTANTS), MSG
    152		sha256rnds2	STATE0, STATE1
    153		pshufd 		$0x0E, MSG, MSG
    154		sha256rnds2	STATE1, STATE0
    155	sha256msg1	MSGTMP2, MSGTMP1
    156
    157	/* Rounds 12-15 */
    158	movdqu		3*16(DATA_PTR), MSG
    159	pshufb		SHUF_MASK, MSG
    160	movdqa		MSG, MSGTMP3
    161		paddd		3*16(SHA256CONSTANTS), MSG
    162		sha256rnds2	STATE0, STATE1
    163	movdqa		MSGTMP3, MSGTMP4
    164	palignr		$4, MSGTMP2, MSGTMP4	/* MSGTMP4 = upper three dwords of MSGTMP2, then the low dword of MSGTMP3 */
    165	paddd		MSGTMP4, MSGTMP0
    166	sha256msg2	MSGTMP3, MSGTMP0	/* finish the next four schedule dwords in MSGTMP0 */
    167		pshufd 		$0x0E, MSG, MSG
    168		sha256rnds2	STATE1, STATE0
    169	sha256msg1	MSGTMP3, MSGTMP2
    170
    171	/* Rounds 16-19 (from here on MSG comes from the computed schedule, not from DATA_PTR) */
    172	movdqa		MSGTMP0, MSG
    173		paddd		4*16(SHA256CONSTANTS), MSG
    174		sha256rnds2	STATE0, STATE1
    175	movdqa		MSGTMP0, MSGTMP4
    176	palignr		$4, MSGTMP3, MSGTMP4
    177	paddd		MSGTMP4, MSGTMP1
    178	sha256msg2	MSGTMP0, MSGTMP1
    179		pshufd 		$0x0E, MSG, MSG
    180		sha256rnds2	STATE1, STATE0
    181	sha256msg1	MSGTMP0, MSGTMP3
    182
    183	/* Rounds 20-23 */
    184	movdqa		MSGTMP1, MSG
    185		paddd		5*16(SHA256CONSTANTS), MSG
    186		sha256rnds2	STATE0, STATE1
    187	movdqa		MSGTMP1, MSGTMP4
    188	palignr		$4, MSGTMP0, MSGTMP4
    189	paddd		MSGTMP4, MSGTMP2
    190	sha256msg2	MSGTMP1, MSGTMP2
    191		pshufd 		$0x0E, MSG, MSG
    192		sha256rnds2	STATE1, STATE0
    193	sha256msg1	MSGTMP1, MSGTMP0
    194
    195	/* Rounds 24-27 */
    196	movdqa		MSGTMP2, MSG
    197		paddd		6*16(SHA256CONSTANTS), MSG
    198		sha256rnds2	STATE0, STATE1
    199	movdqa		MSGTMP2, MSGTMP4
    200	palignr		$4, MSGTMP1, MSGTMP4
    201	paddd		MSGTMP4, MSGTMP3
    202	sha256msg2	MSGTMP2, MSGTMP3
    203		pshufd 		$0x0E, MSG, MSG
    204		sha256rnds2	STATE1, STATE0
    205	sha256msg1	MSGTMP2, MSGTMP1
    206
    207	/* Rounds 28-31 */
    208	movdqa		MSGTMP3, MSG
    209		paddd		7*16(SHA256CONSTANTS), MSG
    210		sha256rnds2	STATE0, STATE1
    211	movdqa		MSGTMP3, MSGTMP4
    212	palignr		$4, MSGTMP2, MSGTMP4
    213	paddd		MSGTMP4, MSGTMP0
    214	sha256msg2	MSGTMP3, MSGTMP0
    215		pshufd 		$0x0E, MSG, MSG
    216		sha256rnds2	STATE1, STATE0
    217	sha256msg1	MSGTMP3, MSGTMP2
    218
    219	/* Rounds 32-35 */
    220	movdqa		MSGTMP0, MSG
    221		paddd		8*16(SHA256CONSTANTS), MSG
    222		sha256rnds2	STATE0, STATE1
    223	movdqa		MSGTMP0, MSGTMP4
    224	palignr		$4, MSGTMP3, MSGTMP4
    225	paddd		MSGTMP4, MSGTMP1
    226	sha256msg2	MSGTMP0, MSGTMP1
    227		pshufd 		$0x0E, MSG, MSG
    228		sha256rnds2	STATE1, STATE0
    229	sha256msg1	MSGTMP0, MSGTMP3
    230
    231	/* Rounds 36-39 */
    232	movdqa		MSGTMP1, MSG
    233		paddd		9*16(SHA256CONSTANTS), MSG
    234		sha256rnds2	STATE0, STATE1
    235	movdqa		MSGTMP1, MSGTMP4
    236	palignr		$4, MSGTMP0, MSGTMP4
    237	paddd		MSGTMP4, MSGTMP2
    238	sha256msg2	MSGTMP1, MSGTMP2
    239		pshufd 		$0x0E, MSG, MSG
    240		sha256rnds2	STATE1, STATE0
    241	sha256msg1	MSGTMP1, MSGTMP0
    242
    243	/* Rounds 40-43 */
    244	movdqa		MSGTMP2, MSG
    245		paddd		10*16(SHA256CONSTANTS), MSG
    246		sha256rnds2	STATE0, STATE1
    247	movdqa		MSGTMP2, MSGTMP4
    248	palignr		$4, MSGTMP1, MSGTMP4
    249	paddd		MSGTMP4, MSGTMP3
    250	sha256msg2	MSGTMP2, MSGTMP3
    251		pshufd 		$0x0E, MSG, MSG
    252		sha256rnds2	STATE1, STATE0
    253	sha256msg1	MSGTMP2, MSGTMP1
    254
    255	/* Rounds 44-47 */
    256	movdqa		MSGTMP3, MSG
    257		paddd		11*16(SHA256CONSTANTS), MSG
    258		sha256rnds2	STATE0, STATE1
    259	movdqa		MSGTMP3, MSGTMP4
    260	palignr		$4, MSGTMP2, MSGTMP4
    261	paddd		MSGTMP4, MSGTMP0
    262	sha256msg2	MSGTMP3, MSGTMP0
    263		pshufd 		$0x0E, MSG, MSG
    264		sha256rnds2	STATE1, STATE0
    265	sha256msg1	MSGTMP3, MSGTMP2
    266
    267	/* Rounds 48-51 */
    268	movdqa		MSGTMP0, MSG
    269		paddd		12*16(SHA256CONSTANTS), MSG
    270		sha256rnds2	STATE0, STATE1
    271	movdqa		MSGTMP0, MSGTMP4
    272	palignr		$4, MSGTMP3, MSGTMP4
    273	paddd		MSGTMP4, MSGTMP1
    274	sha256msg2	MSGTMP0, MSGTMP1
    275		pshufd 		$0x0E, MSG, MSG
    276		sha256rnds2	STATE1, STATE0
    277	sha256msg1	MSGTMP0, MSGTMP3
    278
    279	/* Rounds 52-55 (no sha256msg1 from this group onwards) */
    280	movdqa		MSGTMP1, MSG
    281		paddd		13*16(SHA256CONSTANTS), MSG
    282		sha256rnds2	STATE0, STATE1
    283	movdqa		MSGTMP1, MSGTMP4
    284	palignr		$4, MSGTMP0, MSGTMP4
    285	paddd		MSGTMP4, MSGTMP2
    286	sha256msg2	MSGTMP1, MSGTMP2
    287		pshufd 		$0x0E, MSG, MSG
    288		sha256rnds2	STATE1, STATE0
    289
    290	/* Rounds 56-59 */
    291	movdqa		MSGTMP2, MSG
    292		paddd		14*16(SHA256CONSTANTS), MSG
    293		sha256rnds2	STATE0, STATE1
    294	movdqa		MSGTMP2, MSGTMP4
    295	palignr		$4, MSGTMP1, MSGTMP4
    296	paddd		MSGTMP4, MSGTMP3
    297	sha256msg2	MSGTMP2, MSGTMP3
    298		pshufd 		$0x0E, MSG, MSG
    299		sha256rnds2	STATE1, STATE0
    300
    301	/* Rounds 60-63 (last group: rounds only, no message schedule instructions) */
    302	movdqa		MSGTMP3, MSG
    303		paddd		15*16(SHA256CONSTANTS), MSG
    304		sha256rnds2	STATE0, STATE1
    305		pshufd 		$0x0E, MSG, MSG
    306		sha256rnds2	STATE1, STATE0
    307
    308	/* Add current hash values with previously saved */
    309	paddd		ABEF_SAVE, STATE0
    310	paddd		CDGH_SAVE, STATE1
    311
    312	/* Increment data pointer and loop if more to process */
    313	add		$64, DATA_PTR
    314	cmp		NUM_BLKS, DATA_PTR	/* NUM_BLKS holds the end-of-data pointer computed at entry */
    315	jne		.Lloop0
    316
    317	/* Write hash values back in the correct order */
    318	pshufd		$0x1B, STATE0,  STATE0		/* FEBA */
    319	pshufd		$0xB1, STATE1,  STATE1		/* DCHG */
    320	movdqa		STATE0, MSGTMP4		/* keep FEBA: the pblendw below overwrites STATE0 */
    321	pblendw		$0xF0, STATE1,  STATE0		/* DCBA */
    322	palignr		$8, MSGTMP4, STATE1		/* HGFE */
    323
    324	movdqu		STATE0, 0*16(DIGEST_PTR)
    325	movdqu		STATE1, 1*16(DIGEST_PTR)
    326
    327.Ldone_hash:	/* also reached directly from entry when the byte count is zero */
    328
    329	RET
    330SYM_FUNC_END(sha256_ni_transform)
    331
    332.section	.rodata.cst256.K256, "aM", @progbits, 256
    333.align 64
    334K256:	/* SHA-256 round constants: 64 dwords, four per row; sha256_ni_transform reads one 16-byte row per group of four rounds */
    335	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
    336	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
    337	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
    338	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
    339	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
    340	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
    341	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
    342	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
    343	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
    344	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
    345	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
    346	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
    347	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
    348	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
    349	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
    350	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
    351
    352.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
    353.align 16	/* the mask is read with movdqa in sha256_ni_transform, which needs a 16-byte aligned operand */
    354PSHUFFLE_BYTE_FLIP_MASK:	/* pshufb control bytes: reverse the four bytes inside each dword, dword order unchanged */
    355	.octa 0x0c0d0e0f08090a0b0405060700010203