cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

camellia_asm.S (13850B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2#include <linux/linkage.h>
      3#include <asm/visasm.h>
      4
      5#include "opcodes.h"
      6
       /*
        * CAMELLIA_6ROUNDS: six back-to-back CAMELLIA_F invocations.  The first
        * argument steps through KEY_BASE + 0, 2, ..., 10 and the I0/I1 operands
        * swap roles on every step.
        *
        * CAMELLIA_F is not defined in this file (presumably it comes from
        * opcodes.h), so its operand semantics are not visible here.  Judging by
        * the callers in this file, the numeric arguments appear to be
        * floating-point register numbers (e.g. 8 for %f8) - TODO confirm.
        */
      7#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
      8	CAMELLIA_F(KEY_BASE +  0, I1, I0, I1) \
      9	CAMELLIA_F(KEY_BASE +  2, I0, I1, I0) \
     10	CAMELLIA_F(KEY_BASE +  4, I1, I0, I1) \
     11	CAMELLIA_F(KEY_BASE +  6, I0, I1, I0) \
     12	CAMELLIA_F(KEY_BASE +  8, I1, I0, I1) \
     13	CAMELLIA_F(KEY_BASE + 10, I0, I1, I0)
     14
       /*
        * CAMELLIA_6ROUNDS_FL_FLI: CAMELLIA_6ROUNDS followed by CAMELLIA_FL on I0
        * (key argument KEY_BASE + 12) and CAMELLIA_FLI on I1 (key argument
        * KEY_BASE + 14).  One invocation therefore consumes the eight key
        * arguments KEY_BASE + 0 .. KEY_BASE + 14.  CAMELLIA_FL and CAMELLIA_FLI
        * are likewise defined outside this file.
        */
     15#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \
     16	CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
     17	CAMELLIA_FL(KEY_BASE + 12, I0, I0) \
     18	CAMELLIA_FLI(KEY_BASE + 14, I1, I1)
     19
     20	.data
       	/*
       	 * SIGMA: six 64-bit constants, 8-byte aligned.  They are loaded into
       	 * %f16..%f26 by camellia_sparc64_key_expand and used there as the key
       	 * arguments of CAMELLIA_F.  The values match the Sigma1..Sigma6
       	 * constants of the Camellia key schedule (RFC 3713).
       	 */
     21
     22	.align	8
     23SIGMA:	.xword	0xA09E667F3BCC908B
     24	.xword	0xB67AE8584CAA73B2
     25	.xword	0xC6EF372FE94F82BE
     26	.xword	0x54FF53A5F1D36F1C
     27	.xword	0x10E527FADE682D1D
     28	.xword	0xB05688C2B3E6C1FD
     29
     30	.text
     31
     32	.align	32
       	/*
       	 * camellia_sparc64_key_expand
       	 *
       	 * Builds the key schedule at %o1 from the raw key at %o0 and then
       	 * writes a reordered copy of it to %o3.
       	 *
       	 * Outline:
       	 *  1. Copy the raw key into the schedule and into FP registers.
       	 *  2. Derive further 128-bit values with CAMELLIA_F, using the SIGMA
       	 *     constants as key arguments.
       	 *  3. Fill the schedule slots with 128-bit rotations (ROTL128) of
       	 *     those values.  A 16-byte key fills offsets 0x00..0xcf, any
       	 *     other key length fills offsets 0x00..0x10f.
       	 *  4. Copy the schedule to %o3 with its 8-byte entries in reverse
       	 *     order, except that the first and the last 16-byte groups of the
       	 *     copy keep their internal order.
       	 *
       	 * Only key_len == 16 and key_len == 24 are tested for explicitly; any
       	 * other value takes the same path as a 32-byte key.
       	 *
       	 * Clobbers %o0, %o1, %o3, %o4, %o5, %g2, %g3, the integer condition
       	 * codes and the FP registers used below.  VISEntry/VISExit are
       	 * defined outside this file (presumably asm/visasm.h).
       	 */
     33ENTRY(camellia_sparc64_key_expand)
     34	/* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */
     35	VISEntry
       	/* First 16 key bytes: stored as k[0..3] and also kept in %f28/%f30. */
     36	ld	[%o0 + 0x00], %f0	! i0, k[0]
     37	ld	[%o0 + 0x04], %f1	! i1, k[1]
     38	ld	[%o0 + 0x08], %f2	! i2, k[2]
     39	ld	[%o0 + 0x0c], %f3	! i3, k[3]
     40	std	%f0, [%o1 + 0x00]	! k[0, 1]
     41	fsrc2	%f0, %f28
     42	std	%f2, [%o1 + 0x08]	! k[2, 3]
     43	cmp	%o2, 16
       	/* The delay slot below is not annulled, so %f30 is set on both paths. */
     44	be	10f
     45	 fsrc2	%f2, %f30
     46
       	/* Longer keys: key bytes 0x10..0x17 go to %f0 and to k[8, 9]. */
     47	ld	[%o0 + 0x10], %f0
     48	ld	[%o0 + 0x14], %f1
     49	std	%f0, [%o1 + 0x20]	! k[8, 9]
     50	cmp	%o2, 24
     51	fone	%f10
       	/*
       	 * 24-byte key: the annulled delay slot runs only when the branch is
       	 * taken and sets %f2 to the bitwise complement of %f0 (xor with the
       	 * all-ones %f10).  Otherwise key bytes 0x18..0x1f are loaded into %f2.
       	 */
     52	be,a	1f
     53	 fxor	%f10, %f0, %f2
     54	ld	[%o0 + 0x18], %f2
     55	ld	[%o0 + 0x1c], %f3
     561:
     57	std	%f2, [%o1 + 0x28]	! k[10, 11]
       	/* Mix the first 16 key bytes (%f28/%f30) into %f0/%f2. */
     58	fxor	%f28, %f0, %f0
     59	fxor	%f30, %f2, %f2
     60
     6110:
       	/* Load the six SIGMA constants into %f16..%f26. */
     62	sethi	%hi(SIGMA), %g3
     63	or	%g3, %lo(SIGMA), %g3
     64	ldd	[%g3 + 0x00], %f16
     65	ldd	[%g3 + 0x08], %f18
     66	ldd	[%g3 + 0x10], %f20
     67	ldd	[%g3 + 0x18], %f22
     68	ldd	[%g3 + 0x20], %f24
     69	ldd	[%g3 + 0x28], %f26
       	/*
       	 * Two CAMELLIA_F steps, xor with the first 16 key bytes, then two
       	 * more CAMELLIA_F steps.  The result is left in %f0/%f2.
       	 */
     70	CAMELLIA_F(16, 2, 0, 2)
     71	CAMELLIA_F(18, 0, 2, 0)
     72	fxor	%f28, %f0, %f0
     73	fxor	%f30, %f2, %f2
     74	CAMELLIA_F(20, 2, 0, 2)
     75	CAMELLIA_F(22, 0, 2, 0)
     76
       /*
        * Rotate the 128-bit value S01:S23 (S01 holds the upper 64 bits) left by
        * N bits, in place.  TMP1 and TMP2 are clobbered.  Both N and (64 - N)
        * are used as 64-bit shift counts, so this is only meaningful for
        * 0 < N < 64.
        */
     77#define ROTL128(S01, S23, TMP1, TMP2, N)	\
     78	srlx	S01, (64 - N), TMP1;		\
     79	sllx	S01, N, S01;			\
     80	srlx	S23, (64 - N), TMP2;		\
     81	sllx	S23, N, S23;			\
     82	or	S01, TMP2, S01;			\
     83	or	S23, TMP1, S23
     84
     85	cmp	%o2, 16
     86	bne	1f
     87	 nop
     88	/* 128-bit key */
     89	std	%f0, [%o1 + 0x10]	! k[ 4,  5]
     90	std	%f2, [%o1 + 0x18]	! k[ 6,  7]
       	/*
       	 * MOVDTOX_F0_O4 / MOVDTOX_F2_O5 are defined outside this file; going
       	 * by their names they copy %f0 to %o4 and %f2 to %o5, which is what
       	 * the rotations below rely on.
       	 *
       	 * ROTL128 works in place, so the rotation amounts accumulate from
       	 * one store to the next.  %g3 is free as a temporary because the
       	 * SIGMA address it held is no longer needed.
       	 */
     91	MOVDTOX_F0_O4
     92	MOVDTOX_F2_O5
     93	ROTL128(%o4, %o5, %g2, %g3, 15)
     94	stx	%o4, [%o1 + 0x30]	! k[12, 13]
     95	stx	%o5, [%o1 + 0x38]	! k[14, 15]
     96	ROTL128(%o4, %o5, %g2, %g3, 15)
     97	stx	%o4, [%o1 + 0x40]	! k[16, 17]
     98	stx	%o5, [%o1 + 0x48]	! k[18, 19]
     99	ROTL128(%o4, %o5, %g2, %g3, 15)
       	/* Only the upper half is stored here; k[26, 27] is written further down. */
    100	stx	%o4, [%o1 + 0x60]	! k[24, 25]
    101	ROTL128(%o4, %o5, %g2, %g3, 15)
    102	stx	%o4, [%o1 + 0x70]	! k[28, 29]
    103	stx	%o5, [%o1 + 0x78]	! k[30, 31]
    104	ROTL128(%o4, %o5, %g2, %g3, 34)
    105	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
    106	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
    107	ROTL128(%o4, %o5, %g2, %g3, 17)
    108	stx	%o4, [%o1 + 0xc0]	! k[48, 49]
    109	stx	%o5, [%o1 + 0xc8]	! k[50, 51]
    110
       	/* Second series: rotations of the raw key reloaded from k[0..3]. */
    111	ldx	[%o1 + 0x00], %o4	! k[ 0,  1]
    112	ldx	[%o1 + 0x08], %o5	! k[ 2,  3]
    113	ROTL128(%o4, %o5, %g2, %g3, 15)
    114	stx	%o4, [%o1 + 0x20]	! k[ 8,  9]
    115	stx	%o5, [%o1 + 0x28]	! k[10, 11]
    116	ROTL128(%o4, %o5, %g2, %g3, 30)
    117	stx	%o4, [%o1 + 0x50]	! k[20, 21]
    118	stx	%o5, [%o1 + 0x58]	! k[22, 23]
    119	ROTL128(%o4, %o5, %g2, %g3, 15)
       	/* Only the lower half is stored here, completing the k[24..27] slot. */
    120	stx	%o5, [%o1 + 0x68]	! k[26, 27]
    121	ROTL128(%o4, %o5, %g2, %g3, 17)
    122	stx	%o4, [%o1 + 0x80]	! k[32, 33]
    123	stx	%o5, [%o1 + 0x88]	! k[34, 35]
    124	ROTL128(%o4, %o5, %g2, %g3, 17)
    125	stx	%o4, [%o1 + 0x90]	! k[36, 37]
    126	stx	%o5, [%o1 + 0x98]	! k[38, 39]
    127	ROTL128(%o4, %o5, %g2, %g3, 17)
    128	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
    129	stx	%o5, [%o1 + 0xb8]	! k[46, 47]
    130
       	/*
       	 * Delay slot: %o0 = 192, the offset of the last 16-byte group of the
       	 * schedule and the byte count handled by the reversing loop at 2:.
       	 */
    131	ba,pt	%xcc, 2f
    132	 mov	(3 * 16 * 4), %o0
    133
    1341:
    135	/* 192-bit or 256-bit key */
    136	std	%f0, [%o1 + 0x30]	! k[12, 13]
    137	std	%f2, [%o1 + 0x38]	! k[14, 15]
       	/*
       	 * Xor in the second key half stored earlier at k[8..11] and run two
       	 * more CAMELLIA_F steps with the last two SIGMA constants.
       	 */
    138	ldd	[%o1 + 0x20], %f4	! k[ 8,  9]
    139	ldd	[%o1 + 0x28], %f6	! k[10, 11]
    140	fxor	%f0, %f4, %f0
    141	fxor	%f2, %f6, %f2
    142	CAMELLIA_F(24, 2, 0, 2)
    143	CAMELLIA_F(26, 0, 2, 0)
    144	std	%f0, [%o1 + 0x10]	! k[ 4,  5]
    145	std	%f2, [%o1 + 0x18]	! k[ 6,  7]
       	/* Series 1: rotations of the value just stored at k[4..7]. */
    146	MOVDTOX_F0_O4
    147	MOVDTOX_F2_O5
    148	ROTL128(%o4, %o5, %g2, %g3, 30)
    149	stx	%o4, [%o1 + 0x50]	! k[20, 21]
    150	stx	%o5, [%o1 + 0x58]	! k[22, 23]
    151	ROTL128(%o4, %o5, %g2, %g3, 30)
    152	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
    153	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
    154	ROTL128(%o4, %o5, %g2, %g3, 51)
    155	stx	%o4, [%o1 + 0x100]	! k[64, 65]
    156	stx	%o5, [%o1 + 0x108]	! k[66, 67]
       	/* Series 2: rotations of k[8..11]; the slot itself is overwritten first. */
    157	ldx	[%o1 + 0x20], %o4	! k[ 8,  9]
    158	ldx	[%o1 + 0x28], %o5	! k[10, 11]
    159	ROTL128(%o4, %o5, %g2, %g3, 15)
    160	stx	%o4, [%o1 + 0x20]	! k[ 8,  9]
    161	stx	%o5, [%o1 + 0x28]	! k[10, 11]
    162	ROTL128(%o4, %o5, %g2, %g3, 15)
    163	stx	%o4, [%o1 + 0x40]	! k[16, 17]
    164	stx	%o5, [%o1 + 0x48]	! k[18, 19]
    165	ROTL128(%o4, %o5, %g2, %g3, 30)
    166	stx	%o4, [%o1 + 0x90]	! k[36, 37]
    167	stx	%o5, [%o1 + 0x98]	! k[38, 39]
    168	ROTL128(%o4, %o5, %g2, %g3, 34)
    169	stx	%o4, [%o1 + 0xd0]	! k[52, 53]
    170	stx	%o5, [%o1 + 0xd8]	! k[54, 55]
       	/* Series 3: rotations of k[12..15]; the slot itself is overwritten first. */
    171	ldx	[%o1 + 0x30], %o4	! k[12, 13]
    172	ldx	[%o1 + 0x38], %o5	! k[14, 15]
    173	ROTL128(%o4, %o5, %g2, %g3, 15)
    174	stx	%o4, [%o1 + 0x30]	! k[12, 13]
    175	stx	%o5, [%o1 + 0x38]	! k[14, 15]
    176	ROTL128(%o4, %o5, %g2, %g3, 30)
    177	stx	%o4, [%o1 + 0x70]	! k[28, 29]
    178	stx	%o5, [%o1 + 0x78]	! k[30, 31]
       	/*
       	 * k[48..51] is assembled from 32-bit pieces of the current value:
       	 * stw writes the low 32 bits of its source register, and %g2/%g3
       	 * hold the high 32 bits of %o4/%o5.
       	 */
    179	srlx	%o4, 32, %g2
    180	srlx	%o5, 32, %g3
    181	stw	%o4, [%o1 + 0xc0]	! k[48]
    182	stw	%g3, [%o1 + 0xc4]	! k[49]
    183	stw	%o5, [%o1 + 0xc8]	! k[50]
    184	stw	%g2, [%o1 + 0xcc]	! k[51]
    185	ROTL128(%o4, %o5, %g2, %g3, 49)
    186	stx	%o4, [%o1 + 0xe0]	! k[56, 57]
    187	stx	%o5, [%o1 + 0xe8]	! k[58, 59]
       	/* Series 4: rotations of the raw key reloaded from k[0..3]. */
    188	ldx	[%o1 + 0x00], %o4	! k[ 0,  1]
    189	ldx	[%o1 + 0x08], %o5	! k[ 2,  3]
    190	ROTL128(%o4, %o5, %g2, %g3, 45)
    191	stx	%o4, [%o1 + 0x60]	! k[24, 25]
    192	stx	%o5, [%o1 + 0x68]	! k[26, 27]
    193	ROTL128(%o4, %o5, %g2, %g3, 15)
    194	stx	%o4, [%o1 + 0x80]	! k[32, 33]
    195	stx	%o5, [%o1 + 0x88]	! k[34, 35]
    196	ROTL128(%o4, %o5, %g2, %g3, 17)
    197	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
    198	stx	%o5, [%o1 + 0xb8]	! k[46, 47]
    199	ROTL128(%o4, %o5, %g2, %g3, 34)
    200	stx	%o4, [%o1 + 0xf0]	! k[60, 61]
    201	stx	%o5, [%o1 + 0xf8]	! k[62, 63]
       	/* %o0 = 256: offset of the last 16-byte group for the longer schedule. */
    202	mov	(4 * 16 * 4), %o0
    2032:
       	/*
       	 * Build the copy at %o3.  Start with the last 16 bytes of the
       	 * schedule, copied in their original order.
       	 */
    204	add	%o1, %o0, %o1
    205	ldd	[%o1 + 0x00], %f0
    206	ldd	[%o1 + 0x08], %f2
    207	std	%f0, [%o3 + 0x00]
    208	std	%f2, [%o3 + 0x08]
    209	add	%o3, 0x10, %o3
    2101:
       	/*
       	 * Walk the schedule backwards 64 bytes at a time and write the eight
       	 * 8-byte entries of each chunk in reverse order.  %o0 counts the
       	 * remaining bytes; %o3 is advanced in the branch delay slot.
       	 */
    211	sub	%o1, (16 * 4), %o1
    212	ldd	[%o1 + 0x38], %f0
    213	ldd	[%o1 + 0x30], %f2
    214	ldd	[%o1 + 0x28], %f4
    215	ldd	[%o1 + 0x20], %f6
    216	ldd	[%o1 + 0x18], %f8
    217	ldd	[%o1 + 0x10], %f10
    218	std	%f0, [%o3 + 0x00]
    219	std	%f2, [%o3 + 0x08]
    220	std	%f4, [%o3 + 0x10]
    221	std	%f6, [%o3 + 0x18]
    222	std	%f8, [%o3 + 0x20]
    223	std	%f10, [%o3 + 0x28]
    224
    225	ldd	[%o1 + 0x08], %f0
    226	ldd	[%o1 + 0x00], %f2
    227	std	%f0, [%o3 + 0x30]
    228	std	%f2, [%o3 + 0x38]
    229	subcc	%o0, (16 * 4), %o0
    230	bne,pt	%icc, 1b
    231	 add	%o3, (16 * 4), %o3
    232
       	/*
       	 * After the last chunk %f2/%f0 hold schedule bytes 0x00..0x07 and
       	 * 0x08..0x0f.  Rewrite the final 16 bytes of the copy so that this
       	 * pair appears in its original order instead of reversed.
       	 */
    233	std	%f2, [%o3 - 0x10]
    234	std	%f0, [%o3 - 0x08]
    235
       	/* VISExit sits in the delay slot of retl. */
    236	retl
    237	 VISExit
    238ENDPROC(camellia_sparc64_key_expand)
    239
    240	.align	32
       	/*
       	 * camellia_sparc64_crypt
       	 *
       	 * Processes a single 16-byte block from %o1 into %o2 using the key
       	 * schedule at %o0.  Input and output are accessed with 32-bit ld/st.
       	 *
       	 * key_len == 16 runs 18 CAMELLIA_F steps (three groups of six); any
       	 * other key_len runs one extra CAMELLIA_6ROUNDS_FL_FLI group first,
       	 * for 24 steps in total.
       	 *
       	 * The direction (encrypt or decrypt) is not chosen here; presumably
       	 * it is determined by which schedule the caller passes in %o0.
       	 *
       	 * Clobbers %o0, the integer condition codes and %f0..%f54.
       	 */
    241ENTRY(camellia_sparc64_crypt)
    242	/* %o0=key, %o1=input, %o2=output, %o3=key_len */
    243	VISEntry
    244
    245	ld	[%o1 + 0x00], %f0
    246	ld	[%o1 + 0x04], %f1
    247	ld	[%o1 + 0x08], %f2
    248	ld	[%o1 + 0x0c], %f3
    249
    250	ldd	[%o0 + 0x00], %f4
    251	ldd	[%o0 + 0x08], %f6
    252
       	/*
       	 * Xor the block with the first 16 schedule bytes.  The second xor is
       	 * in a non-annulled delay slot, so it runs for every key length.
       	 */
    253	cmp	%o3, 16
    254	fxor	%f4, %f0, %f0
    255	be	1f
    256	 fxor	%f6, %f2, %f2
    257
       	/*
       	 * Longer keys only: run one extra group with schedule bytes
       	 * 0x10..0x4f, then advance %o0 by 0x40 so that the common code below
       	 * picks up the schedule from original offset 0x50 onwards.
       	 */
    258	ldd	[%o0 + 0x10], %f8
    259	ldd	[%o0 + 0x18], %f10
    260	ldd	[%o0 + 0x20], %f12
    261	ldd	[%o0 + 0x28], %f14
    262	ldd	[%o0 + 0x30], %f16
    263	ldd	[%o0 + 0x38], %f18
    264	ldd	[%o0 + 0x40], %f20
    265	ldd	[%o0 + 0x48], %f22
    266	add	%o0, 0x40, %o0
    267
    268	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
    269
    2701:
       	/* Common tail: load 0xc0 schedule bytes from %o0 + 0x10 into %f8..%f54. */
    271	ldd	[%o0 + 0x10], %f8
    272	ldd	[%o0 + 0x18], %f10
    273	ldd	[%o0 + 0x20], %f12
    274	ldd	[%o0 + 0x28], %f14
    275	ldd	[%o0 + 0x30], %f16
    276	ldd	[%o0 + 0x38], %f18
    277	ldd	[%o0 + 0x40], %f20
    278	ldd	[%o0 + 0x48], %f22
    279	ldd	[%o0 + 0x50], %f24
    280	ldd	[%o0 + 0x58], %f26
    281	ldd	[%o0 + 0x60], %f28
    282	ldd	[%o0 + 0x68], %f30
    283	ldd	[%o0 + 0x70], %f32
    284	ldd	[%o0 + 0x78], %f34
    285	ldd	[%o0 + 0x80], %f36
    286	ldd	[%o0 + 0x88], %f38
    287	ldd	[%o0 + 0x90], %f40
    288	ldd	[%o0 + 0x98], %f42
    289	ldd	[%o0 + 0xa0], %f44
    290	ldd	[%o0 + 0xa8], %f46
    291	ldd	[%o0 + 0xb0], %f48
    292	ldd	[%o0 + 0xb8], %f50
    293	ldd	[%o0 + 0xc0], %f52
    294	ldd	[%o0 + 0xc8], %f54
    295
    296	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
    297	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
    298	CAMELLIA_6ROUNDS(40, 0, 2)
       	/*
       	 * Final xor with the last 16 loaded schedule bytes.  The two halves
       	 * are swapped on output: %f2 goes to bytes 0..7, %f0 to bytes 8..15.
       	 */
    299	fxor	%f52, %f2, %f2
    300	fxor	%f54, %f0, %f0
    301
    302	st	%f2, [%o2 + 0x00]
    303	st	%f3, [%o2 + 0x04]
    304	st	%f0, [%o2 + 0x08]
    305	st	%f1, [%o2 + 0x0c]
    306
    307	retl
    308	 VISExit
    309ENDPROC(camellia_sparc64_crypt)
    310
    311	.align	32
       	/*
       	 * camellia_sparc64_load_keys
       	 *
       	 * Loads the first 0xd0 bytes of the key schedule at %o0 into
       	 * %f4..%f54 and returns with them still loaded.  The ECB/CBC routines
       	 * in this file read these registers without loading them first.
       	 *
       	 * %o1 (key_len) is not read in this function.  VISEntry has no
       	 * matching VISExit here; presumably the caller ends the FPU section
       	 * once the bulk routines are done - TODO confirm against the caller.
       	 */
    312ENTRY(camellia_sparc64_load_keys)
    313	/* %o0=key, %o1=key_len */
    314	VISEntry
    315	ldd	[%o0 + 0x00], %f4
    316	ldd	[%o0 + 0x08], %f6
    317	ldd	[%o0 + 0x10], %f8
    318	ldd	[%o0 + 0x18], %f10
    319	ldd	[%o0 + 0x20], %f12
    320	ldd	[%o0 + 0x28], %f14
    321	ldd	[%o0 + 0x30], %f16
    322	ldd	[%o0 + 0x38], %f18
    323	ldd	[%o0 + 0x40], %f20
    324	ldd	[%o0 + 0x48], %f22
    325	ldd	[%o0 + 0x50], %f24
    326	ldd	[%o0 + 0x58], %f26
    327	ldd	[%o0 + 0x60], %f28
    328	ldd	[%o0 + 0x68], %f30
    329	ldd	[%o0 + 0x70], %f32
    330	ldd	[%o0 + 0x78], %f34
    331	ldd	[%o0 + 0x80], %f36
    332	ldd	[%o0 + 0x88], %f38
    333	ldd	[%o0 + 0x90], %f40
    334	ldd	[%o0 + 0x98], %f42
    335	ldd	[%o0 + 0xa0], %f44
    336	ldd	[%o0 + 0xa8], %f46
    337	ldd	[%o0 + 0xb0], %f48
       	/* The last load is issued from the delay slot of retl. */
    338	ldd	[%o0 + 0xb8], %f50
    339	ldd	[%o0 + 0xc0], %f52
    340	retl
    341	 ldd	[%o0 + 0xc8], %f54
    342ENDPROC(camellia_sparc64_load_keys)
    343
    344	.align	32
       	/*
       	 * camellia_sparc64_ecb_crypt_3_grand_rounds
       	 *
       	 * Processes %o2 bytes from %o0 to %o1 in independent 16-byte blocks,
       	 * using three groups of six CAMELLIA_F steps per block.
       	 *
       	 * Expects %f4..%f54 to already hold the key schedule (see
       	 * camellia_sparc64_load_keys); nothing is loaded from %o3 here.
       	 * The loop body runs before the counter is tested and exits only
       	 * when the count reaches exactly zero, so len is assumed to be a
       	 * non-zero multiple of 16.  Input and output use 8-byte ldd/std.
       	 *
       	 * Clobbers %o0, %o1, %o2, the integer condition codes, %f0 and %f2.
       	 */
    345ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds)
    346	/* %o0=input, %o1=output, %o2=len, %o3=key */
    3471:	ldd	[%o0 + 0x00], %f0
    348	ldd	[%o0 + 0x08], %f2
    349	add	%o0, 0x10, %o0
       	/* Initial xor with the first 16 schedule bytes (%f4/%f6). */
    350	fxor	%f4, %f0, %f0
    351	fxor	%f6, %f2, %f2
    352	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
    353	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
    354	CAMELLIA_6ROUNDS(40, 0, 2)
       	/* Final xor; the halves are swapped on output (%f2 is stored first). */
    355	fxor	%f52, %f2, %f2
    356	fxor	%f54, %f0, %f0
    357	std	%f2, [%o1 + 0x00]
    358	std	%f0, [%o1 + 0x08]
    359	subcc	%o2, 0x10, %o2
       	/* The output pointer is advanced in the branch delay slot. */
    360	bne,pt	%icc, 1b
    361	 add	%o1, 0x10, %o1
    362	retl
    363	 nop
    364ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds)
    365
    366	.align	32
       	/*
       	 * camellia_sparc64_ecb_crypt_4_grand_rounds
       	 *
       	 * Processes %o2 bytes from %o0 to %o1 in independent 16-byte blocks,
       	 * using four groups of six CAMELLIA_F steps per block.
       	 *
       	 * Expects %f4..%f54 to already hold the first 0xd0 schedule bytes
       	 * (see camellia_sparc64_load_keys).  The schedule is longer than that
       	 * register set, so %f8..%f22 are reused: after the first group they
       	 * are reloaded from %o3 + 0xd0..0x10f, and while the last group runs
       	 * they are restored from %o3 + 0x10..0x4f for the next block.
       	 *
       	 * The loop body runs before the counter is tested and exits only
       	 * when the count reaches exactly zero, so len is assumed to be a
       	 * non-zero multiple of 16.  Input and output use 8-byte ldd/std.
       	 *
       	 * Clobbers %o0, %o1, %o2, the integer condition codes, %f0 and %f2;
       	 * %f8..%f22 are left holding the values from %o3 + 0x10..0x4f.
       	 */
    367ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds)
    368	/* %o0=input, %o1=output, %o2=len, %o3=key */
    3691:	ldd	[%o0 + 0x00], %f0
    370	ldd	[%o0 + 0x08], %f2
    371	add	%o0, 0x10, %o0
       	/* Initial xor with the first 16 schedule bytes (%f4/%f6). */
    372	fxor	%f4, %f0, %f0
    373	fxor	%f6, %f2, %f2
    374	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
       	/* %f8..%f22 are free now: load the tail of the schedule into them. */
    375	ldd	[%o3 + 0xd0], %f8
    376	ldd	[%o3 + 0xd8], %f10
    377	ldd	[%o3 + 0xe0], %f12
    378	ldd	[%o3 + 0xe8], %f14
    379	ldd	[%o3 + 0xf0], %f16
    380	ldd	[%o3 + 0xf8], %f18
    381	ldd	[%o3 + 0x100], %f20
    382	ldd	[%o3 + 0x108], %f22
    383	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
    384	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
       	/*
       	 * Last six steps, written out so that each pair of key registers can
       	 * be restored from the start of the schedule as soon as it was used.
       	 */
    385	CAMELLIA_F(8, 2, 0, 2)
    386	CAMELLIA_F(10, 0, 2, 0)
    387	ldd	[%o3 + 0x10], %f8
    388	ldd	[%o3 + 0x18], %f10
    389	CAMELLIA_F(12, 2, 0, 2)
    390	CAMELLIA_F(14, 0, 2, 0)
    391	ldd	[%o3 + 0x20], %f12
    392	ldd	[%o3 + 0x28], %f14
    393	CAMELLIA_F(16, 2, 0, 2)
    394	CAMELLIA_F(18, 0, 2, 0)
    395	ldd	[%o3 + 0x30], %f16
    396	ldd	[%o3 + 0x38], %f18
       	/* Final xor with schedule bytes 0x100..0x10f, then restore %f20/%f22. */
    397	fxor	%f20, %f2, %f2
    398	fxor	%f22, %f0, %f0
    399	ldd	[%o3 + 0x40], %f20
    400	ldd	[%o3 + 0x48], %f22
       	/* The halves are swapped on output (%f2 is stored first). */
    401	std	%f2, [%o1 + 0x00]
    402	std	%f0, [%o1 + 0x08]
    403	subcc	%o2, 0x10, %o2
       	/* The output pointer is advanced in the branch delay slot. */
    404	bne,pt	%icc, 1b
    405	 add	%o1, 0x10, %o1
    406	retl
    407	 nop
    408ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds)
    409
    410	.align	32
       	/*
       	 * camellia_sparc64_cbc_encrypt_3_grand_rounds
       	 *
       	 * Chained processing of %o2 bytes from %o0 to %o1 in 16-byte blocks,
       	 * with three groups of six CAMELLIA_F steps per block.  Each input
       	 * block is xored with the previous output block (the IV at %o4 for
       	 * the first block) before the rounds.  The last output block is
       	 * written back to %o4 on return.
       	 *
       	 * Expects %f4..%f54 to already hold the key schedule (see
       	 * camellia_sparc64_load_keys); nothing is loaded from %o3 here.
       	 * The loop body runs before the counter is tested and exits only
       	 * when the count reaches exactly zero, so len is assumed to be a
       	 * non-zero multiple of 16.
       	 *
       	 * Clobbers %o0, %o1, %o2, the integer condition codes, %f0, %f2,
       	 * %f60 and %f62.
       	 */
    411ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds)
    412	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
    413	ldd	[%o4 + 0x00], %f60
    414	ldd	[%o4 + 0x08], %f62
    4151:	ldd	[%o0 + 0x00], %f0
    416	ldd	[%o0 + 0x08], %f2
    417	add	%o0, 0x10, %o0
       	/* Chaining xor (%f60/%f62), then the initial key xor (%f4/%f6). */
    418	fxor	%f60, %f0, %f0
    419	fxor	%f62, %f2, %f2
    420	fxor	%f4, %f0, %f0
    421	fxor	%f6, %f2, %f2
    422	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
    423	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
    424	CAMELLIA_6ROUNDS(40, 0, 2)
       	/*
       	 * Final xor, with the halves swapped.  The result goes straight into
       	 * %f60/%f62 so that it serves as the chaining value for the next
       	 * block.
       	 */
    425	fxor	%f52, %f2, %f60
    426	fxor	%f54, %f0, %f62
    427	std	%f60, [%o1 + 0x00]
    428	std	%f62, [%o1 + 0x08]
    429	subcc	%o2, 0x10, %o2
    430	bne,pt	%icc, 1b
    431	 add	%o1, 0x10, %o1
       	/* Store the chaining value back; the second store is in the delay slot. */
    432	std	%f60, [%o4 + 0x00]
    433	retl
    434	 std	%f62, [%o4 + 0x08]
    435ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds)
    436
    437	.align	32
       	/*
       	 * camellia_sparc64_cbc_encrypt_4_grand_rounds
       	 *
       	 * Chained processing of %o2 bytes from %o0 to %o1 in 16-byte blocks,
       	 * with four groups of six CAMELLIA_F steps per block.  Each input
       	 * block is xored with the previous output block (the IV at %o4 for
       	 * the first block) before the rounds.  The last output block is
       	 * written back to %o4 on return.
       	 *
       	 * Expects %f4..%f54 to already hold the first 0xd0 schedule bytes
       	 * (see camellia_sparc64_load_keys).  %f8..%f22 are reused: after the
       	 * first group they are reloaded from %o3 + 0xd0..0x10f, and while the
       	 * last group runs they are restored from %o3 + 0x10..0x4f.
       	 *
       	 * The loop body runs before the counter is tested and exits only
       	 * when the count reaches exactly zero, so len is assumed to be a
       	 * non-zero multiple of 16.
       	 *
       	 * Clobbers %o0, %o1, %o2, the integer condition codes, %f0, %f2,
       	 * %f60 and %f62.
       	 */
    438ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds)
    439	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
    440	ldd	[%o4 + 0x00], %f60
    441	ldd	[%o4 + 0x08], %f62
    4421:	ldd	[%o0 + 0x00], %f0
    443	ldd	[%o0 + 0x08], %f2
    444	add	%o0, 0x10, %o0
       	/* Chaining xor (%f60/%f62), then the initial key xor (%f4/%f6). */
    445	fxor	%f60, %f0, %f0
    446	fxor	%f62, %f2, %f2
    447	fxor	%f4, %f0, %f0
    448	fxor	%f6, %f2, %f2
    449	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
       	/* %f8..%f22 are free now: load the tail of the schedule into them. */
    450	ldd	[%o3 + 0xd0], %f8
    451	ldd	[%o3 + 0xd8], %f10
    452	ldd	[%o3 + 0xe0], %f12
    453	ldd	[%o3 + 0xe8], %f14
    454	ldd	[%o3 + 0xf0], %f16
    455	ldd	[%o3 + 0xf8], %f18
    456	ldd	[%o3 + 0x100], %f20
    457	ldd	[%o3 + 0x108], %f22
    458	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
    459	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
       	/*
       	 * Last six steps, written out so that each pair of key registers can
       	 * be restored from the start of the schedule as soon as it was used.
       	 */
    460	CAMELLIA_F(8, 2, 0, 2)
    461	CAMELLIA_F(10, 0, 2, 0)
    462	ldd	[%o3 + 0x10], %f8
    463	ldd	[%o3 + 0x18], %f10
    464	CAMELLIA_F(12, 2, 0, 2)
    465	CAMELLIA_F(14, 0, 2, 0)
    466	ldd	[%o3 + 0x20], %f12
    467	ldd	[%o3 + 0x28], %f14
    468	CAMELLIA_F(16, 2, 0, 2)
    469	CAMELLIA_F(18, 0, 2, 0)
    470	ldd	[%o3 + 0x30], %f16
    471	ldd	[%o3 + 0x38], %f18
       	/*
       	 * Final xor with schedule bytes 0x100..0x10f, halves swapped.  The
       	 * result goes into %f60/%f62 as the next chaining value; %f20/%f22
       	 * are then restored.
       	 */
    472	fxor	%f20, %f2, %f60
    473	fxor	%f22, %f0, %f62
    474	ldd	[%o3 + 0x40], %f20
    475	ldd	[%o3 + 0x48], %f22
    476	std	%f60, [%o1 + 0x00]
    477	std	%f62, [%o1 + 0x08]
    478	subcc	%o2, 0x10, %o2
    479	bne,pt	%icc, 1b
    480	 add	%o1, 0x10, %o1
       	/* Store the chaining value back; the second store is in the delay slot. */
    481	std	%f60, [%o4 + 0x00]
    482	retl
    483	 std	%f62, [%o4 + 0x08]
    484ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds)
    485
    486	.align	32
       	/*
       	 * camellia_sparc64_cbc_decrypt_3_grand_rounds
       	 *
       	 * Chained processing of %o2 bytes from %o0 to %o1 in 16-byte blocks,
       	 * with three groups of six CAMELLIA_F steps per block.  Here the
       	 * chaining xor is applied after the rounds: each result is xored
       	 * with the previous input block (the IV at %o4 for the first block).
       	 * The last input block is written back to %o4 on return.
       	 *
       	 * Expects %f4..%f54 to already hold the key schedule (see
       	 * camellia_sparc64_load_keys); nothing is loaded from %o3 here.
       	 * The loop body runs before the counter is tested and exits only
       	 * when the count reaches exactly zero, so len is assumed to be a
       	 * non-zero multiple of 16.
       	 *
       	 * Clobbers %o0, %o1, %o2, the integer condition codes, %f0, %f2 and
       	 * %f56..%f62.
       	 */
    487ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds)
    488	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
    489	ldd	[%o4 + 0x00], %f60
    490	ldd	[%o4 + 0x08], %f62
       	/* The input block is kept in %f56/%f58; the rounds work on %f0/%f2. */
    4911:	ldd	[%o0 + 0x00], %f56
    492	ldd	[%o0 + 0x08], %f58
    493	add	%o0, 0x10, %o0
    494	fxor	%f4, %f56, %f0
    495	fxor	%f6, %f58, %f2
    496	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
    497	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
    498	CAMELLIA_6ROUNDS(40, 0, 2)
       	/* Final key xor, then the chaining xor with the previous input block. */
    499	fxor	%f52, %f2, %f2
    500	fxor	%f54, %f0, %f0
    501	fxor	%f60, %f2, %f2
    502	fxor	%f62, %f0, %f0
       	/* The current input block becomes the next chaining value. */
    503	fsrc2	%f56, %f60
    504	fsrc2	%f58, %f62
       	/* The halves are swapped on output (%f2 is stored first). */
    505	std	%f2, [%o1 + 0x00]
    506	std	%f0, [%o1 + 0x08]
    507	subcc	%o2, 0x10, %o2
    508	bne,pt	%icc, 1b
    509	 add	%o1, 0x10, %o1
       	/* Store the chaining value back; the second store is in the delay slot. */
    510	std	%f60, [%o4 + 0x00]
    511	retl
    512	 std	%f62, [%o4 + 0x08]
    513ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds)
    514
    515	.align	32
       	/*
       	 * camellia_sparc64_cbc_decrypt_4_grand_rounds
       	 *
       	 * Chained processing of %o2 bytes from %o0 to %o1 in 16-byte blocks,
       	 * with four groups of six CAMELLIA_F steps per block.  The chaining
       	 * xor is applied after the rounds: each result is xored with the
       	 * previous input block (the IV at %o4 for the first block).  The last
       	 * input block is written back to %o4 on return.
       	 *
       	 * Expects %f4..%f54 to already hold the first 0xd0 schedule bytes
       	 * (see camellia_sparc64_load_keys).  %f8..%f22 are reused: after the
       	 * first group they are reloaded from %o3 + 0xd0..0x10f, and while the
       	 * last group runs they are restored from %o3 + 0x10..0x4f.
       	 *
       	 * The loop body runs before the counter is tested and exits only
       	 * when the count reaches exactly zero, so len is assumed to be a
       	 * non-zero multiple of 16.
       	 *
       	 * Clobbers %o0, %o1, %o2, the integer condition codes, %f0, %f2 and
       	 * %f56..%f62.
       	 */
    516ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds)
    517	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
    518	ldd	[%o4 + 0x00], %f60
    519	ldd	[%o4 + 0x08], %f62
       	/* The input block is kept in %f56/%f58; the rounds work on %f0/%f2. */
    5201:	ldd	[%o0 + 0x00], %f56
    521	ldd	[%o0 + 0x08], %f58
    522	add	%o0, 0x10, %o0
    523	fxor	%f4, %f56, %f0
    524	fxor	%f6, %f58, %f2
    525	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
       	/* %f8..%f22 are free now: load the tail of the schedule into them. */
    526	ldd	[%o3 + 0xd0], %f8
    527	ldd	[%o3 + 0xd8], %f10
    528	ldd	[%o3 + 0xe0], %f12
    529	ldd	[%o3 + 0xe8], %f14
    530	ldd	[%o3 + 0xf0], %f16
    531	ldd	[%o3 + 0xf8], %f18
    532	ldd	[%o3 + 0x100], %f20
    533	ldd	[%o3 + 0x108], %f22
    534	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
    535	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
       	/*
       	 * Last six steps, written out so that each pair of key registers can
       	 * be restored from the start of the schedule as soon as it was used.
       	 */
    536	CAMELLIA_F(8, 2, 0, 2)
    537	CAMELLIA_F(10, 0, 2, 0)
    538	ldd	[%o3 + 0x10], %f8
    539	ldd	[%o3 + 0x18], %f10
    540	CAMELLIA_F(12, 2, 0, 2)
    541	CAMELLIA_F(14, 0, 2, 0)
    542	ldd	[%o3 + 0x20], %f12
    543	ldd	[%o3 + 0x28], %f14
    544	CAMELLIA_F(16, 2, 0, 2)
    545	CAMELLIA_F(18, 0, 2, 0)
    546	ldd	[%o3 + 0x30], %f16
    547	ldd	[%o3 + 0x38], %f18
       	/* Final xor with schedule bytes 0x100..0x10f, then restore %f20/%f22. */
    548	fxor	%f20, %f2, %f2
    549	fxor	%f22, %f0, %f0
    550	ldd	[%o3 + 0x40], %f20
    551	ldd	[%o3 + 0x48], %f22
       	/* Chaining xor with the previous input block (or the IV). */
    552	fxor	%f60, %f2, %f2
    553	fxor	%f62, %f0, %f0
       	/* The current input block becomes the next chaining value. */
    554	fsrc2	%f56, %f60
    555	fsrc2	%f58, %f62
       	/* The halves are swapped on output (%f2 is stored first). */
    556	std	%f2, [%o1 + 0x00]
    557	std	%f0, [%o1 + 0x08]
    558	subcc	%o2, 0x10, %o2
    559	bne,pt	%icc, 1b
    560	 add	%o1, 0x10, %o1
       	/* Store the chaining value back; the second store is in the delay slot. */
    561	std	%f60, [%o4 + 0x00]
    562	retl
    563	 std	%f62, [%o4 + 0x08]
    564ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds)