cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

copy_page.S (6226B)


/* SPDX-License-Identifier: GPL-2.0 */
/*
 * copy_page, __copy_user_page, __copy_user implementation of SuperH
 *
 * Copyright (C) 2001  Niibe Yutaka & Kaz Kojima
 * Copyright (C) 2002  Toshinobu Sugioka
 * Copyright (C) 2006  Paul Mundt
 */
#include <linux/linkage.h>
#include <asm/page.h>

/*
 * copy_page
 * @to: P1 address
 * @from: P1 address
 *
 * void copy_page(void *to, void *from)
 */

/*
 * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
 * r8 --- from + PAGE_SIZE
 * r9 --- not used
 * r10 --- to
 * r11 --- from
 */
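/*
 * For orientation only, a rough C-level sketch of the loop below (the
 * variable names here are illustrative and not part of this file); the
 * real routine copies in 32-byte chunks and, on SH-4, uses movca.l for
 * the first store of each chunk:
 *
 *	void copy_page(void *to, void *from)
 *	{
 *		unsigned long *d = to, *s = from;
 *		unsigned long *end = (unsigned long *)((char *)from + PAGE_SIZE);
 *
 *		while (s != end) {
 *			d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
 *			d[4] = s[4]; d[5] = s[5]; d[6] = s[6]; d[7] = s[7];
 *			d += 8;
 *			s += 8;
 *		}
 *	}
 */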
ENTRY(copy_page)
	mov.l	r8,@-r15
	mov.l	r10,@-r15
	mov.l	r11,@-r15
	mov	r4,r10
	mov	r5,r11
	mov	r5,r8
	mov	#(PAGE_SIZE >> 10), r0
	shll8	r0
	shll2	r0
	add	r0,r8
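	! shll8 + shll2 shift r0 left by 10 bits in total, so r0 becomes
	! (PAGE_SIZE >> 10) << 10 == PAGE_SIZE (e.g. 4 << 10 = 4096 for
	! 4 KiB pages); mov #imm only takes an 8-bit immediate, which is
	! why the constant is rebuilt this way.  r8 now marks the end of
	! the source page.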
	!
1:	mov.l	@r11+,r0
	mov.l	@r11+,r1
	mov.l	@r11+,r2
	mov.l	@r11+,r3
	mov.l	@r11+,r4
	mov.l	@r11+,r5
	mov.l	@r11+,r6
	mov.l	@r11+,r7
#if defined(CONFIG_CPU_SH4)
	movca.l	r0,@r10
#else
	mov.l	r0,@r10
#endif
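	! On SH-4, movca.l stores r0 while allocating the cache line for
	! @r10 without first reading the old destination data from
	! memory, saving a pointless fetch of the page being overwritten.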
	add	#32,r10
	mov.l	r7,@-r10
	mov.l	r6,@-r10
	mov.l	r5,@-r10
	mov.l	r4,@-r10
	mov.l	r3,@-r10
	mov.l	r2,@-r10
	mov.l	r1,@-r10
	cmp/eq	r11,r8
	bf/s	1b
	 add	#28,r10
	!
	mov.l	@r15+,r11
	mov.l	@r15+,r10
	mov.l	@r15+,r8
	rts
	 nop

/*
 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
 * Return the number of bytes NOT copied
 */
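/*
 * In other words: a return value of 0 means the whole range was
 * copied, and a non-zero return is how many trailing bytes could not
 * be copied because a load or store faulted.  Illustrative caller
 * sketch (the names below are not from this file):
 *
 *	__kernel_size_t left = __copy_user(dst, src, len);
 *	if (left)
 *		return -EFAULT;		// only (len - left) bytes arrived
 */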
#define EX(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6000f	;	\
	.previous
#define EX_NO_POP(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6005f	;	\
	.previous
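/*
 * Each EX()/EX_NO_POP() wrapper records an __ex_table entry pairing
 * the address of the wrapped load/store (9999b) with a fixup address
 * (6000f or 6005f below).  If the access faults, the page fault
 * handler finds the entry and resumes at the fixup, which computes
 * how many bytes were left uncopied and returns that value.
 * EX_NO_POP is used on the short-copy path where r8-r11 were never
 * pushed, so its fixup must return without popping them.
 */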
ENTRY(__copy_user)
	! Check if small number of bytes
	mov	#11,r0
	mov	r4,r3
	cmp/gt	r0,r6		! r6 (len) > r0 (11)
	bf/s	.L_cleanup_loop_no_pop
	 add	r6,r3		! last destination address

	! Calculate bytes needed to align to src
	mov.l	r11,@-r15
	neg	r5,r0
	mov.l	r10,@-r15
	add	#4,r0
	mov.l	r9,@-r15
	and	#3,r0
	mov.l	r8,@-r15
	tst	r0,r0
	bt	2f

1:
	! Copy bytes to long word align src
EX(	mov.b	@r5+,r1		)
	dt	r0
	add	#-1,r6
EX(	mov.b	r1,@r4		)
	bf/s	1b
	 add	#1,r4

	! Jump to appropriate routine depending on dest
2:	mov	#3,r1
	mov	r6, r2
	and	r4,r1
	shlr2	r2
	shll2	r1
	mova	.L_jump_tbl,r0
	mov.l	@(r0,r1),r1
	jmp	@r1
	 nop

	.align 2
.L_jump_tbl:
	.long	.L_dest00
	.long	.L_dest01
	.long	.L_dest10
	.long	.L_dest11
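	! The dispatch above indexes this table with (dest & 3) * 4:
	! r1 holds the destination alignment scaled to a byte offset
	! into the table of .long entries.  By this point the source is
	! longword-aligned and r2 holds the whole-longword count
	! (len >> 2) for whichever routine is chosen.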

/*
 * Come here if there are fewer than 12 bytes to copy
 *
 * Keep the branch target close, so the bf/s displacement doesn't
 * overflow and force a more expensive branch to be inserted. This is
 * the fast path for small copies; the jump via the jump table hits the
 * default slow-path cleanup. -PFM.
 */
.L_cleanup_loop_no_pop:
	tst	r6,r6		! Check explicitly for zero
	bt	1f

2:
EX_NO_POP(	mov.b	@r5+,r0		)
	dt	r6
EX_NO_POP(	mov.b	r0,@r4		)
	bf/s	2b
	 add	#1,r4

1:	mov	#0,r0		! normal return
5000:

# Exception handler:
.section .fixup, "ax"
6005:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0
	.align	2
8000:	.long	5000b
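	! At entry r3 was set to the final destination address
	! (dest + len), and r4 still points at the first byte that was
	! not successfully written, so r3 - r4 is the number of bytes
	! left uncopied.  5000b above then returns it directly; r8-r11
	! were never saved on this short-copy path, so nothing is popped.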

.previous
	rts
	 nop

! Destination = 00

.L_dest00:
	! Skip the large copy for small transfers
	mov	#(32+32-4), r0
	cmp/gt	r6, r0		! r0 (60) > r6 (len)
	bt	1f

	! Align dest to a 32 byte boundary
	neg	r4,r0
	add	#0x20, r0
	and	#0x1f, r0
	tst	r0, r0
	bt	2f

	sub	r0, r6
	shlr2	r0
3:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	3b
	 add	#4,r4

2:
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r2		)
EX(	mov.l	@r5+,r7		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.l	@r5+,r11	)
#ifdef CONFIG_CPU_SH4
EX(	movca.l	r0,@r4		)
#else
EX(	mov.l	r0,@r4		)
#endif
	add	#-32, r6
EX(	mov.l	r1,@(4,r4)	)
	mov	#32, r0
EX(	mov.l	r2,@(8,r4)	)
	cmp/gt	r6, r0		! r0 (32) > r6 (len)
EX(	mov.l	r7,@(12,r4)	)
EX(	mov.l	r8,@(16,r4)	)
EX(	mov.l	r9,@(20,r4)	)
EX(	mov.l	r10,@(24,r4)	)
EX(	mov.l	r11,@(28,r4)	)
	bf/s	2b
	 add	#32,r4

1:	mov	r6, r0
	shlr2	r0
	tst	r0, r0
	bt	.L_cleanup
1:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 10

.L_dest10:
	mov	r2,r7
	shlr2	r7
	shlr	r7
	tst	r7,r7
	mov	#7,r0
	bt/s	1f
	 and	r0,r2
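	! At this point the source (r5) is longword-aligned but the
	! destination is only 2-byte aligned, so each aligned output
	! longword straddles two input longwords.  The unrolled loop
	! below loads 8 longwords per pass and uses xtrct (which forms
	! Rn = (Rm << 16) | (Rn >> 16)) to recombine neighbouring
	! halves; r7 counts 32-byte blocks, r2 keeps the leftover
	! longwords for the tail loop.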
2:
	dt	r7
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.w	r0,@r4		)
	add	#2,r4
	xtrct	r1,r0
	xtrct	r8,r1
	xtrct	r9,r8
	xtrct	r10,r9

EX(	mov.l	r0,@r4		)
EX(	mov.l	r1,@(4,r4)	)
EX(	mov.l	r8,@(8,r4)	)
EX(	mov.l	r9,@(12,r4)	)

EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r0		)
	xtrct	r1,r10
	xtrct	r8,r1
	xtrct	r0,r8
	shlr16	r0
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	r1,@(20,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.w	r0,@(28,r4)	)
	bf/s	2b
	 add	#30,r4
#else
EX(	mov.l	@(28,r5),r0	)
EX(	mov.l	@(24,r5),r8	)
EX(	mov.l	@(20,r5),r9	)
EX(	mov.l	@(16,r5),r10	)
EX(	mov.w	r0,@(30,r4)	)
	add	#-2,r4
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(28,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.l	r9,@(20,r4)	)

EX(	mov.l	@(12,r5),r0	)
EX(	mov.l	@(8,r5),r8	)
	xtrct	r0,r10
EX(	mov.l	@(4,r5),r9	)
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	@r5,r10		)
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(12,r4)	)
EX(	mov.l	r8,@(8,r4)	)
	swap.w	r10,r0
EX(	mov.l	r9,@(4,r4)	)
EX(	mov.w	r0,@(2,r4)	)

	add	#32,r5
	bf/s	2b
	 add	#34,r4
#endif
	tst	r2,r2
	bt	.L_cleanup

1:	! Read longword, write two words per iteration
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.w	r0,@(2,r4)	)
#else
EX(	mov.w	r0,@(2,r4)	)
	shlr16	r0
EX(	mov.w	r0,@r4		)
#endif
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 01 or 11

.L_dest01:
.L_dest11:
	! Read longword, write byte, word, byte per iteration
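	! Both odd destination alignments share this path: word and
	! longword stores must be naturally aligned on SH, so nothing
	! wider than a byte can start at an odd address.  Each source
	! longword is therefore emitted as byte + word + byte, and the
	! destination stays odd as it advances by 4 each iteration.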
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.b	r0,@r4		)
	shlr8	r0
	add	#1,r4
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.b	r0,@(2,r4)	)
	bf/s	.L_dest01
	 add	#3,r4
#else
EX(	mov.b	r0,@(3,r4)	)
	shlr8	r0
	swap.w	r0,r7
EX(	mov.b	r7,@r4		)
	add	#1,r4
EX(	mov.w	r0,@r4		)
	bf/s	.L_dest01
	 add	#3,r4
#endif

! Cleanup last few bytes
.L_cleanup:
	mov	r6,r0
	and	#3,r0
	tst	r0,r0
	bt	.L_exit
	mov	r0,r6

.L_cleanup_loop:
EX(	mov.b	@r5+,r0		)
	dt	r6
EX(	mov.b	r0,@r4		)
	bf/s	.L_cleanup_loop
	 add	#1,r4

.L_exit:
	mov	#0,r0		! normal return

5000:

# Exception handler:
.section .fixup, "ax"
6000:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0
	.align	2
8000:	.long	5000b

.previous
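	! Fall-through from 5000: above (also the resume point of the
	! 6000 fixup): restore the callee-saved registers pushed at
	! entry.  The final pop of r11 sits in the rts delay slot and
	! still executes before the return completes.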
	mov.l	@r15+,r8
	mov.l	@r15+,r9
	mov.l	@r15+,r10
	rts
	 mov.l	@r15+,r11