cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

lusercopy.S (8979B)


/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *    User Space Access Routines
 *
 *    Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 *    Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 *    Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 *    Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 *    Copyright (C) 2017 Helge Deller <deller@gmx.de>
 *    Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */


	.text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

	/*
	 * unsigned long lclear_user(void *to, unsigned long n)
	 *
	 * Returns 0 on success; otherwise, returns the number of bytes
	 * not cleared.
	 */
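
	/*
	 * A minimal C sketch of that contract, for orientation only.  The
	 * clear_user_byte() helper is hypothetical; the real routine below
	 * stores through sr3 and relies on the exception table for faults.
	 *
	 *	unsigned long lclear_user(void *to, unsigned long n)
	 *	{
	 *		char *p = to;
	 *
	 *		while (n) {
	 *			if (clear_user_byte(p++))	// nonzero on fault
	 *				break;
	 *			n--;
	 *		}
	 *		return n;	// bytes left uncleared
	 *	}
	 */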

ENTRY_CFI(lclear_user)
	comib,=,n   0,%r25,$lclu_done
$lclu_loop:
	addib,<>    -1,%r25,$lclu_loop
1:	stbs,ma     %r0,1(%sr3,%r26)

$lclu_done:
	bv          %r0(%r2)
	copy        %r25,%r28

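	/*
	 * Fault fixup: the store at 1: sits in the delay slot of the addib,
	 * so %r25 has already been decremented; re-add the byte that was
	 * not cleared before returning.
	 */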
2:	b           $lclu_done
	ldo         1(%r25),%r25

	ASM_EXCEPTIONTABLE_ENTRY(1b,2b)
ENDPROC_CFI(lclear_user)


/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains the space of the source region
 * - sr2 already contains the space of the destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are used to get the best performance under various
 * conditions. In the optimal case, we copy in loops that move 32 or 16 bytes
 * at a time using general registers.  Unaligned copies are handled either by
 * aligning the destination and then using the shift-and-write method, or in
 * a few cases by falling back to a byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version of memcpy
 * (written in C) is actually quite fast already. This routine is able to beat
 * it by 30-40% for aligned copies because of the loop unrolling, but in some
 * cases the glibc version is still slightly faster. This lends credibility to
 * the idea that gcc can generate very good code as long as we are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that those were only efficient
 *   on old machines (pre-PA8000 processors).
 */
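
/*
 * A rough, self-contained C model of the dispatch logic below (a sketch
 * only: it ignores the space registers and fault handling, and it replaces
 * the shift-merge path for differently aligned operands with a plain byte
 * loop; the 16-byte inner loop stands in for the unrolled 16/32-byte loops).
 *
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	// returns the number of bytes NOT copied; always 0 here, since
 *	// this model cannot fault
 *	static unsigned long copy_model(void *dstp, const void *srcp,
 *					unsigned long len)
 *	{
 *		unsigned char *d = dstp;
 *		const unsigned char *s = srcp;
 *		uint32_t t1, t2, t3, t4;
 *
 *		if (len >= 16 && !(((uintptr_t)d ^ (uintptr_t)s) & 3)) {
 *			// same word alignment: align dst byte by byte ...
 *			while ((uintptr_t)d & 3) {
 *				*d++ = *s++; len--;
 *			}
 *			// ... then move 16 bytes per iteration
 *			while (len >= 16) {
 *				memcpy(&t1, s, 4);  memcpy(&t2, s + 4, 4);
 *				memcpy(&t3, s + 8, 4);  memcpy(&t4, s + 12, 4);
 *				memcpy(d, &t1, 4);  memcpy(d + 4, &t2, 4);
 *				memcpy(d + 8, &t3, 4);  memcpy(d + 12, &t4, 4);
 *				s += 16; d += 16; len -= 16;
 *			}
 *		}
 *		// tail bytes, short copies, differently aligned operands
 *		while (len) {
 *			*d++ = *s++; len--;
 *		}
 *		return 0;
 *	}
 */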

	dst = arg0
	src = arg1
	len = arg2
	end = arg3
	t1  = r19
	t2  = r20
	t3  = r21
	t4  = r22
	srcspc = sr1
	dstspc = sr2

	t0 = r1
	a1 = t1
	a2 = t2
	a3 = t3
	a0 = t4

	save_src = ret0
	save_dst = ret1
	save_len = r31

ENTRY_CFI(pa_memcpy)
	/* Last destination address */
	add	dst,len,end

	/* short copy with less than 16 bytes? */
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

	/* same alignment? */
	xor	src,dst,t0
	extru	t0,31,2,t1
	cmpib,<>,n  0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
	/* only do 64-bit copies if we can get aligned. */
	extru	t0,31,3,t1
	cmpib,<>,n  0,t1,.Lalign_loop32

	/* loop until we are 64-bit aligned */
.Lalign_loop64:
	extru	dst,31,3,t1
	cmpib,=,n	0,t1,.Lcopy_loop_16_start
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop64
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_loop_16_start:
	ldi	31,t0
.Lcopy_loop_16:
	cmpb,COND(>>=),n t0,len,.Lword_loop

10:	ldd	0(srcspc,src),t1
11:	ldd	8(srcspc,src),t2
	ldo	16(src),src
12:	std,ma	t1,8(dstspc,dst)
13:	std,ma	t2,8(dstspc,dst)
14:	ldd	0(srcspc,src),t1
15:	ldd	8(srcspc,src),t2
	ldo	16(src),src
16:	std,ma	t1,8(dstspc,dst)
17:	std,ma	t2,8(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_16
	ldo	-32(len),len

.Lword_loop:
	cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:	ldw,ma	4(srcspc,src),t1
21:	stw,ma	t1,4(dstspc,dst)
	b	.Lword_loop
	ldo	-4(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

	/* loop until we are 32-bit aligned */
.Lalign_loop32:
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_loop_8
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop32
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_8:
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:	ldw	0(srcspc,src),t1
11:	ldw	4(srcspc,src),t2
12:	stw,ma	t1,4(dstspc,dst)
13:	stw,ma	t2,4(dstspc,dst)
14:	ldw	8(srcspc,src),t1
15:	ldw	12(srcspc,src),t2
	ldo	16(src),src
16:	stw,ma	t1,4(dstspc,dst)
17:	stw,ma	t2,4(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_8
	ldo	-16(len),len

.Lbyte_loop:
	cmpclr,COND(<>) len,%r0,%r0
	b,n	.Lcopy_done
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lbyte_loop
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_done:
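	/* return the number of bytes not copied (end - dst); the sub
	   executes in the branch delay slot */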
	bv	%r0(%r2)
	sub	end,dst,ret0


	/* src and dst are not aligned the same way. */
	/* need to go the hard way */
.Lunaligned_copy:
	/* align until dst is 32-bit word aligned */
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_dstaligned
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lunaligned_copy
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

	/* store src, dst and len in safe place */
	copy	src,save_src
	copy	dst,save_dst
	copy	len,save_len

	/* len now needs to give the number of words to copy */
	SHRREG	len,2,len

	/*
	 * Copy from an unaligned src to an aligned dst using shifts.
	 * Handles 4 words per loop.
	 */
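
	/*
	 * The merge step in C, as a sketch; wsrc, wdst, shift and words are
	 * illustrative names only.  PA-RISC is big-endian, so the earlier
	 * word supplies the high-order bits, and on this path
	 * 0 < shift < 32, because src and dst differ in word alignment
	 * while dst has just been word-aligned.
	 *
	 *	// shift = 8 * (src & 3); %sar below is set to 32 - shift
	 *	unsigned int prev = *wsrc++;	// wsrc = src rounded down
	 *	while (words--) {
	 *		unsigned int cur = *wsrc++;
	 *		*wdst++ = (prev << shift) | (cur >> (32 - shift));
	 *		prev = cur;
	 *	}
	 */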

	depw,z src,28,2,t0
	subi 32,t0,t0
	mtsar t0
	extru len,31,2,t0
	cmpib,= 2,t0,.Lcase2
	/* Make src aligned by rounding it down.  */
	depi 0,31,2,src

	cmpiclr,<> 3,t0,%r0
	b,n .Lcase3
	cmpiclr,<> 1,t0,%r0
	b,n .Lcase1
.Lcase0:
	cmpb,COND(=) %r0,len,.Lcda_finish
	nop

1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b,n .Ldo3
.Lcase1:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	ldo -1(len),len
	cmpb,COND(=),n %r0,len,.Ldo0
.Ldo4:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a3, a0, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a0, a1, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a1, a2, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
	ldo -4(len),len
	cmpb,COND(<>) %r0,len,.Ldo4
	nop
.Ldo0:
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

.Lcda_rdfault:
.Lcda_finish:
	/* calculate new src, dst and len and jump to byte-copy loop */
	sub	dst,save_dst,t0
	add	save_src,t0,src
	b	.Lbyte_loop
	sub	save_len,t0,len

.Lcase3:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo2
	ldo 1(len),len
.Lcase2:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo1
	ldo 2(len),len


	/* fault exception fixup handlers: */
#ifdef CONFIG_64BIT
.Lcopy16_fault:
	b	.Lcopy_done
10:	std,ma	t1,8(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
	b	.Lcopy_done
10:	stw,ma	t1,4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
ENDPROC_CFI(pa_memcpy)

	.end