usercopy.S - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
usercopy.S (7519B)
      1/*
      2 *  arch/xtensa/lib/usercopy.S
      3 *
      4 *  Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
      5 *
      6 *  DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
      7 *  It needs to remain separate and distinct.  The hal files are part
      8 *  of the Xtensa link-time HAL, and those files may differ per
      9 *  processor configuration.  Patching the kernel for another
     10 *  processor configuration includes replacing the hal files, and we
     11 *  could lose the special functionality for accessing user-space
     12 *  memory during such a patch.  We sacrifice a little code space here
     13 *  in favor of simpler code maintenance.
     14 *
     15 *  This file is subject to the terms and conditions of the GNU General
     16 *  Public License.  See the file "COPYING" in the main directory of
     17 *  this archive for more details.
     18 *
     19 *  Copyright (C) 2002 Tensilica Inc.
     20 */
     21
     22
     23/*
     24 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
     25 *
     26 * The returned value is the number of bytes not copied.  Implies zero
     27 * is success.
     28 *
     29 * The general case algorithm is as follows:
     30 *   If the destination and source are both aligned,
     31 *     do 16B chunks with a loop, and then finish up with
     32 *     8B, 4B, 2B, and 1B copies conditional on the length.
     33 *   If destination is aligned and source unaligned,
     34 *     do the same, but use SRC to align the source data.
     35 *   If destination is unaligned, align it by conditionally
     36 *     copying 1B and 2B and then retest.
     37 *   This code tries to use fall-through branches for the common
     38 *     case of aligned destinations (except for the branches to
     39 *     the alignment label).
     40 *
     41 * Register use:
     42 *	a0/ return address
     43 *	a1/ stack pointer
     44 *	a2/ return value
     45 *	a3/ src
     46 *	a4/ length
     47 *	a5/ dst
     48 *	a6/ tmp
     49 *	a7/ tmp
     50 *	a8/ tmp
     51 *	a9/ tmp
     52 *	a10/ tmp
     53 *	a11/ original length
     54 */
     55
     56#include <linux/linkage.h>
     57#include <asm/asmmacro.h>
     58#include <asm/core.h>
     59
     60	.text
     61ENTRY(__xtensa_copy_user)
     62
     63#if !XCHAL_HAVE_LOOPS && defined(__XTENSA_CALL0_ABI__)
     64#define STACK_SIZE 4	/* one word: spill slot for a10 in .Lsrcunaligned */
     65#else
     66#define STACK_SIZE 0
     67#endif
     68	abi_entry(STACK_SIZE)
     69	# a2/ dst, a3/ src, a4/ len
     70	mov	a5, a2		# copy dst so that a2 is return value
     71	mov	a11, a4		# preserve original len for error case
     72.Lcommon:
     73	bbsi.l	a2, 0, .Ldst1mod2 # if dst is 1 mod 2
     74	bbsi.l	a2, 1, .Ldst2mod4 # if dst is 2 mod 4
     75.Ldstaligned:	# return here from .Ldst1mod2/.Ldst2mod4 once dst is aligned
     76	srli	a7, a4, 4	# number of loop iterations with 16B
     77				# per iteration
     78	movi	a8, 3		  # if source is also aligned,
     79	bnone	a3, a8, .Laligned # then use word copy
     80	__ssa8	a3		# set shift amount from byte offset
     81	bnez	a4, .Lsrcunaligned
     82	movi	a2, 0		# return success for len==0
     83	abi_ret(STACK_SIZE)
     84
     85/*
     86 * Destination is unaligned
     87 */
     88
     89.Ldst1mod2:	# dst is only byte aligned
     90	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte
     91
     92	# copy 1 byte
     93EX(10f)	l8ui	a6, a3, 0
     94	addi	a3, a3,  1
     95EX(10f)	s8i	a6, a5,  0
     96	addi	a5, a5,  1
     97	addi	a4, a4, -1
     98	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
     99					# return to main algorithm
    100.Ldst2mod4:	# dst 16-bit aligned
    101	# copy 2 bytes
    102	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
    103EX(10f)	l8ui	a6, a3, 0
    104EX(10f)	l8ui	a7, a3, 1
    105	addi	a3, a3,  2
    106EX(10f)	s8i	a6, a5,  0
    107EX(10f)	s8i	a7, a5,  1
    108	addi	a5, a5,  2
    109	addi	a4, a4, -2
    110	j	.Ldstaligned	# dst is now aligned, return to main algorithm
    111
    112/*
    113 * Byte by byte copy
    114 */
    115	.align	4
    116	.byte	0		# 1 mod 4 alignment for LOOPNEZ
    117				# (0 mod 4 alignment for LBEG)
    118.Lbytecopy:
    119#if XCHAL_HAVE_LOOPS
    120	loopnez	a4, .Lbytecopydone
    121#else /* !XCHAL_HAVE_LOOPS */
    122	beqz	a4, .Lbytecopydone
    123	add	a7, a3, a4	# a7 = end address for source
    124#endif /* !XCHAL_HAVE_LOOPS */
    125.Lnextbyte:
    126EX(10f)	l8ui	a6, a3, 0
    127	addi	a3, a3, 1
    128EX(10f)	s8i	a6, a5, 0
    129	addi	a5, a5, 1
    130#if !XCHAL_HAVE_LOOPS
    131	bne	a3, a7, .Lnextbyte	# a3 steps by 1 and meets a7 exactly; a signed blt mis-orders addresses across 0x80000000
    132#endif /* !XCHAL_HAVE_LOOPS */
    133.Lbytecopydone:
    134	movi	a2, 0		# return success for len bytes copied
    135	abi_ret(STACK_SIZE)
    136
    137/*
    138 * Destination and source are word-aligned.
    139 */
    140	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
    141	.align	4		# 1 mod 4 alignment for LOOPNEZ
    142	.byte	0		# (0 mod 4 alignment for LBEG)
    143.Laligned:
    144#if XCHAL_HAVE_LOOPS
    145	loopnez	a7, .Loop1done
    146#else /* !XCHAL_HAVE_LOOPS */
    147	beqz	a7, .Loop1done
    148	slli	a8, a7, 4
    149	add	a8, a8, a3	# a8 = end of last 16B source chunk
    150#endif /* !XCHAL_HAVE_LOOPS */
    151.Loop1:
    152EX(10f)	l32i	a6, a3,  0
    153EX(10f)	l32i	a7, a3,  4
    154EX(10f)	s32i	a6, a5,  0
    155EX(10f)	l32i	a6, a3,  8
    156EX(10f)	s32i	a7, a5,  4
    157EX(10f)	l32i	a7, a3, 12
    158EX(10f)	s32i	a6, a5,  8
    159	addi	a3, a3, 16
    160EX(10f)	s32i	a7, a5, 12
    161	addi	a5, a5, 16
    162#if !XCHAL_HAVE_LOOPS
    163	bne	a3, a8, .Loop1	# a3 steps by 16 and meets a8 exactly (sign-agnostic address test)
    164#endif /* !XCHAL_HAVE_LOOPS */
    165.Loop1done:
    166	bbci.l	a4, 3, .L2
    167	# copy 8 bytes
    168EX(10f)	l32i	a6, a3,  0
    169EX(10f)	l32i	a7, a3,  4
    170	addi	a3, a3,  8
    171EX(10f)	s32i	a6, a5,  0
    172EX(10f)	s32i	a7, a5,  4
    173	addi	a5, a5,  8
    174.L2:
    175	bbci.l	a4, 2, .L3
    176	# copy 4 bytes
    177EX(10f)	l32i	a6, a3,  0
    178	addi	a3, a3,  4
    179EX(10f)	s32i	a6, a5,  0
    180	addi	a5, a5,  4
    181.L3:
    182	bbci.l	a4, 1, .L4
    183	# copy 2 bytes
    184EX(10f)	l16ui	a6, a3,  0
    185	addi	a3, a3,  2
    186EX(10f)	s16i	a6, a5,  0
    187	addi	a5, a5,  2
    188.L4:
    189	bbci.l	a4, 0, .L5
    190	# copy 1 byte
    191EX(10f)	l8ui	a6, a3,  0
    192EX(10f)	s8i	a6, a5,  0
    193.L5:
    194	movi	a2, 0		# return success for len bytes copied
    195	abi_ret(STACK_SIZE)
    196
    197/*
    198 * Destination is aligned, Source is unaligned
    199 */
    200
    201	.align	4
    202	.byte	0		# 1 mod 4 alignment for LOOPNEZ
    203				# (0 mod 4 alignment for LBEG)
    204.Lsrcunaligned:
    205	# copy 16 bytes per iteration for word-aligned dst and unaligned src
    206	and	a10, a3, a8	# save unalignment offset for below
    207	sub	a3, a3, a10	# align a3 (to avoid sim warnings only; not needed for hardware)
    208EX(10f)	l32i	a6, a3, 0	# load first word
    209#if XCHAL_HAVE_LOOPS
    210	loopnez	a7, .Loop2done
    211#else /* !XCHAL_HAVE_LOOPS */
    212	beqz	a7, .Loop2done
    213#if defined(__XTENSA_CALL0_ABI__)
    214	s32i	a10, a1, 0	# spill the offset; a10 is reused as the loop bound
    215	slli	a10, a7, 4
    216	add	a10, a10, a3	# a10 = end of last 16B source chunk
    217#else
    218	slli	a12, a7, 4
    219	add	a12, a12, a3	# a12 = end of last 16B source chunk
    220#endif
    221#endif /* !XCHAL_HAVE_LOOPS */
    222.Loop2:
    223EX(10f)	l32i	a7, a3,  4
    224EX(10f)	l32i	a8, a3,  8
    225	__src_b	a6, a6, a7
    226EX(10f)	s32i	a6, a5,  0
    227EX(10f)	l32i	a9, a3, 12
    228	__src_b	a7, a7, a8
    229EX(10f)	s32i	a7, a5,  4
    230EX(10f)	l32i	a6, a3, 16
    231	__src_b	a8, a8, a9
    232EX(10f)	s32i	a8, a5,  8
    233	addi	a3, a3, 16
    234	__src_b	a9, a9, a6
    235EX(10f)	s32i	a9, a5, 12
    236	addi	a5, a5, 16
    237#if !XCHAL_HAVE_LOOPS
    238#if defined(__XTENSA_CALL0_ABI__)
    239	bne	a3, a10, .Loop2	# a3 steps by 16 and meets a10 exactly (sign-agnostic address test)
    240	l32i	a10, a1, 0	# reload the unalignment offset spilled before the loop
    241#else
    242	bne	a3, a12, .Loop2	# a3 steps by 16 and meets a12 exactly (sign-agnostic address test)
    243#endif
    244#endif /* !XCHAL_HAVE_LOOPS */
    245.Loop2done:
    246	bbci.l	a4, 3, .L12
    247	# copy 8 bytes
    248EX(10f)	l32i	a7, a3,  4
    249EX(10f)	l32i	a8, a3,  8
    250	__src_b	a6, a6, a7
    251EX(10f)	s32i	a6, a5,  0
    252	addi	a3, a3,  8
    253	__src_b	a7, a7, a8
    254EX(10f)	s32i	a7, a5,  4
    255	addi	a5, a5,  8
    256	mov	a6, a8		# a8 is the pending word for the next __src_b
    257.L12:
    258	bbci.l	a4, 2, .L13
    259	# copy 4 bytes
    260EX(10f)	l32i	a7, a3,  4
    261	addi	a3, a3,  4
    262	__src_b	a6, a6, a7
    263EX(10f)	s32i	a6, a5,  0
    264	addi	a5, a5,  4
    265	mov	a6, a7
    266.L13:
    267	add	a3, a3, a10	# readjust a3 with correct misalignment
    268	bbci.l	a4, 1, .L14
    269	# copy 2 bytes
    270EX(10f)	l8ui	a6, a3,  0
    271EX(10f)	l8ui	a7, a3,  1
    272	addi	a3, a3,  2
    273EX(10f)	s8i	a6, a5,  0
    274EX(10f)	s8i	a7, a5,  1
    275	addi	a5, a5,  2
    276.L14:
    277	bbci.l	a4, 0, .L15
    278	# copy 1 byte
    279EX(10f)	l8ui	a6, a3,  0
    280EX(10f)	s8i	a6, a5,  0
    281.L15:
    282	movi	a2, 0		# return success for len bytes copied
    283	abi_ret(STACK_SIZE)
    284
    285ENDPROC(__xtensa_copy_user)
    286
    287	.section .fixup, "ax"
    288	.align	4
    289
    290/* Label 10 is the target named by the EX(10f) annotations above.
    291 * On entry: a2 = dst as passed in, a5 = dst cursor, a11 = len as passed in.
    292 * Bytes copied so far = a5 - a2, so the count still outstanding is
    293 * a11 - (a5 - a2), evaluated below as (a2 + a11) - a5.
    294 */
    295
    296
    29710:
    298	add	a2, a2, a11	/* a2 <-- original dst + original len */
    299	sub	a2, a2, a5	/* a2 <-- bytes not copied */
    300	abi_ret(STACK_SIZE)