cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

memmove_64.S (3690B)


/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Normally compiler builtins are used, but sometimes the compiler calls
 * out-of-line code. Based on asm-i386/string.h.
 *
 * This assembly file is rewritten from the memmove_64.c file.
 *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>

#undef memmove

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 */
SYM_FUNC_START(__memmove)

	mov %rdi, %rax

	/* Decide forward/backward copy mode */
	cmp %rdi, %rsi
	jge .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	jg 2f
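	/*
	 * Reaching this point means src < dest but src + count <= dest,
	 * so the regions do not overlap and a forward copy is still safe.
	 */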

	/* FSRM implies ERMS => no length checks, do the copy directly */
.Lmemmove_begin_forward:
	ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
	ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS
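	/*
	 * Both ALTERNATIVEs are patched at boot: FSRM removes the 32-byte
	 * size check, and ERMS patches in a rep movsb plus RET so the
	 * code below never runs.
	 */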

	/*
	 * The movsq instruction has a high startup latency, so small
	 * sizes are handled with general-purpose registers instead.
	 */
	cmp  $680, %rdx
	jb	3f
	/*
	 * movsq only pays off when src and dest are mutually aligned.
	 */

	cmpb %dil, %sil
	je 4f
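	/*
	 * Equal low bytes of dest and src mean the two pointers share the
	 * same alignment, so the rep movsq path at 4 can be used.
	 */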
3:
	sub $0x20, %rdx
	/*
	 * We gobble 32 bytes forward in each loop.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae 5b
	addq $0x20, %rdx
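	/* rdx is back to the 0..31 remaining bytes, finished by the tail code at 1 */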
	jmp 1f
	/*
	 * Handle data forward by movsq.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11
	lea -8(%rdi, %rdx), %r10
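	/*
	 * r11 is loaded before rep movsq so an overlapping copy cannot
	 * clobber it; storing it at r10 (the last destination qword)
	 * afterwards covers the trailing count % 8 bytes.
	 */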
	shrq $3, %rcx
	rep movsq
	movq %r11, (%r10)
	jmp 13f
.Lmemmove_end_forward:

	/*
	 * Handle data backward by movsq.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11
	movq %rdi, %r10
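	/*
	 * r11 caches the first source qword and r10 the original dest;
	 * after the descending rep movsq below, storing r11 at r10 covers
	 * the leading count % 8 bytes.
	 */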
	leaq -8(%rsi, %rdx), %rsi
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std
	rep movsq
	cld
	movq %r11, (%r10)
	jmp 13f

	/*
	 * Prepare for the backward copy.
	 */
	.p2align 4
2:
	cmp $0x20, %rdx
	jb 1f
	cmp $680, %rdx
	jb 6f
	cmp %dil, %sil
	je 7b
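	/*
	 * Mirror of the forward dispatch: under 32 bytes go straight to
	 * the tail code at 1, large mutually aligned copies use the movsq
	 * path at 7, everything else falls through to the 32-byte loop.
	 */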
6:
	/*
	 * Calculate copy position to tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * We gobble 32 bytes backward in each loop.
	 */
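	/*
	 * As in the forward loop, rdx was biased by 32 above, so the sub
	 * below borrows once fewer than 32 bytes remain and jae falls
	 * through.
	 */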
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae 8b
	/*
	 * Calculate copy position to head.
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
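	/*
	 * rsi and rdi now point at the first of the 0..31 remaining bytes,
	 * so the common tail code below serves both copy directions.
	 */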
1:
	cmpq $16, %rdx
	jb 9f
	/*
	 * Move 16 to 31 bytes.
	 */
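	/*
	 * All loads are done before any store, so the overlapping head and
	 * tail qword pairs are safe even when src and dest overlap.
	 */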
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Move 8 to 15 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Move 4 to 7 bytes.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Move 2 to 3 bytes.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/*
	 * Move the final byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	RET
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)

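/*
 * memmove is only a weak alias so that an instrumented replacement
 * (e.g. from KASAN) can override it while __memmove stays available.
 */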
SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
EXPORT_SYMBOL(memmove)