cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

memset_64.S


/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>

/*
 * ISO C memset - set a memory block to a byte value. This function uses fast
 * string instructions to get better performance than the original function.
 * The code is simpler and shorter than the original function as well.
 *
 * rdi   destination
 * rsi   value (char)
 * rdx   count (bytes)
 *
 * rax   original destination
 */
SYM_FUNC_START(__memset)
	/*
	 * Some CPUs support the enhanced REP MOVSB/STOSB instructions. It is
	 * recommended to use them when available. If enhanced REP STOSB is
	 * not available, use the fast string instructions (rep stosq) instead.
	 *
	 * Otherwise, fall back to the original memset function.
	 */
	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memset_erms", X86_FEATURE_ERMS

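	/*
	 * Fast string path (X86_FEATURE_REP_GOOD): store count/8 qwords
	 * with rep stosq, then the remaining count%8 bytes with rep stosb.
	 */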
	movq %rdi,%r9
	movq %rdx,%rcx
	andl $7,%edx
	shrq $3,%rcx
	/* expand byte value  */
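	/* multiplying by 0x0101010101010101 copies the byte into each byte of %rax */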
	movzbl %sil,%esi
	movabs $0x0101010101010101,%rax
	imulq %rsi,%rax
	rep stosq
	movl %edx,%ecx
	rep stosb
	movq %r9,%rax
	RET
SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)

SYM_FUNC_ALIAS_WEAK(memset, __memset)
EXPORT_SYMBOL(memset)

/*
 * ISO C memset - set a memory block to a byte value. This function uses
 * enhanced rep stosb to override the fast string function.
 * The code is simpler and shorter than the fast string function as well.
 *
 * rdi   destination
 * rsi   value (char)
 * rdx   count (bytes)
 *
 * rax   original destination
 */
SYM_FUNC_START_LOCAL(memset_erms)
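	/* with ERMS, one microcoded rep stosb is efficient for any size */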
	movq %rdi,%r9
	movb %sil,%al
	movq %rdx,%rcx
	rep stosb
	movq %r9,%rax
	RET
SYM_FUNC_END(memset_erms)

SYM_FUNC_START_LOCAL(memset_orig)
	movq %rdi,%r10

	/* expand byte value  */
	movzbl %sil,%ecx
	movabs $0x0101010101010101,%rax
	imulq  %rcx,%rax

	/* align dst */
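	/*
	 * If dst is not 8-byte aligned, .Lbad_alignment stores one
	 * unaligned qword and advances dst to the next boundary.
	 */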
	movl  %edi,%r9d
	andl  $7,%r9d
	jnz  .Lbad_alignment
.Lafter_bad_alignment:

	movq  %rdx,%rcx
	shrq  $6,%rcx
	jz	 .Lhandle_tail

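	/* main loop: each iteration stores 64 bytes (eight qwords) */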
	.p2align 4
.Lloop_64:
	decq  %rcx
	movq  %rax,(%rdi)
	movq  %rax,8(%rdi)
	movq  %rax,16(%rdi)
	movq  %rax,24(%rdi)
	movq  %rax,32(%rdi)
	movq  %rax,40(%rdi)
	movq  %rax,48(%rdi)
	movq  %rax,56(%rdi)
	leaq  64(%rdi),%rdi
	jnz    .Lloop_64

	/*
	 * Handle the tail in loops. The loops should be faster than
	 * hard-to-predict jump tables.
	 */
	.p2align 4
.Lhandle_tail:
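	/* ecx = bytes left below 64, rounded down to whole qwords */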
	movl	%edx,%ecx
	andl    $63&(~7),%ecx
	jz 		.Lhandle_7
	shrl	$3,%ecx
	.p2align 4
.Lloop_8:
	decl   %ecx
	movq  %rax,(%rdi)
	leaq  8(%rdi),%rdi
	jnz    .Lloop_8

.Lhandle_7:
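	/* store the final count%8 bytes one at a time */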
	andl	$7,%edx
	jz      .Lende
	.p2align 4
.Lloop_1:
	decl    %edx
	movb 	%al,(%rdi)
	leaq	1(%rdi),%rdi
	jnz     .Lloop_1

.Lende:
	movq	%r10,%rax
	RET

.Lbad_alignment:
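	/*
	 * dst is unaligned: if the whole request fits in under 8 bytes,
	 * let the byte loop finish it; otherwise store one unaligned
	 * qword, then round dst up to the next 8-byte boundary and
	 * shrink the count accordingly.
	 */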
	cmpq $7,%rdx
	jbe	.Lhandle_7
	movq %rax,(%rdi)	/* unaligned store */
	movq $8,%r8
	subq %r9,%r8
	addq %r8,%rdi
	subq %r8,%rdx
	jmp .Lafter_bad_alignment
.Lfinal:
SYM_FUNC_END(memset_orig)