cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

copy_user_64.S (9550B)


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>

.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:

	_ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align)
	_ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align)
.endm
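
/*
 * Example: if the destination address ends in 3 (%edi & 7 == 3), %ecx
 * becomes 8 - 3 = 5, so five bytes are copied one at a time (and
 * subtracted from the count in %edx) to reach an 8-byte boundary before
 * the bulk copy starts.
 */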

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
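/*
 * The main loop below moves 64 bytes per iteration: four quadword loads
 * into %r8-%r11, the matching four stores, then the same again for the
 * second half of the block, so several loads are in flight before their
 * stores execute.  Any remainder below 64 bytes is finished by
 * copy_user_short_string.
 */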
SYM_FUNC_START(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb .Lcopy_user_short_string_bytes
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz copy_user_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
	jmp copy_user_short_string
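
/*
 * Exception fixup for the unrolled loop: %ecx still counts the 64-byte
 * blocks that were not finished, so scale it back to bytes (shll $6) and
 * add the sub-64-byte remainder kept in %edx to get the number of bytes
 * the tail handler still has to deal with.
 */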
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, 30b)
	_ASM_EXTABLE_CPY(2b, 30b)
	_ASM_EXTABLE_CPY(3b, 30b)
	_ASM_EXTABLE_CPY(4b, 30b)
	_ASM_EXTABLE_CPY(5b, 30b)
	_ASM_EXTABLE_CPY(6b, 30b)
	_ASM_EXTABLE_CPY(7b, 30b)
	_ASM_EXTABLE_CPY(8b, 30b)
	_ASM_EXTABLE_CPY(9b, 30b)
	_ASM_EXTABLE_CPY(10b, 30b)
	_ASM_EXTABLE_CPY(11b, 30b)
	_ASM_EXTABLE_CPY(12b, 30b)
	_ASM_EXTABLE_CPY(13b, 30b)
	_ASM_EXTABLE_CPY(14b, 30b)
	_ASM_EXTABLE_CPY(15b, 30b)
	_ASM_EXTABLE_CPY(16b, 30b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this please consider this.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep movsq
2:	movl %edx,%ecx
3:	rep movsb
	xorl %eax,%eax
	ASM_CLAC
	RET
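
/*
 * Exception fixups: if "rep movsq" faults (label 1), %rcx holds the number
 * of quadwords left, so the uncopied byte count is %edx + 8 * %ecx; if
 * "rep movsb" faults (label 3), %ecx already is the uncopied byte count.
 */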
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, 11b)
	_ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_enhanced_fast_string)
	ASM_STAC
	/* CPUs without FSRM should avoid rep movsb for short copies */
	ALTERNATIVE "cmpl $64, %edx; jb copy_user_short_string", "", X86_FEATURE_FSRM
	movl %edx,%ecx
1:	rep movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
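
/*
 * The three variants above are not normally called directly.  In this
 * kernel generation, copy_user_generic() in
 * arch/x86/include/asm/uaccess_64.h selects one of them at
 * alternatives-patching time, roughly along these lines (simplified
 * sketch; the real helper spells out the full output and clobber lists):
 *
 *	static __always_inline __must_check unsigned long
 *	copy_user_generic(void *to, const void *from, unsigned len)
 *	{
 *		unsigned ret;
 *
 *		// ERMS CPUs use copy_user_enhanced_fast_string, CPUs with
 *		// fast rep movsq use copy_user_generic_string, everything
 *		// else falls back to copy_user_generic_unrolled.
 *		alternative_call_2(copy_user_generic_unrolled,
 *				   copy_user_generic_string,
 *				   X86_FEATURE_REP_GOOD,
 *				   copy_user_enhanced_fast_string,
 *				   X86_FEATURE_ERMS,
 *				   ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
 *					       "=d" (len)),
 *				   "1" (to), "2" (from), "3" (len)
 *				   : "memory", "rcx", "r8", "r9", "r10", "r11");
 *		return ret;
 *	}
 */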

/*
 * Try to copy last bytes and clear the rest if needed.
 * Since protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * Don't try to copy the tail if machine check happened
 *
 * Input:
 * eax trap number written by ex_handler_copy()
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
	cmp $X86_TRAP_MC,%eax
	je 3f

	movl %edx,%ecx
1:	rep movsb
2:	mov %ecx,%eax
	ASM_CLAC
	RET

3:
	movl %edx,%eax
	ASM_CLAC
	RET

	_ASM_EXTABLE_CPY(1b, 2b)

.Lcopy_user_handle_align:
	addl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail

SYM_CODE_END(.Lcopy_user_handle_tail)

/*
 * Finish memcpy of less than 64 bytes.  #AC should already be set.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (< 64)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(copy_user_short_string)
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .Lcopy_user_short_string_bytes
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
.Lcopy_user_short_string_bytes:
	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	RET

40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx		/* ecx is zerorest also */
60:	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(18b, 40b)
	_ASM_EXTABLE_CPY(19b, 40b)
	_ASM_EXTABLE_CPY(21b, 50b)
	_ASM_EXTABLE_CPY(22b, 50b)
SYM_CODE_END(copy_user_short_string)

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
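/*
 * The copy below cascades through progressively smaller chunks: 64-byte
 * blocks, single 8-byte words and one optional 4-byte word, all stored
 * with non-temporal movnti; whatever remains (or anything that cannot
 * satisfy the alignment rules above) is copied with ordinary cached
 * byte accesses.
 */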
SYM_FUNC_START(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop
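
	/*
	 * The movnti stores above are weakly ordered non-temporal stores
	 * that avoid filling the cache with the destination; they are made
	 * globally visible by the sfence at .L_finish_copy (or in
	 * .L_fixup_handle_tail on a fault).
	 */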

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no byte left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	RET
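
/*
 * Exception fixups: rebuild the uncopied byte count from the loop counter
 * of whichever stage faulted (64-, 8-, 4- or 1-byte chunks), fence the
 * non-temporal stores that did complete, then let .Lcopy_user_handle_tail
 * retry the remainder bytewise.
 */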
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)