copy_user_64.S
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>

.macro ALIGN_DESTINATION
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
        jz 102f                         /* already aligned */
        subl $8,%ecx
        negl %ecx
        subl %ecx,%edx
100:    movb (%rsi),%al
101:    movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 100b
102:

        _ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align)
        _ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align)
.endm

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_unrolled)
        ASM_STAC
        cmpl $8,%edx
        jb .Lcopy_user_short_string_bytes
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz copy_user_short_string
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movq %r8,(%rdi)
6:      movq %r9,1*8(%rdi)
7:      movq %r10,2*8(%rdi)
8:      movq %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movq %r8,4*8(%rdi)
14:     movq %r9,5*8(%rdi)
15:     movq %r10,6*8(%rdi)
16:     movq %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
        jmp copy_user_short_string

30:     shll $6,%ecx
        addl %ecx,%edx
        jmp .Lcopy_user_handle_tail

        _ASM_EXTABLE_CPY(1b, 30b)
        _ASM_EXTABLE_CPY(2b, 30b)
        _ASM_EXTABLE_CPY(3b, 30b)
        _ASM_EXTABLE_CPY(4b, 30b)
        _ASM_EXTABLE_CPY(5b, 30b)
        _ASM_EXTABLE_CPY(6b, 30b)
        _ASM_EXTABLE_CPY(7b, 30b)
        _ASM_EXTABLE_CPY(8b, 30b)
        _ASM_EXTABLE_CPY(9b, 30b)
        _ASM_EXTABLE_CPY(10b, 30b)
        _ASM_EXTABLE_CPY(11b, 30b)
        _ASM_EXTABLE_CPY(12b, 30b)
        _ASM_EXTABLE_CPY(13b, 30b)
        _ASM_EXTABLE_CPY(14b, 30b)
        _ASM_EXTABLE_CPY(15b, 30b)
        _ASM_EXTABLE_CPY(16b, 30b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
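All of the copy routines in this file share the register contract documented above: destination in %rdi, source in %rsi, byte count in %rdx, and the number of bytes left uncopied returned in %eax (0 on success). The following is a minimal C sketch of how a caller consumes that return value; copy_user() is a hypothetical stand-in for any of the assembly routines and, unlike them, never faults.

#include <errno.h>
#include <stddef.h>
#include <string.h>

/* Hypothetical stand-in: returns the number of bytes NOT copied, 0 on success. */
static size_t copy_user(void *dst, const void *src, size_t len)
{
        memcpy(dst, src, len);          /* the real routines may stop early on a fault */
        return 0;
}

static int read_buffer(char *kbuf, const char *ubuf, size_t len)
{
        size_t left = copy_user(kbuf, ubuf, len);

        if (left) {
                /* Only the first (len - left) bytes of kbuf are valid;
                 * copy_from_user()-style callers zero the tail and fail. */
                memset(kbuf + (len - left), 0, left);
                return -EFAULT;
        }
        return 0;
}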
/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * Copying more would also be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix this,
 * please keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_string)
        ASM_STAC
        cmpl $8,%edx
        jb 2f                           /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
1:      rep movsq
2:      movl %edx,%ecx
3:      rep movsb
        xorl %eax,%eax
        ASM_CLAC
        RET

11:     leal (%rdx,%rcx,8),%ecx
12:     movl %ecx,%edx                  /* ecx is zerorest also */
        jmp .Lcopy_user_handle_tail

        _ASM_EXTABLE_CPY(1b, 11b)
        _ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs support the enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_enhanced_fast_string)
        ASM_STAC
        /* CPUs without FSRM should avoid rep movsb for short copies */
        ALTERNATIVE "cmpl $64, %edx; jb copy_user_short_string", "", X86_FEATURE_FSRM
        movl %edx,%ecx
1:      rep movsb
        xorl %eax,%eax
        ASM_CLAC
        RET

12:     movl %ecx,%edx                  /* ecx is zerorest also */
        jmp .Lcopy_user_handle_tail

        _ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * Try to copy the last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * Don't try to copy the tail if a machine check happened.
 *
 * Input:
 * eax trap number written by ex_handler_copy()
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
        cmp $X86_TRAP_MC,%eax
        je 3f

        movl %edx,%ecx
1:      rep movsb
2:      mov %ecx,%eax
        ASM_CLAC
        RET

3:
        movl %edx,%eax
        ASM_CLAC
        RET

        _ASM_EXTABLE_CPY(1b, 2b)

.Lcopy_user_handle_align:
        addl %ecx,%edx                  /* ecx is zerorest also */
        jmp .Lcopy_user_handle_tail

SYM_CODE_END(.Lcopy_user_handle_tail)

/*
 * Finish a memcpy of less than 64 bytes. #AC should already be set.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (< 64)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(copy_user_short_string)
        movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz .Lcopy_user_short_string_bytes
18:     movq (%rsi),%r8
19:     movq %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz 18b
.Lcopy_user_short_string_bytes:
        andl %edx,%edx
        jz 23f
        movl %edx,%ecx
21:     movb (%rsi),%al
22:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 21b
23:     xor %eax,%eax
        ASM_CLAC
        RET

40:     leal (%rdx,%rcx,8),%edx
        jmp 60f
50:     movl %ecx,%edx                  /* ecx is zerorest also */
60:     jmp .Lcopy_user_handle_tail

        _ASM_EXTABLE_CPY(18b, 40b)
        _ASM_EXTABLE_CPY(19b, 40b)
        _ASM_EXTABLE_CPY(21b, 50b)
        _ASM_EXTABLE_CPY(22b, 50b)
SYM_CODE_END(copy_user_short_string)
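copy_user_generic_unrolled, copy_user_generic_string and copy_user_enhanced_fast_string implement the same interface; the kernel chooses among them once at boot by patching the call site with the alternatives mechanism, keyed on CPU feature bits such as X86_FEATURE_REP_GOOD and X86_FEATURE_ERMS. The sketch below restates that selection policy as a plain runtime dispatch for illustration only; cpu_has(), the FEATURE_* constants and the C prototypes are stand-ins, not kernel APIs.

/* Illustrative restatement of the selection policy; the kernel patches the
 * call site instead of branching like this. */
enum cpu_feature { FEATURE_REP_GOOD, FEATURE_ERMS };    /* hypothetical stand-ins */

extern int cpu_has(enum cpu_feature f);                 /* hypothetical query helper */

typedef unsigned long (*copy_user_fn)(void *to, const void *from, unsigned int len);

extern unsigned long copy_user_generic_unrolled(void *, const void *, unsigned int);
extern unsigned long copy_user_generic_string(void *, const void *, unsigned int);
extern unsigned long copy_user_enhanced_fast_string(void *, const void *, unsigned int);

static copy_user_fn pick_copy_user(void)
{
        if (cpu_has(FEATURE_ERMS))              /* enhanced REP MOVSB/STOSB */
                return copy_user_enhanced_fast_string;
        if (cpu_has(FEATURE_REP_GOOD))          /* rep movsq is fast enough */
                return copy_user_generic_string;
        return copy_user_generic_unrolled;      /* e.g. P4-era CPUs */
}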
/*
 * copy_user_nocache - Uncached memory copy with exception handling.
 * This will force the destination out of the cache for better performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
SYM_FUNC_START(__copy_user_nocache)
        ASM_STAC

        /* If size is less than 8 bytes, go to 4-byte copy */
        cmpl $8,%edx
        jb .L_4b_nocache_copy_entry

        /* If destination is not 8-byte aligned, "cache" copy to align it */
        ALIGN_DESTINATION

        /* Set 4x8-byte copy count and remainder */
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz .L_8b_nocache_copy_entry     /* jump if count is 0 */

        /* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movnti %r8,(%rdi)
6:      movnti %r9,1*8(%rdi)
7:      movnti %r10,2*8(%rdi)
8:      movnti %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movnti %r8,4*8(%rdi)
14:     movnti %r9,5*8(%rdi)
15:     movnti %r10,6*8(%rdi)
16:     movnti %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz .L_4x8b_nocache_copy_loop

        /* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
        movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz .L_4b_nocache_copy_entry     /* jump if count is 0 */

        /* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:     movq (%rsi),%r8
21:     movnti %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz .L_8b_nocache_copy_loop

        /* If no bytes left, we're done */
.L_4b_nocache_copy_entry:
        andl %edx,%edx
        jz .L_finish_copy

        /* If destination is not 4-byte aligned, go to byte copy: */
        movl %edi,%ecx
        andl $3,%ecx
        jnz .L_1b_cache_copy_entry

        /* Set 4-byte copy count (1 or 0) and remainder */
        movl %edx,%ecx
        andl $3,%edx
        shrl $2,%ecx
        jz .L_1b_cache_copy_entry       /* jump if count is 0 */

        /* Perform 4-byte nocache copy: */
30:     movl (%rsi),%r8d
31:     movnti %r8d,(%rdi)
        leaq 4(%rsi),%rsi
        leaq 4(%rdi),%rdi

        /* If no bytes left, we're done: */
        andl %edx,%edx
        jz .L_finish_copy

        /* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
        movl %edx,%ecx
.L_1b_cache_copy_loop:
40:     movb (%rsi),%al
41:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .L_1b_cache_copy_loop

        /* Finished copying; fence the prior stores */
.L_finish_copy:
        xorl %eax,%eax
        ASM_CLAC
        sfence
        RET

.L_fixup_4x8b_copy:
        shll $6,%ecx
        addl %ecx,%edx
        jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
        lea (%rdx,%rcx,8),%rdx
        jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
        lea (%rdx,%rcx,4),%rdx
        jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
        movl %ecx,%edx
.L_fixup_handle_tail:
        sfence
        jmp .Lcopy_user_handle_tail

        _ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
        _ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
        _ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
        _ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
        _ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
        _ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
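On a fault, each .L_fixup_* label above recomputes how many bytes were still outstanding before falling through to the common tail handler. Restated in C for reference (the helper names mirror the labels and are illustrative only; ecx and edx carry the values the registers hold at the time of the fault):

/* Remaining-byte computations performed by the __copy_user_nocache fixups. */
static unsigned int fixup_4x8b_copy(unsigned int ecx, unsigned int edx)
{
        return (ecx << 6) + edx;        /* 64 bytes per unrolled iteration left */
}

static unsigned int fixup_8b_copy(unsigned int ecx, unsigned int edx)
{
        return 8 * ecx + edx;           /* lea (%rdx,%rcx,8),%rdx */
}

static unsigned int fixup_4b_copy(unsigned int ecx, unsigned int edx)
{
        return 4 * ecx + edx;           /* lea (%rdx,%rcx,4),%rdx */
}

static unsigned int fixup_1b_copy(unsigned int ecx)
{
        return ecx;                     /* bytes left in the byte loop */
}

The result lands in %edx, which is exactly the count .Lcopy_user_handle_tail expects, and the sfence in .L_fixup_handle_tail orders the earlier non-temporal stores before the handler's byte-wise retry.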