/* SPDX-License-Identifier: GPL-2.0 */
/*
 * copy_page, __copy_user_page, __copy_user implementation of SuperH
 *
 * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima
 * Copyright (C) 2002 Toshinobu Sugioka
 * Copyright (C) 2006 Paul Mundt
 */
#include <linux/linkage.h>
#include <asm/page.h>

/*
 * copy_page
 * @to: P1 address
 * @from: P1 address
 *
 * void copy_page(void *to, void *from)
 *
 * Copies one PAGE_SIZE block, 32 bytes (8 longwords) per iteration.
 * On SH-4 the first store of each cache line uses movca.l so the line
 * is allocated without first being fetched from memory.
 */

/*
 * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
 * r8 --- from + PAGE_SIZE
 * r9 --- not used
 * r10 --- to
 * r11 --- from
 */
ENTRY(copy_page)
	mov.l	r8,@-r15		! save callee-saved regs used below
	mov.l	r10,@-r15
	mov.l	r11,@-r15
	mov	r4,r10			! r10 = to
	mov	r5,r11			! r11 = from
	mov	r5,r8
	mov	#(PAGE_SIZE >> 10), r0	! r8 = from + PAGE_SIZE
	shll8	r0			! (built via shifts: immediates are
	shll2	r0			!  8-bit signed on SH)
	add	r0,r8
	!
1:	mov.l	@r11+,r0		! load 32 bytes from source
	mov.l	@r11+,r1
	mov.l	@r11+,r2
	mov.l	@r11+,r3
	mov.l	@r11+,r4
	mov.l	@r11+,r5
	mov.l	@r11+,r6
	mov.l	@r11+,r7
#if defined(CONFIG_CPU_SH4)
	movca.l	r0,@r10			! allocate cache line without fetch
#else
	mov.l	r0,@r10
#endif
	add	#32,r10			! store r7..r1 with pre-decrement
	mov.l	r7,@-r10
	mov.l	r6,@-r10
	mov.l	r5,@-r10
	mov.l	r4,@-r10
	mov.l	r3,@-r10
	mov.l	r2,@-r10
	mov.l	r1,@-r10
	cmp/eq	r11,r8			! reached from + PAGE_SIZE?
	bf/s	1b
	 add	#28,r10			! (delay slot) advance to next line
	!
	mov.l	@r15+,r11		! restore saved regs
	mov.l	@r15+,r10
	mov.l	@r15+,r8
	rts
	 nop

/*
 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
 * Return the number of bytes NOT copied
 *
 * Every user-space access is wrapped in EX()/EX_NO_POP() so a fault
 * lands in the matching .fixup handler, which computes
 * (last dest address - current dest) = bytes remaining, then resumes
 * at the normal exit path (popping saved registers where needed).
 */
#define EX(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6000f	;	\
	.previous
#define EX_NO_POP(...)		\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6005f	;	\
	.previous
ENTRY(__copy_user)
	! Check if small number of bytes
	mov	#11,r0
	mov	r4,r3
	cmp/gt	r0,r6		! r6 (len) > r0 (11)
	bf/s	.L_cleanup_loop_no_pop
	 add	r6,r3		! last destination address

	! Calculate bytes needed to align to src
	mov.l	r11,@-r15	! save callee-saved regs (interleaved with
	neg	r5,r0		!  the alignment arithmetic for dual issue)
	mov.l	r10,@-r15
	add	#4,r0
	mov.l	r9,@-r15
	and	#3,r0		! r0 = (4 - (src & 3)) & 3
	mov.l	r8,@-r15
	tst	r0,r0
	bt	2f		! already longword aligned

1:
	! Copy bytes to long word align src
EX(	mov.b	@r5+,r1 	)
	dt	r0
	add	#-1,r6
EX(	mov.b	r1,@r4 		)
	bf/s	1b
	 add	#1,r4

	! Jump to appropriate routine depending on dest
2:	mov	#3,r1
	mov	r6, r2
	and	r4,r1		! r1 = dest & 3
	shlr2	r2		! r2 = longwords remaining
	shll2	r1		! scale to table offset
	mova	.L_jump_tbl,r0
	mov.l	@(r0,r1),r1
	jmp	@r1
	 nop

	.align 2
.L_jump_tbl:
	.long	.L_dest00
	.long	.L_dest01
	.long	.L_dest10
	.long	.L_dest11

/*
 * Come here if there are less than 12 bytes to copy
 *
 * Keep the branch target close, so the bf/s callee doesn't overflow
 * and result in a more expensive branch being inserted. This is the
 * fast-path for small copies, the jump via the jump table will hit the
 * default slow-path cleanup. -PFM.
 */
.L_cleanup_loop_no_pop:
	tst	r6,r6		! Check explicitly for zero
	bt	1f

2:
EX_NO_POP(	mov.b	@r5+,r0	)
	dt	r6
EX_NO_POP(	mov.b	r0,@r4	)
	bf/s	2b
	 add	#1,r4

1:	mov	#0,r0		! normal return
5000:

# Exception handler:
.section .fixup, "ax"
6005:
	mov.l	8000f,r1	! fault before regs were pushed:
	mov	r3,r0		! return (end addr - cur dest) = bytes left
	jmp	@r1
	 sub	r4,r0
	.align	2
8000:	.long	5000b

.previous
	rts
	 nop

! Destination = 00

.L_dest00:
	! Skip the large copy for small transfers
	mov	#(32+32-4), r0
	cmp/gt	r6, r0		! r0 (60) > r6 (len)
	bt	1f

	! Align dest to a 32 byte boundary
	neg	r4,r0
	add	#0x20, r0
	and	#0x1f, r0	! r0 = bytes to next 32-byte boundary
	tst	r0, r0
	bt	2f

	sub	r0, r6
	shlr2	r0		! longwords to copy for alignment
3:
EX(	mov.l	@r5+,r1	)
	dt	r0
EX(	mov.l	r1,@r4	)
	bf/s	3b
	 add	#4,r4

2:	! Main loop: 32 bytes (one cache line) per iteration
EX(	mov.l	@r5+,r0	)
EX(	mov.l	@r5+,r1	)
EX(	mov.l	@r5+,r2	)
EX(	mov.l	@r5+,r7	)
EX(	mov.l	@r5+,r8	)
EX(	mov.l	@r5+,r9	)
EX(	mov.l	@r5+,r10	)
EX(	mov.l	@r5+,r11	)
#ifdef CONFIG_CPU_SH4
EX(	movca.l	r0,@r4	)	! allocate cache line without fetch
#else
EX(	mov.l	r0,@r4	)
#endif
	add	#-32, r6
EX(	mov.l	r1,@(4,r4)	)
	mov	#32, r0
EX(	mov.l	r2,@(8,r4)	)
	cmp/gt	r6, r0		! r0 (32) > r6 (len)
EX(	mov.l	r7,@(12,r4)	)
EX(	mov.l	r8,@(16,r4)	)
EX(	mov.l	r9,@(20,r4)	)
EX(	mov.l	r10,@(24,r4)	)
EX(	mov.l	r11,@(28,r4)	)
	bf/s	2b
	 add	#32,r4

1:	mov	r6, r0		! copy remaining full longwords
	shlr2	r0
	tst	r0, r0
	bt	.L_cleanup
1:
EX(	mov.l	@r5+,r1	)
	dt	r0
EX(	mov.l	r1,@r4	)
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 10 (dest is 2-byte aligned; src is longword aligned,
! so pairs of longwords are re-packed with xtrct, 32 bytes/iteration)

.L_dest10:
	mov	r2,r7
	shlr2	r7
	shlr	r7		! r7 = 8-longword groups
	tst	r7,r7
	mov	#7,r0
	bt/s	1f
	 and	r0,r2		! r2 = leftover longwords (mod 8)
2:
	dt	r7
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.l	@r5+,r0	)
EX(	mov.l	@r5+,r1	)
EX(	mov.l	@r5+,r8	)
EX(	mov.l	@r5+,r9	)
EX(	mov.l	@r5+,r10	)
EX(	mov.w	r0,@r4	)	! leading halfword brings dest to
	add	#2,r4		!  longword alignment
	xtrct	r1,r0		! shift 16-bit-offset data into
	xtrct	r8,r1		!  longword-aligned stores
	xtrct	r9,r8
	xtrct	r10,r9

EX(	mov.l	r0,@r4	)
EX(	mov.l	r1,@(4,r4)	)
EX(	mov.l	r8,@(8,r4)	)
EX(	mov.l	r9,@(12,r4)	)

EX(	mov.l	@r5+,r1	)
EX(	mov.l	@r5+,r8	)
EX(	mov.l	@r5+,r0	)
	xtrct	r1,r10
	xtrct	r8,r1
	xtrct	r0,r8
	shlr16	r0		! trailing halfword
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	r1,@(20,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.w	r0,@(28,r4)	)
	bf/s	2b
	 add	#30,r4
#else
EX(	mov.l	@(28,r5),r0	)
EX(	mov.l	@(24,r5),r8	)
EX(	mov.l	@(20,r5),r9	)
EX(	mov.l	@(16,r5),r10	)
EX(	mov.w	r0,@(30,r4)	)
	add	#-2,r4
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(28,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.l	r9,@(20,r4)	)

EX(	mov.l	@(12,r5),r0	)
EX(	mov.l	@(8,r5),r8	)
	xtrct	r0,r10
EX(	mov.l	@(4,r5),r9	)
	mov.l	r10,@(16,r4)
EX(	mov.l	@r5,r10	)
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(12,r4)	)
EX(	mov.l	r8,@(8,r4)	)
	swap.w	r10,r0
EX(	mov.l	r9,@(4,r4)	)
EX(	mov.w	r0,@(2,r4)	)

	add	#32,r5
	bf/s	2b
	 add	#34,r4
#endif
	tst	r2,r2
	bt	.L_cleanup

1:	! Read longword, write two words per iteration
EX(	mov.l	@r5+,r0	)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.w	r0,@r4	)
	shlr16	r0
EX(	mov.w	r0,@(2,r4)	)
#else
EX(	mov.w	r0,@(2,r4)	)
	shlr16	r0
EX(	mov.w	r0,@r4	)
#endif
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 01 or 11 (odd dest address)

.L_dest01:
.L_dest11:
	! Read longword, write byte, word, byte per iteration
EX(	mov.l	@r5+,r0	)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.b	r0,@r4	)
	shlr8	r0
	add	#1,r4
EX(	mov.w	r0,@r4	)
	shlr16	r0
EX(	mov.b	r0,@(2,r4)	)
	bf/s	.L_dest01
	 add	#3,r4
#else
EX(	mov.b	r0,@(3,r4)	)
	shlr8	r0
	swap.w	r0,r7
EX(	mov.b	r7,@r4	)
	add	#1,r4
EX(	mov.w	r0,@r4	)
	bf/s	.L_dest01
	 add	#3,r4
#endif

! Cleanup last few bytes
.L_cleanup:
	mov	r6,r0
	and	#3,r0		! trailing bytes (len mod 4)
	tst	r0,r0
	bt	.L_exit
	mov	r0,r6

.L_cleanup_loop:
EX(	mov.b	@r5+,r0	)
	dt	r6
EX(	mov.b	r0,@r4	)
	bf/s	.L_cleanup_loop
	 add	#1,r4

.L_exit:
	mov	#0,r0		! normal return

5000:

# Exception handler:
.section .fixup, "ax"
6000:
	mov.l	8000f,r1	! fault after regs were pushed:
	mov	r3,r0		! return (end addr - cur dest) = bytes left,
	jmp	@r1		!  then fall into the pop/rts epilogue
	 sub	r4,r0
	.align	2
8000:	.long	5000b

.previous
	mov.l	@r15+,r8	! restore callee-saved regs
	mov.l	@r15+,r9
	mov.l	@r15+,r10
	rts
	 mov.l	@r15+,r11	! (delay slot) last restore