/* SPDX-License-Identifier: GPL-2.0 */
/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
 *
 * Copyright(C) 1995 Linus Torvalds
 * Copyright(C) 1996 David S. Miller
 * Copyright(C) 1996 Eddie C. Dost
 * Copyright(C) 1996,1998 Jakub Jelinek
 *
 * derived from:
 *	e-mail between David and Eddie.
 *
 * Returns 0 if successful, otherwise count of bytes not copied yet
 */

#include <asm/ptrace.h>
#include <asm/asmmacro.h>
#include <asm/page.h>
#include <asm/thread_info.h>
#include <asm/export.h>

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr

/* Emit an __ex_table entry mapping faulting insn l1 to fixup code l2. */
#define EX_ENTRY(l1, l2)			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	l1, l2;				\
	.text;

/* Guarded insn x,y; on fault return "a, b, %o0" (bytes not copied). */
#define EX(x,y,a,b)				\
98:	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	retl;					\
	 a, b, %o0;				\
	EX_ENTRY(98b, 99b)

/* Like EX but runs one extra fixup insn "c, d, e" before returning. */
#define EX2(x,y,c,d,e,a,b)			\
98:	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	c, d, e;				\
	retl;					\
	 a, b, %o0;				\
	EX_ENTRY(98b, 99b)

/* Guarded insn whose fault handler is the shared "97:" fixup below. */
#define EXO2(x,y)				\
98:	x, y;					\
	EX_ENTRY(98b, 97f)

/* Guarded load: on fault, record the chunk offset in %g5 and branch
 * to the per-chunk fault handler, which computes the residual count. */
#define LD(insn, src, offset, reg, label)	\
98:	insn [%src + (offset)], %reg;		\
	.section .fixup,ALLOC,EXECINSTR;	\
99:	ba label;				\
	 mov offset, %g5;			\
	EX_ENTRY(98b, 99b)

/* Guarded store, same fault protocol as LD. */
#define ST(insn, dst, offset, reg, label)	\
98:	insn %reg, [%dst + (offset)];		\
	.section .fixup,ALLOC,EXECINSTR;	\
99:	ba label;				\
	 mov offset, %g5;			\
	EX_ENTRY(98b, 99b)

/* Both these macros have to start with exactly the same insn */
/* (the "be ldd_std + 4" entry executes MOVE_BIGCHUNK's first ldd in
 * its delay slot, then skips the identical first insn of ldd_std) */
/* left: g7 + (g1 % 128) - offset */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7)	\
	LD(ldd, src, offset + 0x00, t0, bigchunk_fault)	\
	LD(ldd, src, offset + 0x08, t2, bigchunk_fault)	\
	LD(ldd, src, offset + 0x10, t4, bigchunk_fault)	\
	LD(ldd, src, offset + 0x18, t6, bigchunk_fault)	\
	ST(st, dst, offset + 0x00, t0, bigchunk_fault)	\
	ST(st, dst, offset + 0x04, t1, bigchunk_fault)	\
	ST(st, dst, offset + 0x08, t2, bigchunk_fault)	\
	ST(st, dst, offset + 0x0c, t3, bigchunk_fault)	\
	ST(st, dst, offset + 0x10, t4, bigchunk_fault)	\
	ST(st, dst, offset + 0x14, t5, bigchunk_fault)	\
	ST(st, dst, offset + 0x18, t6, bigchunk_fault)	\
	ST(st, dst, offset + 0x1c, t7, bigchunk_fault)

/* left: g7 + (g1 % 128) - offset */
#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	LD(ldd, src, offset + 0x00, t0, bigchunk_fault)	\
	LD(ldd, src, offset + 0x08, t2, bigchunk_fault)	\
	LD(ldd, src, offset + 0x10, t4, bigchunk_fault)	\
	LD(ldd, src, offset + 0x18, t6, bigchunk_fault)	\
	ST(std, dst, offset + 0x00, t0, bigchunk_fault)	\
	ST(std, dst, offset + 0x08, t2, bigchunk_fault)	\
	ST(std, dst, offset + 0x10, t4, bigchunk_fault)	\
	ST(std, dst, offset + 0x18, t6, bigchunk_fault)

	/* Fault in a 128-byte chunk loop: %g7 = bytes left in whole
	 * chunks, %g5 = offset within the chunk, %g1 % 128 = tail. */
	.section .fixup,#alloc,#execinstr
bigchunk_fault:
	sub	%g7, %g5, %o0
	and	%g1, 127, %g1
	retl
	 add	%o0, %g1, %o0

/* left: offset + 16 + (g1 % 16) */
#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)	\
	LD(ldd, src, -(offset + 0x10), t0, lastchunk_fault)	\
	LD(ldd, src, -(offset + 0x08), t2, lastchunk_fault)	\
	ST(st, dst, -(offset + 0x10), t0, lastchunk_fault)	\
	ST(st, dst, -(offset + 0x0c), t1, lastchunk_fault)	\
	ST(st, dst, -(offset + 0x08), t2, lastchunk_fault)	\
	ST(st, dst, -(offset + 0x04), t3, lastchunk_fault)

	/* Fault in the 16-byte table chunks; %g5 is negative here. */
	.section .fixup,#alloc,#execinstr
lastchunk_fault:
	and	%g1, 15, %g1
	retl
	 sub	%g1, %g5, %o0

/* left: o3 + (o2 % 16) - offset */
#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3)	\
	LD(lduh, src, offset + 0x00, t0, halfchunk_fault)	\
	LD(lduh, src, offset + 0x02, t1, halfchunk_fault)	\
	LD(lduh, src, offset + 0x04, t2, halfchunk_fault)	\
	LD(lduh, src, offset + 0x06, t3, halfchunk_fault)	\
	ST(sth, dst, offset + 0x00, t0, halfchunk_fault)	\
	ST(sth, dst, offset + 0x02, t1, halfchunk_fault)	\
	ST(sth, dst, offset + 0x04, t2, halfchunk_fault)	\
	ST(sth, dst, offset + 0x06, t3, halfchunk_fault)

/* left: o3 + (o2 % 16) + offset + 2 */
#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1)		\
	LD(ldub, src, -(offset + 0x02), t0, halfchunk_fault)	\
	LD(ldub, src, -(offset + 0x01), t1, halfchunk_fault)	\
	ST(stb, dst, -(offset + 0x02), t0, halfchunk_fault)	\
	ST(stb, dst, -(offset + 0x01), t1, halfchunk_fault)

	/* Fault in half/short chunk loops: %o3 = bytes left in chunks,
	 * %o2 % 16 = tail bytes, %g5 = offset within current chunk. */
	.section .fixup,#alloc,#execinstr
halfchunk_fault:
	and	%o2, 15, %o2
	sub	%o3, %g5, %o3
	retl
	 add	%o2, %o3, %o0

/* left: offset + 2 + (o2 % 2) */
#define MOVE_LAST_SHORTCHUNK(src, dst, offset, t0, t1)			\
	LD(ldub, src, -(offset + 0x02), t0, last_shortchunk_fault)	\
	LD(ldub, src, -(offset + 0x01), t1, last_shortchunk_fault)	\
	ST(stb, dst, -(offset + 0x02), t0, last_shortchunk_fault)	\
	ST(stb, dst, -(offset + 0x01), t1, last_shortchunk_fault)

	/* Fault in the final byte-pair table; %g5 is negative here. */
	.section .fixup,#alloc,#execinstr
last_shortchunk_fault:
	and	%o2, 1, %o2
	retl
	 sub	%o2, %g5, %o0

	.text
	.align	4

	.globl  __copy_user_begin
__copy_user_begin:

	.globl	__copy_user
	EXPORT_SYMBOL(__copy_user)

	/* src is 2-byte but not 4-byte aligned: copy a byte and/or a
	 * halfword until %o1 is word aligned, then rejoin at 3:. */
dword_align:
	andcc	%o1, 1, %g0
	be	4f
	 andcc	%o1, 2, %g0

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	bne	3f
	 add	%o0, 1, %o0

	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0
4:
	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0

__copy_user:	/* %o0=dst %o1=src %o2=len */
	xor	%o0, %o1, %o4
1:
	andcc	%o4, 3, %o5	/* %o5 = relative misalignment of src/dst */
2:
	bne	cannot_optimize
	 cmp	%o2, 15

	bleu	short_aligned_end
	 andcc	%o1, 3, %g0

	bne	dword_align
3:
	 andcc	%o1, 4, %g0	/* word- but not dword-aligned src? */

	be	2f
	 mov	%o2, %g1	/* %g1 = remaining byte count */

	EXO2(ld [%o1], %o4)
	sub	%g1, 4, %g1
	EXO2(st %o4, [%o0])
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g7	/* %g7 = bytes in whole 128B chunks */
	be	3f
	 andcc	%o0, 4, %g0

	/* Dst dword-aligned: take the ldd/std loop.  The delay slot
	 * executes MOVE_BIGCHUNK's first insn, which is identical to
	 * ldd_std's first insn, hence the +4 target. */
	be	ldd_std + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	5b
	 add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g7	/* %g7 = remaining whole 16B chunks */
	be	copy_user_table_end
	 andcc	%g1, 8, %g0

	/* Computed jump backwards into the MOVE_LASTCHUNK table:
	 * each 16B chunk is 6 insns (24B = %g7 + %g7/2), so skip
	 * (0x70 - %g7) worth of entries. */
	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
	be	copy_user_last7
	 andcc	%g1, 4, %g0

	/* Copy trailing 8 bytes. */
	EX(ldd	[%o1], %g2, and %g1, 0xf)
	add	%o0, 8, %o0
	add	%o1, 8, %o1
	EX(st	%g2, [%o0 - 0x08], and %g1, 0xf)
	EX2(st	%g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
copy_user_last7:
	/* Copy the final 0-7 bytes: word, halfword, byte. */
	be	1f
	 andcc	%g1, 2, %g0

	EX(ld	[%o1], %g2, and %g1, 7)
	add	%o1, 4, %o1
	EX(st	%g2, [%o0], and %g1, 7)
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	EX(lduh	[%o1], %g2, and %g1, 3)
	add	%o1, 2, %o1
	EX(sth	%g2, [%o0], and %g1, 3)
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	EX(ldub	[%o1], %g2, add %g0, 1)
	EX(stb	%g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0		/* success: return 0 */

	/* 128B loop for dword-aligned dst: std instead of 2x st. */
ldd_std:
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	ldd_std
	 add	%o0, 128, %o0

	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	 andcc	%g1, 8, %g0

	/* Same computed jump into the MOVE_LASTCHUNK table as above. */
	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

	/* src/dst misaligned relative to each other (%o5 = xor & 3). */
cannot_optimize:
	bleu	short_end
	 cmp	%o5, 2

	bne	byte_chunk
	 and	%o2, 0xfffffff0, %o3	/* %o3 = bytes in whole 16B chunks */

	/* Halfword-relative alignment: byte-align src first. */
	andcc	%o1, 1, %g0
	be	10f
	 nop

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	andcc	%o2, 0xfffffff0, %o3
	be	short_end
	 add	%o0, 1, %o0
10:
	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	10b
	 add	%o0, 0x10, %o0
	b	2f
	 and	%o2, 0xe, %o3

	/* Byte-relative alignment: 16 bytes per iteration, byte ops. */
byte_chunk:
	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	byte_chunk
	 add	%o0, 0x10, %o0

short_end:
	and	%o2, 0xe, %o3	/* %o3 = remaining byte pairs * 2 */
2:
	/* Computed jump into the MOVE_LAST_SHORTCHUNK table: each
	 * 2-byte entry is 4 insns (16B = %o3 << 3). */
	sethi	%hi(short_table_end), %o5
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(short_table_end), %g0
	 andcc	%o2, 1, %g0
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_LAST_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
	be	1f
	 nop
	EX(ldub	[%o1], %g2, add %g0, 1)
	EX(stb	%g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0		/* success: return 0 */

	/* len <= 15 with mutually aligned src/dst. */
short_aligned_end:
	bne	short_end
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0

	EXO2(ld	[%o1 + 0x00], %g2)
	EXO2(ld	[%o1 + 0x04], %g3)
	add	%o1, 8, %o1
	EXO2(st	%g2, [%o0 + 0x00])
	EX(st	%g3, [%o0 + 0x04], sub %o2, 4)
	add	%o0, 8, %o0
1:
	b	copy_user_last7
	 mov	%o2, %g1

	/* Shared EXO2 fault handler: nothing copied for this access,
	 * return the full remaining count. */
	.section .fixup,#alloc,#execinstr
	.align	4
97:
	retl
	 mov	%o2, %o0

	.globl  __copy_user_end
__copy_user_end: