memcpy_32.c (3800B)
1// SPDX-License-Identifier: GPL-2.0 2#include <linux/string.h> 3#include <linux/export.h> 4 5#undef memcpy 6#undef memset 7#undef memmove 8 9__visible void *memcpy(void *to, const void *from, size_t n) 10{ 11 return __memcpy(to, from, n); 12} 13EXPORT_SYMBOL(memcpy); 14 15__visible void *memset(void *s, int c, size_t count) 16{ 17 return __memset(s, c, count); 18} 19EXPORT_SYMBOL(memset); 20 21__visible void *memmove(void *dest, const void *src, size_t n) 22{ 23 int d0,d1,d2,d3,d4,d5; 24 char *ret = dest; 25 26 __asm__ __volatile__( 27 /* Handle more 16 bytes in loop */ 28 "cmp $0x10, %0\n\t" 29 "jb 1f\n\t" 30 31 /* Decide forward/backward copy mode */ 32 "cmp %2, %1\n\t" 33 "jb 2f\n\t" 34 35 /* 36 * movs instruction have many startup latency 37 * so we handle small size by general register. 38 */ 39 "cmp $680, %0\n\t" 40 "jb 3f\n\t" 41 /* 42 * movs instruction is only good for aligned case. 43 */ 44 "mov %1, %3\n\t" 45 "xor %2, %3\n\t" 46 "and $0xff, %3\n\t" 47 "jz 4f\n\t" 48 "3:\n\t" 49 "sub $0x10, %0\n\t" 50 51 /* 52 * We gobble 16 bytes forward in each loop. 53 */ 54 "3:\n\t" 55 "sub $0x10, %0\n\t" 56 "mov 0*4(%1), %3\n\t" 57 "mov 1*4(%1), %4\n\t" 58 "mov %3, 0*4(%2)\n\t" 59 "mov %4, 1*4(%2)\n\t" 60 "mov 2*4(%1), %3\n\t" 61 "mov 3*4(%1), %4\n\t" 62 "mov %3, 2*4(%2)\n\t" 63 "mov %4, 3*4(%2)\n\t" 64 "lea 0x10(%1), %1\n\t" 65 "lea 0x10(%2), %2\n\t" 66 "jae 3b\n\t" 67 "add $0x10, %0\n\t" 68 "jmp 1f\n\t" 69 70 /* 71 * Handle data forward by movs. 72 */ 73 ".p2align 4\n\t" 74 "4:\n\t" 75 "mov -4(%1, %0), %3\n\t" 76 "lea -4(%2, %0), %4\n\t" 77 "shr $2, %0\n\t" 78 "rep movsl\n\t" 79 "mov %3, (%4)\n\t" 80 "jmp 11f\n\t" 81 /* 82 * Handle data backward by movs. 83 */ 84 ".p2align 4\n\t" 85 "6:\n\t" 86 "mov (%1), %3\n\t" 87 "mov %2, %4\n\t" 88 "lea -4(%1, %0), %1\n\t" 89 "lea -4(%2, %0), %2\n\t" 90 "shr $2, %0\n\t" 91 "std\n\t" 92 "rep movsl\n\t" 93 "mov %3,(%4)\n\t" 94 "cld\n\t" 95 "jmp 11f\n\t" 96 97 /* 98 * Start to prepare for backward copy. 99 */ 100 ".p2align 4\n\t" 101 "2:\n\t" 102 "cmp $680, %0\n\t" 103 "jb 5f\n\t" 104 "mov %1, %3\n\t" 105 "xor %2, %3\n\t" 106 "and $0xff, %3\n\t" 107 "jz 6b\n\t" 108 109 /* 110 * Calculate copy position to tail. 111 */ 112 "5:\n\t" 113 "add %0, %1\n\t" 114 "add %0, %2\n\t" 115 "sub $0x10, %0\n\t" 116 117 /* 118 * We gobble 16 bytes backward in each loop. 119 */ 120 "7:\n\t" 121 "sub $0x10, %0\n\t" 122 123 "mov -1*4(%1), %3\n\t" 124 "mov -2*4(%1), %4\n\t" 125 "mov %3, -1*4(%2)\n\t" 126 "mov %4, -2*4(%2)\n\t" 127 "mov -3*4(%1), %3\n\t" 128 "mov -4*4(%1), %4\n\t" 129 "mov %3, -3*4(%2)\n\t" 130 "mov %4, -4*4(%2)\n\t" 131 "lea -0x10(%1), %1\n\t" 132 "lea -0x10(%2), %2\n\t" 133 "jae 7b\n\t" 134 /* 135 * Calculate copy position to head. 136 */ 137 "add $0x10, %0\n\t" 138 "sub %0, %1\n\t" 139 "sub %0, %2\n\t" 140 141 /* 142 * Move data from 8 bytes to 15 bytes. 143 */ 144 ".p2align 4\n\t" 145 "1:\n\t" 146 "cmp $8, %0\n\t" 147 "jb 8f\n\t" 148 "mov 0*4(%1), %3\n\t" 149 "mov 1*4(%1), %4\n\t" 150 "mov -2*4(%1, %0), %5\n\t" 151 "mov -1*4(%1, %0), %1\n\t" 152 153 "mov %3, 0*4(%2)\n\t" 154 "mov %4, 1*4(%2)\n\t" 155 "mov %5, -2*4(%2, %0)\n\t" 156 "mov %1, -1*4(%2, %0)\n\t" 157 "jmp 11f\n\t" 158 159 /* 160 * Move data from 4 bytes to 7 bytes. 161 */ 162 ".p2align 4\n\t" 163 "8:\n\t" 164 "cmp $4, %0\n\t" 165 "jb 9f\n\t" 166 "mov 0*4(%1), %3\n\t" 167 "mov -1*4(%1, %0), %4\n\t" 168 "mov %3, 0*4(%2)\n\t" 169 "mov %4, -1*4(%2, %0)\n\t" 170 "jmp 11f\n\t" 171 172 /* 173 * Move data from 2 bytes to 3 bytes. 174 */ 175 ".p2align 4\n\t" 176 "9:\n\t" 177 "cmp $2, %0\n\t" 178 "jb 10f\n\t" 179 "movw 0*2(%1), %%dx\n\t" 180 "movw -1*2(%1, %0), %%bx\n\t" 181 "movw %%dx, 0*2(%2)\n\t" 182 "movw %%bx, -1*2(%2, %0)\n\t" 183 "jmp 11f\n\t" 184 185 /* 186 * Move data for 1 byte. 187 */ 188 ".p2align 4\n\t" 189 "10:\n\t" 190 "cmp $1, %0\n\t" 191 "jb 11f\n\t" 192 "movb (%1), %%cl\n\t" 193 "movb %%cl, (%2)\n\t" 194 ".p2align 4\n\t" 195 "11:" 196 : "=&c" (d0), "=&S" (d1), "=&D" (d2), 197 "=r" (d3),"=r" (d4), "=r"(d5) 198 :"0" (n), 199 "1" (src), 200 "2" (dest) 201 :"memory"); 202 203 return ret; 204 205} 206EXPORT_SYMBOL(memmove);