memcpy.c (2641B)
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * arch/openrisc/lib/memcpy.c 4 * 5 * Optimized memory copy routines for openrisc. These are mostly copied 6 * from ohter sources but slightly entended based on ideas discuassed in 7 * #openrisc. 8 * 9 * The word unroll implementation is an extension to the arm byte 10 * unrolled implementation, but using word copies (if things are 11 * properly aligned) 12 * 13 * The great arm loop unroll algorithm can be found at: 14 * arch/arm/boot/compressed/string.c 15 */ 16 17#include <linux/export.h> 18 19#include <linux/string.h> 20 21#ifdef CONFIG_OR1K_1200 22/* 23 * Do memcpy with word copies and loop unrolling. This gives the 24 * best performance on the OR1200 and MOR1KX archirectures 25 */ 26void *memcpy(void *dest, __const void *src, __kernel_size_t n) 27{ 28 int i = 0; 29 unsigned char *d, *s; 30 uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src; 31 32 /* If both source and dest are word aligned copy words */ 33 if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) { 34 /* Copy 32 bytes per loop */ 35 for (i = n >> 5; i > 0; i--) { 36 *dest_w++ = *src_w++; 37 *dest_w++ = *src_w++; 38 *dest_w++ = *src_w++; 39 *dest_w++ = *src_w++; 40 *dest_w++ = *src_w++; 41 *dest_w++ = *src_w++; 42 *dest_w++ = *src_w++; 43 *dest_w++ = *src_w++; 44 } 45 46 if (n & 1 << 4) { 47 *dest_w++ = *src_w++; 48 *dest_w++ = *src_w++; 49 *dest_w++ = *src_w++; 50 *dest_w++ = *src_w++; 51 } 52 53 if (n & 1 << 3) { 54 *dest_w++ = *src_w++; 55 *dest_w++ = *src_w++; 56 } 57 58 if (n & 1 << 2) 59 *dest_w++ = *src_w++; 60 61 d = (unsigned char *)dest_w; 62 s = (unsigned char *)src_w; 63 64 } else { 65 d = (unsigned char *)dest_w; 66 s = (unsigned char *)src_w; 67 68 for (i = n >> 3; i > 0; i--) { 69 *d++ = *s++; 70 *d++ = *s++; 71 *d++ = *s++; 72 *d++ = *s++; 73 *d++ = *s++; 74 *d++ = *s++; 75 *d++ = *s++; 76 *d++ = *s++; 77 } 78 79 if (n & 1 << 2) { 80 *d++ = *s++; 81 *d++ = *s++; 82 *d++ = *s++; 83 *d++ = *s++; 84 } 85 } 86 87 if (n & 1 << 1) { 88 *d++ = *s++; 89 *d++ = *s++; 90 } 91 92 if (n & 1) 93 *d++ = *s++; 94 95 return dest; 96} 97#else 98/* 99 * Use word copies but no loop unrolling as we cannot assume there 100 * will be benefits on the archirecture 101 */ 102void *memcpy(void *dest, __const void *src, __kernel_size_t n) 103{ 104 unsigned char *d, *s; 105 uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src; 106 107 /* If both source and dest are word aligned copy words */ 108 if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) { 109 for (; n >= 4; n -= 4) 110 *dest_w++ = *src_w++; 111 } 112 113 d = (unsigned char *)dest_w; 114 s = (unsigned char *)src_w; 115 116 /* For remaining or if not aligned, copy bytes */ 117 for (; n >= 1; n -= 1) 118 *d++ = *s++; 119 120 return dest; 121 122} 123#endif 124 125EXPORT_SYMBOL(memcpy);