memcpy-archs.S (4405B)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */

#include <linux/linkage.h>

/*
 * Endian-specific building blocks for the unaligned-source paths of memcpy.
 *
 * SHIFT_1 / SHIFT_2
 *	Shift a freshly loaded word in opposite directions.  In the copy
 *	loops the SHIFT_1 result is OR-ed with the carry register (r5) and
 *	stored, while the SHIFT_2 result becomes the new carry.  The two
 *	directions swap between little and big endian.
 * MERGE_1 / MERGE_2
 *	Position the bytes read ahead of the loop in CASE 1 before they are
 *	OR-ed together into the carry register.  MERGE_2 expands to nothing
 *	on little endian.
 * EXTRACT_1 / EXTRACT_2
 *	Pull the left-over halfword and byte out of the carry register after
 *	the CASE 1 loop.  The little-endian EXTRACT_1 and the big-endian
 *	EXTRACT_2 ignore their IMM argument.
 *
 * The trailing "; <<" and "; >>" are assembler comments and are part of the
 * macro expansion, so nothing may follow an invocation on the same line.
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif

/*
 * Bulk load/store step used by the aligned copy loop (CASE 0).
 *
 * With CONFIG_ARC_HAS_LL64 each LOADX/STOREX moves 8 bytes, so the four
 * load/store pairs of one loop iteration move 32 bytes: the iteration count
 * is len >> 5 and the tail is len & 0x1F.  Without it each step moves
 * 4 bytes, one iteration moves 16 bytes, and the count/tail are len >> 4
 * and len & 0xF.
 */
#ifdef CONFIG_ARC_HAS_LL64
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif

/*
 * memcpy
 *
 * In:	r0 = destination, r1 = source, r2 = number of bytes
 * Out:	r0 is never written, so the caller gets the destination back
 *
 * Working registers: r3 (running destination pointer), r1 (running source
 * pointer), r2 (bytes left), r4-r10 as scratch, lp_count and the flags.
 * NOTE(review): with CONFIG_ARC_HAS_LL64 the 64-bit loads presumably also
 * write the odd partner of each named register (r5, r7, r9, r11) - confirm
 * against the ISA manual.
 *
 * Strategy:
 *   - size == 0		return immediately
 *   - size <= 8		plain byte loop
 *   - otherwise		byte-copy until the destination is 32-bit
 *			aligned, then pick one of four word loops based on
 *			the source alignment (CASE 0..3) and finish with a
 *			byte loop for the tail.
 *
 * The instruction following every ".d" branch/jump sits in its delay slot
 * and is executed whether or not the branch is taken; several paths below
 * rely on that.  Instructions without ".f" leave the flags alone, which is
 * what lets a flag-setting instruction be separated from the lpnz or
 * conditional instruction that consumes its result.
 */
ENTRY_CFI(memcpy)
	mov.f	0, r2
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0		; do not clobber ret val

;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2	; delay slot: byte count for .Lsmallchunk

	;; Byte-copy (4 - (dst & 3)) bytes; skipped when dst is aligned
	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1
	stb.ab	r5, [r3,1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to Dwords, unfold x4
	;; The lsr.f below is the delay slot of the bnz.d above.  It also
	;; runs on the unaligned path, where every case reloads lp_count
	;; and .Lsourceunaligned re-derives the flags with cmp.
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	;; Tail: at most 31 bytes with 64-bit load/store, at most 15 without
	and.f	lp_count, r2, ZOLAND
.Lsmallchunk:
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	;; r4 = src & 3 (1, 2 or 3).  The flags from this cmp drive both
	;; branches below; the sub in between does not modify them.
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1	; delay slot: runs for all three cases

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]	; delay slot: runs for CASE 1 and CASE 3

;;; CASE 1: The source is unaligned, off by 1
	;; Hence I need to read 1 byte for a 16bit alignment
	;; and 2bytes to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	;; Combine the 3 bytes read so far into the carry register r5
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 8)

	SHIFT_1	(r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	;; Tail: 0 to 7 bytes left after the 8-bytes-per-iteration loop
	and.f	lp_count, r2, 0x07
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	;; One halfword brings the source to 32bit alignment; together with
	;; the sub in the delay slot above, r2 has been reduced by 2.
	ldh.ab	r5, [r1, 2]
	sub	r2, r2, 1

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only when the loop will run (flags from the lsr.f above)
	asl.nz	r5, r5, 16
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 16)

	SHIFT_1	(r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 16
#endif
	;; Write back the 16 bits still held in the carry register
	sth.ab	r5, [r3, 2]

	;; Tail: 0 to 7 bytes left after the 8-bytes-per-iteration loop
	and.f	lp_count, r2, 0x07
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence, I need to read 1byte for achieve the 32bit alignment
	;; That byte is already in r5 and r2 already accounts for it: both
	;; were handled in the delay slots at .Lsourceunaligned.

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only when the loop will run (flags from the lsr.f above)
	asl.ne	r5, r5, 24
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 24)

	SHIFT_1	(r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 24
#endif
	;; Write back the 8 bits still held in the carry register
	stb.ab	r5, [r3, 1]

	;; Tail: 0 to 7 bytes left after the 8-bytes-per-iteration loop
	and.f	lp_count, r2, 0x07
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_3:
	j	[blink]

END_CFI(memcpy)