copy_mc_64.S (4168B)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) IBM Corporation, 2011
 * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
 * Author - Balbir Singh <bsingharora@gmail.com>
 */
#include <asm/ppc_asm.h>
#include <asm/errno.h>
#include <asm/export.h>

/*
 * Fault annotations.  Each macro drops a numeric local label on the
 * instruction that follows it and emits an EX_TABLE entry pairing that
 * label with a handler in this file.
 *
 * NOTE(review): EX_TABLE is defined outside this file; presumably it
 * registers a fixup so that a fault on the tagged instruction resumes at
 * the named handler - confirm against the header that defines it.
 *
 *   err1 - tagged access runs without the local stack frame
 *   err2 - tagged access runs while the frame holding r14-r22 is live
 *   err3 - tagged access is part of the byte-wise retry loop
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

	.macro err3
300:	EX_TABLE(300b,.Ldone)
	.endm

/*
 * Handler for err2 sites: reload r14-r22 from the frame created in
 * copy_mc_generic, pop that frame, then fall through to .Ldo_err1.
 */
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
	addi	r1,r1,STACKFRAMESIZE

/*
 * Handler for err1 sites (and tail of the err2 handler).
 *
 * Expects r3/r4 to point at the start of the chunk that was being copied
 * and r7 to hold the number of bytes still outstanding (that chunk
 * included).  r7 is only decremented after a chunk has been fully stored,
 * so it is non-zero whenever this code is reached.
 */
.Ldo_err1:
	/* Do a byte by byte copy to get the exact remaining size */
	mtctr	r7
46:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	46b
	/* The byte-wise retry completed without a fault: nothing is left. */
	li	r3,0
	blr

/*
 * Handler for err3 sites: CTR has not yet been decremented for the byte
 * that was being copied, so it is the count of bytes not copied.
 */
.Ldone:
	mfctr	r3
	blr


/*
 * copy_mc_generic
 *
 * Register use, as established by the code below:
 *   r3 - destination pointer (every store goes through r3)
 *   r4 - source pointer (every load goes through r4)
 *   r5 - number of bytes to copy
 *
 * Returns in r3:
 *   0              every byte was copied
 *   non-zero count bytes not copied, produced by .Ldone after a tagged
 *                  access faulted during the byte-wise retry
 *
 * r7 mirrors the bytes still outstanding and is what the fault handlers
 * consume.  In every chunk the pointers and r7 are advanced only after
 * all of the chunk's stores have been issued, so a fault on any tagged
 * load or store leaves r3, r4 and r7 describing the same position.
 */
_GLOBAL(copy_mc_generic)
	mr	r7,r5
	cmpldi	r5,16
	blt	.Lshort_copy

.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6			/* cr7 = low 4 bits of -src */
	clrldi	r6,r6,(64-3)		/* r6 = (-src) & 7 = bytes to alignment */

	/* cr7 bit 3: one byte needed */
	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)
	addi	r4,r4,1
	addi	r3,r3,1
	subi	r7,r7,1

	/* cr7 bit 2: two bytes needed */
1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
err1;	sth	r0,0(r3)
	addi	r4,r4,2
	addi	r3,r3,2
	subi	r7,r7,2

	/* cr7 bit 1: four bytes needed */
2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
err1;	stw	r0,0(r3)
	addi	r4,r4,4
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6		/* r5 = bytes left after alignment */
	cmpldi	r5,128			/* cr0 is consumed by the blt below */

	/*
	 * Create a frame and save r14-r22, which the wide copies below use
	 * as scratch.  They are reloaded at label 7 or in .Ldo_err2.
	 * LR is stored at offset 16 of the caller's frame; nothing in this
	 * file reloads it.
	 */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	blt	5f			/* fewer than 128 bytes left */
	srdi	r6,r5,7			/* number of 128B iterations */
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	addi	r4,r4,128
	addi	r3,r3,128
	subi	r7,r7,128
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* r5 &= 127 */

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6			/* cr7 bits 1,2,3 <-> 64B, 32B, 16B */

	/* 64B chunk; still uses r14, so it runs before the frame is popped */
6:	bf	cr7*4+1,7f
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r4,r4,64
	addi	r3,r3,64
	subi	r7,r7,64

	/* Reload r14-r22 and pop the frame; later faults use err1. */
7:	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 63B to go */
	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r4,r4,32
	addi	r3,r3,32
	subi	r7,r7,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r4,r4,16
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)		/* r5 &= 15 */

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5			/* cr7 bits 0,1,2,3 <-> 8B, 4B, 2B, 1B */
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r4,r4,8
	addi	r3,r3,8
	subi	r7,r7,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
err1;	stw	r0,0(r3)
	addi	r4,r4,4
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
err1;	sth	r0,0(r3)
	addi	r4,r4,2
	addi	r3,r3,2
	subi	r7,r7,2

	/* Last byte: nothing follows, so the pointers are not advanced. */
14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0
	blr

EXPORT_SYMBOL_GPL(copy_mc_generic);