string_64.S (2807B)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */

#include <asm/ppc_asm.h>
#include <asm/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>

/* TOC entry holding the address of ppc64_caches; loaded in .Llong_clear. */
	.section	".toc","aw"
PPC64_CACHES:
	.tc		ppc64_caches[TC],ppc64_caches
	.section	".text"

/**
 * __arch_clear_user: - Zero a block of memory in user space, with less checking.
 * @to:   Destination address, in user space.
 * @n:    Number of bytes to zero.
 *
 * Zero a block of memory in user space.  Caller must check
 * the specified block with access_ok() before calling this function.
 *
 * Returns number of bytes that could not be cleared.
 * On success, this will be zero.
 */

/*
 * Fault annotation prefixes, used as "errN; <store>".
 *
 * Each macro drops a numeric local label at the point of use (so the label
 * addresses the instruction that follows on the same line) and passes that
 * label together with a fixup label to EX_TABLE.  EX_TABLE is defined in the
 * kernel headers, not in this file; it is expected to make a fault on the
 * annotated instruction resume at the named fixup label.
 *
 *   err1 - r3/r4 are not kept in step with the stores; restart from r8.
 *   err2 - r3/r4 are accurate at the faulting store; retry from r3.
 *   err3 - used only by the byte-wise retry loop; give up and return r4.
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

	.macro err3
300:
	EX_TABLE(300b,.Ldo_err3)
	.endm

/*
 * Fixup handlers.  They fall through into each other:
 *
 *   .Ldo_err1: rewind the destination to the address saved in r8, then
 *   .Ldo_err2: store r4 zero bytes one at a time starting at r3, decrementing
 *              r4 after every byte that was stored, then
 *   .Ldo_err3: return r4 in r3.
 *
 * A fault inside the byte loop jumps straight to .Ldo_err3, so r3 ends up
 * holding the count of bytes that were not stored.  If the byte loop runs to
 * completion r4 is 0 and 0 is returned.
 */
.Ldo_err1:
	mr	r3,r8		/* back to the start of the current phase */

.Ldo_err2:
	mtctr	r4		/* one iteration per remaining byte */
1:
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	addi	r4,r4,-1
	bdnz	1b

.Ldo_err3:
	mr	r3,r4		/* return value: bytes left uncleared */
	blr

/*
 * Register usage inside __arch_clear_user:
 *
 *   r3   in: to; current destination pointer; out: return value
 *   r4   in: n;  bytes remaining
 *   r0   constant 0, the value being stored
 *   r5   address of ppc64_caches, later the byte distance to the next
 *        cache block boundary
 *   r6   scratch (negated address, alignment byte count, loop counts)
 *   r7   value of the DCACHEL1LOGBLOCKSIZE field, used as a shift count
 *   r8   restart address consumed by .Ldo_err1
 *   r9   value of the DCACHEL1BLOCKSIZE field, used as the dcbz stride
 *   r10  scratch (4 * r9 threshold, then r9 - 1 as a mask)
 *   ctr  loop counter
 *   cr0, cr1, cr7 are written
 */
_GLOBAL_TOC(__arch_clear_user)
	cmpdi	r4,32
	neg	r6,r3		/* low bits of -to = bytes up to the next boundary */
	li	r0,0
	blt	.Lshort_clear	/* n < 32: no alignment step on this path */
	mr	r8,r3		/* err1 restart point for the alignment stores */
	mtocrf	0x01,r6		/* cr7 = low 4 bits of -to */
	clrldi	r6,r6,(64-3)	/* r6 = (-to) & 7 */

	/*
	 * Get the destination 8 byte aligned.
	 * cr7 bits 3/2/1 select a 1/2/4 byte store.  r4 is only adjusted once
	 * all three have been done (label 3), hence err1 rather than err2.
	 */
	bf	cr7*4+3,1f
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r4,r4,r6	/* account for the alignment bytes */

	cmpdi	r4,32
	cmpdi	cr1,r4,512
	blt	.Lshort_clear
	bgt	cr1,.Llong_clear	/* more than 512 bytes left */

.Lmedium_clear:
	srdi	r6,r4,5		/* number of 32 byte chunks */
	mtctr	r6

	/* Do 32 byte chunks; r3/r4 are updated per chunk, so err2 applies */
4:
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
err2;	std	r0,16(r3)
err2;	std	r0,24(r3)
	addi	r3,r3,32
	addi	r4,r4,-32
	bdnz	4b

.Lshort_clear:
	/* up to 31 bytes to go */
	cmpdi	r4,16
	blt	6f
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
	addi	r3,r3,16
	addi	r4,r4,-16

	/*
	 * Up to 15 bytes to go.
	 * cr7 bits 0/1/2/3 of the remaining count select an 8/4/2/1 byte
	 * store.  r4 is not decremented between them, so r8 records where
	 * this tail started and the stores use err1.
	 */
6:	mr	r8,r3
	clrldi	r4,r4,(64-4)	/* r4 &= 15 */
	mtocrf	0x01,r4
	bf	cr7*4+0,7f
err1;	std	r0,0(r3)
	addi	r3,r3,8

7:	bf	cr7*4+1,8f
err1;	stw	r0,0(r3)
	addi	r3,r3,4

8:	bf	cr7*4+2,9f
err1;	sth	r0,0(r3)
	addi	r3,r3,2

9:	bf	cr7*4+3,10f
err1;	stb	r0,0(r3)

10:	li	r3,0		/* everything was cleared */
	blr

.Llong_clear:
	ld	r5,PPC64_CACHES@toc(r2)	/* r5 = address of ppc64_caches */

	/*
	 * cr7 still holds the low bits of -to from function entry; bit 0
	 * (value 8) set means one more doubleword is needed to reach 16 byte
	 * alignment.
	 */
	bf	cr7*4+0,11f
err2;	std	r0,0(r3)
	addi	r3,r3,8
	addi	r4,r4,-8

	/* Destination is 16 byte aligned, need to get it cache block aligned */
11:	lwz	r7,DCACHEL1LOGBLOCKSIZE(r5)
	lwz	r9,DCACHEL1BLOCKSIZE(r5)

	/*
	 * With worst case alignment the long clear loop takes a minimum
	 * of 1 byte less than 2 cachelines.
	 *
	 * NOTE(review): the threshold computed below is 4 * r9 (shift left
	 * by 2), i.e. four cache blocks, which is more conservative than the
	 * figure quoted above.
	 */
	sldi	r10,r9,2
	cmpd	r4,r10
	blt	.Lmedium_clear

	neg	r6,r3
	addi	r10,r9,-1	/* r10 = block size - 1, reused as a mask below */
	and.	r5,r6,r10	/* r5 = bytes up to the next block boundary */
	beq	13f		/* already block aligned */

	srdi	r6,r5,4		/* number of 16 byte chunks to the boundary */
	mtctr	r6
	mr	r8,r3		/* r4 is not updated in this loop: err1 restart */
12:
err1;	std	r0,0(r3)
err1;	std	r0,8(r3)
	addi	r3,r3,16
	bdnz	12b

	sub	r4,r4,r5

13:	srd	r6,r4,r7	/* number of whole cache blocks */
	mtctr	r6
	mr	r8,r3		/* r4 is not updated in this loop: err1 restart */
14:
err1;	dcbz	0,r3		/* zero one whole block */
	add	r3,r3,r9
	bdnz	14b

	and	r4,r4,r10	/* bytes left after the last whole block */

	cmpdi	r4,32
	blt	.Lshort_clear
	b	.Lmedium_clear
EXPORT_SYMBOL(__arch_clear_user)