copypage_power7.S (2834B)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/page.h>
#include <asm/ppc_asm.h>

/*
 * copypage_power7: copy one page, 128 bytes per loop iteration.
 *
 * In:	r3 = destination ("to") address
 *	r4 = source ("from") address
 *	Both are expected to be page aligned; the prefetch setup below
 *	relies on that.
 *
 * With CONFIG_ALTIVEC the copy is done through v0-v7 when enter_vmx_ops
 * returns non-zero, and through the GPR loop at .Lnonvmx_copy when it
 * returns zero. Without CONFIG_ALTIVEC only the GPR loop is built.
 *
 * Registers written directly by this routine: r0, r5-r12 and ctr, plus
 * cr0, lr (restored after the call) and v0-v7 under CONFIG_ALTIVEC.
 * r14-r20 are used by the GPR loop but saved and reloaded around it.
 */
_GLOBAL(copypage_power7)
	/*
	 * Start prefetch streams for both pages with the enhanced touch
	 * instructions: stream ID 0 is used for the load side and stream
	 * ID 1 for the store side. The stream ID is ORed into the low bits
	 * of the operand; as source and destination are page aligned, the
	 * bottom 7 bits of either address do not need clearing first.
	 */
	ori	r9,r3,1		/* r9 = to, tagged stream=1 */

#ifdef CONFIG_PPC_64K_PAGES
	lis	r7,0x0E01	/* depth=7, units/cachelines=512 */
#else
	lis	r7,0x0E00	/* depth=7 */
	ori	r7,r7,0x1000	/* units/cachelines=32 */
#endif
	ori	r10,r7,1	/* same length/depth word, stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32	/* keep only the low 32 bits */

	/* describe read stream 0 */
	dcbt	0,r4,0b01000	/* addr from */
	dcbt	0,r7,0b01010	/* length and depth from */
	/* describe write stream 1 */
	dcbtst	0,r9,0b01000	/* addr to */
	dcbtst	0,r10,0b01010	/* length and depth to */
	eieio			/* barrier between stream setup and GO */
	dcbt	0,r8,0b01010	/* all streams GO */

#ifdef CONFIG_ALTIVEC
	/*
	 * enter_vmx_ops is called with bl, so lr and both page pointers
	 * are stashed first: r3/r4 go into what become the R31/R30 slots
	 * of the frame created by the stdu, lr into 16(r1) of the frame
	 * that is current on entry.
	 */
	mflr	r0
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_ops
	cmpwi	r3,0		/* result is consumed by the beq below */
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	mtlr	r0

	li	r0,(PAGE_SIZE/128)	/* loop count: 128 bytes per pass */
	mtctr	r0

	/* enter_vmx_ops returned 0: take the GPR loop, frame still live */
	beq	.Lnonvmx_copy

	/* vector loop spills nothing, so drop the frame now */
	addi	r1,r1,STACKFRAMESIZE

	/* index registers: offsets of the 16-byte slots in a 128-byte chunk */
	li	r6,16
	li	r7,32
	li	r8,48
	li	r9,64
	li	r10,80
	li	r11,96
	li	r12,112

	.align	5
.Lvmx_loop:
	/* load eight 16-byte vectors from the source ... */
	lvx	v7,0,r4
	lvx	v6,r4,r6
	lvx	v5,r4,r7
	lvx	v4,r4,r8
	lvx	v3,r4,r9
	lvx	v2,r4,r10
	lvx	v1,r4,r11
	lvx	v0,r4,r12
	addi	r4,r4,128
	/* ... and store them to the destination */
	stvx	v7,0,r3
	stvx	v6,r3,r6
	stvx	v5,r3,r7
	stvx	v4,r3,r8
	stvx	v3,r3,r9
	stvx	v2,r3,r10
	stvx	v1,r3,r11
	stvx	v0,r3,r12
	addi	r3,r3,128
	bdnz	.Lvmx_loop

	b	exit_vmx_ops		/* tail call optimise */

#else
	li	r0,(PAGE_SIZE/128)	/* loop count: 128 bytes per pass */
	mtctr	r0

	/* frame for the r14-r20 save slots used below */
	stdu	r1,-STACKFRAMESIZE(r1)
#endif

.Lnonvmx_copy:
	/* r14-r20 are saved in the frame here and reloaded after the loop */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)

.Lgpr_loop:
	/* load sixteen doublewords (128 bytes) from the source ... */
	ld	r0,0(r4)
	ld	r5,8(r4)
	ld	r6,16(r4)
	ld	r7,24(r4)
	ld	r8,32(r4)
	ld	r9,40(r4)
	ld	r10,48(r4)
	ld	r11,56(r4)
	ld	r12,64(r4)
	ld	r14,72(r4)
	ld	r15,80(r4)
	ld	r16,88(r4)
	ld	r17,96(r4)
	ld	r18,104(r4)
	ld	r19,112(r4)
	ld	r20,120(r4)
	addi	r4,r4,128
	/* ... and store them to the destination */
	std	r0,0(r3)
	std	r5,8(r3)
	std	r6,16(r3)
	std	r7,24(r3)
	std	r8,32(r3)
	std	r9,40(r3)
	std	r10,48(r3)
	std	r11,56(r3)
	std	r12,64(r3)
	std	r14,72(r3)
	std	r15,80(r3)
	std	r16,88(r3)
	std	r17,96(r3)
	std	r18,104(r3)
	std	r19,112(r3)
	std	r20,120(r3)
	addi	r3,r3,128
	bdnz	.Lgpr_loop

	/* reload the saved registers and pop the frame */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	addi	r1,r1,STACKFRAMESIZE
	blr