/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/page.h>
#include <asm/ppc_asm.h>

/*
 * copypage_power7 - copy PAGE_SIZE bytes from one page to another.
 *
 * In:	r3 = destination page ("to")
 *	r4 = source page
 *	Both are treated as page aligned (the prefetch setup below relies
 *	on it).
 *
 * Flow:
 *   1. Start prefetch streams for the source and the destination.
 *   2. With CONFIG_ALTIVEC, call enter_vmx_ops(); a non-zero return selects
 *	the VMX loop, zero falls back to the GPR loop.
 *   3. Either loop moves 128 bytes per iteration for PAGE_SIZE/128
 *	iterations, counted in CTR.
 *
 * Registers written here: r0, r5-r12 and CTR; with CONFIG_ALTIVEC also cr0
 * and, on the VMX path, v0-v7. r14-r20 are used by the GPR loop but are
 * saved and restored around it. LR is restored before returning. Whatever
 * DCBT_SETUP_STREAMS, enter_vmx_ops and exit_vmx_ops clobber comes on top.
 */
_GLOBAL(copypage_power7)
	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side. Since source and destination are page
	 * aligned we don't need to clear the bottom 7 bits of either
	 * address.
	 */
	ori	r9,r3,1		/* stream=1 => to */

#ifdef CONFIG_PPC_64K_PAGES
	lis	r7,0x0E01	/* depth=7
				 * units/cachelines=512 */
#else
	lis	r7,0x0E00	/* depth=7 */
	ori	r7,r7,0x1000	/* units/cachelines=32 */
#endif
	ori	r10,r7,1	/* stream=1 */

	/*
	 * r4 = source address (stream 0), r7 = stream 0 control word,
	 * r9 = destination address tagged with stream 1, r10 = stream 1
	 * control word. r8 is presumably scratch for the macro - confirm
	 * against its definition.
	 */
	DCBT_SETUP_STREAMS(r4, r7, r9, r10, r8)

#ifdef CONFIG_ALTIVEC
	/*
	 * enter_vmx_ops() is a real call, so LR and both arguments have to
	 * survive it. r3/r4 are stored at negative offsets from r1, i.e. into
	 * the frame that the stdu below creates; LR is stored at 16(r1)
	 * before that frame is pushed.
	 */
	mflr	r0
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	CFUNC(enter_vmx_ops)
	cmpwi	r3,0			/* zero => take the non-VMX path */
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	mtlr	r0

	li	r0,(PAGE_SIZE/128)	/* 128 bytes per loop iteration */
	mtctr	r0

	/*
	 * This still tests the cmpwi above; none of the instructions in
	 * between write cr0. The frame is deliberately left in place on
	 * this branch because the non-VMX loop saves r14-r20 in it.
	 */
	beq	.Lnonvmx_copy

	addi	r1,r1,STACKFRAMESIZE	/* VMX loop needs no frame */

	/* Index registers for the eight 16-byte vectors of each 128 bytes. */
	li	r6,16
	li	r7,32
	li	r8,48
	li	r9,64
	li	r10,80
	li	r11,96
	li	r12,112

	.align	5
	/* All eight loads are issued before the first store. */
1:	lvx	v7,0,r4
	lvx	v6,r4,r6
	lvx	v5,r4,r7
	lvx	v4,r4,r8
	lvx	v3,r4,r9
	lvx	v2,r4,r10
	lvx	v1,r4,r11
	lvx	v0,r4,r12
	addi	r4,r4,128
	stvx	v7,0,r3
	stvx	v6,r3,r6
	stvx	v5,r3,r7
	stvx	v4,r3,r8
	stvx	v3,r3,r9
	stvx	v2,r3,r10
	stvx	v1,r3,r11
	stvx	v0,r3,r12
	addi	r3,r3,128
	bdnz	1b

	/*
	 * LR already holds our caller's return address (restored above), so
	 * exit_vmx_ops returns straight to our caller.
	 * NOTE(review): r3 has been advanced past the destination page by
	 * the loop - confirm exit_vmx_ops does not need the original pointer.
	 */
	b	CFUNC(exit_vmx_ops)		/* tail call optimise */

#else
	li	r0,(PAGE_SIZE/128)	/* 128 bytes per loop iteration */
	mtctr	r0

	/* Frame for the r14-r20 save area used below. */
	stdu	r1,-STACKFRAMESIZE(r1)
#endif

	/*
	 * GPR copy. Entered with CTR = PAGE_SIZE/128 and a STACKFRAMESIZE
	 * frame already pushed, on both the fall-through and the branch.
	 */
.Lnonvmx_copy:
	/* r14-r20 are needed as extra data registers; preserve them. */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)

	/* Sixteen 8-byte loads, then sixteen 8-byte stores, per iteration. */
1:	ld	r0,0(r4)
	ld	r5,8(r4)
	ld	r6,16(r4)
	ld	r7,24(r4)
	ld	r8,32(r4)
	ld	r9,40(r4)
	ld	r10,48(r4)
	ld	r11,56(r4)
	ld	r12,64(r4)
	ld	r14,72(r4)
	ld	r15,80(r4)
	ld	r16,88(r4)
	ld	r17,96(r4)
	ld	r18,104(r4)
	ld	r19,112(r4)
	ld	r20,120(r4)
	addi	r4,r4,128
	std	r0,0(r3)
	std	r5,8(r3)
	std	r6,16(r3)
	std	r7,24(r3)
	std	r8,32(r3)
	std	r9,40(r3)
	std	r10,48(r3)
	std	r11,56(r3)
	std	r12,64(r3)
	std	r14,72(r3)
	std	r15,80(r3)
	std	r16,88(r3)
	std	r17,96(r3)
	std	r18,104(r3)
	std	r19,112(r3)
	std	r20,120(r3)
	addi	r3,r3,128
	bdnz	1b

	/* Restore the saved registers and pop the frame. */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	addi	r1,r1,STACKFRAMESIZE
	blr
/* Source: linux/arch/powerpc/lib/copypage_power7.S */