1/* memcpy - copy a block from source to destination. 31/64 bit S/390 version.
2 Copyright (C) 2012-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19
20#include <sysdep.h>
21#include "asm-syntax.h"
22#include <ifunc-memcpy.h>
23
24/* INPUT PARAMETERS
25 %r2 = address of destination memory area
26 %r3 = address of source memory area
27 %r4 = number of bytes to copy. */
28
29 .text
30
/* Width-neutral mnemonics used by the z900/g5 and mvcle code below:
   each macro expands to the 64-bit instruction on s390x and to the
   corresponding 32-bit instruction otherwise.  They are #undef'd again
   after __memcpy_mvcle.  */
31#if defined __s390x__
32# define LTGR ltgr
33# define CGHI cghi
34# define LGR lgr
35# define AGHI aghi
36# define BRCTG brctg
37#else
38# define LTGR ltr
39# define CGHI chi
40# define LGR lr
41# define AGHI ahi
42# define BRCTG brct
43#endif /* ! defined __s390x__ */
44
45#if HAVE_MEMCPY_Z900_G5
/* mempcpy entry point of the z900 (64 bit) / g5 (31 bit) variant.
   Keeps the original destination in %r1 for the copy, turns the return
   register %r2 into dest + n and continues in the copy code shared
   with MEMCPY_Z900_G5 (which does not modify %r2).  */
46ENTRY(MEMPCPY_Z900_G5)
47# if defined __s390x__
48 .machine "z900"
49# else
50 .machine "g5"
51# endif /* ! defined __s390x__ */
52 LGR %r1,%r2 # Use as dest
53 la %r2,0(%r4,%r2) # Return dest + n
54 j .L_Z900_G5_start
55END(MEMPCPY_Z900_G5)
56
/* memcpy for z900 (64 bit) / g5 (31 bit).
   In:   %r2 = dest, %r3 = src, %r4 = n.
   Out:  %r2 as set up by the entry point (dest here, dest + n when
         entered through MEMPCPY_Z900_G5).
   Uses: %r1 = running dest, %r3 = running src, %r4 = n - 1,
         %r5 = number of 256-byte blocks, later the base address for EX.
   Strategy: copy (n - 1) / 256 blocks of 256 bytes with mvc, then the
   remaining ((n - 1) % 256) + 1 bytes with an EXecuted mvc.  If the
   block count exceeds 4096 the copy is handed to __memcpy_mvcle.  */
57ENTRY(MEMCPY_Z900_G5)
58# if defined __s390x__
59 .machine "z900"
60# else
61 .machine "g5"
62# endif /* ! defined __s390x__ */
63 LGR %r1,%r2 # r1: Use as dest ; r2: Return dest
64.L_Z900_G5_start:
 /* Nothing to do for n == 0.  */
65 LTGR %r4,%r4
66 je .L_Z900_G5_4
 /* %r4 = n - 1: an mvc length field encodes "length - 1", so the low
    8 bits of %r4 are directly usable as the length code of the final
    mvc.  */
67 AGHI %r4,-1
 /* %r5 = (n - 1) / 256 = number of full 256-byte blocks.  */
68# if defined __s390x__
69 srlg %r5,%r4,8
70# else
71 lr %r5,%r4
72 srl %r5,8
73# endif /* ! defined __s390x__ */
74 LTGR %r5,%r5
75 jne .L_Z900_G5_13
.L_Z900_G5_3 below copies the remainder.  */
 /* Remainder: EX runs the mvc at .L_Z900_G5_15 with the low 8 bits of
    %r4 ORed into its length field.  On s390x larl loads the address of
    that mvc directly (displacement 0); on 31 bit basr %r5,0 yields the
    address of .L_Z900_G5_14 and the displacement is the distance from
    there to the mvc.  */
76.L_Z900_G5_3:
77# if defined __s390x__
78 larl %r5,.L_Z900_G5_15
79# define Z900_G5_EX_D 0
80# else
81 basr %r5,0
82.L_Z900_G5_14:
83# define Z900_G5_EX_D .L_Z900_G5_15-.L_Z900_G5_14
84# endif /* ! defined __s390x__ */
85 ex %r4,Z900_G5_EX_D(%r5)
86.L_Z900_G5_4:
87 br %r14
88.L_Z900_G5_13:
89 CGHI %r5,4096 # Switch to mvcle for copies >1MB
90 jh __memcpy_mvcle
 /* Copy %r5 blocks of 256 bytes, advancing both pointers; then fall
    back to the remainder code above.  */
91.L_Z900_G5_12:
92 mvc 0(256,%r1),0(%r3)
93 la %r1,256(%r1)
94 la %r3,256(%r3)
95 BRCTG %r5,.L_Z900_G5_12
96 j .L_Z900_G5_3
 /* Template for the EX above (length code 0); it is preceded by an
    unconditional jump and therefore never reached by fall-through.  */
97.L_Z900_G5_15:
98 mvc 0(1,%r1),0(%r3)
99END(MEMCPY_Z900_G5)
100#endif /* HAVE_MEMCPY_Z900_G5 */
101
/* Copy helper for very large blocks, reached by a jump from the memcpy
   variants in this file -- not a function with the normal memcpy
   argument layout.
   In:   %r1 = dest, %r3 = src, %r4 = n - 1 (the callers have already
         decremented the length), %r2 = value to return.
   Out:  %r2 = the value passed in %r2.
   Uses: %r0 (saved return value) and the mvcle register pairs
         %r2/%r3 (dest address/length) and %r4/%r5 (src address/length).  */
102ENTRY(__memcpy_mvcle)
103 # Using as standalone function will result in unexpected
104 # results since the length field is incremented by 1 in order to
105 # compensate the changes already done in the functions above.
106 LGR %r0,%r2 # backup return dest [ + n ]
107 AGHI %r4,1 # length + 1
108 LGR %r5,%r4 # source length
109 LGR %r4,%r3 # source address
110 LGR %r2,%r1 # destination address
111 LGR %r3,%r5 # destination length = source length
 /* mvcle may stop early with condition code 3; jo (branch on cc 3)
    re-executes it until the whole operand has been moved.  Both lengths
    are equal, so the padding byte (third operand, 0) is never used.  */
112.L_MVCLE_1:
113 mvcle %r2,%r4,0 # thats it, MVCLE is your friend
114 jo .L_MVCLE_1
115 LGR %r2,%r0 # return destination address
116 br %r14
117END(__memcpy_mvcle)
118
/* The width-neutral helper macros are only used by the z900/g5 and
   mvcle code above; the variants below spell out the 64-bit
   instructions directly.  */
119#undef LTGR
120#undef CGHI
121#undef LGR
122#undef AGHI
123#undef BRCTG
124
125#if HAVE_MEMCPY_Z10
/* mempcpy entry point of the z10 variant.
   Keeps the original destination in %r1 for the copy, turns the return
   register %r2 into dest + n and continues in the copy code shared
   with MEMCPY_Z10 (which does not modify %r2).  */
126ENTRY(MEMPCPY_Z10)
127 .machine "z10"
128 .machinemode "zarch_nohighgprs"
129 lgr %r1,%r2 # Use as dest
130 la %r2,0(%r4,%r2) # Return dest + n
131 j .L_Z10_start
132END(MEMPCPY_Z10)
133
/* memcpy for z10.
   In:   %r2 = dest, %r3 = src, %r4 = n.
   Out:  %r2 as set up by the entry point (dest here, dest + n when
         entered through MEMPCPY_Z10).
   Uses: %r1 = running dest, %r3 = running src, %r4 = n - 1,
         %r5 = number of 256-byte blocks.
   Strategy: copy (n - 1) / 256 blocks of 256 bytes with mvc (with
   prefetching), then the remaining ((n - 1) % 256) + 1 bytes with an
   EXRL-executed mvc.  If the block count exceeds 65535 the copy is
   handed to __memcpy_mvcle.  */
134ENTRY(MEMCPY_Z10)
135 .machine "z10"
136 .machinemode "zarch_nohighgprs"
137 lgr %r1,%r2 # r1: Use as dest ; r2: Return dest
138.L_Z10_start:
 /* The code below uses 64-bit instructions on the length, so a 32-bit
    n is zero-extended first.  */
139# if !defined __s390x__
140 llgfr %r4,%r4
141# endif /* !defined __s390x__ */
 /* Return immediately for n == 0.  */
142 cgije %r4,0,.L_Z10_4
 /* %r4 = n - 1 (its low 8 bits serve as the mvc length code below);
    %r5 = (n - 1) / 256 = number of full 256-byte blocks.  */
143 aghi %r4,-1
144 srlg %r5,%r4,8
145 cgijlh %r5,0,.L_Z10_13
 /* Remainder: execute the mvc at .L_Z10_15 with the low 8 bits of %r4
    ORed into its length field.  */
146.L_Z10_3:
147 exrl %r4,.L_Z10_15
148.L_Z10_4:
149 br %r14
150.L_Z10_13:
151 cgfi %r5,65535 # Switch to mvcle for copies >16MB
152 jh __memcpy_mvcle
 /* Copy %r5 blocks of 256 bytes.  Each iteration prefetches 768 bytes
    ahead: pfd code 1 for the source (fetch access), code 2 for the
    destination (store access).  */
153.L_Z10_12:
154 pfd 1,768(%r3)
155 pfd 2,768(%r1)
156 mvc 0(256,%r1),0(%r3)
157 la %r1,256(%r1)
158 la %r3,256(%r3)
159 brctg %r5,.L_Z10_12
160 j .L_Z10_3
 /* Template for the EXRL above (length code 0); it is preceded by an
    unconditional jump and therefore never reached by fall-through.  */
161.L_Z10_15:
162 mvc 0(1,%r1),0(%r3)
163END(MEMCPY_Z10)
164#endif /* HAVE_MEMCPY_Z10 */
165
166#if HAVE_MEMCPY_Z196
/* mempcpy entry point of the z196 variant.
   Keeps the original destination in %r1 for the copy, turns the return
   register %r2 into dest + n and continues in the copy code shared
   with MEMCPY_Z196 (which does not modify %r2).  */
167ENTRY(MEMPCPY_Z196)
168 .machine "z196"
169 .machinemode "zarch_nohighgprs"
170 lgr %r1,%r2 # Use as dest
171 la %r2,0(%r4,%r2) # Return dest + n
172 j .L_Z196_start
173END(MEMPCPY_Z196)
174
/* memcpy for z196.
   In:   %r2 = dest, %r3 = src, %r4 = n.
   Out:  %r2 as set up by the entry point (dest here, dest + n when
         entered through MEMPCPY_Z196).
   Uses: %r1 = running dest, %r3 = running src, %r4 = n - 1,
         %r5 = number of 256-byte blocks.
   Strategy: copy (n - 1) / 256 blocks of 256 bytes with mvc -- without
   prefetching for up to 255 blocks, with prefetching above that -- then
   the remaining ((n - 1) % 256) + 1 bytes with an EXRL-executed mvc.
   If the block count exceeds 262144 the copy is handed to
   __memcpy_mvcle.
   .L_Z196_start2 is also entered from the memmove variants in this
   file for their forward case, with %r1 = dest, a 64-bit length in %r4
   and n != 0.  */
175ENTRY(MEMCPY_Z196)
176 .machine "z196"
177 .machinemode "zarch_nohighgprs"
178 lgr %r1,%r2 # r1: Use as dest ; r2: Return dest
179.L_Z196_start:
 /* The code below uses 64-bit instructions on the length, so a 32-bit
    n is zero-extended first.  */
180# if !defined __s390x__
181 llgfr %r4,%r4
182# endif /* !defined __s390x__ */
 /* Return immediately for n == 0.  */
183 ltgr %r4,%r4
184 je .L_Z196_4
185.L_Z196_start2:
 /* %r4 = n - 1 (its low 8 bits serve as the mvc length code below).
    risbg also sets the condition code that the following jne tests:
    branch if there is at least one full 256-byte block.  */
186 aghi %r4,-1
187 risbg %r5,%r4,8,128+63,56 # %r5 = %r4 / 256
188 jne .L_Z196_5
 /* Remainder: execute the mvc at .L_Z196_14 with the low 8 bits of %r4
    ORed into its length field.  */
189.L_Z196_3:
190 exrl %r4,.L_Z196_14
191.L_Z196_4:
192 br %r14
193.L_Z196_5:
194 cgfi %r5,255 # Switch to loop with pfd for copies >=64kB
195 jh .L_Z196_6
 /* Plain loop over %r5 blocks of 256 bytes.  The jne tests the
    condition code of "aghi %r5,-1"; the two la instructions in between
    do not change it.  */
196.L_Z196_2:
197 mvc 0(256,%r1),0(%r3)
198 aghi %r5,-1
199 la %r1,256(%r1)
200 la %r3,256(%r3)
201 jne .L_Z196_2
202 j .L_Z196_3
203.L_Z196_6:
204 cgfi %r5,262144 # Switch to mvcle for copies >64MB
205 jh __memcpy_mvcle
 /* Same loop as above, but each iteration prefetches 1024 bytes ahead:
    pfd code 1 for the source (fetch access), code 2 for the
    destination (store access).  */
206.L_Z196_7:
207 pfd 1,1024(%r3)
208 pfd 2,1024(%r1)
209 mvc 0(256,%r1),0(%r3)
210 aghi %r5,-1
211 la %r1,256(%r1)
212 la %r3,256(%r3)
213 jne .L_Z196_7
214 j .L_Z196_3
 /* Template for the EXRL above (length code 0); it is preceded by an
    unconditional jump and therefore never reached by fall-through.  */
215.L_Z196_14:
216 mvc 0(1,%r1),0(%r3)
217END(MEMCPY_Z196)
218#endif /* HAVE_MEMCPY_Z196 */
219
220#if HAVE_MEMMOVE_Z13
/* memmove for z13 (uses vector instructions).
   In:   %r2 = dest, %r3 = src, %r4 = n.
   Out:  %r2 = dest (never modified by this routine).
   Uses: %r0, %r1, %r4, %r5, %v16-%v20.
   Strategy:
   - n <= 16: a single vll/vstl pair; all source bytes are loaded before
     any byte is stored.
   - n > 16 and (dest - src) >= n as unsigned values: a forward copy is
     safe, continue in the z196 memcpy code at .L_Z196_start2.
   - otherwise: copy backwards from the end of the buffers in 64-byte
     and/or 16-byte vector blocks, using %r4 as byte offset of the block
     currently copied; the first n % 16 bytes are finally copied by the
     small path.  */
221ENTRY(MEMMOVE_Z13)
222 .machine "z13"
223 .machinemode "zarch_nohighgprs"
224# if !defined __s390x__
225 /* Note: The 31bit dst and src pointers are prefixed with zeroes. */
226 llgfr %r4,%r4
227 llgfr %r3,%r3
228 llgfr %r2,%r2
229# endif /* !defined __s390x__ */
 /* %r0 = dest - src, needed for the overlap check in the large case.
    %r5 = n - 1 = highest byte index for vll/vstl; the condition code of
    this aghik is "low" exactly when n was zero.  */
230 sgrk %r0,%r2,%r3
231 clgijh %r4,16,.L_MEMMOVE_Z13_LARGE
232 aghik %r5,%r4,-1
 /* Every path reaching this label has just executed an aghik that put
    the highest remaining index into %r5, so jl means "nothing left".  */
233.L_MEMMOVE_Z13_SMALL:
234 jl .L_MEMMOVE_Z13_END /* Jump away if len was zero. */
235 /* Store up to 16 bytes with vll/vstl which needs the index
236 instead of lengths. */
237 vll %v16,%r5,0(%r3)
238 vstl %v16,%r5,0(%r2)
239.L_MEMMOVE_Z13_END:
240 br %r14
241.L_MEMMOVE_Z13_LARGE:
242 lgr %r1,%r2 /* For memcpy: r1: Use as dest ;
243 r2: Return dest */
244 /* The unsigned comparison (dst - src >= len) determines if we can
245 execute the forward case with memcpy. */
246#if ! HAVE_MEMCPY_Z196
247# error The z13 variant of memmove needs the z196 variant of memcpy!
248#endif
249 clgrjhe %r0,%r4,.L_Z196_start2
 /* Backward case.  %r5 = number of full 16-byte blocks, %r4 = n - 16 =
    offset of the last 16-byte block.  With 8 or more blocks (n >= 128)
    the 64-byte loop is used first.  */
250 risbgn %r5,%r4,4,128+63,60 /* r5 = r4 / 16 */
251 aghi %r4,-16
252 clgijhe %r5,8,.L_MEMMOVE_Z13_LARGE_64B
253.L_MEMMOVE_Z13_LARGE_16B_LOOP:
254 /* Store at least 16 bytes with vl/vst. The number of 16byte blocks
255 is stored in r5. */
256 vl %v16,0(%r4,%r3)
257 vst %v16,0(%r4,%r2)
258 aghi %r4,-16
259 brctg %r5,.L_MEMMOVE_Z13_LARGE_16B_LOOP
 /* After the loop %r4 = (n % 16) - 16, so %r5 = %r4 + 15 is the highest
    index of the bytes still to copy at the start of the buffers
    (negative if there are none).  */
260 aghik %r5,%r4,15
261 j .L_MEMMOVE_Z13_SMALL
262.L_MEMMOVE_Z13_LARGE_64B:
263 /* Store at least 128 bytes with 4x vl/vst. The number of 64byte blocks
264 will be stored in r0. */
265 aghi %r4,-48
266 srlg %r0,%r5,2 /* r0 = r5 / 4
267 => Number of 64byte blocks. */
 /* %r4 is now the offset of the last 64-byte block.  All four loads of
    an iteration are issued before the first store.  */
268.L_MEMMOVE_Z13_LARGE_64B_LOOP:
269 vl %v20,48(%r4,%r3)
270 vl %v19,32(%r4,%r3)
271 vl %v18,16(%r4,%r3)
272 vl %v17,0(%r4,%r3)
273 vst %v20,48(%r4,%r2)
274 vst %v19,32(%r4,%r2)
275 vst %v18,16(%r4,%r2)
276 vst %v17,0(%r4,%r2)
277 aghi %r4,-64
278 brctg %r0,.L_MEMMOVE_Z13_LARGE_64B_LOOP
 /* Turn %r4 back into the offset of the next 16-byte block.  */
279 aghi %r4,48
280 /* Recalculate the number of 16byte blocks. */
 /* risbg sets the condition code tested by the jne below.  */
281 risbg %r5,%r5,62,128+63,0 /* r5 = r5 & 3
282 => Remaining 16byte blocks. */
283 jne .L_MEMMOVE_Z13_LARGE_16B_LOOP
 /* No 16-byte block left: %r5 = highest index of the remaining bytes
    (negative if there are none).  */
284 aghik %r5,%r4,15
285 j .L_MEMMOVE_Z13_SMALL
286END(MEMMOVE_Z13)
287#endif /* HAVE_MEMMOVE_Z13 */
288
289#if HAVE_MEMMOVE_ARCH13
/* memmove for arch13 (uses mvcrl, "move right to left").
   In:   %r2 = dest, %r3 = src, %r4 = n.
   Out:  %r2 = dest (never modified by this routine).
   Uses: %r0 = highest index (length - 1) for vll/vstl and mvcrl,
         %r1 = running dest, %r3 = running src, %r4, %r5, %v16.
   Strategy:
   - n <= 16: a single vll/vstl pair; all source bytes are loaded before
     any byte is stored.
   - n > 16 and (dest - src) >= n as unsigned values: a forward copy is
     safe, continue in the z196 memcpy code at .L_Z196_start2.
   - otherwise, n <= 256: one mvcrl.
   - otherwise: one mvcrl for the block of ((n - 1) % 256) + 1 bytes at
     the end of the buffers, then (n - 1) / 256 mvcrl of 256 bytes each,
     walking towards the start of the buffers.  */
290ENTRY(MEMMOVE_ARCH13)
291 .machine "arch13"
292 .machinemode "zarch_nohighgprs"
293# if ! defined __s390x__
294 /* Note: The 31bit dst and src pointers are prefixed with zeroes. */
295 llgfr %r4,%r4
296 llgfr %r3,%r3
297 llgfr %r2,%r2
298# endif /* ! defined __s390x__ */
 /* %r5 = dest - src, needed for the overlap check in the large case.
    The compare-and-branch below leaves the condition code of the aghik
    intact for the jl in the small path.  */
299 sgrk %r5,%r2,%r3
300 aghik %r0,%r4,-1 /* Both vstl and mvcrl needs highest index. */
301 clgijh %r4,16,.L_MEMMOVE_ARCH13_LARGE
302.L_MEMMOVE_ARCH13_SMALL:
303 jl .L_MEMMOVE_ARCH13_END /* Return if len was zero (cc of aghik). */
304 /* Store up to 16 bytes with vll/vstl (needs highest index). */
305 vll %v16,%r0,0(%r3)
306 vstl %v16,%r0,0(%r2)
307.L_MEMMOVE_ARCH13_END:
308 br %r14
309.L_MEMMOVE_ARCH13_LARGE:
310 lgr %r1,%r2 /* For memcpy: r1: Use as dest ; r2: Return dest */
311 /* The unsigned comparison (dst - src >= len) determines if we can
312 execute the forward case with memcpy. */
313#if ! HAVE_MEMCPY_Z196
314# error The arch13 variant of memmove needs the z196 variant of memcpy!
315#endif
316 /* Backward case. */
 /* The first branch still selects the forward case; only if it is not
    taken does the backward copy below start.  */
317 clgrjhe %r5,%r4,.L_Z196_start2
318 clgijh %r0,255,.L_MEMMOVE_ARCH13_LARGER_256B
319 /* Move up to 256bytes with mvcrl (move right to left). */
320 mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */
321 br %r14
322.L_MEMMOVE_ARCH13_LARGER_256B:
323 /* First move the "remaining" block of up to 256 bytes at the end of
324 src/dst buffers. Then move blocks of 256bytes in a loop starting
325 with the block at the end.
326 (If src/dst pointers are aligned e.g. to 256 bytes, then the pointers
327 passed to mvcrl instructions are aligned, too) */
 /* %r5 = number of 256-byte blocks for the loop, %r0 = length - 1 of
    the block at the end.  */
328 risbgn %r5,%r0,8,128+63,56 /* r5 = r0 / 256 */
329 risbgn %r0,%r0,56,128+63,0 /* r0 = r0 & 0xFF */
 /* %r4 = n - %r0, so %r1/%r3 + %r4 - 1 = buffer end - (%r0 + 1), i.e.
    the start of the (%r0 + 1)-byte block at the end of each buffer.  */
330 slgr %r4,%r0
331 lay %r1,-1(%r4,%r1)
332 lay %r3,-1(%r4,%r3)
333 mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */
334 lghi %r0,255 /* Always copy 256 bytes in the loop below! */
 /* Step both pointers back by one block, move it, and repeat %r5
    times.  */
335.L_MEMMOVE_ARCH13_LARGE_256B_LOOP:
336 aghi %r1,-256
337 aghi %r3,-256
338 mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */
339 brctg %r5,.L_MEMMOVE_ARCH13_LARGE_256B_LOOP
340 br %r14
341END(MEMMOVE_ARCH13)
342#endif /* HAVE_MEMMOVE_ARCH13 */
343
/* Symbol aliases: bind the public names (memcpy, __mempcpy, mempcpy,
   memmove) and, for the shared libc, the internal __GI_* names to the
   *_DEFAULT variants.
   NOTE(review): the HAVE_* and *_DEFAULT macros are not defined in this
   file; presumably they come from <ifunc-memcpy.h> and a corresponding
   memmove header -- confirm.  */
344#if ! HAVE_MEMCPY_IFUNC
345/* If we don't use ifunc, define an alias for mem[p]cpy here.
346 Otherwise see sysdeps/s390/mem[p]cpy.c. */
347strong_alias (MEMCPY_DEFAULT, memcpy)
348strong_alias (MEMPCPY_DEFAULT, __mempcpy)
349weak_alias (__mempcpy, mempcpy)
350#endif
351
352#if ! HAVE_MEMMOVE_IFUNC
353/* If we don't use ifunc, define an alias for memmove here.
354 Otherwise see sysdeps/s390/memmove.c. */
355# if ! HAVE_MEMMOVE_C
356/* If the c variant is needed, then sysdeps/s390/memmove-c.c
357 defines memmove.
358 Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it. */
359strong_alias (MEMMOVE_DEFAULT, memmove)
360# endif
361#endif
362
363#if defined SHARED && IS_IN (libc)
364/* Defines the internal symbols.
365 Compare to libc_hidden_[builtin_]def (mem[p]cpy) in string/mem[p]cpy.c. */
366strong_alias (MEMCPY_DEFAULT, __GI_memcpy)
367strong_alias (MEMPCPY_DEFAULT, __GI_mempcpy)
368strong_alias (MEMPCPY_DEFAULT, __GI___mempcpy)
369# if ! HAVE_MEMMOVE_C
370/* If the c variant is needed, then sysdeps/s390/memmove-c.c
371 defines the internal symbol.
372 Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it. */
373strong_alias (MEMMOVE_DEFAULT, __GI_memmove)
374# endif
375#endif
376

source code of glibc/sysdeps/s390/memcpy-z900.S