1 | /* memcpy - copy a block from source to destination. 31/64 bit S/390 version. |
2 | Copyright (C) 2012-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | |
20 | #include <sysdep.h> |
21 | #include "asm-syntax.h" |
22 | #include <ifunc-memcpy.h> |
23 | |
24 | /* INPUT PARAMETERS |
25 | %r2 = address of destination memory area |
26 | %r3 = address of source memory area |
27 | %r4 = number of bytes to copy. */ |
28 | |
29 | .text |
30 | |
31 | #if defined __s390x__ /* 64 bit build: the helper names map to the 64 bit mnemonics. */ |
32 | # define LTGR ltgr |
33 | # define CGHI cghi |
34 | # define LGR lgr |
35 | # define AGHI aghi |
36 | # define BRCTG brctg |
37 | #else /* 31 bit build: the helper names map to the 32 bit mnemonics. */ |
38 | # define LTGR ltr |
39 | # define CGHI chi |
40 | # define LGR lr |
41 | # define AGHI ahi |
42 | # define BRCTG brct |
43 | #endif /* ! defined __s390x__ */ |
44 | |
45 | #if HAVE_MEMCPY_Z900_G5 |
46 | ENTRY(MEMPCPY_Z900_G5) /* mempcpy for z900 / g5: sets up r1 and r2, then shares the copy code of MEMCPY_Z900_G5. */ |
47 | # if defined __s390x__ |
48 | .machine "z900" |
49 | # else |
50 | .machine "g5" |
51 | # endif /* ! defined __s390x__ */ |
52 | LGR %r1,%r2 # r1: Use as dest |
53 | la %r2,0(%r4,%r2) # r2: Return dest + n |
54 | j .L_Z900_G5_start /* Continue in MEMCPY_Z900_G5, behind its own register setup. */ |
55 | END(MEMPCPY_Z900_G5) |
56 | |
57 | ENTRY(MEMCPY_Z900_G5) /* memcpy for z900 (64 bit) / g5 (31 bit): copies %r4 bytes from %r3 to %r2. */ |
58 | # if defined __s390x__ |
59 | .machine "z900" |
60 | # else |
61 | .machine "g5" |
62 | # endif /* ! defined __s390x__ */ |
63 | LGR %r1,%r2 # r1: Use as dest ; r2: Return dest |
64 | .L_Z900_G5_start: /* MEMPCPY_Z900_G5 joins here with r1 = dest and r2 = dest + n. */ |
65 | LTGR %r4,%r4 /* Test the length. */ |
66 | je .L_Z900_G5_4 /* n == 0: nothing to copy. */ |
67 | AGHI %r4,-1 /* r4 = n - 1 */ |
68 | # if defined __s390x__ |
69 | srlg %r5,%r4,8 /* r5 = (n - 1) / 256 */ |
70 | # else |
71 | lr %r5,%r4 |
72 | srl %r5,8 /* r5 = (n - 1) / 256 */ |
73 | # endif /* ! defined __s390x__ */ |
74 | LTGR %r5,%r5 /* Is there at least one 256 byte block in front of the tail? */ |
75 | jne .L_Z900_G5_13 |
76 | .L_Z900_G5_3: /* Tail: copy the last 1..256 bytes. */ |
77 | # if defined __s390x__ |
78 | larl %r5,.L_Z900_G5_15 /* r5 = address of the mvc template. */ |
79 | # define Z900_G5_EX_D 0 |
80 | # else |
81 | basr %r5,0 /* r5 = address of .L_Z900_G5_14; the template is addressed relative to it. */ |
82 | .L_Z900_G5_14: |
83 | # define Z900_G5_EX_D .L_Z900_G5_15-.L_Z900_G5_14 |
84 | # endif /* ! defined __s390x__ */ |
85 | ex %r4,Z900_G5_EX_D(%r5) /* Execute the mvc template; its length field is taken from the low byte of r4. */ |
86 | .L_Z900_G5_4: |
87 | br %r14 /* Return; r2 still holds the return value set at entry. */ |
88 | .L_Z900_G5_13: |
89 | CGHI %r5,4096 # Switch to mvcle for copies >1MB |
90 | jh __memcpy_mvcle |
91 | .L_Z900_G5_12: /* Loop: copy r5 blocks of 256 bytes. */ |
92 | mvc 0(256,%r1),0(%r3) |
93 | la %r1,256(%r1) /* Advance dest. */ |
94 | la %r3,256(%r3) /* Advance src. */ |
95 | BRCTG %r5,.L_Z900_G5_12 /* Decrement r5 and loop while it is non-zero. */ |
96 | j .L_Z900_G5_3 /* Finish with the tail. */ |
97 | .L_Z900_G5_15: /* Template for the ex above; the preceding jump keeps it from being reached by fall-through. */ |
98 | mvc 0(1,%r1),0(%r3) |
99 | END(MEMCPY_Z900_G5) |
100 | #endif /* HAVE_MEMCPY_Z900_G5 */ |
101 | |
102 | ENTRY(__memcpy_mvcle) /* Large-copy path; the mem[p]cpy variants in this file jump here with r1 = dest, r2 = return value, r3 = src, r4 = n - 1. */ |
103 | # Using as standalone function will result in unexpected |
104 | # results since the length field is incremented by 1 in order to |
105 | # compensate the changes already done in the functions above. |
106 | LGR %r0,%r2 # backup return dest [ + n ] |
107 | AGHI %r4,1 # length + 1 (the callers pass n - 1) |
108 | LGR %r5,%r4 # source length |
109 | LGR %r4,%r3 # source address |
110 | LGR %r2,%r1 # destination address |
111 | LGR %r3,%r5 # destination length = source length |
112 | .L_MVCLE_1: |
113 | mvcle %r2,%r4,0 # MVCLE copies the whole area (both lengths are equal) |
114 | jo .L_MVCLE_1 /* Re-issue mvcle until it no longer reports cc 3 (copy not yet complete). */ |
115 | LGR %r2,%r0 # return destination address |
116 | br %r14 |
117 | END(__memcpy_mvcle) |
118 | |
119 | #undef LTGR /* The code below this point spells out the 64 bit mnemonics directly, so the helper names are dropped. */ |
120 | #undef CGHI |
121 | #undef LGR |
122 | #undef AGHI |
123 | #undef BRCTG |
124 | |
125 | #if HAVE_MEMCPY_Z10 |
126 | ENTRY(MEMPCPY_Z10) /* mempcpy for z10: sets up r1 and r2, then shares the copy code of MEMCPY_Z10. */ |
127 | .machine "z10" |
128 | .machinemode "zarch_nohighgprs" |
129 | lgr %r1,%r2 # r1: Use as dest |
130 | la %r2,0(%r4,%r2) # r2: Return dest + n |
131 | j .L_Z10_start /* Continue in MEMCPY_Z10, behind its own register setup. */ |
132 | END(MEMPCPY_Z10) |
133 | |
134 | ENTRY(MEMCPY_Z10) /* memcpy for z10: copies %r4 bytes from %r3 to %r2. */ |
135 | .machine "z10" |
136 | .machinemode "zarch_nohighgprs" |
137 | lgr %r1,%r2 # r1: Use as dest ; r2: Return dest |
138 | .L_Z10_start: /* MEMPCPY_Z10 joins here with r1 = dest and r2 = dest + n. */ |
139 | # if !defined __s390x__ |
140 | llgfr %r4,%r4 /* 31 bit build: zero-extend the length, since 64 bit instructions are used below. */ |
141 | # endif /* !defined __s390x__ */ |
142 | cgije %r4,0,.L_Z10_4 /* n == 0: nothing to copy. */ |
143 | aghi %r4,-1 /* r4 = n - 1 */ |
144 | srlg %r5,%r4,8 /* r5 = (n - 1) / 256 */ |
145 | cgijlh %r5,0,.L_Z10_13 /* r5 != 0: copy 256 byte blocks first. */ |
146 | .L_Z10_3: /* Tail: copy the last 1..256 bytes. */ |
147 | exrl %r4,.L_Z10_15 /* Execute the mvc template; its length field is taken from the low byte of r4. */ |
148 | .L_Z10_4: |
149 | br %r14 /* Return; r2 still holds the return value set at entry. */ |
150 | .L_Z10_13: |
151 | cgfi %r5,65535 # Switch to mvcle for copies >16MB |
152 | jh __memcpy_mvcle |
153 | .L_Z10_12: /* Loop: copy r5 blocks of 256 bytes. */ |
154 | pfd 1,768(%r3) /* Prefetch src 768 bytes ahead of the current block. */ |
155 | pfd 2,768(%r1) /* Prefetch dest 768 bytes ahead of the current block. */ |
156 | mvc 0(256,%r1),0(%r3) |
157 | la %r1,256(%r1) /* Advance dest. */ |
158 | la %r3,256(%r3) /* Advance src. */ |
159 | brctg %r5,.L_Z10_12 /* Decrement r5 and loop while it is non-zero. */ |
160 | j .L_Z10_3 /* Finish with the tail. */ |
161 | .L_Z10_15: /* Template for the exrl above; the preceding jump keeps it from being reached by fall-through. */ |
162 | mvc 0(1,%r1),0(%r3) |
163 | END(MEMCPY_Z10) |
164 | #endif /* HAVE_MEMCPY_Z10 */ |
165 | |
166 | #if HAVE_MEMCPY_Z196 |
167 | ENTRY(MEMPCPY_Z196) /* mempcpy for z196: sets up r1 and r2, then shares the copy code of MEMCPY_Z196. */ |
168 | .machine "z196" |
169 | .machinemode "zarch_nohighgprs" |
170 | lgr %r1,%r2 # r1: Use as dest |
171 | la %r2,0(%r4,%r2) # r2: Return dest + n |
172 | j .L_Z196_start /* Continue in MEMCPY_Z196, behind its own register setup. */ |
173 | END(MEMPCPY_Z196) |
174 | |
175 | ENTRY(MEMCPY_Z196) /* memcpy for z196: copies %r4 bytes from %r3 to %r2. */ |
176 | .machine "z196" |
177 | .machinemode "zarch_nohighgprs" |
178 | lgr %r1,%r2 # r1: Use as dest ; r2: Return dest |
179 | .L_Z196_start: /* MEMPCPY_Z196 joins here with r1 = dest and r2 = dest + n. */ |
180 | # if !defined __s390x__ |
181 | llgfr %r4,%r4 /* 31 bit build: zero-extend the length, since 64 bit instructions are used below. */ |
182 | # endif /* !defined __s390x__ */ |
183 | ltgr %r4,%r4 /* Test the length. */ |
184 | je .L_Z196_4 /* n == 0: nothing to copy. */ |
185 | .L_Z196_start2: /* The memmove variants in this file join here for a forward copy of more than 16 bytes (r1 = r2 = dest). */ |
186 | aghi %r4,-1 /* r4 = n - 1 */ |
187 | risbg %r5,%r4,8,128+63,56 # r5 = r4 / 256 |
188 | jne .L_Z196_5 /* cc of risbg: r5 != 0, copy 256 byte blocks first. */ |
189 | .L_Z196_3: /* Tail: copy the last 1..256 bytes. */ |
190 | exrl %r4,.L_Z196_14 /* Execute the mvc template; its length field is taken from the low byte of r4. */ |
191 | .L_Z196_4: |
192 | br %r14 /* Return; r2 still holds the return value set at entry. */ |
193 | .L_Z196_5: |
194 | cgfi %r5,255 # Switch to loop with pfd for copies >=64kB |
195 | jh .L_Z196_6 |
196 | .L_Z196_2: /* Loop without prefetching: copy r5 blocks of 256 bytes. */ |
197 | mvc 0(256,%r1),0(%r3) |
198 | aghi %r5,-1 /* Sets the cc tested by jne below; the la instructions in between must not change it. */ |
199 | la %r1,256(%r1) /* Advance dest. */ |
200 | la %r3,256(%r3) /* Advance src. */ |
201 | jne .L_Z196_2 |
202 | j .L_Z196_3 /* Finish with the tail. */ |
203 | .L_Z196_6: |
204 | cgfi %r5,262144 # Switch to mvcle for copies >64MB |
205 | jh __memcpy_mvcle |
206 | .L_Z196_7: /* Loop with prefetching: copy r5 blocks of 256 bytes. */ |
207 | pfd 1,1024(%r3) /* Prefetch src 1024 bytes ahead of the current block. */ |
208 | pfd 2,1024(%r1) /* Prefetch dest 1024 bytes ahead of the current block. */ |
209 | mvc 0(256,%r1),0(%r3) |
210 | aghi %r5,-1 /* Sets the cc tested by jne below; the la instructions in between must not change it. */ |
211 | la %r1,256(%r1) /* Advance dest. */ |
212 | la %r3,256(%r3) /* Advance src. */ |
213 | jne .L_Z196_7 |
214 | j .L_Z196_3 /* Finish with the tail. */ |
215 | .L_Z196_14: /* Template for the exrl above; the preceding jump keeps it from being reached by fall-through. */ |
216 | mvc 0(1,%r1),0(%r3) |
217 | END(MEMCPY_Z196) |
218 | #endif /* HAVE_MEMCPY_Z196 */ |
219 | |
220 | #if HAVE_MEMMOVE_Z13 |
221 | ENTRY(MEMMOVE_Z13) /* memmove for z13: %r2 = dest, %r3 = src, %r4 = len; %r2 is left untouched as the return value. */ |
222 | .machine "z13" |
223 | .machinemode "zarch_nohighgprs" |
224 | # if !defined __s390x__ |
225 | /* Note: The 31bit dst and src pointers are prefixed with zeroes. */ |
226 | llgfr %r4,%r4 |
227 | llgfr %r3,%r3 |
228 | llgfr %r2,%r2 |
229 | # endif /* !defined __s390x__ */ |
230 | sgrk %r0,%r2,%r3 /* r0 = dst - src, used for the overlap check below. */ |
231 | clgijh %r4,16,.L_MEMMOVE_Z13_LARGE /* len > 16: use the block paths. */ |
232 | aghik %r5,%r4,-1 /* r5 = len - 1 (highest index); its cc is tested by jl below. */ |
233 | .L_MEMMOVE_Z13_SMALL: /* Expects r5 = number of bytes left - 1 and the cc of the aghik that computed it. */ |
234 | jl .L_MEMMOVE_Z13_END /* Jump away if len was zero. */ |
235 | /* Store up to 16 bytes with vll/vstl which needs the index |
236 | instead of lengths. */ |
237 | vll %v16,%r5,0(%r3) |
238 | vstl %v16,%r5,0(%r2) |
239 | .L_MEMMOVE_Z13_END: |
240 | br %r14 |
241 | .L_MEMMOVE_Z13_LARGE: |
242 | lgr %r1,%r2 /* For memcpy: r1: Use as dest ; |
243 | r2: Return dest */ |
244 | /* The unsigned comparison (dst - src >= len) determines if we can |
245 | execute the forward case with memcpy. */ |
246 | #if ! HAVE_MEMCPY_Z196 |
247 | # error The z13 variant of memmove needs the z196 variant of memcpy! |
248 | #endif |
249 | clgrjhe %r0,%r4,.L_Z196_start2 /* Forward copy is safe: continue in MEMCPY_Z196. */ |
250 | risbgn %r5,%r4,4,128+63,60 /* r5 = r4 / 16 */ |
251 | aghi %r4,-16 /* Backward copy: r4 = offset of the last 16 byte block. */ |
252 | clgijhe %r5,8,.L_MEMMOVE_Z13_LARGE_64B /* At least eight 16 byte blocks: use the 64 byte loop. */ |
253 | .L_MEMMOVE_Z13_LARGE_16B_LOOP: |
254 | /* Store at least 16 bytes with vl/vst. The number of 16byte blocks |
255 | is stored in r5. */ |
256 | vl %v16,0(%r4,%r3) |
257 | vst %v16,0(%r4,%r2) |
258 | aghi %r4,-16 /* Step one block towards the start of the buffers. */ |
259 | brctg %r5,.L_MEMMOVE_Z13_LARGE_16B_LOOP |
260 | aghik %r5,%r4,15 /* r5 = bytes left at the start of the buffers - 1; negative if none are left. */ |
261 | j .L_MEMMOVE_Z13_SMALL |
262 | .L_MEMMOVE_Z13_LARGE_64B: |
263 | /* Store at least 128 bytes with 4x vl/vst. The number of 64byte blocks |
264 | will be stored in r0. */ |
265 | aghi %r4,-48 /* r4 = offset of the last 64 byte block (len - 64). */ |
266 | srlg %r0,%r5,2 /* r0 = r5 / 4 |
267 | => Number of 64byte blocks. */ |
268 | .L_MEMMOVE_Z13_LARGE_64B_LOOP: /* All four loads are done before the first store of the block. */ |
269 | vl %v20,48(%r4,%r3) |
270 | vl %v19,32(%r4,%r3) |
271 | vl %v18,16(%r4,%r3) |
272 | vl %v17,0(%r4,%r3) |
273 | vst %v20,48(%r4,%r2) |
274 | vst %v19,32(%r4,%r2) |
275 | vst %v18,16(%r4,%r2) |
276 | vst %v17,0(%r4,%r2) |
277 | aghi %r4,-64 /* Step one 64 byte block towards the start of the buffers. */ |
278 | brctg %r0,.L_MEMMOVE_Z13_LARGE_64B_LOOP |
279 | aghi %r4,48 /* r4 is again the offset of the last uncopied 16 byte block. */ |
280 | /* Recalculate the number of 16byte blocks. */ |
281 | risbg %r5,%r5,62,128+63,0 /* r5 = r5 & 3 |
282 | => Remaining 16byte blocks. */ |
283 | jne .L_MEMMOVE_Z13_LARGE_16B_LOOP |
284 | aghik %r5,%r4,15 /* r5 = bytes left at the start of the buffers - 1; negative if none are left. */ |
285 | j .L_MEMMOVE_Z13_SMALL |
286 | END(MEMMOVE_Z13) |
287 | #endif /* HAVE_MEMMOVE_Z13 */ |
288 | |
289 | #if HAVE_MEMMOVE_ARCH13 |
290 | ENTRY(MEMMOVE_ARCH13) /* memmove for arch13: %r2 = dest, %r3 = src, %r4 = len; %r2 is left untouched as the return value. */ |
291 | .machine "arch13" |
292 | .machinemode "zarch_nohighgprs" |
293 | # if ! defined __s390x__ |
294 | /* Note: The 31bit dst and src pointers are prefixed with zeroes. */ |
295 | llgfr %r4,%r4 |
296 | llgfr %r3,%r3 |
297 | llgfr %r2,%r2 |
298 | # endif /* ! defined __s390x__ */ |
299 | sgrk %r5,%r2,%r3 /* r5 = dst - src, used for the overlap check below. */ |
300 | aghik %r0,%r4,-1 /* Both vstl and mvcrl need the highest index. */ |
301 | clgijh %r4,16,.L_MEMMOVE_ARCH13_LARGE /* len > 16: use memcpy or mvcrl. */ |
302 | .L_MEMMOVE_ARCH13_SMALL: |
303 | jl .L_MEMMOVE_ARCH13_END /* Return if len was zero (cc of aghik). */ |
304 | /* Store up to 16 bytes with vll/vstl (needs highest index). */ |
305 | vll %v16,%r0,0(%r3) |
306 | vstl %v16,%r0,0(%r2) |
307 | .L_MEMMOVE_ARCH13_END: |
308 | br %r14 |
309 | .L_MEMMOVE_ARCH13_LARGE: |
310 | lgr %r1,%r2 /* For memcpy: r1: Use as dest ; r2: Return dest */ |
311 | /* The unsigned comparison (dst - src >= len) determines if we can |
312 | execute the forward case with memcpy. */ |
313 | #if ! HAVE_MEMCPY_Z196 |
314 | # error The arch13 variant of memmove needs the z196 variant of memcpy! |
315 | #endif |
316 | /* The forward case continues in MEMCPY_Z196; the backward case follows. */ |
317 | clgrjhe %r5,%r4,.L_Z196_start2 |
318 | clgijh %r0,255,.L_MEMMOVE_ARCH13_LARGER_256B /* len > 256: split into several mvcrl moves. */ |
319 | /* Move up to 256bytes with mvcrl (move right to left). */ |
320 | mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */ |
321 | br %r14 |
322 | .L_MEMMOVE_ARCH13_LARGER_256B: |
323 | /* First move the "remaining" block of up to 256 bytes at the end of |
324 | src/dst buffers. Then move blocks of 256bytes in a loop starting |
325 | with the block at the end. |
326 | (If src/dst pointers are aligned e.g. to 256 bytes, then the pointers |
327 | passed to mvcrl instructions are aligned, too) */ |
328 | risbgn %r5,%r0,8,128+63,56 /* r5 = r0 / 256 */ |
329 | risbgn %r0,%r0,56,128+63,0 /* r0 = r0 & 0xFF */ |
330 | slgr %r4,%r0 /* r4 = len - r0 */ |
331 | lay %r1,-1(%r4,%r1) /* r1 = start of the trailing (r0 + 1) byte block in dst. */ |
332 | lay %r3,-1(%r4,%r3) /* r3 = start of the trailing (r0 + 1) byte block in src. */ |
333 | mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */ |
334 | lghi %r0,255 /* Always copy 256 bytes in the loop below! */ |
335 | .L_MEMMOVE_ARCH13_LARGE_256B_LOOP: /* Runs r5 times, stepping towards the start of the buffers. */ |
336 | aghi %r1,-256 |
337 | aghi %r3,-256 |
338 | mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */ |
339 | brctg %r5,.L_MEMMOVE_ARCH13_LARGE_256B_LOOP |
340 | br %r14 |
341 | END(MEMMOVE_ARCH13) |
342 | #endif /* HAVE_MEMMOVE_ARCH13 */ |
343 | |
344 | #if ! HAVE_MEMCPY_IFUNC |
345 | /* If we don't use ifunc, define an alias for mem[p]cpy here. |
346 | Otherwise see sysdeps/s390/mem[p]cpy.c. */ |
347 | strong_alias (MEMCPY_DEFAULT, memcpy) |
348 | strong_alias (MEMPCPY_DEFAULT, __mempcpy) |
349 | weak_alias (__mempcpy, mempcpy) /* mempcpy is only a weak alias of __mempcpy. */ |
350 | #endif /* ! HAVE_MEMCPY_IFUNC */ |
351 | |
352 | #if ! HAVE_MEMMOVE_IFUNC |
353 | /* If we don't use ifunc, define an alias for memmove here. |
354 | Otherwise see sysdeps/s390/memmove.c. */ |
355 | # if ! HAVE_MEMMOVE_C |
356 | /* If the c variant is needed, then sysdeps/s390/memmove-c.c |
357 | defines memmove. |
358 | Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it. */ |
359 | strong_alias (MEMMOVE_DEFAULT, memmove) |
360 | # endif /* ! HAVE_MEMMOVE_C */ |
361 | #endif /* ! HAVE_MEMMOVE_IFUNC */ |
362 | |
363 | #if defined SHARED && IS_IN (libc) |
364 | /* Defines the internal symbols. |
365 | Compare to libc_hidden_[builtin_]def (mem[p]cpy) in string/mem[p]cpy.c. */ |
366 | strong_alias (MEMCPY_DEFAULT, __GI_memcpy) |
367 | strong_alias (MEMPCPY_DEFAULT, __GI_mempcpy) /* Both internal mempcpy names alias the same default variant. */ |
368 | strong_alias (MEMPCPY_DEFAULT, __GI___mempcpy) |
369 | # if ! HAVE_MEMMOVE_C |
370 | /* If the c variant is needed, then sysdeps/s390/memmove-c.c |
371 | defines the internal symbol. |
372 | Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it. */ |
373 | strong_alias (MEMMOVE_DEFAULT, __GI_memmove) |
374 | # endif /* ! HAVE_MEMMOVE_C */ |
375 | #endif /* defined SHARED && IS_IN (libc) */ |
376 | |