1 | /* _memcopy.c -- subroutines for memory copy functions. |
2 | Copyright (C) 1991-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */ |
20 | |
21 | #include <stddef.h> |
22 | #include <libc-diag.h> |
23 | /* Compiling with -O1 might warn that 'a2' and 'a3' may be used |
24 | uninitialized. There are only two ways to arrive at labels 'do4', 'do3' |
25 | or 'do1', all of which use 'a2' or 'a3' in the MERGE macro: either from |
26 | the earlier switch case statement or via a loop iteration. In all cases |
27 | the switch statement or previous loop sets both 'a2' and 'a3'. |
28 | |
29 | Since the usage is within the MERGE macro we disable the |
30 | warning in the definition, but only in this file. */ |
31 | DIAG_PUSH_NEEDS_COMMENT; |
32 | DIAG_IGNORE_NEEDS_COMMENT (6, "-Wmaybe-uninitialized" ); |
33 | #include <memcopy.h> |
34 | DIAG_POP_NEEDS_COMMENT; |
35 | |
36 | /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to |
37 | block beginning at DSTP with LEN `op_t' words (not LEN bytes!). |
38 | Both SRCP and DSTP should be aligned for memory operations on `op_t's. */ |
39 | |
40 | #ifndef WORDCOPY_FWD_ALIGNED |
41 | # define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned |
42 | #endif |
43 | |
/* Implementation notes.

   DSTP and SRCP are addresses held in long int; every access casts them
   to (op_t *).  LEN counts op_t words.  Nothing is returned.

   The loop body consists of eight steps, do8 down to do1.  Each step
   loads one word into a0 or a1 and stores the word that the previous
   step loaded into the other variable, so one word is always pending.
   The last pending word is stored at do0, after the loop.

   The switch on LEN % 8 selects the step at which the loop is entered.
   Each case loads the first source word, biases SRCP and DSTP so that
   the fixed indices of the entry step address the next source word and
   the first destination word, and adjusts LEN to a multiple of 8 so
   that the LEN != 0 loop test ends the loop exactly.  */
44 | void |
45 | WORDCOPY_FWD_ALIGNED (long int dstp, long int srcp, size_t len) |
46 | { |
     /* a0 and a1 alternate as the pending (loaded, not yet stored) word.  */
47 | op_t a0, a1; |
48 | |
     /* Enter the unrolled loop at the step matching LEN % 8.  */
49 | switch (len % 8) |
50 | { |
     /* Remainders 2..7: load the first word, bias the addresses for the
        entry step and round LEN up to the next multiple of 8.  */
51 | case 2: |
52 | a0 = ((op_t *) srcp)[0]; |
53 | srcp -= 6 * OPSIZ; |
54 | dstp -= 7 * OPSIZ; |
55 | len += 6; |
56 | goto do1; |
57 | case 3: |
58 | a1 = ((op_t *) srcp)[0]; |
59 | srcp -= 5 * OPSIZ; |
60 | dstp -= 6 * OPSIZ; |
61 | len += 5; |
62 | goto do2; |
63 | case 4: |
64 | a0 = ((op_t *) srcp)[0]; |
65 | srcp -= 4 * OPSIZ; |
66 | dstp -= 5 * OPSIZ; |
67 | len += 4; |
68 | goto do3; |
69 | case 5: |
70 | a1 = ((op_t *) srcp)[0]; |
71 | srcp -= 3 * OPSIZ; |
72 | dstp -= 4 * OPSIZ; |
73 | len += 3; |
74 | goto do4; |
75 | case 6: |
76 | a0 = ((op_t *) srcp)[0]; |
77 | srcp -= 2 * OPSIZ; |
78 | dstp -= 3 * OPSIZ; |
79 | len += 2; |
80 | goto do5; |
81 | case 7: |
82 | a1 = ((op_t *) srcp)[0]; |
83 | srcp -= 1 * OPSIZ; |
84 | dstp -= 2 * OPSIZ; |
85 | len += 1; |
86 | goto do6; |
87 | |
88 | case 0: |
     /* NOTE(review): the zero-length check is only evaluated when
        OP_T_THRES <= 3 * OPSIZ.  Presumably callers never pass LEN == 0
        when the threshold is larger; confirm against the definitions of
        OP_T_THRES and OPSIZ, which are not in this file.  */
89 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
90 | return; |
91 | a0 = ((op_t *) srcp)[0]; |
92 | srcp -= 0 * OPSIZ; |
93 | dstp -= 1 * OPSIZ; |
94 | goto do7; |
95 | case 1: |
96 | a1 = ((op_t *) srcp)[0]; |
     /* Subtracting -1 * OPSIZ advances SRCP by one word.  */
97 | srcp -=-1 * OPSIZ; |
98 | dstp -= 0 * OPSIZ; |
99 | len -= 1; |
      /* When LEN was exactly 1 the only word is already in a1: skip the
         loop and store it at do0.  */
100 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
101 | goto do0; |
102 | goto do8; /* No-op. */ |
103 | } |
104 | |
105 | do |
106 | { |
107 | do8: |
108 | a0 = ((op_t *) srcp)[0]; |
109 | /* Compiling with -O1 may warn that 'a1' may be used uninitialized. |
110 | There are only two ways to arrive at label 'do8' and they are via a |
111 | do-while loop iteration or directly via the earlier switch 'case 1:' |
112 | case. The switch case always sets 'a1' and all previous loop |
113 | iterations will also have set 'a1' before the use. */ |
114 | DIAG_PUSH_NEEDS_COMMENT; |
115 | DIAG_IGNORE_NEEDS_COMMENT (6, "-Wmaybe-uninitialized" ); |
116 | ((op_t *) dstp)[0] = a1; |
117 | DIAG_POP_NEEDS_COMMENT; |
118 | do7: |
119 | a1 = ((op_t *) srcp)[1]; |
120 | ((op_t *) dstp)[1] = a0; |
121 | do6: |
122 | a0 = ((op_t *) srcp)[2]; |
123 | ((op_t *) dstp)[2] = a1; |
124 | do5: |
125 | a1 = ((op_t *) srcp)[3]; |
126 | ((op_t *) dstp)[3] = a0; |
127 | do4: |
128 | a0 = ((op_t *) srcp)[4]; |
129 | ((op_t *) dstp)[4] = a1; |
130 | do3: |
131 | a1 = ((op_t *) srcp)[5]; |
132 | ((op_t *) dstp)[5] = a0; |
133 | do2: |
134 | a0 = ((op_t *) srcp)[6]; |
135 | ((op_t *) dstp)[6] = a1; |
136 | do1: |
137 | a1 = ((op_t *) srcp)[7]; |
138 | ((op_t *) dstp)[7] = a0; |
139 | |
      /* Advance both addresses past the eight word slots of this
         iteration and account for them in LEN.  */
140 | srcp += 8 * OPSIZ; |
141 | dstp += 8 * OPSIZ; |
142 | len -= 8; |
143 | } |
144 | while (len != 0); |
145 | |
146 | /* This is the right position for do0. Please don't move |
147 | it into the loop. */ |
148 | do0: |
      /* Store the word that is still pending in a1.  */
149 | ((op_t *) dstp)[0] = a1; |
150 | } |
151 | |
152 | /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to |
153 | block beginning at DSTP with LEN `op_t' words (not LEN bytes!). |
154 | DSTP should be aligned for memory operations on `op_t's, but SRCP must |
155 | *not* be aligned. */ |
156 | |
157 | #ifndef WORDCOPY_FWD_DEST_ALIGNED |
158 | # define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned |
159 | #endif |
160 | |
/* Implementation notes.

   DSTP and SRCP are addresses held in long int; every access casts them
   to (op_t *).  LEN counts op_t words.  Nothing is returned.

   SRCP is rounded down to a multiple of OPSIZ and only whole words are
   loaded from it.  Each destination word is produced by MERGE from two
   consecutive source words (the lower-addressed one first) together
   with the shift counts sh_1 and sh_2.  MERGE is not defined in this
   file (presumably it comes from memcopy.h); NOTE(review): confirm its
   exact shift directions there.

   The loop body consists of four steps, do4 down to do1, rotating
   through a0..a3: each step loads one new word and stores the MERGE of
   the two words loaded before it.  Two words are loaded before the loop
   is entered and the last MERGE is stored at do0, so LEN + 1 source
   words are read in total.

   The switch on LEN % 4 selects the entry step, biases SRCP and DSTP
   for the fixed indices of that step, and adjusts LEN to a multiple
   of 4 so that the LEN != 0 loop test ends the loop exactly.  */
161 | void |
162 | WORDCOPY_FWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) |
163 | { |
164 | op_t a0, a1, a2, a3; |
165 | int sh_1, sh_2; |
166 | |
167 | /* Calculate how to shift a word read at the memory operation |
168 | aligned srcp to make it aligned for copy. */ |
169 | |
      /* sh_1 is the offset of SRCP within its word, expressed in bits
         (8 per byte); sh_2 is the remainder up to 8 * OPSIZ.
         NOTE(review): for an SRCP that is a multiple of OPSIZ, sh_1
         would be 0 and sh_2 would be 8 * OPSIZ; presumably that is why
         an aligned SRCP is not allowed here -- confirm against MERGE.  */
170 | sh_1 = 8 * (srcp % OPSIZ); |
171 | sh_2 = 8 * OPSIZ - sh_1; |
172 | |
173 | /* Make SRCP aligned by rounding it down to the beginning of the `op_t' |
174 | it points in the middle of. */ |
175 | srcp &= -OPSIZ; |
176 | |
      /* Enter the unrolled loop at the step matching LEN % 4.  Each case
         preloads the two source words that the entry step merges.  */
177 | switch (len % 4) |
178 | { |
179 | case 2: |
180 | a1 = ((op_t *) srcp)[0]; |
181 | a2 = ((op_t *) srcp)[1]; |
182 | srcp -= 1 * OPSIZ; |
183 | dstp -= 3 * OPSIZ; |
184 | len += 2; |
185 | goto do1; |
186 | case 3: |
187 | a0 = ((op_t *) srcp)[0]; |
188 | a1 = ((op_t *) srcp)[1]; |
189 | srcp -= 0 * OPSIZ; |
190 | dstp -= 2 * OPSIZ; |
191 | len += 1; |
192 | goto do2; |
193 | case 0: |
      /* NOTE(review): the zero-length check is only evaluated when
         OP_T_THRES <= 3 * OPSIZ.  Presumably callers never pass LEN == 0
         when the threshold is larger; confirm against the definitions
         of OP_T_THRES and OPSIZ, which are not in this file.  */
194 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
195 | return; |
196 | a3 = ((op_t *) srcp)[0]; |
197 | a0 = ((op_t *) srcp)[1]; |
      /* Subtracting -1 * OPSIZ advances SRCP by one word.  */
198 | srcp -=-1 * OPSIZ; |
199 | dstp -= 1 * OPSIZ; |
200 | len += 0; |
201 | goto do3; |
202 | case 1: |
203 | a2 = ((op_t *) srcp)[0]; |
204 | a3 = ((op_t *) srcp)[1]; |
      /* Subtracting -2 * OPSIZ advances SRCP by two words.  */
205 | srcp -=-2 * OPSIZ; |
206 | dstp -= 0 * OPSIZ; |
207 | len -= 1; |
      /* When LEN was exactly 1 both input words are already in a2 and
         a3: skip the loop and store their MERGE at do0.  */
208 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
209 | goto do0; |
210 | goto do4; /* No-op. */ |
211 | } |
212 | |
213 | do |
214 | { |
215 | do4: |
216 | a0 = ((op_t *) srcp)[0]; |
217 | ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); |
218 | do3: |
219 | a1 = ((op_t *) srcp)[1]; |
220 | ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2); |
221 | do2: |
222 | a2 = ((op_t *) srcp)[2]; |
223 | ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2); |
224 | do1: |
225 | a3 = ((op_t *) srcp)[3]; |
226 | ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2); |
227 | |
      /* Advance both addresses past the four word slots of this
         iteration and account for them in LEN.  */
228 | srcp += 4 * OPSIZ; |
229 | dstp += 4 * OPSIZ; |
230 | len -= 4; |
231 | } |
232 | while (len != 0); |
233 | |
234 | /* This is the right position for do0. Please don't move |
235 | it into the loop. */ |
236 | do0: |
      /* Store the MERGE of the last two words loaded (a2 and a3).  */
237 | ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); |
238 | } |
239 | |
240 | /* _wordcopy_bwd_aligned -- Copy block finishing right before |
241 | SRCP to block finishing right before DSTP with LEN `op_t' words |
242 | (not LEN bytes!). Both SRCP and DSTP should be aligned for memory |
243 | operations on `op_t's. */ |
244 | |
245 | #ifndef WORDCOPY_BWD_ALIGNED |
246 | # define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned |
247 | #endif |
248 | |
/* Implementation notes.

   DSTP and SRCP are addresses held in long int that point just past
   the end of the two blocks; every access casts them to (op_t *).
   LEN counts op_t words.  Nothing is returned.

   Words are copied from the highest address downwards.  The loop body
   consists of eight steps, do8 down to do1, using indices 7 down to 0.
   Each step loads one word into a0 or a1 and stores the word that the
   previous step loaded into the other variable, so one word is always
   pending.  The last pending word is stored at do0, after the loop.

   The switch on LEN % 8 selects the step at which the loop is entered.
   Each case first moves SRCP and DSTP down so that the fixed indices of
   the entry step address the right words, then loads the last source
   word, and adjusts LEN to a multiple of 8 so that the LEN != 0 loop
   test ends the loop exactly.  */
249 | void |
250 | WORDCOPY_BWD_ALIGNED (long int dstp, long int srcp, size_t len) |
251 | { |
      /* a0 and a1 alternate as the pending (loaded, not yet stored)
         word.  */
252 | op_t a0, a1; |
253 | |
      /* Enter the unrolled loop at the step matching LEN % 8.  */
254 | switch (len % 8) |
255 | { |
      /* Remainders 2..7: bias the addresses for the entry step, load
         the last source word and round LEN up to the next multiple
         of 8.  */
256 | case 2: |
257 | srcp -= 2 * OPSIZ; |
258 | dstp -= 1 * OPSIZ; |
259 | a0 = ((op_t *) srcp)[1]; |
260 | len += 6; |
261 | goto do1; |
262 | case 3: |
263 | srcp -= 3 * OPSIZ; |
264 | dstp -= 2 * OPSIZ; |
265 | a1 = ((op_t *) srcp)[2]; |
266 | len += 5; |
267 | goto do2; |
268 | case 4: |
269 | srcp -= 4 * OPSIZ; |
270 | dstp -= 3 * OPSIZ; |
271 | a0 = ((op_t *) srcp)[3]; |
272 | len += 4; |
273 | goto do3; |
274 | case 5: |
275 | srcp -= 5 * OPSIZ; |
276 | dstp -= 4 * OPSIZ; |
277 | a1 = ((op_t *) srcp)[4]; |
278 | len += 3; |
279 | goto do4; |
280 | case 6: |
281 | srcp -= 6 * OPSIZ; |
282 | dstp -= 5 * OPSIZ; |
283 | a0 = ((op_t *) srcp)[5]; |
284 | len += 2; |
285 | goto do5; |
286 | case 7: |
287 | srcp -= 7 * OPSIZ; |
288 | dstp -= 6 * OPSIZ; |
289 | a1 = ((op_t *) srcp)[6]; |
290 | len += 1; |
291 | goto do6; |
292 | |
293 | case 0: |
      /* NOTE(review): the zero-length check is only evaluated when
         OP_T_THRES <= 3 * OPSIZ.  Presumably callers never pass LEN == 0
         when the threshold is larger; confirm against the definitions
         of OP_T_THRES and OPSIZ, which are not in this file.  */
294 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
295 | return; |
296 | srcp -= 8 * OPSIZ; |
297 | dstp -= 7 * OPSIZ; |
298 | a0 = ((op_t *) srcp)[7]; |
299 | goto do7; |
300 | case 1: |
301 | srcp -= 9 * OPSIZ; |
302 | dstp -= 8 * OPSIZ; |
303 | a1 = ((op_t *) srcp)[8]; |
304 | len -= 1; |
      /* When LEN was exactly 1 the only word is already in a1: skip the
         loop and store it at do0.  */
305 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
306 | goto do0; |
307 | goto do8; /* No-op. */ |
308 | } |
309 | |
310 | do |
311 | { |
312 | do8: |
313 | a0 = ((op_t *) srcp)[7]; |
314 | /* Check the comment on WORDCOPY_FWD_ALIGNED. */ |
315 | DIAG_PUSH_NEEDS_COMMENT; |
316 | DIAG_IGNORE_NEEDS_COMMENT (6, "-Wmaybe-uninitialized" ); |
317 | ((op_t *) dstp)[7] = a1; |
318 | DIAG_POP_NEEDS_COMMENT; |
319 | do7: |
320 | a1 = ((op_t *) srcp)[6]; |
321 | ((op_t *) dstp)[6] = a0; |
322 | do6: |
323 | a0 = ((op_t *) srcp)[5]; |
324 | ((op_t *) dstp)[5] = a1; |
325 | do5: |
326 | a1 = ((op_t *) srcp)[4]; |
327 | ((op_t *) dstp)[4] = a0; |
328 | do4: |
329 | a0 = ((op_t *) srcp)[3]; |
330 | ((op_t *) dstp)[3] = a1; |
331 | do3: |
332 | a1 = ((op_t *) srcp)[2]; |
333 | ((op_t *) dstp)[2] = a0; |
334 | do2: |
335 | a0 = ((op_t *) srcp)[1]; |
336 | ((op_t *) dstp)[1] = a1; |
337 | do1: |
338 | a1 = ((op_t *) srcp)[0]; |
339 | ((op_t *) dstp)[0] = a0; |
340 | |
      /* Move both addresses down past the eight word slots of this
         iteration and account for them in LEN.  */
341 | srcp -= 8 * OPSIZ; |
342 | dstp -= 8 * OPSIZ; |
343 | len -= 8; |
344 | } |
345 | while (len != 0); |
346 | |
347 | /* This is the right position for do0. Please don't move |
348 | it into the loop. */ |
349 | do0: |
      /* Store the word that is still pending in a1; index 7 is the slot
         directly below the last one written.  */
350 | ((op_t *) dstp)[7] = a1; |
351 | } |
352 | |
353 | /* _wordcopy_bwd_dest_aligned -- Copy block finishing right |
354 | before SRCP to block finishing right before DSTP with LEN `op_t' |
355 | words (not LEN bytes!). DSTP should be aligned for memory |
356 | operations on `op_t', but SRCP must *not* be aligned. */ |
357 | |
358 | #ifndef WORDCOPY_BWD_DEST_ALIGNED |
359 | # define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned |
360 | #endif |
361 | |
/* Implementation notes.

   DSTP and SRCP are addresses held in long int that point just past
   the end of the two blocks; every access casts them to (op_t *).
   LEN counts op_t words.  Nothing is returned.

   SRCP is moved to a multiple of OPSIZ and only whole words are loaded
   from it.  Each destination word is produced by MERGE from two
   consecutive source words (the lower-addressed one first) together
   with the shift counts sh_1 and sh_2.  MERGE is not defined in this
   file (presumably it comes from memcopy.h); NOTE(review): confirm its
   exact shift directions there.

   Words are produced from the highest address downwards.  The loop
   body consists of four steps, do4 down to do1, using indices 3 down
   to 0 and rotating through a0..a3: each step loads one new word and
   stores the MERGE of the two words loaded before it.  Two words are
   loaded before the loop is entered and the last MERGE is stored at
   do0, so LEN + 1 source words are read in total.

   The switch on LEN % 4 selects the entry step, biases SRCP and DSTP
   for the fixed indices of that step, and adjusts LEN to a multiple
   of 4 so that the LEN != 0 loop test ends the loop exactly.  */
362 | void |
363 | WORDCOPY_BWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) |
364 | { |
365 | op_t a0, a1, a2, a3; |
366 | int sh_1, sh_2; |
367 | |
368 | /* Calculate how to shift a word read at the memory operation |
369 | aligned srcp to make it aligned for copy. */ |
370 | |
      /* sh_1 is the offset of SRCP within its word, expressed in bits
         (8 per byte); sh_2 is the remainder up to 8 * OPSIZ.
         NOTE(review): for an SRCP that is a multiple of OPSIZ, sh_1
         would be 0 and sh_2 would be 8 * OPSIZ; presumably that is why
         an aligned SRCP is not allowed here -- confirm against MERGE.  */
371 | sh_1 = 8 * (srcp % OPSIZ); |
372 | sh_2 = 8 * OPSIZ - sh_1; |
373 | |
374 | /* Make srcp aligned by rounding it down to the beginning of the op_t |
375 | it points in the middle of. */ |
376 | srcp &= -OPSIZ; |
      /* After rounding down, step up by one word: SRCP now is the word
         boundary just above the address that was passed in.  */
377 | srcp += OPSIZ; |
378 | |
      /* Enter the unrolled loop at the step matching LEN % 4.  Each case
         biases the addresses first and then preloads the two source
         words that the entry step merges.  */
379 | switch (len % 4) |
380 | { |
381 | case 2: |
382 | srcp -= 3 * OPSIZ; |
383 | dstp -= 1 * OPSIZ; |
384 | a2 = ((op_t *) srcp)[2]; |
385 | a1 = ((op_t *) srcp)[1]; |
386 | len += 2; |
387 | goto do1; |
388 | case 3: |
389 | srcp -= 4 * OPSIZ; |
390 | dstp -= 2 * OPSIZ; |
391 | a3 = ((op_t *) srcp)[3]; |
392 | a2 = ((op_t *) srcp)[2]; |
393 | len += 1; |
394 | goto do2; |
395 | case 0: |
      /* NOTE(review): the zero-length check is only evaluated when
         OP_T_THRES <= 3 * OPSIZ.  Presumably callers never pass LEN == 0
         when the threshold is larger; confirm against the definitions
         of OP_T_THRES and OPSIZ, which are not in this file.  */
396 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
397 | return; |
398 | srcp -= 5 * OPSIZ; |
399 | dstp -= 3 * OPSIZ; |
400 | a0 = ((op_t *) srcp)[4]; |
401 | a3 = ((op_t *) srcp)[3]; |
402 | goto do3; |
403 | case 1: |
404 | srcp -= 6 * OPSIZ; |
405 | dstp -= 4 * OPSIZ; |
406 | a1 = ((op_t *) srcp)[5]; |
407 | a0 = ((op_t *) srcp)[4]; |
408 | len -= 1; |
      /* When LEN was exactly 1 both input words are already in a0 and
         a1: skip the loop and store their MERGE at do0.  */
409 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
410 | goto do0; |
411 | goto do4; /* No-op. */ |
412 | } |
413 | |
414 | do |
415 | { |
416 | do4: |
417 | a3 = ((op_t *) srcp)[3]; |
418 | ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); |
419 | do3: |
420 | a2 = ((op_t *) srcp)[2]; |
421 | ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2); |
422 | do2: |
423 | a1 = ((op_t *) srcp)[1]; |
424 | ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2); |
425 | do1: |
426 | a0 = ((op_t *) srcp)[0]; |
427 | ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2); |
428 | |
      /* Move both addresses down past the four word slots of this
         iteration and account for them in LEN.  */
429 | srcp -= 4 * OPSIZ; |
430 | dstp -= 4 * OPSIZ; |
431 | len -= 4; |
432 | } |
433 | while (len != 0); |
434 | |
435 | /* This is the right position for do0. Please don't move |
436 | it into the loop. */ |
437 | do0: |
      /* Store the MERGE of the last two words loaded (a0 and a1); index
         3 is the slot directly below the last one written.  */
438 | ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); |
439 | } |
440 | |