/* _memcopy.c -- subroutines for memory copy functions.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...!  */

#include <stddef.h>
#include <memcopy.h>

/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
   block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
   Both SRCP and DSTP should be aligned for memory operations on `op_t's.

   DSTP and SRCP are addresses carried in `long int's; they are cast to
   `op_t *' at every access.  The copy is an eight-way unrolled loop
   that is entered in the middle: LEN % 8 selects the entry label, and
   both addresses are biased so that the fixed indices used inside the
   loop land on the first words of the block.  Every step loads one
   word and stores the word loaded by the step before it, so one word
   is always pending in A0 or A1; the last pending word is stored at
   `do0'.  Nothing is returned.  */

#ifndef WORDCOPY_FWD_ALIGNED
# define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned
#endif

void
WORDCOPY_FWD_ALIGNED (long int dstp, long int srcp, size_t len)
{
  /* A0 and A1 alternate as "word just loaded" and "word to store".  */
  op_t a0, a1;

  /* Each case loads the first source word, biases the two addresses
     for its entry label, and adjusts LEN to a multiple of 8 so that
     the loop's `len -= 8' / `len != 0' test terminates exactly.
     DSTP is biased one word further than SRCP because stores run one
     step behind loads.  */
  switch (len % 8)
    {
    case 2:
      a0 = ((op_t *) srcp)[0];
      srcp -= 6 * OPSIZ;
      dstp -= 7 * OPSIZ;
      len += 6;
      goto do1;
    case 3:
      a1 = ((op_t *) srcp)[0];
      srcp -= 5 * OPSIZ;
      dstp -= 6 * OPSIZ;
      len += 5;
      goto do2;
    case 4:
      a0 = ((op_t *) srcp)[0];
      srcp -= 4 * OPSIZ;
      dstp -= 5 * OPSIZ;
      len += 4;
      goto do3;
    case 5:
      a1 = ((op_t *) srcp)[0];
      srcp -= 3 * OPSIZ;
      dstp -= 4 * OPSIZ;
      len += 3;
      goto do4;
    case 6:
      a0 = ((op_t *) srcp)[0];
      srcp -= 2 * OPSIZ;
      dstp -= 3 * OPSIZ;
      len += 2;
      goto do5;
    case 7:
      a1 = ((op_t *) srcp)[0];
      srcp -= 1 * OPSIZ;
      dstp -= 2 * OPSIZ;
      len += 1;
      goto do6;

    case 0:
      /* The zero-length test is made only when OP_T_THRES is at most
         3 * OPSIZ.  NOTE(review): presumably a larger threshold means
         callers never pass so short a block -- confirm against
         memcopy.h.  */
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        return;
      a0 = ((op_t *) srcp)[0];
      srcp -= 0 * OPSIZ;
      dstp -= 1 * OPSIZ;
      goto do7;
    case 1:
      a1 = ((op_t *) srcp)[0];
      /* Subtracting a negative multiple advances SRCP by one word.  */
      srcp -=-1 * OPSIZ;
      dstp -= 0 * OPSIZ;
      len -= 1;
      /* A single-word block has nothing left for the loop: store the
         pending word and finish.  */
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        goto do0;
      goto do8;                 /* No-op.  */
    }

  do
    {
    do8:
      a0 = ((op_t *) srcp)[0];
      ((op_t *) dstp)[0] = a1;
    do7:
      a1 = ((op_t *) srcp)[1];
      ((op_t *) dstp)[1] = a0;
    do6:
      a0 = ((op_t *) srcp)[2];
      ((op_t *) dstp)[2] = a1;
    do5:
      a1 = ((op_t *) srcp)[3];
      ((op_t *) dstp)[3] = a0;
    do4:
      a0 = ((op_t *) srcp)[4];
      ((op_t *) dstp)[4] = a1;
    do3:
      a1 = ((op_t *) srcp)[5];
      ((op_t *) dstp)[5] = a0;
    do2:
      a0 = ((op_t *) srcp)[6];
      ((op_t *) dstp)[6] = a1;
    do1:
      a1 = ((op_t *) srcp)[7];
      ((op_t *) dstp)[7] = a0;

      srcp += 8 * OPSIZ;
      dstp += 8 * OPSIZ;
      len -= 8;
    }
  while (len != 0);

  /* This is the right position for do0.  Please don't move
     it into the loop.  It stores the one word that is still pending
     in A1 after the last load.  */
 do0:
  ((op_t *) dstp)[0] = a1;
}

/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
   block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
   DSTP should be aligned for memory operations on `op_t's, but SRCP must
   *not* be aligned.

   DSTP and SRCP are addresses carried in `long int's.  SRCP is rounded
   down to a word boundary and only whole aligned words are loaded;
   every destination word is assembled by MERGE from two neighbouring
   source words and the shift counts SH_1 and SH_2.  The function
   therefore loads LEN + 1 aligned source words, starting with the word
   that contains the first source byte.  If SRCP were aligned, SH_1
   would be 0 and SH_2 would be the full bit width of `op_t'.
   NOTE(review): MERGE comes from memcopy.h and is not visible here;
   presumably it shifts each word by its count and ORs the results --
   confirm there.

   The loop is four-way unrolled and entered in the middle: LEN % 4
   selects the entry label.  Nothing is returned.  */

#ifndef WORDCOPY_FWD_DEST_ALIGNED
# define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned
#endif

void
WORDCOPY_FWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len)
{
  /* A0..A3 rotate through the most recently loaded source words.  */
  op_t a0, a1, a2, a3;
  int sh_1, sh_2;

  /* Calculate how to shift a word read at the memory operation
     aligned srcp to make it aligned for copy.  SH_1 is the byte
     misalignment of SRCP expressed in bits; SH_2 is the remainder of
     the word width.  */

  sh_1 = 8 * (srcp % OPSIZ);
  sh_2 = 8 * OPSIZ - sh_1;

  /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
     it points in the middle of.  */
  srcp &= -OPSIZ;

  /* Each case preloads the first two source words, biases the two
     addresses for its entry label, and adjusts LEN to a multiple of 4
     so that the loop's `len -= 4' / `len != 0' test terminates
     exactly.  */
  switch (len % 4)
    {
    case 2:
      a1 = ((op_t *) srcp)[0];
      a2 = ((op_t *) srcp)[1];
      srcp -= 1 * OPSIZ;
      dstp -= 3 * OPSIZ;
      len += 2;
      goto do1;
    case 3:
      a0 = ((op_t *) srcp)[0];
      a1 = ((op_t *) srcp)[1];
      srcp -= 0 * OPSIZ;
      dstp -= 2 * OPSIZ;
      len += 1;
      goto do2;
    case 0:
      /* The zero-length test is made only when OP_T_THRES is at most
         3 * OPSIZ.  NOTE(review): presumably a larger threshold means
         callers never pass so short a block -- confirm against
         memcopy.h.  */
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        return;
      a3 = ((op_t *) srcp)[0];
      a0 = ((op_t *) srcp)[1];
      /* Subtracting a negative multiple advances SRCP by one word.  */
      srcp -=-1 * OPSIZ;
      dstp -= 1 * OPSIZ;
      len += 0;
      goto do3;
    case 1:
      a2 = ((op_t *) srcp)[0];
      a3 = ((op_t *) srcp)[1];
      /* Likewise, this advances SRCP by two words.  */
      srcp -=-2 * OPSIZ;
      dstp -= 0 * OPSIZ;
      len -= 1;
      /* A single-word block has nothing left for the loop: merge the
         two preloaded words and finish.  */
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        goto do0;
      goto do4;                 /* No-op.  */
    }

  do
    {
    do4:
      a0 = ((op_t *) srcp)[0];
      ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
    do3:
      a1 = ((op_t *) srcp)[1];
      ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
    do2:
      a2 = ((op_t *) srcp)[2];
      ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
    do1:
      a3 = ((op_t *) srcp)[3];
      ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);

      srcp += 4 * OPSIZ;
      dstp += 4 * OPSIZ;
      len -= 4;
    }
  while (len != 0);

  /* This is the right position for do0.  Please don't move
     it into the loop.  It stores the last destination word, built
     from the two source words still held in A2 and A3.  */
 do0:
  ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
}

/* _wordcopy_bwd_aligned -- Copy block finishing right before
   SRCP to block finishing right before DSTP with LEN `op_t' words
   (not LEN bytes!).  Both SRCP and DSTP should be aligned for memory
   operations on `op_t's.

   DSTP and SRCP are addresses carried in `long int's that point just
   past the end of their blocks; words are copied from the highest
   address down to the lowest.  The copy is an eight-way unrolled loop
   that is entered in the middle: LEN % 8 selects the entry label, and
   both addresses are moved down so that the fixed indices used inside
   the loop land on the last words of the block.  Every step loads one
   word and stores the word loaded by the step before it, so one word
   is always pending in A0 or A1; the last pending word is stored at
   `do0'.  Nothing is returned.  */

#ifndef WORDCOPY_BWD_ALIGNED
# define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned
#endif

void
WORDCOPY_BWD_ALIGNED (long int dstp, long int srcp, size_t len)
{
  /* A0 and A1 alternate as "word just loaded" and "word to store".  */
  op_t a0, a1;

  /* Each case moves the two addresses down for its entry label, loads
     the last source word, and adjusts LEN to a multiple of 8 so that
     the loop's `len -= 8' / `len != 0' test terminates exactly.
     SRCP is moved one word further than DSTP because stores run one
     step behind loads.  */
  switch (len % 8)
    {
    case 2:
      srcp -= 2 * OPSIZ;
      dstp -= 1 * OPSIZ;
      a0 = ((op_t *) srcp)[1];
      len += 6;
      goto do1;
    case 3:
      srcp -= 3 * OPSIZ;
      dstp -= 2 * OPSIZ;
      a1 = ((op_t *) srcp)[2];
      len += 5;
      goto do2;
    case 4:
      srcp -= 4 * OPSIZ;
      dstp -= 3 * OPSIZ;
      a0 = ((op_t *) srcp)[3];
      len += 4;
      goto do3;
    case 5:
      srcp -= 5 * OPSIZ;
      dstp -= 4 * OPSIZ;
      a1 = ((op_t *) srcp)[4];
      len += 3;
      goto do4;
    case 6:
      srcp -= 6 * OPSIZ;
      dstp -= 5 * OPSIZ;
      a0 = ((op_t *) srcp)[5];
      len += 2;
      goto do5;
    case 7:
      srcp -= 7 * OPSIZ;
      dstp -= 6 * OPSIZ;
      a1 = ((op_t *) srcp)[6];
      len += 1;
      goto do6;

    case 0:
      /* The zero-length test is made only when OP_T_THRES is at most
         3 * OPSIZ.  NOTE(review): presumably a larger threshold means
         callers never pass so short a block -- confirm against
         memcopy.h.  */
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        return;
      srcp -= 8 * OPSIZ;
      dstp -= 7 * OPSIZ;
      a0 = ((op_t *) srcp)[7];
      goto do7;
    case 1:
      srcp -= 9 * OPSIZ;
      dstp -= 8 * OPSIZ;
      a1 = ((op_t *) srcp)[8];
      len -= 1;
      /* A single-word block has nothing left for the loop: store the
         pending word and finish.  */
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        goto do0;
      goto do8;                 /* No-op.  */
    }

  do
    {
    do8:
      a0 = ((op_t *) srcp)[7];
      ((op_t *) dstp)[7] = a1;
    do7:
      a1 = ((op_t *) srcp)[6];
      ((op_t *) dstp)[6] = a0;
    do6:
      a0 = ((op_t *) srcp)[5];
      ((op_t *) dstp)[5] = a1;
    do5:
      a1 = ((op_t *) srcp)[4];
      ((op_t *) dstp)[4] = a0;
    do4:
      a0 = ((op_t *) srcp)[3];
      ((op_t *) dstp)[3] = a1;
    do3:
      a1 = ((op_t *) srcp)[2];
      ((op_t *) dstp)[2] = a0;
    do2:
      a0 = ((op_t *) srcp)[1];
      ((op_t *) dstp)[1] = a1;
    do1:
      a1 = ((op_t *) srcp)[0];
      ((op_t *) dstp)[0] = a0;

      srcp -= 8 * OPSIZ;
      dstp -= 8 * OPSIZ;
      len -= 8;
    }
  while (len != 0);

  /* This is the right position for do0.  Please don't move
     it into the loop.  It stores the one word that is still pending
     in A1 after the last load.  */
 do0:
  ((op_t *) dstp)[7] = a1;
}

/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
   before SRCP to block finishing right before DSTP with LEN `op_t'
   words (not LEN bytes!).  DSTP should be aligned for memory
   operations on `op_t', but SRCP must *not* be aligned.

   DSTP and SRCP are addresses carried in `long int's that point just
   past the end of their blocks; words are produced from the highest
   address down to the lowest.  SRCP is moved to the word boundary
   above it and only whole aligned words are loaded; every destination
   word is assembled by MERGE from two neighbouring source words and
   the shift counts SH_1 and SH_2.  The function therefore loads
   LEN + 1 aligned source words, the highest being the word that
   contains the byte SRCP points at.  If SRCP were aligned, SH_1 would
   be 0 and SH_2 would be the full bit width of `op_t'.
   NOTE(review): MERGE comes from memcopy.h and is not visible here;
   presumably it shifts each word by its count and ORs the results --
   confirm there.

   The loop is four-way unrolled and entered in the middle: LEN % 4
   selects the entry label.  Nothing is returned.  */

#ifndef WORDCOPY_BWD_DEST_ALIGNED
# define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned
#endif

void
WORDCOPY_BWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len)
{
  /* A0..A3 rotate through the most recently loaded source words.  */
  op_t a0, a1, a2, a3;
  int sh_1, sh_2;

  /* Calculate how to shift a word read at the memory operation
     aligned srcp to make it aligned for copy.  SH_1 is the byte
     misalignment of SRCP expressed in bits; SH_2 is the remainder of
     the word width.  */

  sh_1 = 8 * (srcp % OPSIZ);
  sh_2 = 8 * OPSIZ - sh_1;

  /* Make srcp aligned by rounding it down to the beginning of the op_t
     it points in the middle of, then step to the boundary just above,
     so that the negative biases below index downwards from there.  */
  srcp &= -OPSIZ;
  srcp += OPSIZ;

  /* Each case moves the two addresses down for its entry label,
     preloads the two highest source words, and adjusts LEN to a
     multiple of 4 so that the loop's `len -= 4' / `len != 0' test
     terminates exactly.  */
  switch (len % 4)
    {
    case 2:
      srcp -= 3 * OPSIZ;
      dstp -= 1 * OPSIZ;
      a2 = ((op_t *) srcp)[2];
      a1 = ((op_t *) srcp)[1];
      len += 2;
      goto do1;
    case 3:
      srcp -= 4 * OPSIZ;
      dstp -= 2 * OPSIZ;
      a3 = ((op_t *) srcp)[3];
      a2 = ((op_t *) srcp)[2];
      len += 1;
      goto do2;
    case 0:
      /* The zero-length test is made only when OP_T_THRES is at most
         3 * OPSIZ.  NOTE(review): presumably a larger threshold means
         callers never pass so short a block -- confirm against
         memcopy.h.  */
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        return;
      srcp -= 5 * OPSIZ;
      dstp -= 3 * OPSIZ;
      a0 = ((op_t *) srcp)[4];
      a3 = ((op_t *) srcp)[3];
      goto do3;
    case 1:
      srcp -= 6 * OPSIZ;
      dstp -= 4 * OPSIZ;
      a1 = ((op_t *) srcp)[5];
      a0 = ((op_t *) srcp)[4];
      len -= 1;
      /* A single-word block has nothing left for the loop: merge the
         two preloaded words and finish.  */
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        goto do0;
      goto do4;                 /* No-op.  */
    }

  do
    {
    do4:
      a3 = ((op_t *) srcp)[3];
      ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
    do3:
      a2 = ((op_t *) srcp)[2];
      ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2);
    do2:
      a1 = ((op_t *) srcp)[1];
      ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2);
    do1:
      a0 = ((op_t *) srcp)[0];
      ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);

      srcp -= 4 * OPSIZ;
      dstp -= 4 * OPSIZ;
      len -= 4;
    }
  while (len != 0);

  /* This is the right position for do0.  Please don't move
     it into the loop.  It stores the lowest destination word, built
     from the two source words still held in A0 and A1.  */
 do0:
  ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
}
