1 | /* Copyright (C) 2014-2024 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <https://www.gnu.org/licenses/>. */ |
17 | |
18 | #include <sysdep.h> |
19 | |
20 | /* Implements the functions |
21 | |
22 | char * [r3] strncpy (char *dst [r3], const char *src [r4], size_t n [r5]) |
23 | |
24 | AND |
25 | |
26 | char * [r3] stpncpy (char *dst [r3], const char *src [r4], size_t n [r5]) |
27 | |
28 | The algorithm is as follows: |
29 | > if src and dest are 8 byte aligned, perform double word copy |
30 | else |
31 | > copy byte by byte on unaligned addresses. |
32 | |
33 | The aligned comparisons are made using cmpb instructions. */ |
34 | |
35 | /* The optimizations focus on the following performance improvements: |
36 | 1. data alignment [gain from aligned memory access on read/write] |
37 | 2. POWER7 gains performance with loop unrolling/unwinding |
38 | [gain by reduction of branch penalty]. |
39 | 3. The final pad with null bytes is done by calling an optimized |
40 | memset. */ |
41 | |
42 | #if defined (USE_AS_STPNCPY) |
43 | # if defined (STPNCPY) |
44 | # define FUNC_NAME STPNCPY |
45 | # else |
46 | # define FUNC_NAME __stpncpy |
47 | # endif |
48 | #else |
49 | # if defined (STRNCPY) |
50 | # define FUNC_NAME STRNCPY |
51 | # else |
52 | # define FUNC_NAME strncpy |
53 | # endif |
54 | #endif /* USE_AS_STPNCPY */ |
55 | |
56 | #define FRAMESIZE (FRAME_MIN_SIZE+16) |
57 | |
58 | #if !defined (MEMSET) |
59 | /* Without IFUNC support, shared builds call the internal GLIBC symbol |
60 | (created by libc_hidden_builtin_def) as a local call. */ |
61 | # if !defined (SHARED) |
62 | # define MEMSET memset |
63 | # else |
64 | # define MEMSET_is_local |
65 | # define MEMSET __GI_memset |
66 | # endif |
67 | #endif |
68 | |
69 | .machine power7 |
70 | #ifdef MEMSET_is_local |
71 | ENTRY_TOCLESS (FUNC_NAME, 4) |
72 | #else |
73 | ENTRY (FUNC_NAME, 4) |
74 | #endif |
75 | CALL_MCOUNT 3 |
76 | |
77 | or r10, r3, r4 /* OR dst and src so both alignments are tested at once */ |
78 | rldicl. r8, r10, 0, 61 /* r8 = low 3 bits; zero iff both are doubleword aligned */ |
79 | |
80 | std r19, -8(r1) /* save caller's register r19 */ |
81 | std r18, -16(r1) /* save caller's register r18 */ |
82 | cfi_offset(r19, -8) |
83 | cfi_offset(r18, -16) |
84 | |
85 | mr r9, r3 /* r9 = working copy of the dst pointer */ |
86 | mr r18, r3 /* keep original dst as the strncpy return value */ |
87 | bne 0, L(unaligned) /* cr0 from rldicl.: a low bit is set, not both aligned */ |
88 | |
89 | L(aligned): /* src and dst both 8-byte aligned; r8 is zero on entry */ |
90 | srdi r11, r5, 3 /* compute count for CTR ; count = n/8 */ |
91 | cmpldi cr7, r11, 3 /* if count > 3 ; use the 4x unrolled loop */ |
92 | ble 7, L(update1) /* otherwise copy the dwords one at a time */ |
93 | |
94 | ld r10, 0(r4) /* load doubleWord from src */ |
95 | cmpb r8, r10, r8 /* compare each byte with NULL (r8 is zero) */ |
96 | cmpdi cr7, r8, 0 /* if cmpb returned zero there is no NULL ; we continue */ |
97 | bne cr7, L(update3) |
98 | |
99 | std r10, 0(r3) /* copy doubleword at offset=0 */ |
100 | ld r10, 8(r4) /* load next doubleword from offset=8 */ |
101 | cmpb r8, r10, r8 /* compare each byte with NULL (r8 is still zero) */ |
102 | cmpdi cr7, r8, 0 /* if cmpb returned zero there is no NULL ; we continue */ |
103 | bne 7,L(HopBy8) |
104 | |
105 | addi r8, r11, -4 /* r8 = count - 4 */ |
106 | mr r7, r3 /* r7 = dst base of the current 32-byte group */ |
107 | srdi r8, r8, 2 /* r8 = (count - 4) / 4 */ |
108 | mr r6, r4 /* r6 = src base of the current 32-byte group */ |
109 | addi r8, r8, 1 |
110 | li r12, 0 /* r12 = zero mask for cmpb in L(dwordCopy) */ |
111 | mtctr r8 /* CTR = (count - 4) / 4 + 1 unrolled iterations */ |
112 | b L(dwordCopy) |
113 | |
114 | .p2align 4 |
115 | L(dWordUnroll): /* second half of a 32-byte group: offsets 16 and 24 */ |
116 | std r8, 16(r9) /* copy dword at offset=16 */ |
117 | ld r8, 24(r4) /* load dword,perform loop unrolling again */ |
118 | cmpb r10, r8, r10 /* r10 is zero here (previous cmpb found no NULL) */ |
119 | cmpdi cr7, r10, 0 |
120 | bne cr7, L(HopBy24) |
121 | |
122 | std r8, 24(r7) /* copy dword at offset=24 */ |
123 | addi r9, r9, 32 /* advance dst past the group */ |
124 | addi r4, r4, 32 /* advance src past the group */ |
125 | bdz L(leftDwords) /* CTR exhausted: leave the unrolled loop */ |
126 | |
127 | ld r3, 32(r6) /* first dword of the next group */ |
128 | cmpb r8, r3, r10 |
129 | cmpdi cr7, r8, 0 |
130 | bne cr7, L(update2) |
131 | |
132 | std r3, 32(r7) /* copy it */ |
133 | ld r10, 40(r6) /* second dword of the next group */ |
134 | cmpb r8, r10, r8 |
135 | cmpdi cr7, r8, 0 |
136 | bne cr7, L(HopBy40) |
137 | |
138 | mr r6, r4 /* update values */ |
139 | mr r7, r9 |
140 | mr r11, r0 /* dword count left (r0 = r11 - 4 from L(dwordCopy)) */ |
141 | mr r5, r19 /* length left (r19 = r5 - 32 from L(dwordCopy)) */ |
142 | |
143 | L(dwordCopy): /* first half of a 32-byte group: store offset 8, test offset 16 */ |
144 | std r10, 8(r9) /* copy dword at offset=8 */ |
145 | addi r19, r5, -32 /* r19 = length left after this 32-byte group */ |
146 | addi r0, r11, -4 /* r0 = dword count left after this group */ |
147 | ld r8, 16(r4) /* load dword at offset=16 */ |
148 | cmpb r10, r8, r12 /* compare each byte with NULL (r12 is zero) */ |
149 | cmpdi cr7, r10, 0 |
150 | beq cr7, L(dWordUnroll) /* no NULL: continue with the rest of the group */ |
151 | |
152 | addi r9, r9, 16 /* increment dst by 16 */ |
153 | addi r4, r4, 16 /* increment src by 16 */ |
154 | addi r5, r5, -16 /* decrement length 'n' by 16 */ |
155 | addi r0, r11, -2 /* decrement loop counter */ |
156 | |
157 | L(dWordUnrollOFF): /* copy up to r0 dwords one at a time, stopping at a NULL */ |
158 | ld r10, 0(r4) /* load first dword */ |
159 | li r8, 0 /* load mask */ |
160 | cmpb r8, r10, r8 /* compare each byte with NULL */ |
161 | cmpdi cr7, r8, 0 |
162 | bne cr7, L(byte_by_byte) /* NULL in this dword: finish byte by byte */ |
163 | mtctr r0 /* CTR = number of dwords left */ |
164 | li r7, 0 /* r7 = zero mask for the loop below */ |
165 | b L(CopyDword) |
166 | |
167 | .p2align 4 |
168 | L(loadDWordandCompare): |
169 | ld r10, 0(r4) /* load next dword */ |
170 | cmpb r8, r10, r7 /* compare each byte with NULL */ |
171 | cmpdi cr7, r8, 0 |
172 | bne cr7, L(byte_by_byte) /* NULL in this dword: finish byte by byte */ |
173 | |
174 | L(CopyDword): |
175 | addi r9, r9, 8 /* advance dst */ |
176 | std r10, -8(r9) /* store the dword just checked */ |
177 | addi r4, r4, 8 /* advance src */ |
178 | addi r5, r5, -8 /* decrement length 'n' by 8 */ |
179 | bdnz L(loadDWordandCompare) |
180 | |
181 | L(byte_by_byte): /* copy the remaining r5 bytes from r4 to r9 */ |
182 | cmpldi cr7, r5, 3 |
183 | ble cr7, L(verifyByte) /* fewer than 4 bytes left */ |
184 | srdi r10, r5, 2 /* r10 = n / 4 */ |
185 | mr r19, r9 /* r19 = running dst pointer; r9 keeps the start */ |
186 | mtctr r10 /* CTR = iterations of the 4x unrolled byte loop */ |
187 | b L(firstByteUnroll) |
188 | |
189 | .p2align 4 |
190 | L(bytes_unroll): |
191 | lbz r10, 1(r4) /* load byte from src */ |
192 | cmpdi cr7, r10, 0 /* compare for NULL */ |
193 | stb r10, 1(r19) /* store byte to dst */ |
194 | beq cr7, L(updtDestComputeN2ndByte) |
195 | |
196 | addi r4, r4, 4 /* advance src */ |
197 | |
198 | lbz r10, -2(r4) /* perform loop unrolling for byte r/w */ |
199 | cmpdi cr7, r10, 0 /* compare for NULL */ |
200 | stb r10, 2(r19) /* store byte to dst */ |
201 | beq cr7, L(updtDestComputeN3rdByte) |
202 | |
203 | lbz r10, -1(r4) /* perform loop unrolling for byte r/w */ |
204 | addi r19, r19, 4 /* advance dst */ |
205 | cmpdi cr7, r10, 0 /* compare for NULL */ |
206 | stb r10, -1(r19) /* store byte to dst */ |
207 | beq cr7, L(ComputeNByte) |
208 | |
209 | bdz L(update0) /* CTR exhausted: handle the last n % 4 bytes */ |
210 | |
211 | L(firstByteUnroll): /* byte 0 of each 4-byte group; loop entry point */ |
212 | lbz r10, 0(r4) /* perform loop unrolling for byte r/w */ |
213 | cmpdi cr7, r10, 0 /* compare for NULL */ |
214 | stb r10, 0(r19) /* store byte to dst */ |
215 | bne cr7, L(bytes_unroll) /* not NULL: copy bytes 1..3 of this group */ |
216 | addi r19, r19, 1 /* NULL stored: r19 = first byte to zero-fill */ |
217 | |
218 | L(ComputeNByte): |
219 | subf r9, r19, r9 /* r9 = start - current dst = -(bytes written by the byte loop) */ |
220 | add r8, r9, r5 /* r8 = number of bytes left to zero-fill */ |
221 | |
222 | L(zeroFill): /* pad r8 bytes starting at r19 with zeros */ |
223 | cmpdi cr7, r8, 0 /* compare if length is zero */ |
224 | beq cr7, L(update3return) |
225 | |
226 | mflr r0 /* load link register LR to r0 */ |
227 | std r0, 16(r1) /* store the link register */ |
228 | stdu r1, -FRAMESIZE(r1) /* create the stack frame */ |
229 | cfi_adjust_cfa_offset(FRAMESIZE) |
230 | cfi_offset(lr, 16) |
231 | mr r3, r19 /* memset arg 1: start of the area to fill */ |
232 | li r4, 0 /* memset arg 2: fill with zero */ |
233 | mr r5, r8 /* how many bytes to fill buffer with */ |
234 | bl MEMSET /* call optimized memset */ |
235 | #ifndef MEMSET_is_local |
236 | nop |
237 | #endif |
238 | ld r0, FRAMESIZE+16(r1) /* read the saved link register */ |
239 | addi r1, r1, FRAMESIZE /* restore stack pointer */ |
240 | cfi_adjust_cfa_offset(-FRAMESIZE) |
241 | mtlr r0 |
242 | cfi_restore(lr) |
243 | |
244 | L(update3return): |
245 | #ifdef USE_AS_STPNCPY |
246 | addi r3, r19, -1 /* update return value */ |
247 | #endif |
248 | |
249 | L(hop2return): /* restore r18/r19 and return */ |
250 | #ifndef USE_AS_STPNCPY |
251 | mr r3, r18 /* set return value */ |
252 | #endif |
253 | ld r18, -16(r1) /* restore callers save register, r18 */ |
254 | ld r19, -8(r1) /* restore callers save register, r19 */ |
255 | blr /* return */ |
256 | |
257 | .p2align 4 |
258 | L(update0): |
259 | mr r9, r19 /* r9 = current dst after the unrolled byte loop */ |
260 | |
261 | .p2align 4 |
262 | L(verifyByte): /* copy the last (r5 & 3) bytes */ |
263 | rldicl. r8, r5, 0, 62 /* r8 = low two bits of r5 */ |
264 | #ifdef USE_AS_STPNCPY |
265 | mr r3, r9 /* stpncpy return value if nothing is left */ |
266 | #endif |
267 | beq cr0, L(hop2return) /* nothing left to copy */ |
268 | mtctr r8 /* CTR = leftover byte count */ |
269 | addi r4, r4, -1 /* bias src by -1 for the lbzu pre-increment */ |
270 | mr r19, r9 /* r19 = running dst pointer */ |
271 | b L(oneBYone) |
272 | |
273 | .p2align 4 |
274 | L(proceed): |
275 | bdz L(done) /* all leftover bytes copied without a NULL */ |
276 | |
277 | L(oneBYone): |
278 | lbzu r10, 1(r4) /* copy byte */ |
279 | addi r19, r19, 1 /* advance dst */ |
280 | addi r8, r8, -1 /* r8 = bytes still to write */ |
281 | cmpdi cr7, r10, 0 /* compare for NULL */ |
282 | stb r10, -1(r19) /* store byte to dst */ |
283 | bne cr7, L(proceed) |
284 | b L(zeroFill) /* NULL copied: zero-fill the remaining r8 bytes */ |
285 | |
286 | .p2align 4 |
287 | L(done): |
288 | #ifdef USE_AS_STPNCPY |
289 | mr r3, r19 /* set the return value */ |
290 | #else |
291 | mr r3, r18 /* set the return value */ |
292 | #endif |
293 | ld r18, -16(r1) /* restore callers save register, r18 */ |
294 | ld r19, -8(r1) /* restore callers save register, r19 */ |
295 | blr /* return */ |
296 | |
297 | L(update1): |
298 | mr r0, r11 /* r0 = number of whole dwords (n/8) */ |
299 | mr r19, r5 /* L(leftDwords) reloads r5 from r19 */ |
300 | |
301 | .p2align 4 |
302 | L(leftDwords): |
303 | cmpdi cr7, r0, 0 /* any whole dwords left? */ |
304 | mr r5, r19 /* r5 = remaining length */ |
305 | bne cr7, L(dWordUnrollOFF) |
306 | b L(byte_by_byte) |
307 | |
308 | .p2align 4 |
309 | L(updtDestComputeN2ndByte): |
310 | addi r19, r19, 2 /* update dst by 2 */ |
311 | subf r9, r19, r9 /* compute distance covered */ |
312 | add r8, r9, r5 /* r8 = number of bytes left to zero-fill */ |
313 | b L(zeroFill) |
314 | |
315 | .p2align 4 |
316 | L(updtDestComputeN3rdByte): |
317 | addi r19, r19, 3 /* update dst by 3 */ |
318 | subf r9, r19, r9 /* compute distance covered */ |
319 | add r8, r9, r5 /* r8 = number of bytes left to zero-fill */ |
320 | b L(zeroFill) |
321 | |
322 | .p2align 4 |
323 | L(HopBy24): |
324 | addi r9, r9, 24 /* increment dst by 24 */ |
325 | addi r4, r4, 24 /* increment src by 24 */ |
326 | addi r5, r5, -24 /* decrement length 'n' by 24 */ |
327 | addi r0, r11, -3 /* decrement loop counter */ |
328 | b L(dWordUnrollOFF) |
329 | |
330 | .p2align 4 |
331 | L(update2): |
332 | mr r5, r19 /* r5 = length left after the 32-byte group */ |
333 | b L(dWordUnrollOFF) |
334 | |
335 | .p2align 4 |
336 | L(HopBy40): |
337 | addi r9, r7, 40 /* increment dst by 40 */ |
338 | addi r4, r6, 40 /* increment src by 40 */ |
339 | addi r5, r5, -40 /* decrement length 'n' by 40 */ |
340 | addi r0, r11, -5 /* decrement loop counter */ |
341 | b L(dWordUnrollOFF) |
342 | |
343 | L(update3): |
344 | mr r0, r11 /* nothing copied yet: r0 = full dword count */ |
345 | b L(dWordUnrollOFF) |
346 | |
347 | L(HopBy8): |
348 | addi r9, r3, 8 /* increment dst by 8 */ |
349 | addi r4, r4, 8 /* increment src by 8 */ |
350 | addi r5, r5, -8 /* decrement length 'n' by 8 */ |
351 | addi r0, r11, -1 /* decrement loop counter */ |
352 | b L(dWordUnrollOFF) |
353 | |
354 | L(unaligned): /* src and/or dst is not 8-byte aligned */ |
355 | cmpdi r5, 16 /* Proceed byte by byte for 16 bytes or less */ |
356 | ble L(byte_by_byte) |
357 | rldicl r7, r3, 0, 61 /* r7 = dst & 7 */ |
358 | rldicl r6, r4, 0, 61 /* r6 = src & 7 */ |
359 | cmpdi r6, 0 /* Check src alignment */ |
360 | beq L(srcaligndstunalign) |
361 | /* src is unaligned */ |
362 | rlwinm r10, r4, 3,26,28 /* Calculate padding in bits: (src & 7) * 8. */ |
363 | clrrdi r4, r4, 3 /* Align the addr to dw boundary */ |
364 | ld r8, 0(r4) /* Load doubleword from memory. */ |
365 | li r0, 0 /* zero mask for cmpb */ |
366 | /* Discard bits not part of the string */ |
367 | #ifdef __LITTLE_ENDIAN__ |
368 | srd r7, r8, r10 |
369 | #else |
370 | sld r7, r8, r10 |
371 | #endif |
372 | cmpb r0, r7, r0 /* Compare each byte against null */ |
373 | /* Discard bits not part of the string */ |
374 | #ifdef __LITTLE_ENDIAN__ |
375 | sld r0, r0, r10 |
376 | #else |
377 | srd r0, r0, r10 |
378 | #endif |
379 | cmpdi r0, 0 |
380 | bne L(bytebybyte) /* if it has null, copy byte by byte */ |
381 | subfic r6, r6, 8 /* r6 = 8 - (src & 7) */ |
382 | rlwinm r12, r3, 3,26,28 /* Calculate padding in bits. */ |
383 | rldicl r9, r3, 0, 61 /* Calculate padding in bytes. */ |
384 | addi r3, r3, -1 /* bias dst by -1 for the stbu pre-increment */ |
385 | |
386 | cmpdi r12, 0 /* check dest alignment */ |
387 | beq L(srcunaligndstalign) |
388 | |
389 | /* both src and dst unaligned */ |
390 | #ifdef __LITTLE_ENDIAN__ |
391 | sld r8, r7, r10 |
392 | mr r11, r10 |
393 | addi r11, r11, -8 /* Adjust byte pointer on loaded dw */ |
394 | #else |
395 | srd r8, r7, r10 |
396 | subfic r11, r10, 64 |
397 | #endif |
398 | /* Is dst alignment greater than src alignment? */ |
399 | cmpd cr7, r12, r10 |
400 | ble cr7, L(dst_align_small) /* dst padding <= src padding */ |
401 | /* src alignment is less than dst */ |
402 | |
403 | /* Calculate the dst alignment difference */ |
404 | subfic r7, r9, 8 /* r7 = 8 - (dst & 7) */ |
405 | mtctr r7 |
406 | |
407 | /* Write until dst is aligned */ |
408 | cmpdi r0, r7, 4 /* compare r7 with 4; result in cr0 */ |
409 | blt L(storebyte1) /* less than 4, store byte by byte */ |
410 | beq L(equal1) /* if its 4, store word */ |
411 | addi r0, r7, -4 /* greater than 4, so stb and stw */ |
412 | mtctr r0 |
413 | L(storebyte1): |
414 | #ifdef __LITTLE_ENDIAN__ |
415 | addi r11, r11, 8 /* Adjust byte pointer on loaded dw */ |
416 | #else |
417 | addi r11, r11, -8 |
418 | #endif |
419 | srd r7, r8, r11 /* shift the next byte into the low 8 bits */ |
420 | stbu r7, 1(r3) /* store it and advance dst */ |
421 | addi r5, r5, -1 /* decrement length 'n' by 1 */ |
422 | bdnz L(storebyte1) |
423 | |
424 | subfic r7, r9, 8 /* Check the remaining bytes */ |
425 | cmpdi r0, r7, 4 |
426 | blt L(proceed1) |
427 | |
428 | .align 4 |
429 | L(equal1): |
430 | #ifdef __LITTLE_ENDIAN__ |
431 | addi r11, r11, 8 /* Adjust byte pointer on loaded dw */ |
432 | srd r7, r8, r11 |
433 | #else |
434 | subfic r11, r11, 64 |
435 | sld r7, r8, r11 |
436 | srdi r7, r7, 32 |
437 | #endif |
438 | stw r7, 1(r3) /* store 4 bytes with a single word store */ |
439 | addi r3, r3, 4 /* advance dst by 4 */ |
440 | addi r5, r5, -4 /* decrement length 'n' by 4 */ |
441 | |
442 | L(proceed1): |
443 | mr r7, r8 |
444 | /* calculate the Left over bytes to be written */ |
445 | subfic r11, r10, 64 |
446 | subfic r12, r12, 64 |
447 | subf r12, r12, r11 /* remaining bytes on second dw */ |
448 | subfic r10, r12, 64 /* remaining bytes on first dw */ |
449 | subfic r9, r9, 8 |
450 | subf r6, r9, r6 /* recalculate padding */ |
451 | L(srcunaligndstalign): |
452 | addi r3, r3, 1 /* undo the -1 bias on dst */ |
453 | subfic r12, r10, 64 /* remaining bytes on second dw */ |
454 | addi r4, r4, 8 /* Increment src pointer to the next dw */ |
455 | li r0,0 /* zero mask for cmpb in L(storedouble) */ |
456 | b L(storedouble) |
457 | |
458 | .align 4 |
459 | L(dst_align_small): |
460 | mtctr r6 /* CTR = 8 - (src & 7) */ |
461 | /* Write until src is aligned */ |
462 | L(storebyte2): |
463 | #ifdef __LITTLE_ENDIAN__ |
464 | addi r11, r11, 8 /* Adjust byte pointer on dw */ |
465 | #else |
466 | addi r11, r11, -8 |
467 | #endif |
468 | srd r7, r8, r11 /* shift the next byte into the low 8 bits */ |
469 | stbu r7, 1(r3) /* store it and advance dst */ |
470 | addi r5, r5, -1 /* decrement length 'n' by 1 */ |
471 | bdnz L(storebyte2) |
472 | |
473 | addi r4, r4, 8 /* Increment src pointer */ |
474 | addi r3, r3, 1 /* Increment dst pointer */ |
475 | mr r9, r3 /* r9 = working copy of the dst pointer */ |
476 | li r8, 0 /* zero mask expected by L(aligned) */ |
477 | cmpd cr7, r12, r10 |
478 | beq cr7, L(aligned) /* equal padding: take the aligned path */ |
479 | rldicl r6, r3, 0, 61 /* Recalculate padding */ |
480 | mr r7, r6 |
481 | |
482 | /* src is aligned */ |
483 | L(srcaligndstunalign): |
484 | mr r9, r3 /* L(byte_by_byte) takes dst in r9 */ |
485 | mr r6, r7 /* r6 = dst & 7 */ |
486 | ld r8, 0(r4) /* load aligned dw from src */ |
487 | subfic r10, r7, 8 /* r10 = 8 - (dst & 7) */ |
488 | mr r7, r8 |
489 | li r0, 0 /* Check null */ |
490 | cmpb r0, r8, r0 |
491 | cmpdi r0, 0 |
492 | bne L(byte_by_byte) /* Do byte by byte if there is NULL */ |
493 | rlwinm r12, r3, 3,26,28 /* Calculate padding */ |
494 | addi r3, r3, -1 /* bias dst by -1 for the stbu pre-increment */ |
495 | /* write byte by byte until aligned */ |
496 | #ifdef __LITTLE_ENDIAN__ |
497 | li r11, -8 |
498 | #else |
499 | li r11, 64 |
500 | #endif |
501 | mtctr r10 |
502 | cmpdi r0, r10, 4 /* compare r10 with 4; result in cr0 */ |
503 | blt L(storebyte) /* less than 4, store byte by byte */ |
504 | beq L(equal) /* exactly 4, store a word */ |
505 | addi r0, r10, -4 /* more than 4: stb for the excess, then stw */ |
506 | mtctr r0 |
507 | L(storebyte): |
508 | #ifdef __LITTLE_ENDIAN__ |
509 | addi r11, r11, 8 /* Adjust byte pointer on dw */ |
510 | #else |
511 | addi r11, r11, -8 |
512 | #endif |
513 | srd r7, r8, r11 /* shift the next byte into the low 8 bits */ |
514 | stbu r7, 1(r3) /* store it and advance dst */ |
515 | addi r5, r5, -1 /* decrement length 'n' by 1 */ |
516 | bdnz L(storebyte) |
517 | |
518 | cmpdi r0, r10, 4 |
519 | blt L(align) |
520 | |
521 | .align 4 |
522 | L(equal): |
523 | #ifdef __LITTLE_ENDIAN__ |
524 | addi r11, r11, 8 |
525 | srd r7, r8, r11 |
526 | #else |
527 | subfic r11, r11, 64 |
528 | sld r7, r8, r11 |
529 | srdi r7, r7, 32 |
530 | #endif |
531 | stw r7, 1(r3) /* store 4 bytes with a single word store */ |
532 | addi r5, r5, -4 /* decrement length 'n' by 4 */ |
533 | addi r3, r3, 4 /* advance dst by 4 */ |
534 | L(align): |
535 | addi r3, r3, 1 /* undo the -1 bias on dst */ |
536 | addi r4, r4, 8 /* Increment src pointer */ |
537 | subfic r10, r12, 64 |
538 | li r0, 0 /* zero mask for cmpb in L(storedouble) */ |
539 | /* dst addr aligned to 8 */ |
540 | L(storedouble): /* merge two source dwords into one aligned store */ |
541 | cmpdi r5, 8 |
542 | ble L(null1) /* 8 bytes or fewer left: finish byte by byte */ |
543 | ld r7, 0(r4) /* load next dw */ |
544 | cmpb r0, r7, r0 /* r0 is zero on every pass through here */ |
545 | cmpdi r0, 0 /* check for null on each new dw */ |
546 | bne L(null) |
547 | #ifdef __LITTLE_ENDIAN__ |
548 | srd r9, r8, r10 /* bytes from first dw */ |
549 | sld r11, r7, r12 /* bytes from second dw */ |
550 | #else |
551 | sld r9, r8, r10 |
552 | srd r11, r7, r12 |
553 | #endif |
554 | or r11, r9, r11 /* make as a single dw */ |
555 | std r11, 0(r3) /* store as std on aligned addr */ |
556 | mr r8, r7 /* still few bytes left to be written */ |
557 | addi r3, r3, 8 /* increment dst addr */ |
558 | addi r4, r4, 8 /* increment src addr */ |
559 | addi r5, r5, -8 /* decrement length 'n' by 8 */ |
560 | b L(storedouble) /* Loop until NULL */ |
561 | |
562 | .align 4 |
563 | |
564 | /* We've hit the end of the string. Do the rest byte-by-byte. */ |
565 | L(null): |
566 | addi r3, r3, -1 /* bias dst by -1 for the stores at 1(r3) */ |
567 | mr r10, r12 |
568 | mtctr r6 |
569 | #ifdef __LITTLE_ENDIAN__ |
570 | subfic r10, r10, 64 |
571 | addi r10, r10, -8 |
572 | #endif |
573 | cmpdi r0, r5, 4 /* compare length with 4; result in cr0 */ |
574 | blt L(loop) |
575 | cmpdi r0, r6, 4 /* compare r6 with 4; result in cr0 */ |
576 | blt L(loop) |
577 | |
578 | /* we can still use stw if leftover >= 4 */ |
579 | #ifdef __LITTLE_ENDIAN__ |
580 | addi r10, r10, 8 |
581 | srd r11, r8, r10 |
582 | #else |
583 | subfic r10, r10, 64 |
584 | sld r11, r8, r10 |
585 | srdi r11, r11, 32 |
586 | #endif |
587 | stw r11, 1(r3) /* store 4 bytes with a single word store */ |
588 | addi r5, r5, -4 /* decrement length 'n' by 4 */ |
589 | addi r3, r3, 4 /* advance dst by 4 */ |
590 | cmpdi r0, r5, 0 |
591 | beq L(g1) /* length exhausted */ |
592 | cmpdi r0, r6, 4 |
593 | beq L(bytebybyte1) /* r6 == 4: skip L(loop) */ |
594 | addi r10, r10, 32 |
595 | #ifdef __LITTLE_ENDIAN__ |
596 | addi r10, r10, -8 |
597 | #else |
598 | subfic r10, r10, 64 |
599 | #endif |
600 | addi r0, r6, -4 |
601 | mtctr r0 /* CTR = r6 - 4 bytes still to store in L(loop) */ |
602 | /* remaining byte by byte part of first dw */ |
603 | L(loop): |
604 | #ifdef __LITTLE_ENDIAN__ |
605 | addi r10, r10, 8 |
606 | #else |
607 | addi r10, r10, -8 |
608 | #endif |
609 | srd r0, r8, r10 /* shift the next byte into the low 8 bits */ |
610 | stbu r0, 1(r3) /* store it and advance dst */ |
611 | addi r5, r5, -1 /* decrement length 'n' by 1 */ |
612 | cmpdi r0, r5, 0 /* result in cr0 */ |
613 | beq L(g1) /* length exhausted */ |
614 | bdnz L(loop) |
615 | L(bytebybyte1): |
616 | addi r3, r3, 1 /* undo the -1 bias on dst */ |
617 | /* remaining byte by byte part of second dw */ |
618 | L(bytebybyte): /* store the bytes of r7 one by one until NULL or length runs out */ |
619 | addi r3, r3, -8 /* bias dst by -8; the first stbu below uses 8(r3) */ |
620 | addi r4, r4, -1 |
621 | |
622 | #ifdef __LITTLE_ENDIAN__ |
623 | extrdi. r0, r7, 8, 56 /* test the least significant byte; cr0 eq if NULL */ |
624 | stbu r7, 8(r3) /* store it (low byte of r7) */ |
625 | addi r5, r5, -1 /* decrement length 'n' by 1 */ |
626 | beq L(g2) /* NULL stored: zero-fill the rest */ |
627 | cmpdi r5, 0 |
628 | beq L(g1) /* length exhausted without a NULL */ |
629 | extrdi. r0, r7, 8, 48 /* next byte; same pattern repeats below */ |
630 | stbu r0, 1(r3) |
631 | addi r5, r5, -1 |
632 | beq L(g2) |
633 | cmpdi r5, 0 |
634 | beq L(g1) |
635 | extrdi. r0, r7, 8, 40 |
636 | stbu r0, 1(r3) |
637 | addi r5, r5, -1 |
638 | beq L(g2) |
639 | cmpdi r5, 0 |
640 | beq L(g1) |
641 | extrdi. r0, r7, 8, 32 |
642 | stbu r0, 1(r3) |
643 | addi r5, r5, -1 |
644 | beq L(g2) |
645 | cmpdi r5, 0 |
646 | beq L(g1) |
647 | extrdi. r0, r7, 8, 24 |
648 | stbu r0, 1(r3) |
649 | addi r5, r5, -1 |
650 | beq L(g2) |
651 | cmpdi r5, 0 |
652 | beq L(g1) |
653 | extrdi. r0, r7, 8, 16 |
654 | stbu r0, 1(r3) |
655 | addi r5, r5, -1 |
656 | beq L(g2) |
657 | cmpdi r5, 0 |
658 | beq L(g1) |
659 | extrdi. r0, r7, 8, 8 |
660 | stbu r0, 1(r3) |
661 | addi r5, r5, -1 |
662 | beq L(g2) |
663 | cmpdi r5, 0 |
664 | beq L(g1) |
665 | extrdi r0, r7, 8, 0 /* most significant byte: last one, no test needed */ |
666 | stbu r0, 1(r3) |
667 | addi r5, r5, -1 |
668 | b L(g2) |
669 | #else |
670 | extrdi. r0, r7, 8, 0 /* test the most significant byte; cr0 eq if NULL */ |
671 | stbu r0, 8(r3) /* store it */ |
672 | addi r5, r5, -1 /* decrement length 'n' by 1 */ |
673 | beq L(g2) /* NULL stored: zero-fill the rest */ |
674 | cmpdi r5, 0 |
675 | beq L(g1) /* length exhausted without a NULL */ |
676 | extrdi. r0, r7, 8, 8 /* next byte; same pattern repeats below */ |
677 | stbu r0, 1(r3) |
678 | addi r5, r5, -1 |
679 | beq L(g2) |
680 | cmpdi r5, 0 |
681 | beq L(g1) |
682 | extrdi. r0, r7, 8, 16 |
683 | stbu r0, 1(r3) |
684 | addi r5, r5, -1 |
685 | beq L(g2) |
686 | cmpdi r5, 0 |
687 | beq L(g1) |
688 | extrdi. r0, r7, 8, 24 |
689 | stbu r0, 1(r3) |
690 | addi r5, r5, -1 |
691 | beq L(g2) |
692 | cmpdi r5, 0 |
693 | beq L(g1) |
694 | extrdi. r0, r7, 8, 32 |
695 | stbu r0, 1(r3) |
696 | addi r5, r5, -1 |
697 | beq L(g2) |
698 | cmpdi r5, 0 |
699 | beq L(g1) |
700 | extrdi. r0, r7, 8, 40 |
701 | stbu r0, 1(r3) |
702 | addi r5, r5, -1 |
703 | beq L(g2) |
704 | cmpdi r5, 0 |
705 | beq L(g1) |
706 | extrdi. r0, r7, 8, 48 |
707 | stbu r0, 1(r3) |
708 | addi r5, r5, -1 |
709 | beq L(g2) |
710 | cmpdi r5, 0 |
711 | beq L(g1) |
712 | stbu r7, 1(r3) /* least significant byte: last one, no test needed */ |
713 | addi r5, r5, -1 |
714 | b L(g2) |
715 | #endif |
716 | L(g1): /* length ran out before a NULL was stored */ |
717 | #ifdef USE_AS_STPNCPY |
718 | addi r3, r3, 1 /* offsets the -1 applied at L(update3return) */ |
719 | #endif |
720 | L(g2): |
721 | addi r3, r3, 1 /* step past the last byte stored */ |
722 | mr r19, r3 /* L(zeroFill) pads starting at r19 */ |
723 | mr r8, r5 /* remaining length = bytes to zero-fill */ |
724 | b L(zeroFill) |
725 | L(null1): |
726 | mr r9, r3 /* L(byte_by_byte) takes dst in r9 */ |
727 | subf r4, r6, r4 /* r4 = r4 - r6; presumably rewinds src to the first uncopied byte - TODO confirm */ |
728 | b L(byte_by_byte) |
729 | END(FUNC_NAME) |
730 | #ifndef USE_AS_STPNCPY |
731 | libc_hidden_builtin_def (strncpy) |
732 | #endif |
733 | |