1 | /* Optimized memcpy implementation for PowerPC476. |
2 | Copyright (C) 2010-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library. If not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <sysdep.h> |
20 | |
21 | /* memcpy |
22 | |
23 | r0:return address |
24 | r3:destination address |
25 | r4:source address |
26 | r5:byte count |
27 | |
28 | Save return address in r0. |
   If the copy count is greater than 256 and both destination and source
   are unaligned, first copy 1-3 bytes to make the destination word aligned.
   If 32 or more bytes remain, copy them with a 32-byte copy loop.
   Finally copy the 0-31 leftover bytes.  */
33 | |
EALIGN (memcpy, 5, 0)
	/* Dispatch: for copies of at most 256 bytes, or when either
	   pointer is already word aligned, skip the alignment prologue
	   and use the backward small-copy path.  */
	cmpwi r5,0x0100		/* More than 256 bytes to copy?  */
	addi r0,r3,0		/* Save dst in r0: memcpy returns dst.  */
	ble L(string_count_loop)
	neg r6,r3
	clrlwi. r6,r6,30	/* r6 = (-dst) mod 4 = bytes to word-align dst.  */
	beq L(string_count_loop)	/* dst already word aligned.  */
	neg r6,r4
	clrlwi. r6,r6,30	/* Same distance-to-alignment check on src.  */
	beq L(string_count_loop)	/* src already word aligned.  */
	mtctr r6		/* CTR = 1-3 alignment bytes to copy.  */
	subf r5,r6,r5		/* Deduct the alignment bytes from the count.  */

L(unaligned_bytecopy_loop): /* Align destination by copying 1-3 bytes.  */
	lbz r8,0x0(r4)
	addi r4,r4,1
	stb r8,0x0(r3)
	addi r3,r3,1
	bdnz L(unaligned_bytecopy_loop)
	srwi. r7,r5,5		/* r7 = number of full 32-byte chunks.  */
	beq L(preword2_count_loop)	/* Fewer than 32 bytes remain.  */
	mtctr r7

L(word8_count_loop_no_dcbt): /* Copy 32 bytes per iteration, forward,
	   via two groups of four word loads/stores.  */
	lwz r6,0(r4)
	lwz r7,4(r4)
	lwz r8,8(r4)
	lwz r9,12(r4)
	subi r5,r5,0x20		/* 32 fewer bytes left to copy.  */
	stw r6,0(r3)
	stw r7,4(r3)
	stw r8,8(r3)
	stw r9,12(r3)
	lwz r6,16(r4)
	lwz r7,20(r4)
	lwz r8,24(r4)
	lwz r9,28(r4)
	addi r4,r4,0x20
	stw r6,16(r3)
	stw r7,20(r3)
	stw r8,24(r3)
	stw r9,28(r3)
	addi r3,r3,0x20
	bdnz L(word8_count_loop_no_dcbt)

L(preword2_count_loop): /* Copy the remaining 0-31 bytes.  */
	clrlwi. r12,r5,27	/* r12 = count mod 32 (low 5 bits).  */
	beq L(end_memcpy)
	mtxer r12		/* XER string-byte count = r12.  */
	lswx r5,0,r4		/* String copy; loads into r5-r12, all of
				   which are dead at this point.  */
	stswx r5,0,r3
	mr r3,r0		/* Return the original dst.  */
	blr

L(string_count_loop): /* Small/aligned path: copy the odd 0-15 tail
	   bytes first, then copy 16-byte chunks backward from the end.  */
	clrlwi. r12,r5,28	/* r12 = count mod 16 (low 4 bits).  */
	add r3,r3,r5		/* Point dst and src one past the end.  */
	add r4,r4,r5
	beq L(pre_string_copy)	/* Count is a multiple of 16.  */
	mtxer r12		/* XER string-byte count = tail length.  */
	subf r4,r12,r4		/* Back up to the start of the tail.  */
	subf r3,r12,r3
	lswx r6,0,r4		/* Tail copy via string load/store (uses
				   r6-r9 for up to 15 bytes).  */
	stswx r6,0,r3

L(pre_string_copy): /* Check how many 16-byte chunks remain.  */
	srwi. r7,r5,4		/* CTR counts 16-byte chunks.  */
	beq L(end_memcpy)
	mtctr r7

L(word4_count_loop_no_dcbt): /* Copy backward, 16 bytes per CTR
	   decrement; unrolled 2x, so 32 bytes per loop iteration with a
	   mid-loop bdz exit for an odd chunk count.  */
	lwz r6,-4(r4)
	lwz r7,-8(r4)
	lwz r8,-12(r4)
	lwzu r9,-16(r4)
	stw r6,-4(r3)
	stw r7,-8(r3)
	stw r8,-12(r3)
	stwu r9,-16(r3)
	bdz L(end_memcpy)	/* Odd number of 16-byte chunks: done.  */
	lwz r6,-4(r4)
	lwz r7,-8(r4)
	lwz r8,-12(r4)
	lwzu r9,-16(r4)
	stw r6,-4(r3)
	stw r7,-8(r3)
	stw r8,-12(r3)
	stwu r9,-16(r3)
	bdnz L(word4_count_loop_no_dcbt)

L(end_memcpy):
	mr r3,r0		/* Return the original dst (saved in r0).  */
	blr
END (memcpy)
libc_hidden_builtin_def (memcpy)
131 | |