1 | /* Optimized memset for PowerPC405,440,464 (32-byte cacheline). |
2 | Copyright (C) 2012-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library. If not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <sysdep.h> |
20 | |
21 | /* memset |
22 | |
23 | r3:destination address, also the return value |
24 | r4:source integer to copy |
25 | r5:byte count |
26 | r11:source byte replicated into all 32 bits of the register |
27 | r12:saved copy of the destination address, returned in r3 |
28 | |
29 | Save the destination address (the return value) in r12. |
30 | If count is greater than 255 bytes, the fill value is nonzero and the |
31 | destination is unaligned, set 0-3 bytes to make the destination word aligned. |
32 | If count is greater than 255 bytes and setting zero to memory, align the |
33 | destination to 32 bytes and use dcbz to set memory when we can; |
34 | otherwise do the following: |
35 | If 16 or more bytes to set we use a 4-word (16-byte) store loop. |
36 | Finally we set 0-15 extra bytes with string store. */ |
37 | |
38 | EALIGN (memset, 5, 0) /* memset: fill r5 bytes starting at r3 with the low byte of r4 and return the original r3. EALIGN comes from <sysdep.h>. */ |
39 | rlwinm r11,r4,0,24,31 /* r11 = r4 & 0xff (the fill byte) */ |
40 | rlwimi r11,r4,8,16,23 /* copy the fill byte into the next byte up: low halfword of r11 is now the byte twice */ |
41 | rlwimi r11,r11,16,0,15 /* copy the low halfword into the high halfword: r11 = fill byte in all four bytes */ |
42 | addi r12,r3,0 /* r12 = original destination, restored into r3 before returning */ |
43 | cmpwi r5,0x00FF /* signed compare of the byte count against 255 */ |
44 | ble L(preword8_count_loop) /* count <= 255: no alignment work and no dcbz, go straight to the store loops */ |
45 | cmpwi r4,0x00 /* tests the whole of r4, not only its low byte */ |
46 | beq L(use_dcbz) /* zero fill of more than 255 bytes: take the dcbz path */ |
47 | neg r6,r3 /* r6 = -destination */ |
48 | clrlwi. r6,r6,30 /* r6 = (-destination) & 3 = bytes needed to reach word alignment; sets CR0 */ |
49 | beq L(preword8_count_loop) /* already word aligned */ |
50 | addi r8,0,1 /* r8 = 1, the per-byte decrement for r5 */ |
51 | mtctr r6 /* CTR = number of alignment bytes (1-3) */ |
52 | subi r3,r3,1 /* pre-bias r3 because stbu stores at r3+1 */ |
53 | |
54 | L(unaligned_bytecopy_loop): /* store single bytes until the destination is word aligned */ |
55 | stbu r11,0x1(r3) /* store one byte at r3+1 and advance r3 */ |
56 | subf. r5,r8,r5 /* r5 -= 1; sets CR0 */ |
57 | beq L(end_memset) /* count reached zero; this path is only entered with r5 > 255 and stores at most 3 bytes */ |
58 | bdnz L(unaligned_bytecopy_loop) |
59 | addi r3,r3,1 /* undo the pre-bias: r3 = next byte to store */ |
60 | |
61 | L(preword8_count_loop): /* set up the 16-bytes-per-iteration store loop */ |
62 | srwi. r6,r5,4 /* r6 = count / 16; sets CR0 */ |
63 | beq L(preword2_count_loop) /* fewer than 16 bytes left */ |
64 | mtctr r6 /* CTR = number of 16-byte chunks */ |
65 | addi r3,r3,-4 /* pre-bias r3 because stwu stores at r3+4 */ |
66 | mr r8,r11 /* r8-r10 get copies of the fill pattern already in r11 */ |
67 | mr r9,r11 |
68 | mr r10,r11 |
69 | |
70 | L(word8_count_loop_no_dcbt): /* four word stores = 16 bytes per iteration */ |
71 | stwu r8,4(r3) |
72 | stwu r9,4(r3) |
73 | subi r5,r5,0x10 /* r5 -= 16 */ |
74 | stwu r10,4(r3) |
75 | stwu r11,4(r3) |
76 | bdnz L(word8_count_loop_no_dcbt) |
77 | addi r3,r3,4 /* undo the pre-bias: r3 = next byte to store */ |
78 | |
79 | L(preword2_count_loop): /* store the remaining 0-15 bytes with one string store */ |
80 | clrlwi. r7,r5,28 /* r7 = r5 & 15 = tail byte count; sets CR0 */ |
81 | beq L(end_memset) /* no tail bytes */ |
82 | mr r8,r11 /* r8-r11 all hold the fill pattern for the string store */ |
83 | mr r9,r11 |
84 | mr r10,r11 |
85 | mtxer r7 /* XER supplies the byte count used by stswx */ |
86 | stswx r8,0,r3 /* store r7 bytes at r3, taken from r8 onward */ |
87 | |
88 | L(end_memset): /* common exit */ |
89 | addi r3,r12,0 /* return the original destination pointer */ |
90 | blr |
91 | |
92 | L(use_dcbz): /* zero fill, count > 255: align to 32 bytes, then clear whole cache blocks */ |
93 | neg r6,r3 /* r6 = -destination; reused at L(skip_string_loop) */ |
94 | clrlwi. r7,r6,28 /* r7 = (-destination) & 15 = bytes needed to reach 16-byte alignment; sets CR0 */ |
95 | beq L(skip_string_loop) /* already 16-byte aligned */ |
96 | mr r8,r11 /* r8-r11 all hold the fill pattern for the string store */ |
97 | mr r9,r11 |
98 | mr r10,r11 |
99 | subf r5,r7,r5 /* r5 -= r7 */ |
100 | mtxer r7 /* XER supplies the byte count used by stswx */ |
101 | stswx r8,0,r3 /* store r7 bytes at r3 */ |
102 | add r3,r3,r7 /* r3 is now 16-byte aligned */ |
103 | |
104 | L(skip_string_loop): /* decide whether 16 more bytes are needed to reach 32-byte alignment */ |
105 | clrlwi r8,r6,27 /* r8 = (-original destination) & 31 */ |
106 | srwi. r8,r8,4 /* r8 = 0 or 1: number of 16-byte chunks still needed; sets CR0 */ |
107 | beq L(dcbz_pre_loop) /* already 32-byte aligned */ |
108 | mtctr r8 |
109 | |
110 | L(word_loop): /* store 16 bytes per iteration; CTR is at most 1 here */ |
111 | stw r11,0(r3) |
112 | subi r5,r5,0x10 /* r5 -= 16 */ |
113 | stw r11,4(r3) |
114 | stw r11,8(r3) |
115 | stw r11,12(r3) |
116 | addi r3,r3,0x10 /* r3 += 16 */ |
117 | bdnz L(word_loop) |
118 | |
119 | L(dcbz_pre_loop): /* r3 is 32-byte aligned here */ |
120 | srwi r6,r5,5 /* r6 = number of whole 32-byte blocks; nonzero because r5 was > 255 and at most 31 bytes were consumed above */ |
121 | mtctr r6 |
122 | addi r7,0,0 /* r7 = 0, the index operand for dcbz */ |
123 | |
124 | L(dcbz_loop): /* one dcbz per 32-byte block */ |
125 | dcbz r3,r7 /* zero the data cache block at address r3 + r7 */ |
126 | addi r3,r3,0x20 /* r3 += 32 */ |
127 | subi r5,r5,0x20 /* r5 -= 32 */ |
128 | bdnz L(dcbz_loop) |
129 | srwi. r6,r5,4 /* r6 = remaining 16-byte chunks (r5 < 32 here, so 0 or 1); sets CR0 */ |
130 | beq L(postword2_count_loop) /* fewer than 16 bytes left */ |
131 | mtctr r6 |
132 | |
133 | L(postword8_count_loop): /* store 16 bytes per iteration after the dcbz loop */ |
134 | stw r11,0(r3) |
135 | subi r5,r5,0x10 /* r5 -= 16 */ |
136 | stw r11,4(r3) |
137 | stw r11,8(r3) |
138 | stw r11,12(r3) |
139 | addi r3,r3,0x10 /* r3 += 16 */ |
140 | bdnz L(postword8_count_loop) |
141 | |
142 | L(postword2_count_loop): /* store the remaining 0-15 bytes with one string store */ |
143 | clrlwi. r7,r5,28 /* r7 = r5 & 15 = tail byte count; sets CR0 */ |
144 | beq L(end_memset) /* no tail bytes */ |
145 | mr r8,r11 /* r8-r11 all hold the fill pattern for the string store */ |
146 | mr r9,r11 |
147 | mr r10,r11 |
148 | mtxer r7 /* XER supplies the byte count used by stswx */ |
149 | stswx r8,0,r3 /* store r7 bytes at r3 */ |
150 | b L(end_memset) /* restore r3 and return */ |
151 | END (memset) |
152 | libc_hidden_builtin_def (memset) |
153 | |