1 | /* Copyright (C) 2014-2022 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library. If not, see |
16 | <https://www.gnu.org/licenses/>. */ |
17 | |
18 | #ifdef ANDROID_CHANGES |
19 | # include "machine/asm.h" |
20 | # include "machine/regdef.h" |
21 | #elif _LIBC |
22 | # include <sysdep.h> |
23 | # include <regdef.h> |
24 | # include <sys/asm.h> |
25 | #elif defined _COMPILING_NEWLIB |
26 | # include "machine/asm.h" |
27 | # include "machine/regdef.h" |
28 | #else |
29 | # include <regdef.h> |
30 | # include <sys/asm.h> |
31 | #endif |
32 | |
/* strcmp is not supposed to touch memory beyond the end of the strings it
   compares.  The word loop always loads a whole word, so it may pick up
   bytes that follow the NUL terminator inside that word.  It does not,
   however, load the word after the one holding the terminator unless
   ENABLE_READAHEAD is defined.  Defining ENABLE_READAHEAD improves
   performance but is technically illegal based on the definition of
   strcmp.

   DELAY_READ is what the word loop places between two consecutive word
   compares: a nop by default, nothing at all with ENABLE_READAHEAD.  */
#ifndef ENABLE_READAHEAD
# define DELAY_READ nop
#else
# define DELAY_READ
#endif
44 | |
/* The CLZ based exit path is opt-in.  Testing on a little endian machine
   showed it to be a performance loss, so it is only selected when
   ENABLE_CLZ is defined and the ISA revision is greater than 1.  */
#if (__mips_isa_rev > 1) && defined (ENABLE_CLZ)
# define USE_CLZ
#endif
50 | |
/* Not every asm.h provides the L macro for local labels, so supply one
   when it is missing: a "$L" prefix for the O32 ABI and ".L" otherwise.  */
#if !defined (L)
# if _MIPS_SIM != _ABIO32
#  define L(label) .L ## label
# else
#  define L(label) $L ## label
# endif
#endif
59 | |
/* Some asm.h files do not have the PTR_ADDIU macro definition.  Supply the
   add-immediate that matches the pointer width in that case.

   USE_DOUBLE is still honoured for builds that define it, but nothing in
   this file does, so relying on it alone would select the 32-bit addiu for
   pointer arithmetic even when pointers are 64 bits wide.  _MIPS_SZPTR is
   therefore checked as well; if the compiler does not define it, it
   evaluates to 0 here and the previous behaviour is kept.  */
#ifndef PTR_ADDIU
# if defined (USE_DOUBLE) || (_MIPS_SZPTR == 64)
#  define PTR_ADDIU daddiu
# else
#  define PTR_ADDIU addiu
# endif
#endif
68 | |
/* The routine is called strcmp unless the build provides another name
   through STRCMP_NAME.  */
#if !defined (STRCMP_NAME)
# define STRCMP_NAME strcmp
#endif
73 | |
/* STRCMP_NAME: compare the zero-terminated byte strings at a0 and a1.
   In:   a0, a1 = string pointers
   Out:  v0 = 0 when the strings are equal, otherwise the difference
              (byte from a0) - (byte from a1) at the first position where
              the strings differ, both bytes taken as unsigned
   Uses: t0, t1, t8, t9, v1

   The whole body is assembled with .set noreorder, so the instruction
   written after each branch or jump is its delay slot.  */
#ifndef ANDROID_CHANGES
LEAF(STRCMP_NAME)
#else
LEAF(STRCMP_NAME, 0)
#endif
	.set	nomips16
	.set	noreorder

	/* If either pointer has one of its two low bits set, compare byte by
	   byte.  */
	or	t0, a0, a1
	andi	t0, t0, 0x3
	bne	t0, zero, L(byteloop)

	/* Both strings are 4 byte aligned at this point.  Load the constants
	   used by the zero byte test of the word loop:
	   t8 = 0x01010101, t9 = 0x7f7f7f7f.  The first lui sits in the delay
	   slot of the branch above; the byte loop overwrites t8 before it
	   reads it.  */
	lui	t8, 0x0101
	ori	t8, t8, 0x0101
	lui	t9, 0x7f7f
	ori	t9, t9, 0x7f7f
/* Compare the aligned words at byte offset OFFSET of both strings.
   In:   a0, a1 = word aligned string pointers
         t8 = 0x01010101, t9 = 0x7f7f7f7f
   Out:  v0, v1 = the words loaded through a0 and a1
   Uses: t0, t1

   Control goes to L(worddiff) when the two words differ and to
   L(wordsequal) when they are equal and contain a zero byte:
   (v0 - t8) & ~(v0 | t9) is nonzero exactly when some byte of v0 is zero.

   The nor is the delay slot of the first bne.  The delay slot of the
   second bne is whatever instruction follows the macro.  */
#define WORDCMP(OFFSET) \
	lw	v0, OFFSET(a0); \
	lw	v1, OFFSET(a1); \
	subu	t0, v0, t8; \
	bne	v0, v1, L(worddiff); \
	nor	t1, v0, t9; \
	and	t0, t0, t1; \
	bne	t0, zero, L(wordsequal)

/* Eight words (32 bytes) per iteration.  DELAY_READ fills the delay slot
   after each of the first seven compares; after the last one the slot is
   taken by the increment of a0.  */
L(wordloop):
	WORDCMP(0)
	DELAY_READ
	WORDCMP(4)
	DELAY_READ
	WORDCMP(8)
	DELAY_READ
	WORDCMP(12)
	DELAY_READ
	WORDCMP(16)
	DELAY_READ
	WORDCMP(20)
	DELAY_READ
	WORDCMP(24)
	DELAY_READ
	WORDCMP(28)
	PTR_ADDIU a0, a0, 32		/* delay slot of the last compare */
	b	L(wordloop)
	PTR_ADDIU a1, a1, 32		/* delay slot */

/* Equal words holding the terminator: the strings are equal.  */
L(wordsequal):
	j	ra
	move	v0, zero		/* delay slot: return 0 */
125 | |
/* Reached from the word loop when the word in v0 (loaded through a0)
   differs from the word in v1 (loaded through a1).  Walk the four byte
   pairs in string order and return the difference of the first pair in
   which the byte from v0 is zero or the two bytes differ.
   In:   v0, v1 = the differing words
         t9 = 0x7f7f7f7f (read by the USE_CLZ variant only)
   Out:  v0 = (byte from v0) - (byte from v1)
   Uses: t0, t1, v1; the variant without CLZ also overwrites t8 and t9.  */
L(worddiff):
#ifdef USE_CLZ
	/* t1 = exact zero byte mask of v0: 0x80 in every byte position where
	   v0 holds a zero byte and nothing anywhere else.

	   The cheaper expression (v0 - 0x01010101) & ~(v0 | 0x7f7f7f7f) used
	   by the word loop is only good as a yes/no test.  The borrow out of
	   a zero byte also flags a 0x01 byte in the next more significant
	   position.  On big endian that byte comes earlier in the string, so
	   CLZ would pick it, see two equal bytes and return 0; for example
	   "a\1" and "a\1b" would compare equal.  The mask below has no carry
	   between bytes: (byte & 0x7f) + 0x7f is at most 0xfe.  */
	and	t0, v0, t9		/* low 7 bits of every byte */
	addu	t0, t0, t9		/* bit 7 set if those bits are nonzero */
	or	t0, t0, v0		/* bit 7 set if the byte is nonzero */
	nor	t1, t0, t9		/* bit 7 set if the byte is zero */
	/* t0 = nonzero in every byte where the words differ or v0 ends.  */
	xor	t0, v0, v1
	or	t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* Reverse the bytes so that the first byte in memory becomes the most
	   significant one, which is where CLZ starts counting.  */
	wsbh	t0, t0
	rotr	t0, t0, 16
# endif
	/* t1 = 8 * (index, in string order, of the first byte flagged).  */
	clz	t1, t0
	andi	t1, t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* On big endian that byte lies 24 - t1 bits above bit 0.  */
	neg	t1
	addiu	t1, t1, 24
# endif
	/* Rotate the selected byte of each word into bits 7..0.  */
	rotrv	v0, v0, t1
	rotrv	v1, v1, t1
	andi	v0, v0, 0xff
	andi	v1, v1, 0xff
	j	ra
	subu	v0, v0, v1		/* delay slot: return value */
#else /* USE_CLZ */
	/* Check the byte pairs one after the other in string order.  The pairs
	   alternate between t0/t1 and t8/t9 because the first instruction of
	   each step is the delay slot of the previous step's bne and must not
	   overwrite the pair that the exit code is about to subtract.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* First byte: bits 7..0.  */
	andi	t0, v0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, v1, 0xff		/* delay slot */
	bne	t0, t1, L(wexit01)

	/* Second byte: bits 15..8.  */
	srl	t8, v0, 8		/* delay slot */
	srl	t9, v1, 8
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff		/* delay slot */
	bne	t8, t9, L(wexit89)

	/* Third byte: bits 23..16.  */
	srl	t0, v0, 16		/* delay slot */
	srl	t1, v1, 16
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff		/* delay slot */
	bne	t0, t1, L(wexit01)

	/* Fourth byte: bits 31..24.  The words differ, so if the first three
	   pairs matched this pair decides; fall through to L(wexit89).  */
	srl	t8, v0, 24		/* delay slot */
	srl	t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
	/* First byte: bits 31..24.  */
	srl	t0, v0, 24
	beq	t0, zero, L(wexit01)
	srl	t1, v1, 24		/* delay slot */
	bne	t0, t1, L(wexit01)

	/* Second byte: bits 23..16.  */
	srl	t8, v0, 16		/* delay slot */
	srl	t9, v1, 16
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff		/* delay slot */
	bne	t8, t9, L(wexit89)

	/* Third byte: bits 15..8.  */
	srl	t0, v0, 8		/* delay slot */
	srl	t1, v1, 8
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff		/* delay slot */
	bne	t0, t1, L(wexit01)

	/* Fourth byte: bits 7..0.  The words differ, so if the first three
	   pairs matched this pair decides; fall through to L(wexit89).  */
	andi	t8, v0, 0xff		/* delay slot */
	andi	t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

L(wexit89):
	j	ra
	subu	v0, t8, t9		/* delay slot: return value */
L(wexit01):
	j	ra
	subu	v0, t0, t1		/* delay slot: return value */
#endif /* USE_CLZ */
203 | |
/* Compare the bytes at OFFSET of both strings.  The byte read through a0
   goes to REGA, the byte read through a1 goes to REGB, and control
   transfers to EXIT when REGA is zero or the two bytes differ.  The
   instruction following the macro is the delay slot of the final bne.

   It might seem better to put the beq between the two lbu instructions so
   that the nop is not needed, but testing showed that this form is
   actually faster (based on the glibc strcmp test).  */
#define BYTECMP(OFFSET, REGA, REGB, EXIT) \
	lbu	REGA, OFFSET(a0); \
	lbu	REGB, OFFSET(a1); \
	beq	REGA, zero, EXIT; \
	nop; \
	bne	REGA, REGB, EXIT

/* Eight bytes per iteration.  Successive compares alternate between the
   pairs v0/v1 and t8/t9: the first lbu of each compare runs in the delay
   slot of the previous bne and must leave the pair used by that exit
   untouched.  */
L(byteloop):
	BYTECMP(0, v0, v1, L(bytediffv))
	BYTECMP(1, t8, t9, L(bytedifft))
	BYTECMP(2, v0, v1, L(bytediffv))
	BYTECMP(3, t8, t9, L(bytedifft))
	BYTECMP(4, v0, v1, L(bytediffv))
	BYTECMP(5, t8, t9, L(bytedifft))
	BYTECMP(6, v0, v1, L(bytediffv))
	BYTECMP(7, t8, t9, L(bytedifft))
	PTR_ADDIU a0, a0, 8		/* delay slot of the last compare */
	b	L(byteloop)
	PTR_ADDIU a1, a1, 8		/* delay slot */

/* Exit with the deciding pair in v0/v1.  */
L(bytediffv):
	j	ra
	subu	v0, v0, v1		/* delay slot: return value */
/* Exit with the deciding pair in t8/t9.  */
L(bytedifft):
	j	ra
	subu	v0, t8, t9		/* delay slot: return value */
240 | |
241 | .set at |
242 | .set reorder |
243 | |
244 | END(STRCMP_NAME) |
245 | #ifndef ANDROID_CHANGES |
246 | # ifdef _LIBC |
247 | libc_hidden_builtin_def (STRCMP_NAME) |
248 | # endif |
249 | #endif |
250 | |