1 | /* Compare two memory blocks for differences in the first COUNT bytes. |
2 | Copyright (C) 2004-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <sysdep.h> |
20 | #include "asm-syntax.h" |
21 | |
/* Offsets of the stack arguments relative to ESP once ENTRANCE has
   run: 4 bytes of return address plus 4 bytes for the saved EBX.  */
#define PARMS		4+4
#define BLK1		PARMS		/* First memory block.  */
#define BLK2		BLK1+4		/* Second memory block.  */
#define LEN		BLK2+4		/* Number of bytes to compare.  */

/* Prologue: save EBX and record the push in the unwind info.  */
#define ENTRANCE \
	pushl	%ebx; \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (ebx, 0)

/* Epilogue: restore EBX, undo the unwind-info adjustment made by
   ENTRANCE and return to the caller.  */
#define RETURN \
	popl	%ebx; \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (ebx); \
	ret
30 | |
/* Compute the absolute address of a jump-table target and leave it in
   EBX.  TABLE is a jump table whose entries are offsets relative to
   the start of the table.  INDEX is a register that contains the index
   into the jump table.  The previous value of EBX is lost.  */
#define LOAD_JUMP_TABLE_ENTRY(TABLE, INDEX) \
	/* Start by loading the PC into EBX.  */ \
	SETUP_PIC_REG(bx); \
	/* Turn the PC into the address of the jump table.  */ \
	addl	$(TABLE - .), %ebx; \
	/* Add the selected 4-byte entry (an offset from TABLE) to the \
	   table address, which yields the absolute target address.  */ \
	addl	(%ebx,INDEX,4), %ebx
42 | |
/* int memcmp (const void *blk1, const void *blk2, size_t len)

   Compare the first LEN bytes of BLK1 and BLK2 as unsigned bytes.

   In:     arguments on the stack at BLK1(%esp), BLK2(%esp) and
	   LEN(%esp) (offsets valid after ENTRANCE).
   Out:    EAX = 0 if all LEN bytes match, -1 if the first differing
	   byte of BLK1 is below the one of BLK2, 1 otherwise.
   Saved:  EBX (ENTRANCE/RETURN) and ESI (pushed for LEN >= 2).
   Scratch: EAX, ECX, EDX, flags.

   Strategy: LEN 0 and 1 are handled inline.  Otherwise blocks of 32
   bytes are compared a dword at a time, and the remaining 0..31 bytes
   are handled by jumping through L(table_32bytes) into a chain of
   dword compares that ends in a 0-, 1-, 2- or 3-byte tail.

   LEN is an unsigned quantity, so every test on it uses the unsigned
   conditions (jb/jae).  The signed forms would treat a length of
   0x80000000 or more as negative and report "equal" without looking
   at the data.  */
	.text
	ALIGN (4)
ENTRY (memcmp)
	ENTRANCE

	movl	BLK1(%esp), %eax
	movl	BLK2(%esp), %edx
	movl	LEN(%esp), %ecx

	cmpl	$1, %ecx
	jne	L(not_1)
	movzbl	(%eax), %ecx		/* LEN == 1  */
	cmpb	(%edx), %cl
	jne	L(neq)
L(bye):
	xorl	%eax, %eax
	RETURN

	/* RETURN popped EBX; re-establish the unwind state for the code
	   below, which is reached with EBX still on the stack.  */
	cfi_adjust_cfa_offset (4)
	cfi_rel_offset (ebx, 0)
L(neq):
	/* CF comes from the byte compare above: the sbbl pair turns
	   CF=1 into -1 and CF=0 into 1.  */
	sbbl	%eax, %eax
	sbbl	$-1, %eax
	RETURN

	cfi_adjust_cfa_offset (4)
	cfi_rel_offset (ebx, 0)
L(not_1):
	/* Flags are still those of "cmpl $1, %ecx"; below 1 (unsigned)
	   means LEN is zero.  */
	jb	L(bye)			/* LEN == 0  */

	pushl	%esi
	cfi_adjust_cfa_offset (4)
	movl	%eax, %esi		/* ESI = BLK1, EDX = BLK2.  */
	cfi_rel_offset (esi, 0)
	cmpl	$32, %ecx
	jae	L(32bytesormore)	/* LEN >= 32  */

	/* 2 <= LEN < 32: advance both pointers to the end of the blocks
	   and dispatch on LEN; the targets use negative offsets.  */
	LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx)
	addl	%ecx, %edx
	addl	%ecx, %esi
	jmp	*%ebx

	/* Remaining length is a multiple of 4.  */
	ALIGN (4)
L(28bytes):
	movl	-28(%esi), %eax
	movl	-28(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(24bytes):
	movl	-24(%esi), %eax
	movl	-24(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(20bytes):
	movl	-20(%esi), %eax
	movl	-20(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(16bytes):
	movl	-16(%esi), %eax
	movl	-16(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(12bytes):
	movl	-12(%esi), %eax
	movl	-12(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(8bytes):
	movl	-8(%esi), %eax
	movl	-8(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(4bytes):
	movl	-4(%esi), %eax
	movl	-4(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(0bytes):
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	xorl	%eax, %eax
	RETURN

	/* Remaining length is 1 modulo 4.  Both ESI and EBX are on the
	   stack again as far as the unwinder is concerned.  */
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (esi, 0)
	cfi_rel_offset (ebx, 4)
L(29bytes):
	movl	-29(%esi), %eax
	movl	-29(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(25bytes):
	movl	-25(%esi), %eax
	movl	-25(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(21bytes):
	movl	-21(%esi), %eax
	movl	-21(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(17bytes):
	movl	-17(%esi), %eax
	movl	-17(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(13bytes):
	movl	-13(%esi), %eax
	movl	-13(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(9bytes):
	movl	-9(%esi), %eax
	movl	-9(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(5bytes):
	movl	-5(%esi), %eax
	movl	-5(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(1bytes):
	movzbl	-1(%esi), %eax
	cmpb	-1(%edx), %al
	jne	L(set)
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	xorl	%eax, %eax
	RETURN

	/* Remaining length is 2 modulo 4.  */
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (esi, 0)
	cfi_rel_offset (ebx, 4)
L(30bytes):
	movl	-30(%esi), %eax
	movl	-30(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(26bytes):
	movl	-26(%esi), %eax
	movl	-26(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(22bytes):
	movl	-22(%esi), %eax
	movl	-22(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(18bytes):
	movl	-18(%esi), %eax
	movl	-18(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(14bytes):
	movl	-14(%esi), %eax
	movl	-14(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(10bytes):
	movl	-10(%esi), %eax
	movl	-10(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(6bytes):
	movl	-6(%esi), %eax
	movl	-6(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(2bytes):
	movzwl	-2(%esi), %eax
	movzwl	-2(%edx), %ecx
	/* The lower-addressed byte decides first; if it matches, the
	   full 16-bit compare is decided by the second byte.  */
	cmpb	%cl, %al
	jne	L(set)
	cmpl	%ecx, %eax
	jne	L(set)
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	xorl	%eax, %eax
	RETURN

	/* Remaining length is 3 modulo 4.  */
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (esi, 0)
	cfi_rel_offset (ebx, 4)
L(31bytes):
	movl	-31(%esi), %eax
	movl	-31(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(27bytes):
	movl	-27(%esi), %eax
	movl	-27(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(23bytes):
	movl	-23(%esi), %eax
	movl	-23(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(19bytes):
	movl	-19(%esi), %eax
	movl	-19(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(15bytes):
	movl	-15(%esi), %eax
	movl	-15(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(11bytes):
	movl	-11(%esi), %eax
	movl	-11(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(7bytes):
	movl	-7(%esi), %eax
	movl	-7(%edx), %ecx
	cmpl	%ecx, %eax
	jne	L(find_diff)
L(3bytes):
	/* Two bytes first (lower address decides), then the last one.  */
	movzwl	-3(%esi), %eax
	movzwl	-3(%edx), %ecx
	cmpb	%cl, %al
	jne	L(set)
	cmpl	%ecx, %eax
	jne	L(set)
	movzbl	-1(%esi), %eax
	cmpb	-1(%edx), %al
	jne	L(set)
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	xorl	%eax, %eax
	RETURN

	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (esi, 0)
	cfi_rel_offset (ebx, 4)
	ALIGN (4)
/* ECX >= 32.  Compare one 32-byte block per iteration; ECX is the
   number of bytes left after the block being compared.  */
L(32bytesormore):
	subl	$32, %ecx

	movl	(%esi), %eax
	cmpl	(%edx), %eax
	jne	L(load_ecx)

	movl	4(%esi), %eax
	cmpl	4(%edx), %eax
	jne	L(load_ecx_4)

	movl	8(%esi), %eax
	cmpl	8(%edx), %eax
	jne	L(load_ecx_8)

	movl	12(%esi), %eax
	cmpl	12(%edx), %eax
	jne	L(load_ecx_12)

	movl	16(%esi), %eax
	cmpl	16(%edx), %eax
	jne	L(load_ecx_16)

	movl	20(%esi), %eax
	cmpl	20(%edx), %eax
	jne	L(load_ecx_20)

	movl	24(%esi), %eax
	cmpl	24(%edx), %eax
	jne	L(load_ecx_24)

	movl	28(%esi), %eax
	cmpl	28(%edx), %eax
	jne	L(load_ecx_28)

	addl	$32, %esi
	addl	$32, %edx
	cmpl	$32, %ecx
	jae	L(32bytesormore)

	/* Fewer than 32 bytes left: finish through the jump table.  */
	LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx)
	addl	%ecx, %edx
	addl	%ecx, %esi
	jmp	*%ebx

	/* A dword at offset N of the current block differed.  EAX holds
	   the BLK1 dword; entering the chain at load_ecx_N advances EDX
	   by N so the matching BLK2 dword can be loaded into ECX.  */
L(load_ecx_28):
	addl	$0x4, %edx
L(load_ecx_24):
	addl	$0x4, %edx
L(load_ecx_20):
	addl	$0x4, %edx
L(load_ecx_16):
	addl	$0x4, %edx
L(load_ecx_12):
	addl	$0x4, %edx
L(load_ecx_8):
	addl	$0x4, %edx
L(load_ecx_4):
	addl	$0x4, %edx
L(load_ecx):
	movl	(%edx), %ecx

	/* EAX (from BLK1) and ECX (from BLK2) hold dwords that differ.
	   Find the first differing byte in memory order, i.e. starting
	   from the least significant byte.  */
L(find_diff):
	cmpb	%cl, %al
	jne	L(set)
	cmpb	%ch, %ah
	jne	L(set)
	shrl	$16,%eax
	shrl	$16,%ecx
	cmpb	%cl, %al
	jne	L(set)
	/* We get here only if we already know there is a
	   difference.  */
	cmpl	%ecx, %eax
L(set):
	/* Convert CF of the deciding compare into -1 (CF=1) or 1.  */
	sbbl	%eax, %eax
	sbbl	$-1, %eax
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	RETURN
END (memcmp)
368 | |
/* Jump table for the final 0..31 bytes.  Entry N is the offset of the
   code that compares the last N bytes, relative to the start of the
   table, as expected by LOAD_JUMP_TABLE_ENTRY.  */
#define TABLE_32BYTES_ENTRY(TARGET) .long TARGET - L(table_32bytes)

	.section	.rodata
	ALIGN (2)
L(table_32bytes):
	TABLE_32BYTES_ENTRY (L(0bytes))
	TABLE_32BYTES_ENTRY (L(1bytes))
	TABLE_32BYTES_ENTRY (L(2bytes))
	TABLE_32BYTES_ENTRY (L(3bytes))
	TABLE_32BYTES_ENTRY (L(4bytes))
	TABLE_32BYTES_ENTRY (L(5bytes))
	TABLE_32BYTES_ENTRY (L(6bytes))
	TABLE_32BYTES_ENTRY (L(7bytes))
	TABLE_32BYTES_ENTRY (L(8bytes))
	TABLE_32BYTES_ENTRY (L(9bytes))
	TABLE_32BYTES_ENTRY (L(10bytes))
	TABLE_32BYTES_ENTRY (L(11bytes))
	TABLE_32BYTES_ENTRY (L(12bytes))
	TABLE_32BYTES_ENTRY (L(13bytes))
	TABLE_32BYTES_ENTRY (L(14bytes))
	TABLE_32BYTES_ENTRY (L(15bytes))
	TABLE_32BYTES_ENTRY (L(16bytes))
	TABLE_32BYTES_ENTRY (L(17bytes))
	TABLE_32BYTES_ENTRY (L(18bytes))
	TABLE_32BYTES_ENTRY (L(19bytes))
	TABLE_32BYTES_ENTRY (L(20bytes))
	TABLE_32BYTES_ENTRY (L(21bytes))
	TABLE_32BYTES_ENTRY (L(22bytes))
	TABLE_32BYTES_ENTRY (L(23bytes))
	TABLE_32BYTES_ENTRY (L(24bytes))
	TABLE_32BYTES_ENTRY (L(25bytes))
	TABLE_32BYTES_ENTRY (L(26bytes))
	TABLE_32BYTES_ENTRY (L(27bytes))
	TABLE_32BYTES_ENTRY (L(28bytes))
	TABLE_32BYTES_ENTRY (L(29bytes))
	TABLE_32BYTES_ENTRY (L(30bytes))
	TABLE_32BYTES_ENTRY (L(31bytes))

#undef TABLE_32BYTES_ENTRY
404 | |
405 | |
/* Remove any macro definitions of the alias names so that the alias
   macros below receive the plain identifiers.  */
#undef bcmp
#undef __memcmpeq

/* bcmp is a weak alias and __memcmpeq a strong alias of memcmp.  */
weak_alias (memcmp, bcmp)
strong_alias (memcmp, __memcmpeq)

/* Hidden definitions for memcmp and __memcmpeq.  */
libc_hidden_builtin_def (memcmp)
libc_hidden_def (__memcmpeq)
412 | |