1 | /* strlen with SSE2 and BSF |
2 | Copyright (C) 2010-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #if defined SHARED && IS_IN (libc) |
20 | |
21 | #include <sysdep.h> |
22 | |
/* Unwind annotations that go with a 4-byte push of REG: the CFA offset
   grows by 4 and REG is recorded at offset 0.  The cfi_* helpers are
   not defined in this file; presumably they come from <sysdep.h> and
   map to the assembler's CFI directives.  */
23 | #define CFI_PUSH(REG) \ |
24 | cfi_adjust_cfa_offset (4); \ |
25 | cfi_rel_offset (REG, 0) |
26 | |
/* Unwind annotations that go with a 4-byte pop of REG: the CFA offset
   shrinks by 4 and REG is marked as restored.  */
27 | #define CFI_POP(REG) \ |
28 | cfi_adjust_cfa_offset (-4); \ |
29 | cfi_restore (REG) |
30 | |
/* Push or pop a 32-bit register and emit the matching unwind
   annotations in the same statement sequence.  */
31 | #define PUSH(REG) pushl REG; CFI_PUSH (REG) |
32 | #define POP(REG) popl REG; CFI_POP (REG) |
/* Offset from %esp to the first stack argument once ENTRANCE has run:
   8 bytes for the two registers ENTRANCE pushes, plus 4 bytes that are
   presumably the caller's return address.  The expansion is not
   parenthesized; it is used below only as the displacement in
   STR(%esp).  */
33 | #define PARMS 4 + 8 /* Preserve ESI and EDI. */ |
/* Stack offset of the string argument.  */
34 | #define STR PARMS |
/* Prologue: save %esi and %edi, then remember the resulting unwind
   state so that RETURN can reinstate it after each ret.  */
35 | #define ENTRANCE PUSH (%esi); PUSH (%edi); cfi_remember_state |
/* Epilogue: restore %edi and %esi in reverse push order and return.
   The state restore/remember pair placed after the ret makes the code
   that follows be described by the unwind state saved in ENTRANCE
   (both registers still pushed) rather than by the post-pop state.  */
36 | #define RETURN POP (%edi); POP (%esi); ret; \ |
37 | cfi_restore_state; cfi_remember_state |
38 | |
39 | .text |
/* __strlen_sse2_bsf: scan the string whose address is loaded from
   STR(%esp) for its first zero byte and return that byte's offset from
   the start of the string in %eax.

   Register use, as visible below:
     %edi         start of the string; not modified after the load
     %eax         0 on the quick path, otherwise the 16-byte-aligned
                  address being scanned; holds the result on return
     %ecx         low six bits of the start address, then a shift count
     %edx         byte mask from pmovmskb, then the bit index from bsf
     %esi         mask that discards bytes located before the string
     %xmm0-%xmm3  zero before every pcmpeqb in the loop; %xmm1 also
                  serves as the load temporary on the quick path
   %esi and %edi are saved by ENTRANCE and restored on every return
   path.  */
40 | ENTRY ( __strlen_sse2_bsf) |
41 | ENTRANCE |
/* %edi = string start.  %eax = 0 so that L(exit_less16) can add the
   bit index to it directly.  */
42 | mov STR(%esp), %edi |
43 | xor %eax, %eax |
/* %ecx = offset of the start within its 64-byte block.  */
44 | mov %edi, %ecx |
45 | and $0x3f, %ecx |
/* %xmm0 = 0, the value every byte is compared against.  */
46 | pxor %xmm0, %xmm0 |
/* An offset above 0x30 means the 16 bytes starting at %edi would
   extend past the end of this 64-byte block; take the aligned path at
   L(next) in that case (presumably so the unaligned load below never
   touches the following cache line or page).  */
47 | cmp $0x30, %ecx |
48 | ja L(next) |
/* Quick path: unaligned load of the first 16 bytes, compared with
   zero.  Bit i of %edx is set when byte i of the string is zero.  */
49 | movdqu (%edi), %xmm1 |
50 | pcmpeqb %xmm1, %xmm0 |
51 | pmovmskb %xmm0, %edx |
52 | test %edx, %edx |
53 | jnz L(exit_less16) |
/* No zero byte in the first 16 bytes: round the start down to a
   16-byte boundary.  The loop begins at 16(%eax), so it may look again
   at some bytes already known to be nonzero, but it skips none.  */
54 | mov %edi, %eax |
55 | and $-16, %eax |
56 | jmp L(align16_start) |
57 | L(next): |
58 | |
/* Aligned path: compare the whole 16-byte block that contains the
   start.  This can also match bytes located before the string.  */
59 | mov %edi, %eax |
60 | and $-16, %eax |
61 | pcmpeqb (%eax), %xmm0 |
/* Build a mask that clears the bits for bytes before the start.
   %ecx - %eax is congruent to (%edi & 15) modulo 32 and shl uses only
   the low five bits of %cl, so %esi = -1 << (%edi & 15).  */
62 | mov $-1, %esi |
63 | sub %eax, %ecx |
64 | shl %cl, %esi |
/* Any bit that survives the mask is a zero byte inside the string.
   L(exit) converts the aligned base in %eax to an offset from %edi.  */
65 | pmovmskb %xmm0, %edx |
66 | and %esi, %edx |
67 | jnz L(exit) |
68 | L(align16_start): |
/* Clear all four compare registers.  When arriving from L(next),
   %xmm0 may still hold matches for bytes before the string.  */
69 | pxor %xmm0, %xmm0 |
70 | pxor %xmm1, %xmm1 |
71 | pxor %xmm2, %xmm2 |
72 | pxor %xmm3, %xmm3 |
73 | .p2align 4 |
74 | L(align16_loop): |
/* Each step compares one aligned 16-byte block with a zero register.
   When no byte matches, the result written back to that register is
   all zero again, so no reset is needed between iterations.  %eax is
   16-byte aligned here, as the memory operand of pcmpeqb requires.  */
75 | pcmpeqb 16(%eax), %xmm0 |
76 | pmovmskb %xmm0, %edx |
77 | test %edx, %edx |
78 | jnz L(exit16) |
79 | |
80 | pcmpeqb 32(%eax), %xmm1 |
81 | pmovmskb %xmm1, %edx |
82 | test %edx, %edx |
83 | jnz L(exit32) |
84 | |
85 | pcmpeqb 48(%eax), %xmm2 |
86 | pmovmskb %xmm2, %edx |
87 | test %edx, %edx |
88 | jnz L(exit48) |
89 | |
/* Fourth block: %eax is advanced by 64 before the test, so on
   fall-through into L(exit) it already addresses the block that holds
   the zero byte.  */
90 | pcmpeqb 64(%eax), %xmm3 |
91 | pmovmskb %xmm3, %edx |
92 | lea 64(%eax), %eax |
93 | test %edx, %edx |
94 | jz L(align16_loop) |
95 | L(exit): |
/* %eax = offset of the matching block from the string start; this is
   zero or negative when reached from L(next).  */
96 | sub %edi, %eax |
97 | L(exit_less16): |
/* Add the index of the lowest set mask bit, i.e. the position of the
   first zero byte within the block.  */
98 | bsf %edx, %edx |
99 | add %edx, %eax |
100 | RETURN |
101 | L(exit16): |
/* For L(exit16), L(exit32) and L(exit48), %eax still holds the base
   of the current iteration, so the offset of the matching block
   (16, 32 or 48) is added explicitly.  */
102 | sub %edi, %eax |
103 | bsf %edx, %edx |
104 | add %edx, %eax |
105 | add $16, %eax |
106 | RETURN |
107 | L(exit32): |
108 | sub %edi, %eax |
109 | bsf %edx, %edx |
110 | add %edx, %eax |
111 | add $32, %eax |
112 | RETURN |
113 | L(exit48): |
114 | sub %edi, %eax |
115 | bsf %edx, %edx |
116 | add %edx, %eax |
117 | add $48, %eax |
/* Final return path: the same pops and ret as RETURN, written out
   without the trailing cfi_restore_state/cfi_remember_state pair,
   presumably because no further code follows before END.  */
118 | POP (%edi) |
119 | POP (%esi) |
120 | ret |
121 | |
122 | END ( __strlen_sse2_bsf) |
123 | |
124 | #endif |
125 | |