1 | /* strchr with SSE2 with bsf |
2 | Copyright (C) 2011-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #if IS_IN (libc) |
20 | |
21 | # include <sysdep.h> |
22 | /* CFI_PUSH: unwind bookkeeping that accompanies a 4-byte push of REG (CFA offset adjusted by 4, then cfi_rel_offset for REG with offset 0). */ |
23 | # define CFI_PUSH(REG) \ |
24 | cfi_adjust_cfa_offset (4); \ |
25 | cfi_rel_offset (REG, 0) |
26 | /* CFI_POP: unwind bookkeeping that accompanies a 4-byte pop of REG (CFA offset adjusted by -4, then cfi_restore for REG). */ |
27 | # define CFI_POP(REG) \ |
28 | cfi_adjust_cfa_offset (-4); \ |
29 | cfi_restore (REG) |
30 | /* PUSH/POP: push or pop a 32-bit register and emit the matching CFI_PUSH/CFI_POP bookkeeping. */ |
31 | # define PUSH(REG) pushl REG; CFI_PUSH (REG) |
32 | # define POP(REG) popl REG; CFI_POP (REG) |
33 | /* Stack layout once ENTRANCE has run: saved %edi at 0(%esp), return address at 4(%esp), arguments starting at PARMS(%esp). */ |
34 | # define PARMS 8 /* 4-byte return address plus the 4 bytes pushed by ENTRANCE. */ |
35 | # define ENTRANCE PUSH(%edi) /* Prologue: save %edi. */ |
36 | # define RETURN POP(%edi); ret; CFI_PUSH(%edi); /* Epilogue: restore %edi and return; the trailing CFI_PUSH re-applies the saved-%edi unwind state for the code placed after the ret. */ |
37 | /* Stack offsets of the two arguments as the code below uses them: STR1 is loaded as the string pointer, STR2 as the character to look for. */ |
38 | # define STR1 PARMS |
39 | # define STR2 STR1+4 |
40 | |
41 | .text |
42 | ENTRY (__strchr_sse2_bsf) /* 32-bit x86 strchr built on SSE2 byte compares and bsf; both arguments are read from the stack at STR1 and STR2. */ |
43 | /* Result in EAX: address of the first byte equal to the low byte of the STR2 argument, or 0 when the terminating NUL comes first. */ |
44 | ENTRANCE /* Save EDI; it serves below as the pointer to the current 16-byte block. */ |
45 | mov STR1(%esp), %ecx /* ECX = string pointer. */ |
46 | movd STR2(%esp), %xmm1 /* Low dword of XMM1 = character argument. */ |
47 | |
48 | pxor %xmm2, %xmm2 /* XMM2 = all zero; compared against the data to find NUL bytes. */ |
49 | mov %ecx, %edi /* EDI = string pointer (aligned down below if needed). */ |
50 | punpcklbw %xmm1, %xmm1 /* Copy the character byte so it fills the low word... */ |
51 | punpcklbw %xmm1, %xmm1 /* ...and then the low dword. */ |
52 | /* ECX has OFFSET. */ |
53 | and $15, %ecx /* OFFSET = distance of the string start from the preceding 16-byte boundary. */ |
54 | pshufd $0, %xmm1, %xmm1 /* Broadcast the low dword: all 16 bytes of XMM1 now hold the character. */ |
55 | je L(loop) /* Uses ZF from the AND above: OFFSET 0 means the string is already 16-byte aligned. */ |
56 | |
57 | /* Handle unaligned string: examine the aligned 16-byte block that contains its first byte. */ |
58 | and $-16, %edi /* Round EDI down to the 16-byte boundary so the aligned load is valid. */ |
59 | movdqa (%edi), %xmm0 |
60 | pcmpeqb %xmm0, %xmm2 /* XMM2 = 0xff in every byte position that holds NUL. */ |
61 | pcmpeqb %xmm1, %xmm0 /* XMM0 = 0xff in every byte position that holds the character. */ |
62 | /* Find where NULL is. */ |
63 | pmovmskb %xmm2, %edx |
64 | /* Check if there is a match. */ |
65 | pmovmskb %xmm0, %eax |
66 | /* Remove the leading bytes: shift out the mask bits of the OFFSET bytes that precede the start of the string, so bit 0 is the first string byte. */ |
67 | sarl %cl, %edx |
68 | sarl %cl, %eax |
69 | test %eax, %eax |
70 | je L(unaligned_no_match) |
71 | /* Check which byte is a match. */ |
72 | bsf %eax, %eax |
73 | /* Is there a NULL? */ |
74 | test %edx, %edx |
75 | je L(unaligned_match) |
76 | bsf %edx, %edx |
77 | cmpl %edx, %eax |
78 | /* Return NULL if NULL comes first. Equal indices (the character searched for is NUL itself) still count as a match. */ |
79 | ja L(return_null) |
80 | L(unaligned_match): |
81 | add %edi, %eax /* Result = aligned block address + match index (relative to the string start)... */ |
82 | add %ecx, %eax /* ...+ OFFSET. */ |
83 | RETURN |
84 | |
85 | .p2align 4 |
86 | L(unaligned_no_match): /* No match in the first block. */ |
87 | test %edx, %edx |
88 | jne L(return_null) /* The string ends inside the first block: not found. */ |
89 | pxor %xmm2, %xmm2 /* Reset XMM2 to zero; it may still hold NUL hits from bytes before the string start. */ |
90 | |
91 | add $16, %edi /* Step to the next 16-byte block. */ |
92 | |
93 | .p2align 4 |
94 | /* Loop start on aligned string. The body is unrolled four times; each step tests one 16-byte block and XMM2 is all zero on entry to every step. */ |
95 | L(loop): |
96 | movdqa (%edi), %xmm0 |
97 | pcmpeqb %xmm0, %xmm2 /* NUL positions in this block. */ |
98 | add $16, %edi /* EDI now points past the block just loaded. */ |
99 | pcmpeqb %xmm1, %xmm0 /* Character positions in this block. */ |
100 | pmovmskb %xmm2, %edx |
101 | pmovmskb %xmm0, %eax |
102 | or %eax, %edx /* Non-zero if the block holds NUL or the character; EAX keeps the match mask. */ |
103 | jnz L(matches) |
104 | /* Second unrolled step; falling through means the previous NUL mask was zero, so XMM2 is still all zero. */ |
105 | movdqa (%edi), %xmm0 |
106 | pcmpeqb %xmm0, %xmm2 |
107 | add $16, %edi |
108 | pcmpeqb %xmm1, %xmm0 |
109 | pmovmskb %xmm2, %edx |
110 | pmovmskb %xmm0, %eax |
111 | or %eax, %edx |
112 | jnz L(matches) |
113 | /* Third unrolled step. */ |
114 | movdqa (%edi), %xmm0 |
115 | pcmpeqb %xmm0, %xmm2 |
116 | add $16, %edi |
117 | pcmpeqb %xmm1, %xmm0 |
118 | pmovmskb %xmm2, %edx |
119 | pmovmskb %xmm0, %eax |
120 | or %eax, %edx |
121 | jnz L(matches) |
122 | /* Fourth unrolled step. */ |
123 | movdqa (%edi), %xmm0 |
124 | pcmpeqb %xmm0, %xmm2 |
125 | add $16, %edi |
126 | pcmpeqb %xmm1, %xmm0 |
127 | pmovmskb %xmm2, %edx |
128 | pmovmskb %xmm0, %eax |
129 | or %eax, %edx |
130 | jnz L(matches) |
131 | jmp L(loop) |
132 | /* Reached when a block contains NUL, the character, or both. EAX = match mask, XMM2 = NUL compare result, EDI = address just past that block. */ |
133 | L(matches): |
134 | pmovmskb %xmm2, %edx /* Recompute the NUL mask; EDX was overwritten by the OR in the loop. */ |
135 | test %eax, %eax |
136 | jz L(return_null) /* No character match, so the block was flagged only because of a NUL. */ |
137 | bsf %eax, %eax /* EAX = index of the first matching byte. */ |
138 | /* There is a match. First find where NULL is. */ |
139 | test %edx, %edx |
140 | je L(match) |
141 | bsf %edx, %ecx /* ECX = index of the first NUL byte. */ |
142 | /* Check if NULL comes first. */ |
143 | cmpl %ecx, %eax |
144 | ja L(return_null) |
145 | L(match): |
146 | sub $16, %edi /* Undo the advance: EDI back to the start of the block that matched. */ |
147 | add %edi, %eax /* Result = block address + match index. */ |
148 | RETURN |
149 | |
150 | /* Return NULL. */ |
151 | .p2align 4 |
152 | L(return_null): |
153 | xor %eax, %eax /* EAX = 0: character not found before the end of the string. */ |
154 | RETURN |
155 | |
156 | END (__strchr_sse2_bsf) |
157 | #endif |
158 | |