1/* strlen with SSE2 and BSF
2 Copyright (C) 2010-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#if defined SHARED && IS_IN (libc)
20
21#include <sysdep.h>
22
/* Unwind annotation for a 4-byte push of REG: adjusts the CFA offset by 4
   and records REG at offset 0.  */
23#define CFI_PUSH(REG) \
24 cfi_adjust_cfa_offset (4); \
25 cfi_rel_offset (REG, 0)
26
/* Unwind annotation for the matching pop: adjusts the CFA offset by -4 and
   marks REG as restored.  */
27#define CFI_POP(REG) \
28 cfi_adjust_cfa_offset (-4); \
29 cfi_restore (REG)
30
/* pushl/popl paired with the corresponding unwind annotation.  */
31#define PUSH(REG) pushl REG; CFI_PUSH (REG)
32#define POP(REG) popl REG; CFI_POP (REG)
/* Stack offset of the first argument once ENTRANCE has run: the 8 accounts
   for the two 4-byte registers ENTRANCE pushes; the 4 is presumably the
   return address -- confirm against the i386 calling convention.  */
33#define PARMS 4 + 8 /* Preserve ESI and EDI. */
/* Offset of the string argument; used below as STR(%esp).  */
34#define STR PARMS
/* Prologue: save ESI and EDI, then snapshot the CFI state so that each
   RETURN can reinstate it for the code assembled after it.  */
35#define ENTRANCE PUSH (%esi); PUSH (%edi); cfi_remember_state
/* Epilogue: restore EDI and ESI and return.  The trailing
   cfi_restore_state/cfi_remember_state apply to the code that follows the
   ret, which is still entered with both registers on the stack.  */
36#define RETURN POP (%edi); POP (%esi); ret; \
37 cfi_restore_state; cfi_remember_state
38
39 .text
/* __strlen_sse2_bsf: return in EAX the number of bytes preceding the first
   zero byte of the string whose address is read from STR(%esp).
   NOTE(review): the C prototype (size_t strlen (const char *)) is presumed
   from the file header -- confirm against the ifunc selector.

   Register roles in this routine:
     edi        address of the string start (never modified after the load)
     eax        0 on the first fast path, otherwise the 16-byte-aligned scan
                pointer; holds the result on return
     ecx        offset of the start within its 64-byte block, then the shift
                count for the head mask
     edx        byte mask produced by pmovmskb, then the bit index from bsf
     esi        mask that discards bytes located before the string start
     xmm0-xmm3  all-zero comparands for pcmpeqb
   ESI and EDI are saved by ENTRANCE and restored on every exit path.  */
40ENTRY ( __strlen_sse2_bsf)
41 ENTRANCE
42 mov STR(%esp), %edi
 /* eax = 0 so that L(exit_less16) can add the bit index directly.  */
43 xor %eax, %eax
 /* ecx = offset of the start inside its 64-byte block.  Above 0x30 a
    16-byte load from edi would run past the end of that block, so the
    aligned path at L(next) is taken instead.  */
44 mov %edi, %ecx
45 and $0x3f, %ecx
46 pxor %xmm0, %xmm0
47 cmp $0x30, %ecx
48 ja L(next)
 /* Unaligned check of the first 16 bytes; bit i of edx is set when byte i
    is zero.  */
49 movdqu (%edi), %xmm1
50 pcmpeqb %xmm1, %xmm0
51 pmovmskb %xmm0, %edx
52 test %edx, %edx
53 jnz L(exit_less16)
 /* No terminator yet: round the pointer down to 16.  The loop starts at
    16(%eax), so it may re-examine bytes already checked above.  */
54 mov %edi, %eax
55 and $-16, %eax
56 jmp L(align16_start)
57L(next):
58
 /* Aligned check of the 16-byte chunk containing the start.  */
59 mov %edi, %eax
60 and $-16, %eax
61 pcmpeqb (%eax), %xmm0
 /* esi = -1 << (edi & 15): the subtraction leaves edi & 15 in the low bits
    of ecx, which are the only bits of the count that shl uses.  The mask
    clears match bits for bytes that lie before the string start.  */
62 mov $-1, %esi
63 sub %eax, %ecx
64 shl %cl, %esi
65 pmovmskb %xmm0, %edx
66 and %esi, %edx
67 jnz L(exit)
68L(align16_start):
 /* Reset the comparands.  Inside the loop a pcmpeqb that finds no zero
    byte leaves its destination all-zero, so no re-clearing is needed
    between iterations.  */
69 pxor %xmm0, %xmm0
70 pxor %xmm1, %xmm1
71 pxor %xmm2, %xmm2
72 pxor %xmm3, %xmm3
73 .p2align 4
 /* Main loop: four aligned 16-byte chunks (64 bytes) per iteration.  */
74L(align16_loop):
75 pcmpeqb 16(%eax), %xmm0
76 pmovmskb %xmm0, %edx
77 test %edx, %edx
78 jnz L(exit16)
79
80 pcmpeqb 32(%eax), %xmm1
81 pmovmskb %xmm1, %edx
82 test %edx, %edx
83 jnz L(exit32)
84
85 pcmpeqb 48(%eax), %xmm2
86 pmovmskb %xmm2, %edx
87 test %edx, %edx
88 jnz L(exit48)
89
90 pcmpeqb 64(%eax), %xmm3
91 pmovmskb %xmm3, %edx
 /* Advance by 64 before the test; lea does not modify the flags and leaves
    eax pointing at the chunk that produced edx.  */
92 lea 64(%eax), %eax
93 test %edx, %edx
94 jz L(align16_loop)
 /* eax addresses the chunk holding the terminator.  eax - edi is that
    chunk's offset from the start (it can be negative when arriving from
    L(next)); adding the index of the lowest set bit gives the length.  */
95L(exit):
96 sub %edi, %eax
 /* Entered from the first fast path with eax == 0.  */
97L(exit_less16):
98 bsf %edx, %edx
99 add %edx, %eax
100 RETURN
 /* Terminator found in the chunk at 16(%eax).  */
101L(exit16):
102 sub %edi, %eax
103 bsf %edx, %edx
104 add %edx, %eax
105 add $16, %eax
106 RETURN
 /* Terminator found in the chunk at 32(%eax).  */
107L(exit32):
108 sub %edi, %eax
109 bsf %edx, %edx
110 add %edx, %eax
111 add $32, %eax
112 RETURN
 /* Terminator found in the chunk at 48(%eax).  */
113L(exit48):
114 sub %edi, %eax
115 bsf %edx, %edx
116 add %edx, %eax
117 add $48, %eax
 /* Final exit: the pops are spelled out instead of using RETURN, so no
    cfi_restore_state/cfi_remember_state pair is emitted after the last
    ret.  */
118 POP (%edi)
119 POP (%esi)
120 ret
121
122END ( __strlen_sse2_bsf)
123
124#endif
125

source code of glibc/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S