1 | /* Optimized strnlen implementation for PowerPC64/POWER7 using cmpb insn. |
2 | Copyright (C) 2010-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <sysdep.h> |
20 | |
21 | #ifndef STRNLEN /* A definition of STRNLEN made before this point takes precedence. */ |
22 | # define STRNLEN __strnlen /* Default symbol name used for the entry point below. */ |
23 | #endif |
24 | |
25 | /* size_t [r3] strnlen (const char *s [r3], size_t size [r4]). Returns the offset of the first null byte of s, or size when no null byte lies within the first size bytes; size is compared as an unsigned 64-bit value. Register use: r0 = zero doubleword for cmpb (reused as scratch once L(done) is reached); r7 = s + size - 1, later rounded down to the last doubleword in range; r8 = address of the current aligned doubleword; r9/r10 = cmpb match masks; r11/r12 = loaded data; r5/r6 = scratch. */ |
26 | .machine power7 /* Assemble for the POWER7 instruction set. */ |
27 | ENTRY_TOCLESS (STRNLEN) /* Entry macro supplied by the included headers. */ |
28 | CALL_MCOUNT 2 /* Profiling hook macro; NOTE(review): the 2 is presumably the argument count - confirm in sysdep.h. */ |
29 | dcbt 0,r3 /* Prefetch hint for the cache block holding s. */ |
30 | clrrdi r8,r3,3 /* r8 = s rounded down to an 8-byte boundary. */ |
31 | add r7,r3,r4 /* r7 = s + size; becomes the last acceptable address after the -1 below. */ |
32 | cmpldi r4,32 /* Unsigned compare of size with 32; tested by the ble below. */ |
33 | li r0,0 /* Doubleword with null chars. */ |
34 | addi r7,r7,-1 /* r7 = s + size - 1, the last byte in range. */ |
35 | |
36 | /* If we have at most 32 bytes to search, skip to a simpler code path. */ |
37 | ble L(small_range) |
38 | |
39 | rlwinm r6,r3,3,26,28 /* Calculate padding: r6 = (s & 7) * 8 bits. */ |
40 | ld r12,0(r8) /* Load the aligned doubleword that contains s. */ |
41 | cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */ |
42 | #ifdef __LITTLE_ENDIAN__ |
43 | srd r10,r10,r6 /* Little-endian: the bytes before s are the low-order ones; */ |
44 | sld r10,r10,r6 /* shift their match bits out and bring zeros back in. */ |
45 | #else |
46 | sld r10,r10,r6 /* Big-endian: the bytes before s are the high-order ones; */ |
47 | srd r10,r10,r6 /* shift their match bits out and bring zeros back in. */ |
48 | #endif |
49 | cmpldi cr7,r10,0 /* If r10 == 0, no nulls have been found. */ |
50 | bne cr7,L(done) |
51 | |
52 | clrrdi r7,r7,3 /* Address of last doubleword. */ |
53 | mtcrf 0x01,r8 /* CR field 7 = low four bits of r8. */ |
54 | /* Is r8 + 8, the next doubleword, on a quadword boundary? If so, skip to |
55 | the main loop. Otherwise, go through the alignment code. */ |
56 | |
57 | bt 28,L(loop_setup) /* CR bit 28 now holds the 8s bit of r8. */ |
58 | |
59 | /* Handle DWORD2 of pair. */ |
60 | ldu r12,8(r8) /* Load from r8 + 8 and advance r8 to that address. */ |
61 | cmpb r10,r12,r0 |
62 | cmpldi cr7,r10,0 |
63 | bne cr7,L(done) |
64 | |
65 | L(loop_setup): |
66 | /* The last dword we want to read in the loop below is the one |
67 | containing the last byte of the string, ie. the dword at |
68 | (s + size - 1) & ~7, or r7. The first dword read is at |
69 | r8 + 8, we read 2 * cnt dwords, so the last dword read will |
70 | be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives |
71 | cnt = (r7 - r8) / 16 */ |
72 | sub r5,r7,r8 /* r5 = r7 - r8. */ |
73 | srdi r6,r5,4 /* Number of loop iterations. */ |
74 | mtctr r6 /* Setup the counter. */ |
75 | |
76 | /* Main loop to look for the null byte in the string. Since |
77 | it's a small loop (8 instructions), align it to 32-bytes. */ |
78 | .p2align 5 |
79 | L(loop): |
80 | /* Load two doublewords, compare and merge in a |
81 | single register for speed. This is an attempt |
82 | to speed up the null-checking process for bigger strings. */ |
83 | |
84 | ld r12,8(r8) /* First doubleword of the pair. */ |
85 | ldu r11,16(r8) /* Second doubleword; r8 now points to it. */ |
86 | cmpb r10,r12,r0 |
87 | cmpb r9,r11,r0 |
88 | or r5,r9,r10 /* Merge everything in one doubleword. */ |
89 | cmpldi cr7,r5,0 |
90 | bne cr7,L(found) |
91 | bdnz L(loop) /* Decrement CTR; repeat while it is nonzero. */ |
92 | |
93 | /* We may have one more dword to read. */ |
94 | cmpld cr6,r8,r7 |
95 | beq cr6,L(end_max) /* r8 is already the last doubleword in range. */ |
96 | |
97 | ldu r12,8(r8) |
98 | cmpb r10,r12,r0 |
99 | cmpldi cr6,r10,0 |
100 | bne cr6,L(done) |
101 | |
102 | L(end_max): |
103 | mr r3,r4 /* No null byte within range: return size. */ |
104 | blr |
105 | |
106 | /* OK, one (or both) of the doublewords contains a null byte. Check |
107 | the first doubleword and decrement the address in case the first |
108 | doubleword really contains a null byte. */ |
109 | .align 4 |
110 | L(found): |
111 | cmpldi cr6,r10,0 /* Did the first doubleword match? */ |
112 | addi r8,r8,-8 /* Point r8 at the first doubleword; cr6 is not affected. */ |
113 | bne cr6,L(done) |
114 | |
115 | /* The null byte must be in the second doubleword. Adjust the address |
116 | again and move the result of cmpb to r10 so we can calculate the |
117 | length. */ |
118 | |
119 | mr r10,r9 |
120 | addi r8,r8,8 /* Back to the second doubleword. */ |
121 | |
122 | /* r10 has the output of the cmpb instruction, that is, it contains |
123 | 0xff in the same position as the null byte in the original |
124 | doubleword from the string. Use that to calculate the length. |
125 | We need to make sure the null char is *before* the end of the |
126 | range. */ |
127 | L(done): |
128 | #ifdef __LITTLE_ENDIAN__ |
129 | addi r0,r10,-1 /* (r10 - 1) & ~r10 sets exactly the bits below the */ |
130 | andc r0,r0,r10 /* lowest set bit of r10; */ |
131 | popcntd r0,r0 /* counting them gives the number of trailing zero bits. */ |
132 | #else |
133 | cntlzd r0,r10 /* Count leading zeros before the match. */ |
134 | #endif |
135 | sub r3,r8,r3 /* r3 = r8 - s, offset of this doubleword from s. */ |
136 | srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ |
137 | add r3,r3,r0 /* Length until the match. */ |
138 | cmpld r3,r4 |
139 | blelr /* Length <= size: return it. */ |
140 | mr r3,r4 /* Otherwise the match lies past the range: return size. */ |
141 | blr |
142 | |
143 | /* Deals with size <= 32. */ |
144 | .align 4 |
145 | L(small_range): |
146 | cmpldi r4,0 |
147 | beq L(end_max) /* size == 0: return it without loading from s. */ |
148 | |
149 | clrrdi r7,r7,3 /* Address of last doubleword. */ |
150 | |
151 | rlwinm r6,r3,3,26,28 /* Calculate padding: r6 = (s & 7) * 8 bits. */ |
152 | ld r12,0(r8) /* Load the aligned doubleword that contains s. */ |
153 | cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */ |
154 | #ifdef __LITTLE_ENDIAN__ |
155 | srd r10,r10,r6 /* Little-endian: drop the match bits of the low-order */ |
156 | sld r10,r10,r6 /* bytes that precede s. */ |
157 | #else |
158 | sld r10,r10,r6 /* Big-endian: drop the match bits of the high-order */ |
159 | srd r10,r10,r6 /* bytes that precede s. */ |
160 | #endif |
161 | cmpldi cr7,r10,0 |
162 | bne cr7,L(done) |
163 | |
164 | cmpld r8,r7 |
165 | beq L(end_max) /* The range ends inside the first doubleword. */ |
166 | |
167 | .p2align 5 |
168 | L(loop_small): |
169 | ldu r12,8(r8) /* Advance r8 by 8 and load that doubleword. */ |
170 | cmpb r10,r12,r0 |
171 | cmpldi cr6,r10,0 |
172 | bne cr6,L(done) |
173 | cmpld r8,r7 /* Stop after the doubleword holding the last byte in range. */ |
174 | bne L(loop_small) |
175 | mr r3,r4 /* No null byte within range: return size. */ |
176 | blr |
177 | |
178 | END (STRNLEN) |
179 | libc_hidden_def (__strnlen) /* Macro defined outside this file; note it names __strnlen literally, not STRNLEN. */ |
180 | weak_alias (__strnlen, strnlen) /* NOTE(review): presumably publishes strnlen as a weak alias of __strnlen - confirm against the macro definition. */ |
181 | libc_hidden_def (strnlen) /* NOTE(review): a file that overrides STRNLEN presumably has to redefine these three macros - confirm. */ |
182 | |