1 | /* Measure strstr functions. |
2 | Copyright (C) 2013-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #define MIN_PAGE_SIZE 131072 |
20 | #define TEST_MAIN |
21 | #define TEST_NAME "strstr" |
22 | #include "bench-string.h" |
23 | |
24 | #include "json-lib.h" |
25 | |
26 | static const char input[] = |
27 | "This manual is written with the assumption that you are at least " |
28 | "somewhat familiar with the C programming language and basic programming " |
29 | "concepts. Specifically, familiarity with ISO standard C (*note ISO " |
30 | "C::), rather than “traditional” pre-ISO C dialects, is assumed.\n" |
31 | |
32 | " The GNU C Library includes several “header files”, each of which " |
33 | "provides definitions and declarations for a group of related facilities; " |
34 | "this information is used by the C compiler when processing your program. " |
35 | "For example, the header file ‘stdio.h’ declares facilities for " |
36 | "performing input and output, and the header file ‘string.h’ declares " |
37 | "string processing utilities. The organization of this manual generally " |
38 | "follows the same division as the header files.\n" |
39 | |
40 | " If you are reading this manual for the first time, you should read " |
41 | "all of the introductory material and skim the remaining chapters. There " |
42 | "are a _lot_ of functions in the GNU C Library and it’s not realistic to " |
43 | "expect that you will be able to remember exactly _how_ to use each and " |
44 | "every one of them. It’s more important to become generally familiar " |
45 | "with the kinds of facilities that the library provides, so that when you " |
46 | "are writing your programs you can recognize _when_ to make use of " |
47 | "library functions, and _where_ in this manual you can find more specific " |
48 | "information about them.\n" ; |
49 | |
50 | /* Simple yet efficient strstr - for needles < 32 bytes it is 2-4 times |
51 | faster than the optimized twoway_strstr. */ |
52 | static char * |
53 | basic_strstr (const char *s1, const char *s2) |
54 | { |
55 | size_t i; |
56 | int c = s2[0]; |
57 | |
58 | if (c == 0) |
59 | return (char*)s1; |
60 | |
61 | for ( ; s1[0] != '\0'; s1++) |
62 | { |
63 | if (s1[0] != c) |
64 | continue; |
65 | for (i = 1; s2[i] != 0; i++) |
66 | if (s1[i] != s2[i]) |
67 | break; |
68 | if (s2[i] == '\0') |
69 | return (char*)s1; |
70 | } |
71 | |
72 | return NULL; |
73 | } |
74 | |
75 | #define RETURN_TYPE char * |
76 | #define AVAILABLE(h, h_l, j, n_l) \ |
77 | (((j) + (n_l) <= (h_l)) \ |
78 | || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \ |
79 | (j) + (n_l) <= (h_l))) |
80 | #define CHECK_EOL (1) |
81 | #define RET0_IF_0(a) if (!a) goto ret0 |
82 | #define FASTSEARCH(S,C,N) (void*) strchr ((void*)(S), (C)) |
83 | #define LONG_NEEDLE_THRESHOLD 32U |
84 | #define __strnlen strnlen |
85 | #include "string/str-two-way.h" |
86 | |
87 | /* Optimized Two-way implementation from GLIBC 2.29. */ |
88 | static char * |
89 | twoway_strstr (const char *haystack, const char *needle) |
90 | { |
91 | size_t needle_len; /* Length of NEEDLE. */ |
92 | size_t haystack_len; /* Known minimum length of HAYSTACK. */ |
93 | |
94 | /* Handle empty NEEDLE special case. */ |
95 | if (needle[0] == '\0') |
96 | return (char *) haystack; |
97 | |
98 | /* Skip until we find the first matching char from NEEDLE. */ |
99 | haystack = strchr (haystack, needle[0]); |
100 | if (haystack == NULL || needle[1] == '\0') |
101 | return (char *) haystack; |
102 | |
103 | /* Ensure HAYSTACK length is at least as long as NEEDLE length. |
104 | Since a match may occur early on in a huge HAYSTACK, use strnlen |
105 | and read ahead a few cachelines for improved performance. */ |
106 | needle_len = strlen (needle); |
107 | haystack_len = __strnlen (haystack, needle_len + 256); |
108 | if (haystack_len < needle_len) |
109 | return NULL; |
110 | |
111 | /* Check whether we have a match. This improves performance since we avoid |
112 | the initialization overhead of the two-way algorithm. */ |
113 | if (memcmp (haystack, needle, needle_len) == 0) |
114 | return (char *) haystack; |
115 | |
116 | /* Perform the search. Abstract memory is considered to be an array |
117 | of 'unsigned char' values, not an array of 'char' values. See |
118 | ISO C 99 section 6.2.6.1. */ |
119 | if (needle_len < LONG_NEEDLE_THRESHOLD) |
120 | return two_way_short_needle (haystack: (const unsigned char *) haystack, |
121 | haystack_len, |
122 | needle: (const unsigned char *) needle, needle_len); |
123 | return two_way_long_needle (haystack: (const unsigned char *) haystack, haystack_len, |
124 | needle: (const unsigned char *) needle, needle_len); |
125 | } |
126 | |
127 | typedef char *(*proto_t) (const char *, const char *); |
128 | |
129 | IMPL (strstr, 1) |
130 | IMPL (twoway_strstr, 0) |
131 | IMPL (basic_strstr, 0) |
132 | |
133 | static void |
134 | do_one_test (json_ctx_t *json_ctx, impl_t *impl, const char *s1, |
135 | const char *s2, char *exp_result) |
136 | { |
137 | size_t i, iters = INNER_LOOP_ITERS_SMALL / 8; |
138 | timing_t start, stop, cur; |
139 | char *res; |
140 | |
141 | TIMING_NOW (start); |
142 | for (i = 0; i < iters; ++i) |
143 | res = CALL (impl, s1, s2); |
144 | TIMING_NOW (stop); |
145 | |
146 | TIMING_DIFF (cur, start, stop); |
147 | |
148 | json_element_double (ctx: json_ctx, d: (double) cur / (double) iters); |
149 | |
150 | if (res != exp_result) |
151 | { |
152 | error (status: 0, errnum: 0, format: "Wrong result in function %s %s %s" , impl->name, |
153 | (res == NULL) ? "(null)" : res, |
154 | (exp_result == NULL) ? "(null)" : exp_result); |
155 | ret = 1; |
156 | } |
157 | } |
158 | |
159 | static void |
160 | do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len1, |
161 | size_t len2, int fail) |
162 | { |
163 | char *s1 = (char *) (buf1 + align1); |
164 | char *s2 = (char *) (buf2 + align2); |
165 | |
166 | size_t size = sizeof (input) - 1; |
167 | size_t pos = (len1 + len2) % size; |
168 | |
169 | char *ss2 = s2; |
170 | for (size_t l = len2; l > 0; l = l > size ? l - size : 0) |
171 | { |
172 | size_t t = l > size ? size : l; |
173 | if (pos + t <= size) |
174 | ss2 = mempcpy (ss2, input + pos, t); |
175 | else |
176 | { |
177 | ss2 = mempcpy (ss2, input + pos, size - pos); |
178 | ss2 = mempcpy (ss2, input, t - (size - pos)); |
179 | } |
180 | } |
181 | s2[len2] = '\0'; |
182 | |
183 | char *ss1 = s1; |
184 | for (size_t l = len1; l > 0; l = l > size ? l - size : 0) |
185 | { |
186 | size_t t = l > size ? size : l; |
187 | memcpy (ss1, input, t); |
188 | ss1 += t; |
189 | } |
190 | |
191 | if (!fail) |
192 | memcpy (s1 + len1 - len2, s2, len2); |
193 | s1[len1] = '\0'; |
194 | |
195 | /* Remove any accidental matches except for the last if !fail. */ |
196 | for (ss1 = basic_strstr (s1, s2); ss1; ss1 = basic_strstr (s1: ss1 + 1, s2)) |
197 | if (fail || ss1 != s1 + len1 - len2) |
198 | ++ss1[len2 / 2]; |
199 | |
200 | json_element_object_begin (ctx: json_ctx); |
201 | json_attr_uint (ctx: json_ctx, name: "len_haystack" , d: len1); |
202 | json_attr_uint (ctx: json_ctx, name: "len_needle" , d: len2); |
203 | json_attr_uint (ctx: json_ctx, name: "align_haystack" , d: align1); |
204 | json_attr_uint (ctx: json_ctx, name: "align_needle" , d: align2); |
205 | json_attr_uint (ctx: json_ctx, name: "fail" , d: fail); |
206 | |
207 | json_array_begin (ctx: json_ctx, name: "timings" ); |
208 | |
209 | FOR_EACH_IMPL (impl, 0) |
210 | do_one_test (json_ctx, impl, s1, s2, exp_result: fail ? NULL : s1 + len1 - len2); |
211 | |
212 | json_array_end (ctx: json_ctx); |
213 | json_element_object_end (ctx: json_ctx); |
214 | |
215 | } |
216 | |
217 | /* Test needles which exhibit worst-case performance. This shows that |
218 | basic_strstr is quadratic and thus unsuitable for large needles. |
219 | On the other hand Two-way and skip table implementations are linear with |
220 | increasing needle sizes. The slowest cases of the two implementations are |
221 | within a factor of 2 on several different microarchitectures. */ |
222 | |
223 | static void |
224 | test_hard_needle (json_ctx_t *json_ctx, size_t ne_len, size_t hs_len) |
225 | { |
226 | char *ne = (char *) buf1; |
227 | char *hs = (char *) buf2; |
228 | |
229 | /* Hard needle for strstr algorithm using skip table. This results in many |
230 | memcmp calls comparing most of the needle. */ |
231 | { |
232 | memset (ne, 'a', ne_len); |
233 | ne[ne_len] = '\0'; |
234 | ne[ne_len - 14] = 'b'; |
235 | |
236 | memset (hs, 'a', hs_len); |
237 | for (size_t i = ne_len; i <= hs_len; i += ne_len) |
238 | { |
239 | hs[i - 5] = 'b'; |
240 | hs[i - 62] = 'b'; |
241 | } |
242 | |
243 | json_element_object_begin (ctx: json_ctx); |
244 | json_attr_uint (ctx: json_ctx, name: "len_haystack" , d: hs_len); |
245 | json_attr_uint (ctx: json_ctx, name: "len_needle" , d: ne_len); |
246 | json_attr_uint (ctx: json_ctx, name: "align_haystack" , d: 0); |
247 | json_attr_uint (ctx: json_ctx, name: "align_needle" , d: 0); |
248 | json_attr_uint (ctx: json_ctx, name: "fail" , d: 1); |
249 | json_attr_string (ctx: json_ctx, name: "desc" , s: "Difficult skiptable(0)" ); |
250 | |
251 | json_array_begin (ctx: json_ctx, name: "timings" ); |
252 | |
253 | FOR_EACH_IMPL (impl, 0) |
254 | do_one_test (json_ctx, impl, s1: hs, s2: ne, NULL); |
255 | |
256 | json_array_end (ctx: json_ctx); |
257 | json_element_object_end (ctx: json_ctx); |
258 | } |
259 | |
260 | /* 2nd hard needle for strstr algorithm using skip table. This results in |
261 | many memcmp calls comparing most of the needle. */ |
262 | { |
263 | memset (ne, 'a', ne_len); |
264 | ne[ne_len] = '\0'; |
265 | ne[ne_len - 6] = 'b'; |
266 | |
267 | memset (hs, 'a', hs_len); |
268 | for (size_t i = ne_len; i <= hs_len; i += ne_len) |
269 | { |
270 | hs[i - 5] = 'b'; |
271 | hs[i - 6] = 'b'; |
272 | } |
273 | |
274 | json_element_object_begin (ctx: json_ctx); |
275 | json_attr_uint (ctx: json_ctx, name: "len_haystack" , d: hs_len); |
276 | json_attr_uint (ctx: json_ctx, name: "len_needle" , d: ne_len); |
277 | json_attr_uint (ctx: json_ctx, name: "align_haystack" , d: 0); |
278 | json_attr_uint (ctx: json_ctx, name: "align_needle" , d: 0); |
279 | json_attr_uint (ctx: json_ctx, name: "fail" , d: 1); |
280 | json_attr_string (ctx: json_ctx, name: "desc" , s: "Difficult skiptable(1)" ); |
281 | |
282 | json_array_begin (ctx: json_ctx, name: "timings" ); |
283 | |
284 | FOR_EACH_IMPL (impl, 0) |
285 | do_one_test (json_ctx, impl, s1: hs, s2: ne, NULL); |
286 | |
287 | json_array_end (ctx: json_ctx); |
288 | json_element_object_end (ctx: json_ctx); |
289 | } |
290 | |
291 | /* Hard needle for Two-way algorithm - the random input causes a large number |
292 | of branch mispredictions which significantly reduces performance on modern |
293 | micro architectures. */ |
294 | { |
295 | for (int i = 0; i < hs_len; i++) |
296 | hs[i] = (rand () & 255) > 155 ? 'a' : 'b'; |
297 | hs[hs_len] = 0; |
298 | |
299 | memset (ne, 'a', ne_len); |
300 | ne[ne_len - 2] = 'b'; |
301 | ne[0] = 'b'; |
302 | ne[ne_len] = 0; |
303 | |
304 | json_element_object_begin (ctx: json_ctx); |
305 | json_attr_uint (ctx: json_ctx, name: "len_haystack" , d: hs_len); |
306 | json_attr_uint (ctx: json_ctx, name: "len_needle" , d: ne_len); |
307 | json_attr_uint (ctx: json_ctx, name: "align_haystack" , d: 0); |
308 | json_attr_uint (ctx: json_ctx, name: "align_needle" , d: 0); |
309 | json_attr_uint (ctx: json_ctx, name: "fail" , d: 1); |
310 | json_attr_string (ctx: json_ctx, name: "desc" , s: "Difficult 2-way" ); |
311 | |
312 | json_array_begin (ctx: json_ctx, name: "timings" ); |
313 | |
314 | FOR_EACH_IMPL (impl, 0) |
315 | do_one_test (json_ctx, impl, s1: hs, s2: ne, NULL); |
316 | |
317 | json_array_end (ctx: json_ctx); |
318 | json_element_object_end (ctx: json_ctx); |
319 | } |
320 | |
321 | /* Hard needle for standard algorithm testing first few characters of |
322 | * needle. */ |
323 | { |
324 | for (int i = 0; i < hs_len; i++) |
325 | hs[i] = (rand () & 255) >= 128 ? 'a' : 'b'; |
326 | hs[hs_len] = 0; |
327 | |
328 | for (int i = 0; i < ne_len; i++) |
329 | { |
330 | if (i % 3 == 0) |
331 | ne[i] = 'a'; |
332 | else if (i % 3 == 1) |
333 | ne[i] = 'b'; |
334 | else |
335 | ne[i] = 'c'; |
336 | } |
337 | ne[ne_len] = 0; |
338 | |
339 | json_element_object_begin (ctx: json_ctx); |
340 | json_attr_uint (ctx: json_ctx, name: "len_haystack" , d: hs_len); |
341 | json_attr_uint (ctx: json_ctx, name: "len_needle" , d: ne_len); |
342 | json_attr_uint (ctx: json_ctx, name: "align_haystack" , d: 0); |
343 | json_attr_uint (ctx: json_ctx, name: "align_needle" , d: 0); |
344 | json_attr_uint (ctx: json_ctx, name: "fail" , d: 1); |
345 | json_attr_string (ctx: json_ctx, name: "desc" , s: "Difficult testing first 2" ); |
346 | |
347 | json_array_begin (ctx: json_ctx, name: "timings" ); |
348 | |
349 | FOR_EACH_IMPL (impl, 0) |
350 | do_one_test (json_ctx, impl, s1: hs, s2: ne, NULL); |
351 | |
352 | json_array_end (ctx: json_ctx); |
353 | json_element_object_end (ctx: json_ctx); |
354 | } |
355 | } |
356 | |
357 | static int |
358 | test_main (void) |
359 | { |
360 | json_ctx_t json_ctx; |
361 | test_init (); |
362 | |
363 | json_init (ctx: &json_ctx, indent_level: 0, stdout); |
364 | |
365 | json_document_begin (ctx: &json_ctx); |
366 | json_attr_string (ctx: &json_ctx, name: "timing_type" , TIMING_TYPE); |
367 | |
368 | json_attr_object_begin (ctx: &json_ctx, name: "functions" ); |
369 | json_attr_object_begin (ctx: &json_ctx, TEST_NAME); |
370 | json_attr_string (ctx: &json_ctx, name: "bench-variant" , s: "" ); |
371 | |
372 | json_array_begin (ctx: &json_ctx, name: "ifuncs" ); |
373 | FOR_EACH_IMPL (impl, 0) |
374 | json_element_string (ctx: &json_ctx, s: impl->name); |
375 | json_array_end (ctx: &json_ctx); |
376 | |
377 | json_array_begin (ctx: &json_ctx, name: "results" ); |
378 | |
379 | for (size_t hlen = 8; hlen <= 256;) |
380 | for (size_t klen = 1; klen <= 16; klen++) |
381 | { |
382 | do_test (json_ctx: &json_ctx, align1: 1, align2: 3, len1: hlen, len2: klen, fail: 0); |
383 | do_test (json_ctx: &json_ctx, align1: 0, align2: 9, len1: hlen, len2: klen, fail: 1); |
384 | |
385 | do_test (json_ctx: &json_ctx, align1: 1, align2: 3, len1: hlen + 1, len2: klen, fail: 0); |
386 | do_test (json_ctx: &json_ctx, align1: 0, align2: 9, len1: hlen + 1, len2: klen, fail: 1); |
387 | |
388 | do_test (json_ctx: &json_ctx, align1: getpagesize () - 15, align2: 9, len1: hlen, len2: klen, fail: 1); |
389 | if (hlen < 64) |
390 | { |
391 | hlen += 8; |
392 | } |
393 | else |
394 | { |
395 | hlen += 32; |
396 | } |
397 | } |
398 | |
399 | for (size_t hlen = 256; hlen <= 65536; hlen *= 2) |
400 | for (size_t klen = 4; klen <= 256; klen *= 2) |
401 | { |
402 | do_test (json_ctx: &json_ctx, align1: 1, align2: 11, len1: hlen, len2: klen, fail: 0); |
403 | do_test (json_ctx: &json_ctx, align1: 14, align2: 5, len1: hlen, len2: klen, fail: 1); |
404 | |
405 | do_test (json_ctx: &json_ctx, align1: 1, align2: 11, len1: hlen + 1, len2: klen + 1, fail: 0); |
406 | do_test (json_ctx: &json_ctx, align1: 14, align2: 5, len1: hlen + 1, len2: klen + 1, fail: 1); |
407 | |
408 | do_test (json_ctx: &json_ctx, align1: 1, align2: 11, len1: hlen + 1, len2: klen, fail: 0); |
409 | do_test (json_ctx: &json_ctx, align1: 14, align2: 5, len1: hlen + 1, len2: klen, fail: 1); |
410 | |
411 | do_test (json_ctx: &json_ctx, align1: getpagesize () - 15, align2: 5, len1: hlen + 1, len2: klen, fail: 1); |
412 | } |
413 | |
414 | test_hard_needle (json_ctx: &json_ctx, ne_len: 64, hs_len: 65536); |
415 | test_hard_needle (json_ctx: &json_ctx, ne_len: 256, hs_len: 65536); |
416 | test_hard_needle (json_ctx: &json_ctx, ne_len: 1024, hs_len: 65536); |
417 | |
418 | json_array_end (ctx: &json_ctx); |
419 | json_attr_object_end (ctx: &json_ctx); |
420 | json_attr_object_end (ctx: &json_ctx); |
421 | json_document_end (ctx: &json_ctx); |
422 | |
423 | return ret; |
424 | } |
425 | |
426 | #include <support/test-driver.c> |
427 | |