1 | /* Vector optimized 32/64 bit S/390 version of strstr. |
2 | Copyright (C) 2019-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <ifunc-strstr.h> |
20 | #if HAVE_STRSTR_ARCH13 |
21 | # include "sysdep.h" |
22 | # include "asm-syntax.h" |
23 | .text |
24 | |
/* char *strstr (const char *haystack=r2, const char *needle=r3)
   Locate a substring.

   Returns in r2:
   - haystack itself if needle is the empty string,
   - a pointer to the first occurrence of needle in haystack,
   - NULL (0) if the end of haystack is reached without a full match.
   Needles longer than 9 bytes are handed over to STRSTR_Z13 by a
   tail-jump with r2 and r3 still unchanged.

   Register usage:
   r1:  scratch: lcbb result (number of loadable bytes), afterwards the
	highest byte index for vll.
   r2:  current position in haystack; return value.
   r3:  needle.
   r4:  needle length (index of the zero byte or 16 if not found);
	at the end the index of the full match within v16.
   r5:  min-skip-partial-match-index = 17 - needle length.
   v16: current part of haystack.
   v17: index of the first zero in a partially loaded haystack part.
   v18: needle.
   v19: needle length in byte 7.
   v20: result of vstrs; byte 7 is read as match index.
   v21: highest loaded byte index in byte 7 (block-boundary paths).  */
ENTRY(STRSTR_ARCH13)
	.machine "arch13"
	.machinemode "zarch_nohighgprs"
	lcbb	%r1,0(%r3),6
	jo	.Lneedle_on_bb	/* Needle on block-boundary?  */
	/* No alignment hint is passed to vl: needle may be arbitrarily
	   aligned and the other vl instructions below do not use one
	   either.  */
	vl	%v18,0(%r3)	/* Load needle.  */
	vfenezb	%v19,%v18,%v18	/* v19[7] contains the length of needle.  */
.Lneedle_loaded:
	vlgvb	%r4,%v19,7	/* Get index of zero or 16 if not found.  */
	lghi	%r5,17		/* See below: min-skip-partial-match-index.  */
	/* Empty needle: return immediately.  r2 still contains the
	   unchanged haystack pointer.  */
	cgibe	%r4,0,0(%r14)	/* Test if needle is zero and return.  */

	/* The vstrs instruction is able to handle needles up to a length of 16,
	   but then we may have to load the next part of haystack with a
	   small offset.  This will be slow - see examples:
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =  mmmmmmmmmmmmmma0
	   => needle_len=15; vstrs reports a partial match; haystack+=2
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =        mmmmmmmma0000000
	   => needle_len=9; vstrs reports a partial match; haystack+=8
	   Thus needles longer than 9 bytes are processed by the z13
	   variant.  */
# if ! HAVE_STRSTR_Z13
#  error The arch13 variant of strstr needs the z13 variant of strstr!
# endif
	clgfi	%r4,9
	jgh	STRSTR_Z13	/* needle_len > 9: tail-jump to z13 variant.  */

	/* In case of a partial match, the vstrs instruction returns the index
	   of the partial match in a vector-register.  Then we have to
	   reload the string at the "current-position plus this index" and run
	   vstrs again in order to determine if it was a full match or no match.
	   Transferring this index from vr to gr, computing the
	   haystack-address and loading with vl is quite slow as all
	   instructions have data dependencies.  Thus we assume that a partial
	   match is always at the first possible index and just load the next
	   part of haystack from there instead of waiting until the correct
	   index is computed:
	   min-skip-partial-match-index = (16 - n_len) + 1  */
	sgr	%r5,%r4		/* r5 = 17 - n_len.  */

	/* Main loop, unrolled four times: each step checks 16 bytes of
	   haystack at offsets 0, 16, 32 and 48 from r2.  */
.Lloop:
	lcbb	%r1,0(%r2),6
	jo	.Lloop_haystack_on_bb	/* Haystack on block-boundary?  */
	vl	%v16,0(%r2)		/* Load next part of haystack.  */
.Lloop_haystack_loaded:
	/* Vector string search with zero search (cc=0 => no match).  */
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc
	lcbb	%r1,16(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb16
	vl	%v16,16(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc16
	lcbb	%r1,32(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb32
	vl	%v16,32(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc32
	lcbb	%r1,48(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb48
	vl	%v16,48(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc48
	la	%r2,64(%r2)
	j	.Lloop

	/* vstrs has set a nonzero cc in one of the unrolled steps.  The
	   fall-through chain advances r2 to the start of the haystack part
	   which was just checked (la does not change the cc).  */
.Lloop_vstrs_nonzero_cc48:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc32:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc16:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc:
	jh	.Lend_match_found /* cc == 2 (full match)  */
	jl	.Lend_no_match	/* cc == 1 (no match, end of string)  */
	/* cc == 3 (partial match) See above: min-skip-partial-match-index!  */
	lcbb	%r1,0(%r5,%r2),6
	la	%r2,0(%r5,%r2)
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	/* Partial-match loop, unrolled three times.  In each step r2 is
	   advanced by r5 before the cc of the last vstrs is tested for
	   "no match" (cc == 0); la leaves the cc unchanged.  */
.Lloop_vstrs_nonzero_cc_loop:
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop			/* cc == 0: back to the main loop.  */
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	j	.Lloop_vstrs_nonzero_cc_loop

.Lend_no_match:
	lghi	%r2,0			/* Return NULL.  */
	br	%r14
.Lend_match_found:
	vlgvb	%r4,%v20,7		/* Index of the match within v16.  */
	la	%r2,0(%r4,%r2)		/* Return pointer to the match.  */
	br	%r14

	/* Entered from the unrolled main loop: the fall-through chain
	   advances r2 to the haystack part for which lcbb was executed.  */
.Lloop_haystack_on_bb48:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb32:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb16:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb:
	/* Haystack located on page-boundary.  Load only the bytes up to the
	   boundary first and cross it only if the haystack does not end
	   within those bytes.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v16,%r1,0(%r2)
	vlvgb	%v21,%r1,7
	vfenezb	%v17,%v16,%v16	/* Search zero in loaded haystack bytes.  */
	veclb	%v17,%v21		/* Zero index <= loaded byte index?  */
	jle	.Lloop_haystack_loaded /* -> v16 contains full haystack.  */
	vl	%v16,0(%r2)	/* Load haystack beyond page boundary.  */
	j	.Lloop_haystack_loaded

.Lneedle_on_bb:
	/* Needle located on page-boundary.  Load only the bytes up to the
	   boundary first and cross it only if the needle does not end
	   within those bytes.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v18,%r1,0(%r3)
	vlvgb	%v21,%r1,7
	vfenezb	%v19,%v18,%v18	/* Search zero in loaded needle bytes.  */
	veclb	%v19,%v21	/* Zero index <= max loaded byte index?  */
	jle	.Lneedle_loaded	/* -> v18 contains full needle.  */
	vl	%v18,0(%r3)	/* Load needle beyond page boundary.  */
	vfenezb	%v19,%v18,%v18	/* Recompute the length on the full load.  */
	j	.Lneedle_loaded
END(STRSTR_ARCH13)
171 | |
# if ! HAVE_STRSTR_IFUNC
/* No ifunc dispatching for strstr: make this variant available under the
   name strstr.  */
strong_alias (STRSTR_ARCH13, strstr)
# endif

# if STRSTR_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc)
/* Within the shared libc, also make this variant available under the name
   __GI_strstr when the z13 variant is only used as fallback.  */
strong_alias (STRSTR_ARCH13, __GI_strstr)
# endif
179 | #endif |
180 | |