1 | /* Vector optimized 32/64 bit S/390 version of memmem. |
2 | Copyright (C) 2019-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <ifunc-memmem.h> |
20 | #if HAVE_MEMMEM_ARCH13 |
21 | # include "sysdep.h" |
22 | # include "asm-syntax.h" |
23 | .text |
24 | |
25 | /* void *memmem(const void *haystack=r2, size_t haystacklen=r3, |
26 | const void *needle=r4, size_t needlelen=r5); |
27 | Locate the first occurrence of needle in haystack. Returns a pointer to the match (in r2) or NULL if there is none. */ |
28 | ENTRY(MEMMEM_ARCH13) /* In: r2=haystack, r3=haystacklen, r4=needle, r5=needlelen. Out: r2. */ |
29 | .machine "arch13" |
30 | .machinemode "zarch_nohighgprs" |
31 | # if ! defined __s390x__ |
32 | llgfr %r3,%r3 /* Zero-extend haystacklen to 64 bit. */ |
33 | llgfr %r5,%r5 /* Zero-extend needlelen to 64 bit. */ |
34 | llgfr %r4,%r4 /* Zero-extend needle pointer to 64 bit. */ |
35 | llgfr %r2,%r2 /* Zero-extend haystack pointer to 64 bit. */ |
36 | # endif /* ! defined __s390x__ */ |
37 | clgrjl %r3,%r5,.Lend_no_match /* Haystack < needle? */ |
38 | |
39 | /* Jump to fallback if needle > 9. See also strstr-arch13.S. */ |
40 | # if ! HAVE_MEMMEM_Z13 |
41 | # error The arch13 variant of memmem needs the z13 variant of memmem! |
42 | # endif |
43 | clgfi %r5,9 /* Needle-len > 9? */ |
44 | jgh MEMMEM_Z13 /* Yes: continue in the z13 variant with the unchanged arguments. */ |
45 | |
46 | aghik %r0,%r5,-1 /* vll needs highest index. */ |
47 | bc 4,0(%r14) /* cc==1 (result < 0): needle-len == 0, return with r2 still the haystack. */ |
48 | vll %v18,%r0,0(%r4) /* Load needle. */ |
49 | vlvgb %v19,%r5,7 /* v19[7] contains length of needle. */ |
50 | |
51 | clgijh %r3,16,.Lhaystack_larger_16 /* Haystack-len > 16? */ |
52 | .Lhaystack_smaller_16_on_bb: /* Load exactly r3 bytes at r2; also the target of the lcbb check in .Lhaystack_too_small. */ |
53 | aghik %r0,%r3,-1 /* vll needs highest index. */ |
54 | vll %v16,%r0,0(%r2) /* Load haystack. */ |
55 | .Lhaystack_smaller_16: /* Here: v16 = haystack part at r2, r3 = number of valid bytes in it. */ |
56 | sgr %r3,%r5 /* r3 = largest valid match-index. */ |
57 | jl .Lend_no_match /* Haystack-len < needle-len? */ |
58 | vstrs %v20,%v16,%v18,%v19,0,0 |
59 | /* Vector string search without zero search where v20 will contain |
60 | the index of a partial/full match or 16 (index is named k). |
61 | cc=0 (no match; k=16): .Lend_no_match |
62 | cc=1 (only available with zero-search): Ignore |
63 | cc=2 (full match; k<16): Needle found, but could be beyond haystack! |
64 | cc=3 (partial match; k<16): Always at end of v16 and thus beyond! */ |
65 | brc 9,.Lend_no_match /* Jump away if cc == 0 || cc == 3. */ |
66 | vlgvb %r1,%v20,7 /* r1 = k (match-index). */ |
67 | /* Verify that the full-match (cc=2) is valid! */ |
68 | clgrjh %r1,%r3,.Lend_no_match /* Jump away if match is beyond. */ |
69 | la %r2,0(%r1,%r2) /* Return address of the match: r2 + k. */ |
70 | br %r14 |
71 | .Lend_no_match: |
72 | lghi %r2,0 /* Return NULL. */ |
73 | br %r14 |
74 | |
75 | .Lhaystack_larger_16: |
76 | vl %v16,0(%r2) /* Load first 16 bytes of haystack. */ |
77 | lghi %r1,17 /* r1 = 17; needle-len is subtracted below. */ |
78 | lay %r4,-16(%r3,%r2) /* Boundary for loading with vl. */ |
79 | lay %r0,-64(%r3,%r2) /* Boundary for loading with 4xvl. */ |
80 | /* See also strstr-arch13.S: |
81 | min-skip-partial-match-index = (16 - n_len) + 1 */ |
82 | sgr %r1,%r5 /* r1 = min-skip-partial-match-index. */ |
83 | clgfi %r3,64 /* Set Boundary to zero ... */ |
84 | la %r3,0(%r3,%r2) /* r3 = haystack + haystacklen (end); la leaves the cc of clgfi intact. */ |
85 | locghil %r0,0 /* ... if haystack < 64bytes. */ |
86 | jh .Lloop64 /* Haystack-len > 64: start with the 4x unrolled loop. */ |
87 | .Lloop: /* Here: v16 holds the 16 bytes at r2. */ |
88 | la %r2,16(%r2) /* r2 = end of the part in v16. */ |
89 | /* Vector string search without zero search. cc=0 => no match. */ |
90 | vstrs %v20,%v16,%v18,%v19,0,0 |
91 | jne .Lloop_vstrs_nonzero_cc /* cc != 0: partial or full match. */ |
92 | clgrjh %r2,%r4,.Lhaystack_too_small /* Less than 16 bytes left? */ |
93 | .Lloop16: |
94 | vl %v16,0(%r2) /* Next part of haystack. */ |
95 | la %r2,16(%r2) /* r2 = end of the part in v16. */ |
96 | vstrs %v20,%v16,%v18,%v19,0,0 |
97 | jne .Lloop_vstrs_nonzero_cc /* cc != 0: partial or full match. */ |
98 | clgrjle %r2,%r4,.Lloop16 /* Loop while a full 16 byte load is possible. */ |
99 | .Lhaystack_too_small: |
100 | sgr %r3,%r2 /* r3 = (haystack + len) - curr_pos */ |
101 | je .Lend_no_match /* Remaining haystack is empty. */ |
102 | lcbb %r0,0(%r2),6 /* r0 = number of bytes up to the next 4K block boundary (at most 16). */ |
103 | jo .Lhaystack_smaller_16_on_bb /* cc==3: less than 16 bytes to the boundary, load only r3 bytes with vll. */ |
104 | vl %v16,0(%r2) /* Load haystack. */ |
105 | j .Lhaystack_smaller_16 |
106 | |
107 | .Lend_match_found: /* Here: r2 = address of the part in v16 plus r1. */ |
108 | vlgvb %r4,%v20,7 /* r4 = k (match-index). */ |
109 | sgr %r2,%r1 /* r2 = address of the part in v16. */ |
110 | la %r2,0(%r4,%r2) /* Return address of the match: part-address + k. */ |
111 | br %r14 |
112 | |
113 | .Lloop_vstrs_nonzero_cc32: /* From .Lloop64, part at offset 32: r2 += 48 in total. */ |
114 | la %r2,16(%r2) |
115 | .Lloop_vstrs_nonzero_cc16: /* From .Lloop64, part at offset 16: r2 += 32 in total. */ |
116 | la %r2,16(%r2) |
117 | .Lloop_vstrs_nonzero_cc0: /* From .Lloop64, part at offset 0: r2 += 16. */ |
118 | la %r2,16(%r2) |
119 | .Lloop_vstrs_nonzero_cc: /* Here: r2 = end of the part in v16; cc of vstrs (2 or 3) is still set as la/lay do not change it. */ |
120 | lay %r2,-16(%r1,%r2) /* Compute next load address. */ |
121 | jh .Lend_match_found /* cc == 2 (full match) */ |
122 | clgrjh %r2,%r4,.Lhaystack_too_small /* Partial match (cc == 3): less than 16 bytes left? */ |
123 | vl %v16,0(%r2) /* Reload haystack at the skipped-to position. */ |
124 | .Lloop_vstrs_nonzero_cc_loop: |
125 | la %r2,0(%r1,%r2) /* r2 = next load address (part-address + r1). */ |
126 | vstrs %v20,%v16,%v18,%v19,0,0 |
127 | jh .Lend_match_found /* cc == 2 (full match) */ |
128 | clgrjh %r2,%r4,.Lhaystack_too_small /* Less than 16 bytes left? (The cc of vstrs is kept for the jo below.) */ |
129 | vl %v16,0(%r2) /* Next part of haystack. */ |
130 | jo .Lloop_vstrs_nonzero_cc_loop /* cc == 3 (partial match again) */ |
131 | /* Case: no-match. */ |
132 | clgrjh %r2,%r0,.Lloop /* Jump away if haystack has less than 64b. */ |
133 | .Lloop64: /* Here: v16 holds the 16 bytes at r2 and at least 64 bytes remain at r2. */ |
134 | vstrs %v20,%v16,%v18,%v19,0,0 |
135 | jne .Lloop_vstrs_nonzero_cc0 /* cc != 0 in part at offset 0. */ |
136 | vl %v16,16(%r2) /* Next part of haystack. */ |
137 | vstrs %v20,%v16,%v18,%v19,0,0 |
138 | jne .Lloop_vstrs_nonzero_cc16 /* cc != 0 in part at offset 16. */ |
139 | vl %v16,32(%r2) /* Next part of haystack. */ |
140 | vstrs %v20,%v16,%v18,%v19,0,0 |
141 | jne .Lloop_vstrs_nonzero_cc32 /* cc != 0 in part at offset 32. */ |
142 | vl %v16,48(%r2) /* Next part of haystack. */ |
143 | la %r2,64(%r2) /* r2 = end of the 4th part (la does not change cc). */ |
144 | vstrs %v20,%v16,%v18,%v19,0,0 |
145 | jne .Lloop_vstrs_nonzero_cc /* cc != 0 in part at offset 48. */ |
146 | clgrjh %r2,%r4,.Lhaystack_too_small /* Less than 16 bytes left? */ |
147 | vl %v16,0(%r2) /* Next part of haystack. */ |
148 | clgrjle %r2,%r0,.Lloop64 /* Loop while 64 bytes can still be loaded. */ |
149 | j .Lloop /* Otherwise continue with 16 byte steps. */ |
150 | END(MEMMEM_ARCH13) |
151 | |
152 | # if ! HAVE_MEMMEM_IFUNC /* No ifunc selection: export this variant directly. */ |
153 | strong_alias (MEMMEM_ARCH13, __memmem) /* __memmem names the arch13 implementation. */ |
154 | weak_alias (__memmem, memmem) /* memmem is a weak alias of __memmem. */ |
155 | # endif |
156 | |
157 | # if MEMMEM_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc) /* NOTE(review): presumably the libc-internal __GI_ symbols are provided here because the z13 variant does not define them in this configuration - confirm against ifunc-memmem.h. */ |
158 | weak_alias (MEMMEM_ARCH13, __GI_memmem) |
159 | strong_alias (MEMMEM_ARCH13, __GI___memmem) |
160 | # endif |
161 | #endif |
162 | |