1/* Vector optimized 32/64 bit S/390 version of wcsrchr.
2 Copyright (C) 2015-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <ifunc-wcsrchr.h>
20#if HAVE_WCSRCHR_Z13
21
22# include "sysdep.h"
23# include "asm-syntax.h"
24
25 .text
26
27/* wchar_t *wcsrchr (const wchar_t *s, wchar_t c)
28 Locate the last character c in string.
29
30 Register usage:
31 -r0=loaded bytes in first part of s.
32 -r1=offset of the part of s with the last occurrence of c or -1 if not found.
33 -r2=s
34 -r3=c
35 -r4=tmp
36 -r5=current_len
37 -v16=part of s
38 -v17=index of found element
39 -v18=replicated c
40 -v19=part of s with last occurrence of c.
41 -v20=permute pattern
42*/
43ENTRY(WCSRCHR_Z13) /* In: r2 = s, r3 = c. Out: r2 = result pointer. */
44 .machine "z13"
45 .machinemode "zarch_nohighgprs"
46
47 vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */
48 lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */
49
50 tmll %r2,3 /* Test if s is 4-byte aligned? */
51 jne .Lfallback /* And use common-code variant if not. */
52
53 vlvgf %v18,%r3,0 /* Generate vector which elements are all c. */
54 vrepf %v18,%v18,0
55
56 lghi %r1,-1 /* Currently no c found. */
57 lghi %r5,0 /* current_len = 0. */
58
59 vfeezfs %v17,%v16,%v18 /* Find element equal or zero. */
60 vlgvb %r4,%v17,7 /* Load byte index of c/zero or 16. */
61 clrjl %r4,%r0,.Lfound_first_part /* Found c/zero in loaded bytes. */
62.Lalign:
63 /* Set current_len so that s + current_len is 16-byte aligned. */
64 risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2, i.e. %r2 and 15. */
65 lghi %r5,16 /* current_len = 16. */
66 slr %r5,%r4 /* current_len = bytes to next 16-byte boundary. */
67
68.Lloop:
69 vl %v16,0(%r5,%r2) /* Load s. */
70 vfeezfs %v17,%v16,%v18 /* Find element equal with zero search. */
71 jno .Lfound /* Found c/zero (cc=0|1|2). */
72 vl %v16,16(%r5,%r2) /* Same for the next three 16-byte parts. */
73 vfeezfs %v17,%v16,%v18
74 jno .Lfound16
75 vl %v16,32(%r5,%r2)
76 vfeezfs %v17,%v16,%v18
77 jno .Lfound32
78 vl %v16,48(%r5,%r2)
79 vfeezfs %v17,%v16,%v18
80 jno .Lfound48
81
82 aghi %r5,64 /* Skip the 64 bytes checked above. */
83 j .Lloop /* No character and no zero -> loop. */
84
85.Lfound48:
86 la %r5,16(%r5) /* Use la since aghi would clobber cc. */
87.Lfound32:
88 la %r5,16(%r5)
89.Lfound16:
90 la %r5,16(%r5)
91.Lfound:
92 je .Lzero /* Found zero, but no c before that zero. */
93 /* Save this part of s to check for further matches after reaching
94 the end of the complete string. */
95 vlr %v19,%v16
96 lgr %r1,%r5 /* Remember current_len of this part. */
97
98 jh .Lzero /* Found a zero after the found c. */
99 aghi %r5,16 /* Start search of next part of s. */
100 j .Lloop
101
102.Lfound_first_part:
103 /* This code is only executed if the found c/zero is within loaded
104 bytes. If no c/zero was found (cc==3) the found index = 16, thus
105 this code is not called.
106 Resulting condition code of vector find element equal:
107 cc==0: no c, found zero
108 cc==1: c found, no zero
109 cc==2: c found, found zero after c
110 cc==3: no c, no zero (this case can be ignored). */
111 je .Lzero /* Found zero, but no c before that zero. */
112
113 locgrne %r1,%r5 /* Mark c as found in first part of s. */
114 vlr %v19,%v16 /* Save first part for the final search. */
115
116 jl .Lalign /* No zero (e.g. if vr was fully loaded)
117 -> Align and loop afterwards. */
118
119 /* Found a zero in vr. If vr was not fully loaded due to block
120 boundary, the remaining bytes are filled with zero and we can't
121 rely on zero indication of condition code here! */
122
123 vfenezf %v17,%v16,%v16 /* Find zero. */
124 vlgvb %r4,%v17,7 /* Load byte index of zero or 16. */
125 clrjl %r4,%r0,.Lzero /* Zero within loaded bytes -> end. */
126 j .Lalign /* Align and loop afterwards. */
127
128.Lend_searched_zero:
129 vlgvb %r4,%v17,7 /* Load byte index of zero. */
130 algr %r5,%r4 /* Add it to current_len. */
131 la %r2,0(%r5,%r2) /* Return pointer to zero. */
132 br %r14
133
134.Lzero:
135 /* Reached end of string. Check if one c was found before. */
136 clije %r3,0,.Lend_searched_zero /* Found zero and c is zero. */
137
138 cgfi %r1,-1 /* No c found -> return NULL. */
139 locghie %r2,0
140 ber %r14
141
142 larl %r3,.Lpermute_mask /* Load permute mask. */
143 vl %v20,0(%r3)
144
145 /* c was found and is part of v19. */
146 vfenezf %v17,%v19,%v19 /* Find zero. */
147 vlgvb %r4,%v17,7 /* Load byte index of zero or 16. */
148 ahi %r4,3 /* Found zero index is first byte,
149 thus highest byte index is last byte of
150 wchar_t zero. */
151
152 clgfi %r5,0 /* Loaded byte count in v19 is 16, ... */
153 lochine %r0,16 /* ... if v19 is not the first part of s. */
154 ahi %r0,-1 /* Convert byte count to highest index. */
155
156 clr %r0,%r4
157 locrl %r4,%r0 /* r4 = min (zero-index, highest-index). */
158
159 /* Right-shift of v19 to mask bytes after zero. */
160 clije %r4,15,.Lzero_permute /* No shift is needed if highest index
161 in vr is 15. */
162 lhi %r0,15
163 slr %r0,%r4 /* Compute byte count for vector shift right. */
164 sll %r0,3 /* Convert to bit count. */
165 vlvgb %v17,%r0,7
166 vsrlb %v19,%v19,%v17 /* Vector shift right by byte by number of bytes
167 specified in bits 1-4 of byte 7 in v17. */
168
169 /* Reverse the order of the 4-byte elements in v19. */
170.Lzero_permute:
171 vperm %v19,%v19,%v19,%v20 /* Permute v19 to reversed order. */
172
173 /* Find c in reversed v19. */
174 vfeef %v19,%v19,%v18 /* Find c. */
175 la %r2,0(%r1,%r2) /* r2 = start of the part saved in v19. */
176 vlgvb %r3,%v19,7 /* Load byte index of c. */
177
178 /* Compute index in real s and return. */
179 slgr %r4,%r3
180 lay %r2,-3(%r4,%r2) /* Return pointer to last c. -3 is needed,
181 because the found byte index is reversed in
182 vector-register. Thus point to first byte of
183 wchar_t. */
184 br %r14
185.Lpermute_mask: /* vperm pattern: 4-byte elements in reverse order. */
186 .byte 0x0C,0x0D,0x0E,0x0F,0x08,0x09,0x0A,0x0B
187 .byte 0x04,0x05,0x06,0x07,0x00,0x01,0x02,0x03
188.Lfallback:
189 jg WCSRCHR_C /* s is not 4-byte aligned: use common-code variant. */
190END(WCSRCHR_Z13)
191
192# if ! HAVE_WCSRCHR_IFUNC
193strong_alias (WCSRCHR_Z13, wcsrchr) /* Without ifunc selection, presumably exports this variant as wcsrchr (confirm against the strong_alias definition). */
194# endif
195#endif
196

source code of glibc/sysdeps/s390/wcsrchr-vx.S