1 | /* Simple transformations functions - s390 version. |
2 | Copyright (C) 2016-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #if defined HAVE_S390_VX_ASM_SUPPORT |
20 | # include <ifunc-resolve.h> |
21 | |
/* ASM_CLOBBER_VR (NR) appends ", NR" to an inline-asm clobber list when
   HAVE_S390_VX_GCC_SUPPORT is defined and expands to nothing otherwise
   (presumably because the compiler then does not accept vector register
   names as clobbers -- confirm against the configure check).  */
# if defined HAVE_S390_VX_GCC_SUPPORT
# define ASM_CLOBBER_VR(NR) , NR
# else
# define ASM_CLOBBER_VR(NR)
# endif
27 | |
/* Name builders: ICONV_C_NAME (foo) yields __foo_c (plain C variant),
   ICONV_VX_NAME (foo) yields __foo_vx (vector variant).  */
# define ICONV_C_NAME(NAME) __##NAME##_c
# define ICONV_VX_NAME(NAME) __##NAME##_vx
/* ICONV_VX_IFUNC (FUNC) binds the symbol FUNC to one of the two variants.  */
# ifdef HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
/* We support z13 instructions by default -> Just use the vector variant. */
# define ICONV_VX_IFUNC(FUNC) strong_alias (ICONV_VX_NAME (FUNC), FUNC)
# else
/* We have to use ifunc to determine if z13 instructions are supported. */
/* The selector expression picks the vector variant if HWCAP_S390_VX is set
   in hwcap and the C variant otherwise.  */
# define ICONV_VX_IFUNC(FUNC) \
  s390_libc_ifunc_expr (ICONV_C_NAME (FUNC), FUNC, \
                        (hwcap & HWCAP_S390_VX) \
                        ? ICONV_VX_NAME (FUNC) \
                        : ICONV_C_NAME (FUNC) \
                        )
# endif
/* Declare the static symbol __NAME_vx_single as an alias of NAME_single,
   using the type of NAME_single.  NOTE(review): presumably this lets the
   vector variant reuse the "_single" helper generated for the C variant by
   the earlier inclusion of iconv/gconv_simple.c -- confirm.  */
# define ICONV_VX_SINGLE(NAME) \
  static __typeof (NAME##_single) __##NAME##_vx_single __attribute__((alias(#NAME "_single")));
44 | |
/* Generate the transformations which are used, if the target machine does not
   support vector instructions. */
/* Each public transformation name is temporarily redefined to its
   ICONV_C_NAME form (__..._c) so that the definitions coming from the
   generic iconv/gconv_simple.c are emitted under the "_c" names.  */
# define __gconv_transform_ascii_internal \
  ICONV_C_NAME (__gconv_transform_ascii_internal)
# define __gconv_transform_internal_ascii \
  ICONV_C_NAME (__gconv_transform_internal_ascii)
# define __gconv_transform_internal_ucs4le \
  ICONV_C_NAME (__gconv_transform_internal_ucs4le)
# define __gconv_transform_ucs4_internal \
  ICONV_C_NAME (__gconv_transform_ucs4_internal)
# define __gconv_transform_ucs4le_internal \
  ICONV_C_NAME (__gconv_transform_ucs4le_internal)
# define __gconv_transform_ucs2_internal \
  ICONV_C_NAME (__gconv_transform_ucs2_internal)
# define __gconv_transform_ucs2reverse_internal \
  ICONV_C_NAME (__gconv_transform_ucs2reverse_internal)
# define __gconv_transform_internal_ucs2 \
  ICONV_C_NAME (__gconv_transform_internal_ucs2)
# define __gconv_transform_internal_ucs2reverse \
  ICONV_C_NAME (__gconv_transform_internal_ucs2reverse)


# include <iconv/gconv_simple.c>

/* Drop the renaming again so that the names can be used for the vector
   variants and the ICONV_VX_IFUNC bindings below.  */
# undef __gconv_transform_ascii_internal
# undef __gconv_transform_internal_ascii
# undef __gconv_transform_internal_ucs4le
# undef __gconv_transform_ucs4_internal
# undef __gconv_transform_ucs4le_internal
# undef __gconv_transform_ucs2_internal
# undef __gconv_transform_ucs2reverse_internal
# undef __gconv_transform_internal_ucs2
# undef __gconv_transform_internal_ucs2reverse
78 | |
/* Now define the functions with vector support. */
/* CONVERT_32BIT_SIZE_T (REG) is pasted at the start of each asm template.
   With __s390x__ defined it is empty; otherwise it emits "llgfr REG,REG" for
   the named asm operand (presumably to zero-extend a 32-bit size_t to the
   full 64-bit register before 64-bit instructions use it -- confirm).  */
# if defined __s390x__
# define CONVERT_32BIT_SIZE_T(REG)
# else
# define CONVERT_32BIT_SIZE_T(REG) "llgfr %" #REG ",%" #REG "\n\t"
# endif
85 | |
/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
/* Template parameters for the iconv/loop.c and iconv/skeleton.c inclusions
   that follow the BODY definition (their exact meaning is defined by those
   templates, which are not part of this file): 1 byte per input character,
   4 bytes per output character, vector-variant names for the loop and the
   transformation function.  */
# define DEFINE_INIT 0
# define DEFINE_FINI 0
# define MIN_NEEDED_FROM 1
# define MIN_NEEDED_TO 4
# define FROM_DIRECTION 1
# define FROM_LOOP ICONV_VX_NAME (ascii_internal_loop)
# define TO_LOOP ICONV_VX_NAME (ascii_internal_loop) /* This is not used. */
# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ascii_internal)
# define ONE_DIRECTION 1

# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define LOOPFCT FROM_LOOP
/* Error handling for one invalid input byte; used by BODY_ORIG and by the
   vector BODY once the asm has stopped in front of the offending byte.  */
# define BODY_ORIG_ERROR \
  /* The value is too large. We don't try transliteration here since \
     this is not an error because of the lack of possibilities to \
     represent the result. This is a genuine bug in the input since \
     ASCII does not allow such values. */ \
  STANDARD_FROM_LOOP_ERR_HANDLER (1);
106 | |
/* Scalar conversion of a single character: a byte above 0x7f takes the
   error path, any other byte is widened to one 32-bit output character.  */
# define BODY_ORIG \
  { \
    if (__glibc_unlikely ('\x7f' < *inptr)) \
      { \
        BODY_ORIG_ERROR \
      } \
    else \
      { \
        /* Valid 7-bit value: store it as uint32_t, then advance both \
           pointers. */ \
        *((uint32_t *) outptr) = *inptr; \
        ++inptr; \
        outptr += sizeof (uint32_t); \
      } \
  }
/* Vector loop body for ASCII -> internal.  LEN is the number of characters
   that fit into both the input and the output buffer.  The asm converts
   16 input bytes per iteration and then a tail of 1..15 bytes.  It stops in
   front of the first byte > 0x7f; in that case LEN is left non-zero and
   the error path is taken for the byte at inptr.  */
# define BODY \
  { \
    size_t len = inend - inptr; \
    if (len > (outend - outptr) / 4) \
      len = (outend - outptr) / 4; \
    size_t loop_count, tmp; \
    __asm__ volatile (".machine push\n\t" \
                      ".machine \"z13\"\n\t" \
                      ".machinemode \"zarch_nohighgprs\"\n\t" \
                      CONVERT_32BIT_SIZE_T ([R_LEN]) \
                      " vrepib %%v30,0x7f\n\t" /* For compare > 0x7f. */ \
                      /* R_LI = number of full 16-byte blocks. */ \
                      " srlg %[R_LI],%[R_LEN],4\n\t" \
                      /* v31: range compare control bytes (0x20 in every \
                         byte; used as the "greater than" selector). */ \
                      " vrepib %%v31,0x20\n\t" \
                      " clgije %[R_LI],0,1f\n\t" \
                      "0: \n\t" /* Handle 16-byte blocks. */ \
                      " vl %%v16,0(%[R_IN])\n\t" \
                      /* Checking for values > 0x7f. */ \
                      " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \
                      " jno 10f\n\t" \
                      /* Enlarge to UCS4. */ \
                      " vuplhb %%v17,%%v16\n\t" \
                      " vupllb %%v18,%%v16\n\t" \
                      " vuplhh %%v19,%%v17\n\t" \
                      " vupllh %%v20,%%v17\n\t" \
                      " vuplhh %%v21,%%v18\n\t" \
                      " vupllh %%v22,%%v18\n\t" \
                      /* Store 64bytes to buf_out. */ \
                      " vstm %%v19,%%v22,0(%[R_OUT])\n\t" \
                      " la %[R_IN],16(%[R_IN])\n\t" \
                      " la %[R_OUT],64(%[R_OUT])\n\t" \
                      " brctg %[R_LI],0b\n\t" \
                      /* R_LEN &= 15: characters left after the blocks. */ \
                      " lghi %[R_LI],15\n\t" \
                      " ngr %[R_LEN],%[R_LI]\n\t" \
                      " je 20f\n\t" /* Jump away if no remaining bytes. */ \
                      /* Handle remaining bytes. */ \
                      "1: aghik %[R_LI],%[R_LEN],-1\n\t" \
                      " jl 20f\n\t" /* Jump away if no remaining bytes. */ \
                      " vll %%v16,%[R_LI],0(%[R_IN])\n\t" \
                      /* Checking for values > 0x7f. */ \
                      " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \
                      /* R_TMP = byte index reported by the compare.  If it \
                         lies beyond the last loaded byte (R_LI), all R_LEN \
                         bytes are valid: convert R_LEN bytes and set \
                         R_LEN to 0. */ \
                      " vlgvb %[R_TMP],%%v17,7\n\t" \
                      " clr %[R_TMP],%[R_LI]\n\t" \
                      " locrh %[R_TMP],%[R_LEN]\n\t" \
                      " locghih %[R_LEN],0\n\t" \
                      " j 12f\n\t" \
                      "10:\n\t" \
                      /* Found a value > 0x7f. \
                         Store the preceding chars. */ \
                      " vlgvb %[R_TMP],%%v17,7\n\t" \
                      /* R_TMP = number of valid characters in v16.  Advance \
                         the input pointer, then compute the number of output \
                         bytes minus one for the length-controlled stores. */ \
                      "12: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
                      " sllk %[R_TMP],%[R_TMP],2\n\t" \
                      " ahi %[R_TMP],-1\n\t" \
                      " jl 20f\n\t" \
                      " lgr %[R_LI],%[R_TMP]\n\t" \
                      /* Widen and store in 16-byte pieces until the \
                         remaining length R_LI is used up. */ \
                      " vuplhb %%v17,%%v16\n\t" \
                      " vuplhh %%v19,%%v17\n\t" \
                      " vstl %%v19,%[R_LI],0(%[R_OUT])\n\t" \
                      " ahi %[R_LI],-16\n\t" \
                      " jl 11f\n\t" \
                      " vupllh %%v20,%%v17\n\t" \
                      " vstl %%v20,%[R_LI],16(%[R_OUT])\n\t" \
                      " ahi %[R_LI],-16\n\t" \
                      " jl 11f\n\t" \
                      " vupllb %%v18,%%v16\n\t" \
                      " vuplhh %%v21,%%v18\n\t" \
                      " vstl %%v21,%[R_LI],32(%[R_OUT])\n\t" \
                      " ahi %[R_LI],-16\n\t" \
                      " jl 11f\n\t" \
                      " vupllh %%v22,%%v18\n\t" \
                      " vstl %%v22,%[R_LI],48(%[R_OUT])\n\t" \
                      "11:\n\t" \
                      /* Advance the output pointer by R_TMP + 1 bytes. */ \
                      " la %[R_OUT],1(%[R_TMP],%[R_OUT])\n\t" \
                      "20:\n\t" \
                      ".machine pop" \
                      : /* outputs */ [R_OUT] "+a" (outptr) \
                        , [R_IN] "+a" (inptr) \
                        , [R_LEN] "+d" (len) \
                        , [R_LI] "=d" (loop_count) \
                        , [R_TMP] "=a" (tmp) \
                      : /* inputs */ \
                      : /* clobber list*/ "memory", "cc" \
                        ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
                        ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
                        ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
                        ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v30") \
                        ASM_CLOBBER_VR ("v31") \
                      ); \
    if (len > 0) \
      { \
        /* Found an invalid character at the next input byte. */ \
        BODY_ORIG_ERROR \
      } \
  }
213 | |
/* Instantiate the loop and the transformation function from the templates
   using the parameters and BODY defined above, drop the helper macros that
   are private to this conversion, and bind the public symbol.  */
# define LOOP_NEED_FLAGS
# include <iconv/loop.c>
# include <iconv/skeleton.c>
# undef BODY_ORIG
# undef BODY_ORIG_ERROR
ICONV_VX_IFUNC (__gconv_transform_ascii_internal)
220 | |
/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
/* Template parameters for the iconv/loop.c and iconv/skeleton.c inclusions
   that follow the BODY definition: 4 bytes per input character, 1 byte per
   output character, vector-variant names for the loop and the function.  */
# define DEFINE_INIT 0
# define DEFINE_FINI 0
# define MIN_NEEDED_FROM 4
# define MIN_NEEDED_TO 1
# define FROM_DIRECTION 1
# define FROM_LOOP ICONV_VX_NAME (internal_ascii_loop)
# define TO_LOOP ICONV_VX_NAME (internal_ascii_loop) /* This is not used. */
# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_internal_ascii)
# define ONE_DIRECTION 1

# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define LOOPFCT FROM_LOOP
/* Error handling for one input character that is not representable: first
   the Unicode tag handler is given the 32-bit value at inptr, then the
   standard "to" loop error handler for a 4-byte input unit is invoked.  */
# define BODY_ORIG_ERROR \
  UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
  STANDARD_TO_LOOP_ERR_HANDLER (4);
238 | |
/* Scalar conversion of a single character: a 32-bit value above 0x7f takes
   the error path, any other value is narrowed to one output byte.  */
# define BODY_ORIG \
  { \
    if (__glibc_unlikely (0x7f < *((const uint32_t *) inptr))) \
      { \
        BODY_ORIG_ERROR \
      } \
    else \
      { \
        /* Valid 7-bit value: emit one byte, then step over the 4-byte \
           input character. */ \
        *outptr = *((const uint32_t *) inptr); \
        ++outptr; \
        inptr += sizeof (uint32_t); \
      } \
  }
/* Vector loop body for internal -> ASCII.  LEN is the number of characters
   that fit into both the input and the output buffer.  The asm converts
   16 characters (64 input bytes) per iteration and then a tail of 1..15
   characters.  It stops in front of the first character > 0x7f; in that
   case LEN is left non-zero and the error path is taken for the character
   at inptr.
   Register roles inside the asm: R_LI is the block counter and later the
   remaining tail length in bytes, R_TMP is the character offset (0, 4, 8
   or 12) of the vector currently being checked within the 16-character
   group, R_I is the byte index reported by the range compare and later the
   number of characters to store.  */
# define BODY \
  { \
    size_t len = (inend - inptr) / 4; \
    if (len > outend - outptr) \
      len = outend - outptr; \
    size_t loop_count, tmp, tmp2; \
    __asm__ volatile (".machine push\n\t" \
                      ".machine \"z13\"\n\t" \
                      ".machinemode \"zarch_nohighgprs\"\n\t" \
                      CONVERT_32BIT_SIZE_T ([R_LEN]) \
                      /* Setup to check for ch > 0x7f. */ \
                      " vzero %%v21\n\t" \
                      " srlg %[R_LI],%[R_LEN],4\n\t" \
                      " vleih %%v21,8192,0\n\t" /* element 0: > */ \
                      " vleih %%v21,-8192,2\n\t" /* element 1: =<> */ \
                      " vleif %%v20,127,0\n\t" /* element 0: 127 */ \
                      " lghi %[R_TMP],0\n\t" \
                      " clgije %[R_LI],0,1f\n\t" \
                      "0:\n\t" \
                      " vlm %%v16,%%v19,0(%[R_IN])\n\t" \
                      /* Shorten to byte values. */ \
                      " vpkf %%v23,%%v16,%%v17\n\t" \
                      " vpkf %%v24,%%v18,%%v19\n\t" \
                      " vpkh %%v23,%%v23,%%v24\n\t" \
                      /* Checking for values > 0x7f. */ \
                      " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \
                      " jno 10f\n\t" \
                      " vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \
                      " jno 11f\n\t" \
                      " vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" \
                      " jno 12f\n\t" \
                      " vstrcfs %%v22,%%v19,%%v20,%%v21\n\t" \
                      " jno 13f\n\t" \
                      /* Store 16bytes to outptr. */ \
                      " vst %%v23,0(%[R_OUT])\n\t" \
                      " la %[R_IN],64(%[R_IN])\n\t" \
                      " la %[R_OUT],16(%[R_OUT])\n\t" \
                      " brctg %[R_LI],0b\n\t" \
                      /* R_LEN &= 15: characters left after the blocks. */ \
                      " lghi %[R_LI],15\n\t" \
                      " ngr %[R_LEN],%[R_LI]\n\t" \
                      " je 20f\n\t" /* Jump away if no remaining bytes. */ \
                      /* Handle remaining bytes. */ \
                      "1: sllg %[R_LI],%[R_LEN],2\n\t" \
                      " aghi %[R_LI],-1\n\t" \
                      " jl 20f\n\t" /* Jump away if no remaining bytes. */ \
                      /* Load the remaining 4...60 bytes (1...15 characters); \
                         vectors beyond the tail are not loaded. */ \
                      " vll %%v16,%[R_LI],0(%[R_IN])\n\t" \
                      " ahi %[R_LI],-16\n\t" \
                      " jl 2f\n\t" \
                      " vll %%v17,%[R_LI],16(%[R_IN])\n\t" \
                      " ahi %[R_LI],-16\n\t" \
                      " jl 2f\n\t" \
                      " vll %%v18,%[R_LI],32(%[R_IN])\n\t" \
                      " ahi %[R_LI],-16\n\t" \
                      " jl 2f\n\t" \
                      " vll %%v19,%[R_LI],48(%[R_IN])\n\t" \
                      "2:\n\t" \
                      /* Shorten to byte values. */ \
                      " vpkf %%v23,%%v16,%%v17\n\t" \
                      " vpkf %%v24,%%v18,%%v19\n\t" \
                      " vpkh %%v23,%%v23,%%v24\n\t" \
                      /* Check the fully loaded vectors first; R_LI tracks \
                         the tail length in bytes minus 16 per full vector. */ \
                      " sllg %[R_LI],%[R_LEN],2\n\t" \
                      " aghi %[R_LI],-16\n\t" \
                      " jl 3f\n\t" /* v16 is not fully loaded. */ \
                      " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \
                      " jno 10f\n\t" \
                      " aghi %[R_LI],-16\n\t" \
                      " jl 4f\n\t" /* v17 is not fully loaded. */ \
                      " vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \
                      " jno 11f\n\t" \
                      " aghi %[R_LI],-16\n\t" \
                      " jl 5f\n\t" /* v18 is not fully loaded. */ \
                      " vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" \
                      " jno 12f\n\t" \
                      " aghi %[R_LI],-16\n\t" \
                      /* v19 is not fully loaded. */ \
                      " lghi %[R_TMP],12\n\t" \
                      " vstrcfs %%v22,%%v19,%%v20,%%v21\n\t" \
                      /* Common check for the partially loaded vector: \
                         R_LI + 16 is the number of valid bytes in it.  A \
                         reported index below that is a real hit (14f); \
                         otherwise all R_LEN characters are valid. */ \
                      "6: vlgvb %[R_I],%%v22,7\n\t" \
                      " aghi %[R_LI],16\n\t" \
                      " clrjl %[R_I],%[R_LI],14f\n\t" \
                      " lgr %[R_I],%[R_LEN]\n\t" \
                      " lghi %[R_LEN],0\n\t" \
                      " j 15f\n\t" \
                      "3: vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \
                      " j 6b\n\t" \
                      "4: vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \
                      " lghi %[R_TMP],4\n\t" \
                      " j 6b\n\t" \
                      /* v18 is the partially loaded vector here, so v18 \
                         must be checked (v17 was already validated above); \
                         otherwise a value > 0x7f in characters 8..10 of \
                         the tail would go undetected. */ \
                      "5: vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" \
                      " lghi %[R_TMP],8\n\t" \
                      " j 6b\n\t" \
                      /* Found a value > 0x7f. */ \
                      "13: ahi %[R_TMP],4\n\t" \
                      "12: ahi %[R_TMP],4\n\t" \
                      "11: ahi %[R_TMP],4\n\t" \
                      "10: vlgvb %[R_I],%%v22,7\n\t" \
                      /* R_I = byte index / 4 + offset of the vector = number \
                         of valid characters in front of the invalid one. */ \
                      "14: srlg %[R_I],%[R_I],2\n\t" \
                      " agr %[R_I],%[R_TMP]\n\t" \
                      " je 20f\n\t" \
                      /* Store characters before invalid one... */ \
                      "15: aghi %[R_I],-1\n\t" \
                      " vstl %%v23,%[R_I],0(%[R_OUT])\n\t" \
                      /* ... and update pointers. */ \
                      " la %[R_OUT],1(%[R_I],%[R_OUT])\n\t" \
                      " sllg %[R_I],%[R_I],2\n\t" \
                      " la %[R_IN],4(%[R_I],%[R_IN])\n\t" \
                      "20:\n\t" \
                      ".machine pop" \
                      : /* outputs */ [R_OUT] "+a" (outptr) \
                        , [R_IN] "+a" (inptr) \
                        , [R_LEN] "+d" (len) \
                        , [R_LI] "=d" (loop_count) \
                        , [R_I] "=a" (tmp2) \
                        , [R_TMP] "=d" (tmp) \
                      : /* inputs */ \
                      : /* clobber list*/ "memory", "cc" \
                        ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
                        ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
                        ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
                        ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \
                        ASM_CLOBBER_VR ("v24") \
                      ); \
    if (len > 0) \
      { \
        /* Found an invalid character > 0x7f at next character. */ \
        BODY_ORIG_ERROR \
      } \
  }
/* Instantiate the loop and the transformation function from the templates
   using the parameters and BODY defined above, drop the helper macros that
   are private to this conversion, and bind the public symbol.  */
# define LOOP_NEED_FLAGS
# include <iconv/loop.c>
# include <iconv/skeleton.c>
# undef BODY_ORIG
# undef BODY_ORIG_ERROR
ICONV_VX_IFUNC (__gconv_transform_internal_ascii)
387 | |
388 | |
/* Convert from internal UCS4 to UCS4 little endian form. */
/* Template parameters for the iconv/skeleton.c inclusion below.  The loop
   function named by FROM_LOOP is written by hand in this file instead of
   being generated from iconv/loop.c.  */
# define DEFINE_INIT 0
# define DEFINE_FINI 0
# define MIN_NEEDED_FROM 4
# define MIN_NEEDED_TO 4
# define FROM_DIRECTION 1
# define FROM_LOOP ICONV_VX_NAME (internal_ucs4le_loop)
# define TO_LOOP ICONV_VX_NAME (internal_ucs4le_loop) /* This is not used. */
# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_internal_ucs4le)
# define ONE_DIRECTION 0
399 | |
/* Byte-swap 4-byte characters from [*INPTRP, INEND) into [*OUTPTRP, OUTEND).
   As many whole characters as fit into both buffers are converted; no
   validity check is done.  *INPTRP and *OUTPTRP are advanced past the
   processed data.  STEP, STEP_DATA and IRREVERSIBLE are not used here.
   Returns __GCONV_EMPTY_INPUT if all input was consumed,
   __GCONV_FULL_OUTPUT if fewer than 4 output bytes are left, and
   __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
ICONV_VX_NAME (internal_ucs4le_loop) (struct __gconv_step *step,
                                      struct __gconv_step_data *step_data,
                                      const unsigned char **inptrp,
                                      const unsigned char *inend,
                                      unsigned char **outptrp,
                                      const unsigned char *outend,
                                      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;
  /* Number of whole characters that fit into both buffers.  */
  size_t len = MIN (inend - inptr, outend - outptr) / 4;
  size_t loop_count;
  __asm__ volatile (".machine push\n\t"
                    ".machine \"z13\"\n\t"
                    ".machinemode \"zarch_nohighgprs\"\n\t"
                    CONVERT_32BIT_SIZE_T ([R_LEN])
                    /* Jump over the inline constant; R_LI receives its
                       address, which the vl at label 1 uses.  */
                    " bras %[R_LI],1f\n\t"
                    /* Vector permute mask: */
                    " .long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t"
                    "1: vl %%v20,0(%[R_LI])\n\t"
                    /* Process 64byte (16char) blocks. */
                    " srlg %[R_LI],%[R_LEN],4\n\t"
                    " clgije %[R_LI],0,10f\n\t"
                    "0: vlm %%v16,%%v19,0(%[R_IN])\n\t"
                    " vperm %%v16,%%v16,%%v16,%%v20\n\t"
                    " vperm %%v17,%%v17,%%v17,%%v20\n\t"
                    " vperm %%v18,%%v18,%%v18,%%v20\n\t"
                    " vperm %%v19,%%v19,%%v19,%%v20\n\t"
                    " vstm %%v16,%%v19,0(%[R_OUT])\n\t"
                    " la %[R_IN],64(%[R_IN])\n\t"
                    " la %[R_OUT],64(%[R_OUT])\n\t"
                    " brctg %[R_LI],0b\n\t"
                    /* Keep only the 0..15 characters left over.  */
                    " llgfr %[R_LEN],%[R_LEN]\n\t"
                    " nilf %[R_LEN],15\n\t"
                    /* Process 16byte (4char) blocks. */
                    "10: srlg %[R_LI],%[R_LEN],2\n\t"
                    " clgije %[R_LI],0,20f\n\t"
                    "11: vl %%v16,0(%[R_IN])\n\t"
                    " vperm %%v16,%%v16,%%v16,%%v20\n\t"
                    " vst %%v16,0(%[R_OUT])\n\t"
                    " la %[R_IN],16(%[R_IN])\n\t"
                    " la %[R_OUT],16(%[R_OUT])\n\t"
                    " brctg %[R_LI],11b\n\t"
                    /* Keep only the 0..3 characters left over.  */
                    " nill %[R_LEN],3\n\t"
                    /* Process <16bytes. */
                    /* R_LEN becomes the byte count minus one, as used by
                       the length-controlled load and store.  */
                    "20: sll %[R_LEN],2\n\t"
                    " ahi %[R_LEN],-1\n\t"
                    " jl 30f\n\t"
                    " vll %%v16,%[R_LEN],0(%[R_IN])\n\t"
                    " vperm %%v16,%%v16,%%v16,%%v20\n\t"
                    " vstl %%v16,%[R_LEN],0(%[R_OUT])\n\t"
                    " la %[R_IN],1(%[R_LEN],%[R_IN])\n\t"
                    " la %[R_OUT],1(%[R_LEN],%[R_OUT])\n\t"
                    "30: \n\t"
                    ".machine pop"
                    : /* outputs */ [R_OUT] "+a" (outptr)
                      , [R_IN] "+a" (inptr)
                      , [R_LI] "=a" (loop_count)
                      , [R_LEN] "+a" (len)
                    : /* inputs */
                    : /* clobber list*/ "memory" , "cc"
                      ASM_CLOBBER_VR ("v16" ) ASM_CLOBBER_VR ("v17" )
                      ASM_CLOBBER_VR ("v18" ) ASM_CLOBBER_VR ("v19" )
                      ASM_CLOBBER_VR ("v20" )
                    );
  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status. */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
481 | |
/* Alias the "_single" helper for the vector loop, instantiate the
   transformation function from the skeleton and bind the public symbol.  */
ICONV_VX_SINGLE (internal_ucs4le_loop)
# include <iconv/skeleton.c>
ICONV_VX_IFUNC (__gconv_transform_internal_ucs4le)
485 | |
486 | |
/* Transform from UCS4 to the internal, UCS4-like format. Unlike
   for the other direction we have to check for correct values here. */
/* Template parameters for the iconv/skeleton.c inclusion below; the loop
   function named by FROM_LOOP is written by hand in this file.  */
# define DEFINE_INIT 0
# define DEFINE_FINI 0
# define MIN_NEEDED_FROM 4
# define MIN_NEEDED_TO 4
# define FROM_DIRECTION 1
# define FROM_LOOP ICONV_VX_NAME (ucs4_internal_loop)
# define TO_LOOP ICONV_VX_NAME (ucs4_internal_loop) /* This is not used. */
# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ucs4_internal)
# define ONE_DIRECTION 0
498 | |
499 | |
/* Copy 4-byte characters from [*INPTRP, INEND) to [*OUTPTRP, OUTEND) while
   rejecting values > 0x7fffffff.
   On an invalid character: if IRREVERSIBLE is NULL the function returns
   __GCONV_ILLEGAL_INPUT without writing back the pointers; if
   __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags the character is
   skipped and *IRREVERSIBLE is incremented; otherwise the pointers are
   written back (input pointing at the invalid character) and
   __GCONV_ILLEGAL_INPUT is returned.
   Without error the pointers are written back and the result is
   __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT.
   STEP is not used here.  */
static inline int
__attribute ((always_inline))
ICONV_VX_NAME (ucs4_internal_loop) (struct __gconv_step *step,
                                    struct __gconv_step_data *step_data,
                                    const unsigned char **inptrp,
                                    const unsigned char *inend,
                                    unsigned char **outptrp,
                                    const unsigned char *outend,
                                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;
  size_t len, loop_count;
  /* Each iteration converts up to the next invalid character.  The asm
     leaves len > 0 only if it stopped in front of an invalid character;
     the loop is repeated only after such a character has been skipped.  */
  do
    {
      /* Number of whole characters that fit into both buffers.  */
      len = MIN (inend - inptr, outend - outptr) / 4;
      __asm__ volatile (".machine push\n\t"
                        ".machine \"z13\"\n\t"
                        ".machinemode \"zarch_nohighgprs\"\n\t"
                        CONVERT_32BIT_SIZE_T ([R_LEN])
                        /* Setup to check for ch > 0x7fffffff. */
                        " larl %[R_LI],9f\n\t"
                        " vlm %%v20,%%v21,0(%[R_LI])\n\t"
                        " srlg %[R_LI],%[R_LEN],2\n\t"
                        " clgije %[R_LI],0,1f\n\t"
                        /* Process 16byte (4char) blocks. */
                        "0: vl %%v16,0(%[R_IN])\n\t"
                        " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"
                        " jno 10f\n\t"
                        " vst %%v16,0(%[R_OUT])\n\t"
                        " la %[R_IN],16(%[R_IN])\n\t"
                        " la %[R_OUT],16(%[R_OUT])\n\t"
                        " brctg %[R_LI],0b\n\t"
                        /* Keep only the 0..3 characters left over.  */
                        " llgfr %[R_LEN],%[R_LEN]\n\t"
                        " nilf %[R_LEN],3\n\t"
                        /* Process <16bytes. */
                        /* From here on R_LEN counts bytes.  */
                        "1: sll %[R_LEN],2\n\t"
                        " ahik %[R_LI],%[R_LEN],-1\n\t"
                        " jl 20f\n\t" /* No further bytes available. */
                        " vll %%v16,%[R_LI],0(%[R_IN])\n\t"
                        " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"
                        /* R_LI = byte index reported by the compare.  If it
                           is not below R_LEN, all loaded bytes are valid:
                           store R_LEN bytes and set R_LEN to 0.  */
                        " vlgvb %[R_LI],%%v22,7\n\t"
                        " clr %[R_LI],%[R_LEN]\n\t"
                        " locgrhe %[R_LI],%[R_LEN]\n\t"
                        " locghihe %[R_LEN],0\n\t"
                        " j 11f\n\t"
                        /* v20: Vector string range compare values. */
                        "9: .long 0x7fffffff,0x0,0x0,0x0\n\t"
                        /* v21: Vector string range compare control-bits.
                           element 0: >; element 1: =<> (always true) */
                        " .long 0x20000000,0xE0000000,0x0,0x0\n\t"
                        /* Found a value > 0x7fffffff. */
                        "10: vlgvb %[R_LI],%%v22,7\n\t"
                        /* Store characters before invalid one. */
                        "11: aghi %[R_LI],-1\n\t"
                        " jl 20f\n\t"
                        " vstl %%v16,%[R_LI],0(%[R_OUT])\n\t"
                        " la %[R_IN],1(%[R_LI],%[R_IN])\n\t"
                        " la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t"
                        "20:\n\t"
                        ".machine pop"
                        : /* outputs */ [R_OUT] "+a" (outptr)
                          , [R_IN] "+a" (inptr)
                          , [R_LI] "=a" (loop_count)
                          , [R_LEN] "+d" (len)
                        : /* inputs */
                        : /* clobber list*/ "memory" , "cc"
                          ASM_CLOBBER_VR ("v16" ) ASM_CLOBBER_VR ("v20" )
                          ASM_CLOBBER_VR ("v21" ) ASM_CLOBBER_VR ("v22" )
                        );
      if (len > 0)
        {
          /* The value is too large. We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result. This is a genuine bug in the input since
             UCS4 does not allow such values. */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything. */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character. */
              ++*irreversible;
              inptr += 4;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  while (len > 0);

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status. */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
610 | |
/* Alias the "_single" helper for the vector loop, instantiate the
   transformation function from the skeleton and bind the public symbol.  */
ICONV_VX_SINGLE (ucs4_internal_loop)
# include <iconv/skeleton.c>
ICONV_VX_IFUNC (__gconv_transform_ucs4_internal)
614 | |
615 | |
/* Transform from UCS4-LE to the internal encoding. */
/* Template parameters for the iconv/skeleton.c inclusion below; the loop
   function named by FROM_LOOP is written by hand in this file.  */
# define DEFINE_INIT 0
# define DEFINE_FINI 0
# define MIN_NEEDED_FROM 4
# define MIN_NEEDED_TO 4
# define FROM_DIRECTION 1
# define FROM_LOOP ICONV_VX_NAME (ucs4le_internal_loop)
# define TO_LOOP ICONV_VX_NAME (ucs4le_internal_loop) /* This is not used. */
# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ucs4le_internal)
# define ONE_DIRECTION 0
626 | |
/* Byte-swap 4-byte characters from [*INPTRP, INEND) into [*OUTPTRP, OUTEND)
   while rejecting swapped values > 0x7fffffff.
   On an invalid character: if IRREVERSIBLE is NULL the function returns
   __GCONV_ILLEGAL_INPUT without writing back the pointers; if
   __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags the character is
   skipped and *IRREVERSIBLE is incremented; otherwise the pointers are
   written back (input pointing at the invalid character) and
   __GCONV_ILLEGAL_INPUT is returned.
   Without error the pointers are written back and the result is
   __GCONV_EMPTY_INPUT, __GCONV_INCOMPLETE_INPUT (fewer than 4 input bytes
   left) or __GCONV_FULL_OUTPUT.  STEP is not used here.  */
static inline int
__attribute ((always_inline))
ICONV_VX_NAME (ucs4le_internal_loop) (struct __gconv_step *step,
                                      struct __gconv_step_data *step_data,
                                      const unsigned char **inptrp,
                                      const unsigned char *inend,
                                      unsigned char **outptrp,
                                      const unsigned char *outend,
                                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;
  size_t len, loop_count;
  /* Each iteration converts up to the next invalid character.  The asm
     leaves len > 0 only if it stopped in front of an invalid character;
     the loop is repeated only after such a character has been skipped.  */
  do
    {
      /* Number of whole characters that fit into both buffers.  */
      len = MIN (inend - inptr, outend - outptr) / 4;
      __asm__ volatile (".machine push\n\t"
                        ".machine \"z13\"\n\t"
                        ".machinemode \"zarch_nohighgprs\"\n\t"
                        CONVERT_32BIT_SIZE_T ([R_LEN])
                        /* Setup to check for ch > 0x7fffffff. */
                        " larl %[R_LI],9f\n\t"
                        " vlm %%v20,%%v22,0(%[R_LI])\n\t"
                        " srlg %[R_LI],%[R_LEN],2\n\t"
                        " clgije %[R_LI],0,1f\n\t"
                        /* Process 16byte (4char) blocks. */
                        "0: vl %%v16,0(%[R_IN])\n\t"
                        /* Swap the bytes first, then range-check the
                           swapped values.  */
                        " vperm %%v16,%%v16,%%v16,%%v22\n\t"
                        " vstrcfs %%v23,%%v16,%%v20,%%v21\n\t"
                        " jno 10f\n\t"
                        " vst %%v16,0(%[R_OUT])\n\t"
                        " la %[R_IN],16(%[R_IN])\n\t"
                        " la %[R_OUT],16(%[R_OUT])\n\t"
                        " brctg %[R_LI],0b\n\t"
                        /* Keep only the 0..3 characters left over.  */
                        " llgfr %[R_LEN],%[R_LEN]\n\t"
                        " nilf %[R_LEN],3\n\t"
                        /* Process <16bytes. */
                        /* From here on R_LEN counts bytes.  */
                        "1: sll %[R_LEN],2\n\t"
                        " ahik %[R_LI],%[R_LEN],-1\n\t"
                        " jl 20f\n\t" /* No further bytes available. */
                        " vll %%v16,%[R_LI],0(%[R_IN])\n\t"
                        " vperm %%v16,%%v16,%%v16,%%v22\n\t"
                        " vstrcfs %%v23,%%v16,%%v20,%%v21\n\t"
                        /* R_LI = byte index reported by the compare.  If it
                           is not below R_LEN, all loaded bytes are valid:
                           store R_LEN bytes and set R_LEN to 0.  */
                        " vlgvb %[R_LI],%%v23,7\n\t"
                        " clr %[R_LI],%[R_LEN]\n\t"
                        " locgrhe %[R_LI],%[R_LEN]\n\t"
                        " locghihe %[R_LEN],0\n\t"
                        " j 11f\n\t"
                        /* v20: Vector string range compare values. */
                        "9: .long 0x7fffffff,0x0,0x0,0x0\n\t"
                        /* v21: Vector string range compare control-bits.
                           element 0: >; element 1: =<> (always true) */
                        " .long 0x20000000,0xE0000000,0x0,0x0\n\t"
                        /* v22: Vector permute mask. */
                        " .long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t"
                        /* Found a value > 0x7fffffff. */
                        "10: vlgvb %[R_LI],%%v23,7\n\t"
                        /* Store characters before invalid one. */
                        "11: aghi %[R_LI],-1\n\t"
                        " jl 20f\n\t"
                        " vstl %%v16,%[R_LI],0(%[R_OUT])\n\t"
                        " la %[R_IN],1(%[R_LI],%[R_IN])\n\t"
                        " la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t"
                        "20:\n\t"
                        ".machine pop"
                        : /* outputs */ [R_OUT] "+a" (outptr)
                          , [R_IN] "+a" (inptr)
                          , [R_LI] "=a" (loop_count)
                          , [R_LEN] "+d" (len)
                        : /* inputs */
                        : /* clobber list*/ "memory" , "cc"
                          ASM_CLOBBER_VR ("v16" ) ASM_CLOBBER_VR ("v20" )
                          ASM_CLOBBER_VR ("v21" ) ASM_CLOBBER_VR ("v22" )
                          ASM_CLOBBER_VR ("v23" )
                        );
      if (len > 0)
        {
          /* The value is too large. We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result. This is a genuine bug in the input since
             UCS4 does not allow such values. */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything. */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character. */
              ++*irreversible;
              inptr += 4;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  while (len > 0);

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status. */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      /* Input is left and complete, so the output must be exhausted.  */
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
/* Alias the "_single" helper for the vector loop, instantiate the
   transformation function from the skeleton and bind the public symbol.  */
ICONV_VX_SINGLE (ucs4le_internal_loop)
# include <iconv/skeleton.c>
ICONV_VX_IFUNC (__gconv_transform_ucs4le_internal)
748 | |
/* Convert from UCS2 to the internal (UCS4-like) format. */
/* Template parameters for the iconv/loop.c and iconv/skeleton.c inclusions
   that follow the BODY definition: 2 bytes per input character, 4 bytes per
   output character, vector-variant names for the loop and the function.  */
# define DEFINE_INIT 0
# define DEFINE_FINI 0
# define MIN_NEEDED_FROM 2
# define MIN_NEEDED_TO 4
# define FROM_DIRECTION 1
# define FROM_LOOP ICONV_VX_NAME (ucs2_internal_loop)
# define TO_LOOP ICONV_VX_NAME (ucs2_internal_loop) /* This is not used. */
# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ucs2_internal)
# define ONE_DIRECTION 1

# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define LOOPFCT FROM_LOOP
/* Error handling for one invalid 2-byte input character; used by BODY_ORIG
   and by the vector BODY once the asm has stopped in front of it.  */
# define BODY_ORIG_ERROR \
  /* Surrogate characters in UCS-2 input are not valid. Reject \
     them. (Catching this here is not security relevant.) */ \
  STANDARD_FROM_LOOP_ERR_HANDLER (2);
/* Scalar conversion of a single character: surrogate values
   (0xd800 ... 0xdfff) take the error path, every other 16-bit value is
   widened to one 32-bit output character.  */
# define BODY_ORIG \
  { \
    uint16_t ucs2_val = get16 (inptr); \
    \
    if (__glibc_unlikely (0xd800 <= ucs2_val && ucs2_val <= 0xdfff)) \
      { \
        BODY_ORIG_ERROR \
      } \
    \
    /* Store the widened value, then advance both pointers. */ \
    *((uint32_t *) outptr) = ucs2_val; \
    inptr += 2; \
    outptr += sizeof (uint32_t); \
  }
/* Vector loop body for UCS2 -> internal.  LEN is the number of characters
   that fit into both the input and the output buffer.  The asm converts
   8 characters (16 input bytes) per iteration and then a tail of 1..7
   characters.  It stops in front of the first surrogate value; in that
   case LEN is left non-zero and the error path is taken for the character
   at inptr.  */
# define BODY \
  { \
    size_t len, tmp, tmp2; \
    len = MIN ((inend - inptr) / 2, (outend - outptr) / 4); \
    __asm__ volatile (".machine push\n\t" \
                      ".machine \"z13\"\n\t" \
                      ".machinemode \"zarch_nohighgprs\"\n\t" \
                      CONVERT_32BIT_SIZE_T ([R_LEN]) \
                      /* Setup to check for ch >= 0xd800 && ch < 0xe000. */ \
                      " larl %[R_TMP],9f\n\t" \
                      " vlm %%v20,%%v21,0(%[R_TMP])\n\t" \
                      " srlg %[R_TMP],%[R_LEN],3\n\t" \
                      " clgije %[R_TMP],0,1f\n\t" \
                      /* Process 16byte (8char) blocks. */ \
                      "0: vl %%v16,0(%[R_IN])\n\t" \
                      " vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \
                      /* Enlarge UCS2 to UCS4. */ \
                      " vuplhh %%v17,%%v16\n\t" \
                      " vupllh %%v18,%%v16\n\t" \
                      /* Branch on the result of the range compare above \
                         (NOTE(review): relies on the unpack instructions \
                         leaving the condition code unchanged). */ \
                      " jno 10f\n\t" \
                      /* Store 32bytes to buf_out. */ \
                      " vstm %%v17,%%v18,0(%[R_OUT])\n\t" \
                      " la %[R_IN],16(%[R_IN])\n\t" \
                      " la %[R_OUT],32(%[R_OUT])\n\t" \
                      " brctg %[R_TMP],0b\n\t" \
                      /* Keep only the 0..7 characters left over. */ \
                      " llgfr %[R_LEN],%[R_LEN]\n\t" \
                      " nilf %[R_LEN],7\n\t" \
                      /* Process <16bytes. */ \
                      /* From here on R_LEN counts input bytes. */ \
                      "1: sll %[R_LEN],1\n\t" \
                      " ahik %[R_TMP],%[R_LEN],-1\n\t" \
                      " jl 20f\n\t" /* No further bytes available. */ \
                      " vll %%v16,%[R_TMP],0(%[R_IN])\n\t" \
                      " vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \
                      /* Enlarge UCS2 to UCS4. */ \
                      " vuplhh %%v17,%%v16\n\t" \
                      " vupllh %%v18,%%v16\n\t" \
                      /* R_TMP = byte index reported by the compare.  If it \
                         is not below R_LEN, all loaded bytes are valid: \
                         convert R_LEN bytes and set R_LEN to 0. */ \
                      " vlgvb %[R_TMP],%%v19,7\n\t" \
                      " clr %[R_TMP],%[R_LEN]\n\t" \
                      " locgrhe %[R_TMP],%[R_LEN]\n\t" \
                      " locghihe %[R_LEN],0\n\t" \
                      " j 11f\n\t" \
                      /* v20: Vector string range compare values. */ \
                      "9: .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
                      /* v21: Vector string range compare control-bits. \
                         element 0: =>; element 1: < */ \
                      " .short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
                      /* Found an element: ch >= 0xd800 && ch < 0xe000 */ \
                      "10: vlgvb %[R_TMP],%%v19,7\n\t" \
                      /* R_TMP = number of valid input bytes.  Advance the \
                         input pointer; the output byte count is twice that \
                         (kept in R_TMP2 for the final pointer update). */ \
                      "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
                      " sll %[R_TMP],1\n\t" \
                      " lgr %[R_TMP2],%[R_TMP]\n\t" \
                      " ahi %[R_TMP],-1\n\t" \
                      " jl 20f\n\t" \
                      " vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t" \
                      " ahi %[R_TMP],-16\n\t" \
                      " jl 19f\n\t" \
                      " vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t" \
                      "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t" \
                      "20: \n\t" \
                      ".machine pop" \
                      : /* outputs */ [R_OUT] "+a" (outptr) \
                        , [R_IN] "+a" (inptr) \
                        , [R_TMP] "=a" (tmp) \
                        , [R_TMP2] "=a" (tmp2) \
                        , [R_LEN] "+d" (len) \
                      : /* inputs */ \
                      : /* clobber list*/ "memory", "cc" \
                        ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
                        ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
                        ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
                      ); \
    if (len > 0) \
      { \
        /* Found an invalid character at next input-char. */ \
        BODY_ORIG_ERROR \
      } \
  }
857 | |
858 | # define LOOP_NEED_FLAGS /* Defined ahead of the iconv/loop.c include that follows. */ |
859 | # include <iconv/loop.c> /* Textual include; consumes the macros set up above (e.g. BODY). */ |
860 | # include <iconv/skeleton.c> |
861 | # undef BODY_ORIG /* Drop the helper macros; the next converter defines its own. */ |
862 | # undef BODY_ORIG_ERROR |
863 | ICONV_VX_IFUNC (__gconv_transform_ucs2_internal) /* Defines the public symbol: per the macro's definition either a strong alias of the _vx function, or an ifunc that selects _vx when HWCAP_S390_VX is set and _c otherwise. */ |
864 | |
/* UCS2 in the other endianness -> internal (UCS4-like) format.
   Only this one direction is provided; a 2-byte input unit becomes a
   4-byte output unit.  */

/* Names of the generated function and of its conversion loop.  */
# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ucs2reverse_internal)
# define FROM_LOOP ICONV_VX_NAME (ucs2reverse_internal_loop)
# define TO_LOOP ICONV_VX_NAME (ucs2reverse_internal_loop) /* This is not used.  */
# define LOOPFCT FROM_LOOP

/* Direction setup.  */
# define ONE_DIRECTION 1
# define FROM_DIRECTION 1

/* DEFINE_INIT and DEFINE_FINI are both set to 0 for this step.  */
# define DEFINE_INIT 0
# define DEFINE_FINI 0

/* Unit sizes in bytes: 2 on the input side, 4 on the output side.  */
# define MIN_NEEDED_FROM 2
# define MIN_NEEDED_TO 4
# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define BODY_ORIG_ERROR \
    /* A surrogate is not a valid character in UCS-2 input, so it is        \
       rejected.  (Catching this here is not security relevant.)  */        \
    if (ignore_errors_p ())                                                  \
      {                                                                      \
        /* Errors are ignored: count the unit as irreversible, step over    \
           its 2 bytes and go on with the next loop iteration.  */          \
        ++*irreversible;                                                     \
        inptr += 2;                                                          \
        continue;                                                            \
      }                                                                      \
    /* Otherwise report the illegal input and leave the loop.  */           \
    result = __GCONV_ILLEGAL_INPUT;                                          \
    break;
890 | |
# define BODY_ORIG \
  {                                                                          \
    /* Read one 2-byte unit and swap its two bytes.  */                      \
    uint16_t swapped = bswap_16 (get16 (inptr));                             \
                                                                             \
    /* Values in 0xd800..0xdfff (surrogates) go to the error handler,        \
       which leaves this iteration via break or continue.  */                \
    if (__glibc_unlikely (! (swapped < 0xd800 || swapped >= 0xe000)))        \
      {                                                                      \
        BODY_ORIG_ERROR                                                      \
      }                                                                      \
                                                                             \
    /* Store the value as one 4-byte unit and advance both pointers.  */     \
    *((uint32_t *) outptr) = swapped;                                        \
    inptr += 2;                                                              \
    outptr += sizeof (uint32_t);                                             \
  }
904 | # define BODY /* Vector variant of the loop body: swaps the bytes of each 2-byte input unit, widens it to 4 bytes and stops in front of the first unit in 0xd800..0xdfff, which is then handed to BODY_ORIG_ERROR. */ \ |
905 | { \ |
906 | size_t len, tmp, tmp2; \ |
907 | len = MIN ((inend - inptr) / 2, (outend - outptr) / 4); /* Characters that fit both the remaining input (2 bytes each) and the remaining output (4 bytes each). */ \ |
908 | __asm__ volatile (".machine push\n\t" \ |
909 | ".machine \"z13\"\n\t" \ |
910 | ".machinemode \"zarch_nohighgprs\"\n\t" \ |
911 | CONVERT_32BIT_SIZE_T ([R_LEN]) /* NOTE(review): defined elsewhere; presumably prepares a 32-bit size_t in R_LEN for 64-bit use - confirm. */ \ |
912 | /* Setup to check for ch >= 0xd800 && ch < 0xe000. */ \ |
913 | " larl %[R_TMP],9f\n\t" /* Address of the constant table at label 9. */ \ |
914 | " vlm %%v20,%%v22,0(%[R_TMP])\n\t" /* v20, v21: range compare setup; v22: permute mask. */ \ |
915 | " srlg %[R_TMP],%[R_LEN],3\n\t" /* R_TMP = len / 8, the number of full 8-character blocks. */ \ |
916 | " clgije %[R_TMP],0,1f\n\t" /* No full block: continue with the tail at label 1. */ \ |
917 | /* Process 16byte (8char) blocks. */ \ |
918 | "0: vl %%v16,0(%[R_IN])\n\t" \ |
919 | " vperm %%v16,%%v16,%%v16,%%v22\n\t" /* Swap the two bytes of every halfword (mask v22). */ \ |
920 | " vstrchs %%v19,%%v16,%%v20,%%v21\n\t" /* Range compare; sets the condition code tested by jno below. */ \ |
921 | /* Enlarge UCS2 to UCS4. */ \ |
922 | " vuplhh %%v17,%%v16\n\t" \ |
923 | " vupllh %%v18,%%v16\n\t" \ |
924 | " jno 10f\n\t" /* A character in the range was found: leave the loop via label 10. */ \ |
925 | /* Store 32bytes to buf_out. */ \ |
926 | " vstm %%v17,%%v18,0(%[R_OUT])\n\t" \ |
927 | " la %[R_IN],16(%[R_IN])\n\t" \ |
928 | " la %[R_OUT],32(%[R_OUT])\n\t" \ |
929 | " brctg %[R_TMP],0b\n\t" /* Decrement the block counter and loop while it is not zero. */ \ |
930 | " llgfr %[R_LEN],%[R_LEN]\n\t" \ |
931 | " nilf %[R_LEN],7\n\t" /* Keep len % 8: the characters left for the tail. */ \ |
932 | /* Process <16bytes. */ \ |
933 | "1: sll %[R_LEN],1\n\t" /* Characters -> input bytes. */ \ |
934 | " ahik %[R_TMP],%[R_LEN],-1\n\t" /* R_TMP = tail byte count - 1, the length operand of vll. */ \ |
935 | " jl 20f\n\t" /* No further bytes available. */ \ |
936 | " vll %%v16,%[R_TMP],0(%[R_IN])\n\t" \ |
937 | " vperm %%v16,%%v16,%%v16,%%v22\n\t" \ |
938 | " vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \ |
939 | /* Enlarge UCS2 to UCS4. */ \ |
940 | " vuplhh %%v17,%%v16\n\t" \ |
941 | " vupllh %%v18,%%v16\n\t" \ |
942 | " vlgvb %[R_TMP],%%v19,7\n\t" /* R_TMP = byte 7 of v19, used below as the byte offset of the hit. */ \ |
943 | " clr %[R_TMP],%[R_LEN]\n\t" \ |
944 | " locgrhe %[R_TMP],%[R_LEN]\n\t" /* Offset not below the tail length: take the whole tail ... */ \ |
945 | " locghihe %[R_LEN],0\n\t" /* ... and set len to 0, so that no error is reported after the asm. */ \ |
946 | " j 11f\n\t" /* Jump over the constant table. */ \ |
947 | /* v20: Vector string range compare values. */ \ |
948 | "9: .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ |
949 | /* v21: Vector string range compare control-bits. \ |
950 | element 0: >=; element 1: < */ \ |
951 | " .short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ |
952 | /* v22: Vector permute mask. Selects bytes 1,0,3,2,...: swaps the bytes of each halfword. */ \ |
953 | " .short 0x0100,0x0302,0x0504,0x0706\n\t" \ |
954 | " .short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t" \ |
955 | /* Found an element: ch >= 0xd800 && ch < 0xe000 */ \ |
956 | "10: vlgvb %[R_TMP],%%v19,7\n\t" \ |
957 | "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" /* Advance the input pointer by the R_TMP accepted bytes. */ \ |
958 | " sll %[R_TMP],1\n\t" /* Input bytes -> output bytes (2 -> 4 per character). */ \ |
959 | " lgr %[R_TMP2],%[R_TMP]\n\t" /* Keep the output byte count for label 19. */ \ |
960 | " ahi %[R_TMP],-1\n\t" \ |
961 | " jl 20f\n\t" /* Nothing to store. */ \ |
962 | " vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t" /* Length-limited store of the first output vector. */ \ |
963 | " ahi %[R_TMP],-16\n\t" \ |
964 | " jl 19f\n\t" /* At most 16 output bytes: the second vector is not needed. */ \ |
965 | " vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t" \ |
966 | "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t" /* Advance the output pointer by the bytes written. */ \ |
967 | "20: \n\t" \ |
968 | ".machine pop" \ |
969 | : /* outputs */ [R_OUT] "+a" (outptr) \ |
970 | , [R_IN] "+a" (inptr) \ |
971 | , [R_TMP] "=a" (tmp) \ |
972 | , [R_TMP2] "=a" (tmp2) \ |
973 | , [R_LEN] "+d" (len) /* Still non-zero on exit only if a character in the range was found. */ \ |
974 | : /* inputs */ \ |
975 | : /* clobber list */ "memory", "cc" \ |
976 | ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ |
977 | ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ |
978 | ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ |
979 | ASM_CLOBBER_VR ("v22") \ |
980 | ); \ |
981 | if (len > 0) \ |
982 | { \ |
983 | /* Found an invalid character at next input-char. */ \ |
984 | BODY_ORIG_ERROR \ |
985 | } \ |
986 | } |
987 | # define LOOP_NEED_FLAGS /* Defined ahead of the iconv/loop.c include that follows. */ |
988 | # include <iconv/loop.c> /* Textual include; consumes the macros set up above (LOOPFCT, BODY, MIN_NEEDED_*). */ |
989 | # include <iconv/skeleton.c> |
990 | # undef BODY_ORIG /* Drop the helper macros; the next converter defines its own BODY_ORIG. */ |
991 | # undef BODY_ORIG_ERROR |
992 | ICONV_VX_IFUNC (__gconv_transform_ucs2reverse_internal) /* Defines the public symbol: per the macro's definition either a strong alias of the _vx function, or an ifunc that selects _vx when HWCAP_S390_VX is set and _c otherwise. */ |
993 | |
/* Internal (UCS4-like) format -> UCS2.
   Only this one direction is provided; a 4-byte input unit becomes a
   2-byte output unit.  */

/* Names of the generated function and of its conversion loop.  */
# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_internal_ucs2)
# define FROM_LOOP ICONV_VX_NAME (internal_ucs2_loop)
# define TO_LOOP ICONV_VX_NAME (internal_ucs2_loop) /* This is not used.  */
# define LOOPFCT FROM_LOOP

/* Direction setup.  */
# define ONE_DIRECTION 1
# define FROM_DIRECTION 1

/* DEFINE_INIT and DEFINE_FINI are both set to 0 for this step.  */
# define DEFINE_INIT 0
# define DEFINE_FINI 0

/* Unit sizes in bytes: 4 on the input side, 2 on the output side.  */
# define MIN_NEEDED_FROM 4
# define MIN_NEEDED_TO 2
# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define BODY_ORIG \
  {                                                                          \
    /* One 4-byte unit of the internal format.  */                           \
    uint32_t ucs4 = *((const uint32_t *) inptr);                             \
                                                                             \
    if (__glibc_unlikely (ucs4 >= 0x10000))                                  \
      {                                                                      \
        /* Does not fit into a 2-byte unit: let the tag handler and then     \
           the standard error handler deal with it.  */                      \
        UNICODE_TAG_HANDLER (ucs4, 4);                                       \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                    \
      }                                                                      \
    else if (! __glibc_unlikely (ucs4 >= 0xd800 && ucs4 < 0xe000))           \
      {                                                                      \
        /* Regular character: emit it as one 2-byte unit.  */                \
        put16 (outptr, ucs4);                                                \
        inptr += 4;                                                          \
        outptr += sizeof (uint16_t);                                         \
      }                                                                      \
    else                                                                     \
      {                                                                      \
        /* Surrogate characters in UCS-4 input are not valid.  They must     \
           be caught here, because the UCS-2 output might be interpreted     \
           as UTF-16 by other programs; letting surrogates through would     \
           allow an attacker to synthesize any desired plane 1-16            \
           character.                                                        \
           The result is set before the ignore check, so it stays            \
           __GCONV_ILLEGAL_INPUT even when the character is skipped.  */     \
        result = __GCONV_ILLEGAL_INPUT;                                      \
        if (ignore_errors_p ())                                              \
          {                                                                  \
            ++*irreversible;                                                 \
            inptr += 4;                                                      \
            continue;                                                        \
          }                                                                  \
        break;                                                               \
      }                                                                      \
  }
1039 | # define BODY /* Loop body: converts 8 four-byte characters per iteration with vector instructions; BODY_ORIG handles short buffers and the character the vector loop stopped at. */ \ |
1040 | { \ |
1041 | if (__builtin_expect (inend - inptr < 32, 1) \ |
1042 | || outend - outptr < 16) \ |
1043 | /* Convert remaining bytes with c code. */ \ |
1044 | BODY_ORIG \ |
1045 | else \ |
1046 | { \ |
1047 | /* Convert in 32 byte blocks. */ \ |
1048 | size_t loop_count = (inend - inptr) / 32; /* 32-byte input blocks available ... */ \ |
1049 | size_t tmp, tmp2; \ |
1050 | if (loop_count > (outend - outptr) / 16) /* ... limited by the 16-byte output blocks that fit. */ \ |
1051 | loop_count = (outend - outptr) / 16; \ |
1052 | __asm__ volatile (".machine push\n\t" \ |
1053 | ".machine \"z13\"\n\t" \ |
1054 | ".machinemode \"zarch_nohighgprs\"\n\t" \ |
1055 | CONVERT_32BIT_SIZE_T ([R_LI]) /* NOTE(review): defined elsewhere; presumably prepares a 32-bit size_t in R_LI for 64-bit use - confirm. */ \ |
1056 | " larl %[R_I],3f\n\t" /* Address of the constant table at label 3. */ \ |
1057 | " vlm %%v20,%%v23,0(%[R_I])\n\t" /* Load the four table vectors into v20..v23. */ \ |
1058 | "0: \n\t" \ |
1059 | " vlm %%v16,%%v17,0(%[R_IN])\n\t" /* Load 32 input bytes (8 characters) into v16 and v17. */ \ |
1060 | /* Shorten UCS4 to UCS2. */ \ |
1061 | " vpkf %%v18,%%v16,%%v17\n\t" \ |
1062 | " vstrcfs %%v19,%%v16,%%v20,%%v21\n\t" /* Look for ch >= 0xd800 in v16. */ \ |
1063 | " jno 11f\n\t" /* Found one: examine v16 more closely at label 11. */ \ |
1064 | "1: vstrcfs %%v19,%%v17,%%v20,%%v21\n\t" /* Same check for v17. */ \ |
1065 | " jno 10f\n\t" \ |
1066 | /* Store 16bytes to buf_out. */ \ |
1067 | "2: vst %%v18,0(%[R_OUT])\n\t" \ |
1068 | " la %[R_IN],32(%[R_IN])\n\t" \ |
1069 | " la %[R_OUT],16(%[R_OUT])\n\t" \ |
1070 | " brctg %[R_LI],0b\n\t" /* Decrement the block counter and loop while it is not zero. */ \ |
1071 | " j 20f\n\t" /* All blocks converted; also jumps over the constant table. */ \ |
1072 | /* Setup to check for ch >= 0xd800. (v20, v21) */ \ |
1073 | "3: .long 0xd800,0xd800,0x0,0x0\n\t" \ |
1074 | " .long 0xa0000000,0xa0000000,0x0,0x0\n\t" \ |
1075 | /* Setup to check for ch >= 0xe000 \ |
1076 | && ch < 0x10000. (v22,v23) */ \ |
1077 | " .long 0xe000,0x10000,0x0,0x0\n\t" \ |
1078 | " .long 0xa0000000,0x40000000,0x0,0x0\n\t" \ |
1079 | /* v16 contains only valid chars. Check in v17: \ |
1080 | ch >= 0xe000 && ch <= 0xffff. */ \ |
1081 | "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t" /* NOTE(review): the trailing 8 presumably inverts the result, so the hit is the first character outside the range - confirm. */ \ |
1082 | " jo 2b\n\t" /* All ch's in this range, proceed. */ \ |
1083 | " lghi %[R_TMP],16\n\t" /* The hit is in v17: its input bytes start at offset 16 of the block. */ \ |
1084 | " j 12f\n\t" \ |
1085 | /* Maybe v16 contains invalid chars. \ |
1086 | Check ch >= 0xe000 && ch <= 0xffff. */ \ |
1087 | "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t" \ |
1088 | " jo 1b\n\t" /* All ch's in this range, proceed. */ \ |
1089 | " lghi %[R_TMP],0\n\t" /* The hit is in v16: block offset 0. */ \ |
1090 | "12: vlgvb %[R_I],%%v19,7\n\t" /* R_I = byte 7 of v19, used as the byte offset of the hit within its vector. */ \ |
1091 | " agr %[R_I],%[R_TMP]\n\t" /* Byte offset of the hit within the 32-byte block. */ \ |
1092 | " la %[R_IN],0(%[R_I],%[R_IN])\n\t" /* Advance the input pointer up to the hit. */ \ |
1093 | " srl %[R_I],1\n\t" /* Input bytes -> output bytes (4 -> 2 per character). */ \ |
1094 | " ahi %[R_I],-1\n\t" \ |
1095 | " jl 20f\n\t" /* Nothing to store in front of the hit. */ \ |
1096 | " vstl %%v18,%[R_I],0(%[R_OUT])\n\t" /* Length-limited store of the characters in front of the hit. */ \ |
1097 | " la %[R_OUT],1(%[R_I],%[R_OUT])\n\t" /* Advance the output pointer by R_I + 1 bytes. */ \ |
1098 | "20:\n\t" \ |
1099 | ".machine pop" \ |
1100 | : /* outputs */ [R_OUT] "+a" (outptr) \ |
1101 | , [R_IN] "+a" (inptr) \ |
1102 | , [R_LI] "+d" (loop_count) /* Reaches 0 only when every block was converted. */ \ |
1103 | , [R_I] "=a" (tmp2) \ |
1104 | , [R_TMP] "=d" (tmp) \ |
1105 | : /* inputs */ \ |
1106 | : /* clobber list */ "memory", "cc" \ |
1107 | ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ |
1108 | ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ |
1109 | ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ |
1110 | ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \ |
1111 | ); \ |
1112 | if (loop_count > 0) \ |
1113 | { \ |
1114 | /* Found an invalid character at next character. */ \ |
1115 | BODY_ORIG \ |
1116 | } \ |
1117 | } \ |
1118 | } |
1119 | #define LOOP_NEED_FLAGS /* Defined ahead of the iconv/loop.c include that follows. */ |
1120 | #include <iconv/loop.c> /* Textual include; consumes the macros set up above (LOOPFCT, BODY, MIN_NEEDED_*). */ |
1121 | #include <iconv/skeleton.c> |
1122 | # undef BODY_ORIG /* Drop the helper macro; the next converter defines its own. */ |
1123 | ICONV_VX_IFUNC (__gconv_transform_internal_ucs2) /* Defines the public symbol: per the macro's definition either a strong alias of the _vx function, or an ifunc that selects _vx when HWCAP_S390_VX is set and _c otherwise. */ |
1124 | |
/* Internal (UCS4-like) format -> UCS2 in the other endianness.
   Only this one direction is provided; a 4-byte input unit becomes a
   2-byte output unit.  */

/* Names of the generated function and of its conversion loop.  */
# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_internal_ucs2reverse)
# define FROM_LOOP ICONV_VX_NAME (internal_ucs2reverse_loop)
# define TO_LOOP ICONV_VX_NAME (internal_ucs2reverse_loop) /* This is not used.  */
# define LOOPFCT FROM_LOOP

/* Direction setup.  */
# define ONE_DIRECTION 1
# define FROM_DIRECTION 1

/* DEFINE_INIT and DEFINE_FINI are both set to 0 for this step.  */
# define DEFINE_INIT 0
# define DEFINE_FINI 0

/* Unit sizes in bytes: 4 on the input side, 2 on the output side.  */
# define MIN_NEEDED_FROM 4
# define MIN_NEEDED_TO 2
# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define BODY_ORIG \
  {                                                                          \
    /* One 4-byte unit of the internal format.  */                           \
    uint32_t ucs4 = *((const uint32_t *) inptr);                             \
                                                                             \
    if (__glibc_unlikely (ucs4 >= 0x10000))                                  \
      {                                                                      \
        /* Does not fit into a 2-byte unit: let the tag handler and then     \
           the standard error handler deal with it.  */                      \
        UNICODE_TAG_HANDLER (ucs4, 4);                                       \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                    \
      }                                                                      \
    else if (! __glibc_unlikely (ucs4 >= 0xd800 && ucs4 < 0xe000))           \
      {                                                                      \
        /* Regular character: emit it byte-swapped as one 2-byte unit.  */   \
        put16 (outptr, bswap_16 (ucs4));                                     \
        inptr += 4;                                                          \
        outptr += sizeof (uint16_t);                                         \
      }                                                                      \
    else                                                                     \
      {                                                                      \
        /* Surrogate characters in UCS-4 input are not valid.  They must     \
           be caught here, because the UCS-2 output might be interpreted     \
           as UTF-16 by other programs; letting surrogates through would     \
           allow an attacker to synthesize any desired plane 1-16            \
           character.  */                                                    \
        if (ignore_errors_p ())                                              \
          {                                                                  \
            /* Skip the character and count it as irreversible.  */          \
            ++*irreversible;                                                 \
            inptr += 4;                                                      \
            continue;                                                        \
          }                                                                  \
        result = __GCONV_ILLEGAL_INPUT;                                      \
        break;                                                               \
      }                                                                      \
  }
1171 | # define BODY /* Loop body: converts 8 four-byte characters per iteration to byte-swapped 2-byte units with vector instructions; BODY_ORIG handles short buffers and the character the vector loop stopped at. */ \ |
1172 | { \ |
1173 | if (__builtin_expect (inend - inptr < 32, 1) \ |
1174 | || outend - outptr < 16) \ |
1175 | /* Convert remaining bytes with c code. */ \ |
1176 | BODY_ORIG \ |
1177 | else \ |
1178 | { \ |
1179 | /* Convert in 32 byte blocks. */ \ |
1180 | size_t loop_count = (inend - inptr) / 32; /* 32-byte input blocks available ... */ \ |
1181 | size_t tmp, tmp2; \ |
1182 | if (loop_count > (outend - outptr) / 16) /* ... limited by the 16-byte output blocks that fit. */ \ |
1183 | loop_count = (outend - outptr) / 16; \ |
1184 | __asm__ volatile (".machine push\n\t" \ |
1185 | ".machine \"z13\"\n\t" \ |
1186 | ".machinemode \"zarch_nohighgprs\"\n\t" \ |
1187 | CONVERT_32BIT_SIZE_T ([R_LI]) /* NOTE(review): defined elsewhere; presumably prepares a 32-bit size_t in R_LI for 64-bit use - confirm. */ \ |
1188 | " larl %[R_I],3f\n\t" /* Address of the constant table at label 3. */ \ |
1189 | " vlm %%v20,%%v24,0(%[R_I])\n\t" /* Load the five table vectors into v20..v24. */ \ |
1190 | "0: \n\t" \ |
1191 | " vlm %%v16,%%v17,0(%[R_IN])\n\t" /* Load 32 input bytes (8 characters) into v16 and v17. */ \ |
1192 | /* Shorten UCS4 to UCS2 and byteswap. */ \ |
1193 | " vpkf %%v18,%%v16,%%v17\n\t" \ |
1194 | " vperm %%v18,%%v18,%%v18,%%v24\n\t" /* Swap the two bytes of every halfword (mask v24). */ \ |
1195 | " vstrcfs %%v19,%%v16,%%v20,%%v21\n\t" /* Look for ch >= 0xd800 in v16. */ \ |
1196 | " jno 11f\n\t" /* Found one: examine v16 more closely at label 11. */ \ |
1197 | "1: vstrcfs %%v19,%%v17,%%v20,%%v21\n\t" /* Same check for v17. */ \ |
1198 | " jno 10f\n\t" \ |
1199 | /* Store 16bytes to buf_out. */ \ |
1200 | "2: vst %%v18,0(%[R_OUT])\n\t" \ |
1201 | " la %[R_IN],32(%[R_IN])\n\t" \ |
1202 | " la %[R_OUT],16(%[R_OUT])\n\t" \ |
1203 | " brctg %[R_LI],0b\n\t" /* Decrement the block counter and loop while it is not zero. */ \ |
1204 | " j 20f\n\t" /* All blocks converted; also jumps over the constant table. */ \ |
1205 | /* Setup to check for ch >= 0xd800. (v20, v21) */ \ |
1206 | "3: .long 0xd800,0xd800,0x0,0x0\n\t" \ |
1207 | " .long 0xa0000000,0xa0000000,0x0,0x0\n\t" \ |
1208 | /* Setup to check for ch >= 0xe000 \ |
1209 | && ch < 0x10000. (v22,v23) */ \ |
1210 | " .long 0xe000,0x10000,0x0,0x0\n\t" \ |
1211 | " .long 0xa0000000,0x40000000,0x0,0x0\n\t" \ |
1212 | /* Vector permute mask (v24). Selects bytes 1,0,3,2,...: swaps the bytes of each halfword. */ \ |
1213 | " .short 0x0100,0x0302,0x0504,0x0706\n\t" \ |
1214 | " .short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t" \ |
1215 | /* v16 contains only valid chars. Check in v17: \ |
1216 | ch >= 0xe000 && ch <= 0xffff. */ \ |
1217 | "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t" /* NOTE(review): the trailing 8 presumably inverts the result, so the hit is the first character outside the range - confirm. */ \ |
1218 | " jo 2b\n\t" /* All ch's in this range, proceed. */ \ |
1219 | " lghi %[R_TMP],16\n\t" /* The hit is in v17: its input bytes start at offset 16 of the block. */ \ |
1220 | " j 12f\n\t" \ |
1221 | /* Maybe v16 contains invalid chars. \ |
1222 | Check ch >= 0xe000 && ch <= 0xffff. */ \ |
1223 | "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t" \ |
1224 | " jo 1b\n\t" /* All ch's in this range, proceed. */ \ |
1225 | " lghi %[R_TMP],0\n\t" /* The hit is in v16: block offset 0. */ \ |
1226 | "12: vlgvb %[R_I],%%v19,7\n\t" /* R_I = byte 7 of v19, used as the byte offset of the hit within its vector. */ \ |
1227 | " agr %[R_I],%[R_TMP]\n\t" /* Byte offset of the hit within the 32-byte block. */ \ |
1228 | " la %[R_IN],0(%[R_I],%[R_IN])\n\t" /* Advance the input pointer up to the hit. */ \ |
1229 | " srl %[R_I],1\n\t" /* Input bytes -> output bytes (4 -> 2 per character). */ \ |
1230 | " ahi %[R_I],-1\n\t" \ |
1231 | " jl 20f\n\t" /* Nothing to store in front of the hit. */ \ |
1232 | " vstl %%v18,%[R_I],0(%[R_OUT])\n\t" /* Length-limited store of the characters in front of the hit. */ \ |
1233 | " la %[R_OUT],1(%[R_I],%[R_OUT])\n\t" /* Advance the output pointer by R_I + 1 bytes. */ \ |
1234 | "20:\n\t" \ |
1235 | ".machine pop" \ |
1236 | : /* outputs */ [R_OUT] "+a" (outptr) \ |
1237 | , [R_IN] "+a" (inptr) \ |
1238 | , [R_LI] "+d" (loop_count) /* Reaches 0 only when every block was converted. */ \ |
1239 | , [R_I] "=a" (tmp2) \ |
1240 | , [R_TMP] "=d" (tmp) \ |
1241 | : /* inputs */ \ |
1242 | : /* clobber list */ "memory", "cc" \ |
1243 | ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ |
1244 | ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ |
1245 | ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ |
1246 | ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \ |
1247 | ASM_CLOBBER_VR ("v24") \ |
1248 | ); \ |
1249 | if (loop_count > 0) \ |
1250 | { \ |
1251 | /* Found an invalid character at next character. */ \ |
1252 | BODY_ORIG \ |
1253 | } \ |
1254 | } \ |
1255 | } |
1256 | #define LOOP_NEED_FLAGS /* Defined ahead of the iconv/loop.c include that follows. */ |
1257 | #include <iconv/loop.c> /* Textual include; consumes the macros set up above (LOOPFCT, BODY, MIN_NEEDED_*). */ |
1258 | #include <iconv/skeleton.c> |
1259 | # undef BODY_ORIG /* The helper macro is no longer needed. */ |
1260 | ICONV_VX_IFUNC (__gconv_transform_internal_ucs2reverse) /* Defines the public symbol: per the macro's definition either a strong alias of the _vx function, or an ifunc that selects _vx when HWCAP_S390_VX is set and _c otherwise. */ |
1261 | |
1262 | |
1263 | #else |
1264 | /* Generate the internal transformations without ifunc if the build environment |
1265 | lacks vector support. In that case simply include the common version. */ |
1266 | # include <iconv/gconv_simple.c> |
1267 | #endif /* !defined HAVE_S390_VX_ASM_SUPPORT */ |
1268 | |