1/* Conversion between UTF-16 and UTF-32 BE/internal.
2
3 This module uses the Z9-109 variants of the Convert Unicode
4 instructions.
5 Copyright (C) 1997-2022 Free Software Foundation, Inc.
6
7 This is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 This is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, see
19 <https://www.gnu.org/licenses/>. */
20
21#include <dlfcn.h>
22#include <stdint.h>
23#include <unistd.h>
24#include <gconv.h>
25#include <string.h>
26
/* Select which versions should be defined depending on support
   for multiarch, vector and used minimum architecture level.  */

/* The plain C loops are always built and are the default loops.  */
#define HAVE_FROM_C 1
#define FROM_LOOP_DEFAULT FROM_LOOP_C
#define HAVE_TO_C 1
#define TO_LOOP_DEFAULT TO_LOOP_C

/* The vector loops (VX) and the vector + convert-unicode loops (VX_CU)
   are only built when both HAVE_S390_VX_ASM_SUPPORT and USE_MULTIARCH
   are defined.  */
#if defined HAVE_S390_VX_ASM_SUPPORT && defined USE_MULTIARCH
# define HAVE_FROM_VX 1
# define HAVE_FROM_VX_CU 1
# define HAVE_TO_VX 1
# define HAVE_TO_VX_CU 1
#else
# define HAVE_FROM_VX 0
# define HAVE_FROM_VX_CU 0
# define HAVE_TO_VX 0
# define HAVE_TO_VX_CU 0
#endif

/* Append a vector register NR to an inline-asm clobber list.  Expands to
   nothing unless HAVE_S390_VX_GCC_SUPPORT is defined.  */
#if defined HAVE_S390_VX_GCC_SUPPORT
# define ASM_CLOBBER_VR(NR) , NR
#else
# define ASM_CLOBBER_VR(NR)
#endif

/* Asm snippet applied to the size_t length operands at the start of the
   vector loops.  It is empty on s390x; otherwise it emits "llgfr" on the
   operand REG (the lengths are then used with 64-bit instructions).  */
#if defined __s390x__
# define CONVERT_32BIT_SIZE_T(REG)
#else
# define CONVERT_32BIT_SIZE_T(REG) "llgfr %" #REG ",%" #REG "\n\t"
#endif
57
/* UTF-32 big endian byte order mark.  */
#define BOM_UTF32 0x0000feffu

/* UTF-16 big endian byte order mark.  */
#define BOM_UTF16 0xfeff

/* Parameters for the loop/skeleton templates included further down.
   This file supplies its own gconv_init/gconv_end, so the template
   versions are switched off (DEFINE_INIT/DEFINE_FINI == 0).
   The "FROM" side is UTF-16 (2 bytes per code unit, 4 bytes for a
   surrogate pair), the "TO" side is UTF-32 (always 4 bytes).  */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
#define MIN_NEEDED_FROM 2
#define MAX_NEEDED_FROM 4
#define MIN_NEEDED_TO 4
#define FROM_LOOP FROM_LOOP_DEFAULT
#define TO_LOOP TO_LOOP_DEFAULT
/* DIR is the local variable set up by PREPARE_LOOP.  */
#define FROM_DIRECTION (dir == from_utf16)
#define ONE_DIRECTION 0
73
/* Direction of the transformation.  */
enum direction
{
  illegal_dir,  /* Charset pair not handled by this module.  */
  to_utf16,     /* UTF-32BE / INTERNAL -> UTF-16.  */
  from_utf16    /* UTF-16BE -> UTF-32 / INTERNAL.  */
};

/* Per-step private data, allocated in gconv_init and stored in
   step->__data.  */
struct utf16_data
{
  enum direction dir;  /* Which of the two loops to run.  */
  int emit_bom;        /* Nonzero: write a byte order mark first.  */
};
87
88
89extern int gconv_init (struct __gconv_step *step);
90int
91gconv_init (struct __gconv_step *step)
92{
93 /* Determine which direction. */
94 struct utf16_data *new_data;
95 enum direction dir = illegal_dir;
96 int emit_bom;
97 int result;
98
99 emit_bom = (__strcasecmp (s1: step->__to_name, s2: "UTF-32//") == 0
100 || __strcasecmp (s1: step->__to_name, s2: "UTF-16//") == 0);
101
102 if (__strcasecmp (s1: step->__from_name, s2: "UTF-16BE//") == 0
103 && (__strcasecmp (s1: step->__to_name, s2: "UTF-32//") == 0
104 || __strcasecmp (s1: step->__to_name, s2: "UTF-32BE//") == 0
105 || __strcasecmp (s1: step->__to_name, s2: "INTERNAL") == 0))
106 {
107 dir = from_utf16;
108 }
109 else if ((__strcasecmp (s1: step->__to_name, s2: "UTF-16//") == 0
110 || __strcasecmp (s1: step->__to_name, s2: "UTF-16BE//") == 0)
111 && (__strcasecmp (s1: step->__from_name, s2: "UTF-32BE//") == 0
112 || __strcasecmp (s1: step->__from_name, s2: "INTERNAL") == 0))
113 {
114 dir = to_utf16;
115 }
116
117 result = __GCONV_NOCONV;
118 if (dir != illegal_dir)
119 {
120 new_data = (struct utf16_data *) malloc (size: sizeof (struct utf16_data));
121
122 result = __GCONV_NOMEM;
123 if (new_data != NULL)
124 {
125 new_data->dir = dir;
126 new_data->emit_bom = emit_bom;
127 step->__data = new_data;
128
129 if (dir == from_utf16)
130 {
131 step->__min_needed_from = MIN_NEEDED_FROM;
132 step->__max_needed_from = MIN_NEEDED_FROM;
133 step->__min_needed_to = MIN_NEEDED_TO;
134 step->__max_needed_to = MIN_NEEDED_TO;
135 }
136 else
137 {
138 step->__min_needed_from = MIN_NEEDED_TO;
139 step->__max_needed_from = MIN_NEEDED_TO;
140 step->__min_needed_to = MIN_NEEDED_FROM;
141 step->__max_needed_to = MIN_NEEDED_FROM;
142 }
143
144 step->__stateful = 0;
145
146 result = __GCONV_OK;
147 }
148 }
149
150 return result;
151}
152
153
/* Module teardown: release the struct utf16_data that gconv_init stored
   in DATA->__data.  */
extern void gconv_end (struct __gconv_step *data);
void
gconv_end (struct __gconv_step *data)
{
  free (data->__data);
}
160
/* Per-call setup expanded inside the conversion function generated by the
   skeleton.  It expects STEP, DATA, OUTBUF and OUTEND to be in scope.

   It reads the direction and BOM flag stored by gconv_init.  If a BOM was
   requested, the call is not for internal use and this is the first
   invocation, it writes the byte order mark of the target encoding and
   advances OUTBUF.  If the BOM does not fit, the enclosing function
   returns __GCONV_FULL_OUTPUT.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct utf16_data *) step->__data)->dir; \
  int emit_bom = ((struct utf16_data *) step->__data)->emit_bom; \
  \
  if (emit_bom && !data->__internal_use \
      && data->__invocation_counter == 0) \
    { \
      if (dir == to_utf16) \
        { \
          /* Emit the UTF-16 Byte Order Mark.  */ \
          if (__glibc_unlikely (outbuf + 2 > outend)) \
            return __GCONV_FULL_OUTPUT; \
          \
          put16u (outbuf, BOM_UTF16); \
          outbuf += 2; \
        } \
      else \
        { \
          /* Emit the UTF-32 Byte Order Mark.  */ \
          if (__glibc_unlikely (outbuf + 4 > outend)) \
            return __GCONV_FULL_OUTPUT; \
          \
          put32u (outbuf, BOM_UTF32); \
          outbuf += 4; \
        } \
    }
187
/* Conversion function from UTF-16 to UTF-32 internal/BE.  */

#if HAVE_FROM_C == 1
/* The software routine is copied from utf-16.c (minus bytes
   swapping).

   Each expansion converts one character:
     - a code unit outside 0xd800..0xdfff is widened to 32 bit
       (2 bytes in, 4 bytes out);
     - a high surrogate (0xd800..0xdbff) followed by a low surrogate
       (0xdc00..0xdfff) is combined into one code point
       (4 bytes in, 4 bytes out);
     - a leading low surrogate, or a high surrogate not followed by a
       low surrogate, goes through STANDARD_FROM_LOOP_ERR_HANDLER (2);
     - a high surrogate with fewer than 4 input bytes left sets
       result to __GCONV_INCOMPLETE_INPUT and leaves the loop.  */
# define BODY_FROM_C \
  { \
    uint16_t u1 = get16 (inptr); \
    \
    if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff) \
      { \
        /* No surrogate.  */ \
        put32 (outptr, u1); \
        inptr += 2; \
      } \
    else \
      { \
        /* An isolated low-surrogate was found.  This has to be \
           considered ill-formed.  */ \
        if (__glibc_unlikely (u1 >= 0xdc00)) \
          { \
            STANDARD_FROM_LOOP_ERR_HANDLER (2); \
          } \
        /* It's a surrogate character.  At least the first word says \
           it is.  */ \
        if (__glibc_unlikely (inptr + 4 > inend)) \
          { \
            /* We don't have enough input for another complete input \
               character.  */ \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        \
        inptr += 2; \
        uint16_t u2 = get16 (inptr); \
        if (__builtin_expect (u2 < 0xdc00, 0) \
            || __builtin_expect (u2 > 0xdfff, 0)) \
          { \
            /* This is no valid second word for a surrogate.  Step \
               back so that the error is reported at the high \
               surrogate.  */ \
            inptr -= 2; \
            STANDARD_FROM_LOOP_ERR_HANDLER (2); \
          } \
        \
        /* 0xd7c0 == 0xd800 - (0x10000 >> 10): removes the surrogate \
           tag and adds the 0x10000 offset in one step.  */ \
        put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00)); \
        inptr += 2; \
      } \
    outptr += 4; \
  }


/* Generate loop-function with software routine.  */
# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MAX_NEEDED_INPUT MAX_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define FROM_LOOP_C __from_utf16_loop_c
# define LOOPFCT FROM_LOOP_C
# define LOOP_NEED_FLAGS
# define BODY BODY_FROM_C
# include <iconv/loop.c>
#else
# define FROM_LOOP_C NULL
#endif /* HAVE_FROM_C != 1  */
250
#if HAVE_FROM_VX == 1
/* UTF-16 -> UTF-32 loop body using z13 vector instructions.

   Vector loop (labels 0/1): while at least 16 input bytes and 32 output
   bytes are available, 16 bytes are loaded, checked against the
   surrogate range 0xd800..0xdfff (constants at label 9, kept in
   v30/v31), widened to 32 bit and stored as 32 bytes.

   Label 10: a unit in the surrogate range was found in the loaded
   vector; the units preceding it are stored and the scalar code takes
   over at label 16.
   Labels 2/13-16: scalar handling of the remaining 16-bit values,
   including surrogate pairs; afterwards control returns to label 0.

   Exit labels: 96 sets result to __GCONV_FULL_OUTPUT, 97 to
   __GCONV_INCOMPLETE_INPUT, 98 to __GCONV_ILLEGAL_INPUT; 99 leaves
   result unchanged.

   After the asm, the loop is left unless result is
   __GCONV_ILLEGAL_INPUT with input remaining, in which case
   STANDARD_FROM_LOOP_ERR_HANDLER (2) is run.  */
# define BODY_FROM_VX \
  { \
    size_t inlen = inend - inptr; \
    size_t outlen = outend - outptr; \
    unsigned long tmp, tmp2, tmp3; \
    asm volatile (".machine push\n\t" \
                  ".machine \"z13\"\n\t" \
                  ".machinemode \"zarch_nohighgprs\"\n\t" \
                  /* Setup to check for surrogates.  */ \
                  " larl %[R_TMP],9f\n\t" \
                  " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \
                  CONVERT_32BIT_SIZE_T ([R_INLEN]) \
                  CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \
                  /* Loop which handles UTF-16 chars <0xd800, >0xdfff.  */ \
                  "0: clgijl %[R_INLEN],16,2f\n\t" \
                  " clgijl %[R_OUTLEN],32,2f\n\t" \
                  "1: vl %%v16,0(%[R_IN])\n\t" \
                  /* Check for surrogate chars.  */ \
                  " vstrchs %%v19,%%v16,%%v30,%%v31\n\t" \
                  " jno 10f\n\t" \
                  /* Enlarge to UTF-32.  */ \
                  " vuplhh %%v17,%%v16\n\t" \
                  " la %[R_IN],16(%[R_IN])\n\t" \
                  " vupllh %%v18,%%v16\n\t" \
                  " aghi %[R_INLEN],-16\n\t" \
                  /* Store 32 bytes to buf_out.  */ \
                  " vstm %%v17,%%v18,0(%[R_OUT])\n\t" \
                  " aghi %[R_OUTLEN],-32\n\t" \
                  " la %[R_OUT],32(%[R_OUT])\n\t" \
                  " clgijl %[R_INLEN],16,2f\n\t" \
                  " clgijl %[R_OUTLEN],32,2f\n\t" \
                  " j 1b\n\t" \
                  /* Setup to check for ch >= 0xd800 && ch <= 0xdfff. (v30, v31)  */ \
                  "9: .short 0xd800,0xdfff,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
                  " .short 0xa000,0xc000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
                  /* At least one uint16_t is in range of surrogates. \
                     Store the preceding chars.  */ \
                  "10: vlgvb %[R_TMP],%%v19,7\n\t" \
                  " vuplhh %%v17,%%v16\n\t" \
                  " sllg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
                  " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
                  " jl 12f\n\t" \
                  " vstl %%v17,%[R_TMP2],0(%[R_OUT])\n\t" \
                  " vupllh %%v18,%%v16\n\t" \
                  " ahi %[R_TMP2],-16\n\t" \
                  " jl 11f\n\t" \
                  " vstl %%v18,%[R_TMP2],16(%[R_OUT])\n\t" \
                  "11: \n\t" /* Update pointers.  */ \
                  " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
                  " slgr %[R_INLEN],%[R_TMP]\n\t" \
                  " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \
                  " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \
                  /* Calculate remaining uint16_t values in loaded vrs.  */ \
                  "12: lghi %[R_TMP2],16\n\t" \
                  " slgr %[R_TMP2],%[R_TMP]\n\t" \
                  " srl %[R_TMP2],1\n\t" \
                  " llh %[R_TMP],0(%[R_IN])\n\t" \
                  " aghi %[R_OUTLEN],-4\n\t" \
                  " j 16f\n\t" \
                  /* Handle remaining bytes.  */ \
                  "2: \n\t" \
                  /* Zero, one or more bytes available?  */ \
                  " clgfi %[R_INLEN],1\n\t" \
                  " je 97f\n\t" /* Only one byte available.  */ \
                  " jl 99f\n\t" /* End if no bytes available.  */ \
                  /* Calculate remaining uint16_t values in inptr.  */ \
                  " srlg %[R_TMP2],%[R_INLEN],1\n\t" \
                  /* Handle remaining uint16_t values.  */ \
                  "13: llh %[R_TMP],0(%[R_IN])\n\t" \
                  " slgfi %[R_OUTLEN],4\n\t" \
                  " jl 96f \n\t" \
                  " clfi %[R_TMP],0xd800\n\t" \
                  " jhe 15f\n\t" \
                  "14: st %[R_TMP],0(%[R_OUT])\n\t" \
                  " la %[R_IN],2(%[R_IN])\n\t" \
                  " aghi %[R_INLEN],-2\n\t" \
                  " la %[R_OUT],4(%[R_OUT])\n\t" \
                  " brctg %[R_TMP2],13b\n\t" \
                  " j 0b\n\t" /* Switch to vx-loop.  */ \
                  /* Handle UTF-16 surrogate pair.  */ \
                  "15: clfi %[R_TMP],0xdfff\n\t" \
                  " jh 14b\n\t" /* Jump away if ch > 0xdfff.  */ \
                  "16: clfi %[R_TMP],0xdc00\n\t" \
                  " jhe 98f\n\t" /* Jump away in case of low-surrogate.  */ \
                  " slgfi %[R_INLEN],4\n\t" \
                  " jl 97f\n\t" /* Big enough input?  */ \
                  " llh %[R_TMP3],2(%[R_IN])\n\t" /* Load low surrogate.  */ \
                  " slfi %[R_TMP],0xd7c0\n\t" \
                  " sll %[R_TMP],10\n\t" \
                  " risbgn %[R_TMP],%[R_TMP3],54,63,0\n\t" /* Insert klmnopqrst.  */ \
                  " nilf %[R_TMP3],0xfc00\n\t" \
                  " clfi %[R_TMP3],0xdc00\n\t" /* Check if it starts with 0xdc00.  */ \
                  " jne 98f\n\t" \
                  " st %[R_TMP],0(%[R_OUT])\n\t" \
                  " la %[R_IN],4(%[R_IN])\n\t" \
                  " la %[R_OUT],4(%[R_OUT])\n\t" \
                  " aghi %[R_TMP2],-2\n\t" \
                  " jh 13b\n\t" /* Handle remaining uint16_t values.  */ \
                  " j 0b\n\t" /* Switch to vx-loop.  */ \
                  "96: \n\t" /* Return full output.  */ \
                  " lghi %[R_RES],%[RES_OUT_FULL]\n\t" \
                  " j 99f\n\t" \
                  "97: \n\t" /* Return incomplete input.  */ \
                  " lghi %[R_RES],%[RES_IN_FULL]\n\t" \
                  " j 99f\n\t" \
                  "98:\n\t" /* Return Illegal character.  */ \
                  " lghi %[R_RES],%[RES_IN_ILL]\n\t" \
                  "99:\n\t" \
                  ".machine pop" \
                  : /* outputs */ [R_IN] "+a" (inptr) \
                    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr) \
                    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \
                    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \
                    , [R_RES] "+d" (result) \
                  : /* inputs */ \
                    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \
                    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \
                    , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \
                  : /* clobber list */ "memory", "cc" \
                    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
                    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
                    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \
                  ); \
    /* Done, or stopped for a reason other than an illegal character.  */ \
    if (__glibc_likely (inptr == inend) \
        || result != __GCONV_ILLEGAL_INPUT) \
      break; \
    \
    STANDARD_FROM_LOOP_ERR_HANDLER (2); \
  }

/* Generate loop-function with hardware vector instructions.  */
# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MAX_NEEDED_INPUT MAX_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define FROM_LOOP_VX __from_utf16_loop_vx
# define LOOPFCT FROM_LOOP_VX
# define LOOP_NEED_FLAGS
# define BODY BODY_FROM_VX
# include <iconv/loop.c>
#else
# define FROM_LOOP_VX NULL
#endif /* HAVE_FROM_VX != 1  */
394
#if HAVE_FROM_VX_CU == 1
/* UTF-16 -> UTF-32 loop body using z13 vector instructions plus the
   cu24 convert instruction.

   The vector loop (labels 0/1) is the same as in BODY_FROM_VX: 16 input
   bytes without surrogates become 32 output bytes.  Whenever the vector
   loop cannot continue (too little input/output left, or a unit in the
   surrogate range was found) control goes to label 20, where cu24
   converts the data.  With condition code 3 the vector loop is tried
   again; cc 1 sets result to __GCONV_FULL_OUTPUT, cc 2 to
   __GCONV_ILLEGAL_INPUT; cc 0 leaves result unchanged.

   The pointer/length variables are pinned to r8/r9 and r10/r11.
   NOTE(review): presumably because cu24 operates on even/odd
   (address, length) register pairs -- confirm against the
   z/Architecture Principles of Operation.  */
# define BODY_FROM_VX_CU \
  { \
    register const unsigned char* pInput asm ("8") = inptr; \
    register size_t inlen asm ("9") = inend - inptr; \
    register unsigned char* pOutput asm ("10") = outptr; \
    register size_t outlen asm ("11") = outend - outptr; \
    unsigned long tmp, tmp2, tmp3; \
    asm volatile (".machine push\n\t" \
                  ".machine \"z13\"\n\t" \
                  ".machinemode \"zarch_nohighgprs\"\n\t" \
                  /* Setup to check for surrogates.  */ \
                  " larl %[R_TMP],9f\n\t" \
                  " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \
                  CONVERT_32BIT_SIZE_T ([R_INLEN]) \
                  CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \
                  /* Loop which handles UTF-16 chars <0xd800, >0xdfff.  */ \
                  "0: clgijl %[R_INLEN],16,20f\n\t" \
                  " clgijl %[R_OUTLEN],32,20f\n\t" \
                  "1: vl %%v16,0(%[R_IN])\n\t" \
                  /* Check for surrogate chars.  */ \
                  " vstrchs %%v19,%%v16,%%v30,%%v31\n\t" \
                  " jno 10f\n\t" \
                  /* Enlarge to UTF-32.  */ \
                  " vuplhh %%v17,%%v16\n\t" \
                  " la %[R_IN],16(%[R_IN])\n\t" \
                  " vupllh %%v18,%%v16\n\t" \
                  " aghi %[R_INLEN],-16\n\t" \
                  /* Store 32 bytes to buf_out.  */ \
                  " vstm %%v17,%%v18,0(%[R_OUT])\n\t" \
                  " aghi %[R_OUTLEN],-32\n\t" \
                  " la %[R_OUT],32(%[R_OUT])\n\t" \
                  " clgijl %[R_INLEN],16,20f\n\t" \
                  " clgijl %[R_OUTLEN],32,20f\n\t" \
                  " j 1b\n\t" \
                  /* Setup to check for ch >= 0xd800 && ch <= 0xdfff. (v30, v31)  */ \
                  "9: .short 0xd800,0xdfff,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
                  " .short 0xa000,0xc000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
                  /* At least one uint16_t is in range of surrogates. \
                     Store the preceding chars.  */ \
                  "10: vlgvb %[R_TMP],%%v19,7\n\t" \
                  " vuplhh %%v17,%%v16\n\t" \
                  " sllg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
                  " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
                  " jl 20f\n\t" \
                  " vstl %%v17,%[R_TMP2],0(%[R_OUT])\n\t" \
                  " vupllh %%v18,%%v16\n\t" \
                  " ahi %[R_TMP2],-16\n\t" \
                  " jl 11f\n\t" \
                  " vstl %%v18,%[R_TMP2],16(%[R_OUT])\n\t" \
                  "11: \n\t" /* Update pointers.  */ \
                  " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
                  " slgr %[R_INLEN],%[R_TMP]\n\t" \
                  " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \
                  " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \
                  /* Handles UTF16 surrogates with convert instruction.  */ \
                  "20: cu24 %[R_OUT],%[R_IN],1\n\t" \
                  " jo 0b\n\t" /* Try vector implementation again.  */ \
                  " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1.  */ \
                  " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2.  */ \
                  ".machine pop" \
                  : /* outputs */ [R_IN] "+a" (pInput) \
                    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \
                    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \
                    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \
                    , [R_RES] "+d" (result) \
                  : /* inputs */ \
                    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \
                    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \
                  : /* clobber list */ "memory", "cc" \
                    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
                    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
                    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \
                  ); \
    /* Copy the advanced positions back into the loop variables.  */ \
    inptr = pInput; \
    outptr = pOutput; \
    \
    if (__glibc_likely (inlen == 0) \
        || result == __GCONV_FULL_OUTPUT) \
      break; \
    if (inlen == 1) \
      { \
        /* Input does not contain a complete utf16 character.  */ \
        result = __GCONV_INCOMPLETE_INPUT; \
        break; \
      } \
    else if (result != __GCONV_ILLEGAL_INPUT) \
      { \
        /* Input is >= 2 and < 4 bytes (as cu24 would have processed \
           a possible next utf16 character) and not illegal. \
           => we have a single high surrogate at end of input.  */ \
        result = __GCONV_INCOMPLETE_INPUT; \
        break; \
      } \
    \
    STANDARD_FROM_LOOP_ERR_HANDLER (2); \
  }

/* Generate loop-function with hardware vector and utf-convert instructions.  */
# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MAX_NEEDED_INPUT MAX_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define FROM_LOOP_VX_CU __from_utf16_loop_vx_cu
# define LOOPFCT FROM_LOOP_VX_CU
# define LOOP_NEED_FLAGS
# define BODY BODY_FROM_VX_CU
# include <iconv/loop.c>
#else
# define FROM_LOOP_VX_CU NULL
#endif /* HAVE_FROM_VX_CU != 1  */
505
/* Conversion from UTF-32 internal/BE to UTF-16.  */

#if HAVE_TO_C == 1
/* The software routine is copied from utf-16.c (minus bytes
   swapping).

   Each expansion converts one 4-byte UTF-32 value:
     - c <= 0xd7ff or 0xe000 <= c <= 0xffff is written as one 16-bit
       code unit (2 bytes out);
     - 0x10000 <= c <= 0x10ffff is written as a surrogate pair
       (4 bytes out); if fewer than 4 output bytes are left, result is
       set to __GCONV_FULL_OUTPUT and the loop is left;
     - anything else (a surrogate code point or c > 0x10ffff) goes
       through STANDARD_TO_LOOP_ERR_HANDLER (4).  */
# define BODY_TO_C \
  { \
    uint32_t c = get32 (inptr); \
    \
    if (__builtin_expect (c <= 0xd7ff, 1) \
        || (c > 0xdfff && c <= 0xffff)) \
      { \
        /* Two output bytes: a single UTF-16 code unit.  */ \
        put16 (outptr, c); \
      } \
    else if (__builtin_expect (c >= 0x10000, 1) \
             && __builtin_expect (c <= 0x10ffff, 1)) \
      { \
        /* Four output bytes: a UTF-16 surrogate pair.  */ \
        uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1; \
        uint16_t out; \
        \
        /* Generate a surrogate character.  */ \
        if (__glibc_unlikely (outptr + 4 > outend)) \
          { \
            /* Overflow in the output buffer.  */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        \
        /* High surrogate: 0xd800 | zabcd << 6 | bits 15..10 of c.  */ \
        out = 0xd800; \
        out |= (zabcd & 0xff) << 6; \
        out |= (c >> 10) & 0x3f; \
        put16 (outptr, out); \
        outptr += 2; \
        \
        /* Low surrogate: 0xdc00 | bits 9..0 of c.  */ \
        out = 0xdc00; \
        out |= c & 0x3ff; \
        put16 (outptr, out); \
      } \
    else \
      { \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    outptr += 2; \
    inptr += 4; \
  }

/* Generate loop-function with software routine.  */
# define MIN_NEEDED_INPUT MIN_NEEDED_TO
# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
# define TO_LOOP_C __to_utf16_loop_c
# define LOOPFCT TO_LOOP_C
# define LOOP_NEED_FLAGS
# define BODY BODY_TO_C
# include <iconv/loop.c>
#else
# define TO_LOOP_C NULL
#endif /* HAVE_TO_C != 1  */
566
#if HAVE_TO_VX == 1
/* UTF-32 -> UTF-16 loop body using z13 vector instructions.

   Vector loop (labels 0/1): while at least 32 input bytes and 16 output
   bytes are available, 32 bytes are loaded into v16/v17, packed to
   16-bit values in v18 and both input vectors are checked against the
   ranges at label 9 (surrogates 0xd800..0xdfff and values >= 0x10000).
   Without a hit the 16 packed bytes are stored.

   Labels 10/11: a hit was found in v16 (10) or v17 (11, which adds 16
   to the byte offset); the preceding characters are stored and the
   scalar code takes over at label 20.
   Labels 2/20/21: scalar handling of the remaining 32-bit values,
   producing either one 16-bit unit or a surrogate pair; afterwards
   control returns to label 0.

   Exit labels: 90 sets result to __GCONV_FULL_OUTPUT, 91 to
   __GCONV_ILLEGAL_INPUT, 92 to __GCONV_INCOMPLETE_INPUT; 99 leaves
   result unchanged.

   After the asm, the loop is left unless result is
   __GCONV_ILLEGAL_INPUT with input remaining, in which case
   STANDARD_TO_LOOP_ERR_HANDLER (4) is run.  */
# define BODY_TO_VX \
  { \
    size_t inlen = inend - inptr; \
    size_t outlen = outend - outptr; \
    unsigned long tmp, tmp2, tmp3; \
    asm volatile (".machine push\n\t" \
                  ".machine \"z13\"\n\t" \
                  ".machinemode \"zarch_nohighgprs\"\n\t" \
                  /* Setup to check for surrogates.  */ \
                  " larl %[R_TMP],9f\n\t" \
                  " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \
                  CONVERT_32BIT_SIZE_T ([R_INLEN]) \
                  CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \
                  /* Loop which handles UTF-32 chars \
                     ch < 0xd800 || (ch > 0xdfff && ch < 0x10000).  */ \
                  "0: clgijl %[R_INLEN],32,2f\n\t" \
                  " clgijl %[R_OUTLEN],16,2f\n\t" \
                  "1: vlm %%v16,%%v17,0(%[R_IN])\n\t" \
                  " lghi %[R_TMP2],0\n\t" \
                  /* Shorten to UTF-16.  */ \
                  " vpkf %%v18,%%v16,%%v17\n\t" \
                  /* Check for surrogate chars.  */ \
                  " vstrcfs %%v19,%%v16,%%v30,%%v31\n\t" \
                  " jno 10f\n\t" \
                  " vstrcfs %%v19,%%v17,%%v30,%%v31\n\t" \
                  " jno 11f\n\t" \
                  /* Store 16 bytes to buf_out.  */ \
                  " vst %%v18,0(%[R_OUT])\n\t" \
                  " la %[R_IN],32(%[R_IN])\n\t" \
                  " aghi %[R_INLEN],-32\n\t" \
                  " aghi %[R_OUTLEN],-16\n\t" \
                  " la %[R_OUT],16(%[R_OUT])\n\t" \
                  " clgijl %[R_INLEN],32,2f\n\t" \
                  " clgijl %[R_OUTLEN],16,2f\n\t" \
                  " j 1b\n\t" \
                  /* Calculate remaining uint32_t values in inptr.  */ \
                  "2: \n\t" \
                  " clgije %[R_INLEN],0,99f\n\t" \
                  " clgijl %[R_INLEN],4,92f\n\t" \
                  " srlg %[R_TMP2],%[R_INLEN],2\n\t" \
                  " j 20f\n\t" \
                  /* Setup to check for ch >= 0xd800 && ch <= 0xdfff \
                     and check for ch >= 0x10000. (v30, v31)  */ \
                  "9: .long 0xd800,0xdfff,0x10000,0x10000\n\t" \
                  " .long 0xa0000000,0xc0000000, 0xa0000000,0xa0000000\n\t" \
                  /* At least one UTF32 char is in range of surrogates. \
                     Store the preceding characters.  */ \
                  "11: ahi %[R_TMP2],16\n\t" \
                  "10: vlgvb %[R_TMP],%%v19,7\n\t" \
                  " agr %[R_TMP],%[R_TMP2]\n\t" \
                  " srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
                  " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
                  " jl 12f\n\t" \
                  " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \
                  /* Update pointers.  */ \
                  " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
                  " slgr %[R_INLEN],%[R_TMP]\n\t" \
                  " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \
                  " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \
                  /* Calculate remaining uint32_t values in vrs.  */ \
                  "12: lghi %[R_TMP2],8\n\t" \
                  " srlg %[R_TMP3],%[R_TMP3],1\n\t" \
                  " slgr %[R_TMP2],%[R_TMP3]\n\t" \
                  /* Handle remaining UTF-32 characters.  */ \
                  "20: l %[R_TMP],0(%[R_IN])\n\t" \
                  " aghi %[R_INLEN],-4\n\t" \
                  /* Test if ch is 2byte UTF-16 char.  */ \
                  " clfi %[R_TMP],0xffff\n\t" \
                  " jh 21f\n\t" \
                  /* Handle 2 byte UTF16 char.  */ \
                  " lgr %[R_TMP3],%[R_TMP]\n\t" \
                  " nilf %[R_TMP],0xf800\n\t" \
                  " clfi %[R_TMP],0xd800\n\t" \
                  " je 91f\n\t" /* Do not accept UTF-16 surrogates.  */ \
                  " slgfi %[R_OUTLEN],2\n\t" \
                  " jl 90f \n\t" \
                  " sth %[R_TMP3],0(%[R_OUT])\n\t" \
                  " la %[R_IN],4(%[R_IN])\n\t" \
                  " la %[R_OUT],2(%[R_OUT])\n\t" \
                  " brctg %[R_TMP2],20b\n\t" \
                  " j 0b\n\t" /* Switch to vx-loop.  */ \
                  /* Test if ch is 4byte UTF-16 char.  */ \
                  "21: clfi %[R_TMP],0x10ffff\n\t" \
                  " jh 91f\n\t" /* ch > 0x10ffff is not allowed!  */ \
                  /* Handle 4 byte UTF16 char.  */ \
                  " slgfi %[R_OUTLEN],4\n\t" \
                  " jl 90f \n\t" \
                  " slfi %[R_TMP],0x10000\n\t" /* zabcd = uvwxy - 1.  */ \
                  " llilf %[R_TMP3],0xd800dc00\n\t" \
                  " la %[R_IN],4(%[R_IN])\n\t" \
                  " risbgn %[R_TMP3],%[R_TMP],38,47,6\n\t" /* High surrogate.  */ \
                  " risbgn %[R_TMP3],%[R_TMP],54,63,0\n\t" /* Low surrogate.  */ \
                  " st %[R_TMP3],0(%[R_OUT])\n\t" \
                  " la %[R_OUT],4(%[R_OUT])\n\t" \
                  " brctg %[R_TMP2],20b\n\t" \
                  " j 0b\n\t" /* Switch to vx-loop.  */ \
                  "92: lghi %[R_RES],%[RES_IN_FULL]\n\t" \
                  " j 99f\n\t" \
                  "91: lghi %[R_RES],%[RES_IN_ILL]\n\t" \
                  " j 99f\n\t" \
                  "90: lghi %[R_RES],%[RES_OUT_FULL]\n\t" \
                  "99: \n\t" \
                  ".machine pop" \
                  : /* outputs */ [R_IN] "+a" (inptr) \
                    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr) \
                    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \
                    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \
                    , [R_RES] "+d" (result) \
                  : /* inputs */ \
                    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \
                    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \
                    , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \
                  : /* clobber list */ "memory", "cc" \
                    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
                    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
                    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \
                  ); \
    /* Done, or stopped for a reason other than an illegal character.  */ \
    if (__glibc_likely (inptr == inend) \
        || result != __GCONV_ILLEGAL_INPUT) \
      break; \
    \
    STANDARD_TO_LOOP_ERR_HANDLER (4); \
  }

/* Generate loop-function with hardware vector instructions.  */
# define MIN_NEEDED_INPUT MIN_NEEDED_TO
# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
# define TO_LOOP_VX __to_utf16_loop_vx
# define LOOPFCT TO_LOOP_VX
# define LOOP_NEED_FLAGS
# define BODY BODY_TO_VX
# include <iconv/loop.c>
#else
# define TO_LOOP_VX NULL
#endif /* HAVE_TO_VX != 1  */
704
#if HAVE_TO_VX_CU == 1
/* UTF-32 -> UTF-16 loop body using z13 vector instructions plus the
   cu42 convert instruction.

   The vector loop (labels 0/1) is the same as in BODY_TO_VX: 32 input
   bytes without a range hit become 16 output bytes.  Whenever the
   vector loop cannot continue (too little input/output left, or a value
   in one of the checked ranges was found) control goes to label 20,
   where cu42 converts the data.  With condition code 3 the vector loop
   is tried again; cc 1 sets result to __GCONV_FULL_OUTPUT, cc 2 to
   __GCONV_ILLEGAL_INPUT; cc 0 leaves result unchanged.

   The pointer/length variables are pinned to r8/r9 and r10/r11.
   NOTE(review): presumably because cu42 operates on even/odd
   (address, length) register pairs -- confirm against the
   z/Architecture Principles of Operation.  */
# define BODY_TO_VX_CU \
  { \
    register const unsigned char* pInput asm ("8") = inptr; \
    register size_t inlen asm ("9") = inend - inptr; \
    register unsigned char* pOutput asm ("10") = outptr; \
    register size_t outlen asm ("11") = outend - outptr; \
    unsigned long tmp, tmp2, tmp3; \
    asm volatile (".machine push\n\t" \
                  ".machine \"z13\"\n\t" \
                  ".machinemode \"zarch_nohighgprs\"\n\t" \
                  /* Setup to check for surrogates.  */ \
                  " larl %[R_TMP],9f\n\t" \
                  " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \
                  CONVERT_32BIT_SIZE_T ([R_INLEN]) \
                  CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \
                  /* Loop which handles UTF-32 chars \
                     ch < 0xd800 || (ch > 0xdfff && ch < 0x10000).  */ \
                  "0: clgijl %[R_INLEN],32,20f\n\t" \
                  " clgijl %[R_OUTLEN],16,20f\n\t" \
                  "1: vlm %%v16,%%v17,0(%[R_IN])\n\t" \
                  " lghi %[R_TMP2],0\n\t" \
                  /* Shorten to UTF-16.  */ \
                  " vpkf %%v18,%%v16,%%v17\n\t" \
                  /* Check for surrogate chars.  */ \
                  " vstrcfs %%v19,%%v16,%%v30,%%v31\n\t" \
                  " jno 10f\n\t" \
                  " vstrcfs %%v19,%%v17,%%v30,%%v31\n\t" \
                  " jno 11f\n\t" \
                  /* Store 16 bytes to buf_out.  */ \
                  " vst %%v18,0(%[R_OUT])\n\t" \
                  " la %[R_IN],32(%[R_IN])\n\t" \
                  " aghi %[R_INLEN],-32\n\t" \
                  " aghi %[R_OUTLEN],-16\n\t" \
                  " la %[R_OUT],16(%[R_OUT])\n\t" \
                  " clgijl %[R_INLEN],32,20f\n\t" \
                  " clgijl %[R_OUTLEN],16,20f\n\t" \
                  " j 1b\n\t" \
                  /* Setup to check for ch >= 0xd800 && ch <= 0xdfff \
                     and check for ch >= 0x10000. (v30, v31)  */ \
                  "9: .long 0xd800,0xdfff,0x10000,0x10000\n\t" \
                  " .long 0xa0000000,0xc0000000, 0xa0000000,0xa0000000\n\t" \
                  /* At least one UTF32 char is in range of surrogates. \
                     Store the preceding characters.  */ \
                  "11: ahi %[R_TMP2],16\n\t" \
                  "10: vlgvb %[R_TMP],%%v19,7\n\t" \
                  " agr %[R_TMP],%[R_TMP2]\n\t" \
                  " srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
                  " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
                  " jl 20f\n\t" \
                  " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \
                  /* Update pointers.  */ \
                  " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
                  " slgr %[R_INLEN],%[R_TMP]\n\t" \
                  " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \
                  " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \
                  /* Handles UTF16 surrogates with convert instruction.  */ \
                  "20: cu42 %[R_OUT],%[R_IN]\n\t" \
                  " jo 0b\n\t" /* Try vector implementation again.  */ \
                  " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1.  */ \
                  " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2.  */ \
                  ".machine pop" \
                  : /* outputs */ [R_IN] "+a" (pInput) \
                    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \
                    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \
                    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \
                    , [R_RES] "+d" (result) \
                  : /* inputs */ \
                    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \
                    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \
                  : /* clobber list */ "memory", "cc" \
                    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
                    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
                    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \
                  ); \
    /* Copy the advanced positions back into the loop variables.  */ \
    inptr = pInput; \
    outptr = pOutput; \
    \
    if (__glibc_likely (inlen == 0) \
        || result == __GCONV_FULL_OUTPUT) \
      break; \
    if (inlen < 4) \
      { \
        /* Fewer bytes left than one complete UTF-32 value.  */ \
        result = __GCONV_INCOMPLETE_INPUT; \
        break; \
      } \
    \
    STANDARD_TO_LOOP_ERR_HANDLER (4); \
  }

/* Generate loop-function with hardware vector and utf-convert instructions.  */
# define MIN_NEEDED_INPUT MIN_NEEDED_TO
# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
# define TO_LOOP_VX_CU __to_utf16_loop_vx_cu
# define LOOPFCT TO_LOOP_VX_CU
# define LOOP_NEED_FLAGS
# define BODY BODY_TO_VX_CU
# include <iconv/loop.c>
#else
# define TO_LOOP_VX_CU NULL
#endif /* HAVE_TO_VX_CU != 1  */
807
808/* This file also exists in sysdeps/s390/multiarch/ which
809 generates ifunc resolvers for FROM/TO_LOOP functions
810 and includes iconv/skeleton.c afterwards. */
811#if ! defined USE_MULTIARCH
812# include <iconv/skeleton.c>
813#endif
814

/* Source: glibc/sysdeps/s390/utf16-utf32-z9.c  */