1 | /* Copyright (C) 2000-2024 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <https://www.gnu.org/licenses/>. */ |
17 | |
18 | /* Create a table from Unicode to CHARSET. |
19 | This is a good test for CHARSET's iconv() module, in particular the |
20 | TO_LOOP BODY macro. */ |
21 | |
22 | #include <stddef.h> |
23 | #include <stdio.h> |
24 | #include <stdlib.h> |
25 | #include <string.h> |
26 | #include <iconv.h> |
27 | #include <errno.h> |
28 | |
/* Store the UTF-8 encoding of UCS in OUT and return the number of bytes
   written (1 to 4).  The value is encoded mechanically, without checking
   that it is a valid Unicode scalar value; UCS must be below 0x200000.  */
static unsigned int
utf8_encode (unsigned int ucs, unsigned char out[4])
{
  if (ucs < 0x80)
    {
      out[0] = ucs;
      return 1;
    }
  if (ucs < 0x800)
    {
      out[0] = 0xc0 | (ucs >> 6);
      out[1] = 0x80 | (ucs & 0x3f);
      return 2;
    }
  if (ucs < 0x10000)
    {
      out[0] = 0xe0 | (ucs >> 12);
      out[1] = 0x80 | ((ucs >> 6) & 0x3f);
      out[2] = 0x80 | (ucs & 0x3f);
      return 3;
    }
  /* ucs < 0x200000 */
  out[0] = 0xf0 | (ucs >> 18);
  out[1] = 0x80 | ((ucs >> 12) & 0x3f);
  out[2] = 0x80 | ((ucs >> 6) & 0x3f);
  out[3] = 0x80 | (ucs & 0x3f);
  return 4;
}

/* Print the conversion table from Unicode to the charset named by ARGV[1].

   Every code point below the charset-dependent limit is encoded as UTF-8
   and converted to the charset with iconv.  For each conversion that
   succeeds without transliteration, one line of the form
   "0x<converted bytes in hex>\t0x<code point in hex>" is written to stdout.
   Code points the converter rejects with EILSEQ are silently skipped.

   Returns 0 on success.  Returns 1 on a usage error, if the converter
   cannot be opened or closed, on any iconv error other than EILSEQ, on an
   unexpected consumed/produced byte count, or on an I/O error.  */
int
main (int argc, char *argv[])
{
  const char *charset;
  iconv_t cd;
  unsigned int limit;
  unsigned int i;
  unsigned char buf[10];

  if (argc != 2)
    {
      fprintf (stderr, "Usage: tst-table-to charset\n");
      return 1;
    }
  charset = argv[1];

  cd = iconv_open (charset, "UTF-8");
  if (cd == (iconv_t) -1)
    {
      perror ("iconv_open");
      return 1;
    }

  if (strcmp (charset, "UTF-8") == 0)
    /* When testing UTF-8, stop at 0x10000, otherwise the output
       file gets too big.  */
    limit = 0x10000;
  else if (strcmp (charset, "GB18030") != 0)
    /* When testing any encoding other than UTF-8 or GB18030, stop at
       0xE0000, because the conversion drops Unicode tag characters (range
       U+E0000..U+E007F).  */
    limit = 0xE0000;
  else
    limit = 0x110000;

  for (i = 0; i < limit; i++)
    {
      unsigned char in[4];
      unsigned int incount = utf8_encode (i, in);
      /* iconv takes a non-const 'char **'; declaring the cursor as
	 'char *' avoids casting a 'const char **'.  */
      char *inbuf = (char *) in;
      size_t inbytesleft = incount;
      char *outbuf = (char *) buf;
      size_t outbytesleft = sizeof (buf);
      size_t result;
      size_t result2 = 0;
      unsigned int j, jmax;

      /* Put the converter back into its initial state, convert the one
	 character, then make a second call with a null input so that any
	 output the converter still holds is written to BUF.  */
      iconv (cd, NULL, NULL, NULL, NULL);
      result = iconv (cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
      if (result != (size_t) -1)
	result2 = iconv (cd, NULL, NULL, &outbuf, &outbytesleft);

      if (result == (size_t) -1 || result2 == (size_t) -1)
	{
	  /* Capture errno before any stdio call can overwrite it.  */
	  int saved_errno = errno;

	  if (saved_errno != EILSEQ)
	    {
	      fprintf (stderr, "0x%02X: iconv error: %s\n", i,
		       strerror (saved_errno));
	      return 1;
	    }
	  /* EILSEQ: this code point has no table entry.  */
	  continue;
	}

      /* Ignore conversions with transliteration.  */
      if (result != 0)
	continue;

      /* A clean conversion must consume the whole input and produce at
	 least one output byte.  */
      if (inbytesleft != 0 || outbytesleft == sizeof (buf))
	{
	  fprintf (stderr, "0x%02X: inbytes = %ld, outbytes = %ld\n", i,
		   (long) (incount - inbytesleft),
		   (long) (sizeof (buf) - outbytesleft));
	  return 1;
	}

      jmax = sizeof (buf) - outbytesleft;
      printf ("0x");
      for (j = 0; j < jmax; j++)
	printf ("%02X", buf[j]);
      printf ("\t0x%04X\n", i);
    }

  if (iconv_close (cd) < 0)
    {
      perror ("iconv_close");
      return 1;
    }

  /* The printf results above are not checked individually; a failed write
     is detected here through the stream's error indicator.  */
  if (ferror (stdin) || fflush (stdout) || ferror (stdout))
    {
      fprintf (stderr, "I/O error\n");
      return 1;
    }

  return 0;
}
135 | |