1 | /* Copyright (C) 2000-2022 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <https://www.gnu.org/licenses/>. */ |
17 | |
18 | /* We always want assert to be fully defined. */ |
19 | #undef NDEBUG |
20 | #include <assert.h> |
21 | #include <locale.h> |
22 | #include <stdio.h> |
23 | #include <stdlib.h> |
24 | #include <string.h> |
25 | #include <wchar.h> |
26 | |
27 | |
28 | static int check_ascii (const char *locname); |
29 | |
30 | /* UTF-8 single byte feeding test for mbrtowc(). */ |
31 | static int |
32 | utf8_test_1 (void) |
33 | { |
34 | wchar_t wc; |
35 | mbstate_t s; |
36 | |
37 | wc = 42; /* arbitrary number */ |
38 | memset (&s, 0, sizeof (s)); /* get s into initial state */ |
39 | assert (mbrtowc (&wc, "\xE2" , 1, &s) == (size_t) -2); /* 1st byte processed */ |
40 | assert (mbrtowc (&wc, "\x89" , 1, &s) == (size_t) -2); /* 2nd byte processed */ |
41 | assert (wc == 42); /* no value has not been stored into &wc yet */ |
42 | assert (mbrtowc (&wc, "\xA0" , 1, &s) == 1); /* 3nd byte processed */ |
43 | assert (wc == 0x2260); /* E2 89 A0 = U+2260 (not equal) decoded correctly */ |
44 | assert (mbrtowc (&wc, "" , 1, &s) == 0); /* test final byte processing */ |
45 | assert (wc == 0); /* test final byte decoding */ |
46 | |
47 | /* The following test is by Al Viro <aviro@redhat.com>. */ |
48 | const char str[] = "\xe0\xa0\x80" ; |
49 | |
50 | wc = 42; /* arbitrary number */ |
51 | memset (&s, 0, sizeof (s)); /* get s into initial state */ |
52 | assert (mbrtowc (&wc, str, 1, &s) == -2); |
53 | assert (mbrtowc (&wc, str + 1, 2, &s) == 2); |
54 | assert (wc == 0x800); |
55 | |
56 | wc = 42; /* arbitrary number */ |
57 | memset (&s, 0, sizeof (s)); /* get s into initial state */ |
58 | assert (mbrtowc (&wc, str, 3, &s) == 3); |
59 | assert (wc == 0x800); |
60 | |
61 | return 0; |
62 | } |
63 | |
64 | /* Test for NUL byte processing via empty string. */ |
65 | static int |
66 | utf8_test_2 (void) |
67 | { |
68 | wchar_t wc; |
69 | mbstate_t s; |
70 | |
71 | wc = 42; /* arbitrary number */ |
72 | memset (&s, 0, sizeof (s)); /* get s into initial state */ |
73 | assert (mbrtowc (NULL, "" , 1, &s) == 0); /* valid terminator */ |
74 | assert (mbsinit (&s)); |
75 | |
76 | wc = 42; /* arbitrary number */ |
77 | memset (&s, 0, sizeof (s)); /* get s into initial state */ |
78 | assert (mbrtowc (&wc, "\xE2" , 1, &s) == (size_t) -2); /* 1st byte processed */ |
79 | assert (mbrtowc (NULL, "" , 1, &s) == (size_t) -1); /* invalid terminator */ |
80 | |
81 | wc = 42; /* arbitrary number */ |
82 | memset (&s, 0, sizeof (s)); /* get s into initial state */ |
83 | assert (mbrtowc (&wc, "\xE2" , 1, &s) == (size_t) -2); /* 1st byte processed */ |
84 | assert (mbrtowc (&wc, "\x89" , 1, &s) == (size_t) -2); /* 2nd byte processed */ |
85 | assert (mbrtowc (NULL, "" , 1, &s) == (size_t) -1); /* invalid terminator */ |
86 | |
87 | wc = 42; /* arbitrary number */ |
88 | memset (&s, 0, sizeof (s)); /* get s into initial state */ |
89 | assert (mbrtowc (&wc, "\xE2" , 1, &s) == (size_t) -2); /* 1st byte processed */ |
90 | assert (mbrtowc (&wc, "\x89" , 1, &s) == (size_t) -2); /* 2nd byte processed */ |
91 | assert (mbrtowc (&wc, "\xA0" , 1, &s) == 1); /* 3nd byte processed */ |
92 | assert (mbrtowc (NULL, "" , 1, &s) == 0); /* valid terminator */ |
93 | assert (mbsinit (&s)); |
94 | |
95 | return 0; |
96 | } |
97 | |
98 | /* Test for NUL byte processing via NULL string. */ |
99 | static int |
100 | utf8_test_3 (void) |
101 | { |
102 | wchar_t wc; |
103 | mbstate_t s; |
104 | |
105 | wc = 42; /* arbitrary number */ |
106 | memset (&s, 0, sizeof (s)); /* get s into initial state */ |
107 | assert (mbrtowc (NULL, NULL, 0, &s) == 0); /* valid terminator */ |
108 | assert (mbsinit (&s)); |
109 | |
110 | wc = 42; /* arbitrary number */ |
111 | memset (&s, 0, sizeof (s)); /* get s into initial state */ |
112 | assert (mbrtowc (&wc, "\xE2" , 1, &s) == (size_t) -2); /* 1st byte processed */ |
113 | assert (mbrtowc (NULL, NULL, 0, &s) == (size_t) -1); /* invalid terminator */ |
114 | |
115 | wc = 42; /* arbitrary number */ |
116 | memset (&s, 0, sizeof (s)); /* get s into initial state */ |
117 | assert (mbrtowc (&wc, "\xE2" , 1, &s) == (size_t) -2); /* 1st byte processed */ |
118 | assert (mbrtowc (&wc, "\x89" , 1, &s) == (size_t) -2); /* 2nd byte processed */ |
119 | assert (mbrtowc (NULL, NULL, 0, &s) == (size_t) -1); /* invalid terminator */ |
120 | |
121 | wc = 42; /* arbitrary number */ |
122 | memset (&s, 0, sizeof (s)); /* get s into initial state */ |
123 | assert (mbrtowc (&wc, "\xE2" , 1, &s) == (size_t) -2); /* 1st byte processed */ |
124 | assert (mbrtowc (&wc, "\x89" , 1, &s) == (size_t) -2); /* 2nd byte processed */ |
125 | assert (mbrtowc (&wc, "\xA0" , 1, &s) == 1); /* 3nd byte processed */ |
126 | assert (mbrtowc (NULL, NULL, 0, &s) == 0); /* valid terminator */ |
127 | assert (mbsinit (&s)); |
128 | |
129 | return 0; |
130 | } |
131 | |
132 | static int |
133 | utf8_test (void) |
134 | { |
135 | const char *locale = "de_DE.UTF-8" ; |
136 | int error = 0; |
137 | |
138 | if (!setlocale (LC_CTYPE, locale)) |
139 | { |
140 | fprintf (stderr, "locale '%s' not available!\n" , locale); |
141 | exit (1); |
142 | } |
143 | |
144 | error |= utf8_test_1 (); |
145 | error |= utf8_test_2 (); |
146 | error |= utf8_test_3 (); |
147 | |
148 | return error; |
149 | } |
150 | |
151 | |
152 | static int |
153 | do_test (void) |
154 | { |
155 | int result = 0; |
156 | |
157 | /* Check mapping of ASCII range for some character sets which have |
158 | ASCII as a subset. For those the wide char generated must have |
159 | the same value. */ |
160 | setlocale (LC_ALL, "C" ); |
161 | result |= check_ascii (locname: setlocale (LC_ALL, NULL)); |
162 | |
163 | setlocale (LC_ALL, "de_DE.UTF-8" ); |
164 | result |= check_ascii (locname: setlocale (LC_ALL, NULL)); |
165 | result |= utf8_test (); |
166 | |
167 | setlocale (LC_ALL, "ja_JP.EUC-JP" ); |
168 | result |= check_ascii (locname: setlocale (LC_ALL, NULL)); |
169 | |
170 | return result; |
171 | } |
172 | |
173 | |
174 | static int |
175 | check_ascii (const char *locname) |
176 | { |
177 | int c; |
178 | int res = 0; |
179 | |
180 | printf (format: "Testing locale \"%s\":\n" , locname); |
181 | |
182 | for (c = 0; c <= 127; ++c) |
183 | { |
184 | char buf[MB_CUR_MAX]; |
185 | wchar_t wc = 0xffffffff; |
186 | mbstate_t s; |
187 | size_t n, i; |
188 | |
189 | for (i = 0; i < MB_CUR_MAX; ++i) |
190 | buf[i] = c + i; |
191 | |
192 | memset (&s, '\0', sizeof (s)); |
193 | |
194 | n = mbrtowc (&wc, buf, MB_CUR_MAX, &s); |
195 | if (n == (size_t) -1) |
196 | { |
197 | printf (format: "%s: '\\x%x': encoding error\n" , locname, c); |
198 | ++res; |
199 | } |
200 | else if (n == (size_t) -2) |
201 | { |
202 | printf (format: "%s: '\\x%x': incomplete character\n" , locname, c); |
203 | ++res; |
204 | } |
205 | else if (n == 0 && c != 0) |
206 | { |
207 | printf (format: "%s: '\\x%x': 0 returned\n" , locname, c); |
208 | ++res; |
209 | } |
210 | else if (n != 0 && c == 0) |
211 | { |
212 | printf (format: "%s: '\\x%x': not 0 returned\n" , locname, c); |
213 | ++res; |
214 | } |
215 | else if (c != 0 && n != 1) |
216 | { |
217 | printf (format: "%s: '\\x%x': not 1 returned\n" , locname, c); |
218 | ++res; |
219 | } |
220 | else if (wc != (wchar_t) c) |
221 | { |
222 | printf (format: "%s: '\\x%x': wc != L'\\x%x'\n" , locname, c, c); |
223 | ++res; |
224 | } |
225 | } |
226 | |
227 | printf (format: res == 1 ? "%d error\n" : "%d errors\n" , res); |
228 | |
229 | return res != 0; |
230 | } |
231 | |
232 | #define TEST_FUNCTION do_test () |
233 | #include "../test-skeleton.c" |
234 | |