/* Test program for the wide character stream functions handling larger
   amounts of text.
   Copyright (C) 2000-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>. */
19 | |
#include <assert.h>
#include <iconv.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>

/* Capacity, in elements, of the byte and wide character buffers used by
   the test.  The text read from standard input must be smaller than
   this: at most SIZE bytes are read, and one wide character slot has to
   stay free for a terminating NUL.  */
#define SIZE 210000
31 | |
32 | |
33 | static int |
34 | do_test (void) |
35 | { |
36 | char name[] = "/tmp/widetext.out.XXXXXX" ; |
37 | char mbbuf[SIZE]; |
38 | char mb2buf[SIZE]; |
39 | wchar_t wcbuf[SIZE]; |
40 | wchar_t wc2buf[SIZE]; |
41 | size_t mbsize; |
42 | size_t wcsize; |
43 | int fd; |
44 | FILE *fp; |
45 | size_t n; |
46 | int res; |
47 | int status = 0; |
48 | wchar_t *wcp; |
49 | |
50 | setlocale (LC_ALL, "de_DE.UTF-8" ); |
51 | printf (format: "locale used: %s\n\n" , setlocale (LC_ALL, NULL)); |
52 | |
53 | /* Read the file into memory. */ |
54 | mbsize = fread (ptr: mbbuf, size: 1, SIZE, stdin); |
55 | if (mbsize == 0) |
56 | { |
57 | printf (format: "%u: cannot read input file from standard input: %m\n" , |
58 | __LINE__); |
59 | exit (1); |
60 | } |
61 | |
62 | printf (format: "INFO: input file has %Zd bytes\n" , mbsize); |
63 | |
64 | /* First convert the text to wide characters. We use iconv here. */ |
65 | { |
66 | iconv_t cd; |
67 | char *inbuf = mbbuf; |
68 | size_t inleft = mbsize; |
69 | char *outbuf = (char *) wcbuf; |
70 | size_t outleft = sizeof (wcbuf); |
71 | size_t nonr; |
72 | |
73 | cd = iconv_open (tocode: "WCHAR_T" , fromcode: "UTF-8" ); |
74 | if (cd == (iconv_t) -1) |
75 | { |
76 | printf (format: "%u: cannot get iconv descriptor for conversion to UCS4\n" , |
77 | __LINE__); |
78 | exit (1); |
79 | } |
80 | |
81 | /* We must need only one call and there must be no losses. */ |
82 | nonr = iconv (cd: cd, inbuf: &inbuf, inbytesleft: &inleft, outbuf: &outbuf, outbytesleft: &outleft); |
83 | if (nonr != 0 && nonr != (size_t) -1) |
84 | { |
85 | printf (format: "%u: iconv performed %Zd nonreversible conversions\n" , |
86 | __LINE__, nonr); |
87 | exit (1); |
88 | } |
89 | |
90 | if (nonr == (size_t) -1) |
91 | { |
92 | printf (format: "\ |
93 | %u: iconv returned with %Zd and errno = %m (inleft: %Zd, outleft: %Zd)\n" , |
94 | __LINE__, nonr, inleft, outleft); |
95 | exit (1); |
96 | } |
97 | |
98 | if (inleft != 0) |
99 | { |
100 | printf (format: "%u: iconv didn't convert all input\n" , __LINE__); |
101 | exit (1); |
102 | } |
103 | |
104 | iconv_close (cd: cd); |
105 | |
106 | if ((sizeof (wcbuf) - outleft) % sizeof (wchar_t) != 0) |
107 | { |
108 | printf (format: "%u: iconv converted not complete wchar_t\n" , __LINE__); |
109 | exit (1); |
110 | } |
111 | |
112 | wcsize = (sizeof (wcbuf) - outleft) / sizeof (wchar_t); |
113 | assert (wcsize + 1 <= SIZE); |
114 | } |
115 | |
116 | /* Now that we finished the preparations, run the first test. We |
117 | are writing the wide char data out and read it back in. We write |
118 | and read single characters. */ |
119 | |
120 | fd = mkstemp (template: name); |
121 | if (fd == -1) |
122 | { |
123 | printf (format: "%u: cannot open temporary file: %m\n" , __LINE__); |
124 | exit (1); |
125 | } |
126 | |
127 | unlink (name: name); |
128 | |
129 | fp = fdopen (dup (fd), "w" ); |
130 | if (fp == NULL) |
131 | { |
132 | printf (format: "%u: fdopen of temp file for writing failed: %m\n" , __LINE__); |
133 | exit (1); |
134 | } |
135 | |
136 | for (n = 0; n < wcsize; ++n) |
137 | { |
138 | if (fputwc (wc: wcbuf[n], stream: fp) == WEOF) |
139 | { |
140 | printf (format: "%u: fputwc failed: %m\n" , __LINE__); |
141 | exit (1); |
142 | } |
143 | } |
144 | |
145 | res = fclose (fp); |
146 | if (res != 0) |
147 | { |
148 | printf (format: "%u: fclose after single-character writing failed (%d): %m\n" , |
149 | __LINE__, res); |
150 | exit (1); |
151 | } |
152 | |
153 | lseek (fd: fd, SEEK_SET, whence: 0); |
154 | fp = fdopen (dup (fd), "r" ); |
155 | if (fp == NULL) |
156 | { |
157 | printf (format: "%u: fdopen of temp file for reading failed: %m\n" , __LINE__); |
158 | exit (1); |
159 | } |
160 | |
161 | for (n = 0; n < wcsize; ++n) |
162 | { |
163 | wint_t wch = fgetwc (stream: fp); |
164 | if (wch == WEOF) |
165 | { |
166 | printf (format: "%u: fgetwc failed (idx %Zd): %m\n" , __LINE__, n); |
167 | exit (1); |
168 | } |
169 | wc2buf[n] = wch; |
170 | } |
171 | |
172 | /* There should be nothing else. */ |
173 | if (fgetwc (stream: fp) != WEOF) |
174 | { |
175 | printf (format: "%u: too many characters available with fgetwc\n" , __LINE__); |
176 | status = 1; |
177 | } |
178 | else if (wmemcmp (s1: wcbuf, s2: wc2buf, n: wcsize) != 0) |
179 | { |
180 | printf (format: "%u: buffer read with fgetwc differs\n" , __LINE__); |
181 | status = 1; |
182 | } |
183 | |
184 | res = fclose (fp); |
185 | if (res != 0) |
186 | { |
187 | printf (format: "%u: fclose after single-character reading failed (%d): %m\n" , |
188 | __LINE__, res); |
189 | exit (1); |
190 | } |
191 | |
192 | /* Just make sure there are no two errors which hide each other, read the |
193 | file using the `char' functions. */ |
194 | |
195 | lseek (fd: fd, SEEK_SET, whence: 0); |
196 | fp = fdopen (fd, "r" ); |
197 | if (fp == NULL) |
198 | { |
199 | printf (format: "%u: fdopen of temp file for reading failed: %m\n" , __LINE__); |
200 | exit (1); |
201 | } |
202 | |
203 | if (fread (ptr: mb2buf, size: 1, n: mbsize, stream: fp) != mbsize) |
204 | { |
205 | printf (format: "%u: cannot read all of the temp file\n" , __LINE__); |
206 | status = 1; |
207 | } |
208 | else |
209 | { |
210 | /* Make sure there is nothing left. */ |
211 | if (fgetc (stream: fp) != EOF) |
212 | { |
213 | printf (format: "%u: more input available\n" , __LINE__); |
214 | status = 1; |
215 | } |
216 | |
217 | if (memcmp (mb2buf, mbbuf, mbsize) != 0) |
218 | { |
219 | printf (format: "%u: buffer written with fputwc differs\n" , __LINE__); |
220 | status = 1; |
221 | } |
222 | } |
223 | |
224 | res = fclose (fp); |
225 | if (res != 0) |
226 | { |
227 | printf (format: "%u: fclose after single-character reading failed (%d): %m\n" , |
228 | __LINE__, res); |
229 | exit (1); |
230 | } |
231 | |
232 | /* Now to reading and writing line-wise. */ |
233 | |
234 | fd = mkstemp (template: strcpy (name, "/tmp/widetext.out.XXXXXX" )); |
235 | if (fd == -1) |
236 | { |
237 | printf (format: "%u: cannot open temporary file: %m\n" , __LINE__); |
238 | exit (1); |
239 | } |
240 | |
241 | unlink (name: name); |
242 | |
243 | fp = fdopen (dup (fd), "w" ); |
244 | if (fp == NULL) |
245 | { |
246 | printf (format: "%u: fdopen of temp file for writing failed: %m\n" , __LINE__); |
247 | exit (1); |
248 | } |
249 | |
250 | for (wcp = wcbuf; wcp < &wcbuf[wcsize]; ) |
251 | { |
252 | wchar_t *wendp = wcschr (wcp, L'\n'); |
253 | |
254 | if (wendp != NULL) |
255 | { |
256 | /* Temporarily NUL terminate the line. */ |
257 | wchar_t save = wendp[1]; |
258 | wendp[1] = L'\0'; |
259 | |
260 | fputws (ws: wcp, stream: fp); |
261 | |
262 | wendp[1] = save; |
263 | wcp = &wendp[1]; |
264 | } |
265 | else |
266 | { |
267 | fputws (ws: wcp, stream: fp); |
268 | wcp = wcschr (wcp, L'\0'); |
269 | assert (wcp == &wcbuf[wcsize]); |
270 | } |
271 | } |
272 | |
273 | res = fclose (fp); |
274 | if (res != 0) |
275 | { |
276 | printf (format: "%u: fclose after line-wise writing failed (%d): %m\n" , |
277 | __LINE__, res); |
278 | exit (1); |
279 | } |
280 | |
281 | lseek (fd: fd, SEEK_SET, whence: 0); |
282 | fp = fdopen (dup (fd), "r" ); |
283 | if (fp == NULL) |
284 | { |
285 | printf (format: "%u: fdopen of temp file for reading failed: %m\n" , __LINE__); |
286 | exit (1); |
287 | } |
288 | |
289 | for (wcp = wc2buf; wcp < &wc2buf[wcsize]; ) |
290 | { |
291 | if (fgetws (ws: wcp, n: &wc2buf[wcsize] - wcp + 1, stream: fp) == NULL) |
292 | { |
293 | printf (format: "%u: short read using fgetws (only %td of %Zd)\n" , |
294 | __LINE__, wcp - wc2buf, wcsize); |
295 | status = 1; |
296 | break; |
297 | } |
298 | wcp = wcschr (wcp, L'\0'); |
299 | } |
300 | |
301 | if (wcp > &wc2buf[wcsize]) |
302 | { |
303 | printf (format: "%u: fgetws read too much\n" , __LINE__); |
304 | status = 1; |
305 | } |
306 | else if (fgetwc (stream: fp) != WEOF) |
307 | { |
308 | /* There should be nothing else. */ |
309 | printf (format: "%u: too many characters available with fgetws\n" , __LINE__); |
310 | status = 1; |
311 | } |
312 | |
313 | if (wcp >= &wc2buf[wcsize] && wmemcmp (s1: wcbuf, s2: wc2buf, n: wcsize) != 0) |
314 | { |
315 | printf (format: "%u: buffer read with fgetws differs\n" , __LINE__); |
316 | status = 1; |
317 | } |
318 | |
319 | res = fclose (fp); |
320 | if (res != 0) |
321 | { |
322 | printf (format: "%u: fclose after single-character reading failed (%d): %m\n" , |
323 | __LINE__, res); |
324 | exit (1); |
325 | } |
326 | |
327 | /* Just make sure there are no two errors which hide each other, read the |
328 | file using the `char' functions. */ |
329 | |
330 | lseek (fd: fd, SEEK_SET, whence: 0); |
331 | fp = fdopen (fd, "r" ); |
332 | if (fp == NULL) |
333 | { |
334 | printf (format: "%u: fdopen of temp file for reading failed: %m\n" , __LINE__); |
335 | exit (1); |
336 | } |
337 | |
338 | if (fread (ptr: mb2buf, size: 1, n: mbsize, stream: fp) != mbsize) |
339 | { |
340 | printf (format: "%u: cannot read all of the temp file\n" , __LINE__); |
341 | status = 1; |
342 | } |
343 | else |
344 | { |
345 | /* Make sure there is nothing left. */ |
346 | if (fgetc (stream: fp) != EOF) |
347 | { |
348 | printf (format: "%u: more input available\n" , __LINE__); |
349 | status = 1; |
350 | } |
351 | |
352 | if (memcmp (mb2buf, mbbuf, mbsize) != 0) |
353 | { |
354 | printf (format: "%u: buffer written with fputws differs\n" , __LINE__); |
355 | status = 1; |
356 | } |
357 | } |
358 | |
359 | res = fclose (fp); |
360 | if (res != 0) |
361 | { |
362 | printf (format: "%u: fclose after single-character reading failed (%d): %m\n" , |
363 | __LINE__, res); |
364 | exit (1); |
365 | } |
366 | |
367 | return status; |
368 | } |
369 | |
370 | #define TEST_FUNCTION do_test () |
371 | #include "../test-skeleton.c" |
372 | |