gencat.c source code [glibc/catgets/gencat.c]

/* Copyright (C) 1996-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>. */
16
17	#ifdef HAVE_CONFIG_H
18	# include "config.h"
19	#endif
20
21	#include <argp.h>
22	#include <assert.h>
23	#include <ctype.h>
24	#include <endian.h>
25	#include <errno.h>
26	#include <error.h>
27	#include <fcntl.h>
28	#include <iconv.h>
29	#include <langinfo.h>
30	#include <locale.h>
31	#include <libintl.h>
32	#include <limits.h>
33	#include <nl_types.h>
34	#include <obstack.h>
35	#include <stdint.h>
36	#include <stdio.h>
37	#include <stdlib.h>
38	#include <string.h>
39	#include <unistd.h>
40	#include <wchar.h>
41
42	#include "version.h"
43
44	#include "catgetsinfo.h"
45
46
/* Reverse the byte order of the 32-bit value W.  The operand is first
   truncated to uint32_t so that a wider argument cannot leak high bits
   into the result.  W is evaluated more than once; do not pass an
   expression with side effects.  */
#define SWAPU32(w) \
  ((((uint32_t) (w)) << 24) | ((((uint32_t) (w)) & 0xff00) << 8)	      \
   | ((((uint32_t) (w)) >> 8) & 0xff00) | (((uint32_t) (w)) >> 24))
49
/* One message of a set; the messages of a set form a singly linked
   list.  */
struct message_list
{
  /* Message number within its set.  */
  int number;
  /* Text of the message.  */
  const char *message;

  /* Input file name and line where the message was defined; used for
     diagnostics and for the comments in the generated header.  */
  const char *fname;
  size_t line;
  /* Symbolic name of the message, or NULL if it was given by number.  */
  const char *symbol;

  /* Following message in the same set, or NULL.  */
  struct message_list *next;
};
61
62
/* One message set; all sets of a catalog form a singly linked list.  */
struct set_list
{
  /* Set number as stored by find_set.  */
  int number;
  /* Non-zero once the set was named in a `$delset' directive.  */
  int deleted;
  /* Head of the list of messages belonging to this set.  */
  struct message_list *messages;
  /* Highest message number handed out in this set so far.  */
  int last_message;

  /* Input file name and line of the defining `$set' directive.  */
  const char *fname;
  size_t line;
  /* Symbolic name of the set, or NULL if it was given by number.  */
  const char *symbol;

  /* Following set in the catalog, or NULL.  */
  struct set_list *next;
};
76
77
/* Complete in-memory state of the catalog being generated.  */
struct catalog
{
  /* Head of the list of all sets seen so far.  */
  struct set_list *all_sets;
  /* Set that receives the messages currently being read.  */
  struct set_list *current_set;
  /* Counter of message lines processed; seeds the hash table sizing.  */
  size_t total_messages;
  /* Current quote character as a wide character.  */
  wint_t quote_char;
  /* Highest set number handed out so far.  */
  int last_set;

  /* Obstack holding the input lines and message strings.  */
  struct obstack mem_pool;
};
88
89
/* If non-zero force creation of new file, not using existing one.  */
static int force_new;

/* Name of output file.  */
static const char *output_name;

/* Name of generated C header file.  */
static const char *header_name;

/* Name and version of program.  argp calls the hook to print the
   version information.  */
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
102
103	#define OPT_NEW 1
104
105	/ Definitions of arguments for argp functions. /
106	static const struct argp_option options[] =
107	{
108	{ "header", `'H'`, N_("NAME"), `0`,
109	N_("Create C header file NAME containing symbol definitions") },
110	{ "new", OPT_NEW, NULL, `0`,
111	N_("Do not use existing catalog, force new output file") },
112	{ "output", `'o'`, N_("NAME"), `0`, N_("Write output to file NAME") },
113	{ NULL, `0`, NULL, `0`, NULL }
114	};
115
116	/ Short description of program. /
117	static const char doc[] = N_("Generate message catalog.\
118	\vIf INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\
119	is -, output is written to standard output.\n");
120
121	/ Strings for arguments in help texts. /
122	static const char args_doc[] = N_("\
123	-o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]");
124
125	/ Prototype for option handler. /
126	static error_t parse_opt (int key, char arg, struct* argp_state *state);
127
128	/ Function to print some extra text in the help message. /
129	static char more_help (int* key, const char text, void* *input);
130
131	/ Data structure to communicate with argp functions. /
132	static struct argp argp =
133	{
134	options, parse_opt, args_doc, doc, NULL, more_help
135	};
136
137
138	/ Wrapper functions with error checking for standard functions. /
139	#include <programs/xmalloc.h>
140
/* Prototypes for local functions.  */
static void error_print (void);
static struct catalog *read_input_file (struct catalog *current,
					const char *fname);
static void write_out (struct catalog *result, const char *output_name,
		       const char *header_name);
static struct set_list *find_set (struct catalog *current, int number);
static void normalize_line (const char *fname, size_t line, iconv_t cd,
			    wchar_t *string, wchar_t quote_char,
			    wchar_t escape_char);
static void read_old (struct catalog *catalog, const char *file_name);
static int open_conversion (const char *codesetp, iconv_t *cd_towcp,
			    iconv_t *cd_tombp, wchar_t *escape_charp);
154
155
156	int
157	main (int argc, char *argv[])
158	{
159	struct catalog *result;
160	int remaining;
161
162	/ Set program name for messages. /
163	error_print_progname = error_print;
164
165	/ Set locale via LC_ALL. /
166	setlocale (LC_ALL, locale: "");
167
168	/ Set the text message domain. /
169	textdomain (PACKAGE);
170
171	/ Initialize local variables. /
172	result = NULL;
173
174	/ Parse and process arguments. /
175	argp_parse (argp: &argp, argc: argc, argv: argv, flags: `0`, arg_index: &remaining, NULL);
176
177	/ Determine output file. /
178	if (output_name == NULL)
179	output_name = remaining < argc ? argv[remaining++] : "-";
180
181	/ Process all input files. /
182	setlocale (LC_CTYPE, locale: "C");
183	if (remaining < argc)
184	do
185	result = read_input_file (current: result, fname: argv[remaining]);
186	while (++remaining < argc);
187	else
188	result = read_input_file (NULL, fname: "-");
189
190	/ Write out the result. /
191	if (result != NULL)
192	write_out (result, output_name, header_name);
193
194	return error_message_count != `0`;
195	}
196
197
198	/ Handle program arguments. /
199	static error_t
200	parse_opt (int key, char arg, struct* argp_state *state)
201	{
202	switch (key)
203	{
204	case `'H'`:
205	header_name = arg;
206	break;
207	case OPT_NEW:
208	force_new = `1`;
209	break;
210	case `'o'`:
211	output_name = arg;
212	break;
213	default:
214	return ARGP_ERR_UNKNOWN;
215	}
216	return `0`;
217	}
218
219
220	static char *
221	more_help (int key, const char text, void* *input)
222	{
223	char *tp = NULL;
224	switch (key)
225	{
226	case ARGP_KEY_HELP_EXTRA:
227	/ We print some extra information. /
228	if (asprintf (ptr: &tp, gettext ("\
229	For bug reporting instructions, please see:\n\
230	%s.\n"), REPORT_BUGS_TO) < `0`)
231	return NULL;
232	return tp;
233	default:
234	break;
235	}
236	return (char *) text;
237	}
238
239	/ Print the version information. /
240	static void
241	print_version (FILE stream, struct* argp_state *state)
242	{
243	fprintf (stream: stream, format: "gencat %s%s\n", PKGVERSION, VERSION);
244	fprintf (stream: stream, gettext ("\
245	Copyright (C) %s Free Software Foundation, Inc.\n\
246	This is free software; see the source for copying conditions. There is NO\n\
247	warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
248	"), "2022");
249	fprintf (stream: stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
250	}
251
252
/* The address of this function will be assigned to the hook in the
   error functions.  */
static void
error_print (void)
{
  /* We don't want the program name to be printed in messages.  Emacs'
     compile.el does not like this.  */
}
261
262
263	static struct catalog *
264	read_input_file (struct catalog current, const* char *fname)
265	{
266	FILE *fp;
267	char *buf;
268	size_t len;
269	size_t line_number;
270	wchar_t *wbuf;
271	size_t wbufsize;
272	iconv_t cd_towc = (iconv_t) -`1`;
273	iconv_t cd_tomb = (iconv_t) -`1`;
274	wchar_t escape_char = L`'\\'`;
275	char *codeset = NULL;
276
277	if (strcmp (s1: fname, s2: "-") == `0` \|\| strcmp (s1: fname, s2: "/dev/stdin") == `0`)
278	{
279	fp = stdin;
280	fname = gettext ("standard input");
281	}
282	else
283	fp = fopen (filename: fname, modes: "r");
284	if (fp == NULL)
285	{
286	error (status: `0`, errno, gettext ("cannot open input file `%s'"), fname);
287	return current;
288	}
289
290	/ If we haven't seen anything yet, allocate result structure. /
291	if (current == NULL)
292	{
293	current = (struct catalog ) xcalloc (n: `1`, s: sizeof* (*current));
294
295	#define obstack_chunk_alloc malloc
296	#define obstack_chunk_free free
297	obstack_init (&current->mem_pool);
298
299	current->current_set = find_set (current, NL_SETD);
300	}
301
302	buf = NULL;
303	len = `0`;
304	line_number = `0`;
305
306	wbufsize = `1024`;
307	wbuf = (wchar_t *) xmalloc (n: wbufsize);
308
309	while (!feof (stream: fp))
310	{
311	int continued;
312	int used;
313	size_t start_line = line_number + `1`;
314	char *this_line;
315
316	do
317	{
318	int act_len;
319
320	act_len = getline (lineptr: &buf, n: &len, stream: fp);
321	if (act_len <= `0`)
322	break;
323	++line_number;
324
325	/ It the line continued? /
326	continued = `0`;
327	if (buf[act_len - `1`] == `'\n'`)
328	{
329	--act_len;
330
331	/ There might be more than one backslash at the end of*
332	the line. Only if there is an odd number of them is
333	the line continued. /*
334	if (act_len > `0` && buf[act_len - `1`] == `'\\'`)
335	{
336	int temp_act_len = act_len;
337
338	do
339	{
340	--temp_act_len;
341	continued = !continued;
342	}
343	while (temp_act_len > `0` && buf[temp_act_len - `1`] == `'\\'`);
344
345	if (continued)
346	--act_len;
347	}
348	}
349
350	/ Append to currently selected line. /
351	obstack_grow (&current->mem_pool, buf, act_len);
352	}
353	while (continued);
354
355	obstack_1grow (&current->mem_pool, `'\0'`);
356	this_line = (char *) obstack_finish (&current->mem_pool);
357
358	used = `0`;
359	if (this_line[`0`] == `'$'`)
360	{
361	if (isblank (this_line[`1`]))
362	{
363	int cnt = `1`;
364	while (isblank (this_line[cnt]))
365	++cnt;
366	if (strncmp (s1: &this_line[cnt], s2: "codeset=", n: `8`) != `0`)
367	/ This is a comment line. Do nothing. /;
368	else if (codeset != NULL)
369	/ Ignore multiple codeset. /;
370	else
371	{
372	int start = cnt + `8`;
373	cnt = start;
374	while (this_line[cnt] != `'\0'` && !isspace (this_line[cnt]))
375	++cnt;
376	if (cnt != start)
377	{
378	int len = cnt - start;
379	codeset = xmalloc (n: len + `1`);
380	((char* *) mempcpy (codeset, &this_line[start], len))
381	= `'\0'`;
382	}
383	}
384	}
385	else if (strncmp (s1: &this_line[`1`], s2: "set", n: `3`) == `0`)
386	{
387	int cnt = sizeof ("set");
388	int set_number;
389	const char *symbol = NULL;
390	while (isspace (this_line[cnt]))
391	++cnt;
392
393	if (isdigit (this_line[cnt]))
394	{
395	set_number = atol (nptr: &this_line[cnt]);
396
397	/ If the given number for the character set is*
398	higher than any we used for symbolic set names
399	avoid clashing by using only higher numbers for
400	the following symbolic definitions. /*
401	if (set_number > current->last_set)
402	current->last_set = set_number;
403	}
404	else
405	{
406	/ See whether it is a reasonable identifier. /
407	int start = cnt;
408	while (isalnum (this_line[cnt]) \|\| this_line[cnt] == `'_'`)
409	++cnt;
410
411	if (cnt == start)
412	{
413	/ No correct character found. /
414	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
415	gettext ("illegal set number"));
416	set_number = `0`;
417	}
418	else
419	{
420	/ We have found seomthing that looks like a*
421	correct identifier. /*
422	struct set_list *runp;
423
424	this_line[cnt] = `'\0'`;
425	used = `1`;
426	symbol = &this_line[start];
427
428	/ Test whether the identifier was already used. /
429	runp = current->all_sets;
430	while (runp != `0`)
431	if (runp->symbol != NULL
432	&& strcmp (s1: runp->symbol, s2: symbol) == `0`)
433	break;
434	else
435	runp = runp->next;
436
437	if (runp != NULL)
438	{
439	/ We cannot allow duplicate identifiers for*
440	message sets. /*
441	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
442	gettext ("duplicate set definition"));
443	error_at_line (status: `0`, errnum: `0`, fname: runp->fname, lineno: runp->line,
444	gettext ("\
445	this is the first definition"));
446	set_number = `0`;
447	}
448	else
449	/ Allocate next free message set for identifier. /
450	set_number = ++current->last_set;
451	}
452	}
453
454	if (set_number != `0`)
455	{
456	/ We found a legal set number. /
457	current->current_set = find_set (current, number: set_number);
458	if (symbol != NULL)
459	used = `1`;
460	current->current_set->symbol = symbol;
461	current->current_set->fname = fname;
462	current->current_set->line = start_line;
463	}
464	}
465	else if (strncmp (s1: &this_line[`1`], s2: "delset", n: `6`) == `0`)
466	{
467	int cnt = sizeof ("delset");
468	while (isspace (this_line[cnt]))
469	++cnt;
470
471	if (isdigit (this_line[cnt]))
472	{
473	size_t set_number = atol (nptr: &this_line[cnt]);
474	struct set_list *set;
475
476	/ Mark the message set with the given number as*
477	deleted. /*
478	set = find_set (current, number: set_number);
479	set->deleted = `1`;
480	}
481	else
482	{
483	/ See whether it is a reasonable identifier. /
484	int start = cnt;
485	while (isalnum (this_line[cnt]) \|\| this_line[cnt] == `'_'`)
486	++cnt;
487
488	if (cnt == start)
489	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
490	gettext ("illegal set number"));
491	else
492	{
493	const char *symbol;
494	struct set_list *runp;
495
496	this_line[cnt] = `'\0'`;
497	used = `1`;
498	symbol = &this_line[start];
499
500	/ We have a symbolic set name. This name must*
501	appear somewhere else in the catalogs read so
502	far. /*
503	for (runp = current->all_sets; runp != NULL;
504	runp = runp->next)
505	{
506	if (strcmp (s1: runp->symbol, s2: symbol) == `0`)
507	{
508	runp->deleted = `1`;
509	break;
510	}
511	}
512	if (runp == NULL)
513	/ Name does not exist before. /
514	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
515	gettext ("unknown set `%s'"), symbol);
516	}
517	}
518	}
519	else if (strncmp (s1: &this_line[`1`], s2: "quote", n: `5`) == `0`)
520	{
521	char buf[`2`];
522	char *bufptr;
523	size_t buflen;
524	char *wbufptr;
525	size_t wbuflen;
526	int cnt;
527
528	cnt = sizeof ("quote");
529	while (isspace (this_line[cnt]))
530	++cnt;
531
532	/ We need the conversion. /
533	if (cd_towc == (iconv_t) -`1`
534	&& open_conversion (codesetp: codeset, cd_towcp: &cd_towc, cd_tombp: &cd_tomb,
535	escape_charp: &escape_char) != `0`)
536	/ Something is wrong. /
537	goto out;
538
539	/ Yes, the quote char can be '\0'; this means no quote*
540	char. The function using the information works on
541	wide characters so we have to convert it here. /*
542	buf[`0`] = this_line[cnt];
543	buf[`1`] = `'\0'`;
544	bufptr = buf;
545	buflen = `2`;
546
547	wbufptr = (char *) wbuf;
548	wbuflen = wbufsize;
549
550	/ Flush the state. /
551	iconv (cd: cd_towc, NULL, NULL, NULL, NULL);
552
553	iconv (cd: cd_towc, inbuf: &bufptr, inbytesleft: &buflen, outbuf: &wbufptr, outbytesleft: &wbuflen);
554	if (buflen != `0` \|\| (wchar_t *) wbufptr != &wbuf[`2`])
555	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
556	gettext ("invalid quote character"));
557	else
558	/ Use the converted wide character. /
559	current->quote_char = wbuf[`0`];
560	}
561	else
562	{
563	int cnt;
564	cnt = `2`;
565	while (this_line[cnt] != `'\0'` && !isspace (this_line[cnt]))
566	++cnt;
567	this_line[cnt] = `'\0'`;
568	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
569	gettext ("unknown directive `%s': line ignored"),
570	&this_line[`1`]);
571	}
572	}
573	else if (isalnum (this_line[`0`]) \|\| this_line[`0`] == `'_'`)
574	{
575	const char *ident = this_line;
576	char *line = this_line;
577	int message_number;
578
579	do
580	++line;
581	while (line[`0`] != `'\0'` && !isspace (line[`0`]));
582	if (line[`0`] != `'\0'`)
583	line++ = `'\0'`; /* Terminate the identifier. /
584
585	/ Now we found the beginning of the message itself. /
586
587	if (isdigit (ident[`0`]))
588	{
589	struct message_list *runp;
590	struct message_list *lastp;
591
592	message_number = atoi (nptr: ident);
593
594	/ Find location to insert the new message. /
595	runp = current->current_set->messages;
596	lastp = NULL;
597	while (runp != NULL)
598	if (runp->number == message_number)
599	break;
600	else
601	{
602	lastp = runp;
603	runp = runp->next;
604	}
605	if (runp != NULL)
606	{
607	/ Oh, oh. There is already a message with this*
608	number in the message set. /*
609	if (runp->symbol == NULL)
610	{
611	/ The existing message had its number specified*
612	by the user. Fatal collision type uh, oh. /*
613	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
614	gettext ("duplicated message number"));
615	error_at_line (status: `0`, errnum: `0`, fname: runp->fname, lineno: runp->line,
616	gettext ("this is the first definition"));
617	message_number = `0`;
618	}
619	else
620	{
621	/ Collision was with number auto-assigned to a*
622	symbolic. Change existing symbolic number
623	and move to end the list (if not already there). /*
624	runp->number = ++current->current_set->last_message;
625
626	if (runp->next != NULL)
627	{
628	struct message_list *endp;
629
630	if (lastp == NULL)
631	current->current_set->messages=runp->next;
632	else
633	lastp->next=runp->next;
634
635	endp = runp->next;
636	while (endp->next != NULL)
637	endp = endp->next;
638
639	endp->next = runp;
640	runp->next = NULL;
641	}
642	}
643	}
644	ident = NULL; / We don't have a symbol. /
645
646	if (message_number != `0`
647	&& message_number > current->current_set->last_message)
648	current->current_set->last_message = message_number;
649	}
650	else if (ident[`0`] != `'\0'`)
651	{
652	struct message_list *runp;
653
654	/ Test whether the symbolic name was not used for*
655	another message in this message set. /*
656	runp = current->current_set->messages;
657	while (runp != NULL)
658	if (runp->symbol != NULL && strcmp (s1: ident, s2: runp->symbol) == `0`)
659	break;
660	else
661	runp = runp->next;
662	if (runp != NULL)
663	{
664	/ The name is already used. /
665	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line, gettext ("\
666	duplicated message identifier"));
667	error_at_line (status: `0`, errnum: `0`, fname: runp->fname, lineno: runp->line,
668	gettext ("this is the first definition"));
669	message_number = `0`;
670	}
671	else
672	/ Give the message the next unused number. /
673	message_number = ++current->current_set->last_message;
674	}
675	else
676	message_number = `0`;
677
678	if (message_number != `0`)
679	{
680	char *inbuf;
681	size_t inlen;
682	char *outbuf;
683	size_t outlen;
684	struct message_list *newp;
685	size_t line_len = strlen (s: line) + `1`;
686	size_t ident_len = `0`;
687
688	/ We need the conversion. /
689	if (cd_towc == (iconv_t) -`1`
690	&& open_conversion (codesetp: codeset, cd_towcp: &cd_towc, cd_tombp: &cd_tomb,
691	escape_charp: &escape_char) != `0`)
692	/ Something is wrong. /
693	goto out;
694
695	/ Convert to a wide character string. We have to*
696	interpret escape sequences which will be impossible
697	without doing the conversion if the codeset of the
698	message is stateful. /*
699	while (`1`)
700	{
701	inbuf = line;
702	inlen = line_len;
703	outbuf = (char *) wbuf;
704	outlen = wbufsize;
705
706	/ Flush the state. /
707	iconv (cd: cd_towc, NULL, NULL, NULL, NULL);
708
709	iconv (cd: cd_towc, inbuf: &inbuf, inbytesleft: &inlen, outbuf: &outbuf, outbytesleft: &outlen);
710	if (inlen == `0`)
711	{
712	/ The string is converted. /
713	assert (outlen < wbufsize);
714	assert (wbuf[(wbufsize - outlen) / sizeof (wchar_t) - `1`]
715	== L`'\0'`);
716	break;
717	}
718
719	if (outlen != `0`)
720	{
721	/ Something is wrong with this string, we ignore it. /
722	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line, gettext ("\
723	invalid character: message ignored"));
724	goto ignore;
725	}
726
727	/ The output buffer is too small. /
728	wbufsize *= `2`;
729	wbuf = (wchar_t *) xrealloc (o: wbuf, n: wbufsize);
730	}
731
732	/ Strip quote characters, change escape sequences into*
733	correct characters etc. /*
734	normalize_line (fname, line: start_line, cd: cd_towc, string: wbuf,
735	quote_char: current->quote_char, escape_char);
736
737	if (ident)
738	ident_len = line - this_line;
739
740	/ Now the string is free of escape sequences. Convert it*
741	back into a multibyte character string. First free the
742	memory allocated for the original string. /*
743	obstack_free (&current->mem_pool, this_line);
744
745	used = `1`; / Yes, we use the line. /
746
747	/ Now fill in the new string. It should never happen that*
748	the replaced string is longer than the original. /*
749	inbuf = (char *) wbuf;
750	inlen = (wcslen (s: wbuf) + `1`) * sizeof (wchar_t);
751
752	outlen = obstack_room (&current->mem_pool);
753	obstack_blank (&current->mem_pool, outlen);
754	this_line = (char *) obstack_base (&current->mem_pool);
755	outbuf = this_line + ident_len;
756	outlen -= ident_len;
757
758	/ Flush the state. /
759	iconv (cd: cd_tomb, NULL, NULL, NULL, NULL);
760
761	iconv (cd: cd_tomb, inbuf: &inbuf, inbytesleft: &inlen, outbuf: &outbuf, outbytesleft: &outlen);
762	if (inlen != `0`)
763	{
764	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
765	gettext ("invalid line"));
766	goto ignore;
767	}
768	assert (outbuf[-`1`] == `'\0'`);
769
770	/ Free the memory in the obstack we don't use. /
771	obstack_blank (&current->mem_pool, -(int) outlen);
772	line = obstack_finish (&current->mem_pool);
773
774	newp = (struct message_list ) xmalloc (n: sizeof* (*newp));
775	newp->number = message_number;
776	newp->message = line + ident_len;
777	/ Remember symbolic name; is NULL if no is given. /
778	newp->symbol = ident ? line : NULL;
779	/ Remember where we found the character. /
780	newp->fname = fname;
781	newp->line = start_line;
782
783	/ Find place to insert to message. We keep them in a*
784	sorted single linked list. /*
785	if (current->current_set->messages == NULL
786	\|\| current->current_set->messages->number > message_number)
787	{
788	newp->next = current->current_set->messages;
789	current->current_set->messages = newp;
790	}
791	else
792	{
793	struct message_list *runp;
794	runp = current->current_set->messages;
795	while (runp->next != NULL)
796	if (runp->next->number > message_number)
797	break;
798	else
799	runp = runp->next;
800	newp->next = runp->next;
801	runp->next = newp;
802	}
803	}
804	++current->total_messages;
805	}
806	else
807	{
808	size_t cnt;
809
810	cnt = `0`;
811	/ See whether we have any non-white space character in this*
812	line. /*
813	while (this_line[cnt] != `'\0'` && isspace (this_line[cnt]))
814	++cnt;
815
816	if (this_line[cnt] != `'\0'`)
817	/ Yes, some unknown characters found. /
818	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
819	gettext ("malformed line ignored"));
820	}
821
822	ignore:
823	/ We can save the memory for the line if it was not used. /
824	if (!used)
825	obstack_free (&current->mem_pool, this_line);
826	}
827
828	/ Close the conversion modules. /
829	iconv_close (cd: cd_towc);
830	iconv_close (cd: cd_tomb);
831	free (ptr: codeset);
832
833	out:
834	free (ptr: wbuf);
835
836	if (fp != stdin)
837	fclose (stream: fp);
838	return current;
839	}
840
841
842	static void
843	write_out (struct catalog catalog, const* char *output_name,
844	const char *header_name)
845	{
846	/ Computing the "optimal" size. /
847	struct set_list *set_run;
848	size_t best_total, best_size, best_depth;
849	size_t act_size, act_depth;
850	struct catalog_obj obj;
851	struct obstack string_pool;
852	const char *strings;
853	size_t strings_size;
854	uint32_t array1, array2;
855	size_t cnt;
856	int fd;
857
858	/ If not otherwise told try to read file with existing*
859	translations. /*
860	if (!force_new)
861	read_old (catalog, file_name: output_name);
862
863	/ Initialize best_size with a very high value. /
864	best_total = best_size = best_depth = UINT_MAX;
865
866	/ We need some start size for testing. Let's start with*
867	TOTAL_MESSAGES / 5, which theoretically provides a mean depth of
868	5. /*
869	act_size = `1` + catalog->total_messages / `5`;
870
871	/ We determine the size of a hash table here. Because the message*
872	numbers can be chosen arbitrary by the programmer we cannot use
873	the simple method of accessing the array using the message
874	number. The algorithm is based on the trivial hash function
875	NUMBER % TABLE_SIZE, where collisions are stored in a second
876	dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that
877	the needed space (= TABLE_SIZE TABLE_DEPTH) is minimal. /
878	while (act_size <= best_total)
879	{
880	size_t deep[act_size];
881
882	act_depth = `1`;
883	memset (s: deep, c: `'\0'`, n: act_size * sizeof (size_t));
884	set_run = catalog->all_sets;
885	while (set_run != NULL)
886	{
887	struct message_list *message_run;
888
889	message_run = set_run->messages;
890	while (message_run != NULL)
891	{
892	size_t idx = (message_run->number * set_run->number) % act_size;
893
894	++deep[idx];
895	if (deep[idx] > act_depth)
896	{
897	act_depth = deep[idx];
898	if (act_depth * act_size > best_total)
899	break;
900	}
901	message_run = message_run->next;
902	}
903	set_run = set_run->next;
904	}
905
906	if (act_depth * act_size <= best_total)
907	{
908	/ We have found a better solution. /
909	best_total = act_depth * act_size;
910	best_size = act_size;
911	best_depth = act_depth;
912	}
913
914	++act_size;
915	}
916
917	/ let's be prepared for an empty message file. /
918	if (best_size == UINT_MAX)
919	{
920	best_size = `1`;
921	best_depth = `1`;
922	}
923
924	/ OK, now we have the size we will use. Fill in the header, build*
925	the table and the second one with swapped byte order. /*
926	obj.magic = CATGETS_MAGIC;
927	obj.plane_size = best_size;
928	obj.plane_depth = best_depth;
929
930	/ Allocate room for all needed arrays. /
931	array1 =
932	(uint32_t ) alloca (best_size best_depth * sizeof (uint32_t) * `3`);
933	memset (s: array1, c: `'\0'`, n: best_size * best_depth * sizeof (uint32_t) * `3`);
934	array2
935	= (uint32_t ) alloca (best_size best_depth * sizeof (uint32_t) * `3`);
936	obstack_init (&string_pool);
937
938	set_run = catalog->all_sets;
939	while (set_run != NULL)
940	{
941	struct message_list *message_run;
942
943	message_run = set_run->messages;
944	while (message_run != NULL)
945	{
946	size_t idx = (((message_run->number * set_run->number) % best_size)
947	* `3`);
948	/ Determine collision depth. /
949	while (array1[idx] != `0`)
950	idx += best_size * `3`;
951
952	/ Store set number, message number and pointer into string*
953	space, relative to the first string. /*
954	array1[idx + `0`] = set_run->number;
955	array1[idx + `1`] = message_run->number;
956	array1[idx + `2`] = obstack_object_size (&string_pool);
957
958	/ Add current string to the continuous space containing all*
959	strings. /*
960	obstack_grow0 (&string_pool, message_run->message,
961	strlen (message_run->message));
962
963	message_run = message_run->next;
964	}
965
966	set_run = set_run->next;
967	}
968	strings_size = obstack_object_size (&string_pool);
969	strings = obstack_finish (&string_pool);
970
971	/ Compute ARRAY2 by changing the byte order. /
972	for (cnt = `0`; cnt < best_size * best_depth * `3`; ++cnt)
973	array2[cnt] = SWAPU32 (array1[cnt]);
974
975	/ Now we can write out the whole data. /
976	if (strcmp (s1: output_name, s2: "-") == `0`
977	\|\| strcmp (s1: output_name, s2: "/dev/stdout") == `0`)
978	fd = STDOUT_FILENO;
979	else
980	{
981	fd = creat (file: output_name, mode: `0666`);
982	if (fd < `0`)
983	error (EXIT_FAILURE, errno, gettext ("cannot open output file `%s'"),
984	output_name);
985	}
986
987	/ Write out header. /
988	write (fd: fd, buf: &obj, n: sizeof (obj));
989
990	/ We always write out the little endian version of the index*
991	arrays. /*
992	#if __BYTE_ORDER == __LITTLE_ENDIAN
993	write (fd: fd, buf: array1, n: best_size * best_depth * sizeof (uint32_t) * `3`);
994	write (fd: fd, buf: array2, n: best_size * best_depth * sizeof (uint32_t) * `3`);
995	#elif __BYTE_ORDER == __BIG_ENDIAN
996	write (fd, array2, best_size * best_depth * sizeof (uint32_t) * `3`);
997	write (fd, array1, best_size * best_depth * sizeof (uint32_t) * `3`);
998	#else
999	# error Cannot handle __BYTE_ORDER byte order
1000	#endif
1001
1002	/ Finally write the strings. /
1003	write (fd: fd, buf: strings, n: strings_size);
1004
1005	if (fd != STDOUT_FILENO)
1006	close (fd: fd);
1007
1008	/ If requested now write out the header file. /
1009	if (header_name != NULL)
1010	{
1011	int first = `1`;
1012	FILE *fp;
1013
1014	/ Open output file. "-" or "/dev/stdout" means write to*
1015	standard output. /*
1016	if (strcmp (s1: header_name, s2: "-") == `0`
1017	\|\| strcmp (s1: header_name, s2: "/dev/stdout") == `0`)
1018	fp = stdout;
1019	else
1020	{
1021	fp = fopen (filename: header_name, modes: "w");
1022	if (fp == NULL)
1023	error (EXIT_FAILURE, errno,
1024	gettext ("cannot open output file `%s'"), header_name);
1025	}
1026
1027	/ Iterate over all sets and all messages. /
1028	set_run = catalog->all_sets;
1029	while (set_run != NULL)
1030	{
1031	struct message_list *message_run;
1032
1033	/ If the current message set has a symbolic name write this*
1034	out first. /*
1035	if (set_run->symbol != NULL)
1036	fprintf (stream: fp, format: "%s#define %sSet %#x\t/* %s:%Zu */\n",
1037	first ? "" : "\n", set_run->symbol, set_run->number - `1`,
1038	set_run->fname, set_run->line);
1039	first = `0`;
1040
1041	message_run = set_run->messages;
1042	while (message_run != NULL)
1043	{
1044	/ If the current message has a symbolic name write*
1045	#define out. But we have to take care for the set
1046	not having a symbolic name. /*
1047	if (message_run->symbol != NULL)
1048	{
1049	if (set_run->symbol == NULL)
1050	fprintf (stream: fp, format: "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n",
1051	set_run->number, message_run->symbol,
1052	message_run->number, message_run->fname,
1053	message_run->line);
1054	else
1055	fprintf (stream: fp, format: "#define %s%s %#x\t/* %s:%Zu */\n",
1056	set_run->symbol, message_run->symbol,
1057	message_run->number, message_run->fname,
1058	message_run->line);
1059	}
1060
1061	message_run = message_run->next;
1062	}
1063
1064	set_run = set_run->next;
1065	}
1066
1067	if (fp != stdout)
1068	fclose (stream: fp);
1069	}
1070	}
1071
1072
1073	static struct set_list *
1074	find_set (struct catalog current, int* number)
1075	{
1076	struct set_list *result = current->all_sets;
1077
1078	/ We must avoid set number 0 because a set of this number signals*
1079	in the tables that the entry is not occupied. /*
1080	++number;
1081
1082	while (result != NULL)
1083	if (result->number == number)
1084	return result;
1085	else
1086	result = result->next;
1087
1088	/ Prepare new message set. /
1089	result = (struct set_list ) xcalloc (n: `1`, s: sizeof* (*result));
1090	result->number = number;
1091	result->next = current->all_sets;
1092	current->all_sets = result;
1093
1094	return result;
1095	}
1096
1097
/* Normalize given string *in place* by processing escape sequences
   and quote characters.

   STRING is a NUL-terminated wide string.  If QUOTE_CHAR is not L'\0'
   and STRING starts with it, the message is quoted and must end with
   another QUOTE_CHAR; the string always ends at the first unescaped
   QUOTE_CHAR.  ESCAPE_CHAR introduces the sequences n, t, v, b, r, f,
   an octal number, the escape character itself and (as an extension)
   the quote character; before any other character it is dropped.
   Octal values are converted to a wide character through CD.  FNAME
   and LINE are used only for diagnostics.  */
static void
normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
		wchar_t quote_char, wchar_t escape_char)
{
  int is_quoted;
  wchar_t *rp = string;
  wchar_t *wp = string;

  if (quote_char != L'\0' && *rp == quote_char)
    {
      is_quoted = 1;
      ++rp;
    }
  else
    is_quoted = 0;

  while (*rp != L'\0')
    if (*rp == quote_char)
      /* We simply end the string when we find the first time an
	 not-escaped quote character.  */
	break;
    else if (*rp == escape_char)
      {
	++rp;
	if (quote_char != L'\0' && *rp == quote_char)
	  /* This is an extension to XPG.  */
	  *wp++ = *rp++;
	else
	  /* Recognize escape sequences.  */
	  switch (*rp)
	    {
	    case L'n':
	      *wp++ = L'\n';
	      ++rp;
	      break;
	    case L't':
	      *wp++ = L'\t';
	      ++rp;
	      break;
	    case L'v':
	      *wp++ = L'\v';
	      ++rp;
	      break;
	    case L'b':
	      *wp++ = L'\b';
	      ++rp;
	      break;
	    case L'r':
	      *wp++ = L'\r';
	      ++rp;
	      break;
	    case L'f':
	      *wp++ = L'\f';
	      ++rp;
	      break;
	    case L'0':
	    case L'1':
	    case L'2':
	    case L'3':
	    case L'4':
	    case L'5':
	    case L'6':
	    case L'7':
	      {
		int number;
		char cbuf[2];
		char *cbufptr;
		size_t cbufin;
		wchar_t wcbuf[2];
		char *wcbufptr;
		size_t wcbufin;

		/* Accumulate octal digits while the value, after one
		   more digit, still fits into a byte.  */
		number = *rp++ - L'0';
		while (number <= (255 / 8) && *rp >= L'0' && *rp <= L'7')
		  {
		    number *= 8;
		    number += *rp++ - L'0';
		  }

		cbuf[0] = (char) number;
		cbuf[1] = '\0';
		cbufptr = cbuf;
		cbufin = 2;

		wcbufptr = (char *) wcbuf;
		wcbufin = sizeof (wcbuf);

		/* Flush the state.  */
		iconv (cd, NULL, NULL, NULL, NULL);

		iconv (cd, &cbufptr, &cbufin, &wcbufptr, &wcbufin);
		if (cbufptr != &cbuf[2] || (wchar_t *) wcbufptr != &wcbuf[2])
		  error_at_line (0, 0, fname, line,
				 gettext ("invalid escape sequence"));
		else
		  *wp++ = wcbuf[0];
	      }
	      break;
	    default:
	      if (*rp == escape_char)
		{
		  *wp++ = escape_char;
		  ++rp;
		}
	      else
		{
		  /* Simply ignore the backslash character.  */
		}
	      break;
	    }
      }
    else
      *wp++ = *rp++;

  /* If we saw a quote character at the beginning we expect another
     one at the end.  */
  if (is_quoted && *rp != quote_char)
    error_at_line (0, 0, fname, line, gettext ("unterminated message"));

  /* Terminate string.  */
  *wp = L'\0';
  return;
}
1216
1217
1218	static void
1219	read_old (struct catalog catalog, const* char *file_name)
1220	{
1221	struct catalog_info old_cat_obj;
1222	struct set_list *set = NULL;
1223	int last_set = -`1`;
1224	size_t cnt;
1225
1226	/ Try to open catalog, but don't look through the NLSPATH. /
1227	if (__open_catalog (cat_name: file_name, NULL, NULL, catalog: &old_cat_obj) != `0`)
1228	{
1229	if (errno == ENOENT)
1230	/ No problem, the catalog simply does not exist. /
1231	return;
1232	else
1233	error (EXIT_FAILURE, errno,
1234	gettext ("while opening old catalog file"));
1235	}
1236
1237	/ OK, we have the catalog loaded. Now read all messages and merge*
1238	them. When set and message number clash for any message the new
1239	one is used. If the new one is empty it indicates that the
1240	message should be deleted. /*
1241	for (cnt = `0`; cnt < old_cat_obj.plane_size * old_cat_obj.plane_depth; ++cnt)
1242	{
1243	struct message_list message, last;
1244
1245	if (old_cat_obj.name_ptr[cnt * `3` + `0`] == `0`)
1246	/ No message in this slot. /
1247	continue;
1248
1249	if (old_cat_obj.name_ptr[cnt * `3` + `0`] - `1` != (uint32_t) last_set)
1250	{
1251	last_set = old_cat_obj.name_ptr[cnt * `3` + `0`] - `1`;
1252	set = find_set (current: catalog, number: old_cat_obj.name_ptr[cnt * `3` + `0`] - `1`);
1253	}
1254
1255	last = NULL;
1256	message = set->messages;
1257	while (message != NULL)
1258	{
1259	if ((uint32_t) message->number >= old_cat_obj.name_ptr[cnt * `3` + `1`])
1260	break;
1261	last = message;
1262	message = message->next;
1263	}
1264
1265	if (message == NULL
1266	\|\| (uint32_t) message->number > old_cat_obj.name_ptr[cnt * `3` + `1`])
1267	{
1268	/ We have found a message which is not yet in the catalog.*
1269	Insert it at the right position. /*
1270	struct message_list *newp;
1271
1272	newp = (struct message_list ) xmalloc (n: sizeof* (*newp));
1273	newp->number = old_cat_obj.name_ptr[cnt * `3` + `1`];
1274	newp->message =
1275	&old_cat_obj.strings[old_cat_obj.name_ptr[cnt * `3` + `2`]];
1276	newp->fname = NULL;
1277	newp->line = `0`;
1278	newp->symbol = NULL;
1279	newp->next = message;
1280
1281	if (last == NULL)
1282	set->messages = newp;
1283	else
1284	last->next = newp;
1285
1286	++catalog->total_messages;
1287	}
1288	else if (*message->message == `'\0'`)
1289	{
1290	/ The new empty message has overridden the old one thus*
1291	"deleting" it as required. Now remove the empty remains. /*
1292	if (last == NULL)
1293	set->messages = message->next;
1294	else
1295	last->next = message->next;
1296	}
1297	}
1298	}
1299
1300
/* Open the conversion descriptors between CODESET and wchar_t and
   determine the escape character.

   CODESET names the character set of the input; if it is NULL the
   codeset of the locale selected by the environment is used.  On
   success *CD_TOWCP converts from CODESET to wchar_t, *CD_TOMBP
   converts from wchar_t to CODESET, *ESCAPE_CHARP is the wide
   character the byte 0x5c maps to, and 0 is returned.

   If a descriptor cannot be opened a diagnostic is printed, any
   descriptor which was opened is closed again, both *CD_TOWCP and
   *CD_TOMBP are set to (iconv_t) -1, and 1 is returned.  */
static int
open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp,
                 wchar_t *escape_charp)
{
  char buf[2];
  char *bufptr;
  size_t bufsize;
  wchar_t wbuf[2];
  char *wbufptr;
  size_t wbufsize;

  /* If the input file does not specify the codeset use the locale's.
     NOTE(review): POSIX allows a later setlocale call to invalidate
     the string returned by nl_langinfo; this relies on the C library
     keeping it valid -- confirm.  */
  if (codeset == NULL)
    {
      setlocale (LC_ALL, "");
      codeset = nl_langinfo (CODESET);
      setlocale (LC_ALL, "C");
    }

  /* Get the conversion modules.  */
  *cd_towcp = iconv_open ("WCHAR_T", codeset);
  *cd_tombp = iconv_open (codeset, "WCHAR_T");
  if (*cd_towcp == (iconv_t) -1 || *cd_tombp == (iconv_t) -1)
    {
      error (0, 0, gettext ("conversion modules not available"));

      /* Release whichever descriptor could be opened and mark both as
         invalid so that nothing is leaked and the caller cannot close
         a descriptor a second time.  */
      if (*cd_towcp != (iconv_t) -1)
        {
          iconv_close (*cd_towcp);
          *cd_towcp = (iconv_t) -1;
        }
      if (*cd_tombp != (iconv_t) -1)
        {
          iconv_close (*cd_tombp);
          *cd_tombp = (iconv_t) -1;
        }

      return 1;
    }

  /* One special case for historical reasons is the backslash
     character.  In some codesets the byte value 0x5c is not mapped to
     U005c in Unicode.  These charsets then don't have a backslash
     character at all.  Therefore we have to live with whatever the
     codeset provides and recognize, instead of the U005c, the character
     the byte value 0x5c is mapped to.  */
  buf[0] = '\\';
  buf[1] = '\0';
  bufptr = buf;
  bufsize = 2;

  wbufptr = (char *) wbuf;
  wbufsize = sizeof (wbuf);

  iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize);
  /* Both the byte and its terminating NUL must have been converted
     into exactly two wide characters.  */
  if (bufsize != 0 || wbufsize != 0)
    {
      /* Something went wrong, we couldn't convert the byte 0x5c.  Go
         on with using U005c.  */
      error (0, 0, gettext ("cannot determine escape character"));
      *escape_charp = L'\\';
    }
  else
    *escape_charp = wbuf[0];

  return 0;
}
1359

source code of glibc/catgets/gencat.c