1/* Copyright (C) 1995-2022 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published
6 by the Free Software Foundation; version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, see <https://www.gnu.org/licenses/>. */
16
17#ifdef HAVE_CONFIG_H
18# include <config.h>
19#endif
20
21#include <argp.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <libintl.h>
25#include <locale.h>
26#include <stdbool.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <unistd.h>
31#include <error.h>
32#include <sys/mman.h>
33#include <sys/stat.h>
34#include <ctype.h>
35
36#include "localedef.h"
37#include "charmap.h"
38#include "locfile.h"
39
/* Uncomment the following line in the production version.  */
41/* #define NDEBUG 1 */
42#include <assert.h>
43
44
/* Head of the list of copied locales.  */
struct copy_def_list_t *copy_list;

/* Nonzero when strict POSIX conformance is requested.  */
int posix_conformance;

/* Nonzero when output must be written even though problems were
   reported (-c / --force).  */
static int force_output;

/* Prefix prepended to the output file path, or NULL for none.  */
const char *output_prefix;

/* Name of the character map file (-f).  */
static const char *charmap_file;

/* Name of the locale definition file (-i); NULL when none was given.  */
static const char *input_file;

/* Name of the repertoire map file (-u).  */
const char *repertoire_global;

/* Name of the locale.alias file (-A).  */
const char *alias_file;

/* Head of the list of all locales.  */
static struct localedef_t *locales;

/* True when locale data must not be added to the archive.  */
bool no_archive;

/* True when the named locales are to be added to the archive.  */
static bool add_to_archive;

/* True when the named locales are to be deleted from the archive.  */
static bool delete_from_archive;

/* True when existing archive content is replaced while adding.  */
static bool replace_archive;

/* True when the archive content is to be listed.  */
static bool list_archive;

/* True when hard links to other locales are created (the default).  */
bool hard_links = true;

/* Maximum number of retries when opening the locale archive.  */
int max_locarchive_open_retry = 10;
92
93
/* The program name and version are printed by print_version, which is
   installed here as argp's version hook.  */
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
97
/* Keys of the options that have no short (single character) form.  The
   numeric values are unchanged; 303 is intentionally left unused.  */
enum
{
  OPT_POSIX = 301,
  OPT_QUIET = 302,
  OPT_PREFIX = 304,
  OPT_NO_ARCHIVE = 305,
  OPT_ADD_TO_ARCHIVE = 306,
  OPT_REPLACE = 307,
  OPT_DELETE_FROM_ARCHIVE = 308,
  OPT_LIST_ARCHIVE = 309,
  OPT_LITTLE_ENDIAN = 400,
  OPT_BIG_ENDIAN = 401,
  OPT_NO_WARN = 402,
  OPT_WARN = 403,
  OPT_NO_HARD_LINKS = 404
};
111
112/* Definitions of arguments for argp functions. */
113static const struct argp_option options[] =
114{
115 { NULL, 0, NULL, 0, N_("Input Files:") },
116 { "charmap", 'f', N_("FILE"), 0,
117 N_("Symbolic character names defined in FILE") },
118 { "inputfile", 'i', N_("FILE"), 0,
119 N_("Source definitions are found in FILE") },
120 { "repertoire-map", 'u', N_("FILE"), 0,
121 N_("FILE contains mapping from symbolic names to UCS4 values") },
122
123 { NULL, 0, NULL, 0, N_("Output control:") },
124 { "force", 'c', NULL, 0,
125 N_("Create output even if warning messages were issued") },
126 { "no-hard-links", OPT_NO_HARD_LINKS, NULL, 0,
127 N_("Do not create hard links between installed locales") },
128 { "prefix", OPT_PREFIX, N_("PATH"), 0, N_("Optional output file prefix") },
129 { "posix", OPT_POSIX, NULL, 0, N_("Strictly conform to POSIX") },
130 { "quiet", OPT_QUIET, NULL, 0,
131 N_("Suppress warnings and information messages") },
132 { "verbose", 'v', NULL, 0, N_("Print more messages") },
133 { "no-warnings", OPT_NO_WARN, N_("<warnings>"), 0,
134 N_("Comma-separated list of warnings to disable; "
135 "supported warnings are: ascii, intcurrsym") },
136 { "warnings", OPT_WARN, N_("<warnings>"), 0,
137 N_("Comma-separated list of warnings to enable; "
138 "supported warnings are: ascii, intcurrsym") },
139
140 { NULL, 0, NULL, 0, N_("Archive control:") },
141 { "no-archive", OPT_NO_ARCHIVE, NULL, 0,
142 N_("Don't add new data to archive") },
143 { "add-to-archive", OPT_ADD_TO_ARCHIVE, NULL, 0,
144 N_("Add locales named by parameters to archive") },
145 { "replace", OPT_REPLACE, NULL, 0, N_("Replace existing archive content") },
146 { "delete-from-archive", OPT_DELETE_FROM_ARCHIVE, NULL, 0,
147 N_("Remove locales named by parameters from archive") },
148 { "list-archive", OPT_LIST_ARCHIVE, NULL, 0, N_("List content of archive") },
149 { "alias-file", 'A', N_("FILE"), 0,
150 N_("locale.alias file to consult when making archive")},
151 { "little-endian", OPT_LITTLE_ENDIAN, NULL, 0,
152 N_("Generate little-endian output") },
153 { "big-endian", OPT_BIG_ENDIAN, NULL, 0,
154 N_("Generate big-endian output") },
155 { NULL, 0, NULL, 0, NULL }
156};
157
158/* Short description of program. */
159static const char doc[] = N_("Compile locale specification");
160
161/* Strings for arguments in help texts. */
162static const char args_doc[] = N_("\
163NAME\n\
164[--add-to-archive|--delete-from-archive] FILE...\n\
165--list-archive [FILE]");
166
167/* Prototype for option handler. */
168static error_t parse_opt (int key, char *arg, struct argp_state *state);
169
170/* Function to print some extra text in the help message. */
171static char *more_help (int key, const char *text, void *input);
172
173/* Data structure to communicate with argp functions. */
174static struct argp argp =
175{
176 options, parse_opt, args_doc, doc, NULL, more_help
177};
178
179
180/* Prototypes for local functions. */
181static void error_print (void);
182static char *construct_output_path (char *path);
183static char *normalize_codeset (const char *codeset, size_t name_len);
184
185
186int
187main (int argc, char *argv[])
188{
189 char *output_path;
190 int cannot_write_why;
191 struct charmap_t *charmap;
192 struct localedef_t global;
193 int remaining;
194
195 /* Set initial values for global variables. */
196 copy_list = NULL;
197 posix_conformance = getenv (name: "POSIXLY_CORRECT") != NULL;
198 error_print_progname = error_print;
199
200 /* Set locale. Do not set LC_ALL because the other categories must
201 not be affected (according to POSIX.2). */
202 setlocale (LC_MESSAGES, locale: "");
203 setlocale (LC_CTYPE, locale: "");
204
205 /* Initialize the message catalog. */
206 textdomain (domainname: _libc_intl_domainname);
207
208 /* Parse and process arguments. */
209 argp_err_exit_status = 4;
210 argp_parse (argp: &argp, argc: argc, argv: argv, flags: 0, arg_index: &remaining, NULL);
211
212 /* Handle a few special cases. */
213 if (list_archive)
214 show_archive_content (fname: remaining > 1 ? argv[remaining] : NULL, verbose);
215 if (add_to_archive)
216 return add_locales_to_archive (nlist: argc - remaining, list: &argv[remaining],
217 replace: replace_archive);
218 if (delete_from_archive)
219 return delete_locales_from_archive (nlist: argc - remaining, list: &argv[remaining]);
220
221 /* POSIX.2 requires to be verbose about missing characters in the
222 character map. */
223 verbose |= posix_conformance;
224
225 if (argc - remaining != 1)
226 {
227 /* We need exactly one non-option parameter. */
228 argp_help (argp: &argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR,
229 name: program_invocation_short_name);
230 exit (status: 4);
231 }
232
233 /* The parameter describes the output path of the constructed files.
234 If the described files cannot be written return a NULL pointer.
235 We don't free output_path because we will exit. */
236 output_path = construct_output_path (path: argv[remaining]);
237 if (output_path == NULL && ! no_archive)
238 error (status: 4, errno, _("cannot create directory for output files"));
239 cannot_write_why = errno;
240
241 /* Now that the parameters are processed we have to reset the local
242 ctype locale. (P1003.2 4.35.5.2) */
243 setlocale (LC_CTYPE, locale: "POSIX");
244
245 /* Look whether the system really allows locale definitions. POSIX
246 defines error code 3 for this situation so I think it must be
247 a fatal error (see P1003.2 4.35.8). */
248 if (sysconf (_SC_2_LOCALEDEF) < 0)
249 record_error (status: 3, errnum: 0, _("\
250FATAL: system does not define `_POSIX2_LOCALEDEF'"));
251
252 /* Process charmap file. */
253 charmap = charmap_read (filename: charmap_file, verbose, error_not_found: 1, be_quiet, use_default: 1);
254
255 /* Add the first entry in the locale list. */
256 memset (s: &global, c: '\0', n: sizeof (struct localedef_t));
257 global.name = input_file ?: "/dev/stdin";
258 global.needed = ALL_LOCALES;
259 locales = &global;
260
261 /* Now read the locale file. */
262 if (locfile_read (result: &global, charmap) != 0)
263 record_error (status: 4, errno, _("\
264cannot open locale definition file `%s'"), input_file);
265
266 /* Perhaps we saw some `copy' instructions. */
267 while (1)
268 {
269 struct localedef_t *runp = locales;
270
271 while (runp != NULL && (runp->needed & runp->avail) == runp->needed)
272 runp = runp->next;
273
274 if (runp == NULL)
275 /* Everything read. */
276 break;
277
278 if (locfile_read (result: runp, charmap) != 0)
279 record_error (status: 4, errno, _("\
280cannot open locale definition file `%s'"), runp->name);
281 }
282
283 /* Check the categories we processed in source form. */
284 check_all_categories (definitions: locales, charmap);
285
286 /* What we do next depends on the number of errors and warnings we
287 have generated in processing the input files.
288
289 * No errors: Write the output file.
290
291 * Some warnings: Write the output file and exit with status 1 to
292 indicate there may be problems using the output file e.g. missing
293 data that makes it difficult to use
294
295 * Errors: We don't write the output file and we exit with status 4
296 to indicate no output files were written.
297
298 The use of -c|--force writes the output file even if errors were
299 seen. */
300 if (recorded_error_count == 0 || force_output != 0)
301 {
302 if (cannot_write_why != 0)
303 record_error (status: 4, errnum: cannot_write_why, _("\
304cannot write output files to `%s'"), output_path ? : argv[remaining]);
305 else
306 write_all_categories (definitions: locales, charmap, locname: argv[remaining], output_path);
307 }
308 else
309 record_error (status: 4, errnum: 0, _("\
310no output file produced because errors were issued"));
311
312 /* This exit status is prescribed by POSIX.2 4.35.7. */
313 exit (status: recorded_warning_count != 0);
314}
315
316/* Search warnings for matching warnings and if found enable those
317 warnings if ENABLED is true, otherwise disable the warnings. */
318static void
319set_warnings (char *warnings, bool enabled)
320{
321 char *tok = warnings;
322 char *copy = (char *) malloc (size: strlen (s: warnings) + 1);
323 char *save = copy;
324
325 /* As we make a copy of the warnings list we remove all spaces from
326 the warnings list to make the processing a more robust. We don't
327 support spaces in a warning name. */
328 do
329 {
330 while (isspace (*tok) != 0)
331 tok++;
332 }
333 while ((*save++ = *tok++) != '\0');
334
335 warnings = copy;
336
337 /* Tokenize the input list of warnings to set, compare them to
338 known warnings, and set the warning. We purposely ignore unknown
339 warnings, and are thus forward compatible, users can attempt to
340 disable whaterver new warnings they know about, but we will only
341 disable those *we* known about. */
342 while ((tok = strtok_r (s: warnings, delim: ",", save_ptr: &save)) != NULL)
343 {
344 warnings = NULL;
345 if (strcmp (s1: tok, s2: "ascii") == 0)
346 warn_ascii = enabled;
347 else if (strcmp (s1: tok, s2: "intcurrsym") == 0)
348 warn_int_curr_symbol = enabled;
349 }
350
351 free (ptr: copy);
352}
353
354/* Handle program arguments. */
355static error_t
356parse_opt (int key, char *arg, struct argp_state *state)
357{
358 switch (key)
359 {
360 case OPT_QUIET:
361 be_quiet = 1;
362 break;
363 case OPT_POSIX:
364 posix_conformance = 1;
365 break;
366 case OPT_PREFIX:
367 output_prefix = arg;
368 break;
369 case OPT_NO_ARCHIVE:
370 no_archive = true;
371 break;
372 case OPT_ADD_TO_ARCHIVE:
373 add_to_archive = true;
374 break;
375 case OPT_REPLACE:
376 replace_archive = true;
377 break;
378 case OPT_DELETE_FROM_ARCHIVE:
379 delete_from_archive = true;
380 break;
381 case OPT_LIST_ARCHIVE:
382 list_archive = true;
383 break;
384 case OPT_LITTLE_ENDIAN:
385 set_big_endian (false);
386 break;
387 case OPT_BIG_ENDIAN:
388 set_big_endian (true);
389 break;
390 case OPT_NO_WARN:
391 /* Disable the warnings. */
392 set_warnings (warnings: arg, false);
393 break;
394 case OPT_WARN:
395 /* Enable the warnings. */
396 set_warnings (warnings: arg, true);
397 break;
398 case OPT_NO_HARD_LINKS:
399 /* Do not hard link to other locales. */
400 hard_links = false;
401 break;
402 case 'c':
403 force_output = 1;
404 break;
405 case 'f':
406 charmap_file = arg;
407 break;
408 case 'A':
409 alias_file = arg;
410 break;
411 case 'i':
412 input_file = arg;
413 break;
414 case 'u':
415 repertoire_global = arg;
416 break;
417 case 'v':
418 verbose = 1;
419 break;
420 default:
421 return ARGP_ERR_UNKNOWN;
422 }
423 return 0;
424}
425
426
427static char *
428more_help (int key, const char *text, void *input)
429{
430 char *cp;
431 char *tp;
432
433 switch (key)
434 {
435 case ARGP_KEY_HELP_EXTRA:
436 /* We print some extra information. */
437 tp = xasprintf (gettext ("\
438For bug reporting instructions, please see:\n\
439%s.\n"), REPORT_BUGS_TO);
440 cp = xasprintf (gettext ("\
441System's directory for character maps : %s\n\
442 repertoire maps: %s\n\
443 locale path : %s\n\
444%s"),
445 CHARMAP_PATH, REPERTOIREMAP_PATH, LOCALE_PATH, tp);
446 free (ptr: tp);
447 return cp;
448 default:
449 break;
450 }
451 return (char *) text;
452}
453
454/* Print the version information. */
455static void
456print_version (FILE *stream, struct argp_state *state)
457{
458 fprintf (stream: stream, format: "localedef %s%s\n", PKGVERSION, VERSION);
459 fprintf (stream: stream, gettext ("\
460Copyright (C) %s Free Software Foundation, Inc.\n\
461This is free software; see the source for copying conditions. There is NO\n\
462warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
463"), "2022");
464 fprintf (stream: stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
465}
466
467
/* Installed by main as error_print_progname, the hook of the error
   functions.  The body is intentionally empty.  */
static void
error_print (void)
{
  /* Nothing to print.  */
}
474
475
476/* The parameter to localedef describes the output path. If it does contain a
477 '/' character it is a relative path. Otherwise it names the locale this
478 definition is for. The returned path must be freed by the caller. */
479static char *
480construct_output_path (char *path)
481{
482 char *result;
483
484 if (strchr (s: path, c: '/') == NULL)
485 {
486 /* This is a system path. First examine whether the locale name
487 contains a reference to the codeset. This should be
488 normalized. */
489 char *startp;
490 char *endp = NULL;
491 char *normal = NULL;
492
493 startp = path;
494 /* Either we have a '@' which starts a CEN name or '.' which starts the
495 codeset specification. The CEN name starts with '@' and may also have
496 a codeset specification, but we do not normalize the string after '@'.
497 If we only find the codeset specification then we normalize only the codeset
498 specification (but not anything after a subsequent '@'). */
499 while (*startp != '\0' && *startp != '@' && *startp != '.')
500 ++startp;
501 if (*startp == '.')
502 {
503 /* We found a codeset specification. Now find the end. */
504 endp = ++startp;
505
506 /* Stop at the first '@', and don't normalize anything past that. */
507 while (*endp != '\0' && *endp != '@')
508 ++endp;
509
510 if (endp > startp)
511 normal = normalize_codeset (codeset: startp, name_len: endp - startp);
512 }
513
514 if (normal == NULL)
515 result = xasprintf (format: "%s%s/%s/", output_prefix ?: "",
516 COMPLOCALEDIR, path);
517 else
518 result = xasprintf (format: "%s%s/%.*s%s%s/",
519 output_prefix ?: "", COMPLOCALEDIR,
520 (int) (startp - path), path, normal, endp ?: "");
521 /* Free the allocated normalized codeset name. */
522 free (ptr: normal);
523 }
524 else
525 {
526 /* This is a user path. */
527 result = xasprintf (format: "%s/", path);
528
529 /* If the user specified an output path we cannot add the output
530 to the archive. */
531 no_archive = true;
532 }
533
534 errno = 0;
535
536 if (no_archive && euidaccess (name: result, W_OK) == -1)
537 {
538 /* Perhaps the directory does not exist now. Try to create it. */
539 if (errno == ENOENT)
540 {
541 errno = 0;
542 if (mkdir (path: result, mode: 0777) < 0)
543 {
544 record_verbose (stderr,
545 _("cannot create output path \'%s\': %s"),
546 result, strerror (errno));
547 free (ptr: result);
548 return NULL;
549 }
550 }
551 else
552 record_verbose (stderr,
553 _("no write permission to output path \'%s\': %s"),
554 result, strerror (errno));
555 }
556
557 return result;
558}
559
560
/* Normalize the codeset name given by the first NAME_LEN characters of
   CODESET.  There is no standard for the codeset names.  Normalization
   allows the user to use any of the common names e.g. UTF-8, utf-8,
   utf8, UTF8 etc.

   We normalize using the following rules:
   - Remove all non-alpha-numeric characters.
   - Lowercase all characters.
   - If there are only digits assume it's an ISO standard and prefix
     with 'iso'.  A name without any alpha-numeric character therefore
     yields just "iso".

   Returns the normalized string, which the caller must release with
   free.  */
static char *
normalize_codeset (const char *codeset, size_t name_len)
{
  int len = 0;
  int only_digit = 1;
  char *retval;
  char *wp;
  size_t cnt;

  /* Compute the length of only the alpha-numeric characters.  The
     characters are read as unsigned char because the <ctype.h>
     functions are undefined for negative arguments other than EOF.  */
  for (cnt = 0; cnt < name_len; ++cnt)
    {
      unsigned char ch = (unsigned char) codeset[cnt];

      if (isalnum (ch))
        {
          ++len;

          if (isalpha (ch))
            only_digit = 0;
        }
    }

  /* If there were only digits we assume it's an ISO standard and we
     will prefix with 'iso' so include space for that.  The characters
     copied from CODESET only reserve the required space; they are
     overwritten below.  */
  wp = retval = xasprintf ("%s%.*s", only_digit ? "iso" : "", len, codeset);

  /* Skip "iso".  */
  if (only_digit)
    wp += 3;

  /* Copy the alpha-numeric characters, lowercasing the letters.  This
     writes exactly LEN characters, so the terminating null byte stays
     in place.  */
  for (cnt = 0; cnt < name_len; ++cnt)
    {
      unsigned char ch = (unsigned char) codeset[cnt];

      if (isalpha (ch))
        *wp++ = tolower (ch);
      else if (isdigit (ch))
        *wp++ = ch;
    }

  /* Return allocated and converted name for caller to free.  */
  return retval;
}
609
610
611struct localedef_t *
612add_to_readlist (int category, const char *name, const char *repertoire_name,
613 int generate, struct localedef_t *copy_locale)
614{
615 struct localedef_t *runp = locales;
616
617 while (runp != NULL && strcmp (s1: name, s2: runp->name) != 0)
618 runp = runp->next;
619
620 if (runp == NULL)
621 {
622 /* Add a new entry at the end. */
623 struct localedef_t *newp;
624
625 assert (generate == 1);
626
627 newp = xcalloc (n: 1, s: sizeof (struct localedef_t));
628 newp->name = name;
629 newp->repertoire_name = repertoire_name;
630
631 if (locales == NULL)
632 runp = locales = newp;
633 else
634 {
635 runp = locales;
636 while (runp->next != NULL)
637 runp = runp->next;
638 runp = runp->next = newp;
639 }
640 }
641
642 if (generate
643 && (runp->needed & (1 << category)) != 0
644 && (runp->avail & (1 << category)) == 0)
645 record_error (status: 5, errnum: 0, _("\
646circular dependencies between locale definitions"));
647
648 if (copy_locale != NULL)
649 {
650 if (runp->categories[category].generic != NULL)
651 record_error (status: 5, errnum: 0, _("\
652cannot add already read locale `%s' a second time"), name);
653 else
654 runp->categories[category].generic =
655 copy_locale->categories[category].generic;
656 }
657
658 runp->needed |= 1 << category;
659
660 return runp;
661}
662
663
664struct localedef_t *
665find_locale (int category, const char *name, const char *repertoire_name,
666 const struct charmap_t *charmap)
667{
668 struct localedef_t *result;
669
670 /* Find the locale, but do not generate it since this would be a bug. */
671 result = add_to_readlist (category, name, repertoire_name, generate: 0, NULL);
672
673 assert (result != NULL);
674
675 if ((result->avail & (1 << category)) == 0
676 && locfile_read (result, charmap) != 0)
677 record_error (status: 4, errno, _("\
678cannot open locale definition file `%s'"), result->name);
679
680 return result;
681}
682
683
684struct localedef_t *
685load_locale (int category, const char *name, const char *repertoire_name,
686 const struct charmap_t *charmap, struct localedef_t *copy_locale)
687{
688 struct localedef_t *result;
689
690 /* Generate the locale if it does not exist. */
691 result = add_to_readlist (category, name, repertoire_name, generate: 1, copy_locale);
692
693 assert (result != NULL);
694
695 if ((result->avail & (1 << category)) == 0
696 && locfile_read (result, charmap) != 0)
697 record_error (status: 4, errno, _("\
698cannot open locale definition file `%s'"), result->name);
699
700 return result;
701}
702

/* Source: glibc/locale/programs/localedef.c  */