1/* Copyright (C) 1995-2019 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
22#include <alloca.h>
23#include <byteswap.h>
24#include <endian.h>
25#include <errno.h>
26#include <limits.h>
27#include <obstack.h>
28#include <stdlib.h>
29#include <string.h>
30#include <wchar.h>
31#include <wctype.h>
32#include <stdint.h>
33#include <sys/uio.h>
34
35#include "localedef.h"
36#include "charmap.h"
37#include "localeinfo.h"
38#include "langinfo.h"
39#include "linereader.h"
40#include "locfile-token.h"
41#include "locfile.h"
42
43#include <assert.h>
44
45
/* The bit used for representing a special class.  BITPOS turns a class
   token into a zero-based bit index by subtracting tok_upper; BIT and
   BITw hand that index to _ISbit and _ISwbit respectively.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Dereference the slot for VALUE in table COLLECTION of CTYPE.  The slot
   is located by find_idx, which receives the table together with its
   `_max' and `_act' bookkeeping members.  IDX is pasted textually after
   each member name, so it is either empty or an array subscript.  The
   expansion is an lvalue; callers in this file both read it and
   `|=' into it.  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
             &ctype->collection##_act idx, value)


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
61
62
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */

/* One to-string of a transliteration rule; the alternatives of a rule
   are chained through NEXT.  */
struct translit_to_t
{
  /* The replacement as 32-bit values.  NOTE(review): presumably
     zero-terminated -- confirm against the parser.  */
  uint32_t *str;

  /* Next alternative, or NULL.  */
  struct translit_to_t *next;
};
73
/* One transliteration rule: a from-string and the list of its
   to-strings.  Rules are chained through NEXT.  */
struct translit_t
{
  /* The sequence to be replaced, as 32-bit values.  */
  uint32_t *from;

  /* NOTE(review): presumably the file name and line number where the
     rule was read, kept for diagnostics -- confirm against the parser.  */
  const char *fname;
  size_t lineno;

  /* Head of the list of replacement alternatives.  */
  struct translit_to_t *to;

  /* Next rule, or NULL.  */
  struct translit_t *next;
};
85
/* One entry of the translit_ignore list.  ctype_finish orders the list
   by FROM and ctype_output writes FROM, TO and STEP of every entry, in
   that order, to the locale file.  */
struct translit_ignore_t
{
  /* NOTE(review): presumably a character range FROM..TO visited in
     increments of STEP -- confirm against the parser.  */
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* NOTE(review): presumably the source location of the entry, kept
     for diagnostics -- confirm against the parser.  */
  const char *fname;
  size_t lineno;

  /* Next entry, or NULL.  */
  struct translit_ignore_t *next;
};
97
98
/* Type to describe a transliteration include statement.  Statements
   are chained through NEXT.  */
struct translit_include_t
{
  /* NOTE(review): presumably the names of the locale and of the
     repertoire map the statement refers to -- confirm against the
     parser.  */
  const char *copy_locale;
  const char *copy_repertoire;

  /* Next include statement, or NULL.  */
  struct translit_include_t *next;
};
107
/* Provide some dummy pointer for empty string.  The array holds a
   single zero element, i.e. it is an empty zero-terminated sequence of
   32-bit values.  */
static uint32_t no_str[] = { 0 };
110
111
/* Each inclusion of "3level.h" below is parameterized through the
   macros TABLE (name of the generated struct and prefix of its
   functions), ELEMENT (value type) and DEFAULT (value of unset slots).
   NOTE(review): the parameter macros are redefined for every
   inclusion without an #undef here, so 3level.h presumably undefines
   them itself -- confirm.  */

/* Sparse table of uint32_t.  NOTE(review): NO_ADD_LOCALE presumably
   suppresses the generated add_locale_* writer, which this table does
   not need -- confirm in 3level.h.  */
#define TABLE idx_table
#define ELEMENT uint32_t
#define DEFAULT ((uint32_t) ~0)
#define NO_ADD_LOCALE
#include "3level.h"

/* Sparse table of uint8_t; unset slots read as 0xff.  */
#define TABLE wcwidth_table
#define ELEMENT uint8_t
#define DEFAULT 0xff
#include "3level.h"

/* Sparse table of int32_t.  The generated wctrans_table_add is renamed
   to wctrans_table_add_internal for the duration of the inclusion so
   that a wrapper with the public name can be defined afterwards.  */
#define TABLE wctrans_table
#define ELEMENT int32_t
#define DEFAULT 0
#define wctrans_table_add wctrans_table_add_internal
#include "3level.h"
#undef wctrans_table_add
/* Record in T that WC maps to MAPPED_WC.  The table does not hold the
   mapped character itself but its distance from the argument, so the
   difference is what gets handed to the generated insertion routine.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  const uint32_t delta = mapped_wc - wc;

  wctrans_table_add_internal (t, wc, delta);
}
137
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

/* Working state of one 3-level character class table.  Each level has
   an `_alloc' / `_size' pair next to its array.  NOTE(review): these
   are presumably the allocated capacity and the number of entries in
   use -- confirm against the table functions further down the file.  */
struct wctype_table
{
  /* Parameters.  */
  unsigned int p;
  unsigned int q;
  /* Working representation.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  /* NOTE(review): presumably the size of the finalized table -- confirm
     where it is computed.  */
  size_t result_size;
};

/* Append table T to FILE.  Declared ahead of its definition because
   ctype_output uses it for every character class.  */
static void add_locale_wctype_table (struct locale_file *file,
                                     struct wctype_table *t);
161
/* The real definition of the struct for the LC_CTYPE locale.  */
struct locale_ctype_t
{
  /* The characters known so far.  ctype_startup seeds the first 256
     entries with the identity 0..255 and sets charnames_act to 256;
     charnames_max is the allocated number of entries.  */
  uint32_t *charnames;
  size_t charnames_max;
  size_t charnames_act;
  /* An index lookup table, to speedup find_idx.  */
  struct idx_table charnames_idx;

  /* Repertoire map, loaded in ctype_finish when a repertoire name is
     available.  */
  struct repertoire_t *repertoire;

  /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
  /* Number of registered classes and their names; the position of a
     name in the array is the bit position of the class.  */
  size_t nr_charclass;
  const char *classnames[MAX_NR_CHARCLASS];
  uint32_t last_class_char;
  /* Class bits per byte value, tested with _ISbit in ctype_finish.  */
  uint32_t class256_collection[256];
  /* Class bits per entry of the name array, tested with _ISwbit in
     ctype_finish; `_max' and `_act' are the bookkeeping fields handed
     to find_idx.  */
  uint32_t *class_collection;
  size_t class_collection_max;
  size_t class_collection_act;
  uint32_t class_done;
  uint32_t class_offset;

  /* Input digits in multibyte and wide character form.  ctype_finish
     keeps them in complete groups of ten.  */
  struct charseq **mbdigits;
  size_t mbdigits_act;
  size_t mbdigits_max;
  uint32_t *wcdigits;
  size_t wcdigits_act;
  size_t wcdigits_max;

  /* Output digits in multibyte and wide character form.  */
  struct charseq *mboutdigits[10];
  uint32_t wcoutdigits[10];
  size_t outdigits_act;

  /* If the following number ever turns out to be too small simply
     increase it.  But I doubt it will.  --drepper@gnu  */
#define MAX_NR_CHARMAP 16
  /* Character maps.  ctype_startup registers "toupper" and then
     "tolower" before any other map.  */
  const char *mapnames[MAX_NR_CHARMAP];
  uint32_t *map_collection[MAX_NR_CHARMAP];
  /* Byte-indexed tables for the first two maps.  */
  uint32_t map256_collection[2][256];
  size_t map_collection_max[MAX_NR_CHARMAP];
  size_t map_collection_act[MAX_NR_CHARMAP];
  size_t map_collection_nr;
  size_t last_map_idx;
  int tomap_done[MAX_NR_CHARMAP];
  uint32_t map_offset;

  /* Transliteration information.  */
  struct translit_include_t *translit_include;
  struct translit_t *translit;
  struct translit_ignore_t *translit_ignore;
  /* Number of entries in translit_ignore, counted in ctype_finish.  */
  uint32_t ntranslit_ignore;

  /* Zero-terminated default_missing string, or NULL.  NOTE(review):
     the file and line members presumably record where it was defined
     -- confirm against the parser.  */
  uint32_t *default_missing;
  const char *default_missing_file;
  size_t default_missing_lineno;

  /* to_nonascii is set in ctype_startup when enc_not_ascii_compatible
     is nonzero; nonascii_case is set in ctype_finish when the
     single-byte case maps deviate from the ASCII rules.  */
  uint32_t to_nonascii;
  uint32_t nonascii_case;

  /* The arrays for the binary representation.  */
  char_class_t *ctype_b;
  char_class32_t *ctype32_b;
  uint32_t **map_b;
  uint32_t **map32_b;
  uint32_t **class_b;
  struct wctype_table *class_3level;
  struct wctrans_table *map_3level;
  uint32_t *class_name_ptr;
  uint32_t *map_name_ptr;
  struct wcwidth_table width;
  uint32_t mb_cur_max;
  /* Name of the character set, taken from the charmap in
     ctype_finish.  */
  const char *codeset_name;
  uint32_t *translit_from_idx;
  uint32_t *translit_from_tbl;
  uint32_t *translit_to_idx;
  uint32_t *translit_to_tbl;
  uint32_t translit_idx_size;
  /* Sizes in bytes; ctype_output divides them by sizeof (uint32_t) to
     get element counts.  */
  size_t translit_from_tbl_size;
  size_t translit_to_tbl_size;

  /* Initialized by ctype_startup.  */
  struct obstack mempool;
};
245
246
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks used by the obstack macros in this file.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
254
255
/* Prototypes for local functions.  */

/* Create or share the LC_CTYPE working data of LOCALE.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
/* Register the character class NAME in CTYPE.  */
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype, const char *name);
/* Register the character map NAME in CTYPE.  */
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name, const struct charmap_t *charmap);
/* Return the slot for character IDX in *TABLE.  Callers that pass NULL
   for TABLE, MAX and ACT only want the side effect of IDX being added
   to the name array of CTYPE.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
                           size_t *max, size_t *act, uint32_t idx);
/* Called by ctype_finish to set default values for classes which were
   not specified.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
/* Called by ctype_output to prepare the arrays of the binary
   representation.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
274
275
/* Names and characters of the ten decimal digits, indexed by digit
   value.  ctype_finish looks up the characters in DIGITS and then the
   names in LONGNAMES in the charmap when the locale defines no input
   digits.  */
static const char *longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
/* The same digits spelled as <Uxxxxxxxx> symbol names.  */
static const char *uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
static const unsigned char digits[] = "0123456789";
287
288
289static void
290ctype_startup (struct linereader *lr, struct localedef_t *locale,
291 const struct charmap_t *charmap,
292 struct localedef_t *copy_locale, int ignore_content)
293{
294 unsigned int cnt;
295 struct locale_ctype_t *ctype;
296
297 if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
298 {
299 if (copy_locale == NULL)
300 {
301 /* Allocate the needed room. */
302 locale->categories[LC_CTYPE].ctype = ctype =
303 (struct locale_ctype_t *) xcalloc (1,
304 sizeof (struct locale_ctype_t));
305
306 /* We have seen no names yet. */
307 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
308 ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max
309 * sizeof (uint32_t));
310 for (cnt = 0; cnt < 256; ++cnt)
311 ctype->charnames[cnt] = cnt;
312 ctype->charnames_act = 256;
313 idx_table_init (&ctype->charnames_idx);
314
315 /* Fill character class information. */
316 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
317 /* The order of the following instructions determines the bit
318 positions! */
319 ctype_class_new (lr, ctype, "upper");
320 ctype_class_new (lr, ctype, "lower");
321 ctype_class_new (lr, ctype, "alpha");
322 ctype_class_new (lr, ctype, "digit");
323 ctype_class_new (lr, ctype, "xdigit");
324 ctype_class_new (lr, ctype, "space");
325 ctype_class_new (lr, ctype, "print");
326 ctype_class_new (lr, ctype, "graph");
327 ctype_class_new (lr, ctype, "blank");
328 ctype_class_new (lr, ctype, "cntrl");
329 ctype_class_new (lr, ctype, "punct");
330 ctype_class_new (lr, ctype, "alnum");
331
332 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
333 ctype->class_collection
334 = (uint32_t *) xcalloc (sizeof (unsigned long int),
335 ctype->class_collection_max);
336 ctype->class_collection_act = 256;
337
338 /* Fill character map information. */
339 ctype->last_map_idx = MAX_NR_CHARMAP;
340 ctype_map_new (lr, ctype, "toupper", charmap);
341 ctype_map_new (lr, ctype, "tolower", charmap);
342
343 /* Fill first 256 entries in `toXXX' arrays. */
344 for (cnt = 0; cnt < 256; ++cnt)
345 {
346 ctype->map_collection[0][cnt] = cnt;
347 ctype->map_collection[1][cnt] = cnt;
348
349 ctype->map256_collection[0][cnt] = cnt;
350 ctype->map256_collection[1][cnt] = cnt;
351 }
352
353 if (enc_not_ascii_compatible)
354 ctype->to_nonascii = 1;
355
356 obstack_init (&ctype->mempool);
357 }
358 else
359 ctype = locale->categories[LC_CTYPE].ctype =
360 copy_locale->categories[LC_CTYPE].ctype;
361 }
362}
363
364
365void
366ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
367{
368 /* See POSIX.2, table 2-6 for the meaning of the following table. */
369#define NCLASS 12
370 static const struct
371 {
372 const char *name;
373 const char allow[NCLASS];
374 }
375 valid_table[NCLASS] =
376 {
377 /* The order is important. See token.h for more information.
378 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
379 { "upper", "--MX-XDDXXX-" },
380 { "lower", "--MX-XDDXXX-" },
381 { "alpha", "---X-XDDXXX-" },
382 { "digit", "XXX--XDDXXX-" },
383 { "xdigit", "-----XDDXXX-" },
384 { "space", "XXXXX------X" },
385 { "print", "---------X--" },
386 { "graph", "---------X--" },
387 { "blank", "XXXXXM-----X" },
388 { "cntrl", "XXXXX-XX--XX" },
389 { "punct", "XXXXX-DD-X-X" },
390 { "alnum", "-----XDDXXX-" }
391 };
392 size_t cnt;
393 int cls1, cls2;
394 uint32_t space_value;
395 struct charseq *space_seq;
396 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
397 int warned;
398 const void *key;
399 size_t len;
400 void *vdata;
401 void *curs;
402
403 /* Now resolve copying and also handle completely missing definitions. */
404 if (ctype == NULL)
405 {
406 const char *repertoire_name;
407
408 /* First see whether we were supposed to copy. If yes, find the
409 actual definition. */
410 if (locale->copy_name[LC_CTYPE] != NULL)
411 {
412 /* Find the copying locale. This has to happen transitively since
413 the locale we are copying from might also copying another one. */
414 struct localedef_t *from = locale;
415
416 do
417 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
418 from->repertoire_name, charmap);
419 while (from->categories[LC_CTYPE].ctype == NULL
420 && from->copy_name[LC_CTYPE] != NULL);
421
422 ctype = locale->categories[LC_CTYPE].ctype
423 = from->categories[LC_CTYPE].ctype;
424 }
425
426 /* If there is still no definition issue an warning and create an
427 empty one. */
428 if (ctype == NULL)
429 {
430 record_warning (_("\
431No definition for %s category found"), "LC_CTYPE");
432 ctype_startup (NULL, locale, charmap, NULL, 0);
433 ctype = locale->categories[LC_CTYPE].ctype;
434 }
435
436 /* Get the repertoire we have to use. */
437 repertoire_name = locale->repertoire_name ?: repertoire_global;
438 if (repertoire_name != NULL)
439 ctype->repertoire = repertoire_read (repertoire_name);
440 }
441
442 /* We need the name of the currently used 8-bit character set to
443 make correct conversion between this 8-bit representation and the
444 ISO 10646 character set used internally for wide characters. */
445 ctype->codeset_name = charmap->code_set_name;
446 if (ctype->codeset_name == NULL)
447 {
448 record_error (0, 0, _("\
449No character set name specified in charmap"));
450 ctype->codeset_name = "//UNKNOWN//";
451 }
452
453 /* Set default value for classes not specified. */
454 set_class_defaults (ctype, charmap, ctype->repertoire);
455
456 /* Check according to table. */
457 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
458 {
459 uint32_t tmp = ctype->class_collection[cnt];
460
461 if (tmp != 0)
462 {
463 for (cls1 = 0; cls1 < NCLASS; ++cls1)
464 if ((tmp & _ISwbit (cls1)) != 0)
465 for (cls2 = 0; cls2 < NCLASS; ++cls2)
466 if (valid_table[cls1].allow[cls2] != '-')
467 {
468 int eq = (tmp & _ISwbit (cls2)) != 0;
469 switch (valid_table[cls1].allow[cls2])
470 {
471 case 'M':
472 if (!eq)
473 {
474 uint32_t value = ctype->charnames[cnt];
475
476 record_error (0, 0, _("\
477character L'\\u%0*x' in class `%s' must be in class `%s'"),
478 value > 0xffff ? 8 : 4,
479 value,
480 valid_table[cls1].name,
481 valid_table[cls2].name);
482 }
483 break;
484
485 case 'X':
486 if (eq)
487 {
488 uint32_t value = ctype->charnames[cnt];
489
490 record_error (0, 0, _("\
491character L'\\u%0*x' in class `%s' must not be in class `%s'"),
492 value > 0xffff ? 8 : 4,
493 value,
494 valid_table[cls1].name,
495 valid_table[cls2].name);
496 }
497 break;
498
499 case 'D':
500 ctype->class_collection[cnt] |= _ISwbit (cls2);
501 break;
502
503 default:
504 record_error (5, 0, _("\
505internal error in %s, line %u"), __FUNCTION__, __LINE__);
506 }
507 }
508 }
509 }
510
511 for (cnt = 0; cnt < 256; ++cnt)
512 {
513 uint32_t tmp = ctype->class256_collection[cnt];
514
515 if (tmp != 0)
516 {
517 for (cls1 = 0; cls1 < NCLASS; ++cls1)
518 if ((tmp & _ISbit (cls1)) != 0)
519 for (cls2 = 0; cls2 < NCLASS; ++cls2)
520 if (valid_table[cls1].allow[cls2] != '-')
521 {
522 int eq = (tmp & _ISbit (cls2)) != 0;
523 switch (valid_table[cls1].allow[cls2])
524 {
525 case 'M':
526 if (!eq)
527 {
528 char buf[17];
529
530 snprintf (buf, sizeof buf, "\\%Zo", cnt);
531
532 record_error (0, 0, _("\
533character '%s' in class `%s' must be in class `%s'"),
534 buf,
535 valid_table[cls1].name,
536 valid_table[cls2].name);
537 }
538 break;
539
540 case 'X':
541 if (eq)
542 {
543 char buf[17];
544
545 snprintf (buf, sizeof buf, "\\%Zo", cnt);
546
547 record_error (0, 0, _("\
548character '%s' in class `%s' must not be in class `%s'"),
549 buf,
550 valid_table[cls1].name,
551 valid_table[cls2].name);
552 }
553 break;
554
555 case 'D':
556 ctype->class256_collection[cnt] |= _ISbit (cls2);
557 break;
558
559 default:
560 record_error (5, 0, _("\
561internal error in %s, line %u"), __FUNCTION__, __LINE__);
562 }
563 }
564 }
565 }
566
567 /* ... and now test <SP> as a special case. */
568 space_value = 32;
569 if (((cnt = BITPOS (tok_space),
570 (ELEM (ctype, class_collection, , space_value)
571 & BITw (tok_space)) == 0)
572 || (cnt = BITPOS (tok_blank),
573 (ELEM (ctype, class_collection, , space_value)
574 & BITw (tok_blank)) == 0)))
575 {
576 record_error (0, 0, _("<SP> character not in class `%s'"),
577 valid_table[cnt].name);
578 }
579 else if (((cnt = BITPOS (tok_punct),
580 (ELEM (ctype, class_collection, , space_value)
581 & BITw (tok_punct)) != 0)
582 || (cnt = BITPOS (tok_graph),
583 (ELEM (ctype, class_collection, , space_value)
584 & BITw (tok_graph))
585 != 0)))
586 {
587 record_error (0, 0, _("\
588<SP> character must not be in class `%s'"),
589 valid_table[cnt].name);
590 }
591 else
592 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
593
594 space_seq = charmap_find_value (charmap, "SP", 2);
595 if (space_seq == NULL)
596 space_seq = charmap_find_value (charmap, "space", 5);
597 if (space_seq == NULL)
598 space_seq = charmap_find_value (charmap, "U00000020", 9);
599 if (space_seq == NULL || space_seq->nbytes != 1)
600 {
601 record_error (0, 0, _("\
602character <SP> not defined in character map"));
603 }
604 else if (((cnt = BITPOS (tok_space),
605 (ctype->class256_collection[space_seq->bytes[0]]
606 & BIT (tok_space)) == 0)
607 || (cnt = BITPOS (tok_blank),
608 (ctype->class256_collection[space_seq->bytes[0]]
609 & BIT (tok_blank)) == 0)))
610 {
611 record_error (0, 0, _("<SP> character not in class `%s'"),
612 valid_table[cnt].name);
613 }
614 else if (((cnt = BITPOS (tok_punct),
615 (ctype->class256_collection[space_seq->bytes[0]]
616 & BIT (tok_punct)) != 0)
617 || (cnt = BITPOS (tok_graph),
618 (ctype->class256_collection[space_seq->bytes[0]]
619 & BIT (tok_graph)) != 0)))
620 {
621 record_error (0, 0, _("\
622<SP> character must not be in class `%s'"),
623 valid_table[cnt].name);
624 }
625 else
626 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
627
628 /* Check whether all single-byte characters make to their upper/lowercase
629 equivalent according to the ASCII rules. */
630 for (cnt = 'A'; cnt <= 'Z'; ++cnt)
631 {
632 uint32_t uppval = ctype->map256_collection[0][cnt];
633 uint32_t lowval = ctype->map256_collection[1][cnt];
634 uint32_t lowuppval = ctype->map256_collection[0][lowval];
635 uint32_t lowlowval = ctype->map256_collection[1][lowval];
636
637 if (uppval != cnt
638 || lowval != cnt + 0x20
639 || lowuppval != cnt
640 || lowlowval != cnt + 0x20)
641 ctype->nonascii_case = 1;
642 }
643 for (cnt = 0; cnt < 256; ++cnt)
644 if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
645 if (ctype->map256_collection[0][cnt] != cnt
646 || ctype->map256_collection[1][cnt] != cnt)
647 ctype->nonascii_case = 1;
648
649 /* Now that the tests are done make sure the name array contains all
650 characters which are handled in the WIDTH section of the
651 character set definition file. */
652 if (charmap->width_rules != NULL)
653 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
654 {
655 unsigned char bytes[charmap->mb_cur_max];
656 int nbytes = charmap->width_rules[cnt].from->nbytes;
657
658 /* We have the range of character for which the width is
659 specified described using byte sequences of the multibyte
660 charset. We have to convert this to UCS4 now. And we
661 cannot simply convert the beginning and the end of the
662 sequence, we have to iterate over the byte sequence and
663 convert it for every single character. */
664 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
665
666 while (nbytes < charmap->width_rules[cnt].to->nbytes
667 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
668 nbytes) <= 0)
669 {
670 /* Find the UCS value for `bytes'. */
671 int inner;
672 uint32_t wch;
673 struct charseq *seq
674 = charmap_find_symbol (charmap, (char *) bytes, nbytes);
675
676 if (seq == NULL)
677 wch = ILLEGAL_CHAR_VALUE;
678 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
679 wch = seq->ucs4;
680 else
681 wch = repertoire_find_value (ctype->repertoire, seq->name,
682 strlen (seq->name));
683
684 if (wch != ILLEGAL_CHAR_VALUE)
685 /* We are only interested in the side-effects of the
686 `find_idx' call. It will add appropriate entries in
687 the name array if this is necessary. */
688 (void) find_idx (ctype, NULL, NULL, NULL, wch);
689
690 /* "Increment" the bytes sequence. */
691 inner = nbytes - 1;
692 while (inner >= 0 && bytes[inner] == 0xff)
693 --inner;
694
695 if (inner < 0)
696 {
697 /* We have to extend the byte sequence. */
698 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
699 break;
700
701 bytes[0] = 1;
702 memset (&bytes[1], 0, nbytes);
703 ++nbytes;
704 }
705 else
706 {
707 ++bytes[inner];
708 while (++inner < nbytes)
709 bytes[inner] = 0;
710 }
711 }
712 }
713
714 /* Now set all the other characters of the character set to the
715 default width. */
716 curs = NULL;
717 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
718 {
719 struct charseq *data = (struct charseq *) vdata;
720
721 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
722 data->ucs4 = repertoire_find_value (ctype->repertoire,
723 data->name, len);
724
725 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
726 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
727 }
728
729 /* There must be a multiple of 10 digits. */
730 if (ctype->mbdigits_act % 10 != 0)
731 {
732 assert (ctype->mbdigits_act == ctype->wcdigits_act);
733 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
734 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
735 record_error (0, 0, _("\
736`digit' category has not entries in groups of ten"));
737 }
738
739 /* Check the input digits. There must be a multiple of ten available.
740 In each group it could be that one or the other character is missing.
741 In this case the whole group must be removed. */
742 cnt = 0;
743 while (cnt < ctype->mbdigits_act)
744 {
745 size_t inner;
746 for (inner = 0; inner < 10; ++inner)
747 if (ctype->mbdigits[cnt + inner] == NULL)
748 break;
749
750 if (inner == 10)
751 cnt += 10;
752 else
753 {
754 /* Remove the group. */
755 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
756 ((ctype->wcdigits_act - cnt - 10)
757 * sizeof (ctype->mbdigits[0])));
758 ctype->mbdigits_act -= 10;
759 }
760 }
761
762 /* If no input digits are given use the default. */
763 if (ctype->mbdigits_act == 0)
764 {
765 if (ctype->mbdigits_max == 0)
766 {
767 ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
768 10 * sizeof (struct charseq *));
769 ctype->mbdigits_max = 10;
770 }
771
772 for (cnt = 0; cnt < 10; ++cnt)
773 {
774 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
775 (char *) digits + cnt, 1);
776 if (ctype->mbdigits[cnt] == NULL)
777 {
778 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
779 longnames[cnt],
780 strlen (longnames[cnt]));
781 if (ctype->mbdigits[cnt] == NULL)
782 {
783 /* Hum, this ain't good. */
784 record_error (0, 0, _("\
785no input digits defined and none of the standard names in the charmap"));
786
787 ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
788 sizeof (struct charseq) + 1);
789
790 /* This is better than nothing. */
791 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
792 ctype->mbdigits[cnt]->nbytes = 1;
793 }
794 }
795 }
796
797 ctype->mbdigits_act = 10;
798 }
799
800 /* Check the wide character input digits. There must be a multiple
801 of ten available. In each group it could be that one or the other
802 character is missing. In this case the whole group must be
803 removed. */
804 cnt = 0;
805 while (cnt < ctype->wcdigits_act)
806 {
807 size_t inner;
808 for (inner = 0; inner < 10; ++inner)
809 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
810 break;
811
812 if (inner == 10)
813 cnt += 10;
814 else
815 {
816 /* Remove the group. */
817 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
818 ((ctype->wcdigits_act - cnt - 10)
819 * sizeof (ctype->wcdigits[0])));
820 ctype->wcdigits_act -= 10;
821 }
822 }
823
824 /* If no input digits are given use the default. */
825 if (ctype->wcdigits_act == 0)
826 {
827 if (ctype->wcdigits_max == 0)
828 {
829 ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
830 10 * sizeof (uint32_t));
831 ctype->wcdigits_max = 10;
832 }
833
834 for (cnt = 0; cnt < 10; ++cnt)
835 ctype->wcdigits[cnt] = L'0' + cnt;
836
837 ctype->mbdigits_act = 10;
838 }
839
840 /* Check the outdigits. */
841 warned = 0;
842 for (cnt = 0; cnt < 10; ++cnt)
843 if (ctype->mboutdigits[cnt] == NULL)
844 {
845 static struct charseq replace[2];
846
847 if (!warned)
848 {
849 record_error (0, 0, _("\
850not all characters used in `outdigit' are available in the charmap"));
851 warned = 1;
852 }
853
854 replace[0].nbytes = 1;
855 replace[0].bytes[0] = '?';
856 replace[0].bytes[1] = '\0';
857 ctype->mboutdigits[cnt] = &replace[0];
858 }
859
860 warned = 0;
861 for (cnt = 0; cnt < 10; ++cnt)
862 if (ctype->wcoutdigits[cnt] == 0)
863 {
864 if (!warned)
865 {
866 record_error (0, 0, _("\
867not all characters used in `outdigit' are available in the repertoire"));
868 warned = 1;
869 }
870
871 ctype->wcoutdigits[cnt] = L'?';
872 }
873
874 /* Sort the entries in the translit_ignore list. */
875 if (ctype->translit_ignore != NULL)
876 {
877 struct translit_ignore_t *firstp = ctype->translit_ignore;
878 struct translit_ignore_t *runp;
879
880 ctype->ntranslit_ignore = 1;
881
882 for (runp = firstp->next; runp != NULL; runp = runp->next)
883 {
884 struct translit_ignore_t *lastp = NULL;
885 struct translit_ignore_t *cmpp;
886
887 ++ctype->ntranslit_ignore;
888
889 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
890 if (runp->from < cmpp->from)
891 break;
892
893 runp->next = lastp;
894 if (lastp == NULL)
895 firstp = runp;
896 }
897
898 ctype->translit_ignore = firstp;
899 }
900}
901
902
903void
904ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
905 const char *output_path)
906{
907 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
908 const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
909 + ctype->nr_charclass + ctype->map_collection_nr);
910 struct locale_file file;
911 uint32_t default_missing_len;
912 size_t elem, cnt;
913
914 /* Now prepare the output: Find the sizes of the table we can use. */
915 allocate_arrays (ctype, charmap, ctype->repertoire);
916
917 default_missing_len = (ctype->default_missing
918 ? wcslen ((wchar_t *) ctype->default_missing)
919 : 0);
920
921 init_locale_data (&file, nelems);
922 for (elem = 0; elem < nelems; ++elem)
923 {
924 if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
925 switch (elem)
926 {
927#define CTYPE_EMPTY(name) \
928 case name: \
929 add_locale_empty (&file); \
930 break
931
932 CTYPE_EMPTY(_NL_CTYPE_GAP1);
933 CTYPE_EMPTY(_NL_CTYPE_GAP2);
934 CTYPE_EMPTY(_NL_CTYPE_GAP3);
935 CTYPE_EMPTY(_NL_CTYPE_GAP4);
936 CTYPE_EMPTY(_NL_CTYPE_GAP5);
937 CTYPE_EMPTY(_NL_CTYPE_GAP6);
938
939#define CTYPE_RAW_DATA(name, base, size) \
940 case _NL_ITEM_INDEX (name): \
941 add_locale_raw_data (&file, base, size); \
942 break
943
944 CTYPE_RAW_DATA (_NL_CTYPE_CLASS,
945 ctype->ctype_b,
946 (256 + 128) * sizeof (char_class_t));
947
948#define CTYPE_UINT32_ARRAY(name, base, n_elems) \
949 case _NL_ITEM_INDEX (name): \
950 add_locale_uint32_array (&file, base, n_elems); \
951 break
952
953 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[0], 256 + 128);
954 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[1], 256 + 128);
955 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[0], 256);
956 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[1], 256);
957 CTYPE_RAW_DATA (_NL_CTYPE_CLASS32,
958 ctype->ctype32_b,
959 256 * sizeof (char_class32_t));
960
961#define CTYPE_UINT32(name, value) \
962 case _NL_ITEM_INDEX (name): \
963 add_locale_uint32 (&file, value); \
964 break
965
966 CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset);
967 CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset);
968 CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size);
969
970 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX,
971 ctype->translit_from_idx,
972 ctype->translit_idx_size);
973
974 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL,
975 ctype->translit_from_tbl,
976 ctype->translit_from_tbl_size
977 / sizeof (uint32_t));
978
979 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX,
980 ctype->translit_to_idx,
981 ctype->translit_idx_size);
982
983 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL,
984 ctype->translit_to_tbl,
985 ctype->translit_to_tbl_size / sizeof (uint32_t));
986
987 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
988 /* The class name array. */
989 start_locale_structure (&file);
990 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
991 add_locale_string (&file, ctype->classnames[cnt]);
992 add_locale_char (&file, 0);
993 align_locale_data (&file, LOCFILE_ALIGN);
994 end_locale_structure (&file);
995 break;
996
997 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
998 /* The class name array. */
999 start_locale_structure (&file);
1000 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1001 add_locale_string (&file, ctype->mapnames[cnt]);
1002 add_locale_char (&file, 0);
1003 align_locale_data (&file, LOCFILE_ALIGN);
1004 end_locale_structure (&file);
1005 break;
1006
1007 case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH):
1008 add_locale_wcwidth_table (&file, &ctype->width);
1009 break;
1010
1011 CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max);
1012
1013 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1014 add_locale_string (&file, ctype->codeset_name);
1015 break;
1016
1017 CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii);
1018
1019 CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case);
1020
1021 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1022 add_locale_uint32 (&file, ctype->mbdigits_act / 10);
1023 break;
1024
1025 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1026 add_locale_uint32 (&file, ctype->wcdigits_act / 10);
1027 break;
1028
1029 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1030 start_locale_structure (&file);
1031 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1032 cnt < ctype->mbdigits_act; cnt += 10)
1033 {
1034 add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes,
1035 ctype->mbdigits[cnt]->nbytes);
1036 add_locale_char (&file, 0);
1037 }
1038 end_locale_structure (&file);
1039 break;
1040
1041 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1042 start_locale_structure (&file);
1043 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1044 add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes,
1045 ctype->mboutdigits[cnt]->nbytes);
1046 add_locale_char (&file, 0);
1047 end_locale_structure (&file);
1048 break;
1049
1050 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1051 start_locale_structure (&file);
1052 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1053 cnt < ctype->wcdigits_act; cnt += 10)
1054 add_locale_uint32 (&file, ctype->wcdigits[cnt]);
1055 end_locale_structure (&file);
1056 break;
1057
1058 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1059 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1060 add_locale_uint32 (&file, ctype->wcoutdigits[cnt]);
1061 break;
1062
1063 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1064 add_locale_uint32 (&file, default_missing_len);
1065 break;
1066
1067 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1068 add_locale_uint32_array (&file, ctype->default_missing,
1069 default_missing_len);
1070 break;
1071
1072 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1073 add_locale_uint32 (&file, ctype->ntranslit_ignore);
1074 break;
1075
1076 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1077 start_locale_structure (&file);
1078 {
1079 struct translit_ignore_t *runp;
1080 for (runp = ctype->translit_ignore; runp != NULL;
1081 runp = runp->next)
1082 {
1083 add_locale_uint32 (&file, runp->from);
1084 add_locale_uint32 (&file, runp->to);
1085 add_locale_uint32 (&file, runp->step);
1086 }
1087 }
1088 end_locale_structure (&file);
1089 break;
1090
1091 default:
1092 assert (! "unknown CTYPE element");
1093 }
1094 else
1095 {
1096 /* Handle extra maps. */
1097 size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1098 if (nr < ctype->nr_charclass)
1099 {
1100 start_locale_prelude (&file);
1101 add_locale_uint32_array (&file, ctype->class_b[nr], 256 / 32);
1102 end_locale_prelude (&file);
1103 add_locale_wctype_table (&file, &ctype->class_3level[nr]);
1104 }
1105 else
1106 {
1107 nr -= ctype->nr_charclass;
1108 assert (nr < ctype->map_collection_nr);
1109 add_locale_wctrans_table (&file, &ctype->map_3level[nr]);
1110 }
1111 }
1112 }
1113
1114 write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file);
1115}
1116
1117
1118/* Local functions. */
1119static void
1120ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1121 const char *name)
1122{
1123 size_t cnt;
1124
1125 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1126 if (strcmp (ctype->classnames[cnt], name) == 0)
1127 break;
1128
1129 if (cnt < ctype->nr_charclass)
1130 {
1131 lr_error (lr, _("character class `%s' already defined"), name);
1132 return;
1133 }
1134
1135 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1136 /* Exit code 2 is prescribed in P1003.2b. */
1137 record_error (2, 0, _("\
1138implementation limit: no more than %Zd character classes allowed"),
1139 MAX_NR_CHARCLASS);
1140
1141 ctype->classnames[ctype->nr_charclass++] = name;
1142}
1143
1144
1145static void
1146ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1147 const char *name, const struct charmap_t *charmap)
1148{
1149 size_t max_chars = 0;
1150 size_t cnt;
1151
1152 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1153 {
1154 if (strcmp (ctype->mapnames[cnt], name) == 0)
1155 break;
1156
1157 if (max_chars < ctype->map_collection_max[cnt])
1158 max_chars = ctype->map_collection_max[cnt];
1159 }
1160
1161 if (cnt < ctype->map_collection_nr)
1162 {
1163 lr_error (lr, _("character map `%s' already defined"), name);
1164 return;
1165 }
1166
1167 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1168 /* Exit code 2 is prescribed in P1003.2b. */
1169 record_error (2, 0, _("\
1170implementation limit: no more than %d character maps allowed"),
1171 MAX_NR_CHARMAP);
1172
1173 ctype->mapnames[cnt] = name;
1174
1175 if (max_chars == 0)
1176 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1177 else
1178 ctype->map_collection_max[cnt] = max_chars;
1179
1180 ctype->map_collection[cnt] = (uint32_t *)
1181 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1182 ctype->map_collection_act[cnt] = 256;
1183
1184 ++ctype->map_collection_nr;
1185}
1186
1187
1188/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1189 is possible if we only want to extend the name array. */
1190static uint32_t *
1191find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1192 size_t *act, uint32_t idx)
1193{
1194 size_t cnt;
1195
1196 if (idx < 256)
1197 return table == NULL ? NULL : &(*table)[idx];
1198
1199 /* Use the charnames_idx lookup table instead of the slow search loop. */
1200#if 1
1201 cnt = idx_table_get (&ctype->charnames_idx, idx);
1202 if (cnt == EMPTY)
1203 /* Not found. */
1204 cnt = ctype->charnames_act;
1205#else
1206 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1207 if (ctype->charnames[cnt] == idx)
1208 break;
1209#endif
1210
1211 /* We have to distinguish two cases: the name is found or not. */
1212 if (cnt == ctype->charnames_act)
1213 {
1214 /* Extend the name array. */
1215 if (ctype->charnames_act == ctype->charnames_max)
1216 {
1217 ctype->charnames_max *= 2;
1218 ctype->charnames = (uint32_t *)
1219 xrealloc (ctype->charnames,
1220 sizeof (uint32_t) * ctype->charnames_max);
1221 }
1222 ctype->charnames[ctype->charnames_act++] = idx;
1223 idx_table_add (&ctype->charnames_idx, idx, cnt);
1224 }
1225
1226 if (table == NULL)
1227 /* We have done everything we are asked to do. */
1228 return NULL;
1229
1230 if (max == NULL)
1231 /* The caller does not want to extend the table. */
1232 return (cnt >= *act ? NULL : &(*table)[cnt]);
1233
1234 if (cnt >= *act)
1235 {
1236 if (cnt >= *max)
1237 {
1238 size_t old_max = *max;
1239 do
1240 *max *= 2;
1241 while (*max <= cnt);
1242
1243 *table =
1244 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1245 memset (&(*table)[old_max], '\0',
1246 (*max - old_max) * sizeof (uint32_t));
1247 }
1248
1249 *act = cnt + 1;
1250 }
1251
1252 return &(*table)[cnt];
1253}
1254
1255
1256static int
1257get_character (struct token *now, const struct charmap_t *charmap,
1258 struct repertoire_t *repertoire,
1259 struct charseq **seqp, uint32_t *wchp)
1260{
1261 if (now->tok == tok_bsymbol)
1262 {
1263 /* This will hopefully be the normal case. */
1264 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1265 now->val.str.lenmb);
1266 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1267 now->val.str.lenmb);
1268 }
1269 else if (now->tok == tok_ucs4)
1270 {
1271 char utmp[10];
1272
1273 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1274 *seqp = charmap_find_value (charmap, utmp, 9);
1275
1276 if (*seqp == NULL)
1277 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1278
1279 if (*seqp == NULL)
1280 {
1281 /* Compute the value in the charmap from the UCS value. */
1282 const char *symbol = repertoire_find_symbol (repertoire,
1283 now->val.ucs4);
1284
1285 if (symbol == NULL)
1286 *seqp = NULL;
1287 else
1288 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1289
1290 if (*seqp == NULL)
1291 {
1292 if (repertoire != NULL)
1293 {
1294 /* Insert a negative entry. */
1295 static const struct charseq negative
1296 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1297 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1298 sizeof (uint32_t));
1299 *newp = now->val.ucs4;
1300
1301 insert_entry (&repertoire->seq_table, newp,
1302 sizeof (uint32_t), (void *) &negative);
1303 }
1304 }
1305 else
1306 (*seqp)->ucs4 = now->val.ucs4;
1307 }
1308 else if ((*seqp)->ucs4 != now->val.ucs4)
1309 *seqp = NULL;
1310
1311 *wchp = now->val.ucs4;
1312 }
1313 else if (now->tok == tok_charcode)
1314 {
1315 /* We must map from the byte code to UCS4. */
1316 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1317 now->val.str.lenmb);
1318
1319 if (*seqp == NULL)
1320 *wchp = ILLEGAL_CHAR_VALUE;
1321 else
1322 {
1323 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1324 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1325 strlen ((*seqp)->name));
1326 *wchp = (*seqp)->ucs4;
1327 }
1328 }
1329 else
1330 return 1;
1331
1332 return 0;
1333}
1334
1335
1336/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1337 the .(2). counterparts. */
1338static void
1339charclass_symbolic_ellipsis (struct linereader *ldfile,
1340 struct locale_ctype_t *ctype,
1341 const struct charmap_t *charmap,
1342 struct repertoire_t *repertoire,
1343 struct token *now,
1344 const char *last_str,
1345 unsigned long int class256_bit,
1346 unsigned long int class_bit, int base,
1347 int ignore_content, int handle_digits, int step)
1348{
1349 const char *nowstr = now->val.str.startmb;
1350 char tmp[now->val.str.lenmb + 1];
1351 const char *cp;
1352 char *endp;
1353 unsigned long int from;
1354 unsigned long int to;
1355
1356 /* We have to compute the ellipsis values using the symbolic names. */
1357 assert (last_str != NULL);
1358
1359 if (strlen (last_str) != now->val.str.lenmb)
1360 {
1361 invalid_range:
1362 lr_error (ldfile,
1363 _("`%s' and `%.*s' are not valid names for symbolic range"),
1364 last_str, (int) now->val.str.lenmb, nowstr);
1365 return;
1366 }
1367
1368 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1369 /* Nothing to do, the names are the same. */
1370 return;
1371
1372 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1373 ;
1374
1375 errno = 0;
1376 from = strtoul (cp, &endp, base);
1377 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1378 goto invalid_range;
1379
1380 to = strtoul (nowstr + (cp - last_str), &endp, base);
1381 if ((to == UINT_MAX && errno == ERANGE)
1382 || (endp - nowstr) != now->val.str.lenmb || from >= to)
1383 goto invalid_range;
1384
1385 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1386 if (!ignore_content)
1387 {
1388 now->val.str.startmb = tmp;
1389 while ((from += step) <= to)
1390 {
1391 struct charseq *seq;
1392 uint32_t wch;
1393
1394 sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1395 (int) (cp - last_str), last_str,
1396 (int) (now->val.str.lenmb - (cp - last_str)),
1397 from);
1398
1399 get_character (now, charmap, repertoire, &seq, &wch);
1400
1401 if (seq != NULL && seq->nbytes == 1)
1402 /* Yep, we can store information about this byte sequence. */
1403 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1404
1405 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1406 /* We have the UCS4 position. */
1407 *find_idx (ctype, &ctype->class_collection,
1408 &ctype->class_collection_max,
1409 &ctype->class_collection_act, wch) |= class_bit;
1410
1411 if (handle_digits == 1)
1412 {
1413 /* We must store the digit values. */
1414 if (ctype->mbdigits_act == ctype->mbdigits_max)
1415 {
1416 ctype->mbdigits_max *= 2;
1417 ctype->mbdigits = xrealloc (ctype->mbdigits,
1418 (ctype->mbdigits_max
1419 * sizeof (char *)));
1420 ctype->wcdigits_max *= 2;
1421 ctype->wcdigits = xrealloc (ctype->wcdigits,
1422 (ctype->wcdigits_max
1423 * sizeof (uint32_t)));
1424 }
1425
1426 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1427 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1428 }
1429 else if (handle_digits == 2)
1430 {
1431 /* We must store the digit values. */
1432 if (ctype->outdigits_act >= 10)
1433 {
1434 lr_error (ldfile, _("\
1435%s: field `%s' does not contain exactly ten entries"),
1436 "LC_CTYPE", "outdigit");
1437 return;
1438 }
1439
1440 ctype->mboutdigits[ctype->outdigits_act] = seq;
1441 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1442 ++ctype->outdigits_act;
1443 }
1444 }
1445 }
1446}
1447
1448
1449/* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1450static void
1451charclass_ucs4_ellipsis (struct linereader *ldfile,
1452 struct locale_ctype_t *ctype,
1453 const struct charmap_t *charmap,
1454 struct repertoire_t *repertoire,
1455 struct token *now, uint32_t last_wch,
1456 unsigned long int class256_bit,
1457 unsigned long int class_bit, int ignore_content,
1458 int handle_digits, int step)
1459{
1460 if (last_wch > now->val.ucs4)
1461 {
1462 lr_error (ldfile, _("\
1463to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1464 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1465 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1466 return;
1467 }
1468
1469 if (!ignore_content)
1470 while ((last_wch += step) <= now->val.ucs4)
1471 {
1472 /* We have to find out whether there is a byte sequence corresponding
1473 to this UCS4 value. */
1474 struct charseq *seq;
1475 char utmp[10];
1476
1477 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1478 seq = charmap_find_value (charmap, utmp, 9);
1479 if (seq == NULL)
1480 {
1481 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1482 seq = charmap_find_value (charmap, utmp, 5);
1483 }
1484
1485 if (seq == NULL)
1486 /* Try looking in the repertoire map. */
1487 seq = repertoire_find_seq (repertoire, last_wch);
1488
1489 /* If this is the first time we look for this sequence create a new
1490 entry. */
1491 if (seq == NULL)
1492 {
1493 static const struct charseq negative
1494 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1495
1496 /* Find the symbolic name for this UCS4 value. */
1497 if (repertoire != NULL)
1498 {
1499 const char *symbol = repertoire_find_symbol (repertoire,
1500 last_wch);
1501 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1502 sizeof (uint32_t));
1503 *newp = last_wch;
1504
1505 if (symbol != NULL)
1506 /* We have a name, now search the multibyte value. */
1507 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1508
1509 if (seq == NULL)
1510 /* We have to create a fake entry. */
1511 seq = (struct charseq *) &negative;
1512 else
1513 seq->ucs4 = last_wch;
1514
1515 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1516 seq);
1517 }
1518 else
1519 /* We have to create a fake entry. */
1520 seq = (struct charseq *) &negative;
1521 }
1522
1523 /* We have a name, now search the multibyte value. */
1524 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1525 /* Yep, we can store information about this byte sequence. */
1526 ctype->class256_collection[(size_t) seq->bytes[0]]
1527 |= class256_bit;
1528
1529 /* And of course we have the UCS4 position. */
1530 if (class_bit != 0)
1531 *find_idx (ctype, &ctype->class_collection,
1532 &ctype->class_collection_max,
1533 &ctype->class_collection_act, last_wch) |= class_bit;
1534
1535 if (handle_digits == 1)
1536 {
1537 /* We must store the digit values. */
1538 if (ctype->mbdigits_act == ctype->mbdigits_max)
1539 {
1540 ctype->mbdigits_max *= 2;
1541 ctype->mbdigits = xrealloc (ctype->mbdigits,
1542 (ctype->mbdigits_max
1543 * sizeof (char *)));
1544 ctype->wcdigits_max *= 2;
1545 ctype->wcdigits = xrealloc (ctype->wcdigits,
1546 (ctype->wcdigits_max
1547 * sizeof (uint32_t)));
1548 }
1549
1550 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1551 ? seq : NULL);
1552 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1553 }
1554 else if (handle_digits == 2)
1555 {
1556 /* We must store the digit values. */
1557 if (ctype->outdigits_act >= 10)
1558 {
1559 lr_error (ldfile, _("\
1560%s: field `%s' does not contain exactly ten entries"),
1561 "LC_CTYPE", "outdigit");
1562 return;
1563 }
1564
1565 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1566 ? seq : NULL);
1567 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1568 ++ctype->outdigits_act;
1569 }
1570 }
1571}
1572
1573
1574/* Ellipsis as in `/xea/x12.../xea/x34'. */
1575static void
1576charclass_charcode_ellipsis (struct linereader *ldfile,
1577 struct locale_ctype_t *ctype,
1578 const struct charmap_t *charmap,
1579 struct repertoire_t *repertoire,
1580 struct token *now, char *last_charcode,
1581 uint32_t last_charcode_len,
1582 unsigned long int class256_bit,
1583 unsigned long int class_bit, int ignore_content,
1584 int handle_digits)
1585{
1586 /* First check whether the to-value is larger. */
1587 if (now->val.charcode.nbytes != last_charcode_len)
1588 {
1589 lr_error (ldfile, _("\
1590start and end character sequence of range must have the same length"));
1591 return;
1592 }
1593
1594 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1595 {
1596 lr_error (ldfile, _("\
1597to-value character sequence is smaller than from-value sequence"));
1598 return;
1599 }
1600
1601 if (!ignore_content)
1602 {
1603 do
1604 {
1605 /* Increment the byte sequence value. */
1606 struct charseq *seq;
1607 uint32_t wch;
1608 int i;
1609
1610 for (i = last_charcode_len - 1; i >= 0; --i)
1611 if (++last_charcode[i] != 0)
1612 break;
1613
1614 if (last_charcode_len == 1)
1615 /* Of course we have the charcode value. */
1616 ctype->class256_collection[(size_t) last_charcode[0]]
1617 |= class256_bit;
1618
1619 /* Find the symbolic name. */
1620 seq = charmap_find_symbol (charmap, last_charcode,
1621 last_charcode_len);
1622 if (seq != NULL)
1623 {
1624 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1625 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1626 strlen (seq->name));
1627 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1628
1629 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1630 *find_idx (ctype, &ctype->class_collection,
1631 &ctype->class_collection_max,
1632 &ctype->class_collection_act, wch) |= class_bit;
1633 }
1634 else
1635 wch = ILLEGAL_CHAR_VALUE;
1636
1637 if (handle_digits == 1)
1638 {
1639 /* We must store the digit values. */
1640 if (ctype->mbdigits_act == ctype->mbdigits_max)
1641 {
1642 ctype->mbdigits_max *= 2;
1643 ctype->mbdigits = xrealloc (ctype->mbdigits,
1644 (ctype->mbdigits_max
1645 * sizeof (char *)));
1646 ctype->wcdigits_max *= 2;
1647 ctype->wcdigits = xrealloc (ctype->wcdigits,
1648 (ctype->wcdigits_max
1649 * sizeof (uint32_t)));
1650 }
1651
1652 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1653 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1654 seq->nbytes = last_charcode_len;
1655
1656 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1657 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1658 }
1659 else if (handle_digits == 2)
1660 {
1661 struct charseq *seq;
1662 /* We must store the digit values. */
1663 if (ctype->outdigits_act >= 10)
1664 {
1665 lr_error (ldfile, _("\
1666%s: field `%s' does not contain exactly ten entries"),
1667 "LC_CTYPE", "outdigit");
1668 return;
1669 }
1670
1671 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1672 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1673 seq->nbytes = last_charcode_len;
1674
1675 ctype->mboutdigits[ctype->outdigits_act] = seq;
1676 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1677 ++ctype->outdigits_act;
1678 }
1679 }
1680 while (memcmp (last_charcode, now->val.charcode.bytes,
1681 last_charcode_len) != 0);
1682 }
1683}
1684
1685
1686static uint32_t *
1687find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1688 uint32_t wch)
1689{
1690 struct translit_t *trunp = ctype->translit;
1691 struct translit_ignore_t *tirunp = ctype->translit_ignore;
1692
1693 while (trunp != NULL)
1694 {
1695 /* XXX We simplify things here. The transliterations we look
1696 for are only allowed to have one character. */
1697 if (trunp->from[0] == wch && trunp->from[1] == 0)
1698 {
1699 /* Found it. Now look for a transliteration which can be
1700 represented with the character set. */
1701 struct translit_to_t *torunp = trunp->to;
1702
1703 while (torunp != NULL)
1704 {
1705 int i;
1706
1707 for (i = 0; torunp->str[i] != 0; ++i)
1708 {
1709 char utmp[10];
1710
1711 snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1712 if (charmap_find_value (charmap, utmp, 9) == NULL)
1713 /* This character cannot be represented. */
1714 break;
1715 }
1716
1717 if (torunp->str[i] == 0)
1718 return torunp->str;
1719
1720 torunp = torunp->next;
1721 }
1722
1723 break;
1724 }
1725
1726 trunp = trunp->next;
1727 }
1728
1729 /* Check for ignored chars. */
1730 while (tirunp != NULL)
1731 {
1732 if (tirunp->from <= wch && tirunp->to >= wch)
1733 {
1734 uint32_t wi;
1735
1736 for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1737 if (wi == wch)
1738 return no_str;
1739 }
1740 }
1741
1742 /* Nothing found. */
1743 return NULL;
1744}
1745
1746
1747uint32_t *
1748find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1749 uint32_t wch)
1750{
1751 struct locale_ctype_t *ctype;
1752 uint32_t *result = NULL;
1753
1754 assert (locale != NULL);
1755 ctype = locale->categories[LC_CTYPE].ctype;
1756
1757 if (ctype == NULL)
1758 return NULL;
1759
1760 if (ctype->translit != NULL)
1761 result = find_translit2 (ctype, charmap, wch);
1762
1763 if (result == NULL)
1764 {
1765 struct translit_include_t *irunp = ctype->translit_include;
1766
1767 while (irunp != NULL && result == NULL)
1768 {
1769 result = find_translit (find_locale (CTYPE_LOCALE,
1770 irunp->copy_locale,
1771 irunp->copy_repertoire,
1772 charmap),
1773 charmap, wch);
1774 irunp = irunp->next;
1775 }
1776 }
1777
1778 return result;
1779}
1780
1781
1782/* Read one transliteration entry. */
1783static uint32_t *
1784read_widestring (struct linereader *ldfile, struct token *now,
1785 const struct charmap_t *charmap,
1786 struct repertoire_t *repertoire)
1787{
1788 uint32_t *wstr;
1789
1790 if (now->tok == tok_default_missing)
1791 /* The special name "" will denote this case. */
1792 wstr = no_str;
1793 else if (now->tok == tok_bsymbol)
1794 {
1795 /* Get the value from the repertoire. */
1796 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1797 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1798 now->val.str.lenmb);
1799 if (wstr[0] == ILLEGAL_CHAR_VALUE)
1800 {
1801 /* We cannot proceed, we don't know the UCS4 value. */
1802 free (wstr);
1803 return NULL;
1804 }
1805
1806 wstr[1] = 0;
1807 }
1808 else if (now->tok == tok_ucs4)
1809 {
1810 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1811 wstr[0] = now->val.ucs4;
1812 wstr[1] = 0;
1813 }
1814 else if (now->tok == tok_charcode)
1815 {
1816 /* Argh, we have to convert to the symbol name first and then to the
1817 UCS4 value. */
1818 struct charseq *seq = charmap_find_symbol (charmap,
1819 now->val.str.startmb,
1820 now->val.str.lenmb);
1821 if (seq == NULL)
1822 /* Cannot find the UCS4 value. */
1823 return NULL;
1824
1825 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1826 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1827 strlen (seq->name));
1828 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1829 /* We cannot proceed, we don't know the UCS4 value. */
1830 return NULL;
1831
1832 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1833 wstr[0] = seq->ucs4;
1834 wstr[1] = 0;
1835 }
1836 else if (now->tok == tok_string)
1837 {
1838 wstr = now->val.str.startwc;
1839 if (wstr == NULL || wstr[0] == 0)
1840 return NULL;
1841 }
1842 else
1843 {
1844 if (now->tok != tok_eol && now->tok != tok_eof)
1845 lr_ignore_rest (ldfile, 0);
1846 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1847 return (uint32_t *) -1l;
1848 }
1849
1850 return wstr;
1851}
1852
1853
1854static void
1855read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1856 struct token *now, const struct charmap_t *charmap,
1857 struct repertoire_t *repertoire)
1858{
1859 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1860 struct translit_t *result;
1861 struct translit_to_t **top;
1862 struct obstack *ob = &ctype->mempool;
1863 int first;
1864 int ignore;
1865
1866 if (from_wstr == NULL)
1867 /* There is no valid from string. */
1868 return;
1869
1870 result = (struct translit_t *) obstack_alloc (ob,
1871 sizeof (struct translit_t));
1872 result->from = from_wstr;
1873 result->fname = ldfile->fname;
1874 result->lineno = ldfile->lineno;
1875 result->next = NULL;
1876 result->to = NULL;
1877 top = &result->to;
1878 first = 1;
1879 ignore = 0;
1880
1881 while (1)
1882 {
1883 uint32_t *to_wstr;
1884
1885 /* Next we have one or more transliterations. They are
1886 separated by semicolons. */
1887 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1888
1889 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1890 {
1891 /* One string read. */
1892 const uint32_t zero = 0;
1893
1894 if (!ignore)
1895 {
1896 obstack_grow (ob, &zero, 4);
1897 to_wstr = obstack_finish (ob);
1898
1899 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1900 (*top)->str = to_wstr;
1901 (*top)->next = NULL;
1902 }
1903
1904 if (now->tok == tok_eol)
1905 {
1906 result->next = ctype->translit;
1907 ctype->translit = result;
1908 return;
1909 }
1910
1911 if (!ignore)
1912 top = &(*top)->next;
1913 ignore = 0;
1914 }
1915 else
1916 {
1917 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1918 if (to_wstr == (uint32_t *) -1l)
1919 {
1920 /* An error occurred. */
1921 obstack_free (ob, result);
1922 return;
1923 }
1924
1925 if (to_wstr == NULL)
1926 ignore = 1;
1927 else
1928 /* This value is usable. */
1929 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
1930
1931 first = 0;
1932 }
1933 }
1934}
1935
1936
1937static void
1938read_translit_ignore_entry (struct linereader *ldfile,
1939 struct locale_ctype_t *ctype,
1940 const struct charmap_t *charmap,
1941 struct repertoire_t *repertoire)
1942{
1943 /* We expect a semicolon-separated list of characters we ignore. We are
1944 only interested in the wide character definitions. These must be
1945 single characters, possibly defining a range when an ellipsis is used. */
1946 while (1)
1947 {
1948 struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
1949 verbose);
1950 struct translit_ignore_t *newp;
1951 uint32_t from;
1952
1953 if (now->tok == tok_eol || now->tok == tok_eof)
1954 {
1955 lr_error (ldfile,
1956 _("premature end of `translit_ignore' definition"));
1957 return;
1958 }
1959
1960 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1961 {
1962 lr_error (ldfile, _("syntax error"));
1963 lr_ignore_rest (ldfile, 0);
1964 return;
1965 }
1966
1967 if (now->tok == tok_ucs4)
1968 from = now->val.ucs4;
1969 else
1970 /* Try to get the value. */
1971 from = repertoire_find_value (repertoire, now->val.str.startmb,
1972 now->val.str.lenmb);
1973
1974 if (from == ILLEGAL_CHAR_VALUE)
1975 {
1976 lr_error (ldfile, "invalid character name");
1977 newp = NULL;
1978 }
1979 else
1980 {
1981 newp = (struct translit_ignore_t *)
1982 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1983 newp->from = from;
1984 newp->to = from;
1985 newp->step = 1;
1986
1987 newp->next = ctype->translit_ignore;
1988 ctype->translit_ignore = newp;
1989 }
1990
1991 /* Now we expect either a semicolon, an ellipsis, or the end of the
1992 line. */
1993 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1994
1995 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
1996 {
1997 /* XXX Should we bother implementing `....'? `...' certainly
1998 will not be implemented. */
1999 uint32_t to;
2000 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2001
2002 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2003
2004 if (now->tok == tok_eol || now->tok == tok_eof)
2005 {
2006 lr_error (ldfile,
2007 _("premature end of `translit_ignore' definition"));
2008 return;
2009 }
2010
2011 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2012 {
2013 lr_error (ldfile, _("syntax error"));
2014 lr_ignore_rest (ldfile, 0);
2015 return;
2016 }
2017
2018 if (now->tok == tok_ucs4)
2019 to = now->val.ucs4;
2020 else
2021 /* Try to get the value. */
2022 to = repertoire_find_value (repertoire, now->val.str.startmb,
2023 now->val.str.lenmb);
2024
2025 if (to == ILLEGAL_CHAR_VALUE)
2026 lr_error (ldfile, "invalid character name");
2027 else
2028 {
2029 /* Make sure the `to'-value is larger. */
2030 if (to >= from)
2031 {
2032 newp->to = to;
2033 newp->step = step;
2034 }
2035 else
2036 lr_error (ldfile, _("\
2037to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2038 (to | from) < 65536 ? 4 : 8, to,
2039 (to | from) < 65536 ? 4 : 8, from);
2040 }
2041
2042 /* And the next token. */
2043 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2044 }
2045
2046 if (now->tok == tok_eol || now->tok == tok_eof)
2047 /* We are done. */
2048 return;
2049
2050 if (now->tok == tok_semicolon)
2051 /* Next round. */
2052 continue;
2053
2054 /* If we come here something is wrong. */
2055 lr_error (ldfile, _("syntax error"));
2056 lr_ignore_rest (ldfile, 0);
2057 return;
2058 }
2059}
2060
2061
2062/* The parser for the LC_CTYPE section of the locale definition. */
2063void
2064ctype_read (struct linereader *ldfile, struct localedef_t *result,
2065 const struct charmap_t *charmap, const char *repertoire_name,
2066 int ignore_content)
2067{
2068 struct repertoire_t *repertoire = NULL;
2069 struct locale_ctype_t *ctype;
2070 struct token *now;
2071 enum token_t nowtok;
2072 size_t cnt;
2073 uint32_t last_wch = 0;
2074 enum token_t last_token;
2075 enum token_t ellipsis_token;
2076 int step;
2077 char last_charcode[16];
2078 size_t last_charcode_len = 0;
2079 const char *last_str = NULL;
2080 int mapidx;
2081 struct localedef_t *copy_locale = NULL;
2082
2083 /* Get the repertoire we have to use. */
2084 if (repertoire_name != NULL)
2085 repertoire = repertoire_read (repertoire_name);
2086
2087 /* The rest of the line containing `LC_CTYPE' must be free. */
2088 lr_ignore_rest (ldfile, 1);
2089
2090
2091 do
2092 {
2093 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2094 nowtok = now->tok;
2095 }
2096 while (nowtok == tok_eol);
2097
2098 /* If we see `copy' now we are almost done. */
2099 if (nowtok == tok_copy)
2100 {
2101 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2102 if (now->tok != tok_string)
2103 {
2104 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2105
2106 skip_category:
2107 do
2108 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2109 while (now->tok != tok_eof && now->tok != tok_end);
2110
2111 if (now->tok != tok_eof
2112 || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2113 now->tok == tok_eof))
2114 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2115 else if (now->tok != tok_lc_ctype)
2116 {
2117 lr_error (ldfile, _("\
2118%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2119 lr_ignore_rest (ldfile, 0);
2120 }
2121 else
2122 lr_ignore_rest (ldfile, 1);
2123
2124 return;
2125 }
2126
2127 if (! ignore_content)
2128 {
2129 /* Get the locale definition. */
2130 copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2131 repertoire_name, charmap, NULL);
2132 if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2133 {
2134 /* Not yet loaded. So do it now. */
2135 if (locfile_read (copy_locale, charmap) != 0)
2136 goto skip_category;
2137 }
2138
2139 if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2140 return;
2141 }
2142
2143 lr_ignore_rest (ldfile, 1);
2144
2145 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2146 nowtok = now->tok;
2147 }
2148
2149 /* Prepare the data structures. */
2150 ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2151 ctype = result->categories[LC_CTYPE].ctype;
2152
2153 /* Remember the repertoire we use. */
2154 if (!ignore_content)
2155 ctype->repertoire = repertoire;
2156
2157 while (1)
2158 {
2159 unsigned long int class_bit = 0;
2160 unsigned long int class256_bit = 0;
2161 int handle_digits = 0;
2162
2163 /* Of course we don't proceed beyond the end of file. */
2164 if (nowtok == tok_eof)
2165 break;
2166
      /* Ignore empty lines.  */
2168 if (nowtok == tok_eol)
2169 {
2170 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2171 nowtok = now->tok;
2172 continue;
2173 }
2174
2175 switch (nowtok)
2176 {
2177 case tok_charclass:
2178 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2179 while (now->tok == tok_ident || now->tok == tok_string)
2180 {
2181 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2182 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2183 if (now->tok != tok_semicolon)
2184 break;
2185 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2186 }
2187 if (now->tok != tok_eol)
2188 SYNTAX_ERROR (_("\
2189%s: syntax error in definition of new character class"), "LC_CTYPE");
2190 break;
2191
2192 case tok_charconv:
2193 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2194 while (now->tok == tok_ident || now->tok == tok_string)
2195 {
2196 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2197 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2198 if (now->tok != tok_semicolon)
2199 break;
2200 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2201 }
2202 if (now->tok != tok_eol)
2203 SYNTAX_ERROR (_("\
2204%s: syntax error in definition of new character map"), "LC_CTYPE");
2205 break;
2206
2207 case tok_class:
2208 /* Ignore the rest of the line if we don't need the input of
2209 this line. */
2210 if (ignore_content)
2211 {
2212 lr_ignore_rest (ldfile, 0);
2213 break;
2214 }
2215
2216 /* We simply forget the `class' keyword and use the following
2217 operand to determine the bit. */
2218 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2219 if (now->tok == tok_ident || now->tok == tok_string)
2220 {
              /* This may be one of the already defined class names.  */
2222 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2223 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2224 break;
2225 if (cnt >= ctype->nr_charclass)
2226 {
2227 /* OK, it's a new class. */
2228 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2229
2230 class_bit = _ISwbit (ctype->nr_charclass - 1);
2231 }
2232 else
2233 {
2234 class_bit = _ISwbit (cnt);
2235
2236 free (now->val.str.startmb);
2237 }
2238 }
2239 else if (now->tok == tok_digit)
2240 goto handle_tok_digit;
2241 else if (now->tok < tok_upper || now->tok > tok_blank)
2242 goto err_label;
2243 else
2244 {
2245 class_bit = BITw (now->tok);
2246 class256_bit = BIT (now->tok);
2247 }
2248
2249 /* The next character must be a semicolon. */
2250 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2251 if (now->tok != tok_semicolon)
2252 goto err_label;
2253 goto read_charclass;
2254
2255 case tok_upper:
2256 case tok_lower:
2257 case tok_alpha:
2258 case tok_alnum:
2259 case tok_space:
2260 case tok_cntrl:
2261 case tok_punct:
2262 case tok_graph:
2263 case tok_print:
2264 case tok_xdigit:
2265 case tok_blank:
2266 /* Ignore the rest of the line if we don't need the input of
2267 this line. */
2268 if (ignore_content)
2269 {
2270 lr_ignore_rest (ldfile, 0);
2271 break;
2272 }
2273
2274 class_bit = BITw (now->tok);
2275 class256_bit = BIT (now->tok);
2276 handle_digits = 0;
2277 read_charclass:
2278 ctype->class_done |= class_bit;
2279 last_token = tok_none;
2280 ellipsis_token = tok_none;
2281 step = 1;
2282 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2283 while (now->tok != tok_eol && now->tok != tok_eof)
2284 {
2285 uint32_t wch;
2286 struct charseq *seq;
2287
2288 if (ellipsis_token == tok_none)
2289 {
2290 if (get_character (now, charmap, repertoire, &seq, &wch))
2291 goto err_label;
2292
2293 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2294 /* Yep, we can store information about this byte
2295 sequence. */
2296 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2297
2298 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2299 && class_bit != 0)
2300 /* We have the UCS4 position. */
2301 *find_idx (ctype, &ctype->class_collection,
2302 &ctype->class_collection_max,
2303 &ctype->class_collection_act, wch) |= class_bit;
2304
2305 last_token = now->tok;
2306 /* Terminate the string. */
2307 if (last_token == tok_bsymbol)
2308 {
2309 now->val.str.startmb[now->val.str.lenmb] = '\0';
2310 last_str = now->val.str.startmb;
2311 }
2312 else
2313 last_str = NULL;
2314 last_wch = wch;
2315 memcpy (last_charcode, now->val.charcode.bytes, 16);
2316 last_charcode_len = now->val.charcode.nbytes;
2317
2318 if (!ignore_content && handle_digits == 1)
2319 {
2320 /* We must store the digit values. */
2321 if (ctype->mbdigits_act == ctype->mbdigits_max)
2322 {
2323 ctype->mbdigits_max += 10;
2324 ctype->mbdigits = xrealloc (ctype->mbdigits,
2325 (ctype->mbdigits_max
2326 * sizeof (char *)));
2327 ctype->wcdigits_max += 10;
2328 ctype->wcdigits = xrealloc (ctype->wcdigits,
2329 (ctype->wcdigits_max
2330 * sizeof (uint32_t)));
2331 }
2332
2333 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2334 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2335 }
2336 else if (!ignore_content && handle_digits == 2)
2337 {
2338 /* We must store the digit values. */
2339 if (ctype->outdigits_act >= 10)
2340 {
2341 lr_error (ldfile, _("\
2342%s: field `%s' does not contain exactly ten entries"),
2343 "LC_CTYPE", "outdigit");
2344 lr_ignore_rest (ldfile, 0);
2345 break;
2346 }
2347
2348 ctype->mboutdigits[ctype->outdigits_act] = seq;
2349 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2350 ++ctype->outdigits_act;
2351 }
2352 }
2353 else
2354 {
2355 /* Now it gets complicated. We have to resolve the
2356 ellipsis problem. First we must distinguish between
2357 the different kind of ellipsis and this must match the
2358 tokens we have seen. */
2359 assert (last_token != tok_none);
2360
2361 if (last_token != now->tok)
2362 {
2363 lr_error (ldfile, _("\
2364ellipsis range must be marked by two operands of same type"));
2365 lr_ignore_rest (ldfile, 0);
2366 break;
2367 }
2368
2369 if (last_token == tok_bsymbol)
2370 {
2371 if (ellipsis_token == tok_ellipsis3)
2372 lr_error (ldfile, _("with symbolic name range values \
2373the absolute ellipsis `...' must not be used"));
2374
2375 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2376 repertoire, now, last_str,
2377 class256_bit, class_bit,
2378 (ellipsis_token
2379 == tok_ellipsis4
2380 ? 10 : 16),
2381 ignore_content,
2382 handle_digits, step);
2383 }
2384 else if (last_token == tok_ucs4)
2385 {
2386 if (ellipsis_token != tok_ellipsis2)
2387 lr_error (ldfile, _("\
2388with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2389
2390 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2391 repertoire, now, last_wch,
2392 class256_bit, class_bit,
2393 ignore_content, handle_digits,
2394 step);
2395 }
2396 else
2397 {
2398 assert (last_token == tok_charcode);
2399
2400 if (ellipsis_token != tok_ellipsis3)
2401 lr_error (ldfile, _("\
2402with character code range values one must use the absolute ellipsis `...'"));
2403
2404 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2405 repertoire, now,
2406 last_charcode,
2407 last_charcode_len,
2408 class256_bit, class_bit,
2409 ignore_content,
2410 handle_digits);
2411 }
2412
2413 /* Now we have used the last value. */
2414 last_token = tok_none;
2415 }
2416
2417 /* Next we expect a semicolon or the end of the line. */
2418 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2419 if (now->tok == tok_eol || now->tok == tok_eof)
2420 break;
2421
2422 if (last_token != tok_none
2423 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2424 {
2425 if (now->tok == tok_ellipsis2_2)
2426 {
2427 now->tok = tok_ellipsis2;
2428 step = 2;
2429 }
2430 else if (now->tok == tok_ellipsis4_2)
2431 {
2432 now->tok = tok_ellipsis4;
2433 step = 2;
2434 }
2435
2436 ellipsis_token = now->tok;
2437
2438 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2439 continue;
2440 }
2441
2442 if (now->tok != tok_semicolon)
2443 goto err_label;
2444
2445 /* And get the next character. */
2446 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2447
2448 ellipsis_token = tok_none;
2449 step = 1;
2450 }
2451 break;
2452
2453 case tok_digit:
2454 /* Ignore the rest of the line if we don't need the input of
2455 this line. */
2456 if (ignore_content)
2457 {
2458 lr_ignore_rest (ldfile, 0);
2459 break;
2460 }
2461
2462 handle_tok_digit:
2463 class_bit = _ISwdigit;
2464 class256_bit = _ISdigit;
2465 handle_digits = 1;
2466 goto read_charclass;
2467
2468 case tok_outdigit:
2469 /* Ignore the rest of the line if we don't need the input of
2470 this line. */
2471 if (ignore_content)
2472 {
2473 lr_ignore_rest (ldfile, 0);
2474 break;
2475 }
2476
2477 if (ctype->outdigits_act != 0)
2478 lr_error (ldfile, _("\
2479%s: field `%s' declared more than once"),
2480 "LC_CTYPE", "outdigit");
2481 class_bit = 0;
2482 class256_bit = 0;
2483 handle_digits = 2;
2484 goto read_charclass;
2485
2486 case tok_toupper:
2487 /* Ignore the rest of the line if we don't need the input of
2488 this line. */
2489 if (ignore_content)
2490 {
2491 lr_ignore_rest (ldfile, 0);
2492 break;
2493 }
2494
2495 mapidx = 0;
2496 goto read_mapping;
2497
2498 case tok_tolower:
2499 /* Ignore the rest of the line if we don't need the input of
2500 this line. */
2501 if (ignore_content)
2502 {
2503 lr_ignore_rest (ldfile, 0);
2504 break;
2505 }
2506
2507 mapidx = 1;
2508 goto read_mapping;
2509
2510 case tok_map:
2511 /* Ignore the rest of the line if we don't need the input of
2512 this line. */
2513 if (ignore_content)
2514 {
2515 lr_ignore_rest (ldfile, 0);
2516 break;
2517 }
2518
2519 /* We simply forget the `map' keyword and use the following
2520 operand to determine the mapping. */
2521 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2522 if (now->tok == tok_ident || now->tok == tok_string)
2523 {
2524 size_t cnt;
2525
2526 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2527 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2528 break;
2529
2530 if (cnt < ctype->map_collection_nr)
2531 free (now->val.str.startmb);
2532 else
2533 /* OK, it's a new map. */
2534 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2535
2536 mapidx = cnt;
2537 }
2538 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2539 goto err_label;
2540 else
2541 mapidx = now->tok - tok_toupper;
2542
2543 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2544 /* This better should be a semicolon. */
2545 if (now->tok != tok_semicolon)
2546 goto err_label;
2547
2548 read_mapping:
2549 /* Test whether this mapping was already defined. */
2550 if (ctype->tomap_done[mapidx])
2551 {
2552 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2553 ctype->mapnames[mapidx]);
2554 lr_ignore_rest (ldfile, 0);
2555 break;
2556 }
2557 ctype->tomap_done[mapidx] = 1;
2558
2559 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2560 while (now->tok != tok_eol && now->tok != tok_eof)
2561 {
2562 struct charseq *from_seq;
2563 uint32_t from_wch;
2564 struct charseq *to_seq;
2565 uint32_t to_wch;
2566
2567 /* Every pair starts with an opening brace. */
2568 if (now->tok != tok_open_brace)
2569 goto err_label;
2570
2571 /* Next comes the from-value. */
2572 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2573 if (get_character (now, charmap, repertoire, &from_seq,
2574 &from_wch) != 0)
2575 goto err_label;
2576
2577 /* The next is a comma. */
2578 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2579 if (now->tok != tok_comma)
2580 goto err_label;
2581
2582 /* And the other value. */
2583 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2584 if (get_character (now, charmap, repertoire, &to_seq,
2585 &to_wch) != 0)
2586 goto err_label;
2587
2588 /* And the last thing is the closing brace. */
2589 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2590 if (now->tok != tok_close_brace)
2591 goto err_label;
2592
2593 if (!ignore_content)
2594 {
2595 /* Check whether the mapping converts from an ASCII value
2596 to a non-ASCII value. */
2597 if (from_seq != NULL && from_seq->nbytes == 1
2598 && isascii (from_seq->bytes[0])
2599 && to_seq != NULL && (to_seq->nbytes != 1
2600 || !isascii (to_seq->bytes[0])))
2601 ctype->to_nonascii = 1;
2602
2603 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2604 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2605 /* We can use this value. */
2606 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2607 = to_seq->bytes[0];
2608
2609 if (from_wch != ILLEGAL_CHAR_VALUE
2610 && to_wch != ILLEGAL_CHAR_VALUE)
2611 /* Both correct values. */
2612 *find_idx (ctype, &ctype->map_collection[mapidx],
2613 &ctype->map_collection_max[mapidx],
2614 &ctype->map_collection_act[mapidx],
2615 from_wch) = to_wch;
2616 }
2617
2618 /* Now comes a semicolon or the end of the line/file. */
2619 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2620 if (now->tok == tok_semicolon)
2621 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2622 }
2623 break;
2624
2625 case tok_translit_start:
2626 /* Ignore the entire translit section with its peculiar syntax
2627 if we don't need the input. */
2628 if (ignore_content)
2629 {
2630 do
2631 {
2632 lr_ignore_rest (ldfile, 0);
2633 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2634 }
2635 while (now->tok != tok_translit_end && now->tok != tok_eof);
2636
2637 if (now->tok == tok_eof)
2638 lr_error (ldfile, _(\
2639"%s: `translit_start' section does not end with `translit_end'"),
2640 "LC_CTYPE");
2641
2642 break;
2643 }
2644
2645 /* The rest of the line better should be empty. */
2646 lr_ignore_rest (ldfile, 1);
2647
2648 /* We count here the number of allocated entries in the `translit'
2649 array. */
2650 cnt = 0;
2651
2652 ldfile->translate_strings = 1;
2653 ldfile->return_widestr = 1;
2654
2655 /* We proceed until we see the `translit_end' token. */
2656 while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2657 now->tok != tok_translit_end && now->tok != tok_eof)
2658 {
2659 if (now->tok == tok_eol)
2660 /* Ignore empty lines. */
2661 continue;
2662
2663 if (now->tok == tok_include)
2664 {
2665 /* We have to include locale. */
2666 const char *locale_name;
2667 const char *repertoire_name;
2668 struct translit_include_t *include_stmt, **include_ptr;
2669
2670 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2671 /* This should be a string or an identifier. In any
2672 case something to name a locale. */
2673 if (now->tok != tok_string && now->tok != tok_ident)
2674 {
2675 translit_syntax:
2676 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2677 lr_ignore_rest (ldfile, 0);
2678 continue;
2679 }
2680 locale_name = now->val.str.startmb;
2681
2682 /* Next should be a semicolon. */
2683 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2684 if (now->tok != tok_semicolon)
2685 goto translit_syntax;
2686
2687 /* Now the repertoire name. */
2688 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2689 if ((now->tok != tok_string && now->tok != tok_ident)
2690 || now->val.str.startmb == NULL)
2691 goto translit_syntax;
2692 repertoire_name = now->val.str.startmb;
2693 if (repertoire_name[0] == '\0')
2694 /* Ignore the empty string. */
2695 repertoire_name = NULL;
2696
2697 /* Save the include statement for later processing. */
2698 include_stmt = (struct translit_include_t *)
2699 xmalloc (sizeof (struct translit_include_t));
2700 include_stmt->copy_locale = locale_name;
2701 include_stmt->copy_repertoire = repertoire_name;
2702 include_stmt->next = NULL;
2703
2704 include_ptr = &ctype->translit_include;
2705 while (*include_ptr != NULL)
2706 include_ptr = &(*include_ptr)->next;
2707 *include_ptr = include_stmt;
2708
2709 /* The rest of the line must be empty. */
2710 lr_ignore_rest (ldfile, 1);
2711
2712 /* Make sure the locale is read. */
2713 add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2714 1, NULL);
2715 continue;
2716 }
2717 else if (now->tok == tok_default_missing)
2718 {
2719 uint32_t *wstr;
2720
2721 while (1)
2722 {
2723 /* We expect a single character or string as the
2724 argument. */
2725 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2726 wstr = read_widestring (ldfile, now, charmap,
2727 repertoire);
2728
2729 if (wstr != NULL)
2730 {
2731 if (ctype->default_missing != NULL)
2732 {
2733 lr_error (ldfile, _("\
2734%s: duplicate `default_missing' definition"), "LC_CTYPE");
2735 record_error_at_line (0, 0,
2736 ctype->default_missing_file,
2737 ctype->default_missing_lineno,
2738 _("\
2739previous definition was here"));
2740 }
2741 else
2742 {
2743 ctype->default_missing = wstr;
2744 ctype->default_missing_file = ldfile->fname;
2745 ctype->default_missing_lineno = ldfile->lineno;
2746 }
2747 /* We can have more entries, ignore them. */
2748 lr_ignore_rest (ldfile, 0);
2749 break;
2750 }
2751 else if (wstr == (uint32_t *) -1l)
                    /* This was a syntax error.  */
2753 break;
2754
2755 /* Maybe there is another replacement we can use. */
2756 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2757 if (now->tok == tok_eol || now->tok == tok_eof)
2758 {
2759 /* Nothing found. We tell the user. */
2760 lr_error (ldfile, _("\
2761%s: no representable `default_missing' definition found"), "LC_CTYPE");
2762 break;
2763 }
2764 if (now->tok != tok_semicolon)
2765 goto translit_syntax;
2766 }
2767
2768 continue;
2769 }
2770 else if (now->tok == tok_translit_ignore)
2771 {
2772 read_translit_ignore_entry (ldfile, ctype, charmap,
2773 repertoire);
2774 continue;
2775 }
2776
2777 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2778 }
2779 ldfile->return_widestr = 0;
2780
2781 if (now->tok == tok_eof)
2782 lr_error (ldfile, _(\
2783"%s: `translit_start' section does not end with `translit_end'"),
2784 "LC_CTYPE");
2785
2786 break;
2787
2788 case tok_ident:
2789 /* Ignore the rest of the line if we don't need the input of
2790 this line. */
2791 if (ignore_content)
2792 {
2793 lr_ignore_rest (ldfile, 0);
2794 break;
2795 }
2796
2797 /* This could mean one of several things. First test whether
2798 it's a character class name. */
2799 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2800 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2801 break;
2802 if (cnt < ctype->nr_charclass)
2803 {
2804 class_bit = _ISwbit (cnt);
2805 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2806 free (now->val.str.startmb);
2807 goto read_charclass;
2808 }
2809 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2810 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2811 break;
2812 if (cnt < ctype->map_collection_nr)
2813 {
2814 mapidx = cnt;
2815 free (now->val.str.startmb);
2816 goto read_mapping;
2817 }
2818 break;
2819
2820 case tok_end:
2821 /* Next we assume `LC_CTYPE'. */
2822 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2823 if (now->tok == tok_eof)
2824 break;
2825 if (now->tok == tok_eol)
2826 lr_error (ldfile, _("%s: incomplete `END' line"),
2827 "LC_CTYPE");
2828 else if (now->tok != tok_lc_ctype)
2829 lr_error (ldfile, _("\
2830%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2831 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2832 return;
2833
2834 default:
2835 err_label:
2836 if (now->tok != tok_eof)
2837 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2838 }
2839
2840 /* Prepare for the next round. */
2841 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2842 nowtok = now->tok;
2843 }
2844
2845 /* When we come here we reached the end of the file. */
2846 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2847}
2848
2849
2850/* Subroutine of set_class_defaults, below. */
2851static void
2852set_one_default (struct locale_ctype_t *ctype,
2853 const struct charmap_t *charmap,
2854 int bitpos, int from, int to)
2855{
2856 char tmp[2];
2857 int ch;
2858 int bit = _ISbit (bitpos);
2859 int bitw = _ISwbit (bitpos);
2860 /* Define string. */
2861 strcpy (tmp, "?");
2862
2863 for (ch = from; ch <= to; ++ch)
2864 {
2865 struct charseq *seq;
2866 tmp[0] = ch;
2867
2868 seq = charmap_find_value (charmap, tmp, 1);
2869 if (seq == NULL)
2870 {
2871 char buf[10];
2872 sprintf (buf, "U%08X", ch);
2873 seq = charmap_find_value (charmap, buf, 9);
2874 }
2875 if (seq == NULL)
2876 {
2877 record_error (0, 0, _("\
2878%s: character `%s' not defined while needed as default value"),
2879 "LC_CTYPE", tmp);
2880 }
2881 else if (seq->nbytes != 1)
2882 record_error (0, 0, _("\
2883%s: character `%s' in charmap not representable with one byte"),
2884 "LC_CTYPE", tmp);
2885 else
2886 ctype->class256_collection[seq->bytes[0]] |= bit;
2887
2888 /* No need to search here, the ASCII value is also the Unicode
2889 value. */
2890 ELEM (ctype, class_collection, , ch) |= bitw;
2891 }
2892}
2893
2894static void
2895set_class_defaults (struct locale_ctype_t *ctype,
2896 const struct charmap_t *charmap,
2897 struct repertoire_t *repertoire)
2898{
2899#define set_default(bitpos, from, to) \
2900 set_one_default (ctype, charmap, bitpos, from, to)
2901
  /* This function defines the default values for the classes and conversions
2903 according to POSIX.2 2.5.2.1.
2904 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2905 Don't move them unless you know what you do! */
2906
2907 /* Set default values if keyword was not present. */
2908 if ((ctype->class_done & BITw (tok_upper)) == 0)
    /* "If this keyword [upper] is not specified, the uppercase letters
       `A' through `Z', ..., shall automatically belong to this class,
       with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2912 set_default (BITPOS (tok_upper), 'A', 'Z');
2913
2914 if ((ctype->class_done & BITw (tok_lower)) == 0)
2915 /* "If this keyword [lower] is not specified, the lowercase letters
2916 `a' through `z', ..., shall automatically belong to this class,
2917 with implementation defined character values." [P1003.2, 2.5.2.1] */
2918 set_default (BITPOS (tok_lower), 'a', 'z');
2919
2920 if ((ctype->class_done & BITw (tok_alpha)) == 0)
2921 {
2922 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2923 class `lower' *must* be in class `alpha'. */
2924 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
2925 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2926
2927 for (size_t cnt = 0; cnt < 256; ++cnt)
2928 if ((ctype->class256_collection[cnt] & mask) != 0)
2929 ctype->class256_collection[cnt] |= BIT (tok_alpha);
2930
2931 for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2932 if ((ctype->class_collection[cnt] & maskw) != 0)
2933 ctype->class_collection[cnt] |= BITw (tok_alpha);
2934 }
2935
2936 if ((ctype->class_done & BITw (tok_digit)) == 0)
2937 /* "If this keyword [digit] is not specified, the digits `0' through
2938 `9', ..., shall automatically belong to this class, with
2939 implementation-defined character values." [P1003.2, 2.5.2.1] */
2940 set_default (BITPOS (tok_digit), '0', '9');
2941
2942 /* "Only characters specified for the `alpha' and `digit' keyword
2943 shall be specified. Characters specified for the keyword `alpha'
2944 and `digit' are automatically included in this class. */
2945 {
2946 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
2947 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2948
2949 for (size_t cnt = 0; cnt < 256; ++cnt)
2950 if ((ctype->class256_collection[cnt] & mask) != 0)
2951 ctype->class256_collection[cnt] |= BIT (tok_alnum);
2952
2953 for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2954 if ((ctype->class_collection[cnt] & maskw) != 0)
2955 ctype->class_collection[cnt] |= BITw (tok_alnum);
2956 }
2957
2958 if ((ctype->class_done & BITw (tok_space)) == 0)
2959 /* "If this keyword [space] is not specified, the characters <space>,
2960 <form-feed>, <newline>, <carriage-return>, <tab>, and
2961 <vertical-tab>, ..., shall automatically belong to this class,
2962 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2963 {
2964 struct charseq *seq;
2965
2966 seq = charmap_find_value (charmap, "space", 5);
2967 if (seq == NULL)
2968 seq = charmap_find_value (charmap, "SP", 2);
2969 if (seq == NULL)
2970 seq = charmap_find_value (charmap, "U00000020", 9);
2971 if (seq == NULL)
2972 {
2973 record_error (0, 0, _("\
2974%s: character `%s' not defined while needed as default value"),
2975 "LC_CTYPE", "<space>");
2976 }
2977 else if (seq->nbytes != 1)
2978 record_error (0, 0, _("\
2979%s: character `%s' in charmap not representable with one byte"),
2980 "LC_CTYPE", "<space>");
2981 else
2982 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2983
2984 /* No need to search. */
2985 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
2986
2987 seq = charmap_find_value (charmap, "form-feed", 9);
2988 if (seq == NULL)
2989 seq = charmap_find_value (charmap, "U0000000C", 9);
2990 if (seq == NULL)
2991 {
2992 record_error (0, 0, _("\
2993%s: character `%s' not defined while needed as default value"),
2994 "LC_CTYPE", "<form-feed>");
2995 }
2996 else if (seq->nbytes != 1)
2997 record_error (0, 0, _("\
2998%s: character `%s' in charmap not representable with one byte"),
2999 "LC_CTYPE", "<form-feed>");
3000 else
3001 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3002
3003 /* No need to search. */
3004 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3005
3006
3007 seq = charmap_find_value (charmap, "newline", 7);
3008 if (seq == NULL)
3009 seq = charmap_find_value (charmap, "U0000000A", 9);
3010 if (seq == NULL)
3011 {
3012 record_error (0, 0, _("\
3013%s: character `%s' not defined while needed as default value"),
3014 "LC_CTYPE", "<newline>");
3015 }
3016 else if (seq->nbytes != 1)
3017 record_error (0, 0, _("\
3018%s: character `%s' in charmap not representable with one byte"),
3019 "LC_CTYPE", "<newline>");
3020 else
3021 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3022
3023 /* No need to search. */
3024 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3025
3026
3027 seq = charmap_find_value (charmap, "carriage-return", 15);
3028 if (seq == NULL)
3029 seq = charmap_find_value (charmap, "U0000000D", 9);
3030 if (seq == NULL)
3031 {
3032 record_error (0, 0, _("\
3033%s: character `%s' not defined while needed as default value"),
3034 "LC_CTYPE", "<carriage-return>");
3035 }
3036 else if (seq->nbytes != 1)
3037 record_error (0, 0, _("\
3038%s: character `%s' in charmap not representable with one byte"),
3039 "LC_CTYPE", "<carriage-return>");
3040 else
3041 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3042
3043 /* No need to search. */
3044 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3045
3046
3047 seq = charmap_find_value (charmap, "tab", 3);
3048 if (seq == NULL)
3049 seq = charmap_find_value (charmap, "U00000009", 9);
3050 if (seq == NULL)
3051 {
3052 record_error (0, 0, _("\
3053%s: character `%s' not defined while needed as default value"),
3054 "LC_CTYPE", "<tab>");
3055 }
3056 else if (seq->nbytes != 1)
3057 record_error (0, 0, _("\
3058%s: character `%s' in charmap not representable with one byte"),
3059 "LC_CTYPE", "<tab>");
3060 else
3061 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3062
3063 /* No need to search. */
3064 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3065
3066
3067 seq = charmap_find_value (charmap, "vertical-tab", 12);
3068 if (seq == NULL)
3069 seq = charmap_find_value (charmap, "U0000000B", 9);
3070 if (seq == NULL)
3071 {
3072 record_error (0, 0, _("\
3073%s: character `%s' not defined while needed as default value"),
3074 "LC_CTYPE", "<vertical-tab>");
3075 }
3076 else if (seq->nbytes != 1)
3077 record_error (0, 0, _("\
3078%s: character `%s' in charmap not representable with one byte"),
3079 "LC_CTYPE", "<vertical-tab>");
3080 else
3081 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3082
3083 /* No need to search. */
3084 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3085 }
3086
3087 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3088 /* "If this keyword is not specified, the digits `0' to `9', the
3089 uppercase letters `A' through `F', and the lowercase letters `a'
       through `f', ..., shall automatically belong to this class, with
3091 implementation defined character values." [P1003.2, 2.5.2.1] */
3092 {
3093 set_default (BITPOS (tok_xdigit), '0', '9');
3094 set_default (BITPOS (tok_xdigit), 'A', 'F');
3095 set_default (BITPOS (tok_xdigit), 'a', 'f');
3096 }
3097
3098 if ((ctype->class_done & BITw (tok_blank)) == 0)
3099 /* "If this keyword [blank] is unspecified, the characters <space> and
3100 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3101 {
3102 struct charseq *seq;
3103
3104 seq = charmap_find_value (charmap, "space", 5);
3105 if (seq == NULL)
3106 seq = charmap_find_value (charmap, "SP", 2);
3107 if (seq == NULL)
3108 seq = charmap_find_value (charmap, "U00000020", 9);
3109 if (seq == NULL)
3110 {
3111 record_error (0, 0, _("\
3112%s: character `%s' not defined while needed as default value"),
3113 "LC_CTYPE", "<space>");
3114 }
3115 else if (seq->nbytes != 1)
3116 record_error (0, 0, _("\
3117%s: character `%s' in charmap not representable with one byte"),
3118 "LC_CTYPE", "<space>");
3119 else
3120 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3121
3122 /* No need to search. */
3123 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3124
3125
3126 seq = charmap_find_value (charmap, "tab", 3);
3127 if (seq == NULL)
3128 seq = charmap_find_value (charmap, "U00000009", 9);
3129 if (seq == NULL)
3130 {
3131 record_error (0, 0, _("\
3132%s: character `%s' not defined while needed as default value"),
3133 "LC_CTYPE", "<tab>");
3134 }
3135 else if (seq->nbytes != 1)
3136 record_error (0, 0, _("\
3137%s: character `%s' in charmap not representable with one byte"),
3138 "LC_CTYPE", "<tab>");
3139 else
3140 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3141
3142 /* No need to search. */
3143 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3144 }
3145
3146 if ((ctype->class_done & BITw (tok_graph)) == 0)
3147 /* "If this keyword [graph] is not specified, characters specified for
3148 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3149 shall belong to this character class." [P1003.2, 2.5.2.1] */
3150 {
3151 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower)
3152 | BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit)
3153 | BIT (tok_punct);
3154 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower)
3155 | BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit)
3156 | BITw (tok_punct);
3157
3158 for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3159 if ((ctype->class_collection[cnt] & maskw) != 0)
3160 ctype->class_collection[cnt] |= BITw (tok_graph);
3161
3162 for (size_t cnt = 0; cnt < 256; ++cnt)
3163 if ((ctype->class256_collection[cnt] & mask) != 0)
3164 ctype->class256_collection[cnt] |= BIT (tok_graph);
3165 }
3166
3167 if ((ctype->class_done & BITw (tok_print)) == 0)
3168 /* "If this keyword [print] is not provided, characters specified for
3169 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3170 and the <space> character shall belong to this character class."
3171 [P1003.2, 2.5.2.1] */
3172 {
3173 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower)
3174 | BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit)
3175 | BIT (tok_punct);
3176 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower)
3177 | BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit)
3178 | BITw (tok_punct);
3179 struct charseq *seq;
3180
3181 for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3182 if ((ctype->class_collection[cnt] & maskw) != 0)
3183 ctype->class_collection[cnt] |= BITw (tok_print);
3184
3185 for (size_t cnt = 0; cnt < 256; ++cnt)
3186 if ((ctype->class256_collection[cnt] & mask) != 0)
3187 ctype->class256_collection[cnt] |= BIT (tok_print);
3188
3189
3190 seq = charmap_find_value (charmap, "space", 5);
3191 if (seq == NULL)
3192 seq = charmap_find_value (charmap, "SP", 2);
3193 if (seq == NULL)
3194 seq = charmap_find_value (charmap, "U00000020", 9);
3195 if (seq == NULL)
3196 {
3197 record_error (0, 0, _("\
3198%s: character `%s' not defined while needed as default value"),
3199 "LC_CTYPE", "<space>");
3200 }
3201 else if (seq->nbytes != 1)
3202 record_error (0, 0, _("\
3203%s: character `%s' in charmap not representable with one byte"),
3204 "LC_CTYPE", "<space>");
3205 else
3206 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3207
3208 /* No need to search. */
3209 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3210 }
3211
3212 if (ctype->tomap_done[0] == 0)
3213 /* "If this keyword [toupper] is not specified, the lowercase letters
3214 `a' through `z', and their corresponding uppercase letters `A' to
3215 `Z', ..., shall automatically be included, with implementation-
3216 defined character values." [P1003.2, 2.5.2.1] */
3217 {
3218 char tmp[4];
3219 int ch;
3220
3221 strcpy (tmp, "<?>");
3222
3223 for (ch = 'a'; ch <= 'z'; ++ch)
3224 {
3225 struct charseq *seq_from, *seq_to;
3226
3227 tmp[1] = (char) ch;
3228
3229 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3230 if (seq_from == NULL)
3231 {
3232 char buf[10];
3233 sprintf (buf, "U%08X", ch);
3234 seq_from = charmap_find_value (charmap, buf, 9);
3235 }
3236 if (seq_from == NULL)
3237 {
3238 record_error (0, 0, _("\
3239%s: character `%s' not defined while needed as default value"),
3240 "LC_CTYPE", tmp);
3241 }
3242 else if (seq_from->nbytes != 1)
3243 {
3244 record_error (0, 0, _("\
3245%s: character `%s' needed as default value not representable with one byte"),
3246 "LC_CTYPE", tmp);
3247 }
3248 else
3249 {
3250 /* This conversion is implementation defined. */
3251 tmp[1] = (char) (ch + ('A' - 'a'));
3252 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3253 if (seq_to == NULL)
3254 {
3255 char buf[10];
3256 sprintf (buf, "U%08X", ch + ('A' - 'a'));
3257 seq_to = charmap_find_value (charmap, buf, 9);
3258 }
3259 if (seq_to == NULL)
3260 {
3261 record_error (0, 0, _("\
3262%s: character `%s' not defined while needed as default value"),
3263 "LC_CTYPE", tmp);
3264 }
3265 else if (seq_to->nbytes != 1)
3266 {
3267 record_error (0, 0, _("\
3268%s: character `%s' needed as default value not representable with one byte"),
3269 "LC_CTYPE", tmp);
3270 }
3271 else
3272 /* The index [0] is determined by the order of the
3273 `ctype_map_newP' calls in `ctype_startup'. */
3274 ctype->map256_collection[0][seq_from->bytes[0]]
3275 = seq_to->bytes[0];
3276 }
3277
3278 /* No need to search. */
3279 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3280 }
3281 }
3282
3283 if (ctype->tomap_done[1] == 0)
3284 /* "If this keyword [tolower] is not specified, the mapping shall be
3285 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3286 {
3287 for (size_t cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3288 if (ctype->map_collection[0][cnt] != 0)
3289 ELEM (ctype, map_collection, [1],
3290 ctype->map_collection[0][cnt])
3291 = ctype->charnames[cnt];
3292
3293 for (size_t cnt = 0; cnt < 256; ++cnt)
3294 if (ctype->map256_collection[0][cnt] != 0)
3295 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3296 }
3297
3298 if (ctype->outdigits_act != 10)
3299 {
3300 if (ctype->outdigits_act != 0)
3301 record_error (0, 0, _("\
3302%s: field `%s' does not contain exactly ten entries"),
3303 "LC_CTYPE", "outdigit");
3304
3305 for (size_t cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3306 {
3307 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3308 (char *) digits + cnt,
3309 1);
3310
3311 if (ctype->mboutdigits[cnt] == NULL)
3312 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3313 longnames[cnt],
3314 strlen (longnames[cnt]));
3315
3316 if (ctype->mboutdigits[cnt] == NULL)
3317 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3318 uninames[cnt], 9);
3319
3320 if (ctype->mboutdigits[cnt] == NULL)
3321 {
3322 /* Provide a replacement. */
3323 record_error (0, 0, _("\
3324no output digits defined and none of the standard names in the charmap"));
3325
3326 ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3327 sizeof (struct charseq)
3328 + 1);
3329
3330 /* This is better than nothing. */
3331 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3332 ctype->mboutdigits[cnt]->nbytes = 1;
3333 }
3334
3335 ctype->wcoutdigits[cnt] = L'0' + cnt;
3336 }
3337
3338 ctype->outdigits_act = 10;
3339 }
3340
3341#undef set_default
3342}
3343
3344
3345/* Initialize. Assumes t->p and t->q have already been set. */
3346static inline void
3347wctype_table_init (struct wctype_table *t)
3348{
3349 t->level1 = NULL;
3350 t->level1_alloc = t->level1_size = 0;
3351 t->level2 = NULL;
3352 t->level2_alloc = t->level2_size = 0;
3353 t->level3 = NULL;
3354 t->level3_alloc = t->level3_size = 0;
3355}
3356
3357/* Retrieve an entry. */
3358static inline int
3359wctype_table_get (struct wctype_table *t, uint32_t wc)
3360{
3361 uint32_t index1 = wc >> (t->q + t->p + 5);
3362 if (index1 < t->level1_size)
3363 {
3364 uint32_t lookup1 = t->level1[index1];
3365 if (lookup1 != EMPTY)
3366 {
3367 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3368 + (lookup1 << t->q);
3369 uint32_t lookup2 = t->level2[index2];
3370 if (lookup2 != EMPTY)
3371 {
3372 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3373 + (lookup2 << t->p);
3374 uint32_t lookup3 = t->level3[index3];
3375 uint32_t index4 = wc & 0x1f;
3376
3377 return (lookup3 >> index4) & 1;
3378 }
3379 }
3380 }
3381 return 0;
3382}
3383
3384/* Add one entry. */
3385static void
3386wctype_table_add (struct wctype_table *t, uint32_t wc)
3387{
3388 uint32_t index1 = wc >> (t->q + t->p + 5);
3389 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3390 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3391 uint32_t index4 = wc & 0x1f;
3392 size_t i, i1, i2;
3393
3394 if (index1 >= t->level1_size)
3395 {
3396 if (index1 >= t->level1_alloc)
3397 {
3398 size_t alloc = 2 * t->level1_alloc;
3399 if (alloc <= index1)
3400 alloc = index1 + 1;
3401 t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3402 alloc * sizeof (uint32_t));
3403 t->level1_alloc = alloc;
3404 }
3405 while (index1 >= t->level1_size)
3406 t->level1[t->level1_size++] = EMPTY;
3407 }
3408
3409 if (t->level1[index1] == EMPTY)
3410 {
3411 if (t->level2_size == t->level2_alloc)
3412 {
3413 size_t alloc = 2 * t->level2_alloc + 1;
3414 t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3415 (alloc << t->q) * sizeof (uint32_t));
3416 t->level2_alloc = alloc;
3417 }
3418 i1 = t->level2_size << t->q;
3419 i2 = (t->level2_size + 1) << t->q;
3420 for (i = i1; i < i2; i++)
3421 t->level2[i] = EMPTY;
3422 t->level1[index1] = t->level2_size++;
3423 }
3424
3425 index2 += t->level1[index1] << t->q;
3426
3427 if (t->level2[index2] == EMPTY)
3428 {
3429 if (t->level3_size == t->level3_alloc)
3430 {
3431 size_t alloc = 2 * t->level3_alloc + 1;
3432 t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3433 (alloc << t->p) * sizeof (uint32_t));
3434 t->level3_alloc = alloc;
3435 }
3436 i1 = t->level3_size << t->p;
3437 i2 = (t->level3_size + 1) << t->p;
3438 for (i = i1; i < i2; i++)
3439 t->level3[i] = 0;
3440 t->level2[index2] = t->level3_size++;
3441 }
3442
3443 index3 += t->level2[index2] << t->p;
3444
3445 t->level3[index3] |= (uint32_t)1 << index4;
3446}
3447
3448/* Finalize and shrink. */
3449static void
3450add_locale_wctype_table (struct locale_file *file, struct wctype_table *t)
3451{
3452 size_t i, j, k;
3453 uint32_t reorder3[t->level3_size];
3454 uint32_t reorder2[t->level2_size];
3455 uint32_t level2_offset, level3_offset;
3456
3457 /* Uniquify level3 blocks. */
3458 k = 0;
3459 for (j = 0; j < t->level3_size; j++)
3460 {
3461 for (i = 0; i < k; i++)
3462 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3463 (1 << t->p) * sizeof (uint32_t)) == 0)
3464 break;
3465 /* Relocate block j to block i. */
3466 reorder3[j] = i;
3467 if (i == k)
3468 {
3469 if (i != j)
3470 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3471 (1 << t->p) * sizeof (uint32_t));
3472 k++;
3473 }
3474 }
3475 t->level3_size = k;
3476
3477 for (i = 0; i < (t->level2_size << t->q); i++)
3478 if (t->level2[i] != EMPTY)
3479 t->level2[i] = reorder3[t->level2[i]];
3480
3481 /* Uniquify level2 blocks. */
3482 k = 0;
3483 for (j = 0; j < t->level2_size; j++)
3484 {
3485 for (i = 0; i < k; i++)
3486 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3487 (1 << t->q) * sizeof (uint32_t)) == 0)
3488 break;
3489 /* Relocate block j to block i. */
3490 reorder2[j] = i;
3491 if (i == k)
3492 {
3493 if (i != j)
3494 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3495 (1 << t->q) * sizeof (uint32_t));
3496 k++;
3497 }
3498 }
3499 t->level2_size = k;
3500
3501 for (i = 0; i < t->