1/* Copyright (C) 1995-2022 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published
6 by the Free Software Foundation; version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, see <https://www.gnu.org/licenses/>. */
16
17#ifdef HAVE_CONFIG_H
18# include <config.h>
19#endif
20
21#include <errno.h>
22#include <stdlib.h>
23#include <wchar.h>
24#include <stdint.h>
25#include <sys/param.h>
26#include <array_length.h>
27
28#include "localedef.h"
29#include "charmap.h"
30#include "localeinfo.h"
31#include "linereader.h"
32#include "locfile.h"
33#include "elem-hash.h"
34
35/* Uncomment the following line in the production version. */
36/* #define NDEBUG 1 */
37#include <assert.h>
38
39#define obstack_chunk_alloc malloc
40#define obstack_chunk_free free
41
/* Append DATA as one 32-bit word to the object currently being grown
   in OBSTACK.

   The object must be LOCFILE_ALIGNED_P-aligned so far (asserted).  DATA
   is first passed through maybe_swap_uint32 (helper not visible in this
   file; presumably a byte-order adjustment for the output -- confirm).
   When `int' is 32 bits wide the dedicated obstack_int_grow is used,
   otherwise the bytes are appended with obstack_grow.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  data = maybe_swap_uint32 (data);
  if (sizeof (int32_t) == sizeof (int))
    obstack_int_grow (obstack, data);
  else
    obstack_grow (obstack, &data, sizeof (int32_t));
}
53
/* Like obstack_int32_grow, but uses obstack_int_grow_fast when `int' is
   32 bits wide.

   The object must be LOCFILE_ALIGNED_P-aligned so far (asserted) and
   DATA is passed through maybe_swap_uint32 first.  Note that the
   fallback for a non-32-bit `int' is the ordinary obstack_grow, not a
   "fast" variant.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  data = maybe_swap_uint32 (data);
  if (sizeof (int32_t) == sizeof (int))
    obstack_int_grow_fast (obstack, data);
  else
    obstack_grow (obstack, &data, sizeof (int32_t));
}
65
66/* Forward declaration. */
67struct element_t;
68
/* Data type for one section of the collation order.  A section can be
   chained on two independent lists (the `known_sections' and the
   `sections' list of struct locale_collate_t) and records the first
   and last element which belong to it.  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section (one entry per level; the
     array is installed by read_directions).  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
87
88struct element_t;
89
/* Counted array of element pointers.  insert_weights stores one of
   these per level as the weight of an element.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT element pointers.  An entry may be a null pointer (weight
     is IGNOREd) or one of the ELEMENT_ELLIPSIS* placeholder values.  */
  struct element_t **w;
};
97
/* Data type for collating element.  */
struct element_t
{
  /* Symbolic name; may be a null pointer.  */
  const char *name;

  /* Multibyte sequence and its length in bytes (null pointer and zero
     if unknown).  */
  const char *mbs;
  size_t nmbs;
  /* Wide character sequence, zero terminated, and its length in
     characters (null pointer and zero if unknown).  */
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* The following is a bit mask whose bits are set if this element is
     used in the corresponding level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* Weights of this element, one element_list_t per rule.  Null
     pointer until insert_weights has processed the element.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Links for the multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Links for the wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
147
/* Special element values.  These are small integers cast to element
   pointers so that they can be stored in a weight list in place of a
   real element; they must never be dereferenced.  insert_weights
   stores ELEMENT_ELLIPSIS2 as the placeholder for a weight which
   depends on the element iterating over an ellipsis range.  */
#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
152
/* Data type for collating symbol.  */
struct symbol_t
{
  /* Name of the symbol (private copy made by new_symbol).  */
  const char *name;

  /* Point to place in the order list.  Null pointer until the symbol
     is first used; find_element and insert_value then create the
     element on demand.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
165
/* The following three blocks each instantiate "3level.h" by defining
   TABLE (name of the generated table type), ELEMENT (type of the
   stored values) and DEFAULT before including the header.  */

/* Sparse table of struct element_t *.  Used as `struct wchead_table'
   for the `wcheads' member of struct locale_collate_t.  This is the
   only instantiation which also defines ITERATE and NO_ADD_LOCALE.  */
#define TABLE wchead_table
#define ELEMENT struct element_t *
#define DEFAULT NULL
#define ITERATE
#define NO_ADD_LOCALE
#include "3level.h"

/* Sparse table of int32_t.  */
#define TABLE collidx_table
#define ELEMENT int32_t
#define DEFAULT 0
#include "3level.h"

/* Sparse table of uint32_t.  Used as `struct collseq_table' for the
   `wcseqorder' member of struct locale_collate_t; the default value
   has all bits set.  */
#define TABLE collseq_table
#define ELEMENT uint32_t
#define DEFAULT ~((uint32_t) 0)
#include "3level.h"
185
186
/* Simple name list for the preprocessor.  Each node is allocated with
   enough extra room behind the header to hold the NUL-terminated name
   in STR.  */
struct name_list
{
  /* Next node of the singly linked list, or a null pointer.  */
  struct name_list *next;
  /* The name itself.  This is a C99 flexible array member (replacing
     the GNU zero-length array); size and layout of the structure are
     unchanged.  */
  char str[];
};
193
194
/* The real definition of the struct for the LC_COLLATE locale.  */
struct locale_collate_t
{
  /* Does the locale use code points to compare the encoding?  */
  bool codepoint_collation;

  int col_weight_max;
  int cur_weight_max;

  /* List of known scripts (chained through `def_next').  */
  struct section_list *known_sections;
  /* List of used sections (chained through `next').  */
  struct section_list *sections;
  /* Section currently being defined.  New elements are assigned to
     it and read_directions stores the rules in it.  */
  struct section_list *current_section;
  /* There always can be an unnamed section.  */
  struct section_list unnamed_section;
  /* Flag whether the unnamed section has been defined.  */
  bool unnamed_section_defined;
  /* To make handling of errors easier we have another section.  */
  struct section_list error_section;
  /* Sometimes we are defining the values for collating symbols before
     the first actual section.  */
  struct section_list symbol_section;

  /* Start of the order list.  */
  struct element_t *start;

  /* The undefined element.  */
  struct element_t undefined;

  /* This is the cursor for `reorder_after' insertions.  insert_weights
     links every new element directly behind it and then advances it
     to the new element.  */
  struct element_t *cursor;

  /* This value is used when handling ellipsis.  */
  struct element_t ellipsis_weight;

  /* Known collating elements.  */
  hash_table elem_table;

  /* Known collating symbols.  */
  hash_table sym_table;

  /* Known collation sequences.  */
  hash_table seq_table;

  /* Obstack from which sections, elements, symbols and weight lists
     are allocated.  */
  struct obstack mempool;

  /* The LC_COLLATE category is a bit special as it is sometimes possible
     that the definitions from more than one input file contains information.
     Therefore we keep all relevant input in a list.  */
  struct locale_collate_t *next;

  /* Arrays with heads of the list for each of the leading bytes in
     the multibyte sequences.  */
  struct element_t *mbheads[256];

  /* Sparse table with the heads of the element lists for the wide
     character sequences.  NOTE(review): the original comment here was
     a verbatim copy of the one for `mbheads'; presumably the table is
     indexed by the leading wide character -- confirm.  */
  struct wchead_table wcheads;

  /* The arrays with the collation sequence order.  */
  unsigned char mbseqorder[256];
  struct collseq_table wcseqorder;

  /* State of the preprocessor.  */
  enum
    {
      else_none = 0,
      else_ignore,
      else_seen
    }
  else_action;
};
269
270
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  */

/* Number of sorting rules (levels).  Zero until the first call of
   read_directions has counted them; every later direction
   specification must provide exactly this many rules.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
277
278
/* Store the UTF-8 encoding of VAL in BUF and return the number of
   bytes written (1 to 6).  BUF is not NUL terminated.  Values below
   0x80 are stored as a single byte.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  /* The single byte case needs no further computation.  */
  if (val < 0x80)
    {
      buf[0] = (char) val;
      return 1;
    }

  /* A sequence of N bytes carries 5 * N + 1 payload bits.  Pick the
     shortest sequence which can hold VAL, at most six bytes.  */
  int nbytes = 2;
  while (nbytes < 6
         && (val & (~(uint32_t)0 << (5 * nbytes + 1))) != 0)
    ++nbytes;

  /* Fill in the continuation bytes, last one first.  Each of them
     takes the next six low-order bits.  */
  for (int idx = nbytes - 1; idx > 0; --idx)
    {
      buf[idx] = 0x80 | (val & 0x3f);
      val >>= 6;
    }

  /* The lead byte consists of NBYTES one-bits followed by whatever is
     left of the value.  */
  buf[0] = (unsigned char) (~0xff >> nbytes) | val;

  return nbytes;
}
313
314
315static struct section_list *
316make_seclist_elem (struct locale_collate_t *collate, const char *string,
317 struct section_list *next)
318{
319 struct section_list *newp;
320
321 newp = (struct section_list *) obstack_alloc (&collate->mempool,
322 sizeof (*newp));
323 newp->next = next;
324 newp->name = string;
325 newp->first = NULL;
326 newp->last = NULL;
327
328 return newp;
329}
330
331
332static struct element_t *
333new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
334 const uint32_t *wcs, const char *name, size_t namelen,
335 int is_character)
336{
337 struct element_t *newp;
338
339 newp = (struct element_t *) obstack_alloc (&collate->mempool,
340 sizeof (*newp));
341 newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
342 name, namelen);
343 if (mbs != NULL)
344 {
345 newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
346 newp->nmbs = mbslen;
347 }
348 else
349 {
350 newp->mbs = NULL;
351 newp->nmbs = 0;
352 }
353 if (wcs != NULL)
354 {
355 size_t nwcs = wcslen (s: (wchar_t *) wcs);
356 uint32_t zero = 0;
357 /* Handle <U0000> as a single character. */
358 if (nwcs == 0)
359 nwcs = 1;
360 obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
361 obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
362 newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
363 newp->nwcs = nwcs;
364 }
365 else
366 {
367 newp->wcs = NULL;
368 newp->nwcs = 0;
369 }
370 newp->mborder = NULL;
371 newp->wcorder = 0;
372 newp->used_in_level = 0;
373 newp->is_character = is_character;
374
375 /* Will be assigned later. XXX */
376 newp->mbseqorder = 0;
377 newp->wcseqorder = 0;
378
379 /* Will be allocated later. */
380 newp->weights = NULL;
381
382 newp->file = NULL;
383 newp->line = 0;
384
385 newp->section = collate->current_section;
386
387 newp->last = NULL;
388 newp->next = NULL;
389
390 newp->mbnext = NULL;
391 newp->mblast = NULL;
392
393 newp->wcnext = NULL;
394 newp->wclast = NULL;
395
396 return newp;
397}
398
399
400static struct symbol_t *
401new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
402{
403 struct symbol_t *newp;
404
405 newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
406
407 newp->name = obstack_copy0 (&collate->mempool, name, len);
408 newp->order = NULL;
409
410 newp->file = NULL;
411 newp->line = 0;
412
413 return newp;
414}
415
416
417/* Test whether this name is already defined somewhere. */
418static int
419check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
420 const struct charmap_t *charmap,
421 struct repertoire_t *repertoire, const char *symbol,
422 size_t symbol_len)
423{
424 void *ignore = NULL;
425
426 if (find_entry (htab: &charmap->char_table, key: symbol, keylen: symbol_len, result: &ignore) == 0)
427 {
428 lr_error (lr: ldfile, _("`%.*s' already defined in charmap"),
429 (int) symbol_len, symbol);
430 return 1;
431 }
432
433 if (repertoire != NULL
434 && (find_entry (htab: &repertoire->char_table, key: symbol, keylen: symbol_len, result: &ignore)
435 == 0))
436 {
437 lr_error (lr: ldfile, _("`%.*s' already defined in repertoire"),
438 (int) symbol_len, symbol);
439 return 1;
440 }
441
442 if (find_entry (htab: &collate->sym_table, key: symbol, keylen: symbol_len, result: &ignore) == 0)
443 {
444 lr_error (lr: ldfile, _("`%.*s' already defined as collating symbol"),
445 (int) symbol_len, symbol);
446 return 1;
447 }
448
449 if (find_entry (htab: &collate->elem_table, key: symbol, keylen: symbol_len, result: &ignore) == 0)
450 {
451 lr_error (lr: ldfile, _("`%.*s' already defined as collating element"),
452 (int) symbol_len, symbol);
453 return 1;
454 }
455
456 return 0;
457}
458
459
460/* Read the direction specification. */
461static void
462read_directions (struct linereader *ldfile, struct token *arg,
463 const struct charmap_t *charmap,
464 struct repertoire_t *repertoire, struct localedef_t *result)
465{
466 int cnt = 0;
467 int max = nrules ?: 10;
468 enum coll_sort_rule *rules = calloc (nmemb: max, size: sizeof (*rules));
469 int warned = 0;
470 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
471
472 while (1)
473 {
474 int valid = 0;
475
476 if (arg->tok == tok_forward)
477 {
478 if (rules[cnt] & sort_backward)
479 {
480 if (! warned)
481 {
482 lr_error (lr: ldfile, _("\
483%s: `forward' and `backward' are mutually excluding each other"),
484 "LC_COLLATE");
485 warned = 1;
486 }
487 }
488 else if (rules[cnt] & sort_forward)
489 {
490 if (! warned)
491 {
492 lr_error (lr: ldfile, _("\
493%s: `%s' mentioned more than once in definition of weight %d"),
494 "LC_COLLATE", "forward", cnt + 1);
495 }
496 }
497 else
498 rules[cnt] |= sort_forward;
499
500 valid = 1;
501 }
502 else if (arg->tok == tok_backward)
503 {
504 if (rules[cnt] & sort_forward)
505 {
506 if (! warned)
507 {
508 lr_error (lr: ldfile, _("\
509%s: `forward' and `backward' are mutually excluding each other"),
510 "LC_COLLATE");
511 warned = 1;
512 }
513 }
514 else if (rules[cnt] & sort_backward)
515 {
516 if (! warned)
517 {
518 lr_error (lr: ldfile, _("\
519%s: `%s' mentioned more than once in definition of weight %d"),
520 "LC_COLLATE", "backward", cnt + 1);
521 }
522 }
523 else
524 rules[cnt] |= sort_backward;
525
526 valid = 1;
527 }
528 else if (arg->tok == tok_position)
529 {
530 if (rules[cnt] & sort_position)
531 {
532 if (! warned)
533 {
534 lr_error (lr: ldfile, _("\
535%s: `%s' mentioned more than once in definition of weight %d"),
536 "LC_COLLATE", "position", cnt + 1);
537 }
538 }
539 else
540 rules[cnt] |= sort_position;
541
542 valid = 1;
543 }
544
545 if (valid)
546 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
547
548 if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
549 || arg->tok == tok_semicolon)
550 {
551 if (! valid && ! warned)
552 {
553 lr_error (lr: ldfile, _("%s: syntax error"), "LC_COLLATE");
554 warned = 1;
555 }
556
557 /* See whether we have to increment the counter. */
558 if (arg->tok != tok_comma && rules[cnt] != 0)
559 {
560 /* Add the default `forward' if we have seen only `position'. */
561 if (rules[cnt] == sort_position)
562 rules[cnt] = sort_position | sort_forward;
563
564 ++cnt;
565 }
566
567 if (arg->tok == tok_eof || arg->tok == tok_eol)
568 /* End of line or file, so we exit the loop. */
569 break;
570
571 if (nrules == 0)
572 {
573 /* See whether we have enough room in the array. */
574 if (cnt == max)
575 {
576 max += 10;
577 rules = (enum coll_sort_rule *) xrealloc (o: rules,
578 n: max
579 * sizeof (*rules));
580 memset (s: &rules[cnt], c: '\0', n: (max - cnt) * sizeof (*rules));
581 }
582 }
583 else
584 {
585 if (cnt == nrules)
586 {
587 /* There must not be any more rule. */
588 if (! warned)
589 {
590 lr_error (lr: ldfile, _("\
591%s: too many rules; first entry only had %d"),
592 "LC_COLLATE", nrules);
593 warned = 1;
594 }
595
596 lr_ignore_rest (lr: ldfile, verbose: 0);
597 break;
598 }
599 }
600 }
601 else
602 {
603 if (! warned)
604 {
605 lr_error (lr: ldfile, _("%s: syntax error"), "LC_COLLATE");
606 warned = 1;
607 }
608 }
609
610 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
611 }
612
613 if (nrules == 0)
614 {
615 /* Now we know how many rules we have. */
616 nrules = cnt;
617 rules = (enum coll_sort_rule *) xrealloc (o: rules,
618 n: nrules * sizeof (*rules));
619 }
620 else
621 {
622 if (cnt < nrules)
623 {
624 /* Not enough rules in this specification. */
625 if (! warned)
626 lr_error (lr: ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
627
628 do
629 rules[cnt] = sort_forward;
630 while (++cnt < nrules);
631 }
632 }
633
634 collate->current_section->rules = rules;
635}
636
637
638static struct element_t *
639find_element (struct linereader *ldfile, struct locale_collate_t *collate,
640 const char *str, size_t len)
641{
642 void *result = NULL;
643
644 /* Search for the entries among the collation sequences already define. */
645 if (find_entry (htab: &collate->seq_table, key: str, keylen: len, result: &result) != 0)
646 {
647 /* Nope, not define yet. So we see whether it is a
648 collation symbol. */
649 void *ptr;
650
651 if (find_entry (htab: &collate->sym_table, key: str, keylen: len, result: &ptr) == 0)
652 {
653 /* It's a collation symbol. */
654 struct symbol_t *sym = (struct symbol_t *) ptr;
655 result = sym->order;
656
657 if (result == NULL)
658 result = sym->order = new_element (collate, NULL, mbslen: 0, NULL,
659 NULL, namelen: 0, is_character: 0);
660 }
661 else if (find_entry (htab: &collate->elem_table, key: str, keylen: len, result: &result) != 0)
662 {
663 /* It's also no collation element. So it is a character
664 element defined later. */
665 result = new_element (collate, NULL, mbslen: 0, NULL, name: str, namelen: len, is_character: 1);
666 /* Insert it into the sequence table. */
667 insert_entry (htab: &collate->seq_table, key: str, keylen: len, data: result);
668 }
669 }
670
671 return (struct element_t *) result;
672}
673
674
675static void
676unlink_element (struct locale_collate_t *collate)
677{
678 if (collate->cursor == collate->start)
679 {
680 assert (collate->cursor->next == NULL);
681 assert (collate->cursor->last == NULL);
682 collate->cursor = NULL;
683 }
684 else
685 {
686 if (collate->cursor->next != NULL)
687 collate->cursor->next->last = collate->cursor->last;
688 if (collate->cursor->last != NULL)
689 collate->cursor->last->next = collate->cursor->next;
690 collate->cursor = collate->cursor->last;
691 }
692}
693
694
695static void
696insert_weights (struct linereader *ldfile, struct element_t *elem,
697 const struct charmap_t *charmap,
698 struct repertoire_t *repertoire, struct localedef_t *result,
699 enum token_t ellipsis)
700{
701 int weight_cnt;
702 struct token *arg;
703 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
704
705 /* Initialize all the fields. */
706 elem->file = ldfile->fname;
707 elem->line = ldfile->lineno;
708
709 elem->last = collate->cursor;
710 elem->next = collate->cursor ? collate->cursor->next : NULL;
711 if (collate->cursor != NULL && collate->cursor->next != NULL)
712 collate->cursor->next->last = elem;
713 if (collate->cursor != NULL)
714 collate->cursor->next = elem;
715 if (collate->start == NULL)
716 {
717 assert (collate->cursor == NULL);
718 collate->start = elem;
719 }
720
721 elem->section = collate->current_section;
722
723 if (collate->current_section->first == NULL)
724 collate->current_section->first = elem;
725 if (collate->current_section->last == collate->cursor)
726 collate->current_section->last = elem;
727
728 collate->cursor = elem;
729
730 elem->weights = (struct element_list_t *)
731 obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
732 memset (s: elem->weights, c: '\0', n: nrules * sizeof (struct element_list_t));
733
734 weight_cnt = 0;
735
736 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
737 do
738 {
739 if (arg->tok == tok_eof || arg->tok == tok_eol)
740 break;
741
742 if (arg->tok == tok_ignore)
743 {
744 /* The weight for this level has to be ignored. We use the
745 null pointer to indicate this. */
746 elem->weights[weight_cnt].w = (struct element_t **)
747 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
748 elem->weights[weight_cnt].w[0] = NULL;
749 elem->weights[weight_cnt].cnt = 1;
750 }
751 else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
752 {
753 char ucs4str[10];
754 struct element_t *val;
755 char *symstr;
756 size_t symlen;
757
758 if (arg->tok == tok_bsymbol)
759 {
760 symstr = arg->val.str.startmb;
761 symlen = arg->val.str.lenmb;
762 }
763 else
764 {
765 snprintf (s: ucs4str, maxlen: sizeof (ucs4str), format: "U%08X", arg->val.ucs4);
766 symstr = ucs4str;
767 symlen = 9;
768 }
769
770 val = find_element (ldfile, collate, str: symstr, len: symlen);
771 if (val == NULL)
772 break;
773
774 elem->weights[weight_cnt].w = (struct element_t **)
775 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
776 elem->weights[weight_cnt].w[0] = val;
777 elem->weights[weight_cnt].cnt = 1;
778 }
779 else if (arg->tok == tok_string)
780 {
781 /* Split the string up in the individual characters and put
782 the element definitions in the list. */
783 const char *cp = arg->val.str.startmb;
784 int cnt = 0;
785 struct element_t *charelem;
786 struct element_t **weights = NULL;
787 int max = 0;
788
789 if (*cp == '\0')
790 {
791 lr_error (lr: ldfile, _("%s: empty weight string not allowed"),
792 "LC_COLLATE");
793 lr_ignore_rest (lr: ldfile, verbose: 0);
794 break;
795 }
796
797 do
798 {
799 if (*cp == '<')
800 {
801 /* Ahh, it's a bsymbol or an UCS4 value. If it's
802 the latter we have to unify the name. */
803 const char *startp = ++cp;
804 size_t len;
805
806 while (*cp != '>')
807 {
808 if (*cp == ldfile->escape_char)
809 ++cp;
810 if (*cp == '\0')
811 /* It's a syntax error. */
812 goto syntax;
813
814 ++cp;
815 }
816
817 if (cp - startp == 5 && startp[0] == 'U'
818 && isxdigit (startp[1]) && isxdigit (startp[2])
819 && isxdigit (startp[3]) && isxdigit (startp[4]))
820 {
821 unsigned int ucs4 = strtoul (nptr: startp + 1, NULL, base: 16);
822 char *newstr;
823
824 newstr = (char *) xmalloc (n: 10);
825 snprintf (s: newstr, maxlen: 10, format: "U%08X", ucs4);
826 startp = newstr;
827
828 len = 9;
829 }
830 else
831 len = cp - startp;
832
833 charelem = find_element (ldfile, collate, str: startp, len);
834 ++cp;
835 }
836 else
837 {
838 /* People really shouldn't use characters directly in
839 the string. Especially since it's not really clear
840 what this means. We interpret all characters in the
841 string as if that would be bsymbols. Otherwise we
842 would have to match back to bsymbols somehow and this
843 is normally not what people normally expect. */
844 charelem = find_element (ldfile, collate, str: cp++, len: 1);
845 }
846
847 if (charelem == NULL)
848 {
849 /* We ignore the rest of the line. */
850 lr_ignore_rest (lr: ldfile, verbose: 0);
851 break;
852 }
853
854 /* Add the pointer. */
855 if (cnt >= max)
856 {
857 struct element_t **newp;
858 max += 10;
859 newp = (struct element_t **)
860 alloca (max * sizeof (struct element_t *));
861 memcpy (dest: newp, src: weights, n: cnt * sizeof (struct element_t *));
862 weights = newp;
863 }
864 weights[cnt++] = charelem;
865 }
866 while (*cp != '\0');
867
868 /* Now store the information. */
869 elem->weights[weight_cnt].w = (struct element_t **)
870 obstack_alloc (&collate->mempool,
871 cnt * sizeof (struct element_t *));
872 memcpy (dest: elem->weights[weight_cnt].w, src: weights,
873 n: cnt * sizeof (struct element_t *));
874 elem->weights[weight_cnt].cnt = cnt;
875
876 /* We don't need the string anymore. */
877 free (ptr: arg->val.str.startmb);
878 }
879 else if (ellipsis != tok_none
880 && (arg->tok == tok_ellipsis2
881 || arg->tok == tok_ellipsis3
882 || arg->tok == tok_ellipsis4))
883 {
884 /* It must be the same ellipsis as used in the initial column. */
885 if (arg->tok != ellipsis)
886 lr_error (lr: ldfile, _("\
887%s: weights must use the same ellipsis symbol as the name"),
888 "LC_COLLATE");
889
890 /* The weight for this level will depend on the element
891 iterating over the range. Put a placeholder. */
892 elem->weights[weight_cnt].w = (struct element_t **)
893 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
894 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
895 elem->weights[weight_cnt].cnt = 1;
896 }
897 else
898 {
899 syntax:
900 /* It's a syntax error. */
901 lr_error (lr: ldfile, _("%s: syntax error"), "LC_COLLATE");
902 lr_ignore_rest (lr: ldfile, verbose: 0);
903 break;
904 }
905
906 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
907 /* This better should be the end of the line or a semicolon. */
908 if (arg->tok == tok_semicolon)
909 /* OK, ignore this and read the next token. */
910 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
911 else if (arg->tok != tok_eof && arg->tok != tok_eol)
912 {
913 /* It's a syntax error. */
914 lr_error (lr: ldfile, _("%s: syntax error"), "LC_COLLATE");
915 lr_ignore_rest (lr: ldfile, verbose: 0);
916 break;
917 }
918 }
919 while (++weight_cnt < nrules);
920
921 if (weight_cnt < nrules)
922 {
923 /* This means the rest of the line uses the current element as
924 the weight. */
925 do
926 {
927 elem->weights[weight_cnt].w = (struct element_t **)
928 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
929 if (ellipsis == tok_none)
930 elem->weights[weight_cnt].w[0] = elem;
931 else
932 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
933 elem->weights[weight_cnt].cnt = 1;
934 }
935 while (++weight_cnt < nrules);
936 }
937 else
938 {
939 if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
940 {
941 /* Too many rule values. */
942 lr_error (lr: ldfile, _("%s: too many values"), "LC_COLLATE");
943 lr_ignore_rest (lr: ldfile, verbose: 0);
944 }
945 else
946 lr_ignore_rest (lr: ldfile, verbose: arg->tok != tok_eol && arg->tok != tok_eof);
947 }
948}
949
950
951static int
952insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
953 const struct charmap_t *charmap, struct repertoire_t *repertoire,
954 struct localedef_t *result)
955{
956 /* First find out what kind of symbol this is. */
957 struct charseq *seq;
958 uint32_t wc;
959 struct element_t *elem = NULL;
960 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
961
962 /* Try to find the character in the charmap. */
963 seq = charmap_find_value (charmap, name: symstr, len: symlen);
964
965 /* Determine the wide character. */
966 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
967 {
968 wc = repertoire_find_value (repertoire, name: symstr, len: symlen);
969 if (seq != NULL)
970 seq->ucs4 = wc;
971 }
972 else
973 wc = seq->ucs4;
974
975 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
976 {
977 /* It's no character, so look through the collation elements and
978 symbol list. */
979 void *ptr = elem;
980 if (find_entry (htab: &collate->elem_table, key: symstr, keylen: symlen, result: &ptr) != 0)
981 {
982 void *result;
983 struct symbol_t *sym = NULL;
984
985 /* It's also collation element. Therefore it's either a
986 collating symbol or it's a character which is not
987 supported by the character set. In the later case we
988 simply create a dummy entry. */
989 if (find_entry (htab: &collate->sym_table, key: symstr, keylen: symlen, result: &result) == 0)
990 {
991 /* It's a collation symbol. */
992 sym = (struct symbol_t *) result;
993
994 elem = sym->order;
995 }
996
997 if (elem == NULL)
998 {
999 elem = new_element (collate, NULL, mbslen: 0, NULL, name: symstr, namelen: symlen, is_character: 0);
1000
1001 if (sym != NULL)
1002 sym->order = elem;
1003 else
1004 /* Enter a fake element in the sequence table. This
1005 won't cause anything in the output since there is
1006 no multibyte or wide character associated with
1007 it. */
1008 insert_entry (htab: &collate->seq_table, key: symstr, keylen: symlen, data: elem);
1009 }
1010 }
1011 else
1012 /* Copy the result back. */
1013 elem = ptr;
1014 }
1015 else
1016 {
1017 /* Otherwise the symbols stands for a character. */
1018 void *ptr = elem;
1019 if (find_entry (htab: &collate->seq_table, key: symstr, keylen: symlen, result: &ptr) != 0)
1020 {
1021 uint32_t wcs[2] = { wc, 0 };
1022
1023 /* We have to allocate an entry. */
1024 elem = new_element (collate,
1025 mbs: seq != NULL ? (char *) seq->bytes : NULL,
1026 mbslen: seq != NULL ? seq->nbytes : 0,
1027 wcs: wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1028 name: symstr, namelen: symlen, is_character: 1);
1029
1030 /* And add it to the table. */
1031 if (insert_entry (htab: &collate->seq_table, key: symstr, keylen: symlen, data: elem) != 0)
1032 /* This cannot happen. */
1033 assert (! "Internal error");
1034 }
1035 else
1036 {
1037 /* Copy the result back. */
1038 elem = ptr;
1039
1040 /* Maybe the character was used before the definition. In this case
1041 we have to insert the byte sequences now. */
1042 if (elem->mbs == NULL && seq != NULL)
1043 {
1044 elem->mbs = obstack_copy0 (&collate->mempool,
1045 seq->bytes, seq->nbytes);
1046 elem->nmbs = seq->nbytes;
1047 }
1048
1049 if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1050 {
1051 uint32_t wcs[2] = { wc, 0 };
1052
1053 elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1054 elem->nwcs = 1;
1055 }
1056 }
1057 }
1058
1059 /* Test whether this element is not already in the list. */
1060 if (elem->next != NULL || elem == collate->cursor)
1061 {
1062 lr_error (lr: ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1063 (int) symlen, symstr, elem->file, elem->line);
1064 lr_ignore_rest (lr: ldfile, verbose: 0);
1065 return 1;
1066 }
1067
1068 insert_weights (ldfile, elem, charmap, repertoire, result, ellipsis: tok_none);
1069
1070 return 0;
1071}
1072
1073
1074static void
1075handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1076 enum token_t ellipsis, const struct charmap_t *charmap,
1077 struct repertoire_t *repertoire,
1078 struct localedef_t *result)
1079{
1080 struct element_t *startp;
1081 struct element_t *endp;
1082 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1083
1084 /* Unlink the entry added for the ellipsis. */
1085 unlink_element (collate);
1086 startp = collate->cursor;
1087
1088 /* Process and add the end-entry. */
1089 if (symstr != NULL
1090 && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1091 /* Something went wrong with inserting the to-value. This means
1092 we cannot process the ellipsis. */
1093 return;
1094
1095 /* Reset the cursor. */
1096 collate->cursor = startp;
1097
1098 /* Now we have to handle many different situations:
1099 - we have to distinguish between the three different ellipsis forms
1100 - the is the ellipsis at the beginning, in the middle, or at the end.
1101 */
1102 endp = collate->cursor->next;
1103 assert (symstr == NULL || endp != NULL);
1104
1105 /* XXX The following is probably very wrong since also collating symbols
1106 can appear in ranges. But do we want/can refine the test for that? */
1107#if 0
1108 /* Both, the start and the end symbol, must stand for characters. */
1109 if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1110 || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1111 {
1112 lr_error (ldfile, _("\
1113%s: the start and the end symbol of a range must stand for characters"),
1114 "LC_COLLATE");
1115 return;
1116 }
1117#endif
1118
1119 if (ellipsis == tok_ellipsis3)
1120 {
1121 /* One requirement we make here: the length of the byte
1122 sequences for the first and end character must be the same.
1123 This is mainly to prevent unwanted effects and this is often
1124 not what is wanted. */
1125 size_t len = (startp->mbs != NULL ? startp->nmbs
1126 : (endp->mbs != NULL ? endp->nmbs : 0));
1127 char mbcnt[len + 1];
1128 char mbend[len + 1];
1129
1130 /* Well, this should be caught somewhere else already. Just to
1131 make sure. */
1132 assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1133 assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1134
1135 if (startp != NULL && endp != NULL
1136 && startp->mbs != NULL && endp->mbs != NULL
1137 && startp->nmbs != endp->nmbs)
1138 {
1139 lr_error (lr: ldfile, _("\
1140%s: byte sequences of first and last character must have the same length"),
1141 "LC_COLLATE");
1142 return;
1143 }
1144
1145 /* Determine whether we have to generate multibyte sequences. */
1146 if ((startp == NULL || startp->mbs != NULL)
1147 && (endp == NULL || endp->mbs != NULL))
1148 {
1149 int cnt;
1150 int ret;
1151
1152 /* Prepare the beginning byte sequence. This is either from the
1153 beginning byte sequence or it is all nulls if it was an
1154 initial ellipsis. */
1155 if (startp == NULL || startp->mbs == NULL)
1156 memset (s: mbcnt, c: '\0', n: len);
1157 else
1158 {
1159 memcpy (dest: mbcnt, src: startp->mbs, n: len);
1160
1161 /* And increment it so that the value is the first one we will
1162 try to insert. */
1163 for (cnt = len - 1; cnt >= 0; --cnt)
1164 if (++mbcnt[cnt] != '\0')
1165 break;
1166 }
1167 mbcnt[len] = '\0';
1168
1169 /* And the end sequence. */
1170 if (endp == NULL || endp->mbs == NULL)
1171 memset (s: mbend, c: '\0', n: len);
1172 else
1173 memcpy (dest: mbend, src: endp->mbs, n: len);
1174 mbend[len] = '\0';
1175
1176 /* Test whether we have a correct range. */
1177 ret = memcmp (s1: mbcnt, s2: mbend, n: len);
1178 if (ret >= 0)
1179 {
1180 if (ret > 0)
1181 lr_error (lr: ldfile, _("%s: byte sequence of first character of \
1182range is not lower than that of the last character"), "LC_COLLATE");
1183 return;
1184 }
1185
1186 /* Generate the byte sequences data. */
1187 while (1)
1188 {
1189 struct charseq *seq;
1190
1191 /* Quite a bit of work ahead. We have to find the character
1192 definition for the byte sequence and then determine the
1193 wide character belonging to it. */
1194 seq = charmap_find_symbol (charmap, name: mbcnt, len);
1195 if (seq != NULL)
1196 {
1197 struct element_t *elem;
1198 size_t namelen;
1199
1200 /* I don't think this can ever happen. */
1201 assert (seq->name != NULL);
1202 namelen = strlen (s: seq->name);
1203
1204 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1205 seq->ucs4 = repertoire_find_value (repertoire, name: seq->name,
1206 len: namelen);
1207
1208 /* Now we are ready to insert the new value in the
1209 sequence. Find out whether the element is
1210 already known. */
1211 void *ptr;
1212 if (find_entry (htab: &collate->seq_table, key: seq->name, keylen: namelen,
1213 result: &ptr) != 0)
1214 {
1215 uint32_t wcs[2] = { seq->ucs4, 0 };
1216
1217 /* We have to allocate an entry. */
1218 elem = new_element (collate, mbs: mbcnt, mbslen: len,
1219 wcs: seq->ucs4 == ILLEGAL_CHAR_VALUE
1220 ? NULL : wcs, name: seq->name,
1221 namelen, is_character: 1);
1222
1223 /* And add it to the table. */
1224 if (insert_entry (htab: &collate->seq_table, key: seq->name,
1225 keylen: namelen, data: elem) != 0)
1226 /* This cannot happen. */
1227 assert (! "Internal error");
1228 }
1229 else
1230 /* Copy the result. */
1231 elem = ptr;
1232
1233 /* Test whether this element is not already in the list. */
1234 if (elem->next != NULL || (collate->cursor != NULL
1235 && elem->next == collate->cursor))
1236 {
1237 lr_error (lr: ldfile, _("\
1238order for `%.*s' already defined at %s:%Zu"),
1239 (int) namelen, seq->name,
1240 elem->file, elem->line);
1241 goto increment;
1242 }
1243
1244 /* Enqueue the new element. */
1245 elem->last = collate->cursor;
1246 if (collate->cursor == NULL)
1247 elem->next = NULL;
1248 else
1249 {
1250 elem->next = collate->cursor->next;
1251 elem->last->next = elem;
1252 if (elem->next != NULL)
1253 elem->next->last = elem;
1254 }
1255 if (collate->start == NULL)
1256 {
1257 assert (collate->cursor == NULL);
1258 collate->start = elem;
1259 }
1260 collate->cursor = elem;
1261
1262 /* Add the weight value. We take them from the
1263 `ellipsis_weights' member of `collate'. */
1264 elem->weights = (struct element_list_t *)
1265 obstack_alloc (&collate->mempool,
1266 nrules * sizeof (struct element_list_t));
1267 for (cnt = 0; cnt < nrules; ++cnt)
1268 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1269 && (collate->ellipsis_weight.weights[cnt].w[0]
1270 == ELEMENT_ELLIPSIS2))
1271 {
1272 elem->weights[cnt].w = (struct element_t **)
1273 obstack_alloc (&collate->mempool,
1274 sizeof (struct element_t *));
1275 elem->weights[cnt].w[0] = elem;
1276 elem->weights[cnt].cnt = 1;
1277 }
1278 else
1279 {
1280 /* Simply use the weight from `ellipsis_weight'. */
1281 elem->weights[cnt].w =
1282 collate->ellipsis_weight.weights[cnt].w;
1283 elem->weights[cnt].cnt =
1284 collate->ellipsis_weight.weights[cnt].cnt;
1285 }
1286 }
1287
1288 /* Increment for the next round. */
1289 increment:
1290 for (cnt = len - 1; cnt >= 0; --cnt)
1291 if (++mbcnt[cnt] != '\0')
1292 break;
1293
1294 /* Find out whether this was all. */
1295 if (cnt < 0 || memcmp (s1: mbcnt, s2: mbend, n: len) >= 0)
1296 /* Yep, that's all. */
1297 break;
1298 }
1299 }
1300 }
1301 else
1302 {
1303 /* For symbolic range we naturally must have a beginning and an
1304 end specified by the user. */
1305 if (startp == NULL)
1306 lr_error (lr: ldfile, _("\
1307%s: symbolic range ellipsis must not directly follow `order_start'"),
1308 "LC_COLLATE");
1309 else if (endp == NULL)
1310 lr_error (lr: ldfile, _("\
1311%s: symbolic range ellipsis must not be directly followed by `order_end'"),
1312 "LC_COLLATE");
1313 else
1314 {
1315 /* Determine the range. To do so we have to determine the
1316 common prefix of the both names and then the numeric
1317 values of both ends. */
1318 size_t lenfrom = strlen (s: startp->name);
1319 size_t lento = strlen (s: endp->name);
1320 char buf[lento + 1];
1321 int preflen = 0;
1322 long int from;
1323 long int to;
1324 char *cp;
1325 int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1326
1327 if (lenfrom != lento)
1328 {
1329 invalid_range:
1330 lr_error (lr: ldfile, _("\
1331`%s' and `%.*s' are not valid names for symbolic range"),
1332 startp->name, (int) lento, endp->name);
1333 return;
1334 }
1335
1336 while (startp->name[preflen] == endp->name[preflen])
1337 if (startp->name[preflen] == '\0')
1338 /* Nothing to be done. The start and end point are identical
1339 and while inserting the end point we have already given
1340 the user an error message. */
1341 return;
1342 else
1343 ++preflen;
1344
1345 errno = 0;
1346 from = strtol (nptr: startp->name + preflen, endptr: &cp, base: base);
1347 if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1348 goto invalid_range;
1349
1350 errno = 0;
1351 to = strtol (nptr: endp->name + preflen, endptr: &cp, base: base);
1352 if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1353 goto invalid_range;
1354
1355 /* Copy the prefix. */
1356 memcpy (dest: buf, src: startp->name, n: preflen);
1357
1358 /* Loop over all values. */
1359 for (++from; from < to; ++from)
1360 {
1361 struct element_t *elem = NULL;
1362 struct charseq *seq;
1363 uint32_t wc;
1364 int cnt;
1365
1366 /* Generate the name. */
1367 sprintf (s: buf + preflen, format: base == 10 ? "%0*ld" : "%0*lX",
1368 (int) (lenfrom - preflen), from);
1369
1370 /* Look whether this name is already defined. */
1371 void *ptr;
1372 if (find_entry (htab: &collate->seq_table, key: buf, keylen: symlen, result: &ptr) == 0)
1373 {
1374 /* Copy back the result. */
1375 elem = ptr;
1376
1377 if (elem->next != NULL || (collate->cursor != NULL
1378 && elem->next == collate->cursor))
1379 {
1380 lr_error (lr: ldfile, _("\
1381%s: order for `%.*s' already defined at %s:%Zu"),
1382 "LC_COLLATE", (int) lenfrom, buf,
1383 elem->file, elem->line);
1384 continue;
1385 }
1386
1387 if (elem->name == NULL)
1388 {
1389 lr_error (lr: ldfile, _("%s: `%s' must be a character"),
1390 "LC_COLLATE", buf);
1391 continue;
1392 }
1393 }
1394
1395 if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1396 {
1397 /* Search for a character of this name. */
1398 seq = charmap_find_value (charmap, name: buf, len: lenfrom);
1399 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1400 {
1401 wc = repertoire_find_value (repertoire, name: buf, len: lenfrom);
1402
1403 if (seq != NULL)
1404 seq->ucs4 = wc;
1405 }
1406 else
1407 wc = seq->ucs4;
1408
1409 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1410 /* We don't know anything about a character with this
1411 name. XXX Should we warn? */
1412 continue;
1413
1414 if (elem == NULL)
1415 {
1416 uint32_t wcs[2] = { wc, 0 };
1417
1418 /* We have to allocate an entry. */
1419 elem = new_element (collate,
1420 mbs: seq != NULL
1421 ? (char *) seq->bytes : NULL,
1422 mbslen: seq != NULL ? seq->nbytes : 0,
1423 wcs: wc == ILLEGAL_CHAR_VALUE
1424 ? NULL : wcs, name: buf, namelen: lenfrom, is_character: 1);
1425 }
1426 else
1427 {
1428 /* Update the element. */
1429 if (seq != NULL)
1430 {
1431 elem->mbs = obstack_copy0 (&collate->mempool,
1432 seq->bytes, seq->nbytes);
1433 elem->nmbs = seq->nbytes;
1434 }
1435
1436 if (wc != ILLEGAL_CHAR_VALUE)
1437 {
1438 uint32_t zero = 0;
1439
1440 obstack_grow (&collate->mempool,
1441 &wc, sizeof (uint32_t));
1442 obstack_grow (&collate->mempool,
1443 &zero, sizeof (uint32_t));
1444 elem->wcs = obstack_finish (&collate->mempool);
1445 elem->nwcs = 1;
1446 }
1447 }
1448
1449 elem->file = ldfile->fname;
1450 elem->line = ldfile->lineno;
1451 elem->section = collate->current_section;
1452 }
1453
1454 /* Enqueue the new element. */
1455 elem->last = collate->cursor;
1456 elem->next = collate->cursor->next;
1457 elem->last->next = elem;
1458 if (elem->next != NULL)
1459 elem->next->last = elem;
1460 collate->cursor = elem;
1461
1462 /* Now add the weights. They come from the `ellipsis_weights'
1463 member of `collate'. */
1464 elem->weights = (struct element_list_t *)
1465 obstack_alloc (&collate->mempool,
1466 nrules * sizeof (struct element_list_t));
1467 for (cnt = 0; cnt < nrules; ++cnt)
1468 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1469 && (collate->ellipsis_weight.weights[cnt].w[0]
1470 == ELEMENT_ELLIPSIS2))
1471 {
1472 elem->weights[cnt].w = (struct element_t **)
1473 obstack_alloc (&collate->mempool,
1474 sizeof (struct element_t *));
1475 elem->weights[cnt].w[0] = elem;
1476 elem->weights[cnt].cnt = 1;
1477 }
1478 else
1479 {
1480 /* Simly use the weight from `ellipsis_weight'. */
1481 elem->weights[cnt].w =
1482 collate->ellipsis_weight.weights[cnt].w;
1483 elem->weights[cnt].cnt =
1484 collate->ellipsis_weight.weights[cnt].cnt;
1485 }
1486 }
1487 }
1488 }
1489 /* Move the cursor to the last entry in the ellipsis.
1490 Subsequent operations need to start from the last entry. */
1491 collate->cursor = endp;
1492}
1493
1494
1495static void
1496collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1497 struct localedef_t *copy_locale, int ignore_content)
1498{
1499 if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1500 {
1501 struct locale_collate_t *collate;
1502
1503 if (copy_locale == NULL)
1504 {
1505 collate = locale->categories[LC_COLLATE].collate =
1506 (struct locale_collate_t *)
1507 xcalloc (n: 1, s: sizeof (struct locale_collate_t));
1508
1509 /* Init the various data structures. */
1510 init_hash (htab: &collate->elem_table, init_size: 100);
1511 init_hash (htab: &collate->sym_table, init_size: 100);
1512 init_hash (htab: &collate->seq_table, init_size: 500);
1513 obstack_init (&collate->mempool);
1514
1515 collate->col_weight_max = -1;
1516 collate->codepoint_collation = false;
1517 }
1518 else
1519 /* Reuse the copy_locale's data structures. */
1520 collate = locale->categories[LC_COLLATE].collate =
1521 copy_locale->categories[LC_COLLATE].collate;
1522 }
1523
1524 ldfile->translate_strings = 0;
1525 ldfile->return_widestr = 0;
1526}
1527
1528
1529void
1530collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1531{
1532 /* Now is the time when we can assign the individual collation
1533 values for all the symbols. We have possibly different values
1534 for the wide- and the multibyte-character symbols. This is done
1535 since it might make a difference in the encoding if there is in
1536 some cases no multibyte-character but there are wide-characters.
1537 (The other way around it is not important since theencoded
1538 collation value in the wide-character case is 32 bits wide and
1539 therefore requires no encoding).
1540
1541 The lowest collation value assigned is 2. Zero is reserved for
1542 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1543 functions and 1 is used to separate the individual passes for the
1544 different rules.
1545
1546 We also have to construct is list with all the bytes/words which
1547 can come first in a sequence, followed by all the elements which
1548 also start with this byte/word. The order is reverse which has
1549 among others the important effect that longer strings are located
1550 first in the list. This is required for the output data since
1551 the algorithm used in `strcoll' etc depends on this.
1552
1553 The multibyte case is easy. We simply sort into an array with
1554 256 elements. */
1555 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1556 int mbact[nrules];
1557 int wcact;
1558 int mbseqact;
1559 int wcseqact;
1560 struct element_t *runp;
1561 int i;
1562 int need_undefined = 0;
1563 struct section_list *sect;
1564 int ruleidx;
1565 int nr_wide_elems = 0;
1566
1567 if (collate == NULL)
1568 {
1569 /* No data, no check. Issue a warning. */
1570 record_warning (_("No definition for %s category found"),
1571 "LC_COLLATE");
1572 return;
1573 }
1574
1575 /* No data required. */
1576 if (collate->codepoint_collation)
1577 return;
1578
1579 /* If this assertion is hit change the type in `element_t'. */
1580 assert (nrules <= sizeof (runp->used_in_level) * 8);
1581
1582 /* Make sure that the `position' rule is used either in all sections
1583 or in none. */
1584 for (i = 0; i < nrules; ++i)
1585 for (sect = collate->sections; sect != NULL; sect = sect->next)
1586 if (sect != collate->current_section
1587 && sect->rules != NULL
1588 && ((sect->rules[i] & sort_position)
1589 != (collate->current_section->rules[i] & sort_position)))
1590 {
1591 record_error (status: 0, errnum: 0, _("\
1592%s: `position' must be used for a specific level in all sections or none"),
1593 "LC_COLLATE");
1594 break;
1595 }
1596
1597 /* Find out which elements are used at which level. At the same
1598 time we find out whether we have any undefined symbols. */
1599 runp = collate->start;
1600 while (runp != NULL)
1601 {
1602 if (runp->mbs != NULL)
1603 {
1604 for (i = 0; i < nrules; ++i)
1605 {
1606 int j;
1607
1608 for (j = 0; j < runp->weights[i].cnt; ++j)
1609 /* A NULL pointer as the weight means IGNORE. */
1610 if (runp->weights[i].w[j] != NULL)
1611 {
1612 if (runp->weights[i].w[j]->weights == NULL)
1613 {
1614 record_error_at_line (status: 0, errnum: 0, filename: runp->file, linenum: runp->line,
1615 _("symbol `%s' not defined"),
1616 runp->weights[i].w[j]->name);
1617
1618 need_undefined = 1;
1619 runp->weights[i].w[j] = &collate->undefined;
1620 }
1621 else
1622 /* Set the bit for the level. */
1623 runp->weights[i].w[j]->used_in_level |= 1 << i;
1624 }
1625 }
1626 }
1627
1628 /* Up to the next entry. */
1629 runp = runp->next;
1630 }
1631
1632 /* Walk through the list of defined sequences and assign weights. Also
1633 create the data structure which will allow generating the single byte
1634 character based tables.
1635
1636 Since at each time only the weights for each of the rules are
1637 only compared to other weights for this rule it is possible to
1638 assign more compact weight values than simply counting all
1639 weights in sequence. We can assign weights from 3, one for each
1640 rule individually and only for those elements, which are actually
1641 used for this rule.
1642
1643 Why is this important? It is not for the wide char table. But
1644 it is for the singlebyte output since here larger numbers have to
1645 be encoded to make it possible to emit the value as a byte
1646 string. */
1647 for (i = 0; i < nrules; ++i)
1648 mbact[i] = 2;
1649 wcact = 2;
1650 mbseqact = 0;
1651 wcseqact = 0;
1652 runp = collate->start;
1653 while (runp != NULL)
1654 {
1655 /* Determine the order. */
1656 if (runp->used_in_level != 0)
1657 {
1658 runp->mborder = (int *) obstack_alloc (&collate->mempool,
1659 nrules * sizeof (int));
1660
1661 for (i = 0; i < nrules; ++i)
1662 if ((runp->used_in_level & (1 << i)) != 0)
1663 runp->mborder[i] = mbact[i]++;
1664 else
1665 runp->mborder[i] = 0;
1666 }
1667
1668 if (runp->mbs != NULL)
1669 {
1670 struct element_t **eptr;
1671 struct element_t *lastp = NULL;
1672
1673 /* Find the point where to insert in the list. */
1674 eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1675 while (*eptr != NULL)
1676 {
1677 if ((*eptr)->nmbs < runp->nmbs)
1678 break;
1679
1680 if ((*eptr)->nmbs == runp->nmbs)
1681 {
1682 int c = memcmp (s1: (*eptr)->mbs, s2: runp->mbs, n: runp->nmbs);
1683
1684 if (c == 0)
1685 {
1686 /* This should not happen. It means that we have
1687 to symbols with the same byte sequence. It is
1688 of course an error. */
1689 record_error_at_line (status: 0, errnum: 0, filename: (*eptr)->file,
1690 linenum: (*eptr)->line,
1691 _("\
1692symbol `%s' has the same encoding as"), (*eptr)->name);
1693
1694 record_error_at_line (status: 0, errnum: 0, filename: runp->file, linenum: runp->line,
1695 _("symbol `%s'"), runp->name);
1696 goto dont_insert;
1697 }
1698 else if (c < 0)
1699 /* Insert it here. */
1700 break;
1701 }
1702
1703 /* To the next entry. */
1704 lastp = *eptr;
1705 eptr = &(*eptr)->mbnext;
1706 }
1707
1708 /* Set the pointers. */
1709 runp->mbnext = *eptr;
1710 runp->mblast = lastp;
1711 if (*eptr != NULL)
1712 (*eptr)->mblast = runp;
1713 *eptr = runp;
1714 dont_insert:
1715 ;
1716 }
1717
1718 if (runp->used_in_level)
1719 {
1720 runp->wcorder = wcact++;
1721
1722 /* We take the opportunity to count the elements which have
1723 wide characters. */
1724 ++nr_wide_elems;
1725 }
1726
1727 if (runp->is_character)
1728 {
1729 if (runp->nmbs == 1)
1730 collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1731
1732 runp->wcseqorder = wcseqact++;
1733 }
1734 else if (runp->mbs != NULL && runp->weights != NULL)
1735 /* This is for collation elements. */
1736 runp->wcseqorder = wcseqact++;
1737
1738 /* Up to the next entry. */
1739 runp = runp->next;
1740 }
1741
1742 /* Find out whether any of the `mbheads' entries is unset. In this
1743 case we use the UNDEFINED entry. */
1744 for (i = 1; i < 256; ++i)
1745 if (collate->mbheads[i] == NULL)
1746 {
1747 need_undefined = 1;
1748 collate->mbheads[i] = &collate->undefined;
1749 }
1750
1751 /* Now to the wide character case. */
1752 collate->wcheads.p = 6;
1753 collate->wcheads.q = 10;
1754 wchead_table_init (t: &collate->wcheads);
1755
1756 collate->wcseqorder.p = 6;
1757 collate->wcseqorder.q = 10;
1758 collseq_table_init (t: &collate->wcseqorder);
1759
1760 /* Start adding. */
1761 runp = collate->start;
1762 while (runp != NULL)
1763 {
1764 if (runp->wcs != NULL)
1765 {
1766 struct element_t *e;
1767 struct element_t **eptr;
1768 struct element_t *lastp;
1769
1770 /* Insert the collation sequence value. */
1771 if (runp->is_character)
1772 collseq_table_add (t: &collate->wcseqorder, wc: runp->wcs[0],
1773 value: runp->wcseqorder);
1774
1775 /* Find the point where to insert in the list. */
1776 e = wchead_table_get (t: &collate->wcheads, wc: runp->wcs[0]);
1777 eptr = &e;
1778 lastp = NULL;
1779 while (*eptr != NULL)
1780 {
1781 if ((*eptr)->nwcs < runp->nwcs)
1782 break;
1783
1784 if ((*eptr)->nwcs == runp->nwcs)
1785 {
1786 int c = wmemcmp (s1: (wchar_t *) (*eptr)->wcs,
1787 s2: (wchar_t *) runp->wcs, n: runp->nwcs);
1788
1789 if (c == 0)
1790 {
1791 /* This should not happen. It means that we have
1792 two symbols with the same byte sequence. It is
1793 of course an error. */
1794 record_error_at_line (status: 0, errnum: 0, filename: (*eptr)->file,
1795 linenum: (*eptr)->line,
1796 _("\
1797symbol `%s' has the same encoding as"), (*eptr)->name);
1798
1799 record_error_at_line (status: 0, errnum: 0, filename: runp->file, linenum: runp->line,
1800 _("symbol `%s'"), runp->name);
1801 goto dont_insertwc;
1802 }
1803 else if (c < 0)
1804 /* Insert it here. */
1805 break;
1806 }
1807
1808 /* To the next entry. */
1809 lastp = *eptr;
1810 eptr = &(*eptr)->wcnext;
1811 }
1812
1813 /* Set the pointers. */
1814 runp->wcnext = *eptr;
1815 runp->wclast = lastp;
1816 if (*eptr != NULL)
1817 (*eptr)->wclast = runp;
1818 *eptr = runp;
1819 if (eptr == &e)
1820 wchead_table_add (t: &collate->wcheads, wc: runp->wcs[0], value: e);
1821 dont_insertwc:
1822 ;
1823 }
1824
1825 /* Up to the next entry. */
1826 runp = runp->next;
1827 }
1828
1829 /* Now determine whether the UNDEFINED entry is needed and if yes,
1830 whether it was defined. */
1831 collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1832 if (collate->undefined.file == NULL)
1833 {
1834 if (need_undefined)
1835 {
1836 /* This seems not to be enforced by recent standards. Don't
1837 emit an error, simply append UNDEFINED at the end. */
1838 collate->undefined.mborder =
1839 (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1840
1841 for (i = 0; i < nrules; ++i)
1842 collate->undefined.mborder[i] = mbact[i]++;
1843 }
1844
1845 /* In any case we will need the definition for the wide character
1846 case. But we will not complain that it is missing since the
1847 specification strangely enough does not seem to account for
1848 this. */
1849 collate->undefined.wcorder = wcact++;
1850 }
1851
1852 /* Finally, try to unify the rules for the sections. Whenever the rules
1853 for a section are the same as those for another section give the
1854 ruleset the same index. Since there are never many section we can
1855 use an O(n^2) algorithm here. */
1856 sect = collate->sections;
1857 while (sect != NULL && sect->rules == NULL)
1858 sect = sect->next;
1859
1860 /* Bail out if we have no sections because of earlier errors. */
1861 if (sect == NULL)
1862 {
1863 record_error (EXIT_FAILURE, errnum: 0, _("too many errors; giving up"));
1864 return;
1865 }
1866
1867 ruleidx = 0;
1868 do
1869 {
1870 struct section_list *osect = collate->sections;
1871
1872 while (osect != sect)
1873 if (osect->rules != NULL
1874 && memcmp (s1: osect->rules, s2: sect->rules,
1875 n: nrules * sizeof (osect->rules[0])) == 0)
1876 break;
1877 else
1878 osect = osect->next;
1879
1880 if (osect == sect)
1881 sect->ruleidx = ruleidx++;
1882 else
1883 sect->ruleidx = osect->ruleidx;
1884
1885 /* Next section. */
1886 do
1887 sect = sect->next;
1888 while (sect != NULL && sect->rules == NULL);
1889 }
1890 while (sect != NULL);
1891 /* We are currently not prepared for more than 128 rulesets. But this
1892 should never really be a problem. */
1893 assert (ruleidx <= 128);
1894}
1895
1896
1897static int32_t
1898output_weight (struct obstack *pool, struct locale_collate_t *collate,
1899 struct element_t *elem)
1900{
1901 size_t cnt;
1902 int32_t retval;
1903
1904 /* Optimize the use of UNDEFINED. */
1905 if (elem == &collate->undefined)
1906 /* The weights are already inserted. */
1907 return 0;
1908
1909 /* This byte can start exactly one collation element and this is
1910 a single byte. We can directly give the index to the weights. */
1911 retval = obstack_object_size (pool);
1912
1913 /* Construct the weight. */
1914 for (cnt = 0; cnt < nrules; ++cnt)
1915 {
1916 char buf[elem->weights[cnt].cnt * 7];
1917 int len = 0;
1918 int i;
1919
1920 for (i = 0; i < elem->weights[cnt].cnt; ++i)
1921 /* Encode the weight value. We do nothing for IGNORE entries. */
1922 if (elem->weights[cnt].w[i] != NULL)
1923 len += utf8_encode (buf: &buf[len],
1924 val: elem->weights[cnt].w[i]->mborder[cnt]);
1925
1926 /* And add the buffer content. */
1927 obstack_1grow (pool, len);
1928 obstack_grow (pool, buf, len);
1929 }
1930
1931 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1932}
1933
1934
1935static int32_t
1936output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1937 struct element_t *elem)
1938{
1939 size_t cnt;
1940 int32_t retval;
1941
1942 /* Optimize the use of UNDEFINED. */
1943 if (elem == &collate->undefined)
1944 /* The weights are already inserted. */
1945 return 0;
1946
1947 /* This byte can start exactly one collation element and this is
1948 a single byte. We can directly give the index to the weights. */
1949 retval = obstack_object_size (pool) / sizeof (int32_t);
1950
1951 /* Construct the weight. */
1952 for (cnt = 0; cnt < nrules; ++cnt)
1953 {
1954 int32_t buf[elem->weights[cnt].cnt];
1955 int i;
1956 int32_t j;
1957
1958 for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1959 if (elem->weights[cnt].w[i] != NULL)
1960 buf[j++] = elem->weights[cnt].w[i]->wcorder;
1961
1962 /* And add the buffer content. */
1963 obstack_int32_grow (obstack: pool, data: j);
1964
1965 obstack_grow (pool, buf, j * sizeof (int32_t));
1966 maybe_swap_uint32_obstack (obstack: pool, n: j);
1967 }
1968
1969 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1970}
1971
/* State read by add_to_tablewc, which itself only receives the
   character and the element list.  If localedef is ever threaded,
   this would need to be a __thread variable.  */
static struct
{
  /* Pool handed to output_weightwc to receive the weight records.  */
  struct obstack *weightpool;
  /* Pool receiving the entries for characters which start more than
     one element or an element longer than one wide character.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of runs of consecutive
     elements.  */
  struct obstack *indpool;
  /* Collation data handed to output_weightwc.  */
  struct locale_collate_t *collate;
  /* Table which maps a wide character to its index.  */
  struct collidx_table *tablewc;
} atwc;
1981
1982static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1983
1984static void
1985add_to_tablewc (uint32_t ch, struct element_t *runp)
1986{
1987 if (runp->wcnext == NULL && runp->nwcs == 1)
1988 {
1989 int32_t weigthidx = output_weightwc (pool: atwc.weightpool, collate: atwc.collate,
1990 elem: runp);
1991 collidx_table_add (t: atwc.tablewc, wc: ch, value: weigthidx);
1992 }
1993 else
1994 {
1995 /* As for the singlebyte table, we recognize sequences and
1996 compress them. */
1997
1998 collidx_table_add (t: atwc.tablewc, wc: ch,
1999 value: -(obstack_object_size (atwc.extrapool)
2000 / sizeof (uint32_t)));
2001
2002 do
2003 {
2004 /* Store the current index in the weight table. We know that
2005 the current position in the `extrapool' is aligned on a
2006 32-bit address. */
2007 int32_t weightidx;
2008 int added;
2009
2010 /* Find out wether this is a single entry or we have more than
2011 one consecutive entry. */
2012 if (runp->wcnext != NULL
2013 && runp->nwcs == runp->wcnext->nwcs
2014 && wmemcmp (s1: (wchar_t *) runp->wcs,
2015 s2: (wchar_t *)runp->wcnext->wcs,
2016 n: runp->nwcs - 1) == 0
2017 && (runp->wcs[runp->nwcs - 1]
2018 == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2019 {
2020 int i;
2021 struct element_t *series_startp = runp;
2022 struct element_t *curp;
2023
2024 /* Now add first the initial byte sequence. */
2025 added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2026 if (sizeof (int32_t) == sizeof (int))
2027 obstack_make_room (atwc.extrapool, added);
2028
2029 /* More than one consecutive entry. We mark this by having
2030 a negative index into the indirect table. */
2031 obstack_int32_grow_fast (obstack: atwc.extrapool,
2032 data: -(obstack_object_size (atwc.indpool)
2033 / sizeof (int32_t)));
2034 obstack_int32_grow_fast (obstack: atwc.extrapool, data: runp->nwcs - 1);
2035
2036 do
2037 runp = runp->wcnext;
2038 while (runp->wcnext != NULL
2039 && runp->nwcs == runp->wcnext->nwcs
2040 && wmemcmp (s1: (wchar_t *) runp->wcs,
2041 s2: (wchar_t *)runp->wcnext->wcs,
2042 n: runp->nwcs - 1) == 0
2043 && (runp->wcs[runp->nwcs - 1]
2044 == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2045
2046 /* Now walk backward from here to the beginning. */
2047 curp = runp;
2048
2049 for (i = 1; i < runp->nwcs; ++i)
2050 obstack_int32_grow_fast (obstack: atwc.extrapool, data: curp->wcs[i]);
2051
2052 /* Now find the end of the consecutive sequence and
2053 add all the indices in the indirect pool. */
2054 do
2055 {
2056 weightidx = output_weightwc (pool: atwc.weightpool, collate: atwc.collate,
2057 elem: curp);
2058 obstack_int32_grow (obstack: atwc.indpool, data: weightidx);
2059
2060 curp = curp->wclast;
2061 }
2062 while (curp != series_startp);
2063
2064 /* Add the final weight. */
2065 weightidx = output_weightwc (pool: atwc.weightpool, collate: atwc.collate,
2066 elem: curp);
2067 obstack_int32_grow (obstack: atwc.indpool, data: weightidx);
2068
2069 /* And add the end byte sequence. Without length this
2070 time. */
2071 for (i = 1; i < curp->nwcs; ++i)
2072 obstack_int32_grow (obstack: atwc.extrapool, data: curp->wcs[i]);
2073 }
2074 else
2075 {
2076 /* A single entry. Simply add the index and the length and
2077 string (except for the first character which is already
2078 tested for). */
2079 int i;
2080
2081 /* Output the weight info. */
2082 weightidx = output_weightwc (pool: atwc.weightpool, collate: atwc.collate,
2083 elem: runp);
2084
2085 assert (runp->nwcs > 0);
2086 added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2087 if (sizeof (int) == sizeof (int32_t))
2088 obstack_make_room (atwc.extrapool, added);
2089
2090 obstack_int32_grow_fast (obstack: atwc.extrapool, data: weightidx);
2091 obstack_int32_grow_fast (obstack: atwc.extrapool, data: runp->nwcs - 1);
2092 for (i = 1; i < runp->nwcs; ++i)
2093 obstack_int32_grow_fast (obstack: atwc.extrapool, data: runp->wcs[i]);
2094 }
2095
2096 /* Next entry. */
2097 runp = runp->wcnext;
2098 }
2099 while (runp != NULL);
2100 }
2101}
2102
2103/* Include the C locale identity tables for _NL_COLLATE_COLLSEQMB and
2104 _NL_COLLATE_COLLSEQWC. */
2105#include "C-collate-seq.c"
2106
2107void
2108collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2109 const char *output_path)
2110{
2111 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2112 const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2113 struct locale_file file;
2114 size_t ch;
2115 int32_t tablemb[256];
2116 struct obstack weightpool;
2117 struct obstack extrapool;
2118 struct obstack indirectpool;
2119 struct section_list *sect;
2120 struct collidx_table tablewc;
2121 uint32_t elem_size;
2122 uint32_t *elem_table;
2123 int i;
2124 struct element_t *runp;
2125
2126 init_locale_data (file: &file, n_elements: nelems);
2127 add_locale_uint32 (file: &file, value: nrules);
2128
2129 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2130 if (collate == NULL || collate->codepoint_collation)
2131 {
2132 size_t idx;
2133 for (idx = 1; idx < nelems; idx++)
2134 {
2135 /* The words have to be handled specially. */
2136 if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2137 add_locale_uint32 (file: &file, value: 0);
2138 else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_CODESET)
2139 && collate != NULL)
2140 /* A valid LC_COLLATE must have a code set name. */
2141 add_locale_string (file: &file, string: charmap->code_set_name);
2142 else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB)
2143 && collate != NULL)
2144 add_locale_raw_data (file: &file, data: collseqmb, size: sizeof (collseqmb));
2145 else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC)
2146 && collate != NULL)
2147 add_locale_uint32_array (file: &file, data: collseqwc,
2148 array_length (collseqwc));
2149 else
2150 add_locale_empty (file: &file);
2151 }
2152 write_locale_data (output_path, LC_COLLATE, category: "LC_COLLATE", file: &file);
2153 return;
2154 }
2155
2156 obstack_init (&weightpool);
2157 obstack_init (&extrapool);
2158 obstack_init (&indirectpool);
2159
2160 /* Since we are using the sign of an integer to mark indirection the
2161 offsets in the arrays we are indirectly referring to must not be
2162 zero since -0 == 0. Therefore we add a bit of dummy content. */
2163 obstack_int32_grow (obstack: &extrapool, data: 0);
2164 obstack_int32_grow (obstack: &indirectpool, data: 0);
2165
2166 /* Prepare the ruleset table. */
2167 for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2168 if (sect->rules != NULL && sect->ruleidx == i)
2169 {
2170 int j;
2171
2172 obstack_make_room (&weightpool, nrules);
2173
2174 for (j = 0; j < nrules; ++j)
2175 obstack_1grow_fast (&weightpool, sect->rules[j]);
2176 ++i;
2177 }
2178 /* And align the output. */
2179 i = (nrules * i) % LOCFILE_ALIGN;
2180 if (i > 0)
2181 do
2182 obstack_1grow (&weightpool, '\0');
2183 while (++i < LOCFILE_ALIGN);
2184
2185 add_locale_raw_obstack (file: &file, obstack: &weightpool);
2186
2187 /* Generate the 8-bit table. Walk through the lists of sequences
2188 starting with the same byte and add them one after the other to
2189 the table. In case we have more than one sequence starting with
2190 the same byte we have to use extra indirection.
2191
2192 First add a record for the NUL byte. This entry will never be used
2193 so it does not matter. */
2194 tablemb[0] = 0;
2195
2196 /* Now insert the `UNDEFINED' value if it is used. Since this value
2197 will probably be used more than once it is good to store the
2198 weights only once. */
2199 if (collate->undefined.used_in_level != 0)
2200 output_weight (pool: &weightpool, collate, elem: &collate->undefined);
2201
2202 for (ch = 1; ch < 256; ++ch)
2203 if (collate->mbheads[ch]->mbnext == NULL
2204 && collate->mbheads[ch]->nmbs <= 1)
2205 {
2206 tablemb[ch] = output_weight (pool: &weightpool, collate,
2207 elem: collate->mbheads[ch]);
2208 }
2209 else
2210 {
2211 /* The entries in the list are sorted by length and then
2212 alphabetically. This is the order in which we will add the
2213 elements to the collation table. This allows simply walking
2214 the table in sequence and stopping at the first matching
2215 entry. Since the longer sequences are coming first in the
2216 list they have the possibility to match first, just as it
2217 has to be. In the worst case we are walking to the end of
2218 the list where we put, if no singlebyte sequence is defined
2219 in the locale definition, the weights for UNDEFINED.
2220
2221 To reduce the length of the search list we compress them a bit.
2222 This happens by collecting sequences of consecutive byte
2223 sequences in one entry (having and begin and end byte sequence)
2224 and add only one index into the weight table. We can find the
2225 consecutive entries since they are also consecutive in the list. */
2226 struct element_t *runp = collate->mbheads[ch];
2227 struct element_t *lastp;
2228
2229 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2230
2231 tablemb[ch] = -obstack_object_size (&extrapool);
2232
2233 do
2234 {
2235 /* Store the current index in the weight table. We know that
2236 the current position in the `extrapool' is aligned on a
2237 32-bit address. */
2238 int32_t weightidx;
2239 int added;
2240
2241 /* Find out wether this is a single entry or we have more than
2242 one consecutive entry. */
2243 if (runp->mbnext != NULL
2244 && runp->nmbs == runp->mbnext->nmbs
2245 && memcmp (s1: runp->mbs, s2: runp->mbnext->mbs, n: runp->nmbs - 1) == 0
2246 && (runp->mbs[runp->nmbs - 1]
2247 == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2248 {
2249 int i;
2250 struct element_t *series_startp = runp;
2251 struct element_t *curp;
2252
2253 /* Compute how much space we will need. */
2254 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2255 + 2 * (runp->nmbs - 1));
2256 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2257 obstack_make_room (&extrapool, added);
2258
2259 /* More than one consecutive entry. We mark this by having
2260 a negative index into the indirect table. */
2261 obstack_int32_grow_fast (obstack: &extrapool,
2262 data: -(obstack_object_size (&indirectpool)
2263 / sizeof (int32_t)));
2264
2265 /* Now search first the end of the series. */
2266 do
2267 runp = runp->mbnext;
2268 while (runp->mbnext != NULL
2269 && runp->nmbs == runp->mbnext->nmbs
2270 && memcmp (s1: runp->mbs, s2: runp->mbnext->mbs,
2271 n: runp->nmbs - 1) == 0
2272 && (runp->mbs[runp->nmbs - 1]
2273 == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2274
2275 /* Now walk backward from here to the beginning. */
2276 curp = runp;
2277
2278 assert (runp->nmbs <= 256);
2279 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2280 for (i = 1; i < curp->nmbs; ++i)
2281 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2282
2283 /* Now find the end of the consecutive sequence and
2284 add all the indices in the indirect pool. */
2285 do
2286 {
2287 weightidx = output_weight (pool: &weightpool, collate, elem: curp);
2288 obstack_int32_grow (obstack: &indirectpool, data: weightidx);
2289
2290 curp = curp->mblast;
2291 }
2292 while (curp != series_startp);
2293
2294 /* Add the final weight. */
2295 weightidx = output_weight (pool: &weightpool, collate, elem: curp);
2296 obstack_int32_grow (obstack: &indirectpool, data: weightidx);
2297
2298 /* And add the end byte sequence. Without length this
2299 time. */
2300 for (i = 1; i < curp->nmbs; ++i)
2301 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2302 }
2303 else
2304 {
2305 /* A single entry. Simply add the index and the length and
2306 string (except for the first character which is already
2307 tested for). */
2308 int i;
2309
2310 /* Output the weight info. */
2311 weightidx = output_weight (pool: &weightpool, collate, elem: runp);
2312
2313 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2314 + runp->nmbs - 1);
2315 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2316 obstack_make_room (&extrapool, added);
2317
2318 obstack_int32_grow_fast (obstack: &extrapool, data: weightidx);
2319 assert (runp->nmbs <= 256);
2320 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2321
2322 for (i = 1; i < runp->nmbs; ++i)
2323 obstack_1grow_fast (&extrapool, runp->mbs[i]);
2324 }
2325
2326 /* Add alignment bytes if necessary. */
2327 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2328 obstack_1grow_fast (&extrapool, '\0');
2329
2330 /* Next entry. */
2331 lastp = runp;
2332 runp = runp->mbnext;
2333 }
2334 while (runp != NULL);
2335
2336 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2337
2338 /* If the final entry in the list is not a single character we
2339 add an UNDEFINED entry here. */
2340 if (lastp->nmbs != 1)
2341 {
2342 int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
2343 obstack_make_room (&extrapool, added);
2344
2345 obstack_int32_grow_fast (obstack: &extrapool, data: 0);
2346 /* XXX What rule? We just pick the first. */
2347 obstack_1grow_fast (&extrapool, 0);
2348 /* Length is zero. */
2349 obstack_1grow_fast (&extrapool, 0);
2350
2351 /* Add alignment bytes if necessary. */
2352 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2353 obstack_1grow_fast (&extrapool, '\0');
2354 }
2355 }
2356
2357 /* Add padding to the tables if necessary. */
2358 while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
2359 obstack_1grow (&weightpool, 0);
2360
2361 /* Now add the four tables. */
2362 add_locale_uint32_array (file: &file, data: (const uint32_t *) tablemb, n_elems: 256);
2363 add_locale_raw_obstack (file: &file, obstack: &weightpool);
2364 add_locale_raw_obstack (file: &file, obstack: &extrapool);
2365 add_locale_raw_obstack (file: &file, obstack: &indirectpool);
2366
2367 /* Now the same for the wide character table. We need to store some
2368 more information here. */
2369 add_locale_empty (file: &file);
2370 add_locale_empty (file: &file);
2371 add_locale_empty (file: &file);
2372
2373 /* Since we are using the sign of an integer to mark indirection the
2374 offsets in the arrays we are indirectly referring to must not be
2375 zero since -0 == 0. Therefore we add a bit of dummy content. */
2376 obstack_int32_grow (obstack: &extrapool, data: 0);
2377 obstack_int32_grow (obstack: &indirectpool, data: 0);
2378
2379 /* Now insert the `UNDEFINED' value if it is used. Since this value
2380 will probably be used more than once it is good to store the
2381 weights only once. */
2382 if (output_weightwc (pool: &weightpool, collate, elem: &collate->undefined) != 0)
2383 abort ();
2384
2385 /* Generate the table. Walk through the lists of sequences starting
2386 with the same wide character and add them one after the other to
2387 the table. In case we have more than one sequence starting with
2388 the same byte we have to use extra indirection. */
2389 tablewc.p = 6;
2390 tablewc.q = 10;
2391 collidx_table_init (t: &tablewc);
2392
2393 atwc.weightpool = &weightpool;
2394 atwc.extrapool = &extrapool;
2395 atwc.indpool = &indirectpool;
2396 atwc.collate = collate;
2397 atwc.tablewc = &tablewc;
2398
2399 wchead_table_iterate (t: &collate->wcheads, fn: add_to_tablewc);
2400
2401 memset (s: &atwc, c: 0, n: sizeof (atwc));
2402
2403 /* Now add the four tables. */
2404 add_locale_collidx_table (file: &file, t: &tablewc);
2405 add_locale_raw_obstack (file: &file, obstack: &weightpool);
2406 add_locale_raw_obstack (file: &file, obstack: &extrapool);
2407 add_locale_raw_obstack (file: &file, obstack: &indirectpool);
2408
2409 /* Finally write the table with collation element names out. It is
2410 a hash table with a simple function which gets the name of the
2411 character as the input. One character might have many names. The
2412 value associated with the name is an index into the weight table
2413 where we are then interested in the first-level weight value.
2414
2415 To determine how large the table should be we are counting the
2416 elements have to put in. Since we are using internal chaining
2417 using a secondary hash function we have to make the table a bit
2418 larger to avoid extremely long search times. We can achieve
2419 good results with a 40% larger table than there are entries. */
2420 elem_size = 0;
2421 runp = collate->start;
2422 while (runp != NULL)
2423 {
2424 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2425 /* Yep, the element really counts. */
2426 ++elem_size;
2427
2428 runp = runp->next;
2429 }
2430 /* Add 50% and find the next prime number. */
2431 elem_size = next_prime (seed: elem_size + (elem_size >> 1));
2432
2433 /* Allocate the table. Each entry consists of two words: the hash
2434 value and an index in a secondary table which provides the index
2435 into the weight table and the string itself (so that a match can
2436 be determined). */
2437 elem_table = (uint32_t *) obstack_alloc (&extrapool,
2438 elem_size * 2 * sizeof (uint32_t));
2439 memset (s: elem_table, c: '\0', n: elem_size * 2 * sizeof (uint32_t));
2440
2441 /* Now add the elements. */
2442 runp = collate->start;
2443 while (runp != NULL)
2444 {
2445 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2446 {
2447 /* Compute the hash value of the name. */
2448 uint32_t namelen = strlen (s: runp->name);
2449 uint32_t hash = elem_hash (str: runp->name, n: namelen);
2450 size_t idx = hash % elem_size;
2451#ifndef NDEBUG
2452 size_t start_idx = idx;
2453#endif
2454
2455 if (elem_table[idx * 2] != 0)
2456 {
2457 /* The spot is already taken. Try iterating using the value
2458 from the secondary hashing function. */
2459 size_t iter = hash % (elem_size - 2) + 1;
2460
2461 do
2462 {
2463 idx += iter;
2464 if (idx >= elem_size)
2465 idx -= elem_size;
2466 assert (idx != start_idx);
2467 }
2468 while (elem_table[idx * 2] != 0);
2469 }
2470 /* This is the spot where we will insert the value. */
2471 elem_table[idx * 2] = hash;
2472 elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2473
2474 /* The string itself including length. */
2475 obstack_1grow (&extrapool, namelen);
2476 obstack_grow (&extrapool, runp->name, namelen);
2477
2478 /* And the multibyte representation. */
2479 obstack_1grow (&extrapool, runp->nmbs);
2480 obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2481
2482 /* And align again to 32 bits. */
2483 if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2484 obstack_grow (&extrapool, "\0\0",
2485 (sizeof (int32_t)
2486 - ((1 + namelen + 1 + runp->nmbs)
2487 % sizeof (int32_t))));
2488
2489 /* Now some 32-bit values: multibyte collation sequence,
2490 wide char string (including length), and wide char
2491 collation sequence. */
2492 obstack_int32_grow (obstack: &extrapool, data: runp->mbseqorder);
2493
2494 obstack_int32_grow (obstack: &extrapool, data: runp->nwcs);
2495 obstack_grow (&extrapool, runp->wcs,
2496 runp->nwcs * sizeof (uint32_t));
2497 maybe_swap_uint32_obstack (obstack: &extrapool, n: runp->nwcs);
2498
2499 obstack_int32_grow (obstack: &extrapool, data: runp->wcseqorder);
2500 }
2501
2502 runp = runp->next;
2503 }
2504
2505 /* Prepare to write out this data. */
2506 add_locale_uint32 (file: &file, value: elem_size);
2507 add_locale_uint32_array (file: &file, data: elem_table, n_elems: 2 * elem_size);
2508 add_locale_raw_obstack (file: &file, obstack: &extrapool);
2509 add_locale_raw_data (file: &file, data: collate->mbseqorder, size: 256);
2510 add_locale_collseq_table (file: &file, t: &collate->wcseqorder);
2511 add_locale_string (file: &file, string: charmap->code_set_name);
2512 write_locale_data (output_path, LC_COLLATE, category: "LC_COLLATE", file: &file);
2513
2514 obstack_free (&weightpool, NULL);
2515 obstack_free (&extrapool, NULL);
2516 obstack_free (&indirectpool, NULL);
2517}
2518
2519
2520static enum token_t
2521skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2522 const struct charmap_t *charmap, int to_endif)
2523{
2524 while (1)
2525 {
2526 struct token *now = lr_token (lr: ldfile, charmap, NULL, NULL, verbose: 0);
2527 enum token_t nowtok = now->tok;
2528
2529 if (nowtok == tok_eof || nowtok == tok_end)
2530 return nowtok;
2531
2532 if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2533 {
2534 lr_error (lr: ldfile, _("%s: nested conditionals not supported"),
2535 "LC_COLLATE");
2536 nowtok = skip_to (ldfile, collate, charmap, to_endif: tok_endif);
2537 if (nowtok == tok_eof || nowtok == tok_end)
2538 return nowtok;
2539 }
2540 else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2541 {
2542 lr_ignore_rest (lr: ldfile, verbose: 1);
2543 return nowtok;
2544 }
2545 else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2546 {
2547 /* Do not read the rest of the line. */
2548 return nowtok;
2549 }
2550 else if (nowtok == tok_else)
2551 {
2552 lr_error (lr: ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2553 }
2554
2555 lr_ignore_rest (lr: ldfile, verbose: 0);
2556 }
2557}
2558
2559
2560void
2561collate_read (struct linereader *ldfile, struct localedef_t *result,
2562 const struct charmap_t *charmap, const char *repertoire_name,
2563 int ignore_content)
2564{
2565 struct repertoire_t *repertoire = NULL;
2566 struct locale_collate_t *collate;
2567 struct token *now;
2568 struct token *arg = NULL;
2569 enum token_t nowtok;
2570 enum token_t was_ellipsis = tok_none;
2571 struct localedef_t *copy_locale = NULL;
2572 /* Parsing state:
2573 0 - start
2574 1 - between `order-start' and `order-end'
2575 2 - after `order-end'
2576 3 - after `reorder-after', waiting for `reorder-end'
2577 4 - after `reorder-end'
2578 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2579 6 - after `reorder-sections-end'
2580 */
2581 int state = 0;
2582
2583 /* Get the repertoire we have to use. */
2584 if (repertoire_name != NULL)
2585 repertoire = repertoire_read (filename: repertoire_name);
2586
2587 /* The rest of the line containing `LC_COLLATE' must be free. */
2588 lr_ignore_rest (lr: ldfile, verbose: 1);
2589
2590 while (1)
2591 {
2592 do
2593 {
2594 now = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
2595 nowtok = now->tok;
2596 }
2597 while (nowtok == tok_eol);
2598
2599 if (nowtok != tok_define)
2600 break;
2601
2602 if (ignore_content)
2603 lr_ignore_rest (lr: ldfile, verbose: 0);
2604 else
2605 {
2606 arg = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
2607 if (arg->tok != tok_ident)
2608 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2609 else
2610 {
2611 /* Simply add the new symbol. */
2612 struct name_list *newsym = xmalloc (n: sizeof (*newsym)
2613 + arg->val.str.lenmb + 1);
2614 memcpy (dest: newsym->str, src: arg->val.str.startmb, n: arg->val.str.lenmb);
2615 newsym->str[arg->val.str.lenmb] = '\0';
2616 newsym->next = defined;
2617 defined = newsym;
2618
2619 lr_ignore_rest (lr: ldfile, verbose: 1);
2620 }
2621 }
2622 }
2623
2624 if (nowtok == tok_copy)
2625 {
2626 now = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
2627 if (now->tok != tok_string)
2628 {
2629 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2630
2631 skip_category:
2632 do
2633 now = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
2634 while (now->tok != tok_eof && now->tok != tok_end);
2635
2636 if (now->tok != tok_eof
2637 || (now = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose),
2638 now->tok == tok_eof))
2639 lr_error (lr: ldfile, _("%s: premature end of file"), "LC_COLLATE");
2640 else if (now->tok != tok_lc_collate)
2641 {
2642 lr_error (lr: ldfile, _("\
2643%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2644 lr_ignore_rest (lr: ldfile, verbose: 0);
2645 }
2646 else
2647 lr_ignore_rest (lr: ldfile, verbose: 1);
2648
2649 return;
2650 }
2651
2652 if (! ignore_content)
2653 {
2654 /* Get the locale definition. */
2655 copy_locale = load_locale (LC_COLLATE, name: now->val.str.startmb,
2656 repertoire_name, charmap, NULL);
2657 if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2658 {
2659 /* Not yet loaded. So do it now. */
2660 if (locfile_read (result: copy_locale, charmap) != 0)
2661 goto skip_category;
2662 }
2663
2664 if (copy_locale->categories[LC_COLLATE].collate == NULL)
2665 return;
2666 }
2667
2668 lr_ignore_rest (lr: ldfile, verbose: 1);
2669
2670 now = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
2671 nowtok = now->tok;
2672 }
2673
2674 /* Prepare the data structures. */
2675 collate_startup (ldfile, locale: result, copy_locale, ignore_content);
2676 collate = result->categories[LC_COLLATE].collate;
2677
2678 while (1)
2679 {
2680 char ucs4buf[10];
2681 char *symstr;
2682 size_t symlen;
2683
2684 /* Of course we don't proceed beyond the end of file. */
2685 if (nowtok == tok_eof)
2686 break;
2687
2688 /* Ingore empty lines. */
2689 if (nowtok == tok_eol)
2690 {
2691 now = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
2692 nowtok = now->tok;
2693 continue;
2694 }
2695
2696 switch (nowtok)
2697 {
2698 case tok_codepoint_collation:
2699 collate->codepoint_collation = true;
2700 break;
2701
2702 case tok_copy:
2703 /* Allow copying other locales. */
2704 now = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
2705 if (now->tok != tok_string)
2706 goto err_label;
2707
2708 if (! ignore_content)
2709 load_locale (LC_COLLATE, name: now->val.str.startmb, repertoire_name,
2710 charmap, copy_locale: result);
2711
2712 lr_ignore_rest (lr: ldfile, verbose: 1);
2713 break;
2714
2715 case tok_coll_weight_max:
2716 /* Ignore the rest of the line if we don't need the input of
2717 this line. */
2718 if (ignore_content)
2719 {
2720 lr_ignore_rest (lr: ldfile, verbose: 0);
2721 break;
2722 }
2723
2724 if (state != 0)
2725 goto err_label;
2726
2727 arg = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
2728 if (arg->tok != tok_number)
2729 goto err_label;
2730 if (collate->col_weight_max != -1)
2731 lr_error (lr: ldfile, _("%s: duplicate definition of `%s'"),
2732 "LC_COLLATE", "col_weight_max");
2733 else
2734 collate->col_weight_max = arg->val.num;
2735 lr_ignore_rest (lr: ldfile, verbose: 1);
2736 break;
2737
2738 case tok_section_symbol:
2739 /* Ignore the rest of the line if we don't need the input of
2740 this line. */
2741 if (ignore_content)
2742 {
2743 lr_ignore_rest (lr: ldfile, verbose: 0);
2744 break;
2745 }
2746
2747 if (state != 0)
2748 goto err_label;
2749
2750 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
2751 if (arg->tok != tok_bsymbol)
2752 goto err_label;
2753 else if (!ignore_content)
2754 {
2755 /* Check whether this section is already known. */
2756 struct section_list *known = collate->sections;
2757 while (known != NULL)
2758 {
2759 if (strcmp (s1: known->name, s2: arg->val.str.startmb) == 0)
2760 break;
2761 known = known->next;
2762 }
2763
2764 if (known != NULL)
2765 {
2766 lr_error (lr: ldfile,
2767 _("%s: duplicate declaration of section `%s'"),
2768 "LC_COLLATE", arg->val.str.startmb);
2769 free (ptr: arg->val.str.startmb);
2770 }
2771 else
2772 collate->sections = make_seclist_elem (collate,
2773 string: arg->val.str.startmb,
2774 next: collate->sections);
2775
2776 lr_ignore_rest (lr: ldfile, verbose: known == NULL);
2777 }
2778 else
2779 {
2780 free (ptr: arg->val.str.startmb);
2781 lr_ignore_rest (lr: ldfile, verbose: 0);
2782 }
2783 break;
2784
2785 case tok_collating_element:
2786 /* Ignore the rest of the line if we don't need the input of
2787 this line. */
2788 if (ignore_content)
2789 {
2790 lr_ignore_rest (lr: ldfile, verbose: 0);
2791 break;
2792 }
2793
2794 if (state != 0 && state != 2)
2795 goto err_label;
2796
2797 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
2798 if (arg->tok != tok_bsymbol)
2799 goto err_label;
2800 else
2801 {
2802 const char *symbol = arg->val.str.startmb;
2803 size_t symbol_len = arg->val.str.lenmb;
2804
2805 /* Next the `from' keyword. */
2806 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
2807 if (arg->tok != tok_from)
2808 {
2809 free (ptr: (char *) symbol);
2810 goto err_label;
2811 }
2812
2813 ldfile->return_widestr = 1;
2814 ldfile->translate_strings = 1;
2815
2816 /* Finally the string with the replacement. */
2817 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
2818
2819 ldfile->return_widestr = 0;
2820 ldfile->translate_strings = 0;
2821
2822 if (arg->tok != tok_string)
2823 goto err_label;
2824
2825 if (!ignore_content && symbol != NULL)
2826 {
2827 /* The name is already defined. */
2828 if (check_duplicate (ldfile, collate, charmap,
2829 repertoire, symbol, symbol_len))
2830 goto col_elem_free;
2831
2832 if (arg->val.str.startmb != NULL)
2833 insert_entry (htab: &collate->elem_table, key: symbol, keylen: symbol_len,
2834 data: new_element (collate,
2835 mbs: arg->val.str.startmb,
2836 mbslen: arg->val.str.lenmb - 1,
2837 wcs: arg->val.str.startwc,
2838 name: symbol, namelen: symbol_len, is_character: 0));
2839 }
2840 else
2841 {
2842 col_elem_free:
2843 free (ptr: (char *) symbol);
2844 free (ptr: arg->val.str.startmb);
2845 free (ptr: arg->val.str.startwc);
2846 }
2847 lr_ignore_rest (lr: ldfile, verbose: 1);
2848 }
2849 break;
2850
2851 case tok_collating_symbol:
2852 /* Ignore the rest of the line if we don't need the input of
2853 this line. */
2854 if (ignore_content)
2855 {
2856 lr_ignore_rest (lr: ldfile, verbose: 0);
2857 break;
2858 }
2859
2860 if (state != 0 && state != 2)
2861 goto err_label;
2862
2863 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
2864 if (arg->tok != tok_bsymbol)
2865 goto err_label;
2866 else
2867 {
2868 char *symbol = arg->val.str.startmb;
2869 size_t symbol_len = arg->val.str.lenmb;
2870 char *endsymbol = NULL;
2871 size_t endsymbol_len = 0;
2872 enum token_t ellipsis = tok_none;
2873
2874 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
2875 if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2876 {
2877 ellipsis = arg->tok;
2878
2879 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire,
2880 verbose);
2881 if (arg->tok != tok_bsymbol)
2882 {
2883 free (ptr: symbol);
2884 goto err_label;
2885 }
2886
2887 endsymbol = arg->val.str.startmb;
2888 endsymbol_len = arg->val.str.lenmb;
2889
2890 lr_ignore_rest (lr: ldfile, verbose: 1);
2891 }
2892 else if (arg->tok != tok_eol)
2893 {
2894 free (ptr: symbol);
2895 goto err_label;
2896 }
2897
2898 if (!ignore_content)
2899 {
2900 if (symbol == NULL
2901 || (ellipsis != tok_none && endsymbol == NULL))
2902 {
2903 lr_error (lr: ldfile, _("\
2904%s: unknown character in collating symbol name"),
2905 "LC_COLLATE");
2906 goto col_sym_free;
2907 }
2908 else if (ellipsis == tok_none)
2909 {
2910 /* A single symbol, no ellipsis. */
2911 if (check_duplicate (ldfile, collate, charmap,
2912 repertoire, symbol, symbol_len))
2913 /* The name is already defined. */
2914 goto col_sym_free;
2915
2916 insert_entry (htab: &collate->sym_table, key: symbol, keylen: symbol_len,
2917 data: new_symbol (collate, name: symbol, len: symbol_len));
2918 }
2919 else if (symbol_len != endsymbol_len)
2920 {
2921 col_sym_inv_range:
2922 lr_error (lr: ldfile,
2923 _("invalid names for character range"));
2924 goto col_sym_free;
2925 }
2926 else
2927 {
2928 /* Oh my, we have to handle an ellipsis. First, as
2929 usual, determine the common prefix and then
2930 convert the rest into a range. */
2931 size_t prefixlen;
2932 unsigned long int from;
2933 unsigned long int to;
2934 char *endp;
2935
2936 for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2937 if (symbol[prefixlen] != endsymbol[prefixlen])
2938 break;
2939
2940 /* Convert the rest into numbers. */
2941 symbol[symbol_len] = '\0';
2942 from = strtoul (nptr: &symbol[prefixlen], endptr: &endp,
2943 base: ellipsis == tok_ellipsis2 ? 16 : 10);
2944 if (*endp != '\0')
2945 goto col_sym_inv_range;
2946
2947 endsymbol[symbol_len] = '\0';
2948 to = strtoul (nptr: &endsymbol[prefixlen], endptr: &endp,
2949 base: ellipsis == tok_ellipsis2 ? 16 : 10);
2950 if (*endp != '\0')
2951 goto col_sym_inv_range;
2952
2953 if (from > to)
2954 goto col_sym_inv_range;
2955
2956 /* Now loop over all entries. */
2957 while (from <= to)
2958 {
2959 char *symbuf;
2960
2961 symbuf = (char *) obstack_alloc (&collate->mempool,
2962 symbol_len + 1);
2963
2964 /* Create the name. */
2965 sprintf (s: symbuf,
2966 format: ellipsis == tok_ellipsis2
2967 ? "%.*s%.*lX" : "%.*s%.*lu",
2968 (int) prefixlen, symbol,
2969 (int) (symbol_len - prefixlen), from);
2970
2971 if (check_duplicate (ldfile, collate, charmap,
2972 repertoire, symbol: symbuf, symbol_len))
2973 /* The name is already defined. */
2974 goto col_sym_free;
2975
2976 insert_entry (htab: &collate->sym_table, key: symbuf,
2977 keylen: symbol_len,
2978 data: new_symbol (collate, name: symbuf,
2979 len: symbol_len));
2980
2981 /* Increment the counter. */
2982 ++from;
2983 }
2984
2985 goto col_sym_free;
2986 }
2987 }
2988 else
2989 {
2990 col_sym_free:
2991 free (ptr: symbol);
2992 free (ptr: endsymbol);
2993 }
2994 }
2995 break;
2996
2997 case tok_symbol_equivalence:
2998 /* Ignore the rest of the line if we don't need the input of
2999 this line. */
3000 if (ignore_content)
3001 {
3002 lr_ignore_rest (lr: ldfile, verbose: 0);
3003 break;
3004 }
3005
3006 if (state != 0)
3007 goto err_label;
3008
3009 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
3010 if (arg->tok != tok_bsymbol)
3011 goto err_label;
3012 else
3013 {
3014 const char *newname = arg->val.str.startmb;
3015 size_t newname_len = arg->val.str.lenmb;
3016 const char *symname;
3017 size_t symname_len;
3018 void *symval; /* Actually struct symbol_t* */
3019
3020 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
3021 if (arg->tok != tok_bsymbol)
3022 {
3023 free (ptr: (char *) newname);
3024 goto err_label;
3025 }
3026
3027 symname = arg->val.str.startmb;
3028 symname_len = arg->val.str.lenmb;
3029
3030 if (newname == NULL)
3031 {
3032 lr_error (lr: ldfile, _("\
3033%s: unknown character in equivalent definition name"),
3034 "LC_COLLATE");
3035
3036 sym_equiv_free:
3037 free (ptr: (char *) newname);
3038 free (ptr: (char *) symname);
3039 break;
3040 }
3041 if (symname == NULL)
3042 {
3043 lr_error (lr: ldfile, _("\
3044%s: unknown character in equivalent definition value"),
3045 "LC_COLLATE");
3046 goto sym_equiv_free;
3047 }
3048
3049 /* See whether the symbol name is already defined. */
3050 if (find_entry (htab: &collate->sym_table, key: symname, keylen: symname_len,
3051 result: &symval) != 0)
3052 {
3053 lr_error (lr: ldfile, _("\
3054%s: unknown symbol `%s' in equivalent definition"),
3055 "LC_COLLATE", symname);
3056 goto sym_equiv_free;
3057 }
3058
3059 if (insert_entry (htab: &collate->sym_table,
3060 key: newname, keylen: newname_len, data: symval) < 0)
3061 {
3062 lr_error (lr: ldfile, _("\
3063error while adding equivalent collating symbol"));
3064 goto sym_equiv_free;
3065 }
3066
3067 free (ptr: (char *) symname);
3068 }
3069 lr_ignore_rest (lr: ldfile, verbose: 1);
3070 break;
3071
3072 case tok_script:
3073 /* Ignore the rest of the line if we don't need the input of
3074 this line. */
3075 if (ignore_content)
3076 {
3077 lr_ignore_rest (lr: ldfile, verbose: 0);
3078 break;
3079 }
3080
3081 /* We get told about the scripts we know. */
3082 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
3083 if (arg->tok != tok_bsymbol)
3084 goto err_label;
3085 else
3086 {
3087 struct section_list *runp = collate->known_sections;
3088 char *name;
3089
3090 while (runp != NULL)
3091 if (strncmp (s1: runp->name, s2: arg->val.str.startmb,
3092 n: arg->val.str.lenmb) == 0
3093 && runp->name[arg->val.str.lenmb] == '\0')
3094 break;
3095 else
3096 runp = runp->def_next;
3097
3098 if (runp != NULL)
3099 {
3100 lr_error (lr: ldfile, _("duplicate definition of script `%s'"),
3101 runp->name);
3102 lr_ignore_rest (lr: ldfile, verbose: 0);
3103 break;
3104 }
3105
3106 runp = (struct section_list *) xcalloc (n: 1, s: sizeof (*runp));
3107 name = (char *) xmalloc (n: arg->val.str.lenmb + 1);
3108 memcpy (dest: name, src: arg->val.str.startmb, n: arg->val.str.lenmb);
3109 name[arg->val.str.lenmb] = '\0';
3110 runp->name = name;
3111
3112 runp->def_next = collate->known_sections;
3113 collate->known_sections = runp;
3114 }
3115 lr_ignore_rest (lr: ldfile, verbose: 1);
3116 break;
3117
3118 case tok_order_start:
3119 /* Ignore the rest of the line if we don't need the input of
3120 this line. */
3121 if (ignore_content)
3122 {
3123 lr_ignore_rest (lr: ldfile, verbose: 0);
3124 break;
3125 }
3126
3127 if (state != 0 && state != 1 && state != 2)
3128 goto err_label;
3129 state = 1;
3130
3131 /* The 14652 draft does not specify whether all `order_start' lines
3132 must contain the same number of sort-rules, but 14651 does. So
3133 we require this here as well. */
3134 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
3135 if (arg->tok == tok_bsymbol)
3136 {
3137 /* This better should be a section name. */
3138 struct section_list *sp = collate->known_sections;
3139 while (sp != NULL
3140 && (sp->name == NULL
3141 || strncmp (s1: sp->name, s2: arg->val.str.startmb,
3142 n: arg->val.str.lenmb) != 0
3143 || sp->name[arg->val.str.lenmb] != '\0'))
3144 sp = sp->def_next;
3145
3146 if (sp == NULL)
3147 {
3148 lr_error (lr: ldfile, _("\
3149%s: unknown section name `%.*s'"),
3150 "LC_COLLATE", (int) arg->val.str.lenmb,
3151 arg->val.str.startmb);
3152 /* We use the error section. */
3153 collate->current_section = &collate->error_section;
3154
3155 if (collate->error_section.first == NULL)
3156 {
3157 /* Insert &collate->error_section at the end of
3158 the collate->sections list. */
3159 if (collate->sections == NULL)
3160 collate->sections = &collate->error_section;
3161 else
3162 {
3163 sp = collate->sections;
3164 while (sp->next != NULL)
3165 sp = sp->next;
3166
3167 sp->next = &collate->error_section;
3168 }
3169 collate->error_section.next = NULL;
3170 }
3171 }
3172 else
3173 {
3174 /* One should not be allowed to open the same
3175 section twice. */
3176 if (sp->first != NULL)
3177 lr_error (lr: ldfile, _("\
3178%s: multiple order definitions for section `%s'"),
3179 "LC_COLLATE", sp->name);
3180 else
3181 {
3182 /* Insert sp in the collate->sections list,
3183 right after collate->current_section. */
3184 if (collate->current_section != NULL)
3185 {
3186 sp->next = collate->current_section->next;
3187 collate->current_section->next = sp;
3188 }
3189 else if (collate->sections == NULL)
3190 /* This is the first section to be defined. */
3191 collate->sections = sp;
3192
3193 collate->current_section = sp;
3194 }
3195
3196 /* Next should come the end of the line or a semicolon. */
3197 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire,
3198 verbose);
3199 if (arg->tok == tok_eol)
3200 {
3201 uint32_t cnt;
3202
3203 /* This means we have exactly one rule: `forward'. */
3204 if (nrules > 1)
3205 lr_error (lr: ldfile, _("\
3206%s: invalid number of sorting rules"),
3207 "LC_COLLATE");
3208 else
3209 nrules = 1;
3210 sp->rules = obstack_alloc (&collate->mempool,
3211 (sizeof (enum coll_sort_rule)
3212 * nrules));
3213 for (cnt = 0; cnt < nrules; ++cnt)
3214 sp->rules[cnt] = sort_forward;
3215
3216 /* Next line. */
3217 break;
3218 }
3219
3220 /* Get the next token. */
3221 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire,
3222 verbose);
3223 }
3224 }
3225 else
3226 {
3227 /* There is no section symbol. Therefore we use the unnamed
3228 section. */
3229 collate->current_section = &collate->unnamed_section;
3230
3231 if (collate->unnamed_section_defined)
3232 lr_error (lr: ldfile, _("\
3233%s: multiple order definitions for unnamed section"),
3234 "LC_COLLATE");
3235 else
3236 {
3237 /* Insert &collate->unnamed_section at the beginning of
3238 the collate->sections list. */
3239 collate->unnamed_section.next = collate->sections;
3240 collate->sections = &collate->unnamed_section;
3241 collate->unnamed_section_defined = true;
3242 }
3243 }
3244
3245 /* Now read the direction names. */
3246 read_directions (ldfile, arg, charmap, repertoire, result);
3247
3248 /* From now we need the strings untranslated. */
3249 ldfile->translate_strings = 0;
3250 break;
3251
3252 case tok_order_end:
3253 /* Ignore the rest of the line if we don't need the input of
3254 this line. */
3255 if (ignore_content)
3256 {
3257 lr_ignore_rest (lr: ldfile, verbose: 0);
3258 break;
3259 }
3260
3261 if (state != 1)
3262 goto err_label;
3263
3264 /* Handle ellipsis at end of list. */
3265 if (was_ellipsis != tok_none)
3266 {
3267 handle_ellipsis (ldfile, NULL, symlen: 0, ellipsis: was_ellipsis, charmap,
3268 repertoire, result);
3269 was_ellipsis = tok_none;
3270 }
3271
3272 state = 2;
3273 lr_ignore_rest (lr: ldfile, verbose: 1);
3274 break;
3275
3276 case tok_reorder_after:
3277 /* Ignore the rest of the line if we don't need the input of
3278 this line. */
3279 if (ignore_content)
3280 {
3281 lr_ignore_rest (lr: ldfile, verbose: 0);
3282 break;
3283 }
3284
3285 if (state == 1)
3286 {
3287 lr_error (lr: ldfile, _("%s: missing `order_end' keyword"),
3288 "LC_COLLATE");
3289 state = 2;
3290
3291 /* Handle ellipsis at end of list. */
3292 if (was_ellipsis != tok_none)
3293 {
3294 handle_ellipsis (ldfile, symstr: arg->val.str.startmb,
3295 symlen: arg->val.str.lenmb, ellipsis: was_ellipsis, charmap,
3296 repertoire, result);
3297 was_ellipsis = tok_none;
3298 }
3299 }
3300 else if (state == 0 && copy_locale == NULL)
3301 goto err_label;
3302 else if (state != 0 && state != 2 && state != 3)
3303 goto err_label;
3304 state = 3;
3305
3306 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
3307 if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3308 {
3309 /* Find this symbol in the sequence table. */
3310 char ucsbuf[10];
3311 char *startmb;
3312 size_t lenmb;
3313 struct element_t *insp;
3314 int no_error = 1;
3315 void *ptr;
3316
3317 if (arg->tok == tok_bsymbol)
3318 {
3319 startmb = arg->val.str.startmb;
3320 lenmb = arg->val.str.lenmb;
3321 }
3322 else
3323 {
3324 sprintf (s: ucsbuf, format: "U%08X", arg->val.ucs4);
3325 startmb = ucsbuf;
3326 lenmb = 9;
3327 }
3328
3329 if (find_entry (htab: &collate->seq_table, key: startmb, keylen: lenmb, result: &ptr) == 0)
3330 /* Yes, the symbol exists. Simply point the cursor
3331 to it. */
3332 collate->cursor = (struct element_t *) ptr;
3333 else
3334 {
3335 struct symbol_t *symbp;
3336 void *ptr;
3337
3338 if (find_entry (htab: &collate->sym_table, key: startmb, keylen: lenmb,
3339 result: &ptr) == 0)
3340 {
3341 symbp = ptr;
3342
3343 if (symbp->order->last != NULL
3344 || symbp->order->next != NULL)
3345 collate->cursor = symbp->order;
3346 else
3347 {
3348 /* This is a collating symbol but its position
3349 is not yet defined. */
3350 lr_error (lr: ldfile, _("\
3351%s: order for collating symbol %.*s not yet defined"),
3352 "LC_COLLATE", (int) lenmb, startmb);
3353 collate->cursor = NULL;
3354 no_error = 0;
3355 }
3356 }
3357 else if (find_entry (htab: &collate->elem_table, key: startmb, keylen: lenmb,
3358 result: &ptr) == 0)
3359 {
3360 insp = (struct element_t *) ptr;
3361
3362 if (insp->last != NULL || insp->next != NULL)
3363 collate->cursor = insp;
3364 else
3365 {
3366 /* This is a collating element but its position
3367 is not yet defined. */
3368 lr_error (lr: ldfile, _("\
3369%s: order for collating element %.*s not yet defined"),
3370 "LC_COLLATE", (int) lenmb, startmb);
3371 collate->cursor = NULL;
3372 no_error = 0;
3373 }
3374 }
3375 else
3376 {
3377 /* This is bad. The symbol after which we have to
3378 insert does not exist. */
3379 lr_error (lr: ldfile, _("\
3380%s: cannot reorder after %.*s: symbol not known"),
3381 "LC_COLLATE", (int) lenmb, startmb);
3382 collate->cursor = NULL;
3383 no_error = 0;
3384 }
3385 }
3386
3387 lr_ignore_rest (lr: ldfile, verbose: no_error);
3388 }
3389 else
3390 /* This must not happen. */
3391 goto err_label;
3392 break;
3393
3394 case tok_reorder_end:
3395 /* Ignore the rest of the line if we don't need the input of
3396 this line. */
3397 if (ignore_content)
3398 break;
3399
3400 if (state != 3)
3401 goto err_label;
3402 state = 4;
3403 lr_ignore_rest (lr: ldfile, verbose: 1);
3404 break;
3405
3406 case tok_reorder_sections_after:
3407 /* Ignore the rest of the line if we don't need the input of
3408 this line. */
3409 if (ignore_content)
3410 {
3411 lr_ignore_rest (lr: ldfile, verbose: 0);
3412 break;
3413 }
3414
3415 if (state == 1)
3416 {
3417 lr_error (lr: ldfile, _("%s: missing `order_end' keyword"),
3418 "LC_COLLATE");
3419 state = 2;
3420
3421 /* Handle ellipsis at end of list. */
3422 if (was_ellipsis != tok_none)
3423 {
3424 handle_ellipsis (ldfile, NULL, symlen: 0, ellipsis: was_ellipsis, charmap,
3425 repertoire, result);
3426 was_ellipsis = tok_none;
3427 }
3428 }
3429 else if (state == 3)
3430 {
3431 record_error (status: 0, errnum: 0, _("\
3432%s: missing `reorder-end' keyword"), "LC_COLLATE");
3433 state = 4;
3434 }
3435 else if (state != 2 && state != 4)
3436 goto err_label;
3437 state = 5;
3438
3439 /* Get the name of the sections we are adding after. */
3440 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire, verbose);
3441 if (arg->tok == tok_bsymbol)
3442 {
3443 /* Now find a section with this name. */
3444 struct section_list *runp = collate->sections;
3445
3446 while (runp != NULL)
3447 {
3448 if (runp->name != NULL
3449 && strlen (s: runp->name) == arg->val.str.lenmb
3450 && memcmp (s1: runp->name, s2: arg->val.str.startmb,
3451 n: arg->val.str.lenmb) == 0)
3452 break;
3453
3454 runp = runp->next;
3455 }
3456
3457 if (runp != NULL)
3458 collate->current_section = runp;
3459 else
3460 {
3461 /* This is bad. The section after which we have to
3462 reorder does not exist. Therefore we cannot
3463 process the whole rest of this reorder
3464 specification. */
3465 lr_error (lr: ldfile, _("%s: section `%.*s' not known"),
3466 "LC_COLLATE", (int) arg->val.str.lenmb,
3467 arg->val.str.startmb);
3468
3469 do
3470 {
3471 lr_ignore_rest (lr: ldfile, verbose: 0);
3472
3473 now = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
3474 }
3475 while (now->tok == tok_reorder_sections_after
3476 || now->tok == tok_reorder_sections_end
3477 || now->tok == tok_end);
3478
3479 /* Process the token we just saw. */
3480 nowtok = now->tok;
3481 continue;
3482 }
3483 }
3484 else
3485 /* This must not happen. */
3486 goto err_label;
3487 break;
3488
3489 case tok_reorder_sections_end:
3490 /* Ignore the rest of the line if we don't need the input of
3491 this line. */
3492 if (ignore_content)
3493 break;
3494
3495 if (state != 5)
3496 goto err_label;
3497 state = 6;
3498 lr_ignore_rest (lr: ldfile, verbose: 1);
3499 break;
3500
3501 case tok_bsymbol:
3502 case tok_ucs4:
3503 /* Ignore the rest of the line if we don't need the input of
3504 this line. */
3505 if (ignore_content)
3506 {
3507 lr_ignore_rest (lr: ldfile, verbose: 0);
3508 break;
3509 }
3510
3511 if (state != 0 && state != 1 && state != 3 && state != 5)
3512 goto err_label;
3513
3514 if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3515 goto err_label;
3516
3517 if (nowtok == tok_ucs4)
3518 {
3519 snprintf (s: ucs4buf, maxlen: sizeof (ucs4buf), format: "U%08X", now->val.ucs4);
3520 symstr = ucs4buf;
3521 symlen = 9;
3522 }
3523 else if (arg != NULL)
3524 {
3525 symstr = arg->val.str.startmb;
3526 symlen = arg->val.str.lenmb;
3527 }
3528 else
3529 {
3530 lr_error (lr: ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3531 (int) ldfile->token.val.str.lenmb,
3532 ldfile->token.val.str.startmb);
3533 break;
3534 }
3535
3536 struct element_t *seqp;
3537 if (state == 0)
3538 {
3539 /* We are outside an `order_start' region. This means
3540 we must only accept definitions of values for
3541 collation symbols since these are purely abstract
3542 values and don't need directions associated. */
3543 void *ptr;
3544
3545 if (find_entry (htab: &collate->seq_table, key: symstr, keylen: symlen, result: &ptr) == 0)
3546 {
3547 seqp = ptr;
3548
3549 /* It's already defined. First check whether this
3550 is really a collating symbol. */
3551 if (seqp->is_character)
3552 goto err_label;
3553
3554 goto move_entry;
3555 }
3556 else
3557 {
3558 void *result;
3559
3560 if (find_entry (htab: &collate->sym_table, key: symstr, keylen: symlen,
3561 result: &result) != 0)
3562 /* No collating symbol, it's an error. */
3563 goto err_label;
3564
3565 /* Maybe this is the first time we define a symbol
3566 value and it is before the first actual section. */
3567 if (collate->sections == NULL)
3568 collate->sections = collate->current_section =
3569 &collate->symbol_section;
3570 }
3571
3572 if (was_ellipsis != tok_none)
3573 {
3574 handle_ellipsis (ldfile, symstr, symlen, ellipsis: was_ellipsis,
3575 charmap, repertoire, result);
3576
3577 /* Remember that we processed the ellipsis. */
3578 was_ellipsis = tok_none;
3579
3580 /* And don't add the value a second time. */
3581 break;
3582 }
3583 }
3584 else if (state == 3)
3585 {
3586 /* It is possible that we already have this collation sequence.
3587 In this case we move the entry. */
3588 void *sym;
3589 void *ptr;
3590
3591 /* If the symbol after which we have to insert was not found
3592 ignore all entries. */
3593 if (collate->cursor == NULL)
3594 {
3595 lr_ignore_rest (lr: ldfile, verbose: 0);
3596 break;
3597 }
3598
3599 if (find_entry (htab: &collate->seq_table, key: symstr, keylen: symlen, result: &ptr) == 0)
3600 {
3601 seqp = (struct element_t *) ptr;
3602 goto move_entry;
3603 }
3604
3605 if (find_entry (htab: &collate->sym_table, key: symstr, keylen: symlen, result: &sym) == 0
3606 && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3607 goto move_entry;
3608
3609 if (find_entry (htab: &collate->elem_table, key: symstr, keylen: symlen, result: &ptr) == 0
3610 && (seqp = (struct element_t *) ptr,
3611 seqp->last != NULL || seqp->next != NULL
3612 || (collate->start != NULL && seqp == collate->start)))
3613 {
3614 move_entry:
3615 /* Remove the entry from the old position. */
3616 if (seqp->last == NULL)
3617 collate->start = seqp->next;
3618 else
3619 seqp->last->next = seqp->next;
3620 if (seqp->next != NULL)
3621 seqp->next->last = seqp->last;
3622
3623 /* We also have to check whether this entry is the
3624 first or last of a section. */
3625 if (seqp->section->first == seqp)
3626 {
3627 if (seqp->section->first == seqp->section->last)
3628 /* This section has no content anymore. */
3629 seqp->section->first = seqp->section->last = NULL;
3630 else
3631 seqp->section->first = seqp->next;
3632 }
3633 else if (seqp->section->last == seqp)
3634 seqp->section->last = seqp->last;
3635
3636 /* Now insert it in the new place. */
3637 insert_weights (ldfile, elem: seqp, charmap, repertoire, result,
3638 ellipsis: tok_none);
3639 break;
3640 }
3641
3642 /* Otherwise we just add a new entry. */
3643 }
3644 else if (state == 5)
3645 {
3646 /* We are reordering sections. Find the named section. */
3647 struct section_list *runp = collate->sections;
3648 struct section_list *prevp = NULL;
3649
3650 while (runp != NULL)
3651 {
3652 if (runp->name != NULL
3653 && strlen (s: runp->name) == symlen
3654 && memcmp (s1: runp->name, s2: symstr, n: symlen) == 0)
3655 break;
3656
3657 prevp = runp;
3658 runp = runp->next;
3659 }
3660
3661 if (runp == NULL)
3662 {
3663 lr_error (lr: ldfile, _("%s: section `%.*s' not known"),
3664 "LC_COLLATE", (int) symlen, symstr);
3665 lr_ignore_rest (lr: ldfile, verbose: 0);
3666 }
3667 else
3668 {
3669 if (runp != collate->current_section)
3670 {
3671 /* Remove the named section from the old place and
3672 insert it in the new one. */
3673 prevp->next = runp->next;
3674
3675 runp->next = collate->current_section->next;
3676 collate->current_section->next = runp;
3677 collate->current_section = runp;
3678 }
3679
3680 /* Process the rest of the line which might change
3681 the collation rules. */
3682 arg = lr_token (lr: ldfile, charmap, locale: result, repertoire,
3683 verbose);
3684 if (arg->tok != tok_eof && arg->tok != tok_eol)
3685 read_directions (ldfile, arg, charmap, repertoire,
3686 result);
3687 }
3688 break;
3689 }
3690 else if (was_ellipsis != tok_none)
3691 {
3692 /* Using the information in the `ellipsis_weight'
3693 element and this and the last value we have to handle
3694 the ellipsis now. */
3695 assert (state == 1);
3696
3697 handle_ellipsis (ldfile, symstr, symlen, ellipsis: was_ellipsis, charmap,
3698 repertoire, result);
3699
3700 /* Remember that we processed the ellipsis. */
3701 was_ellipsis = tok_none;
3702
3703 /* And don't add the value a second time. */
3704 break;
3705 }
3706
3707 /* Now insert in the new place. */
3708 insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3709 break;
3710
3711 case tok_undefined:
3712 /* Ignore the rest of the line if we don't need the input of
3713 this line. */
3714 if (ignore_content)
3715 {
3716 lr_ignore_rest (lr: ldfile, verbose: 0);
3717 break;
3718 }
3719
3720 if (state != 1)
3721 goto err_label;
3722
3723 if (was_ellipsis != tok_none)
3724 {
3725 lr_error (lr: ldfile,
3726 _("%s: cannot have `%s' as end of ellipsis range"),
3727 "LC_COLLATE", "UNDEFINED");
3728
3729 unlink_element (collate);
3730 was_ellipsis = tok_none;
3731 }
3732
3733 /* See whether UNDEFINED already appeared somewhere. */
3734 if (collate->undefined.next != NULL
3735 || &collate->undefined == collate->cursor)
3736 {
3737 lr_error (lr: ldfile,
3738 _("%s: order for `%.*s' already defined at %s:%Zu"),
3739 "LC_COLLATE", 9, "UNDEFINED",
3740 collate->undefined.file,
3741 collate->undefined.line);
3742 lr_ignore_rest (lr: ldfile, verbose: 0);
3743 }
3744 else
3745 /* Parse the weights. */
3746 insert_weights (ldfile, elem: &collate->undefined, charmap,
3747 repertoire, result, ellipsis: tok_none);
3748 break;
3749
3750 case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3751 case tok_ellipsis3: /* absolute ellipsis */
3752 case tok_ellipsis4: /* symbolic decimal ellipsis */
3753 /* This is the symbolic (decimal or hexadecimal) or absolute
3754 ellipsis. */
3755 if (was_ellipsis != tok_none)
3756 goto err_label;
3757
3758 if (state != 0 && state != 1 && state != 3)
3759 goto err_label;
3760
3761 was_ellipsis = nowtok;
3762
3763 insert_weights (ldfile, elem: &collate->ellipsis_weight, charmap,
3764 repertoire, result, ellipsis: nowtok);
3765 break;
3766
3767 case tok_end:
3768 seen_end:
3769 /* Next we assume `LC_COLLATE'. */
3770 if (!ignore_content)
3771 {
3772 if (state == 0
3773 && copy_locale == NULL
3774 && !collate->codepoint_collation)
3775 /* We must either see a copy statement or have
3776 ordering values, or codepoint_collation. */
3777 lr_error (lr: ldfile,
3778 _("%s: empty category description not allowed"),
3779 "LC_COLLATE");
3780 else if (state == 1)
3781 {
3782 lr_error (lr: ldfile, _("%s: missing `order_end' keyword"),
3783 "LC_COLLATE");
3784
3785 /* Handle ellipsis at end of list. */
3786 if (was_ellipsis != tok_none)
3787 {
3788 handle_ellipsis (ldfile, NULL, symlen: 0, ellipsis: was_ellipsis, charmap,
3789 repertoire, result);
3790 was_ellipsis = tok_none;
3791 }
3792 }
3793 else if (state == 3)
3794 record_error (status: 0, errnum: 0, _("\
3795%s: missing `reorder-end' keyword"), "LC_COLLATE");
3796 else if (state == 5)
3797 record_error (status: 0, errnum: 0, _("\
3798%s: missing `reorder-sections-end' keyword"), "LC_COLLATE");
3799 }
3800 arg = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
3801 if (arg->tok == tok_eof)
3802 break;
3803 if (arg->tok == tok_eol)
3804 lr_error (lr: ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3805 else if (arg->tok != tok_lc_collate)
3806 lr_error (lr: ldfile, _("\
3807%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3808 lr_ignore_rest (lr: ldfile, verbose: arg->tok == tok_lc_collate);
3809 return;
3810
3811 case tok_define:
3812 if (ignore_content)
3813 {
3814 lr_ignore_rest (lr: ldfile, verbose: 0);
3815 break;
3816 }
3817
3818 arg = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
3819 if (arg->tok != tok_ident)
3820 goto err_label;
3821
3822 /* Simply add the new symbol. */
3823 struct name_list *newsym = xmalloc (n: sizeof (*newsym)
3824 + arg->val.str.lenmb + 1);
3825 memcpy (dest: newsym->str, src: arg->val.str.startmb, n: arg->val.str.lenmb);
3826 newsym->str[arg->val.str.lenmb] = '\0';
3827 newsym->next = defined;
3828 defined = newsym;
3829
3830 lr_ignore_rest (lr: ldfile, verbose: 1);
3831 break;
3832
3833 case tok_undef:
3834 if (ignore_content)
3835 {
3836 lr_ignore_rest (lr: ldfile, verbose: 0);
3837 break;
3838 }
3839
3840 arg = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
3841 if (arg->tok != tok_ident)
3842 goto err_label;
3843
3844 /* Remove _all_ occurrences of the symbol from the list. */
3845 struct name_list *prevdef = NULL;
3846 struct name_list *curdef = defined;
3847 while (curdef != NULL)
3848 if (strncmp (s1: arg->val.str.startmb, s2: curdef->str,
3849 n: arg->val.str.lenmb) == 0
3850 && curdef->str[arg->val.str.lenmb] == '\0')
3851 {
3852 if (prevdef == NULL)
3853 defined = curdef->next;
3854 else
3855 prevdef->next = curdef->next;
3856
3857 struct name_list *olddef = curdef;
3858 curdef = curdef->next;
3859
3860 free (ptr: olddef);
3861 }
3862 else
3863 {
3864 prevdef = curdef;
3865 curdef = curdef->next;
3866 }
3867
3868 lr_ignore_rest (lr: ldfile, verbose: 1);
3869 break;
3870
3871 case tok_ifdef:
3872 case tok_ifndef:
3873 if (ignore_content)
3874 {
3875 lr_ignore_rest (lr: ldfile, verbose: 0);
3876 break;
3877 }
3878
3879 found_ifdef:
3880 arg = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
3881 if (arg->tok != tok_ident)
3882 goto err_label;
3883 lr_ignore_rest (lr: ldfile, verbose: 1);
3884
3885 if (collate->else_action == else_none)
3886 {
3887 curdef = defined;
3888 while (curdef != NULL)
3889 if (strncmp (s1: arg->val.str.startmb, s2: curdef->str,
3890 n: arg->val.str.lenmb) == 0
3891 && curdef->str[arg->val.str.lenmb] == '\0')
3892 break;
3893 else
3894 curdef = curdef->next;
3895
3896 if ((nowtok == tok_ifdef && curdef != NULL)
3897 || (nowtok == tok_ifndef && curdef == NULL))
3898 {
3899 /* We have to use the if-branch. */
3900 collate->else_action = else_ignore;
3901 }
3902 else
3903 {
3904 /* We have to use the else-branch, if there is one. */
3905 nowtok = skip_to (ldfile, collate, charmap, to_endif: 0);
3906 if (nowtok == tok_else)
3907 collate->else_action = else_seen;
3908 else if (nowtok == tok_elifdef)
3909 {
3910 nowtok = tok_ifdef;
3911 goto found_ifdef;
3912 }
3913 else if (nowtok == tok_elifndef)
3914 {
3915 nowtok = tok_ifndef;
3916 goto found_ifdef;
3917 }
3918 else if (nowtok == tok_eof)
3919 goto seen_eof;
3920 else if (nowtok == tok_end)
3921 goto seen_end;
3922 }
3923 }
3924 else
3925 {
3926 /* XXX Should it really become necessary to support nested
3927 preprocessor handling we will push the state here. */
3928 lr_error (lr: ldfile, _("%s: nested conditionals not supported"),
3929 "LC_COLLATE");
3930 nowtok = skip_to (ldfile, collate, charmap, to_endif: 1);
3931 if (nowtok == tok_eof)
3932 goto seen_eof;
3933 else if (nowtok == tok_end)
3934 goto seen_end;
3935 }
3936 break;
3937
3938 case tok_elifdef:
3939 case tok_elifndef:
3940 case tok_else:
3941 if (ignore_content)
3942 {
3943 lr_ignore_rest (lr: ldfile, verbose: 0);
3944 break;
3945 }
3946
3947 lr_ignore_rest (lr: ldfile, verbose: 1);
3948
3949 if (collate->else_action == else_ignore)
3950 {
3951 /* Ignore everything until the endif. */
3952 nowtok = skip_to (ldfile, collate, charmap, to_endif: 1);
3953 if (nowtok == tok_eof)
3954 goto seen_eof;
3955 else if (nowtok == tok_end)
3956 goto seen_end;
3957 }
3958 else
3959 {
3960 assert (collate->else_action == else_none);
3961 lr_error (lr: ldfile, _("\
3962%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3963 nowtok == tok_else ? "else"
3964 : nowtok == tok_elifdef ? "elifdef" : "elifndef");
3965 }
3966 break;
3967
3968 case tok_endif:
3969 if (ignore_content)
3970 {
3971 lr_ignore_rest (lr: ldfile, verbose: 0);
3972 break;
3973 }
3974
3975 lr_ignore_rest (lr: ldfile, verbose: 1);
3976
3977 if (collate->else_action != else_ignore
3978 && collate->else_action != else_seen)
3979 lr_error (lr: ldfile, _("\
3980%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3981
3982 /* XXX If we support nested preprocessor directives we pop
3983 the state here. */
3984 collate->else_action = else_none;
3985 break;
3986
3987 default:
3988 err_label:
3989 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3990 }
3991
3992 /* Prepare for the next round. */
3993 now = lr_token (lr: ldfile, charmap, locale: result, NULL, verbose);
3994 nowtok = now->tok;
3995 }
3996
3997 seen_eof:
3998 /* When we come here we reached the end of the file. */
3999 lr_error (lr: ldfile, _("%s: premature end of file"), "LC_COLLATE");
4000}
4001

source code of glibc/locale/programs/ld-collate.c