1/* Internal functions for the *scanf* implementation.
2 Copyright (C) 1991-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <assert.h>
20#include <errno.h>
21#include <limits.h>
22#include <ctype.h>
23#include <stdarg.h>
24#include <stdbool.h>
25#include <stdio.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29#include <wchar.h>
30#include <wctype.h>
31#include <libc-diag.h>
32#include <libc-lock.h>
33#include <locale/localeinfo.h>
34#include <scratch_buffer.h>
35
/* Pick the spelling of the widest integer type used by this file.
   When the compiler defines __GNUC__, `long long' is available:
   HAVE_LONGLONG is defined and LONGLONG expands to `long long'.
   Otherwise LONGLONG falls back to plain `long'.  */
#ifdef __GNUC__
# define HAVE_LONGLONG
# define LONGLONG long long
#else
# define LONGLONG long
#endif
42
/* Determine whether we have to handle `long long' at all.
   need_longlong is 0 when `long' and `long long' have the same maximum
   value (LONG_MAX == LONG_LONG_MAX) and 1 otherwise, so that code
   guarded by it can be dropped as a compile-time constant.  */
#if LONG_MAX == LONG_LONG_MAX
# define need_longlong 0
#else
# define need_longlong 1
#endif

/* Determine whether we have to handle `long'.
   need_long is 0 when `int' and `long' have the same maximum value
   (INT_MAX == LONG_MAX) and 1 otherwise.  */
#if INT_MAX == LONG_MAX
# define need_long 0
#else
# define need_long 1
#endif
56
/* These are the flag bits collected in the `flags' variable while one
   conversion specification of the format string is parsed.  Each
   value is a distinct bit so that flags can be combined with `|'.  */
#define LONG 0x0001 /* l: long or double */
#define LONGDBL 0x0002 /* L: long long or long double */
#define SHORT 0x0004 /* h: short */
#define SUPPRESS 0x0008 /* *: suppress assignment */
#define POINTER 0x0010 /* weird %p pointer (`fake hex') */
#define NOSKIP 0x0020 /* do not skip blanks */
#define NUMBER_SIGNED 0x0040 /* signed integer */
#define GROUP 0x0080 /* ': group numbers */
#define GNU_MALLOC 0x0100 /* a: malloc strings */
#define CHAR 0x0200 /* hh: char */
#define I18N 0x0400 /* I: use locale's digits */
#define HEXA_FLOAT 0x0800 /* hexadecimal float */
#define READ_POINTER 0x1000 /* this is a pointer value */
#define POSIX_MALLOC 0x2000 /* m: malloc strings */
/* Mask matching either of the two malloc-string flags above.  */
#define MALLOC (GNU_MALLOC | POSIX_MALLOC)
73
74#include <locale/localeinfo.h>
75#include <libioP.h>
76
/* Character-type abstraction.  With COMPILE_WSCANF defined this file
   builds the wide-character entry point (__vfwscanf_internal),
   otherwise the byte-oriented one (__vfscanf_internal).  The macros
   below hide the difference.

   ungetc, ungetc_not_eof and inchar are not self-contained: they
   read and modify the variables `c', `s', `read_in' and
   `inchar_errno' of the function they are expanded in.  */
#ifdef COMPILE_WSCANF
/* Push C back onto S and decrement read_in, unless C is WEOF.  */
# define ungetc(c, s) ((void) (c == WEOF \
                               || (--read_in, \
                                   _IO_sputbackwc (s, c))))
/* Same as ungetc, but without the WEOF test; the caller must know
   that C is a real character.  */
# define ungetc_not_eof(c, s) ((void) (--read_in, \
                                       _IO_sputbackwc (s, c)))
/* Fetch the next wide character into `c' and yield it.  If `c'
   already is WEOF no read is attempted: errno is restored from
   inchar_errno and WEOF is yielded.  Otherwise a successful read
   increments read_in, and a failed one records errno in
   inchar_errno.  */
# define inchar() (c == WEOF ? ((errno = inchar_errno), WEOF) \
                   : ((c = _IO_getwc_unlocked (s)), \
                      (void) (c != WEOF \
                              ? ++read_in \
                              : (size_t) (inchar_errno = errno)), c))

/* Character classification and case mapping for wide characters.  */
# define ISSPACE(Ch) iswspace (Ch)
# define ISDIGIT(Ch) iswdigit (Ch)
# define ISXDIGIT(Ch) iswxdigit (Ch)
# define TOLOWER(Ch) towlower (Ch)
/* Statement that makes the enclosing function return WEOF unless
   _IO_fwide (s, 1) reports 1.  */
# define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF
/* Redirect the number-parsing helpers to their wide-character
   counterparts.  */
# define __strtoll_internal __wcstoll_internal
# define __strtoull_internal __wcstoull_internal
# define __strtol_internal __wcstol_internal
# define __strtoul_internal __wcstoul_internal
# define __strtold_internal __wcstold_internal
# define __strtod_internal __wcstod_internal
# define __strtof_internal __wcstof_internal
# if __HAVE_FLOAT128_UNLIKE_LDBL
#  define __strtof128_internal __wcstof128_internal
# endif

/* L_ turns a character or string literal into a wide literal.  */
# define L_(Str) L##Str
# define CHAR_T wchar_t
# define UCHAR_T unsigned int
# define WINT_T wint_t
/* In the wide variant EOF means WEOF for the rest of this file.  */
# undef EOF
# define EOF WEOF
#else
/* Push C back onto S and decrement read_in, unless C is EOF.  */
# define ungetc(c, s) ((void) ((int) c == EOF \
                               || (--read_in, \
                                   _IO_sputbackc (s, (unsigned char) c))))
/* Same as ungetc, but without the EOF test; the caller must know
   that C is a real character.  */
# define ungetc_not_eof(c, s) ((void) (--read_in, \
                                       _IO_sputbackc (s, (unsigned char) c)))
/* Fetch the next byte into `c' and yield it.  If `c' already is EOF
   no read is attempted: errno is restored from inchar_errno and EOF
   is yielded.  Otherwise a successful read increments read_in, and a
   failed one records errno in inchar_errno.  */
# define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \
                   : ((c = _IO_getc_unlocked (s)), \
                      (void) (c != EOF \
                              ? ++read_in \
                              : (size_t) (inchar_errno = errno)), c))
/* Character classification and case mapping; these use the local
   variable `loc' of the enclosing function as the locale.  */
# define ISSPACE(Ch) __isspace_l (Ch, loc)
# define ISDIGIT(Ch) __isdigit_l (Ch, loc)
# define ISXDIGIT(Ch) __isxdigit_l (Ch, loc)
# define TOLOWER(Ch) __tolower_l ((unsigned char) (Ch), loc)
/* Statement that makes the enclosing function return EOF when
   _IO_vtable_offset (s) is 0 and _IO_fwide (s, -1) does not report
   -1.  */
# define ORIENT if (_IO_vtable_offset (s) == 0 \
                    && _IO_fwide (s, -1) != -1) \
                  return EOF

/* L_ leaves a character or string literal unchanged.  */
# define L_(Str) Str
# define CHAR_T char
# define UCHAR_T unsigned char
# define WINT_T int
#endif
135
136#include "printf-parse.h" /* Use read_int. */
137
/* Error exits.  All three macros jump to the label `errout', which
   must exist in the function they are expanded in.  */

/* Encoding failure: set errno to EILSEQ, then leave via errout.  */
#define encode_error() do { \
                         __set_errno (EILSEQ); \
                         goto errout; \
                       } while (0)
/* Conversion (matching) failure: leave via errout, keeping `done'.  */
#define conv_error() do { \
                       goto errout; \
                     } while (0)
/* Input failure: if `done' is still 0, set it to EOF; then leave via
   errout.  */
#define input_error() do { \
                        if (done == 0) done = EOF; \
                        goto errout; \
                      } while (0)
/* Record PTR in the list headed by the local variable `ptrs_to_free'.
   When the list is empty, or the chunk at its head already holds as
   many entries as its `ptrs' array has elements, a fresh chunk is
   obtained with alloca (so it lives in the stack frame of the
   function the macro is expanded in) and linked in front of the
   list.  PTR is then appended to the head chunk.  */
#define add_ptr_to_free(ptr) \
  do \
    { \
      if (ptrs_to_free == NULL \
          || ptrs_to_free->count == (sizeof (ptrs_to_free->ptrs) \
                                     / sizeof (ptrs_to_free->ptrs[0]))) \
        { \
          struct ptrs_to_free *new_ptrs = alloca (sizeof (*ptrs_to_free)); \
          new_ptrs->count = 0; \
          new_ptrs->next = ptrs_to_free; \
          ptrs_to_free = new_ptrs; \
        } \
      ptrs_to_free->ptrs[ptrs_to_free->count++] = (ptr); \
    } \
  while (0)
/* Validate the stream S and the format string FORMAT.  The macro
   makes the enclosing function return EOF with errno set to EBADF if
   S has the _IO_NO_READS flag set, and with errno set to EINVAL if
   FORMAT is NULL.  CHECK_FILE is invoked first with EOF as its second
   argument (presumably the value to return for an invalid stream;
   its definition is not in this file).  */
#define ARGCHECK(s, format) \
  do \
    { \
      /* Check file argument for consistency.  */ \
      CHECK_FILE (s, EOF); \
      if (s->_flags & _IO_NO_READS) \
        { \
          __set_errno (EBADF); \
          return EOF; \
        } \
      else if (format == NULL) \
        { \
          __set_errno (EINVAL); \
          return EOF; \
        } \
    } while (0)
/* Lock stream S: register _IO_funlockfile with S as its argument via
   __libc_cleanup_region_start, then take the lock with _IO_flockfile.
   This expands to two statements and is not wrapped in do/while; it
   is presumably meant to be paired with UNLOCK_STREAM in the same
   block -- confirm against the definition of
   __libc_cleanup_region_start.  */
#define LOCK_STREAM(S) \
  __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
  _IO_flockfile (S)
/* Unlock stream S with _IO_funlockfile and close the cleanup region
   with __libc_cleanup_region_end (0).  */
#define UNLOCK_STREAM(S) \
  _IO_funlockfile (S); \
  __libc_cleanup_region_end (0)
186
/* One chunk of the list maintained by add_ptr_to_free.  Chunks are
   linked newest-first through NEXT.  How the recorded pointers are
   consumed is decided by code outside this declaration.  */
struct ptrs_to_free
{
  /* Number of entries of PTRS in use.  */
  size_t count;
  /* Previously allocated chunk, or NULL for the last one.  */
  struct ptrs_to_free *next;
  /* The recorded pointers; add_ptr_to_free starts a new chunk once
     all 32 slots are used.  */
  char **ptrs[32];
};
193
/* Growable buffer of CHAR_T objects layered on a scratch buffer.  */
struct char_buffer {
  /* Position where the next element is stored.  NULL once growing
     the buffer has failed (see char_buffer_error).  */
  CHAR_T *current;
  /* One past the last usable element of the current storage.  */
  CHAR_T *end;
  /* Backing storage; its `data' and `length' members determine the
     start and the capacity of the buffer.  */
  struct scratch_buffer scratch;
};
199
200/* Returns a pointer to the first CHAR_T object in the buffer. Only
201 valid if char_buffer_add (BUFFER, CH) has been called and
202 char_buffer_error (BUFFER) is false. */
203static inline CHAR_T *
204char_buffer_start (const struct char_buffer *buffer)
205{
206 return (CHAR_T *) buffer->scratch.data;
207}
208
209/* Returns the number of CHAR_T objects in the buffer. Only valid if
210 char_buffer_error (BUFFER) is false. */
211static inline size_t
212char_buffer_size (const struct char_buffer *buffer)
213{
214 return buffer->current - char_buffer_start (buffer);
215}
216
217/* Reinitializes BUFFER->current and BUFFER->end to cover the entire
218 scratch buffer. */
219static inline void
220char_buffer_rewind (struct char_buffer *buffer)
221{
222 buffer->current = char_buffer_start (buffer);
223 buffer->end = buffer->current + buffer->scratch.length / sizeof (CHAR_T);
224}
225
226/* Returns true if a previous call to char_buffer_add (BUFFER, CH)
227 failed. */
228static inline bool
229char_buffer_error (const struct char_buffer *buffer)
230{
231 return __glibc_unlikely (buffer->current == NULL);
232}
233
234/* Slow path for char_buffer_add. */
235static void
236char_buffer_add_slow (struct char_buffer *buffer, CHAR_T ch)
237{
238 if (char_buffer_error (buffer))
239 return;
240 size_t offset = buffer->end - (CHAR_T *) buffer->scratch.data;
241 if (!scratch_buffer_grow_preserve (buffer: &buffer->scratch))
242 {
243 buffer->current = NULL;
244 buffer->end = NULL;
245 return;
246 }
247 char_buffer_rewind (buffer);
248 buffer->current += offset;
249 *buffer->current++ = ch;
250}
251
252/* Adds CH to BUFFER. This function does not report any errors, check
253 for them with char_buffer_error. */
254static inline void
255char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
256 __attribute__ ((always_inline));
257static inline void
258char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
259{
260 if (__glibc_unlikely (buffer->current == buffer->end))
261 char_buffer_add_slow (buffer, ch);
262 else
263 *buffer->current++ = ch;
264}
265
266/* Read formatted input from S according to the format string
267 FORMAT, using the argument list in ARG.
268 Return the number of assignments made, or -1 for an input error. */
269#ifdef COMPILE_WSCANF
270int
271__vfwscanf_internal (FILE *s, const wchar_t *format, va_list argptr,
272 unsigned int mode_flags)
273#else
274int
275__vfscanf_internal (FILE *s, const char *format, va_list argptr,
276 unsigned int mode_flags)
277#endif
278{
279 va_list arg;
280 const UCHAR_T *f = (const UCHAR_T *) format;
281 UCHAR_T fc; /* Current character of the format. */
282 WINT_T done = 0; /* Assignments done. */
283 size_t read_in = 0; /* Chars read in. */
284 WINT_T c = 0; /* Last char read. */
285 int width; /* Maximum field width. */
286 int flags; /* Modifiers for current format element. */
287#ifndef COMPILE_WSCANF
288 locale_t loc = _NL_CURRENT_LOCALE;
289 struct __locale_data *const curctype = loc->__locales[LC_CTYPE];
290#endif
291
292 /* Errno of last failed inchar call. */
293 int inchar_errno = 0;
294 /* Status for reading F-P nums. */
295 char got_digit, got_dot, got_e, got_sign;
296 /* If a [...] is a [^...]. */
297 CHAR_T not_in;
298#define exp_char not_in
299 /* Base for integral numbers. */
300 int base;
301 /* Decimal point character. */
302#ifdef COMPILE_WSCANF
303 wint_t decimal;
304#else
305 const char *decimal;
306#endif
307 /* The thousands character of the current locale. */
308#ifdef COMPILE_WSCANF
309 wint_t thousands;
310#else
311 const char *thousands;
312#endif
313 struct ptrs_to_free *ptrs_to_free = NULL;
314 /* State for the conversions. */
315 mbstate_t state;
316 /* Integral holding variables. */
317 union
318 {
319 long long int q;
320 unsigned long long int uq;
321 long int l;
322 unsigned long int ul;
323 } num;
324 /* Character-buffer pointer. */
325 char *str = NULL;
326 wchar_t *wstr = NULL;
327 char **strptr = NULL;
328 ssize_t strsize = 0;
329 /* We must not react on white spaces immediately because they can
330 possibly be matched even if in the input stream no character is
331 available anymore. */
332 int skip_space = 0;
333 /* Workspace. */
334 CHAR_T *tw; /* Temporary pointer. */
335 struct char_buffer charbuf;
336 scratch_buffer_init (buffer: &charbuf.scratch);
337
338#ifdef __va_copy
339 __va_copy (arg, argptr);
340#else
341 arg = (va_list) argptr;
342#endif
343
344#ifdef ORIENT
345 ORIENT;
346#endif
347
348 ARGCHECK (s, format);
349
350 {
351#ifndef COMPILE_WSCANF
352 struct __locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
353#endif
354
355 /* Figure out the decimal point character. */
356#ifdef COMPILE_WSCANF
357 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
358#else
359 decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
360#endif
361 /* Figure out the thousands separator character. */
362#ifdef COMPILE_WSCANF
363 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
364#else
365 thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
366 if (*thousands == '\0')
367 thousands = NULL;
368#endif
369 }
370
371 /* Lock the stream. */
372 LOCK_STREAM (s);
373
374
375#ifndef COMPILE_WSCANF
376 /* From now on we use `state' to convert the format string. */
377 memset (&state, '\0', sizeof (state));
378#endif
379
380 /* Run through the format string. */
381 while (*f != '\0')
382 {
383 unsigned int argpos;
384 bool is_fast;
385 /* Extract the next argument, which is of type TYPE.
386 For a %N$... spec, this is the Nth argument from the beginning;
387 otherwise it is the next argument after the state now in ARG. */
388#ifdef __va_copy
389# define ARG(type) (argpos == 0 ? va_arg (arg, type) \
390 : ({ unsigned int pos = argpos; \
391 va_list arg; \
392 __va_copy (arg, argptr); \
393 while (--pos > 0) \
394 (void) va_arg (arg, void *); \
395 va_arg (arg, type); \
396 }))
397#else
398# if 0
399 /* XXX Possible optimization. */
400# define ARG(type) (argpos == 0 ? va_arg (arg, type) \
401 : ({ va_list arg = (va_list) argptr; \
402 arg = (va_list) ((char *) arg \
403 + (argpos - 1) \
404 * __va_rounded_size (void *)); \
405 va_arg (arg, type); \
406 }))
407# else
408# define ARG(type) (argpos == 0 ? va_arg (arg, type) \
409 : ({ unsigned int pos = argpos; \
410 va_list arg = (va_list) argptr; \
411 while (--pos > 0) \
412 (void) va_arg (arg, void *); \
413 va_arg (arg, type); \
414 }))
415# endif
416#endif
417
418#ifndef COMPILE_WSCANF
419 if (!isascii (*f))
420 {
421 /* Non-ASCII, may be a multibyte. */
422 int len = __mbrlen ((const char *) f, strlen ((const char *) f),
423 &state);
424 if (len > 0)
425 {
426 do
427 {
428 c = inchar ();
429 if (__glibc_unlikely (c == EOF))
430 input_error ();
431 else if (c != *f++)
432 {
433 ungetc_not_eof (c, s);
434 conv_error ();
435 }
436 }
437 while (--len > 0);
438 continue;
439 }
440 }
441#endif
442
443 fc = *f++;
444 if (fc != '%')
445 {
446 /* Remember to skip spaces. */
447 if (ISSPACE (fc))
448 {
449 skip_space = 1;
450 continue;
451 }
452
453 /* Read a character. */
454 c = inchar ();
455
456 /* Characters other than format specs must just match. */
457 if (__glibc_unlikely (c == EOF))
458 input_error ();
459
460 /* We saw white space char as the last character in the format
461 string. Now it's time to skip all leading white space. */
462 if (skip_space)
463 {
464 while (ISSPACE (c))
465 if (__glibc_unlikely (inchar () == EOF))
466 input_error ();
467 skip_space = 0;
468 }
469
470 if (__glibc_unlikely (c != fc))
471 {
472 ungetc (c, s);
473 conv_error ();
474 }
475
476 continue;
477 }
478
479 /* This is the start of the conversion string. */
480 flags = 0;
481
482 /* Initialize state of modifiers. */
483 argpos = 0;
484
485 /* Prepare temporary buffer. */
486 char_buffer_rewind (buffer: &charbuf);
487
488 /* Check for a positional parameter specification. */
489 if (ISDIGIT (*f))
490 {
491 argpos = read_int (pstr: &f);
492 if (*f == L_('$'))
493 ++f;
494 else
495 {
496 /* Oops; that was actually the field width. */
497 width = argpos;
498 argpos = 0;
499 goto got_width;
500 }
501 }
502
503 /* Check for the assignment-suppressing, the number grouping flag,
504 and the signal to use the locale's digit representation. */
505 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
506 switch (*f++)
507 {
508 case L_('*'):
509 flags |= SUPPRESS;
510 break;
511 case L_('\''):
512#ifdef COMPILE_WSCANF
513 if (thousands != L'\0')
514#else
515 if (thousands != NULL)
516#endif
517 flags |= GROUP;
518 break;
519 case L_('I'):
520 flags |= I18N;
521 break;
522 }
523
524 /* Find the maximum field width. */
525 width = 0;
526 if (ISDIGIT (*f))
527 width = read_int (pstr: &f);
528 got_width:
529 if (width == 0)
530 width = -1;
531
532 /* Check for type modifiers. */
533 switch (*f++)
534 {
535 case L_('h'):
536 /* ints are short ints or chars. */
537 if (*f == L_('h'))
538 {
539 ++f;
540 flags |= CHAR;
541 }
542 else
543 flags |= SHORT;
544 break;
545 case L_('l'):
546 if (*f == L_('l'))
547 {
548 /* A double `l' is equivalent to an `L'. */
549 ++f;
550 flags |= LONGDBL | LONG;
551 }
552 else
553 /* ints are long ints. */
554 flags |= LONG;
555 break;
556 case L_('q'):
557 case L_('L'):
558 /* doubles are long doubles, and ints are long long ints. */
559 flags |= LONGDBL | LONG;
560 break;
561 case L_('a'):
562 /* The `a' is used as a flag only if followed by `s', `S' or
563 `['. */
564 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
565 {
566 --f;
567 break;
568 }
569 /* In __isoc99_*scanf %as, %aS and %a[ extension is not
570 supported at all. */
571 if (__glibc_likely ((mode_flags & SCANF_ISOC99_A) != 0))
572 {
573 --f;
574 break;
575 }
576 /* String conversions (%s, %[) take a `char **'
577 arg and fill it in with a malloc'd pointer. */
578 flags |= GNU_MALLOC;
579 break;
580 case L_('m'):
581 flags |= POSIX_MALLOC;
582 if (*f == L_('l'))
583 {
584 ++f;
585 flags |= LONG;
586 }
587 break;
588 case L_('z'):
589 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
590 flags |= LONGDBL;
591 else if (sizeof (size_t) > sizeof (unsigned int))
592 flags |= LONG;
593 break;
594 case L_('j'):
595 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
596 flags |= LONGDBL;
597 else if (sizeof (uintmax_t) > sizeof (unsigned int))
598 flags |= LONG;
599 break;
600 case L_('t'):
601 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
602 flags |= LONGDBL;
603 else if (sizeof (ptrdiff_t) > sizeof (int))
604 flags |= LONG;
605 break;
606 case L_('w'):
607 {
608 is_fast = false;
609 if (*f == L_('f'))
610 {
611 ++f;
612 is_fast = true;
613 }
614 int bitwidth = 0;
615 if (ISDIGIT (*f))
616 bitwidth = read_int (pstr: &f);
617 if (is_fast)
618 switch (bitwidth)
619 {
620 case 8:
621 bitwidth = INT_FAST8_WIDTH;
622 break;
623 case 16:
624 bitwidth = INT_FAST16_WIDTH;
625 break;
626 case 32:
627 bitwidth = INT_FAST32_WIDTH;
628 break;
629 case 64:
630 bitwidth = INT_FAST64_WIDTH;
631 break;
632 }
633 switch (bitwidth)
634 {
635 case 8:
636 flags |= CHAR;
637 break;
638 case 16:
639 flags |= SHORT;
640 break;
641 case 32:
642 break;
643 case 64:
644 flags |= LONGDBL | LONG;
645 break;
646 default:
647 /* ISO C requires this error to be detected. */
648 __set_errno (EINVAL);
649 goto errout;
650 }
651 }
652 break;
653 default:
654 /* Not a recognized modifier. Backup. */
655 --f;
656 break;
657 }
658
659 /* End of the format string? */
660 if (__glibc_unlikely (*f == L_('\0')))
661 conv_error ();
662
663 /* Find the conversion specifier. */
664 fc = *f++;
665 if (skip_space || (fc != L_('[') && fc != L_('c')
666 && fc != L_('C') && fc != L_('n')))
667 {
668 /* Eat whitespace. */
669 int save_errno = errno;
670 __set_errno (0);
671 do
672 /* We add the additional test for EOF here since otherwise
673 inchar will restore the old errno value which might be
674 EINTR but does not indicate an interrupt since nothing
675 was read at this time. */
676 if (__builtin_expect ((c == EOF || inchar () == EOF)
677 && errno == EINTR, 0))
678 input_error ();
679 while (ISSPACE (c));
680 __set_errno (save_errno);
681 ungetc (c, s);
682 skip_space = 0;
683 }
684
685 switch (fc)
686 {
687 case L_('%'): /* Must match a literal '%'. */
688 c = inchar ();
689 if (__glibc_unlikely (c == EOF))
690 input_error ();
691 if (__glibc_unlikely (c != fc))
692 {
693 ungetc_not_eof (c, s);
694 conv_error ();
695 }
696 break;
697
698 case L_('n'): /* Answer number of assignments done. */
699 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
700 with the 'n' conversion specifier. */
701 if (!(flags & SUPPRESS))
702 {
703 /* Don't count the read-ahead. */
704 if (need_longlong && (flags & LONGDBL))
705 *ARG (long long int *) = read_in;
706 else if (need_long && (flags & LONG))
707 *ARG (long int *) = read_in;
708 else if (flags & SHORT)
709 *ARG (short int *) = read_in;
710 else if (!(flags & CHAR))
711 *ARG (int *) = read_in;
712 else
713 *ARG (char *) = read_in;
714
715#ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
716 /* We have a severe problem here. The ISO C standard
717 contradicts itself in explaining the effect of the %n
718 format in `scanf'. While in ISO C:1990 and the ISO C
719 Amendment 1:1995 the result is described as
720
721 Execution of a %n directive does not effect the
722 assignment count returned at the completion of
723 execution of the f(w)scanf function.
724
725 in ISO C Corrigendum 1:1994 the following was added:
726
727 Subclause 7.9.6.2
728 Add the following fourth example:
729 In:
730 #include <stdio.h>
731 int d1, d2, n1, n2, i;
732 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
733 the value 123 is assigned to d1 and the value3 to n1.
734 Because %n can never get an input failure the value
735 of 3 is also assigned to n2. The value of d2 is not
736 affected. The value 3 is assigned to i.
737
738 We go for now with the historically correct code from ISO C,
739 i.e., we don't count the %n assignments. When it ever
740 should proof to be wrong just remove the #ifdef above. */
741 ++done;
742#endif
743 }
744 break;
745
746 case L_('c'): /* Match characters. */
747 if ((flags & LONG) == 0)
748 {
749 if (width == -1)
750 width = 1;
751
752#define STRING_ARG(Str, Type, Width) \
753 do if (!(flags & SUPPRESS)) \
754 { \
755 if (flags & MALLOC) \
756 { \
757 /* The string is to be stored in a malloc'd buffer. */ \
758 /* For %mS using char ** is actually wrong, but \
759 shouldn't make a difference on any arch glibc \
760 supports and would unnecessarily complicate \
761 things. */ \
762 strptr = ARG (char **); \
763 if (strptr == NULL) \
764 conv_error (); \
765 /* Allocate an initial buffer. */ \
766 strsize = Width; \
767 *strptr = (char *) malloc (strsize * sizeof (Type)); \
768 Str = (Type *) *strptr; \
769 if (Str != NULL) \
770 add_ptr_to_free (strptr); \
771 else if (flags & POSIX_MALLOC) \
772 { \
773 done = EOF; \
774 goto errout; \
775 } \
776 } \
777 else \
778 Str = ARG (Type *); \
779 if (Str == NULL) \
780 conv_error (); \
781 } while (0)
782#ifdef COMPILE_WSCANF
783 STRING_ARG (str, char, 100);
784#else
785 STRING_ARG (str, char, (width > 1024 ? 1024 : width));
786#endif
787
788 c = inchar ();
789 if (__glibc_unlikely (c == EOF))
790 input_error ();
791
792#ifdef COMPILE_WSCANF
793 /* We have to convert the wide character(s) into multibyte
794 characters and store the result. */
795 memset (&state, '\0', sizeof (state));
796
797 do
798 {
799 size_t n;
800
801 if (!(flags & SUPPRESS) && (flags & POSIX_MALLOC)
802 && *strptr + strsize - str <= MB_LEN_MAX)
803 {
804 /* We have to enlarge the buffer if the `m' flag
805 was given. */
806 size_t strleng = str - *strptr;
807 char *newstr;
808
809 newstr = (char *) realloc (*strptr, strsize * 2);
810 if (newstr == NULL)
811 {
812 /* Can't allocate that much. Last-ditch effort. */
813 newstr = (char *) realloc (*strptr,
814 strleng + MB_LEN_MAX);
815 if (newstr == NULL)
816 {
817 /* c can't have `a' flag, only `m'. */
818 done = EOF;
819 goto errout;
820 }
821 else
822 {
823 *strptr = newstr;
824 str = newstr + strleng;
825 strsize = strleng + MB_LEN_MAX;
826 }
827 }
828 else
829 {
830 *strptr = newstr;
831 str = newstr + strleng;
832 strsize *= 2;
833 }
834 }
835
836 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
837 if (__glibc_unlikely (n == (size_t) -1))
838 /* No valid wide character. */
839 input_error ();
840
841 /* Increment the output pointer. Even if we don't
842 write anything. */
843 str += n;
844 }
845 while (--width > 0 && inchar () != EOF);
846#else
847 if (!(flags & SUPPRESS))
848 {
849 do
850 {
851 if ((flags & MALLOC)
852 && (char *) str == *strptr + strsize)
853 {
854 /* Enlarge the buffer. */
855 size_t newsize
856 = strsize
857 + (strsize >= width ? width - 1 : strsize);
858
859 str = (char *) realloc (ptr: *strptr, size: newsize);
860 if (str == NULL)
861 {
862 /* Can't allocate that much. Last-ditch
863 effort. */
864 str = (char *) realloc (ptr: *strptr, size: strsize + 1);
865 if (str == NULL)
866 {
867 /* c can't have `a' flag, only `m'. */
868 done = EOF;
869 goto errout;
870 }
871 else
872 {
873 *strptr = (char *) str;
874 str += strsize;
875 ++strsize;
876 }
877 }
878 else
879 {
880 *strptr = (char *) str;
881 str += strsize;
882 strsize = newsize;
883 }
884 }
885 *str++ = c;
886 }
887 while (--width > 0 && inchar () != EOF);
888 }
889 else
890 while (--width > 0 && inchar () != EOF);
891#endif
892
893 if (!(flags & SUPPRESS))
894 {
895 if ((flags & MALLOC) && str - *strptr != strsize)
896 {
897 char *cp = (char *) realloc (ptr: *strptr, size: str - *strptr);
898 if (cp != NULL)
899 *strptr = cp;
900 }
901 strptr = NULL;
902 ++done;
903 }
904
905 break;
906 }
907 /* FALLTHROUGH */
908 case L_('C'):
909 if (width == -1)
910 width = 1;
911
912 STRING_ARG (wstr, wchar_t, (width > 1024 ? 1024 : width));
913
914 c = inchar ();
915 if (__glibc_unlikely (c == EOF))
916 input_error ();
917
918#ifdef COMPILE_WSCANF
919 /* Just store the incoming wide characters. */
920 if (!(flags & SUPPRESS))
921 {
922 do
923 {
924 if ((flags & MALLOC)
925 && wstr == (wchar_t *) *strptr + strsize)
926 {
927 size_t newsize
928 = strsize + (strsize > width ? width - 1 : strsize);
929 /* Enlarge the buffer. */
930 wstr = (wchar_t *) realloc (*strptr,
931 newsize * sizeof (wchar_t));
932 if (wstr == NULL)
933 {
934 /* Can't allocate that much. Last-ditch effort. */
935 wstr = (wchar_t *) realloc (*strptr,
936 (strsize + 1)
937 * sizeof (wchar_t));
938 if (wstr == NULL)
939 {
940 /* C or lc can't have `a' flag, only `m'
941 flag. */
942 done = EOF;
943 goto errout;
944 }
945 else
946 {
947 *strptr = (char *) wstr;
948 wstr += strsize;
949 ++strsize;
950 }
951 }
952 else
953 {
954 *strptr = (char *) wstr;
955 wstr += strsize;
956 strsize = newsize;
957 }
958 }
959 *wstr++ = c;
960 }
961 while (--width > 0 && inchar () != EOF);
962 }
963 else
964 while (--width > 0 && inchar () != EOF);
965#else
966 {
967 /* We have to convert the multibyte input sequence to wide
968 characters. */
969 char buf[1];
970 mbstate_t cstate;
971
972 memset (&cstate, '\0', sizeof (cstate));
973
974 do
975 {
976 /* This is what we present the mbrtowc function first. */
977 buf[0] = c;
978
979 if (!(flags & SUPPRESS) && (flags & MALLOC)
980 && wstr == (wchar_t *) *strptr + strsize)
981 {
982 size_t newsize
983 = strsize + (strsize > width ? width - 1 : strsize);
984 /* Enlarge the buffer. */
985 wstr = (wchar_t *) realloc (ptr: *strptr,
986 size: newsize * sizeof (wchar_t));
987 if (wstr == NULL)
988 {
989 /* Can't allocate that much. Last-ditch effort. */
990 wstr = (wchar_t *) realloc (ptr: *strptr,
991 size: ((strsize + 1)
992 * sizeof (wchar_t)));
993 if (wstr == NULL)
994 {
995 /* C or lc can't have `a' flag, only `m' flag. */
996 done = EOF;
997 goto errout;
998 }
999 else
1000 {
1001 *strptr = (char *) wstr;
1002 wstr += strsize;
1003 ++strsize;
1004 }
1005 }
1006 else
1007 {
1008 *strptr = (char *) wstr;
1009 wstr += strsize;
1010 strsize = newsize;
1011 }
1012 }
1013
1014 while (1)
1015 {
1016 size_t n;
1017
1018 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1019 buf, 1, &cstate);
1020
1021 if (n == (size_t) -2)
1022 {
1023 /* Possibly correct character, just not enough
1024 input. */
1025 if (__glibc_unlikely (inchar () == EOF))
1026 encode_error ();
1027
1028 buf[0] = c;
1029 continue;
1030 }
1031
1032 if (__glibc_unlikely (n != 1))
1033 encode_error ();
1034
1035 /* We have a match. */
1036 break;
1037 }
1038
1039 /* Advance the result pointer. */
1040 ++wstr;
1041 }
1042 while (--width > 0 && inchar () != EOF);
1043 }
1044#endif
1045
1046 if (!(flags & SUPPRESS))
1047 {
1048 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1049 {
1050 wchar_t *cp = (wchar_t *) realloc (ptr: *strptr,
1051 size: ((wstr
1052 - (wchar_t *) *strptr)
1053 * sizeof (wchar_t)));
1054 if (cp != NULL)
1055 *strptr = (char *) cp;
1056 }
1057 strptr = NULL;
1058
1059 ++done;
1060 }
1061
1062 break;
1063
1064 case L_('s'): /* Read a string. */
1065 if (!(flags & LONG))
1066 {
1067 STRING_ARG (str, char, 100);
1068
1069 c = inchar ();
1070 if (__glibc_unlikely (c == EOF))
1071 input_error ();
1072
1073#ifdef COMPILE_WSCANF
1074 memset (&state, '\0', sizeof (state));
1075#endif
1076
1077 do
1078 {
1079 if (ISSPACE (c))
1080 {
1081 ungetc_not_eof (c, s);
1082 break;
1083 }
1084
1085#ifdef COMPILE_WSCANF
1086 /* This is quite complicated. We have to convert the
1087 wide characters into multibyte characters and then
1088 store them. */
1089 {
1090 size_t n;
1091
1092 if (!(flags & SUPPRESS) && (flags & MALLOC)
1093 && *strptr + strsize - str <= MB_LEN_MAX)
1094 {
1095 /* We have to enlarge the buffer if the `a' or `m'
1096 flag was given. */
1097 size_t strleng = str - *strptr;
1098 char *newstr;
1099
1100 newstr = (char *) realloc (*strptr, strsize * 2);
1101 if (newstr == NULL)
1102 {
1103 /* Can't allocate that much. Last-ditch
1104 effort. */
1105 newstr = (char *) realloc (*strptr,
1106 strleng + MB_LEN_MAX);
1107 if (newstr == NULL)
1108 {
1109 if (flags & POSIX_MALLOC)
1110 {
1111 done = EOF;
1112 goto errout;
1113 }
1114 /* We lose. Oh well. Terminate the
1115 string and stop converting,
1116 so at least we don't skip any input. */
1117 ((char *) (*strptr))[strleng] = '\0';
1118 strptr = NULL;
1119 ++done;
1120 conv_error ();
1121 }
1122 else
1123 {
1124 *strptr = newstr;
1125 str = newstr + strleng;
1126 strsize = strleng + MB_LEN_MAX;
1127 }
1128 }
1129 else
1130 {
1131 *strptr = newstr;
1132 str = newstr + strleng;
1133 strsize *= 2;
1134 }
1135 }
1136
1137 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
1138 &state);
1139 if (__glibc_unlikely (n == (size_t) -1))
1140 encode_error ();
1141
1142 assert (n <= MB_LEN_MAX);
1143 str += n;
1144 }
1145#else
1146 /* This is easy. */
1147 if (!(flags & SUPPRESS))
1148 {
1149 *str++ = c;
1150 if ((flags & MALLOC)
1151 && (char *) str == *strptr + strsize)
1152 {
1153 /* Enlarge the buffer. */
1154 str = (char *) realloc (ptr: *strptr, size: 2 * strsize);
1155 if (str == NULL)
1156 {
1157 /* Can't allocate that much. Last-ditch
1158 effort. */
1159 str = (char *) realloc (ptr: *strptr, size: strsize + 1);
1160 if (str == NULL)
1161 {
1162 if (flags & POSIX_MALLOC)
1163 {
1164 done = EOF;
1165 goto errout;
1166 }
1167 /* We lose. Oh well. Terminate the
1168 string and stop converting,
1169 so at least we don't skip any input. */
1170 ((char *) (*strptr))[strsize - 1] = '\0';
1171 strptr = NULL;
1172 ++done;
1173 conv_error ();
1174 }
1175 else
1176 {
1177 *strptr = (char *) str;
1178 str += strsize;
1179 ++strsize;
1180 }
1181 }
1182 else
1183 {
1184 *strptr = (char *) str;
1185 str += strsize;
1186 strsize *= 2;
1187 }
1188 }
1189 }
1190#endif
1191 }
1192 while ((width <= 0 || --width > 0) && inchar () != EOF);
1193
1194 if (!(flags & SUPPRESS))
1195 {
1196#ifdef COMPILE_WSCANF
1197 /* We have to emit the code to get into the initial
1198 state. */
1199 char buf[MB_LEN_MAX];
1200 size_t n = __wcrtomb (buf, L'\0', &state);
1201 if (n > 0 && (flags & MALLOC)
1202 && str + n >= *strptr + strsize)
1203 {
1204 /* Enlarge the buffer. */
1205 size_t strleng = str - *strptr;
1206 char *newstr;
1207
1208 newstr = (char *) realloc (*strptr, strleng + n + 1);
1209 if (newstr == NULL)
1210 {
1211 if (flags & POSIX_MALLOC)
1212 {
1213 done = EOF;
1214 goto errout;
1215 }
1216 /* We lose. Oh well. Terminate the string
1217 and stop converting, so at least we don't
1218 skip any input. */
1219 ((char *) (*strptr))[strleng] = '\0';
1220 strptr = NULL;
1221 ++done;
1222 conv_error ();
1223 }
1224 else
1225 {
1226 *strptr = newstr;
1227 str = newstr + strleng;
1228 strsize = strleng + n + 1;
1229 }
1230 }
1231
1232 str = __mempcpy (str, buf, n);
1233#endif
1234 *str++ = '\0';
1235
1236 if ((flags & MALLOC) && str - *strptr != strsize)
1237 {
1238 char *cp = (char *) realloc (ptr: *strptr, size: str - *strptr);
1239 if (cp != NULL)
1240 *strptr = cp;
1241 }
1242 strptr = NULL;
1243
1244 ++done;
1245 }
1246 break;
1247 }
1248 /* FALLTHROUGH */
1249
1250 case L_('S'):
1251 {
1252#ifndef COMPILE_WSCANF
1253 mbstate_t cstate;
1254#endif
1255
1256 /* Wide character string. */
1257 STRING_ARG (wstr, wchar_t, 100);
1258
1259 c = inchar ();
1260 if (__builtin_expect (c == EOF, 0))
1261 input_error ();
1262
1263#ifndef COMPILE_WSCANF
1264 memset (&cstate, '\0', sizeof (cstate));
1265#endif
1266
1267 do
1268 {
1269 if (ISSPACE (c))
1270 {
1271 ungetc_not_eof (c, s);
1272 break;
1273 }
1274
1275#ifdef COMPILE_WSCANF
1276 /* This is easy. */
1277 if (!(flags & SUPPRESS))
1278 {
1279 *wstr++ = c;
1280 if ((flags & MALLOC)
1281 && wstr == (wchar_t *) *strptr + strsize)
1282 {
1283 /* Enlarge the buffer. */
1284 wstr = (wchar_t *) realloc (*strptr,
1285 (2 * strsize)
1286 * sizeof (wchar_t));
1287 if (wstr == NULL)
1288 {
1289 /* Can't allocate that much. Last-ditch
1290 effort. */
1291 wstr = (wchar_t *) realloc (*strptr,
1292 (strsize + 1)
1293 * sizeof (wchar_t));
1294 if (wstr == NULL)
1295 {
1296 if (flags & POSIX_MALLOC)
1297 {
1298 done = EOF;
1299 goto errout;
1300 }
1301 /* We lose. Oh well. Terminate the string
1302 and stop converting, so at least we don't
1303 skip any input. */
1304 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1305 strptr = NULL;
1306 ++done;
1307 conv_error ();
1308 }
1309 else
1310 {
1311 *strptr = (char *) wstr;
1312 wstr += strsize;
1313 ++strsize;
1314 }
1315 }
1316 else
1317 {
1318 *strptr = (char *) wstr;
1319 wstr += strsize;
1320 strsize *= 2;
1321 }
1322 }
1323 }
1324#else
1325 {
1326 char buf[1];
1327
1328 buf[0] = c;
1329
1330 while (1)
1331 {
1332 size_t n;
1333
1334 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1335 buf, 1, &cstate);
1336
1337 if (n == (size_t) -2)
1338 {
1339 /* Possibly correct character, just not enough
1340 input. */
1341 if (__glibc_unlikely (inchar () == EOF))
1342 encode_error ();
1343
1344 buf[0] = c;
1345 continue;
1346 }
1347
1348 if (__glibc_unlikely (n != 1))
1349 encode_error ();
1350
1351 /* We have a match. */
1352 ++wstr;
1353 break;
1354 }
1355
1356 if (!(flags & SUPPRESS) && (flags & MALLOC)
1357 && wstr == (wchar_t *) *strptr + strsize)
1358 {
1359 /* Enlarge the buffer. */
1360 wstr = (wchar_t *) realloc (ptr: *strptr,
1361 size: (2 * strsize
1362 * sizeof (wchar_t)));
1363 if (wstr == NULL)
1364 {
1365 /* Can't allocate that much. Last-ditch effort. */
1366 wstr = (wchar_t *) realloc (ptr: *strptr,
1367 size: ((strsize + 1)
1368 * sizeof (wchar_t)));
1369 if (wstr == NULL)
1370 {
1371 if (flags & POSIX_MALLOC)
1372 {
1373 done = EOF;
1374 goto errout;
1375 }
1376 /* We lose. Oh well. Terminate the
1377 string and stop converting, so at
1378 least we don't skip any input. */
1379 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1380 strptr = NULL;
1381 ++done;
1382 conv_error ();
1383 }
1384 else
1385 {
1386 *strptr = (char *) wstr;
1387 wstr += strsize;
1388 ++strsize;
1389 }
1390 }
1391 else
1392 {
1393 *strptr = (char *) wstr;
1394 wstr += strsize;
1395 strsize *= 2;
1396 }
1397 }
1398 }
1399#endif
1400 }
1401 while ((width <= 0 || --width > 0) && inchar () != EOF);
1402
1403 if (!(flags & SUPPRESS))
1404 {
1405 *wstr++ = L'\0';
1406
1407 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1408 {
1409 wchar_t *cp = (wchar_t *) realloc (ptr: *strptr,
1410 size: ((wstr
1411 - (wchar_t *) *strptr)
1412 * sizeof (wchar_t)));
1413 if (cp != NULL)
1414 *strptr = (char *) cp;
1415 }
1416 strptr = NULL;
1417
1418 ++done;
1419 }
1420 }
1421 break;
1422
1423 case L_('x'): /* Hexadecimal integer. */
1424 case L_('X'): /* Ditto. */
1425 base = 16;
1426 goto number;
1427
1428 case L_('o'): /* Octal integer. */
1429 base = 8;
1430 goto number;
1431
1432 case L_('b'): /* Binary integer. */
1433 base = 2;
1434 goto number;
1435
1436 case L_('u'): /* Unsigned decimal integer. */
1437 base = 10;
1438 goto number;
1439
1440 case L_('d'): /* Signed decimal integer. */
1441 base = 10;
1442 flags |= NUMBER_SIGNED;
1443 goto number;
1444
1445 case L_('i'): /* Generic number. */
1446 base = 0;
1447 flags |= NUMBER_SIGNED;
1448
1449 number:
1450 c = inchar ();
1451 if (__glibc_unlikely (c == EOF))
1452 input_error ();
1453
1454 /* Check for a sign. */
1455 if (c == L_('-') || c == L_('+'))
1456 {
1457 char_buffer_add (buffer: &charbuf, ch: c);
1458 if (width > 0)
1459 --width;
1460 c = inchar ();
1461 }
1462
1463 /* Look for a leading indication of base. */
1464 if (width != 0 && c == L_('0'))
1465 {
1466 if (width > 0)
1467 --width;
1468
1469 char_buffer_add (buffer: &charbuf, ch: c);
1470 c = inchar ();
1471
1472 if (width != 0 && TOLOWER (c) == L_('x'))
1473 {
1474 if (base == 0)
1475 base = 16;
1476 if (base == 16)
1477 {
1478 if (width > 0)
1479 --width;
1480 c = inchar ();
1481 }
1482 }
1483 else if (width != 0
1484 && TOLOWER (c) == L_('b')
1485 && (base == 2
1486 || ((mode_flags & SCANF_ISOC23_BIN_CST) != 0
1487 && base == 0)))
1488 {
1489 base = 2;
1490 if (width > 0)
1491 --width;
1492 c = inchar ();
1493 }
1494 else if (base == 0)
1495 base = 8;
1496 }
1497
1498 if (base == 0)
1499 base = 10;
1500
1501 if (base == 10 && __builtin_expect ((flags & I18N) != 0, 0))
1502 {
1503 int from_level;
1504 int to_level;
1505 int level;
1506 enum { num_digits_len = 10 };
1507#ifdef COMPILE_WSCANF
1508 const wchar_t *wcdigits[num_digits_len];
1509#else
1510 const char *mbdigits[num_digits_len];
1511#endif
1512 CHAR_T *digits_extended[num_digits_len] = { NULL };
1513
1514 /* "to_inpunct" is a map from ASCII digits to their
1515 equivalent in locale. This is defined for locales
1516 which use an extra digits set. */
1517 wctrans_t map = __wctrans (property: "to_inpunct");
1518 int n;
1519
1520 from_level = 0;
1521#ifdef COMPILE_WSCANF
1522 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1523 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1524#else
1525 to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
1526#endif
1527
1528 /* Get the alternative digit forms if there are any. */
1529 if (__glibc_unlikely (map != NULL))
1530 {
1531 /* Adding new level for extra digits set in locale file. */
1532 ++to_level;
1533
1534 for (n = 0; n < num_digits_len; ++n)
1535 {
1536#ifdef COMPILE_WSCANF
1537 wcdigits[n] = (const wchar_t *)
1538 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1539
1540 wchar_t *wc_extended = (wchar_t *)
1541 malloc ((to_level + 2) * sizeof (wchar_t));
1542 if (wc_extended == NULL)
1543 {
1544 done = EOF;
1545 goto digits_extended_fail;
1546 }
1547 __wmemcpy (wc_extended, wcdigits[n], to_level);
1548 wc_extended[to_level] = __towctrans (L'0' + n, map);
1549 wc_extended[to_level + 1] = '\0';
1550 digits_extended[n] = wc_extended;
1551#else
1552 mbdigits[n]
1553 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1554
1555 /* Get the equivalent wide char in map. */
1556 wint_t extra_wcdigit = __towctrans (L'0' + n, map);
1557
1558 /* Convert it to multibyte representation. */
1559 mbstate_t state;
1560 memset (&state, '\0', sizeof (state));
1561
1562 char extra_mbdigit[MB_LEN_MAX];
1563 size_t mblen
1564 = __wcrtomb (s: extra_mbdigit, wc: extra_wcdigit, ps: &state);
1565
1566 if (mblen == (size_t) -1)
1567 {
1568 /* Ignore this new level. */
1569 map = NULL;
1570 break;
1571 }
1572
1573 /* Calculate the length of mbdigits[n]. */
1574 const char *last_char = mbdigits[n];
1575 for (level = 0; level < to_level; ++level)
1576 last_char = strchr (last_char, '\0') + 1;
1577
1578 size_t mbdigits_len = last_char - mbdigits[n];
1579
1580 /* Allocate memory for extended multibyte digit. */
1581 char *mb_extended = malloc (size: mbdigits_len + mblen + 1);
1582 if (mb_extended == NULL)
1583 {
1584 done = EOF;
1585 goto digits_extended_fail;
1586 }
1587
1588 /* And get the mbdigits + extra_digit string. */
1589 *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n],
1590 mbdigits_len),
1591 extra_mbdigit, mblen) = '\0';
1592 digits_extended[n] = mb_extended;
1593#endif
1594 }
1595 }
1596
1597 /* Read the number into workspace. */
1598 while (c != EOF && width != 0)
1599 {
1600 /* In this round we get the pointer to the digit strings
1601 and also perform the first round of comparisons. */
1602 for (n = 0; n < num_digits_len; ++n)
1603 {
1604 /* Get the string for the digits with value N. */
1605#ifdef COMPILE_WSCANF
1606
1607 /* digits_extended[] is fully set in the loop
1608 above, but the test for "map != NULL" is done
1609 inside the loop here and outside the loop there. */
1610 DIAG_PUSH_NEEDS_COMMENT;
1611 DIAG_IGNORE_NEEDS_COMMENT (4.7, "-Wmaybe-uninitialized");
1612
1613 if (__glibc_unlikely (map != NULL))
1614 wcdigits[n] = digits_extended[n];
1615 else
1616 wcdigits[n] = (const wchar_t *)
1617 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1618 wcdigits[n] += from_level;
1619
1620 DIAG_POP_NEEDS_COMMENT;
1621
1622 if (c == (wint_t) *wcdigits[n])
1623 {
1624 to_level = from_level;
1625 break;
1626 }
1627
1628 /* Advance the pointer to the next string. */
1629 ++wcdigits[n];
1630#else
1631 const char *cmpp;
1632 int avail = width > 0 ? width : INT_MAX;
1633
1634 if (__glibc_unlikely (map != NULL))
1635 mbdigits[n] = digits_extended[n];
1636 else
1637 mbdigits[n]
1638 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1639
1640 for (level = 0; level < from_level; level++)
1641 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1642
1643 cmpp = mbdigits[n];
1644 while ((unsigned char) *cmpp == c && avail >= 0)
1645 {
1646 if (*++cmpp == '\0')
1647 break;
1648 else
1649 {
1650 if (avail == 0 || inchar () == EOF)
1651 break;
1652 --avail;
1653 }
1654 }
1655
1656 if (*cmpp == '\0')
1657 {
1658 if (width > 0)
1659 width = avail;
1660 to_level = from_level;
1661 break;
1662 }
1663
1664 /* We are pushing all read characters back. */
1665 if (cmpp > mbdigits[n])
1666 {
1667 ungetc (c, s);
1668 while (--cmpp > mbdigits[n])
1669 ungetc_not_eof ((unsigned char) *cmpp, s);
1670 c = (unsigned char) *cmpp;
1671 }
1672
1673 /* Advance the pointer to the next string. */
1674 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1675#endif
1676 }
1677
1678 if (n == num_digits_len)
1679 {
1680 /* Have not yet found the digit. */
1681 for (level = from_level + 1; level <= to_level; ++level)
1682 {
1683 /* Search all ten digits of this level. */
1684 for (n = 0; n < num_digits_len; ++n)
1685 {
1686#ifdef COMPILE_WSCANF
1687 if (c == (wint_t) *wcdigits[n])
1688 break;
1689
1690 /* Advance the pointer to the next string. */
1691 ++wcdigits[n];
1692#else
1693 const char *cmpp;
1694 int avail = width > 0 ? width : INT_MAX;
1695
1696 cmpp = mbdigits[n];
1697 while ((unsigned char) *cmpp == c && avail >= 0)
1698 {
1699 if (*++cmpp == '\0')
1700 break;
1701 else
1702 {
1703 if (avail == 0 || inchar () == EOF)
1704 break;
1705 --avail;
1706 }
1707 }
1708
1709 if (*cmpp == '\0')
1710 {
1711 if (width > 0)
1712 width = avail;
1713 break;
1714 }
1715
1716 /* We are pushing all read characters back. */
1717 if (cmpp > mbdigits[n])
1718 {
1719 ungetc (c, s);
1720 while (--cmpp > mbdigits[n])
1721 ungetc_not_eof ((unsigned char) *cmpp, s);
1722 c = (unsigned char) *cmpp;
1723 }
1724
1725 /* Advance the pointer to the next string. */
1726 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1727#endif
1728 }
1729
1730 if (n < 10)
1731 {
1732 /* Found it. */
1733 from_level = level;
1734 to_level = level;
1735 break;
1736 }
1737 }
1738 }
1739
1740 if (n < num_digits_len)
1741 c = L_('0') + n;
1742 else if (flags & GROUP)
1743 {
1744 /* Try matching against the thousands separator. */
1745#ifdef COMPILE_WSCANF
1746 if (c != thousands)
1747 break;
1748#else
1749 const char *cmpp = thousands;
1750 int avail = width > 0 ? width : INT_MAX;
1751
1752 while ((unsigned char) *cmpp == c && avail >= 0)
1753 {
1754 char_buffer_add (buffer: &charbuf, ch: c);
1755 if (*++cmpp == '\0')
1756 break;
1757 else
1758 {
1759 if (avail == 0 || inchar () == EOF)
1760 break;
1761 --avail;
1762 }
1763 }
1764
1765 if (char_buffer_error (buffer: &charbuf))
1766 {
1767 __set_errno (ENOMEM);
1768 done = EOF;
1769 break;
1770 }
1771
1772 if (*cmpp != '\0')
1773 {
1774 /* We are pushing all read characters back. */
1775 if (cmpp > thousands)
1776 {
1777 charbuf.current -= cmpp - thousands;
1778 ungetc (c, s);
1779 while (--cmpp > thousands)
1780 ungetc_not_eof ((unsigned char) *cmpp, s);
1781 c = (unsigned char) *cmpp;
1782 }
1783 break;
1784 }
1785
1786 if (width > 0)
1787 width = avail;
1788
1789 /* The last thousands character will be added back by
1790 the char_buffer_add below. */
1791 --charbuf.current;
1792#endif
1793 }
1794 else
1795 break;
1796
1797 char_buffer_add (buffer: &charbuf, ch: c);
1798 if (width > 0)
1799 --width;
1800
1801 c = inchar ();
1802 }
1803
1804digits_extended_fail:
1805 for (n = 0; n < num_digits_len; n++)
1806 free (ptr: digits_extended[n]);
1807
1808 if (done == EOF)
1809 goto errout;
1810 }
1811 else
1812 /* Read the number into workspace. */
1813 while (c != EOF && width != 0)
1814 {
1815 if (base == 16)
1816 {
1817 if (!ISXDIGIT (c))
1818 break;
1819 }
1820 else if (!ISDIGIT (c) || (int) (c - L_('0')) >= base)
1821 {
1822 if (base == 10 && (flags & GROUP))
1823 {
1824 /* Try matching against the thousands separator. */
1825#ifdef COMPILE_WSCANF
1826 if (c != thousands)
1827 break;
1828#else
1829 const char *cmpp = thousands;
1830 int avail = width > 0 ? width : INT_MAX;
1831
1832 while ((unsigned char) *cmpp == c && avail >= 0)
1833 {
1834 char_buffer_add (buffer: &charbuf, ch: c);
1835 if (*++cmpp == '\0')
1836 break;
1837 else
1838 {
1839 if (avail == 0 || inchar () == EOF)
1840 break;
1841 --avail;
1842 }
1843 }
1844
1845 if (char_buffer_error (buffer: &charbuf))
1846 {
1847 __set_errno (ENOMEM);
1848 done = EOF;
1849 goto errout;
1850 }
1851
1852 if (*cmpp != '\0')
1853 {
1854 /* We are pushing all read characters back. */
1855 if (cmpp > thousands)
1856 {
1857 charbuf.current -= cmpp - thousands;
1858 ungetc (c, s);
1859 while (--cmpp > thousands)
1860 ungetc_not_eof ((unsigned char) *cmpp, s);
1861 c = (unsigned char) *cmpp;
1862 }
1863 break;
1864 }
1865
1866 if (width > 0)
1867 width = avail;
1868
1869 /* The last thousands character will be added back by
1870 the char_buffer_add below. */
1871 --charbuf.current;
1872#endif
1873 }
1874 else
1875 break;
1876 }
1877 char_buffer_add (buffer: &charbuf, ch: c);
1878 if (width > 0)
1879 --width;
1880
1881 c = inchar ();
1882 }
1883
1884 if (char_buffer_error (buffer: &charbuf))
1885 {
1886 __set_errno (ENOMEM);
1887 done = EOF;
1888 goto errout;
1889 }
1890
1891 if (char_buffer_size (buffer: &charbuf) == 0
1892 || (char_buffer_size (buffer: &charbuf) == 1
1893 && (char_buffer_start (buffer: &charbuf)[0] == L_('+')
1894 || char_buffer_start (buffer: &charbuf)[0] == L_('-'))))
1895 {
1896 /* There was no number. If we are supposed to read a pointer
1897 we must recognize "(nil)" as well. */
1898 if (__builtin_expect (char_buffer_size (buffer: &charbuf) == 0
1899 && (flags & READ_POINTER)
1900 && (width < 0 || width >= 5)
1901 && c == '('
1902 && TOLOWER (inchar ()) == L_('n')
1903 && TOLOWER (inchar ()) == L_('i')
1904 && TOLOWER (inchar ()) == L_('l')
1905 && inchar () == L_(')'), 1))
1906 /* We must produce the value of a NULL pointer. A single
1907 '0' digit is enough. */
1908 char_buffer_add (buffer: &charbuf, L_('0'));
1909 else
1910 {
1911 /* The last read character is not part of the number
1912 anymore. */
1913 ungetc (c, s);
1914
1915 conv_error ();
1916 }
1917 }
1918 else
1919 /* The just read character is not part of the number anymore. */
1920 ungetc (c, s);
1921
1922 /* Convert the number. */
1923 char_buffer_add (buffer: &charbuf, L_('\0'));
1924 if (char_buffer_error (buffer: &charbuf))
1925 {
1926 __set_errno (ENOMEM);
1927 done = EOF;
1928 goto errout;
1929 }
1930 if (need_longlong && (flags & LONGDBL))
1931 {
1932 if (flags & NUMBER_SIGNED)
1933 num.q = __strtoll_internal
1934 (char_buffer_start (buffer: &charbuf), &tw, base, flags & GROUP);
1935 else
1936 num.uq = __strtoull_internal
1937 (char_buffer_start (buffer: &charbuf), &tw, base, flags & GROUP);
1938 }
1939 else
1940 {
1941 if (flags & NUMBER_SIGNED)
1942 num.l = __strtol_internal
1943 (char_buffer_start (buffer: &charbuf), &tw, base, flags & GROUP);
1944 else
1945 num.ul = __strtoul_internal
1946 (char_buffer_start (buffer: &charbuf), &tw, base, flags & GROUP);
1947 }
1948 if (__glibc_unlikely (char_buffer_start (&charbuf) == tw))
1949 conv_error ();
1950
1951 if (!(flags & SUPPRESS))
1952 {
1953 if (flags & NUMBER_SIGNED)
1954 {
1955 if (need_longlong && (flags & LONGDBL))
1956 *ARG (LONGLONG int *) = num.q;
1957 else if (need_long && (flags & LONG))
1958 *ARG (long int *) = num.l;
1959 else if (flags & SHORT)
1960 *ARG (short int *) = (short int) num.l;
1961 else if (!(flags & CHAR))
1962 *ARG (int *) = (int) num.l;
1963 else
1964 *ARG (signed char *) = (signed char) num.ul;
1965 }
1966 else
1967 {
1968 if (need_longlong && (flags & LONGDBL))
1969 *ARG (unsigned LONGLONG int *) = num.uq;
1970 else if (need_long && (flags & LONG))
1971 *ARG (unsigned long int *) = num.ul;
1972 else if (flags & SHORT)
1973 *ARG (unsigned short int *)
1974 = (unsigned short int) num.ul;
1975 else if (!(flags & CHAR))
1976 *ARG (unsigned int *) = (unsigned int) num.ul;
1977 else
1978 *ARG (unsigned char *) = (unsigned char) num.ul;
1979 }
1980 ++done;
1981 }
1982 break;
1983
1984 case L_('e'): /* Floating-point numbers. */
1985 case L_('E'):
1986 case L_('f'):
1987 case L_('F'):
1988 case L_('g'):
1989 case L_('G'):
1990 case L_('a'):
1991 case L_('A'):
1992 c = inchar ();
1993 if (width > 0)
1994 --width;
1995 if (__glibc_unlikely (c == EOF))
1996 input_error ();
1997
1998 got_digit = got_dot = got_e = got_sign = 0;
1999
2000 /* Check for a sign. */
2001 if (c == L_('-') || c == L_('+'))
2002 {
2003 got_sign = 1;
2004 char_buffer_add (buffer: &charbuf, ch: c);
2005 if (__glibc_unlikely (width == 0 || inchar () == EOF))
2006 /* EOF is only an input error before we read any chars. */
2007 conv_error ();
2008 if (width > 0)
2009 --width;
2010 }
2011
2012 /* Take care for the special arguments "nan" and "inf". */
2013 if (TOLOWER (c) == L_('n'))
2014 {
2015 /* Maybe "nan". */
2016 char_buffer_add (buffer: &charbuf, ch: c);
2017 if (__builtin_expect (width == 0
2018 || inchar () == EOF
2019 || TOLOWER (c) != L_('a'), 0))
2020 conv_error ();
2021 if (width > 0)
2022 --width;
2023 char_buffer_add (buffer: &charbuf, ch: c);
2024 if (__builtin_expect (width == 0
2025 || inchar () == EOF
2026 || TOLOWER (c) != L_('n'), 0))
2027 conv_error ();
2028 if (width > 0)
2029 --width;
2030 char_buffer_add (buffer: &charbuf, ch: c);
2031 /* It is "nan". */
2032 goto scan_float;
2033 }
2034 else if (TOLOWER (c) == L_('i'))
2035 {
2036 /* Maybe "inf" or "infinity". */
2037 char_buffer_add (buffer: &charbuf, ch: c);
2038 if (__builtin_expect (width == 0
2039 || inchar () == EOF
2040 || TOLOWER (c) != L_('n'), 0))
2041 conv_error ();
2042 if (width > 0)
2043 --width;
2044 char_buffer_add (buffer: &charbuf, ch: c);
2045 if (__builtin_expect (width == 0
2046 || inchar () == EOF
2047 || TOLOWER (c) != L_('f'), 0))
2048 conv_error ();
2049 if (width > 0)
2050 --width;
2051 char_buffer_add (buffer: &charbuf, ch: c);
2052 /* It is at least "inf". */
2053 if (width != 0 && inchar () != EOF)
2054 {
2055 if (TOLOWER (c) == L_('i'))
2056 {
2057 if (width > 0)
2058 --width;
2059 /* Now we have to read the rest as well. */
2060 char_buffer_add (buffer: &charbuf, ch: c);
2061 if (__builtin_expect (width == 0
2062 || inchar () == EOF
2063 || TOLOWER (c) != L_('n'), 0))
2064 conv_error ();
2065 if (width > 0)
2066 --width;
2067 char_buffer_add (buffer: &charbuf, ch: c);
2068 if (__builtin_expect (width == 0
2069 || inchar () == EOF
2070 || TOLOWER (c) != L_('i'), 0))
2071 conv_error ();
2072 if (width > 0)
2073 --width;
2074 char_buffer_add (buffer: &charbuf, ch: c);
2075 if (__builtin_expect (width == 0
2076 || inchar () == EOF
2077 || TOLOWER (c) != L_('t'), 0))
2078 conv_error ();
2079 if (width > 0)
2080 --width;
2081 char_buffer_add (buffer: &charbuf, ch: c);
2082 if (__builtin_expect (width == 0
2083 || inchar () == EOF
2084 || TOLOWER (c) != L_('y'), 0))
2085 conv_error ();
2086 if (width > 0)
2087 --width;
2088 char_buffer_add (buffer: &charbuf, ch: c);
2089 }
2090 else
2091 /* Never mind. */
2092 ungetc (c, s);
2093 }
2094 goto scan_float;
2095 }
2096
2097 exp_char = L_('e');
2098 if (width != 0 && c == L_('0'))
2099 {
2100 char_buffer_add (buffer: &charbuf, ch: c);
2101 c = inchar ();
2102 if (width > 0)
2103 --width;
2104 if (width != 0 && TOLOWER (c) == L_('x'))
2105 {
2106 /* It is a number in hexadecimal format. */
2107 char_buffer_add (buffer: &charbuf, ch: c);
2108
2109 flags |= HEXA_FLOAT;
2110 exp_char = L_('p');
2111
2112 /* Grouping is not allowed. */
2113 flags &= ~GROUP;
2114 c = inchar ();
2115 if (width > 0)
2116 --width;
2117 }
2118 else
2119 got_digit = 1;
2120 }
2121
2122 while (1)
2123 {
2124 if (char_buffer_error (buffer: &charbuf))
2125 {
2126 __set_errno (ENOMEM);
2127 done = EOF;
2128 goto errout;
2129 }
2130 if (ISDIGIT (c))
2131 {
2132 char_buffer_add (buffer: &charbuf, ch: c);
2133 got_digit = 1;
2134 }
2135 else if (!got_e && (flags & HEXA_FLOAT) && ISXDIGIT (c))
2136 {
2137 char_buffer_add (buffer: &charbuf, ch: c);
2138 got_digit = 1;
2139 }
2140 else if (got_e && charbuf.current[-1] == exp_char
2141 && (c == L_('-') || c == L_('+')))
2142 char_buffer_add (buffer: &charbuf, ch: c);
2143 else if (got_digit && !got_e
2144 && (CHAR_T) TOLOWER (c) == exp_char)
2145 {
2146 char_buffer_add (buffer: &charbuf, exp_char);
2147 got_e = got_dot = 1;
2148 }
2149 else
2150 {
2151#ifdef COMPILE_WSCANF
2152 if (! got_dot && c == decimal)
2153 {
2154 char_buffer_add (&charbuf, c);
2155 got_dot = 1;
2156 }
2157 else if ((flags & GROUP) != 0 && ! got_dot && c == thousands)
2158 char_buffer_add (&charbuf, c);
2159 else
2160 {
2161 /* The last read character is not part of the number
2162 anymore. */
2163 ungetc (c, s);
2164 break;
2165 }
2166#else
2167 const char *cmpp = decimal;
2168 int avail = width > 0 ? width : INT_MAX;
2169
2170 if (! got_dot)
2171 {
2172 while ((unsigned char) *cmpp == c && avail >= 0)
2173 if (*++cmpp == '\0')
2174 break;
2175 else
2176 {
2177 if (avail == 0 || inchar () == EOF)
2178 break;
2179 --avail;
2180 }
2181 }
2182
2183 if (*cmpp == '\0')
2184 {
2185 /* Add all the characters. */
2186 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
2187 char_buffer_add (buffer: &charbuf, ch: (unsigned char) *cmpp);
2188 if (width > 0)
2189 width = avail;
2190 got_dot = 1;
2191 }
2192 else
2193 {
2194 /* Figure out whether it is a thousands separator.
2195 There is one problem: we possibly read more than
2196 one character. We cannot push them back but since
2197 we know that parts of the `decimal' string matched,
2198 we can compare against it. */
2199 const char *cmp2p = thousands;
2200
2201 if ((flags & GROUP) != 0 && ! got_dot)
2202 {
2203 while (cmp2p - thousands < cmpp - decimal
2204 && *cmp2p == decimal[cmp2p - thousands])
2205 ++cmp2p;
2206 if (cmp2p - thousands == cmpp - decimal)
2207 {
2208 while ((unsigned char) *cmp2p == c && avail >= 0)
2209 if (*++cmp2p == '\0')
2210 break;
2211 else
2212 {
2213 if (avail == 0 || inchar () == EOF)
2214 break;
2215 --avail;
2216 }
2217 }
2218 }
2219
2220 if (cmp2p != NULL && *cmp2p == '\0')
2221 {
2222 /* Add all the characters. */
2223 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
2224 char_buffer_add (buffer: &charbuf, ch: (unsigned char) *cmpp);
2225 if (width > 0)
2226 width = avail;
2227 }
2228 else
2229 {
2230 /* The last read character is not part of the number
2231 anymore. */
2232 ungetc (c, s);
2233 break;
2234 }
2235 }
2236#endif
2237 }
2238
2239 if (width == 0 || inchar () == EOF)
2240 break;
2241
2242 if (width > 0)
2243 --width;
2244 }
2245
2246 if (char_buffer_error (buffer: &charbuf))
2247 {
2248 __set_errno (ENOMEM);
2249 done = EOF;
2250 goto errout;
2251 }
2252
2253 wctrans_t map;
2254 if (__builtin_expect ((flags & I18N) != 0, 0)
2255 /* Hexadecimal floats make no sense, mixing localized
2256 digits with ASCII letters. */
2257 && !(flags & HEXA_FLOAT)
2258 /* Minimum requirement. */
2259 && (char_buffer_size (buffer: &charbuf) == got_sign || got_dot)
2260 && (map = __wctrans (property: "to_inpunct")) != NULL)
2261 {
2262 /* Reget the first character. */
2263 inchar ();
2264
2265 /* Localized digits, decimal points, and thousands
2266 separator. */
2267 wint_t wcdigits[12];
2268
2269 /* First get decimal equivalent to check if we read it
2270 or not. */
2271 wcdigits[11] = __towctrans (L'.', map);
2272
2273 /* If we have not read any character or have just read
2274 locale decimal point which matches the decimal point
2275 for localized FP numbers, then we may have localized
2276 digits. Note, we test GOT_DOT above. */
2277#ifdef COMPILE_WSCANF
2278 if (char_buffer_size (&charbuf) == got_sign
2279 || (char_buffer_size (&charbuf) == got_sign + 1
2280 && wcdigits[11] == decimal))
2281#else
2282 char mbdigits[12][MB_LEN_MAX + 1];
2283
2284 mbstate_t state;
2285 memset (&state, '\0', sizeof (state));
2286
2287 bool match_so_far = char_buffer_size (buffer: &charbuf) == got_sign;
2288 size_t mblen = __wcrtomb (s: mbdigits[11], wc: wcdigits[11], ps: &state);
2289 if (mblen != (size_t) -1)
2290 {
2291 mbdigits[11][mblen] = '\0';
2292 match_so_far |=
2293 (char_buffer_size (buffer: &charbuf) == strlen (decimal) + got_sign
2294 && strcmp (decimal, mbdigits[11]) == 0);
2295 }
2296 else
2297 {
2298 size_t decimal_len = strlen (decimal);
2299 /* This should always be the case but the data comes
2300 from a file. */
2301 if (decimal_len <= MB_LEN_MAX)
2302 {
2303 match_so_far |= (char_buffer_size (buffer: &charbuf)
2304 == decimal_len + got_sign);
2305 memcpy (mbdigits[11], decimal, decimal_len + 1);
2306 }
2307 else
2308 match_so_far = false;
2309 }
2310
2311 if (match_so_far)
2312#endif
2313 {
2314 bool have_locthousands = (flags & GROUP) != 0;
2315
2316 /* Now get the digits and the thousands-sep equivalents. */
2317 for (int n = 0; n < 11; ++n)
2318 {
2319 if (n < 10)
2320 wcdigits[n] = __towctrans (L'0' + n, map);
2321 else if (n == 10)
2322 {
2323 wcdigits[10] = __towctrans (L',', map);
2324 have_locthousands &= wcdigits[10] != L'\0';
2325 }
2326
2327#ifndef COMPILE_WSCANF
2328 memset (&state, '\0', sizeof (state));
2329
2330 size_t mblen = __wcrtomb (s: mbdigits[n], wc: wcdigits[n],
2331 ps: &state);
2332 if (mblen == (size_t) -1)
2333 {
2334 if (n == 10)
2335 {
2336 if (have_locthousands)
2337 {
2338 size_t thousands_len = strlen (thousands);
2339 if (thousands_len <= MB_LEN_MAX)
2340 memcpy (mbdigits[10], thousands,
2341 thousands_len + 1);
2342 else
2343 have_locthousands = false;
2344 }
2345 }
2346 else
2347 /* Ignore checking against localized digits. */
2348 goto no_i18nflt;
2349 }
2350 else
2351 mbdigits[n][mblen] = '\0';
2352#endif
2353 }
2354
2355 /* Start checking against localized digits, if
2356 conversion is done correctly. */
2357 while (1)
2358 {
2359 if (char_buffer_error (buffer: &charbuf))
2360 {
2361 __set_errno (ENOMEM);
2362 done = EOF;
2363 goto errout;
2364 }
2365 if (got_e && charbuf.current[-1] == exp_char
2366 && (c == L_('-') || c == L_('+')))
2367 char_buffer_add (buffer: &charbuf, ch: c);
2368 else if (char_buffer_size (buffer: &charbuf) > got_sign && !got_e
2369 && (CHAR_T) TOLOWER (c) == exp_char)
2370 {
2371 char_buffer_add (buffer: &charbuf, exp_char);
2372 got_e = got_dot = 1;
2373 }
2374 else
2375 {
2376 /* Check against localized digits, decimal point,
2377 and thousands separator. */
2378 int n;
2379 for (n = 0; n < 12; ++n)
2380 {
2381#ifdef COMPILE_WSCANF
2382 if (c == wcdigits[n])
2383 {
2384 if (n < 10)
2385 char_buffer_add (&charbuf, L_('0') + n);
2386 else if (n == 11 && !got_dot)
2387 {
2388 char_buffer_add (&charbuf, decimal);
2389 got_dot = 1;
2390 }
2391 else if (n == 10 && have_locthousands
2392 && ! got_dot)
2393 char_buffer_add (&charbuf, thousands);
2394 else
2395 /* The last read character is not part
2396 of the number anymore. */
2397 n = 12;
2398
2399 break;
2400 }
2401#else
2402 const char *cmpp = mbdigits[n];
2403 int avail = width > 0 ? width : INT_MAX;
2404
2405 while ((unsigned char) *cmpp == c && avail >= 0)
2406 if (*++cmpp == '\0')
2407 break;
2408 else
2409 {
2410 if (avail == 0 || inchar () == EOF)
2411 break;
2412 --avail;
2413 }
2414 if (*cmpp == '\0')
2415 {
2416 if (width > 0)
2417 width = avail;
2418
2419 if (n < 10)
2420 char_buffer_add (buffer: &charbuf, L_('0') + n);
2421 else if (n == 11 && !got_dot)
2422 {
2423 /* Add all the characters. */
2424 for (cmpp = decimal; *cmpp != '\0';
2425 ++cmpp)
2426 char_buffer_add (buffer: &charbuf,
2427 ch: (unsigned char) *cmpp);
2428
2429 got_dot = 1;
2430 }
2431 else if (n == 10 && (flags & GROUP) != 0
2432 && ! got_dot)
2433 {
2434 /* Add all the characters. */
2435 for (cmpp = thousands; *cmpp != '\0';
2436 ++cmpp)
2437 char_buffer_add (buffer: &charbuf,
2438 ch: (unsigned char) *cmpp);
2439 }
2440 else
2441 /* The last read character is not part
2442 of the number anymore. */
2443 n = 12;
2444
2445 break;
2446 }
2447
2448 /* We are pushing all read characters back. */
2449 if (cmpp > mbdigits[n])
2450 {
2451 ungetc (c, s);
2452 while (--cmpp > mbdigits[n])
2453 ungetc_not_eof ((unsigned char) *cmpp, s);
2454 c = (unsigned char) *cmpp;
2455 }
2456#endif
2457 }
2458
2459 if (n >= 12)
2460 {
2461 /* The last read character is not part
2462 of the number anymore. */
2463 ungetc (c, s);
2464 break;
2465 }
2466 }
2467
2468 if (width == 0 || inchar () == EOF)
2469 break;
2470
2471 if (width > 0)
2472 --width;
2473 }
2474 }
2475
2476#ifndef COMPILE_WSCANF
2477 no_i18nflt:
2478 ;
2479#endif
2480 }
2481
2482 if (char_buffer_error (buffer: &charbuf))
2483 {
2484 __set_errno (ENOMEM);
2485 done = EOF;
2486 goto errout;
2487 }
2488
2489 /* Have we read any character? If we try to read a number
2490 in hexadecimal notation and we have read only the `0x'
2491 prefix this is an error. */
2492 if (__glibc_unlikely (char_buffer_size (&charbuf) == got_sign
2493 || ((flags & HEXA_FLOAT)
2494 && (char_buffer_size (&charbuf)
2495 == 2 + got_sign))))
2496 conv_error ();
2497
2498 scan_float:
2499 /* Convert the number. */
2500 char_buffer_add (buffer: &charbuf, L_('\0'));
2501 if (char_buffer_error (buffer: &charbuf))
2502 {
2503 __set_errno (ENOMEM);
2504 done = EOF;
2505 goto errout;
2506 }
2507#if __HAVE_FLOAT128_UNLIKE_LDBL
2508 if ((flags & LONGDBL) \
2509 && (mode_flags & SCANF_LDBL_USES_FLOAT128) != 0)
2510 {
2511 _Float128 d = __strtof128_internal
2512 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2513 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2514 *ARG (_Float128 *) = d;
2515 }
2516 else
2517#endif
2518 if ((flags & LONGDBL) \
2519 && __glibc_likely ((mode_flags & SCANF_LDBL_IS_DBL) == 0))
2520 {
2521 long double d = __strtold_internal
2522 (char_buffer_start (buffer: &charbuf), &tw, flags & GROUP);
2523 if (!(flags & SUPPRESS) && tw != char_buffer_start (buffer: &charbuf))
2524 *ARG (long double *) = d;
2525 }
2526 else if (flags & (LONG | LONGDBL))
2527 {
2528 double d = __strtod_internal
2529 (char_buffer_start (buffer: &charbuf), &tw, flags & GROUP);
2530 if (!(flags & SUPPRESS) && tw != char_buffer_start (buffer: &charbuf))
2531 *ARG (double *) = d;
2532 }
2533 else
2534 {
2535 float d = __strtof_internal
2536 (char_buffer_start (buffer: &charbuf), &tw, flags & GROUP);
2537 if (!(flags & SUPPRESS) && tw != char_buffer_start (buffer: &charbuf))
2538 *ARG (float *) = d;
2539 }
2540
2541 if (__glibc_unlikely (tw == char_buffer_start (&charbuf)))
2542 conv_error ();
2543
2544 if (!(flags & SUPPRESS))
2545 ++done;
2546 break;
2547
2548 case L_('['): /* Character class. */
2549 if (flags & LONG)
2550 STRING_ARG (wstr, wchar_t, 100);
2551 else
2552 STRING_ARG (str, char, 100);
2553
2554 if (*f == L_('^'))
2555 {
2556 ++f;
2557 not_in = 1;
2558 }
2559 else
2560 not_in = 0;
2561
2562
2563#ifdef COMPILE_WSCANF
2564 /* Find the beginning and the end of the scanlist. We are not
2565 creating a lookup table since it would have to be too large.
2566 Instead we search each time through the string. This is not
2567 a constant lookup time but who uses this feature deserves to
2568 be punished. */
2569 tw = (wchar_t *) f; /* Marks the beginning. */
2570
2571 if (*f == L']')
2572 ++f;
2573
2574 while ((fc = *f++) != L'\0' && fc != L']');
2575
2576 if (__glibc_unlikely (fc == L'\0'))
2577 conv_error ();
2578 wchar_t *twend = (wchar_t *) f - 1;
2579#else
2580 /* Fill WP with byte flags indexed by character.
2581 We will use this flag map for matching input characters. */
2582 if (!scratch_buffer_set_array_size
2583 (buffer: &charbuf.scratch, UCHAR_MAX + 1, size: 1))
2584 {
2585 done = EOF;
2586 goto errout;
2587 }
2588 memset (charbuf.scratch.data, '\0', UCHAR_MAX + 1);
2589
2590 fc = *f;
2591 if (fc == ']' || fc == '-')
2592 {
2593 /* If ] or - appears before any char in the set, it is not
2594 the terminator or separator, but the first char in the
2595 set. */
2596 ((char *)charbuf.scratch.data)[fc] = 1;
2597 ++f;
2598 }
2599
2600 while ((fc = *f++) != '\0' && fc != ']')
2601 if (fc == '-' && *f != '\0' && *f != ']' && f[-2] <= *f)
2602 {
2603 /* Add all characters from the one before the '-'
2604 up to (but not including) the next format char. */
2605 for (fc = f[-2]; fc < *f; ++fc)
2606 ((char *)charbuf.scratch.data)[fc] = 1;
2607 }
2608 else
2609 /* Add the character to the flag map. */
2610 ((char *)charbuf.scratch.data)[fc] = 1;
2611
2612 if (__glibc_unlikely (fc == '\0'))
2613 conv_error();
2614#endif
2615
2616 if (flags & LONG)
2617 {
2618 size_t now = read_in;
2619#ifdef COMPILE_WSCANF
2620 if (__glibc_unlikely (inchar () == WEOF))
2621 input_error ();
2622
2623 do
2624 {
2625 wchar_t *runp;
2626
2627 /* Test whether it's in the scanlist. */
2628 runp = tw;
2629 while (runp < twend)
2630 {
2631 if (runp[0] == L'-' && runp[1] != '\0'
2632 && runp + 1 != twend
2633 && runp != tw
2634 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2635 {
2636 /* Match against all characters in between the
2637 first and last character of the sequence. */
2638 wchar_t wc;
2639
2640 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2641 if ((wint_t) wc == c)
2642 break;
2643
2644 if (wc <= runp[1] && !not_in)
2645 break;
2646 if (wc <= runp[1] && not_in)
2647 {
2648 /* The current character is not in the
2649 scanset. */
2650 ungetc (c, s);
2651 goto out;
2652 }
2653
2654 runp += 2;
2655 }
2656 else
2657 {
2658 if ((wint_t) *runp == c && !not_in)
2659 break;
2660 if ((wint_t) *runp == c && not_in)
2661 {
2662 ungetc (c, s);
2663 goto out;
2664 }
2665
2666 ++runp;
2667 }
2668 }
2669
2670 if (runp == twend && !not_in)
2671 {
2672 ungetc (c, s);
2673 goto out;
2674 }
2675
2676 if (!(flags & SUPPRESS))
2677 {
2678 *wstr++ = c;
2679
2680 if ((flags & MALLOC)
2681 && wstr == (wchar_t *) *strptr + strsize)
2682 {
2683 /* Enlarge the buffer. */
2684 wstr = (wchar_t *) realloc (*strptr,
2685 (2 * strsize)
2686 * sizeof (wchar_t));
2687 if (wstr == NULL)
2688 {
2689 /* Can't allocate that much. Last-ditch
2690 effort. */
2691 wstr = (wchar_t *)
2692 realloc (*strptr, (strsize + 1)
2693 * sizeof (wchar_t));
2694 if (wstr == NULL)
2695 {
2696 if (flags & POSIX_MALLOC)
2697 {
2698 done = EOF;
2699 goto errout;
2700 }
2701 /* We lose. Oh well. Terminate the string
2702 and stop converting, so at least we don't
2703 skip any input. */
2704 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2705 strptr = NULL;
2706 ++done;
2707 conv_error ();
2708 }
2709 else
2710 {
2711 *strptr = (char *) wstr;
2712 wstr += strsize;
2713 ++strsize;
2714 }
2715 }
2716 else
2717 {
2718 *strptr = (char *) wstr;
2719 wstr += strsize;
2720 strsize *= 2;
2721 }
2722 }
2723 }
2724 }
2725 while ((width < 0 || --width > 0) && inchar () != WEOF);
2726 out:
2727#else
2728 char buf[MB_LEN_MAX];
2729 size_t cnt = 0;
2730 mbstate_t cstate;
2731
2732 if (__glibc_unlikely (inchar () == EOF))
2733 input_error ();
2734
2735 memset (&cstate, '\0', sizeof (cstate));
2736
2737 do
2738 {
2739 if (((char *) charbuf.scratch.data)[c] == not_in)
2740 {
2741 ungetc_not_eof (c, s);
2742 break;
2743 }
2744
2745 /* This is easy. */
2746 if (!(flags & SUPPRESS))
2747 {
2748 size_t n;
2749
2750 /* Convert it into a wide character. */
2751 buf[0] = c;
2752 n = __mbrtowc (wstr, buf, 1, &cstate);
2753
2754 if (n == (size_t) -2)
2755 {
2756 /* Possibly correct character, just not enough
2757 input. */
2758 ++cnt;
2759 assert (cnt < MB_LEN_MAX);
2760 continue;
2761 }
2762 cnt = 0;
2763
2764 ++wstr;
2765 if ((flags & MALLOC)
2766 && wstr == (wchar_t *) *strptr + strsize)
2767 {
2768 /* Enlarge the buffer. */
2769 wstr = (wchar_t *) realloc (ptr: *strptr,
2770 size: (2 * strsize
2771 * sizeof (wchar_t)));
2772 if (wstr == NULL)
2773 {
2774 /* Can't allocate that much. Last-ditch
2775 effort. */
2776 wstr = (wchar_t *)
2777 realloc (ptr: *strptr, size: ((strsize + 1)
2778 * sizeof (wchar_t)));
2779 if (wstr == NULL)
2780 {
2781 if (flags & POSIX_MALLOC)
2782 {
2783 done = EOF;
2784 goto errout;
2785 }
2786 /* We lose. Oh well. Terminate the
2787 string and stop converting,
2788 so at least we don't skip any input. */
2789 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2790 strptr = NULL;
2791 ++done;
2792 conv_error ();
2793 }
2794 else
2795 {
2796 *strptr = (char *) wstr;
2797 wstr += strsize;
2798 ++strsize;
2799 }
2800 }
2801 else
2802 {
2803 *strptr = (char *) wstr;
2804 wstr += strsize;
2805 strsize *= 2;
2806 }
2807 }
2808 }
2809
2810 if (width >= 0 && --width <= 0)
2811 break;
2812 }
2813 while (inchar () != EOF);
2814
2815 if (__glibc_unlikely (cnt != 0))
2816 /* We stopped in the middle of recognizing another
2817 character. That's a problem. */
2818 encode_error ();
2819#endif
2820
2821 if (__glibc_unlikely (now == read_in))
2822 /* We haven't successfully read any character. */
2823 conv_error ();
2824
2825 if (!(flags & SUPPRESS))
2826 {
2827 *wstr++ = L'\0';
2828
2829 if ((flags & MALLOC)
2830 && wstr - (wchar_t *) *strptr != strsize)
2831 {
2832 wchar_t *cp = (wchar_t *)
2833 realloc (ptr: *strptr, size: ((wstr - (wchar_t *) *strptr)
2834 * sizeof (wchar_t)));
2835 if (cp != NULL)
2836 *strptr = (char *) cp;
2837 }
2838 strptr = NULL;
2839
2840 ++done;
2841 }
2842 }
2843 else
2844 {
2845 size_t now = read_in;
2846
2847 if (__glibc_unlikely (inchar () == EOF))
2848 input_error ();
2849
2850#ifdef COMPILE_WSCANF
2851
2852 memset (&state, '\0', sizeof (state));
2853
2854 do
2855 {
2856 wchar_t *runp;
2857 size_t n;
2858
2859 /* Test whether it's in the scanlist. */
2860 runp = tw;
2861 while (runp < twend)
2862 {
2863 if (runp[0] == L'-' && runp[1] != '\0'
2864 && runp + 1 != twend
2865 && runp != tw
2866 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2867 {
2868 /* Match against all characters in between the
2869 first and last character of the sequence. */
2870 wchar_t wc;
2871
2872 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2873 if ((wint_t) wc == c)
2874 break;
2875
2876 if (wc <= runp[1] && !not_in)
2877 break;
2878 if (wc <= runp[1] && not_in)
2879 {
2880 /* The current character is not in the
2881 scanset. */
2882 ungetc (c, s);
2883 goto out2;
2884 }
2885
2886 runp += 2;
2887 }
2888 else
2889 {
2890 if ((wint_t) *runp == c && !not_in)
2891 break;
2892 if ((wint_t) *runp == c && not_in)
2893 {
2894 ungetc (c, s);
2895 goto out2;
2896 }
2897
2898 ++runp;
2899 }
2900 }
2901
2902 if (runp == twend && !not_in)
2903 {
2904 ungetc (c, s);
2905 goto out2;
2906 }
2907
2908 if (!(flags & SUPPRESS))
2909 {
2910 if ((flags & MALLOC)
2911 && *strptr + strsize - str <= MB_LEN_MAX)
2912 {
2913 /* Enlarge the buffer. */
2914 size_t strleng = str - *strptr;
2915 char *newstr;
2916
2917 newstr = (char *) realloc (*strptr, 2 * strsize);
2918 if (newstr == NULL)
2919 {
2920 /* Can't allocate that much. Last-ditch
2921 effort. */
2922 newstr = (char *) realloc (*strptr,
2923 strleng + MB_LEN_MAX);
2924 if (newstr == NULL)
2925 {
2926 if (flags & POSIX_MALLOC)
2927 {
2928 done = EOF;
2929 goto errout;
2930 }
2931 /* We lose. Oh well. Terminate the string
2932 and stop converting, so at least we don't
2933 skip any input. */
2934 ((char *) (*strptr))[strleng] = '\0';
2935 strptr = NULL;
2936 ++done;
2937 conv_error ();
2938 }
2939 else
2940 {
2941 *strptr = newstr;
2942 str = newstr + strleng;
2943 strsize = strleng + MB_LEN_MAX;
2944 }
2945 }
2946 else
2947 {
2948 *strptr = newstr;
2949 str = newstr + strleng;
2950 strsize *= 2;
2951 }
2952 }
2953 }
2954
2955 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2956 if (__glibc_unlikely (n == (size_t) -1))
2957 encode_error ();
2958
2959 assert (n <= MB_LEN_MAX);
2960 str += n;
2961 }
2962 while ((width < 0 || --width > 0) && inchar () != WEOF);
2963 out2:
2964#else
2965 do
2966 {
2967 if (((char *) charbuf.scratch.data)[c] == not_in)
2968 {
2969 ungetc_not_eof (c, s);
2970 break;
2971 }
2972
2973 /* This is easy. */
2974 if (!(flags & SUPPRESS))
2975 {
2976 *str++ = c;
2977 if ((flags & MALLOC)
2978 && (char *) str == *strptr + strsize)
2979 {
2980 /* Enlarge the buffer. */
2981 size_t newsize = 2 * strsize;
2982
2983 allocagain:
2984 str = (char *) realloc (ptr: *strptr, size: newsize);
2985 if (str == NULL)
2986 {
2987 /* Can't allocate that much. Last-ditch
2988 effort. */
2989 if (newsize > strsize + 1)
2990 {
2991 newsize = strsize + 1;
2992 goto allocagain;
2993 }
2994 if (flags & POSIX_MALLOC)
2995 {
2996 done = EOF;
2997 goto errout;
2998 }
2999 /* We lose. Oh well. Terminate the
3000 string and stop converting,
3001 so at least we don't skip any input. */
3002 ((char *) (*strptr))[strsize - 1] = '\0';
3003 strptr = NULL;
3004 ++done;
3005 conv_error ();
3006 }
3007 else
3008 {
3009 *strptr = (char *) str;
3010 str += strsize;
3011 strsize = newsize;
3012 }
3013 }
3014 }
3015 }
3016 while ((width < 0 || --width > 0) && inchar () != EOF);
3017#endif
3018
3019 if (__glibc_unlikely (now == read_in))
3020 /* We haven't successfully read any character. */
3021 conv_error ();
3022
3023 if (!(flags & SUPPRESS))
3024 {
3025#ifdef COMPILE_WSCANF
3026 /* We have to emit the code to get into the initial
3027 state. */
3028 char buf[MB_LEN_MAX];
3029 size_t n = __wcrtomb (buf, L'\0', &state);
3030 if (n > 0 && (flags & MALLOC)
3031 && str + n >= *strptr + strsize)
3032 {
3033 /* Enlarge the buffer. */
3034 size_t strleng = str - *strptr;
3035 char *newstr;
3036
3037 newstr = (char *) realloc (*strptr, strleng + n + 1);
3038 if (newstr == NULL)
3039 {
3040 if (flags & POSIX_MALLOC)
3041 {
3042 done = EOF;
3043 goto errout;
3044 }
3045 /* We lose. Oh well. Terminate the string
3046 and stop converting, so at least we don't
3047 skip any input. */
3048 ((char *) (*strptr))[strleng] = '\0';
3049 strptr = NULL;
3050 ++done;
3051 conv_error ();
3052 }
3053 else
3054 {
3055 *strptr = newstr;
3056 str = newstr + strleng;
3057 strsize = strleng + n + 1;
3058 }
3059 }
3060
3061 str = __mempcpy (str, buf, n);
3062#endif
3063 *str++ = '\0';
3064
3065 if ((flags & MALLOC) && str - *strptr != strsize)
3066 {
3067 char *cp = (char *) realloc (ptr: *strptr, size: str - *strptr);
3068 if (cp != NULL)
3069 *strptr = cp;
3070 }
3071 strptr = NULL;
3072
3073 ++done;
3074 }
3075 }
3076 break;
3077
3078 case L_('p'): /* Generic pointer. */
3079 base = 16;
3080 /* A PTR must be the same size as a `long int'. */
3081 flags &= ~(SHORT|LONGDBL);
3082 if (need_long)
3083 flags |= LONG;
3084 flags |= READ_POINTER;
3085 goto number;
3086
3087 default:
3088 /* If this is an unknown format character punt. */
3089 conv_error ();
3090 }
3091 }
3092
3093 /* The last thing we saw in the format string was a white space.
3094 Consume the last white spaces. */
3095 if (skip_space)
3096 {
3097 do
3098 c = inchar ();
3099 while (ISSPACE (c));
3100 ungetc (c, s);
3101 }
3102
3103 errout:
3104 /* Unlock stream. */
3105 UNLOCK_STREAM (s);
3106
3107 scratch_buffer_free (buffer: &charbuf.scratch);
3108
3109 if (__glibc_unlikely (done == EOF))
3110 {
3111 if (__glibc_unlikely (ptrs_to_free != NULL))
3112 {
3113 struct ptrs_to_free *p = ptrs_to_free;
3114 while (p != NULL)
3115 {
3116 for (size_t cnt = 0; cnt < p->count; ++cnt)
3117 {
3118 free (ptr: *p->ptrs[cnt]);
3119 *p->ptrs[cnt] = NULL;
3120 }
3121 p = p->next;
3122 ptrs_to_free = p;
3123 }
3124 }
3125 }
3126 else if (__glibc_unlikely (strptr != NULL))
3127 {
3128 free (ptr: *strptr);
3129 *strptr = NULL;
3130 }
3131 return done;
3132}
3133

source code of glibc/stdio-common/vfscanf-internal.c