1 | /* Test compilation of truncated regular expressions. |
2 | Copyright (C) 2018-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
/* This test constructs various patterns in an attempt to trigger
   over-reads in the regular expression compiler, such as bug
   23578.  */
22 | |
23 | #include <array_length.h> |
24 | #include <errno.h> |
25 | #include <locale.h> |
26 | #include <regex.h> |
27 | #include <stdio.h> |
28 | #include <stdlib.h> |
29 | #include <string.h> |
30 | #include <support/check.h> |
31 | #include <support/next_to_fault.h> |
32 | #include <support/support.h> |
33 | #include <support/test-driver.h> |
34 | #include <wchar.h> |
35 | |
/* Names of the locales under which every pattern is compiled.  The
   inner dimension holds the longest name plus its terminating NUL.  */
static const char locales[][17] =
  {
    "C",
    "C.UTF-8",
    "en_US.UTF-8",
    "de_DE.ISO-8859-1",
  };
44 | |
/* Base syntax selections.  Each one is tried on its own and again
   combined with further flags by the test loop.  */
static const reg_syntax_t syntaxes[] =
  {
    RE_SYNTAX_EMACS,

    /* awk variants.  */
    RE_SYNTAX_AWK, RE_SYNTAX_GNU_AWK, RE_SYNTAX_POSIX_AWK,

    /* grep variants.  */
    RE_SYNTAX_GREP, RE_SYNTAX_EGREP, RE_SYNTAX_POSIX_EGREP,

    /* POSIX variants.  */
    RE_SYNTAX_POSIX_BASIC,
    RE_SYNTAX_POSIX_EXTENDED,
    RE_SYNTAX_POSIX_MINIMAL_EXTENDED,
  };
59 | |
/* Suffixes appended after the initial character.  Each leaves a
   bracket, group, or escape unfinished (the empty string adds
   nothing), so the pattern ends where the compiler may expect more
   input.  */
static const char trailing_strings[][4] =
  {
    "",
    "[", "\\", "[\\",
    "(", "(\\", "\\(",
  };
71 | |
72 | static int |
73 | do_test (void) |
74 | { |
75 | /* Staging buffer for the constructed regular expression. */ |
76 | char buffer[16]; |
77 | |
78 | /* Allocation used to detect over-reading by the regular expression |
79 | compiler. */ |
80 | struct support_next_to_fault ntf |
81 | = support_next_to_fault_allocate (size: sizeof (buffer)); |
82 | |
83 | /* Arbitrary Unicode codepoint at which we stop generating |
84 | characters. We do not probe the whole range because that would |
85 | take too long due to combinatorical exploision as the result of |
86 | combination with other flags. */ |
87 | static const wchar_t last_character = 0xfff; |
88 | |
89 | for (size_t locale_idx = 0; locale_idx < array_length (locales); |
90 | ++ locale_idx) |
91 | { |
92 | if (setlocale (LC_ALL, locales[locale_idx]) == NULL) |
93 | { |
94 | support_record_failure (); |
95 | printf (format: "error: setlocale (\"%s\"): %m" , locales[locale_idx]); |
96 | continue; |
97 | } |
98 | if (test_verbose > 0) |
99 | printf (format: "info: testing locale \"%s\"\n" , locales[locale_idx]); |
100 | |
101 | for (wchar_t wc = 0; wc <= last_character; ++wc) |
102 | { |
103 | char *after_wc; |
104 | if (wc == 0) |
105 | { |
106 | /* wcrtomb treats L'\0' in a special way. */ |
107 | *buffer = '\0'; |
108 | after_wc = &buffer[1]; |
109 | } |
110 | else |
111 | { |
112 | mbstate_t ps = { }; |
113 | size_t ret = wcrtomb (buffer, wc, &ps); |
114 | if (ret == (size_t) -1) |
115 | { |
116 | /* EILSEQ means that the target character set |
117 | cannot encode the character. */ |
118 | if (errno != EILSEQ) |
119 | { |
120 | support_record_failure (); |
121 | printf (format: "error: wcrtomb (0x%x) failed: %m\n" , |
122 | (unsigned) wc); |
123 | } |
124 | continue; |
125 | } |
126 | TEST_VERIFY_EXIT (ret != 0); |
127 | after_wc = &buffer[ret]; |
128 | } |
129 | |
130 | for (size_t trailing_idx = 0; |
131 | trailing_idx < array_length (trailing_strings); |
132 | ++trailing_idx) |
133 | { |
134 | char *after_trailing |
135 | = stpcpy (after_wc, trailing_strings[trailing_idx]); |
136 | |
137 | for (int do_nul = 0; do_nul < 2; ++do_nul) |
138 | { |
139 | char *after_nul; |
140 | if (do_nul) |
141 | { |
142 | *after_trailing = '\0'; |
143 | after_nul = &after_trailing[1]; |
144 | } |
145 | else |
146 | after_nul = after_trailing; |
147 | |
148 | size_t length = after_nul - buffer; |
149 | |
150 | /* Make sure that the faulting region starts |
151 | after the used portion of the buffer. */ |
152 | char *ntf_start = ntf.buffer + sizeof (buffer) - length; |
153 | memcpy (ntf_start, buffer, length); |
154 | |
155 | for (const reg_syntax_t *psyntax = syntaxes; |
156 | psyntax < array_end (syntaxes); ++psyntax) |
157 | for (int do_icase = 0; do_icase < 2; ++do_icase) |
158 | { |
159 | re_syntax_options = *psyntax; |
160 | if (do_icase) |
161 | re_syntax_options |= RE_ICASE; |
162 | |
163 | regex_t reg; |
164 | memset (®, 0, sizeof (reg)); |
165 | const char *msg = re_compile_pattern |
166 | (pattern: ntf_start, length: length, buffer: ®); |
167 | if (msg != NULL) |
168 | { |
169 | if (test_verbose > 0) |
170 | { |
171 | char *quoted = support_quote_blob |
172 | (blob: buffer, length); |
173 | printf (format: "info: compilation failed for pattern" |
174 | " \"%s\", syntax 0x%lx: %s\n" , |
175 | quoted, re_syntax_options, msg); |
176 | free (ptr: quoted); |
177 | } |
178 | } |
179 | else |
180 | regfree (preg: ®); |
181 | } |
182 | } |
183 | } |
184 | } |
185 | } |
186 | |
187 | support_next_to_fault_free (&ntf); |
188 | |
189 | return 0; |
190 | } |
191 | |
192 | #include <support/test-driver.c> |
193 | |