1 | /* Regular expression tests. |
2 | Copyright (C) 2002-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <sys/types.h> |
20 | #include <mcheck.h> |
21 | #include <regex.h> |
22 | #include <stdio.h> |
23 | #include <stdlib.h> |
24 | |
/* Table of regular expressions that are expected to match.  Each entry
   gives the pattern, the subject string, the regcomp flags, the number
   of match slots handed to regexec, and the expected offsets for those
   slots.  */
struct
{
  const char *pattern;
  const char *string;
  int flags;
  int nmatch;
  regmatch_t rm[5];
} tests[] = {
  /* A bracket expression must also consume a newline.  */
  { .pattern = "[^~]*~", .string = "\nx~y",
    .flags = 0, .nmatch = 2, .rm = { { 0, 3 }, { -1, -1 } } },

  /* Assorted matches.  */
  { .pattern = "a(.*)b", .string = "a b",
    .flags = REG_EXTENDED, .nmatch = 2, .rm = { { 0, 3 }, { 1, 2 } } },
  { .pattern = ".*|\\([KIO]\\)\\([^|]*\\).*|?[KIO]",
    .string = "10~.~|P|K0|I10|O16|?KSb",
    .flags = 0, .nmatch = 3,
    .rm = { { 0, 21 }, { 15, 16 }, { 16, 18 } } },
  { .pattern = ".*|\\([KIO]\\)\\([^|]*\\).*|?\\1",
    .string = "10~.~|P|K0|I10|O16|?KSb",
    .flags = 0, .nmatch = 3,
    .rm = { { 0, 21 }, { 8, 9 }, { 9, 10 } } },
  { .pattern
      = "^\\(a*\\)\\1\\{9\\}\\(a\\{0,9\\}\\)\\([0-9]*;.*[^a]\\2\\([0-9]\\)\\)",
    .string
      = "a1;;0a1aa2aaa3aaaa4aaaaa5aaaaaa6aaaaaaa7aaaaaaaa8aaaaaaaaa9aa2aa1a0",
    .flags = 0, .nmatch = 5,
    .rm = { { 0, 67 }, { 0, 0 }, { 0, 1 }, { 1, 67 }, { 66, 67 } } },

  /* Anchoring inside a BRE group.  POSIX only says that this may match;
     glibc regex has always matched, so keep it that way.  */
  { .pattern = "\\(^\\|foo\\)bar", .string = "bar",
    .flags = 0, .nmatch = 2, .rm = { { 0, 3 }, { -1, -1 } } },
  { .pattern = "\\(foo\\|^\\)bar", .string = "bar",
    .flags = 0, .nmatch = 2, .rm = { { 0, 3 }, { -1, -1 } } },

  /* In an ERE the ^ has to act as an anchor.  */
  { .pattern = "(^|foo)bar", .string = "bar",
    .flags = REG_EXTENDED, .nmatch = 2, .rm = { { 0, 3 }, { -1, -1 } } },
  { .pattern = "(foo|^)bar", .string = "bar",
    .flags = REG_EXTENDED, .nmatch = 2, .rm = { { 0, 3 }, { -1, -1 } } },

  /* Without REG_EXTENDED, POSIX does not allow ^ to be an anchor here.  */
  { .pattern = "(^|foo)bar", .string = "(^|foo)bar",
    .flags = 0, .nmatch = 2, .rm = { { 0, 10 }, { -1, -1 } } },
  { .pattern = "(foo|^)bar", .string = "(foo|^)bar",
    .flags = 0, .nmatch = 2, .rm = { { 0, 10 }, { -1, -1 } } },

  /* Further back-reference tests.  */
  { .pattern = "()\\1", .string = "x",
    .flags = REG_EXTENDED, .nmatch = 2, .rm = { { 0, 0 }, { 0, 0 } } },
  { .pattern = "()x\\1", .string = "x",
    .flags = REG_EXTENDED, .nmatch = 2, .rm = { { 0, 1 }, { 0, 0 } } },
  { .pattern = "()\\1*\\1*", .string = "",
    .flags = REG_EXTENDED, .nmatch = 2, .rm = { { 0, 0 }, { 0, 0 } } },
  { .pattern = "([0-9]).*\\1(a*)", .string = "7;7a6",
    .flags = REG_EXTENDED, .nmatch = 3,
    .rm = { { 0, 4 }, { 0, 1 }, { 3, 4 } } },
  { .pattern = "([0-9]).*\\1(a*)", .string = "7;7a",
    .flags = REG_EXTENDED, .nmatch = 3,
    .rm = { { 0, 4 }, { 0, 1 }, { 3, 4 } } },
  { .pattern = "(b)()c\\1", .string = "bcb",
    .flags = REG_EXTENDED, .nmatch = 3,
    .rm = { { 0, 3 }, { 0, 1 }, { 1, 1 } } },
  { .pattern = "()(b)c\\2", .string = "bcb",
    .flags = REG_EXTENDED, .nmatch = 3,
    .rm = { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
  { .pattern = "a(b)()c\\1", .string = "abcb",
    .flags = REG_EXTENDED, .nmatch = 3,
    .rm = { { 0, 4 }, { 1, 2 }, { 2, 2 } } },
  { .pattern = "a()(b)c\\2", .string = "abcb",
    .flags = REG_EXTENDED, .nmatch = 3,
    .rm = { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
  { .pattern = "()(b)\\1c\\2", .string = "bcb",
    .flags = REG_EXTENDED, .nmatch = 3,
    .rm = { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
  { .pattern = "(b())\\2\\1", .string = "bbbb",
    .flags = REG_EXTENDED, .nmatch = 3,
    .rm = { { 0, 2 }, { 0, 1 }, { 1, 1 } } },
  { .pattern = "a()(b)\\1c\\2", .string = "abcb",
    .flags = REG_EXTENDED, .nmatch = 3,
    .rm = { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
  { .pattern = "a()d(b)\\1c\\2", .string = "adbcb",
    .flags = REG_EXTENDED, .nmatch = 3,
    .rm = { { 0, 5 }, { 1, 1 }, { 2, 3 } } },
  { .pattern = "a(b())\\2\\1", .string = "abbbb",
    .flags = REG_EXTENDED, .nmatch = 3,
    .rm = { { 0, 3 }, { 1, 2 }, { 2, 2 } } },
  { .pattern = "(bb())\\2\\1", .string = "bbbb",
    .flags = REG_EXTENDED, .nmatch = 3,
    .rm = { { 0, 4 }, { 0, 2 }, { 2, 2 } } },
  { .pattern = "^([^,]*),\\1,\\1$", .string = "a,a,a",
    .flags = REG_EXTENDED, .nmatch = 2, .rm = { { 0, 5 }, { 0, 1 } } },
  { .pattern = "^([^,]*),\\1,\\1$", .string = "ab,ab,ab",
    .flags = REG_EXTENDED, .nmatch = 2, .rm = { { 0, 8 }, { 0, 2 } } },
  { .pattern = "^([^,]*),\\1,\\1,\\1$", .string = "abc,abc,abc,abc",
    .flags = REG_EXTENDED, .nmatch = 2, .rm = { { 0, 15 }, { 0, 3 } } },
  { .pattern = "^(.?)(.?)(.?)(.?)(.?).?\\5\\4\\3\\2\\1$",
    .string = "level",
    .flags = REG_NOSUB | REG_EXTENDED, .nmatch = 0,
    .rm = { { -1, -1 } } },
  { .pattern
      = "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$",
    .string = "level",
    .flags = REG_NOSUB | REG_EXTENDED, .nmatch = 0,
    .rm = { { -1, -1 } } },
  { .pattern
      = "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$",
    .string = "abcdedcba",
    .flags = REG_EXTENDED, .nmatch = 1, .rm = { { 0, 9 } } },
#if 0
  /* XXX Disabled: these entries still fail.  */
  { .pattern
      = "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$",
    .string = "ababababa",
    .flags = REG_EXTENDED, .nmatch = 1, .rm = { { 0, 9 } } },
  { .pattern
      = "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$",
    .string = "level",
    .flags = REG_NOSUB | REG_EXTENDED, .nmatch = 0,
    .rm = { { -1, -1 } } },
  { .pattern
      = "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$",
    .string = "ababababa",
    .flags = REG_EXTENDED, .nmatch = 1, .rm = { { 0, 9 } } },
#endif
};
90 | |
91 | int |
92 | main (void) |
93 | { |
94 | regex_t re; |
95 | regmatch_t rm[5]; |
96 | size_t i; |
97 | int n, ret = 0; |
98 | |
99 | mtrace (); |
100 | |
101 | for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i) |
102 | { |
103 | n = regcomp (preg: &re, pattern: tests[i].pattern, cflags: tests[i].flags); |
104 | if (n != 0) |
105 | { |
106 | char buf[500]; |
107 | regerror (errcode: n, preg: &re, errbuf: buf, errbuf_size: sizeof (buf)); |
108 | printf (format: "%s: regcomp %zd failed: %s\n" , tests[i].pattern, i, buf); |
109 | ret = 1; |
110 | continue; |
111 | } |
112 | |
113 | if (regexec (preg: &re, String: tests[i].string, nmatch: tests[i].nmatch, pmatch: rm, eflags: 0)) |
114 | { |
115 | printf (format: "%s: regexec %zd failed\n" , tests[i].pattern, i); |
116 | ret = 1; |
117 | regfree (preg: &re); |
118 | continue; |
119 | } |
120 | |
121 | for (n = 0; n < tests[i].nmatch; ++n) |
122 | if (rm[n].rm_so != tests[i].rm[n].rm_so |
123 | || rm[n].rm_eo != tests[i].rm[n].rm_eo) |
124 | { |
125 | if (tests[i].rm[n].rm_so == -1 && tests[i].rm[n].rm_eo == -1) |
126 | break; |
127 | printf (format: "%s: regexec %zd match failure rm[%d] %d..%d\n" , |
128 | tests[i].pattern, i, n, rm[n].rm_so, rm[n].rm_eo); |
129 | ret = 1; |
130 | break; |
131 | } |
132 | |
133 | regfree (preg: &re); |
134 | } |
135 | |
136 | return ret; |
137 | } |
138 | |