1 | //===- llvm/unittest/Support/RegexTest.cpp - Regex tests --===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/Support/Regex.h" |
10 | #include "llvm/ADT/SmallVector.h" |
11 | #include "gtest/gtest.h" |
12 | #include <cstring> |
13 | |
14 | using namespace llvm; |
15 | namespace { |
16 | |
17 | class RegexTest : public ::testing::Test { |
18 | }; |
19 | |
20 | TEST_F(RegexTest, Basics) { |
21 | Regex r1("^[0-9]+$" ); |
22 | EXPECT_TRUE(r1.match("916" )); |
23 | EXPECT_TRUE(r1.match("9" )); |
24 | EXPECT_FALSE(r1.match("9a" )); |
25 | |
26 | SmallVector<StringRef, 1> Matches; |
27 | Regex r2("[0-9]+" ); |
28 | EXPECT_TRUE(r2.match("aa216b" , &Matches)); |
29 | EXPECT_EQ(1u, Matches.size()); |
30 | EXPECT_EQ("216" , Matches[0].str()); |
31 | |
32 | Regex r3("[0-9]+([a-f])?:([0-9]+)" ); |
33 | EXPECT_TRUE(r3.match("9a:513b" , &Matches)); |
34 | EXPECT_EQ(3u, Matches.size()); |
35 | EXPECT_EQ("9a:513" , Matches[0].str()); |
36 | EXPECT_EQ("a" , Matches[1].str()); |
37 | EXPECT_EQ("513" , Matches[2].str()); |
38 | |
39 | EXPECT_TRUE(r3.match("9:513b" , &Matches)); |
40 | EXPECT_EQ(3u, Matches.size()); |
41 | EXPECT_EQ("9:513" , Matches[0].str()); |
42 | EXPECT_EQ("" , Matches[1].str()); |
43 | EXPECT_EQ("513" , Matches[2].str()); |
44 | |
45 | Regex r4("a[^b]+b" ); |
46 | std::string String="axxb" ; |
47 | String[2] = '\0'; |
48 | EXPECT_FALSE(r4.match("abb" )); |
49 | EXPECT_TRUE(r4.match(String, &Matches)); |
50 | EXPECT_EQ(1u, Matches.size()); |
51 | EXPECT_EQ(String, Matches[0].str()); |
52 | |
53 | std::string NulPattern="X[0-9]+X([a-f])?:([0-9]+)" ; |
54 | String="YX99a:513b" ; |
55 | NulPattern[7] = '\0'; |
56 | Regex r5(NulPattern); |
57 | EXPECT_FALSE(r5.match(String)); |
58 | EXPECT_FALSE(r5.match("X9" )); |
59 | String[3]='\0'; |
60 | EXPECT_TRUE(r5.match(String)); |
61 | } |
62 | |
63 | TEST_F(RegexTest, EmptyPattern) { |
64 | // The empty pattern doesn't match anything -- not even the empty string. |
65 | // (This is different from some other regex implementations.) |
66 | Regex r("" ); |
67 | EXPECT_FALSE(r.match("123" )); |
68 | EXPECT_FALSE(r.match("" )); |
69 | } |
70 | |
71 | TEST_F(RegexTest, Backreferences) { |
72 | Regex r1("([a-z]+)_\\1" ); |
73 | SmallVector<StringRef, 4> Matches; |
74 | EXPECT_TRUE(r1.match("abc_abc" , &Matches)); |
75 | EXPECT_EQ(2u, Matches.size()); |
76 | EXPECT_FALSE(r1.match("abc_ab" , &Matches)); |
77 | |
78 | Regex r2("a([0-9])b\\1c\\1" ); |
79 | EXPECT_TRUE(r2.match("a4b4c4" , &Matches)); |
80 | EXPECT_EQ(2u, Matches.size()); |
81 | EXPECT_EQ("4" , Matches[1].str()); |
82 | EXPECT_FALSE(r2.match("a2b2c3" )); |
83 | |
84 | Regex r3("a([0-9])([a-z])b\\1\\2" ); |
85 | EXPECT_TRUE(r3.match("a6zb6z" , &Matches)); |
86 | EXPECT_EQ(3u, Matches.size()); |
87 | EXPECT_EQ("6" , Matches[1].str()); |
88 | EXPECT_EQ("z" , Matches[2].str()); |
89 | EXPECT_FALSE(r3.match("a6zb6y" )); |
90 | EXPECT_FALSE(r3.match("a6zb7z" )); |
91 | |
92 | Regex r4("(abc|xyz|uvw)_\\1" ); |
93 | EXPECT_TRUE(r4.match("abc_abc" , &Matches)); |
94 | EXPECT_EQ(2u, Matches.size()); |
95 | EXPECT_FALSE(r4.match("abc_ab" , &Matches)); |
96 | EXPECT_FALSE(r4.match("abc_xyz" , &Matches)); |
97 | |
98 | Regex r5("(xyz|abc|uvw)_\\1" ); |
99 | EXPECT_TRUE(r5.match("abc_abc" , &Matches)); |
100 | EXPECT_EQ(2u, Matches.size()); |
101 | EXPECT_FALSE(r5.match("abc_ab" , &Matches)); |
102 | EXPECT_FALSE(r5.match("abc_xyz" , &Matches)); |
103 | |
104 | Regex r6("(xyz|uvw|abc)_\\1" ); |
105 | EXPECT_TRUE(r6.match("abc_abc" , &Matches)); |
106 | EXPECT_EQ(2u, Matches.size()); |
107 | EXPECT_FALSE(r6.match("abc_ab" , &Matches)); |
108 | EXPECT_FALSE(r6.match("abc_xyz" , &Matches)); |
109 | } |
110 | |
111 | TEST_F(RegexTest, Substitution) { |
112 | std::string Error; |
113 | |
114 | EXPECT_EQ("aNUMber" , Regex("[0-9]+" ).sub("NUM" , "a1234ber" )); |
115 | |
116 | // Standard Escapes |
117 | EXPECT_EQ("a\\ber" , Regex("[0-9]+" ).sub("\\\\" , "a1234ber" , &Error)); |
118 | EXPECT_EQ("" , Error); |
119 | EXPECT_EQ("a\nber" , Regex("[0-9]+" ).sub("\\n" , "a1234ber" , &Error)); |
120 | EXPECT_EQ("" , Error); |
121 | EXPECT_EQ("a\tber" , Regex("[0-9]+" ).sub("\\t" , "a1234ber" , &Error)); |
122 | EXPECT_EQ("" , Error); |
123 | EXPECT_EQ("ajber" , Regex("[0-9]+" ).sub("\\j" , "a1234ber" , &Error)); |
124 | EXPECT_EQ("" , Error); |
125 | |
126 | EXPECT_EQ("aber" , Regex("[0-9]+" ).sub("\\" , "a1234ber" , &Error)); |
127 | EXPECT_EQ(Error, "replacement string contained trailing backslash" ); |
128 | |
129 | // Backreferences |
130 | EXPECT_EQ("aa1234bber" , Regex("a[0-9]+b" ).sub("a\\0b" , "a1234ber" , &Error)); |
131 | EXPECT_EQ("" , Error); |
132 | |
133 | EXPECT_EQ("a1234ber" , Regex("a([0-9]+)b" ).sub("a\\1b" , "a1234ber" , &Error)); |
134 | EXPECT_EQ("" , Error); |
135 | |
136 | EXPECT_EQ("aber" , Regex("a[0-9]+b" ).sub("a\\100b" , "a1234ber" , &Error)); |
137 | EXPECT_EQ(Error, "invalid backreference string '100'" ); |
138 | |
139 | EXPECT_EQ("012345" , Regex("a([0-9]+).*" ).sub("0\\g<1>5" , "a1234ber" , &Error)); |
140 | EXPECT_EQ("" , Error); |
141 | |
142 | EXPECT_EQ("0a1234ber5" , |
143 | Regex("a([0-9]+).*" ).sub("0\\g<0>5" , "a1234ber" , &Error)); |
144 | EXPECT_EQ("" , Error); |
145 | |
146 | EXPECT_EQ("0A5" , Regex("a(.)(.)(.)(.)(.)(.)(.)(.)(.)(.).*" ) |
147 | .sub("0\\g<10>5" , "a123456789Aber" , &Error)); |
148 | EXPECT_EQ("" , Error); |
149 | |
150 | EXPECT_EQ("0g<-1>5" , |
151 | Regex("a([0-9]+).*" ).sub("0\\g<-1>5" , "a1234ber" , &Error)); |
152 | EXPECT_EQ("" , Error); |
153 | |
154 | EXPECT_EQ("0g<15" , Regex("a([0-9]+).*" ).sub("0\\g<15" , "a1234ber" , &Error)); |
155 | EXPECT_EQ("" , Error); |
156 | |
157 | EXPECT_EQ("0g<>15" , Regex("a([0-9]+).*" ).sub("0\\g<>15" , "a1234ber" , &Error)); |
158 | EXPECT_EQ("" , Error); |
159 | |
160 | EXPECT_EQ("0g<3e>1" , |
161 | Regex("a([0-9]+).*" ).sub("0\\g<3e>1" , "a1234ber" , &Error)); |
162 | EXPECT_EQ("" , Error); |
163 | |
164 | EXPECT_EQ("aber" , Regex("a([0-9]+)b" ).sub("a\\g<100>b" , "a1234ber" , &Error)); |
165 | EXPECT_EQ(Error, "invalid backreference string 'g<100>'" ); |
166 | } |
167 | |
168 | TEST_F(RegexTest, IsLiteralERE) { |
169 | EXPECT_TRUE(Regex::isLiteralERE("abc" )); |
170 | EXPECT_FALSE(Regex::isLiteralERE("a(bc)" )); |
171 | EXPECT_FALSE(Regex::isLiteralERE("^abc" )); |
172 | EXPECT_FALSE(Regex::isLiteralERE("abc$" )); |
173 | EXPECT_FALSE(Regex::isLiteralERE("a|bc" )); |
174 | EXPECT_FALSE(Regex::isLiteralERE("abc*" )); |
175 | EXPECT_FALSE(Regex::isLiteralERE("abc+" )); |
176 | EXPECT_FALSE(Regex::isLiteralERE("abc?" )); |
177 | EXPECT_FALSE(Regex::isLiteralERE("abc." )); |
178 | EXPECT_FALSE(Regex::isLiteralERE("a[bc]" )); |
179 | EXPECT_FALSE(Regex::isLiteralERE("abc\\1" )); |
180 | EXPECT_FALSE(Regex::isLiteralERE("abc{1,2}" )); |
181 | } |
182 | |
183 | TEST_F(RegexTest, Escape) { |
184 | EXPECT_EQ("a\\[bc\\]" , Regex::escape("a[bc]" )); |
185 | EXPECT_EQ("abc\\{1\\\\,2\\}" , Regex::escape("abc{1\\,2}" )); |
186 | } |
187 | |
188 | TEST_F(RegexTest, IsValid) { |
189 | std::string Error; |
190 | EXPECT_FALSE(Regex("(foo" ).isValid(Error)); |
191 | EXPECT_EQ("parentheses not balanced" , Error); |
192 | EXPECT_FALSE(Regex("a[b-" ).isValid(Error)); |
193 | EXPECT_EQ("invalid character range" , Error); |
194 | } |
195 | |
196 | TEST_F(RegexTest, MoveConstruct) { |
197 | Regex r1("^[0-9]+$" ); |
198 | Regex r2(std::move(r1)); |
199 | EXPECT_TRUE(r2.match("916" )); |
200 | } |
201 | |
202 | TEST_F(RegexTest, MoveAssign) { |
203 | Regex r1("^[0-9]+$" ); |
204 | Regex r2("abc" ); |
205 | r2 = std::move(r1); |
206 | EXPECT_TRUE(r2.match("916" )); |
207 | std::string Error; |
208 | EXPECT_FALSE(r1.isValid(Error)); |
209 | } |
210 | |
211 | TEST_F(RegexTest, NoArgConstructor) { |
212 | std::string Error; |
213 | Regex r1; |
214 | EXPECT_FALSE(r1.isValid(Error)); |
215 | EXPECT_EQ("invalid regular expression" , Error); |
216 | r1 = Regex("abc" ); |
217 | EXPECT_TRUE(r1.isValid(Error)); |
218 | } |
219 | |
220 | TEST_F(RegexTest, MatchInvalid) { |
221 | Regex r1; |
222 | std::string Error; |
223 | EXPECT_FALSE(r1.isValid(Error)); |
224 | EXPECT_FALSE(r1.match("X" )); |
225 | } |
226 | |
227 | // https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=3727 |
228 | TEST_F(RegexTest, OssFuzz3727Regression) { |
229 | // Wrap in a StringRef so the NUL byte doesn't terminate the string |
230 | Regex r(StringRef("[[[=GS\x00[=][" , 10)); |
231 | std::string Error; |
232 | EXPECT_FALSE(r.isValid(Error)); |
233 | } |
234 | |
235 | } |
236 | |
237 | TEST_F(RegexTest, NullStringInput) { |
238 | Regex r("^$" ); |
239 | // String data points to nullptr in default constructor |
240 | StringRef String; |
241 | EXPECT_TRUE(r.match(String)); |
242 | } |
243 | |