//===- llvm/unittests/Support/RegexTest.cpp - Regex tests ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#include "llvm/Support/Regex.h"
10#include "llvm/ADT/SmallVector.h"
11#include "gtest/gtest.h"
12#include <cstring>
13
14using namespace llvm;
15namespace {
16
17class RegexTest : public ::testing::Test {
18};
19
20TEST_F(RegexTest, Basics) {
21 Regex r1("^[0-9]+$");
22 EXPECT_TRUE(r1.match("916"));
23 EXPECT_TRUE(r1.match("9"));
24 EXPECT_FALSE(r1.match("9a"));
25
26 SmallVector<StringRef, 1> Matches;
27 Regex r2("[0-9]+");
28 EXPECT_TRUE(r2.match("aa216b", &Matches));
29 EXPECT_EQ(1u, Matches.size());
30 EXPECT_EQ("216", Matches[0].str());
31
32 Regex r3("[0-9]+([a-f])?:([0-9]+)");
33 EXPECT_TRUE(r3.match("9a:513b", &Matches));
34 EXPECT_EQ(3u, Matches.size());
35 EXPECT_EQ("9a:513", Matches[0].str());
36 EXPECT_EQ("a", Matches[1].str());
37 EXPECT_EQ("513", Matches[2].str());
38
39 EXPECT_TRUE(r3.match("9:513b", &Matches));
40 EXPECT_EQ(3u, Matches.size());
41 EXPECT_EQ("9:513", Matches[0].str());
42 EXPECT_EQ("", Matches[1].str());
43 EXPECT_EQ("513", Matches[2].str());
44
45 Regex r4("a[^b]+b");
46 std::string String="axxb";
47 String[2] = '\0';
48 EXPECT_FALSE(r4.match("abb"));
49 EXPECT_TRUE(r4.match(String, &Matches));
50 EXPECT_EQ(1u, Matches.size());
51 EXPECT_EQ(String, Matches[0].str());
52
53 std::string NulPattern="X[0-9]+X([a-f])?:([0-9]+)";
54 String="YX99a:513b";
55 NulPattern[7] = '\0';
56 Regex r5(NulPattern);
57 EXPECT_FALSE(r5.match(String));
58 EXPECT_FALSE(r5.match("X9"));
59 String[3]='\0';
60 EXPECT_TRUE(r5.match(String));
61}
62
63TEST_F(RegexTest, EmptyPattern) {
64 // The empty pattern doesn't match anything -- not even the empty string.
65 // (This is different from some other regex implementations.)
66 Regex r("");
67 EXPECT_FALSE(r.match("123"));
68 EXPECT_FALSE(r.match(""));
69}
70
71TEST_F(RegexTest, Backreferences) {
72 Regex r1("([a-z]+)_\\1");
73 SmallVector<StringRef, 4> Matches;
74 EXPECT_TRUE(r1.match("abc_abc", &Matches));
75 EXPECT_EQ(2u, Matches.size());
76 EXPECT_FALSE(r1.match("abc_ab", &Matches));
77
78 Regex r2("a([0-9])b\\1c\\1");
79 EXPECT_TRUE(r2.match("a4b4c4", &Matches));
80 EXPECT_EQ(2u, Matches.size());
81 EXPECT_EQ("4", Matches[1].str());
82 EXPECT_FALSE(r2.match("a2b2c3"));
83
84 Regex r3("a([0-9])([a-z])b\\1\\2");
85 EXPECT_TRUE(r3.match("a6zb6z", &Matches));
86 EXPECT_EQ(3u, Matches.size());
87 EXPECT_EQ("6", Matches[1].str());
88 EXPECT_EQ("z", Matches[2].str());
89 EXPECT_FALSE(r3.match("a6zb6y"));
90 EXPECT_FALSE(r3.match("a6zb7z"));
91
92 Regex r4("(abc|xyz|uvw)_\\1");
93 EXPECT_TRUE(r4.match("abc_abc", &Matches));
94 EXPECT_EQ(2u, Matches.size());
95 EXPECT_FALSE(r4.match("abc_ab", &Matches));
96 EXPECT_FALSE(r4.match("abc_xyz", &Matches));
97
98 Regex r5("(xyz|abc|uvw)_\\1");
99 EXPECT_TRUE(r5.match("abc_abc", &Matches));
100 EXPECT_EQ(2u, Matches.size());
101 EXPECT_FALSE(r5.match("abc_ab", &Matches));
102 EXPECT_FALSE(r5.match("abc_xyz", &Matches));
103
104 Regex r6("(xyz|uvw|abc)_\\1");
105 EXPECT_TRUE(r6.match("abc_abc", &Matches));
106 EXPECT_EQ(2u, Matches.size());
107 EXPECT_FALSE(r6.match("abc_ab", &Matches));
108 EXPECT_FALSE(r6.match("abc_xyz", &Matches));
109}
110
111TEST_F(RegexTest, Substitution) {
112 std::string Error;
113
114 EXPECT_EQ("aNUMber", Regex("[0-9]+").sub("NUM", "a1234ber"));
115
116 // Standard Escapes
117 EXPECT_EQ("a\\ber", Regex("[0-9]+").sub("\\\\", "a1234ber", &Error));
118 EXPECT_EQ("", Error);
119 EXPECT_EQ("a\nber", Regex("[0-9]+").sub("\\n", "a1234ber", &Error));
120 EXPECT_EQ("", Error);
121 EXPECT_EQ("a\tber", Regex("[0-9]+").sub("\\t", "a1234ber", &Error));
122 EXPECT_EQ("", Error);
123 EXPECT_EQ("ajber", Regex("[0-9]+").sub("\\j", "a1234ber", &Error));
124 EXPECT_EQ("", Error);
125
126 EXPECT_EQ("aber", Regex("[0-9]+").sub("\\", "a1234ber", &Error));
127 EXPECT_EQ(Error, "replacement string contained trailing backslash");
128
129 // Backreferences
130 EXPECT_EQ("aa1234bber", Regex("a[0-9]+b").sub("a\\0b", "a1234ber", &Error));
131 EXPECT_EQ("", Error);
132
133 EXPECT_EQ("a1234ber", Regex("a([0-9]+)b").sub("a\\1b", "a1234ber", &Error));
134 EXPECT_EQ("", Error);
135
136 EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error));
137 EXPECT_EQ(Error, "invalid backreference string '100'");
138
139 EXPECT_EQ("012345", Regex("a([0-9]+).*").sub("0\\g<1>5", "a1234ber", &Error));
140 EXPECT_EQ("", Error);
141
142 EXPECT_EQ("0a1234ber5",
143 Regex("a([0-9]+).*").sub("0\\g<0>5", "a1234ber", &Error));
144 EXPECT_EQ("", Error);
145
146 EXPECT_EQ("0A5", Regex("a(.)(.)(.)(.)(.)(.)(.)(.)(.)(.).*")
147 .sub("0\\g<10>5", "a123456789Aber", &Error));
148 EXPECT_EQ("", Error);
149
150 EXPECT_EQ("0g<-1>5",
151 Regex("a([0-9]+).*").sub("0\\g<-1>5", "a1234ber", &Error));
152 EXPECT_EQ("", Error);
153
154 EXPECT_EQ("0g<15", Regex("a([0-9]+).*").sub("0\\g<15", "a1234ber", &Error));
155 EXPECT_EQ("", Error);
156
157 EXPECT_EQ("0g<>15", Regex("a([0-9]+).*").sub("0\\g<>15", "a1234ber", &Error));
158 EXPECT_EQ("", Error);
159
160 EXPECT_EQ("0g<3e>1",
161 Regex("a([0-9]+).*").sub("0\\g<3e>1", "a1234ber", &Error));
162 EXPECT_EQ("", Error);
163
164 EXPECT_EQ("aber", Regex("a([0-9]+)b").sub("a\\g<100>b", "a1234ber", &Error));
165 EXPECT_EQ(Error, "invalid backreference string 'g<100>'");
166}
167
168TEST_F(RegexTest, IsLiteralERE) {
169 EXPECT_TRUE(Regex::isLiteralERE("abc"));
170 EXPECT_FALSE(Regex::isLiteralERE("a(bc)"));
171 EXPECT_FALSE(Regex::isLiteralERE("^abc"));
172 EXPECT_FALSE(Regex::isLiteralERE("abc$"));
173 EXPECT_FALSE(Regex::isLiteralERE("a|bc"));
174 EXPECT_FALSE(Regex::isLiteralERE("abc*"));
175 EXPECT_FALSE(Regex::isLiteralERE("abc+"));
176 EXPECT_FALSE(Regex::isLiteralERE("abc?"));
177 EXPECT_FALSE(Regex::isLiteralERE("abc."));
178 EXPECT_FALSE(Regex::isLiteralERE("a[bc]"));
179 EXPECT_FALSE(Regex::isLiteralERE("abc\\1"));
180 EXPECT_FALSE(Regex::isLiteralERE("abc{1,2}"));
181}
182
183TEST_F(RegexTest, Escape) {
184 EXPECT_EQ("a\\[bc\\]", Regex::escape("a[bc]"));
185 EXPECT_EQ("abc\\{1\\\\,2\\}", Regex::escape("abc{1\\,2}"));
186}
187
188TEST_F(RegexTest, IsValid) {
189 std::string Error;
190 EXPECT_FALSE(Regex("(foo").isValid(Error));
191 EXPECT_EQ("parentheses not balanced", Error);
192 EXPECT_FALSE(Regex("a[b-").isValid(Error));
193 EXPECT_EQ("invalid character range", Error);
194}
195
196TEST_F(RegexTest, MoveConstruct) {
197 Regex r1("^[0-9]+$");
198 Regex r2(std::move(r1));
199 EXPECT_TRUE(r2.match("916"));
200}
201
202TEST_F(RegexTest, MoveAssign) {
203 Regex r1("^[0-9]+$");
204 Regex r2("abc");
205 r2 = std::move(r1);
206 EXPECT_TRUE(r2.match("916"));
207 std::string Error;
208 EXPECT_FALSE(r1.isValid(Error));
209}
210
211TEST_F(RegexTest, NoArgConstructor) {
212 std::string Error;
213 Regex r1;
214 EXPECT_FALSE(r1.isValid(Error));
215 EXPECT_EQ("invalid regular expression", Error);
216 r1 = Regex("abc");
217 EXPECT_TRUE(r1.isValid(Error));
218}
219
220TEST_F(RegexTest, MatchInvalid) {
221 Regex r1;
222 std::string Error;
223 EXPECT_FALSE(r1.isValid(Error));
224 EXPECT_FALSE(r1.match("X"));
225}
226
227// https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=3727
228TEST_F(RegexTest, OssFuzz3727Regression) {
229 // Wrap in a StringRef so the NUL byte doesn't terminate the string
230 Regex r(StringRef("[[[=GS\x00[=][", 10));
231 std::string Error;
232 EXPECT_FALSE(r.isValid(Error));
233}
234
235}
236
237TEST_F(RegexTest, NullStringInput) {
238 Regex r("^$");
239 // String data points to nullptr in default constructor
240 StringRef String;
241 EXPECT_TRUE(r.match(String));
242}
243

// Source: llvm/unittests/Support/RegexTest.cpp