1 | //===-- GlobPattern.h - glob pattern matcher implementation -*- C++ -*-----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements a glob pattern matcher. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_SUPPORT_GLOBPATTERN_H |
14 | #define LLVM_SUPPORT_GLOBPATTERN_H |
15 | |
16 | #include "llvm/ADT/BitVector.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/ADT/StringRef.h" |
19 | #include "llvm/Support/Error.h" |
20 | #include <optional> |
21 | |
22 | namespace llvm { |
23 | |
24 | /// This class implements a glob pattern matcher similar to the one found in |
/// bash, but with some key differences. Most notably, \p "*" matches all
/// characters, including path separators.
27 | /// |
28 | /// * \p "?" matches a single character. |
29 | /// * \p "*" matches zero or more characters. |
30 | /// * \p "[<chars>]" matches one character in the bracket. Character ranges, |
31 | /// e.g., \p "[a-z]", and negative sets via \p "[^ab]" or \p "[!ab]" are also |
32 | /// supported. |
33 | /// * \p "{<glob>,...}" matches one of the globs in the list. Nested brace |
34 | /// expansions are not supported. If \p MaxSubPatterns is empty then |
35 | /// brace expansions are not supported and characters \p "{,}" are treated as |
36 | /// literals. |
37 | /// * \p "\" escapes the next character so it is treated as a literal. |
38 | /// |
39 | /// |
40 | /// Some known edge cases are: |
41 | /// * \p "]" is allowed as the first character in a character class, i.e., |
42 | /// \p "[]]" is valid and matches the literal \p "]". |
43 | /// * The empty character class, i.e., \p "[]", is invalid. |
44 | /// * Empty or singleton brace expansions, e.g., \p "{}", \p "{a}", are invalid. |
45 | /// * \p "}" and \p "," that are not inside a brace expansion are taken as |
46 | /// literals, e.g., \p ",}" is valid but \p "{" is not. |
47 | /// |
48 | /// |
49 | /// For example, \p "*[/\\]foo.{c,cpp}" will match (unix or windows) paths to |
50 | /// all files named \p "foo.c" or \p "foo.cpp". |
51 | class GlobPattern { |
52 | public: |
53 | /// \param Pat the pattern to match against |
54 | /// \param MaxSubPatterns if provided limit the number of allowed subpatterns |
55 | /// created from expanding braces otherwise disable |
56 | /// brace expansion |
57 | static Expected<GlobPattern> |
58 | create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {}); |
59 | /// \returns \p true if \p S matches this glob pattern |
60 | bool match(StringRef S) const; |
61 | |
62 | // Returns true for glob pattern "*". Can be used to avoid expensive |
63 | // preparation/acquisition of the input for match(). |
64 | bool isTrivialMatchAll() const { |
65 | if (!Prefix.empty()) |
66 | return false; |
67 | if (SubGlobs.size() != 1) |
68 | return false; |
69 | return SubGlobs[0].getPat() == "*" ; |
70 | } |
71 | |
72 | private: |
73 | StringRef Prefix; |
74 | |
75 | struct SubGlobPattern { |
76 | /// \param Pat the pattern to match against |
77 | static Expected<SubGlobPattern> create(StringRef Pat); |
78 | /// \returns \p true if \p S matches this glob pattern |
79 | bool match(StringRef S) const; |
80 | StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); } |
81 | |
82 | // Brackets with their end position and matched bytes. |
83 | struct Bracket { |
84 | size_t NextOffset; |
85 | BitVector Bytes; |
86 | }; |
87 | SmallVector<Bracket, 0> Brackets; |
88 | SmallVector<char, 0> Pat; |
89 | }; |
90 | SmallVector<SubGlobPattern, 1> SubGlobs; |
91 | }; |
92 | } |
93 | |
94 | #endif // LLVM_SUPPORT_GLOBPATTERN_H |
95 | |