1 | //===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | //===----------------------------------------------------------------------===// |
7 | // |
8 | // This is a utility class used to parse user-provided text files with |
9 | // "special case lists" for code sanitizers. Such files are used to |
10 | // define an "ABI list" for DataFlowSanitizer and allow/exclusion lists for |
11 | // sanitizers like AddressSanitizer or UndefinedBehaviorSanitizer. |
12 | // |
13 | // Empty lines and lines starting with "#" are ignored. Sections are defined |
14 | // using a '[section_name]' header and can be used to specify sanitizers the |
15 | // entries below it apply to. Section names are regular expressions, and |
16 | // entries without a section header match all sections (i.e. an '[*]' header |
17 | // is assumed). |
18 | // The remaining lines should have the form: |
19 | // prefix:wildcard_expression[=category] |
20 | // If category is not specified, it is assumed to be empty string. |
21 | // Definitions of "prefix" and "category" are sanitizer-specific. For example, |
22 | // sanitizer exclusion lists support the prefixes "src", "fun" and "global". |
23 | // Wildcard expressions define, respectively, source files, functions or |
24 | // globals which shouldn't be instrumented. |
25 | // Examples of categories: |
26 | // "functional": used in DFSan to list functions with pure functional |
27 | // semantics. |
28 | // "init": used in ASan exclusion list to disable initialization-order bug |
29 | // detection for certain globals or source files. |
30 | // Full special case list file example: |
31 | // --- |
32 | // [address] |
33 | // # Excluded items: |
34 | // fun:*_ZN4base6subtle* |
35 | // global:*global_with_bad_access_or_initialization* |
36 | // global:*global_with_initialization_issues*=init |
37 | // type:*Namespace::ClassName*=init |
38 | // src:file_with_tricky_code.cc |
39 | // src:ignore-global-initializers-issues.cc=init |
40 | // |
41 | // [dataflow] |
42 | // # Functions with pure functional semantics: |
43 | // fun:cos=functional |
44 | // fun:sin=functional |
45 | // --- |
46 | // Note that the wild card is in fact an llvm::Regex, but * is automatically |
47 | // replaced with .* |
48 | // |
49 | //===----------------------------------------------------------------------===// |
50 | |
51 | #ifndef LLVM_SUPPORT_SPECIALCASELIST_H |
52 | #define LLVM_SUPPORT_SPECIALCASELIST_H |
53 | |
54 | #include "llvm/ADT/StringMap.h" |
55 | #include "llvm/Support/Regex.h" |
56 | #include "llvm/Support/TrigramIndex.h" |
57 | #include <memory> |
58 | #include <string> |
59 | #include <vector> |
60 | |
61 | namespace llvm { |
62 | class MemoryBuffer; |
63 | class StringRef; |
64 | |
65 | namespace vfs { |
66 | class FileSystem; |
67 | } |
68 | |
69 | class SpecialCaseList { |
70 | public: |
71 | /// Parses the special case list entries from files. On failure, returns |
72 | /// 0 and writes an error message to string. |
73 | static std::unique_ptr<SpecialCaseList> |
74 | create(const std::vector<std::string> &Paths, llvm::vfs::FileSystem &FS, |
75 | std::string &Error); |
76 | /// Parses the special case list from a memory buffer. On failure, returns |
77 | /// 0 and writes an error message to string. |
78 | static std::unique_ptr<SpecialCaseList> create(const MemoryBuffer *MB, |
79 | std::string &Error); |
80 | /// Parses the special case list entries from files. On failure, reports a |
81 | /// fatal error. |
82 | static std::unique_ptr<SpecialCaseList> |
83 | createOrDie(const std::vector<std::string> &Paths, llvm::vfs::FileSystem &FS); |
84 | |
85 | ~SpecialCaseList(); |
86 | |
87 | /// Returns true, if special case list contains a line |
88 | /// \code |
89 | /// @Prefix:<E>=@Category |
90 | /// \endcode |
91 | /// where @Query satisfies wildcard expression <E> in a given @Section. |
92 | bool inSection(StringRef Section, StringRef Prefix, StringRef Query, |
93 | StringRef Category = StringRef()) const; |
94 | |
95 | /// Returns the line number corresponding to the special case list entry if |
96 | /// the special case list contains a line |
97 | /// \code |
98 | /// @Prefix:<E>=@Category |
99 | /// \endcode |
100 | /// where @Query satisfies wildcard expression <E> in a given @Section. |
101 | /// Returns zero if there is no exclusion entry corresponding to this |
102 | /// expression. |
103 | unsigned inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, |
104 | StringRef Category = StringRef()) const; |
105 | |
106 | protected: |
107 | // Implementations of the create*() functions that can also be used by derived |
108 | // classes. |
109 | bool createInternal(const std::vector<std::string> &Paths, |
110 | vfs::FileSystem &VFS, std::string &Error); |
111 | bool createInternal(const MemoryBuffer *MB, std::string &Error); |
112 | |
113 | SpecialCaseList() = default; |
114 | SpecialCaseList(SpecialCaseList const &) = delete; |
115 | SpecialCaseList &operator=(SpecialCaseList const &) = delete; |
116 | |
117 | /// Represents a set of regular expressions. Regular expressions which are |
118 | /// "literal" (i.e. no regex metacharacters) are stored in Strings. The |
119 | /// reason for doing so is efficiency; StringMap is much faster at matching |
120 | /// literal strings than Regex. |
121 | class Matcher { |
122 | public: |
123 | bool insert(std::string Regexp, unsigned LineNumber, std::string &REError); |
124 | // Returns the line number in the source file that this query matches to. |
125 | // Returns zero if no match is found. |
126 | unsigned match(StringRef Query) const; |
127 | |
128 | private: |
129 | StringMap<unsigned> Strings; |
130 | TrigramIndex Trigrams; |
131 | std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes; |
132 | }; |
133 | |
134 | using SectionEntries = StringMap<StringMap<Matcher>>; |
135 | |
136 | struct Section { |
137 | Section(std::unique_ptr<Matcher> M) : SectionMatcher(std::move(M)){}; |
138 | |
139 | std::unique_ptr<Matcher> SectionMatcher; |
140 | SectionEntries Entries; |
141 | }; |
142 | |
143 | std::vector<Section> Sections; |
144 | |
145 | /// Parses just-constructed SpecialCaseList entries from a memory buffer. |
146 | bool parse(const MemoryBuffer *MB, StringMap<size_t> &SectionsMap, |
147 | std::string &Error); |
148 | |
149 | // Helper method for derived classes to search by Prefix, Query, and Category |
150 | // once they have already resolved a section entry. |
151 | unsigned inSectionBlame(const SectionEntries &Entries, StringRef Prefix, |
152 | StringRef Query, StringRef Category) const; |
153 | }; |
154 | |
155 | } // namespace llvm |
156 | |
157 | #endif // LLVM_SUPPORT_SPECIALCASELIST_H |
158 | |
159 | |