1//===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//===----------------------------------------------------------------------===//
7//
8// This is a utility class used to parse user-provided text files with
9// "special case lists" for code sanitizers. Such files are used to
10// define an "ABI list" for DataFlowSanitizer and allow/exclusion lists for
11// sanitizers like AddressSanitizer or UndefinedBehaviorSanitizer.
12//
13// Empty lines and lines starting with "#" are ignored. Sections are defined
14// using a '[section_name]' header and can be used to specify sanitizers the
// entries below it apply to. Section names are regular expressions, and
// entries without a section header match all sections (i.e. a '[*]' header
// is assumed).
18// The remaining lines should have the form:
19// prefix:wildcard_expression[=category]
// If category is not specified, it is assumed to be the empty string.
// Definitions of "prefix" and "category" are sanitizer-specific. For example,
// sanitizer exclusion lists support the prefixes "src", "fun" and "global",
// whose wildcard expressions define, respectively, source files, functions or
// globals which shouldn't be instrumented.
25// Examples of categories:
26// "functional": used in DFSan to list functions with pure functional
27// semantics.
28// "init": used in ASan exclusion list to disable initialization-order bugs
29// detection for certain globals or source files.
30// Full special case list file example:
31// ---
32// [address]
33// # Excluded items:
34// fun:*_ZN4base6subtle*
35// global:*global_with_bad_access_or_initialization*
36// global:*global_with_initialization_issues*=init
37// type:*Namespace::ClassName*=init
38// src:file_with_tricky_code.cc
39// src:ignore-global-initializers-issues.cc=init
40//
41// [dataflow]
42// # Functions with pure functional semantics:
43// fun:cos=functional
44// fun:sin=functional
45// ---
// Note that the wildcard is in fact an llvm::Regex, but '*' is automatically
// replaced with '.*'.
48//
49//===----------------------------------------------------------------------===//
50
51#ifndef LLVM_SUPPORT_SPECIALCASELIST_H
52#define LLVM_SUPPORT_SPECIALCASELIST_H
53
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/TrigramIndex.h"
#include <memory>
#include <string>
#include <utility>
#include <vector>
60
61namespace llvm {
// Forward declarations of the types named by the declarations below.
class StringRef;
class MemoryBuffer;

namespace vfs {
class FileSystem;
} // namespace vfs
68
69class SpecialCaseList {
70public:
71 /// Parses the special case list entries from files. On failure, returns
72 /// 0 and writes an error message to string.
73 static std::unique_ptr<SpecialCaseList>
74 create(const std::vector<std::string> &Paths, llvm::vfs::FileSystem &FS,
75 std::string &Error);
76 /// Parses the special case list from a memory buffer. On failure, returns
77 /// 0 and writes an error message to string.
78 static std::unique_ptr<SpecialCaseList> create(const MemoryBuffer *MB,
79 std::string &Error);
80 /// Parses the special case list entries from files. On failure, reports a
81 /// fatal error.
82 static std::unique_ptr<SpecialCaseList>
83 createOrDie(const std::vector<std::string> &Paths, llvm::vfs::FileSystem &FS);
84
85 ~SpecialCaseList();
86
87 /// Returns true, if special case list contains a line
88 /// \code
89 /// @Prefix:<E>=@Category
90 /// \endcode
91 /// where @Query satisfies wildcard expression <E> in a given @Section.
92 bool inSection(StringRef Section, StringRef Prefix, StringRef Query,
93 StringRef Category = StringRef()) const;
94
95 /// Returns the line number corresponding to the special case list entry if
96 /// the special case list contains a line
97 /// \code
98 /// @Prefix:<E>=@Category
99 /// \endcode
100 /// where @Query satisfies wildcard expression <E> in a given @Section.
101 /// Returns zero if there is no exclusion entry corresponding to this
102 /// expression.
103 unsigned inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query,
104 StringRef Category = StringRef()) const;
105
106protected:
107 // Implementations of the create*() functions that can also be used by derived
108 // classes.
109 bool createInternal(const std::vector<std::string> &Paths,
110 vfs::FileSystem &VFS, std::string &Error);
111 bool createInternal(const MemoryBuffer *MB, std::string &Error);
112
113 SpecialCaseList() = default;
114 SpecialCaseList(SpecialCaseList const &) = delete;
115 SpecialCaseList &operator=(SpecialCaseList const &) = delete;
116
117 /// Represents a set of regular expressions. Regular expressions which are
118 /// "literal" (i.e. no regex metacharacters) are stored in Strings. The
119 /// reason for doing so is efficiency; StringMap is much faster at matching
120 /// literal strings than Regex.
121 class Matcher {
122 public:
123 bool insert(std::string Regexp, unsigned LineNumber, std::string &REError);
124 // Returns the line number in the source file that this query matches to.
125 // Returns zero if no match is found.
126 unsigned match(StringRef Query) const;
127
128 private:
129 StringMap<unsigned> Strings;
130 TrigramIndex Trigrams;
131 std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
132 };
133
134 using SectionEntries = StringMap<StringMap<Matcher>>;
135
136 struct Section {
137 Section(std::unique_ptr<Matcher> M) : SectionMatcher(std::move(M)){};
138
139 std::unique_ptr<Matcher> SectionMatcher;
140 SectionEntries Entries;
141 };
142
143 std::vector<Section> Sections;
144
145 /// Parses just-constructed SpecialCaseList entries from a memory buffer.
146 bool parse(const MemoryBuffer *MB, StringMap<size_t> &SectionsMap,
147 std::string &Error);
148
149 // Helper method for derived classes to search by Prefix, Query, and Category
150 // once they have already resolved a section entry.
151 unsigned inSectionBlame(const SectionEntries &Entries, StringRef Prefix,
152 StringRef Query, StringRef Category) const;
153};
154
155} // namespace llvm
156
157#endif // LLVM_SUPPORT_SPECIALCASELIST_H
158
159