1 | //===- CallDescription.h - function/method call matching --*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
/// \file This file defines a generic mechanism for matching function and
/// method calls in the C, C++, and Objective-C languages. Instances of these
/// classes are frequently used together with the CallEvent classes.
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H |
16 | #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H |
17 | |
18 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |
19 | #include "llvm/ADT/ArrayRef.h" |
20 | #include "llvm/Support/Compiler.h" |
21 | #include <optional> |
22 | #include <vector> |
23 | |
// Forward declaration: this header only stores a pointer to IdentifierInfo
// (see CallDescription::II), so the full definition is not required here.
namespace clang {
class IdentifierInfo;
} // namespace clang
27 | |
28 | namespace clang { |
29 | namespace ento { |
30 | /// A `CallDescription` is a pattern that can be used to _match_ calls |
31 | /// based on the qualified name and the argument/parameter counts. |
32 | class CallDescription { |
33 | public: |
34 | enum class Mode { |
35 | /// Match calls to functions from the C standard library. This also |
36 | /// recognizes builtin variants whose name is derived by adding |
37 | /// "__builtin", "__inline" or similar prefixes or suffixes; but only |
38 | /// matches functions than are externally visible and are declared either |
39 | /// directly within a TU or in the namespace 'std'. |
40 | /// For the exact heuristics, see CheckerContext::isCLibraryFunction(). |
41 | CLibrary, |
42 | |
43 | /// An extended version of the `CLibrary` mode that also matches the |
44 | /// hardened variants like __FOO_chk() and __builtin__FOO_chk() that take |
45 | /// additional arguments compared to the "regular" function FOO(). |
46 | /// This is not the default behavior of `CLibrary` because in this case the |
47 | /// checker code must be prepared to handle the different parametrization. |
48 | /// For the exact heuristics, see CheckerContext::isHardenedVariantOf(). |
49 | CLibraryMaybeHardened, |
50 | |
51 | /// Matches "simple" functions that are not methods. (Static methods are |
52 | /// methods.) |
53 | SimpleFunc, |
54 | |
55 | /// Matches a C++ method (may be static, may be virtual, may be an |
56 | /// overloaded operator, a constructor or a destructor). |
57 | CXXMethod, |
58 | |
59 | /// Match any CallEvent that is not an ObjCMethodCall. |
60 | /// FIXME: Previously this was the default behavior of CallDescription, but |
61 | /// its use should be replaced by a more specific mode almost everywhere. |
62 | Unspecified, |
63 | |
64 | /// FIXME: Add support for ObjCMethodCall events (I'm not adding it because |
65 | /// I'm not familiar with Objective-C). Note that currently an early return |
66 | /// in `bool matches(const CallEvent &Call) const;` discards all |
67 | /// Objective-C method calls. |
68 | }; |
69 | |
70 | private: |
71 | friend class CallEvent; |
72 | using MaybeCount = std::optional<unsigned>; |
73 | |
74 | mutable std::optional<const IdentifierInfo *> II; |
75 | // The list of the qualified names used to identify the specified CallEvent, |
76 | // e.g. "{a, b}" represent the qualified names, like "a::b". |
77 | std::vector<std::string> QualifiedName; |
78 | MaybeCount RequiredArgs; |
79 | MaybeCount RequiredParams; |
80 | Mode MatchAs; |
81 | |
82 | public: |
83 | /// Constructs a CallDescription object. |
84 | /// |
85 | /// @param MatchAs Specifies the kind of the call that should be matched. |
86 | /// |
87 | /// @param QualifiedName The list of the name qualifiers of the function that |
88 | /// will be matched. The user is allowed to skip any of the qualifiers. |
89 | /// For example, {"std", "basic_string", "c_str"} would match both |
90 | /// std::basic_string<...>::c_str() and std::__1::basic_string<...>::c_str(). |
91 | /// |
92 | /// @param RequiredArgs The expected number of arguments that are passed to |
93 | /// the function. Omit this parameter (or pass std::nullopt) to match every |
94 | /// occurrence without checking the argument count in the call. |
95 | /// |
96 | /// @param RequiredParams The expected number of parameters in the function |
97 | /// definition that is called. Omit this parameter to match every occurrence |
98 | /// without checking the parameter count in the definition. |
99 | CallDescription(Mode MatchAs, ArrayRef<StringRef> QualifiedName, |
100 | MaybeCount RequiredArgs = std::nullopt, |
101 | MaybeCount RequiredParams = std::nullopt); |
102 | |
103 | /// Construct a CallDescription with default flags. |
104 | CallDescription(ArrayRef<StringRef> QualifiedName, |
105 | MaybeCount RequiredArgs = std::nullopt, |
106 | MaybeCount RequiredParams = std::nullopt); |
107 | |
108 | CallDescription(std::nullptr_t) = delete; |
109 | |
110 | /// Get the name of the function that this object matches. |
111 | StringRef getFunctionName() const { return QualifiedName.back(); } |
112 | |
113 | /// Get the qualified name parts in reversed order. |
114 | /// E.g. { "std", "vector", "data" } -> "vector", "std" |
115 | auto begin_qualified_name_parts() const { |
116 | return std::next(x: QualifiedName.rbegin()); |
117 | } |
118 | auto end_qualified_name_parts() const { return QualifiedName.rend(); } |
119 | |
120 | /// It's false, if and only if we expect a single identifier, such as |
121 | /// `getenv`. It's true for `std::swap`, or `my::detail::container::data`. |
122 | bool hasQualifiedNameParts() const { return QualifiedName.size() > 1; } |
123 | |
124 | /// @name Matching CallDescriptions against a CallEvent |
125 | /// @{ |
126 | |
127 | /// Returns true if the CallEvent is a call to a function that matches |
128 | /// the CallDescription. |
129 | /// |
130 | /// \note This function is not intended to be used to match Obj-C method |
131 | /// calls. |
132 | bool matches(const CallEvent &Call) const; |
133 | |
134 | /// Returns true whether the CallEvent matches on any of the CallDescriptions |
135 | /// supplied. |
136 | /// |
137 | /// \note This function is not intended to be used to match Obj-C method |
138 | /// calls. |
139 | friend bool matchesAny(const CallEvent &Call, const CallDescription &CD1) { |
140 | return CD1.matches(Call); |
141 | } |
142 | |
143 | /// \copydoc clang::ento::CallDescription::matchesAny(const CallEvent &, const CallDescription &) |
144 | template <typename... Ts> |
145 | friend bool matchesAny(const CallEvent &Call, const CallDescription &CD1, |
146 | const Ts &...CDs) { |
147 | return CD1.matches(Call) || matchesAny(Call, CDs...); |
148 | } |
149 | /// @} |
150 | |
151 | /// @name Matching CallDescriptions against a CallExpr |
152 | /// @{ |
153 | |
154 | /// Returns true if the CallExpr is a call to a function that matches the |
155 | /// CallDescription. |
156 | /// |
157 | /// When available, always prefer matching with a CallEvent! This function |
158 | /// exists only when that is not available, for example, when _only_ |
159 | /// syntactic check is done on a piece of code. |
160 | /// |
161 | /// Also, StdLibraryFunctionsChecker::Signature is likely a better candicade |
162 | /// for syntactic only matching if you are writing a new checker. This is |
163 | /// handy if a CallDescriptionMap is already there. |
164 | /// |
165 | /// The function is imprecise because CallEvent may know path sensitive |
166 | /// information, such as the precise argument count (see comments for |
167 | /// CallEvent::getNumArgs), the called function if it was called through a |
168 | /// function pointer, and other information not available syntactically. |
169 | bool matchesAsWritten(const CallExpr &CE) const; |
170 | |
171 | /// Returns true whether the CallExpr matches on any of the CallDescriptions |
172 | /// supplied. |
173 | /// |
174 | /// \note This function is not intended to be used to match Obj-C method |
175 | /// calls. |
176 | friend bool matchesAnyAsWritten(const CallExpr &CE, |
177 | const CallDescription &CD1) { |
178 | return CD1.matchesAsWritten(CE); |
179 | } |
180 | |
181 | /// \copydoc clang::ento::CallDescription::matchesAnyAsWritten(const CallExpr &, const CallDescription &) |
182 | template <typename... Ts> |
183 | friend bool matchesAnyAsWritten(const CallExpr &CE, |
184 | const CallDescription &CD1, |
185 | const Ts &...CDs) { |
186 | return CD1.matchesAsWritten(CE) || matchesAnyAsWritten(CE, CDs...); |
187 | } |
188 | /// @} |
189 | |
190 | private: |
191 | bool matchesImpl(const FunctionDecl *Callee, size_t ArgCount, |
192 | size_t ParamCount) const; |
193 | |
194 | bool matchNameOnly(const NamedDecl *ND) const; |
195 | bool matchQualifiedNameParts(const Decl *D) const; |
196 | }; |
197 | |
198 | /// An immutable map from CallDescriptions to arbitrary data. Provides a unified |
199 | /// way for checkers to react on function calls. |
200 | template <typename T> class CallDescriptionMap { |
201 | friend class CallDescriptionSet; |
202 | |
203 | // Some call descriptions aren't easily hashable (eg., the ones with qualified |
204 | // names in which some sections are omitted), so let's put them |
205 | // in a simple vector and use linear lookup. |
206 | // TODO: Implement an actual map for fast lookup for "hashable" call |
207 | // descriptions (eg., the ones for C functions that just match the name). |
208 | std::vector<std::pair<CallDescription, T>> LinearMap; |
209 | |
210 | public: |
211 | CallDescriptionMap( |
212 | std::initializer_list<std::pair<CallDescription, T>> &&List) |
213 | : LinearMap(List) {} |
214 | |
215 | template <typename InputIt> |
216 | CallDescriptionMap(InputIt First, InputIt Last) : LinearMap(First, Last) {} |
217 | |
218 | ~CallDescriptionMap() = default; |
219 | |
220 | // These maps are usually stored once per checker, so let's make sure |
221 | // we don't do redundant copies. |
222 | CallDescriptionMap(const CallDescriptionMap &) = delete; |
223 | CallDescriptionMap &operator=(const CallDescription &) = delete; |
224 | |
225 | CallDescriptionMap(CallDescriptionMap &&) = default; |
226 | CallDescriptionMap &operator=(CallDescriptionMap &&) = default; |
227 | |
228 | [[nodiscard]] const T *lookup(const CallEvent &Call) const { |
229 | // Slow path: linear lookup. |
230 | // TODO: Implement some sort of fast path. |
231 | for (const std::pair<CallDescription, T> &I : LinearMap) |
232 | if (I.first.matches(Call)) |
233 | return &I.second; |
234 | |
235 | return nullptr; |
236 | } |
237 | |
238 | /// When available, always prefer lookup with a CallEvent! This function |
239 | /// exists only when that is not available, for example, when _only_ |
240 | /// syntactic check is done on a piece of code. |
241 | /// |
242 | /// Also, StdLibraryFunctionsChecker::Signature is likely a better candicade |
243 | /// for syntactic only matching if you are writing a new checker. This is |
244 | /// handy if a CallDescriptionMap is already there. |
245 | /// |
246 | /// The function is imprecise because CallEvent may know path sensitive |
247 | /// information, such as the precise argument count (see comments for |
248 | /// CallEvent::getNumArgs), the called function if it was called through a |
249 | /// function pointer, and other information not available syntactically. |
250 | [[nodiscard]] const T *lookupAsWritten(const CallExpr &Call) const { |
251 | // Slow path: linear lookup. |
252 | // TODO: Implement some sort of fast path. |
253 | for (const std::pair<CallDescription, T> &I : LinearMap) |
254 | if (I.first.matchesAsWritten(Call)) |
255 | return &I.second; |
256 | |
257 | return nullptr; |
258 | } |
259 | }; |
260 | |
/// Short alias for CallDescription::Mode.
/// Enumerators of this enum class are used to construct CallDescription
/// objects; in that context the fully qualified name is needlessly verbose.
using CDM = CallDescription::Mode;
264 | |
265 | /// An immutable set of CallDescriptions. |
266 | /// Checkers can efficiently decide if a given CallEvent matches any |
267 | /// CallDescription in the set. |
268 | class CallDescriptionSet { |
269 | CallDescriptionMap<bool /*unused*/> Impl = {}; |
270 | |
271 | public: |
272 | CallDescriptionSet(std::initializer_list<CallDescription> &&List); |
273 | |
274 | CallDescriptionSet(const CallDescriptionSet &) = delete; |
275 | CallDescriptionSet &operator=(const CallDescription &) = delete; |
276 | |
277 | [[nodiscard]] bool contains(const CallEvent &Call) const; |
278 | |
279 | /// When available, always prefer lookup with a CallEvent! This function |
280 | /// exists only when that is not available, for example, when _only_ |
281 | /// syntactic check is done on a piece of code. |
282 | /// |
283 | /// Also, StdLibraryFunctionsChecker::Signature is likely a better candicade |
284 | /// for syntactic only matching if you are writing a new checker. This is |
285 | /// handy if a CallDescriptionMap is already there. |
286 | /// |
287 | /// The function is imprecise because CallEvent may know path sensitive |
288 | /// information, such as the precise argument count (see comments for |
289 | /// CallEvent::getNumArgs), the called function if it was called through a |
290 | /// function pointer, and other information not available syntactically. |
291 | [[nodiscard]] bool containsAsWritten(const CallExpr &CE) const; |
292 | }; |
293 | |
294 | } // namespace ento |
295 | } // namespace clang |
296 | |
297 | #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H |
298 | |