1 | //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines a DataflowAnalysisContext class that owns objects that |
10 | // encompass the state of a program and stores context that is used during |
11 | // dataflow analysis. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H |
16 | #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H |
17 | |
18 | #include "clang/AST/Decl.h" |
19 | #include "clang/AST/Expr.h" |
20 | #include "clang/AST/TypeOrdering.h" |
21 | #include "clang/Analysis/FlowSensitive/ASTOps.h" |
22 | #include "clang/Analysis/FlowSensitive/AdornedCFG.h" |
23 | #include "clang/Analysis/FlowSensitive/Arena.h" |
24 | #include "clang/Analysis/FlowSensitive/Solver.h" |
25 | #include "clang/Analysis/FlowSensitive/StorageLocation.h" |
26 | #include "clang/Analysis/FlowSensitive/Value.h" |
27 | #include "llvm/ADT/DenseMap.h" |
28 | #include "llvm/ADT/DenseSet.h" |
29 | #include "llvm/ADT/SetVector.h" |
30 | #include "llvm/Support/Compiler.h" |
31 | #include <cassert> |
32 | #include <memory> |
33 | #include <optional> |
34 | |
35 | namespace clang { |
36 | namespace dataflow { |
37 | class Logger; |
38 | |
39 | struct ContextSensitiveOptions { |
40 | /// The maximum depth to analyze. A value of zero is equivalent to disabling |
41 | /// context-sensitive analysis entirely. |
42 | unsigned Depth = 2; |
43 | }; |
44 | |
45 | /// Owns objects that encompass the state of a program and stores context that |
46 | /// is used during dataflow analysis. |
47 | class DataflowAnalysisContext { |
48 | public: |
49 | struct Options { |
50 | /// Options for analyzing function bodies when present in the translation |
51 | /// unit, or empty to disable context-sensitive analysis. Note that this is |
52 | /// fundamentally limited: some constructs, such as recursion, are |
53 | /// explicitly unsupported. |
54 | std::optional<ContextSensitiveOptions> ContextSensitiveOpts; |
55 | |
56 | /// If provided, analysis details will be recorded here. |
57 | /// (This is always non-null within an AnalysisContext, the framework |
58 | /// provides a fallback no-op logger). |
59 | Logger *Log = nullptr; |
60 | }; |
61 | |
62 | /// Constructs a dataflow analysis context. |
63 | /// |
64 | /// Requirements: |
65 | /// |
66 | /// `S` must not be null. |
67 | DataflowAnalysisContext(std::unique_ptr<Solver> S, |
68 | Options Opts = Options{ |
69 | /*ContextSensitiveOpts=*/.ContextSensitiveOpts: std::nullopt, |
70 | /*Logger=*/.Log: nullptr}); |
71 | ~DataflowAnalysisContext(); |
72 | |
73 | /// Sets a callback that returns the names and types of the synthetic fields |
74 | /// to add to a `RecordStorageLocation` of a given type. |
75 | /// Typically, this is called from the constructor of a `DataflowAnalysis` |
76 | /// |
77 | /// The field types returned by the callback may not have reference type. |
78 | /// |
79 | /// To maintain the invariant that all `RecordStorageLocation`s of a given |
80 | /// type have the same fields: |
81 | /// * The callback must always return the same result for a given type |
82 | /// * `setSyntheticFieldCallback()` must be called before any |
83 | // `RecordStorageLocation`s are created. |
84 | void setSyntheticFieldCallback( |
85 | std::function<llvm::StringMap<QualType>(QualType)> CB) { |
86 | assert(!RecordStorageLocationCreated); |
87 | SyntheticFieldCallback = CB; |
88 | } |
89 | |
90 | /// Returns a new storage location appropriate for `Type`. |
91 | /// |
92 | /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`. |
93 | StorageLocation &createStorageLocation(QualType Type); |
94 | |
95 | /// Creates a `RecordStorageLocation` for the given type and with the given |
96 | /// fields. |
97 | /// |
98 | /// Requirements: |
99 | /// |
100 | /// `FieldLocs` must contain exactly the fields returned by |
101 | /// `getModeledFields(Type)`. |
102 | /// `SyntheticFields` must contain exactly the fields returned by |
103 | /// `getSyntheticFields(Type)`. |
104 | RecordStorageLocation &createRecordStorageLocation( |
105 | QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, |
106 | RecordStorageLocation::SyntheticFieldMap SyntheticFields); |
107 | |
108 | /// Returns a stable storage location for `D`. |
109 | StorageLocation &getStableStorageLocation(const ValueDecl &D); |
110 | |
111 | /// Returns a stable storage location for `E`. |
112 | StorageLocation &getStableStorageLocation(const Expr &E); |
113 | |
114 | /// Returns a pointer value that represents a null pointer. Calls with |
115 | /// `PointeeType` that are canonically equivalent will return the same result. |
116 | /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`. |
117 | PointerValue &getOrCreateNullPointerValue(QualType PointeeType); |
118 | |
119 | /// Adds `Constraint` to current and future flow conditions in this context. |
120 | /// |
121 | /// Invariants must contain only flow-insensitive information, i.e. facts that |
122 | /// are true on all paths through the program. |
123 | /// Information can be added eagerly (when analysis begins), or lazily (e.g. |
124 | /// when values are first used). The analysis must be careful that the same |
125 | /// information is added regardless of which order blocks are analyzed in. |
126 | void addInvariant(const Formula &Constraint); |
127 | |
128 | /// Adds `Constraint` to the flow condition identified by `Token`. |
129 | void addFlowConditionConstraint(Atom Token, const Formula &Constraint); |
130 | |
131 | /// Creates a new flow condition with the same constraints as the flow |
132 | /// condition identified by `Token` and returns its token. |
133 | Atom forkFlowCondition(Atom Token); |
134 | |
135 | /// Creates a new flow condition that represents the disjunction of the flow |
136 | /// conditions identified by `FirstToken` and `SecondToken`, and returns its |
137 | /// token. |
138 | Atom joinFlowConditions(Atom FirstToken, Atom SecondToken); |
139 | |
140 | /// Returns true if the constraints of the flow condition identified by |
141 | /// `Token` imply that `F` is true. |
142 | /// Returns false if the flow condition does not imply `F` or if the solver |
143 | /// times out. |
144 | bool flowConditionImplies(Atom Token, const Formula &F); |
145 | |
146 | /// Returns true if the constraints of the flow condition identified by |
147 | /// `Token` still allow `F` to be true. |
148 | /// Returns false if the flow condition implies that `F` is false or if the |
149 | /// solver times out. |
150 | bool flowConditionAllows(Atom Token, const Formula &F); |
151 | |
152 | /// Returns true if `Val1` is equivalent to `Val2`. |
153 | /// Note: This function doesn't take into account constraints on `Val1` and |
154 | /// `Val2` imposed by the flow condition. |
155 | bool equivalentFormulas(const Formula &Val1, const Formula &Val2); |
156 | |
157 | LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token, |
158 | llvm::raw_ostream &OS = llvm::dbgs()); |
159 | |
160 | /// Returns the `AdornedCFG` registered for `F`, if any. Otherwise, |
161 | /// returns null. |
162 | const AdornedCFG *getAdornedCFG(const FunctionDecl *F); |
163 | |
164 | const Options &getOptions() { return Opts; } |
165 | |
166 | Arena &arena() { return *A; } |
167 | |
168 | /// Returns the outcome of satisfiability checking on `Constraints`. |
169 | /// |
170 | /// Flow conditions are not incorporated, so they may need to be manually |
171 | /// included in `Constraints` to provide contextually-accurate results, e.g. |
172 | /// if any definitions or relationships of the values in `Constraints` have |
173 | /// been stored in flow conditions. |
174 | Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints); |
175 | |
176 | /// Returns the fields of `Type`, limited to the set of fields modeled by this |
177 | /// context. |
178 | FieldSet getModeledFields(QualType Type); |
179 | |
180 | /// Returns the names and types of the synthetic fields for the given record |
181 | /// type. |
182 | llvm::StringMap<QualType> getSyntheticFields(QualType Type) { |
183 | assert(Type->isRecordType()); |
184 | if (SyntheticFieldCallback) { |
185 | llvm::StringMap<QualType> Result = SyntheticFieldCallback(Type); |
186 | // Synthetic fields are not allowed to have reference type. |
187 | assert([&Result] { |
188 | for (const auto &Entry : Result) |
189 | if (Entry.getValue()->isReferenceType()) |
190 | return false; |
191 | return true; |
192 | }()); |
193 | return Result; |
194 | } |
195 | return {}; |
196 | } |
197 | |
198 | private: |
199 | friend class Environment; |
200 | |
201 | struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> { |
202 | static QualType getEmptyKey() { |
203 | // Allow a NULL `QualType` by using a different value as the empty key. |
204 | return QualType::getFromOpaquePtr(Ptr: reinterpret_cast<Type *>(1)); |
205 | } |
206 | |
207 | using DenseMapInfo::getHashValue; |
208 | using DenseMapInfo::getTombstoneKey; |
209 | using DenseMapInfo::isEqual; |
210 | }; |
211 | |
212 | // Extends the set of modeled field declarations. |
213 | void addModeledFields(const FieldSet &Fields); |
214 | |
215 | /// Adds all constraints of the flow condition identified by `Token` and all |
216 | /// of its transitive dependencies to `Constraints`. |
217 | void |
218 | addTransitiveFlowConditionConstraints(Atom Token, |
219 | llvm::SetVector<const Formula *> &Out); |
220 | |
221 | /// Returns true if the solver is able to prove that there is a satisfying |
222 | /// assignment for `Constraints`. |
223 | bool isSatisfiable(llvm::SetVector<const Formula *> Constraints) { |
224 | return querySolver(Constraints: std::move(Constraints)).getStatus() == |
225 | Solver::Result::Status::Satisfiable; |
226 | } |
227 | |
228 | /// Returns true if the solver is able to prove that there is no satisfying |
229 | /// assignment for `Constraints` |
230 | bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) { |
231 | return querySolver(Constraints: std::move(Constraints)).getStatus() == |
232 | Solver::Result::Status::Unsatisfiable; |
233 | } |
234 | |
235 | std::unique_ptr<Solver> S; |
236 | std::unique_ptr<Arena> A; |
237 | |
238 | // Maps from program declarations and statements to storage locations that are |
239 | // assigned to them. These assignments are global (aggregated across all basic |
240 | // blocks) and are used to produce stable storage locations when the same |
241 | // basic blocks are evaluated multiple times. The storage locations that are |
242 | // in scope for a particular basic block are stored in `Environment`. |
243 | llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc; |
244 | llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc; |
245 | |
246 | // Null pointer values, keyed by the canonical pointee type. |
247 | // |
248 | // FIXME: The pointer values are indexed by the pointee types which are |
249 | // required to initialize the `PointeeLoc` field in `PointerValue`. Consider |
250 | // creating a type-independent `NullPointerValue` without a `PointeeLoc` |
251 | // field. |
252 | llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo> |
253 | NullPointerVals; |
254 | |
255 | Options Opts; |
256 | |
257 | // Flow conditions are tracked symbolically: each unique flow condition is |
258 | // associated with a fresh symbolic variable (token), bound to the clause that |
259 | // defines the flow condition. Conceptually, each binding corresponds to an |
260 | // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition |
261 | // token (an atomic boolean) and `Ci`s are the set of constraints in the flow |
262 | // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in |
263 | // the `FlowConditionConstraints` map, keyed by the token of the flow |
264 | // condition. |
265 | // |
266 | // Flow conditions depend on other flow conditions if they are created using |
267 | // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition |
268 | // dependencies is stored in the `FlowConditionDeps` map. |
269 | llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps; |
270 | llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints; |
271 | const Formula *Invariant = nullptr; |
272 | |
273 | llvm::DenseMap<const FunctionDecl *, AdornedCFG> FunctionContexts; |
274 | |
275 | // Fields modeled by environments covered by this context. |
276 | FieldSet ModeledFields; |
277 | |
278 | std::unique_ptr<Logger> LogOwner; // If created via flags. |
279 | |
280 | std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback; |
281 | |
282 | /// Has any `RecordStorageLocation` been created yet? |
283 | bool RecordStorageLocationCreated = false; |
284 | }; |
285 | |
286 | } // namespace dataflow |
287 | } // namespace clang |
288 | |
289 | #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H |
290 | |