1 | //===- DataflowAnalysis.h ---------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines base types and functions for building dataflow analyses |
10 | // that run over Control-Flow Graphs (CFGs). |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H |
15 | #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H |
16 | |
17 | #include <iterator> |
18 | #include <optional> |
19 | #include <type_traits> |
20 | #include <utility> |
21 | #include <vector> |
22 | |
23 | #include "clang/AST/ASTContext.h" |
24 | #include "clang/Analysis/CFG.h" |
25 | #include "clang/Analysis/FlowSensitive/AdornedCFG.h" |
26 | #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" |
27 | #include "clang/Analysis/FlowSensitive/DataflowLattice.h" |
28 | #include "clang/Analysis/FlowSensitive/MatchSwitch.h" |
29 | #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" |
30 | #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" |
31 | #include "llvm/ADT/STLExtras.h" |
32 | #include "llvm/ADT/STLFunctionalExtras.h" |
33 | #include "llvm/ADT/SmallVector.h" |
34 | #include "llvm/Support/Errc.h" |
35 | #include "llvm/Support/Error.h" |
36 | |
37 | namespace clang { |
38 | namespace dataflow { |
39 | |
40 | /// Base class template for dataflow analyses built on a single lattice type. |
41 | /// |
42 | /// Requirements: |
43 | /// |
44 | /// `Derived` must be derived from a specialization of this class template and |
45 | /// must provide the following public members: |
46 | /// * `LatticeT initialElement()` - returns a lattice element that models the |
47 | /// initial state of a basic block; |
48 | /// * `void transfer(const CFGElement &, LatticeT &, Environment &)` - applies |
49 | /// the analysis transfer function for a given CFG element and lattice |
50 | /// element. |
51 | /// |
52 | /// `Derived` can optionally provide the following members: |
53 | /// * `void transferBranch(bool Branch, const Stmt *Stmt, TypeErasedLattice &E, |
54 | /// Environment &Env)` - applies the analysis transfer |
55 | /// function for a given edge from a CFG block of a conditional statement. |
56 | /// |
57 | /// `Derived` can optionally override the virtual functions in the |
58 | /// `Environment::ValueModel` interface (which is an indirect base class of |
59 | /// this class). |
60 | /// |
61 | /// `LatticeT` is a bounded join-semilattice that is used by `Derived` and must |
62 | /// provide the following public members: |
63 | /// * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the |
64 | /// argument by computing their least upper bound, modifies the object if |
65 | /// necessary, and returns an effect indicating whether any changes were |
66 | /// made to it; |
67 | /// FIXME: make it `static LatticeT join(const LatticeT&, const LatticeT&)` |
68 | /// * `bool operator==(const LatticeT &) const` - returns true if and only if |
69 | /// the object is equal to the argument. |
70 | /// |
71 | /// `LatticeT` can optionally provide the following members: |
72 | /// * `LatticeJoinEffect widen(const LatticeT &Previous)` - replaces the |
73 | /// lattice element with an approximation that can reach a fixed point more |
74 | /// quickly than iterated application of the transfer function alone. The |
75 | /// previous value is provided to inform the choice of widened value. The |
76 | /// function must also serve as a comparison operation, by indicating whether |
77 | /// the widened value is equivalent to the previous value with the returned |
78 | /// `LatticeJoinEffect`. |
79 | template <typename Derived, typename LatticeT> |
80 | class DataflowAnalysis : public TypeErasedDataflowAnalysis { |
81 | public: |
82 | /// Bounded join-semilattice that is used in the analysis. |
83 | using Lattice = LatticeT; |
84 | |
85 | explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {} |
86 | |
87 | explicit DataflowAnalysis(ASTContext &Context, |
88 | DataflowAnalysisOptions Options) |
89 | : TypeErasedDataflowAnalysis(Options), Context(Context) {} |
90 | |
91 | ASTContext &getASTContext() final { return Context; } |
92 | |
93 | TypeErasedLattice typeErasedInitialElement() final { |
94 | return {static_cast<Derived *>(this)->initialElement()}; |
95 | } |
96 | |
97 | TypeErasedLattice joinTypeErased(const TypeErasedLattice &E1, |
98 | const TypeErasedLattice &E2) final { |
99 | // FIXME: change the signature of join() to avoid copying here. |
100 | Lattice L1 = llvm::any_cast<const Lattice &>(E1.Value); |
101 | const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value); |
102 | L1.join(L2); |
103 | return {std::move(L1)}; |
104 | } |
105 | |
106 | LatticeJoinEffect widenTypeErased(TypeErasedLattice &Current, |
107 | const TypeErasedLattice &Previous) final { |
108 | Lattice &C = llvm::any_cast<Lattice &>(Current.Value); |
109 | const Lattice &P = llvm::any_cast<const Lattice &>(Previous.Value); |
110 | return widenInternal(Rank0{}, C, P); |
111 | } |
112 | |
113 | bool isEqualTypeErased(const TypeErasedLattice &E1, |
114 | const TypeErasedLattice &E2) final { |
115 | const Lattice &L1 = llvm::any_cast<const Lattice &>(E1.Value); |
116 | const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value); |
117 | return L1 == L2; |
118 | } |
119 | |
120 | void transferTypeErased(const CFGElement &Element, TypeErasedLattice &E, |
121 | Environment &Env) final { |
122 | Lattice &L = llvm::any_cast<Lattice &>(E.Value); |
123 | static_cast<Derived *>(this)->transfer(Element, L, Env); |
124 | } |
125 | |
126 | void transferBranchTypeErased(bool Branch, const Stmt *Stmt, |
127 | TypeErasedLattice &E, Environment &Env) final { |
128 | transferBranchInternal(Rank0{}, *static_cast<Derived *>(this), Branch, Stmt, |
129 | E, Env); |
130 | } |
131 | |
132 | private: |
133 | // These `Rank` structs are used for template metaprogramming to choose |
134 | // between overloads. |
135 | struct Rank1 {}; |
136 | struct Rank0 : Rank1 {}; |
137 | |
138 | // The first-choice implementation: use `widen` when it is available. |
139 | template <typename T> |
140 | static auto widenInternal(Rank0, T &Current, const T &Prev) |
141 | -> decltype(Current.widen(Prev)) { |
142 | return Current.widen(Prev); |
143 | } |
144 | |
145 | // The second-choice implementation: `widen` is unavailable. Widening is |
146 | // merged with equality checking, so when widening is unimplemented, we |
147 | // default to equality checking. |
148 | static LatticeJoinEffect widenInternal(Rank1, const Lattice &Current, |
149 | const Lattice &Prev) { |
150 | return Prev == Current ? LatticeJoinEffect::Unchanged |
151 | : LatticeJoinEffect::Changed; |
152 | } |
153 | |
154 | // The first-choice implementation: `transferBranch` is implemented. |
155 | template <typename Analysis> |
156 | static auto transferBranchInternal(Rank0, Analysis &A, bool Branch, |
157 | const Stmt *Stmt, TypeErasedLattice &L, |
158 | Environment &Env) |
159 | -> std::void_t<decltype(A.transferBranch( |
160 | Branch, Stmt, std::declval<LatticeT &>(), Env))> { |
161 | A.transferBranch(Branch, Stmt, llvm::any_cast<Lattice &>(L.Value), Env); |
162 | } |
163 | |
164 | // The second-choice implementation: `transferBranch` is unimplemented. No-op. |
165 | template <typename Analysis> |
166 | static void transferBranchInternal(Rank1, Analysis &A, bool, const Stmt *, |
167 | TypeErasedLattice &, Environment &) {} |
168 | |
169 | ASTContext &Context; |
170 | }; |
171 | |
172 | // Model of the program at a given program point. |
173 | template <typename LatticeT> struct DataflowAnalysisState { |
174 | // Model of a program property. |
175 | LatticeT Lattice; |
176 | |
177 | // Model of the state of the program (store and heap). |
178 | Environment Env; |
179 | }; |
180 | |
181 | /// Performs dataflow analysis and returns a mapping from basic block IDs to |
182 | /// dataflow analysis states that model the respective basic blocks. The |
183 | /// returned vector, if any, will have the same size as the number of CFG |
184 | /// blocks, with indices corresponding to basic block IDs. Returns an error if |
185 | /// the dataflow analysis cannot be performed successfully. Otherwise, calls |
186 | /// `PostVisitCFG` on each CFG element with the final analysis results at that |
187 | /// program point. |
188 | /// |
189 | /// `MaxBlockVisits` caps the number of block visits during analysis. See |
190 | /// `runTypeErasedDataflowAnalysis` for a full description. The default value is |
191 | /// essentially arbitrary -- large enough to accommodate what seems like any |
192 | /// reasonable CFG, but still small enough to limit the cost of hitting the |
193 | /// limit. |
194 | template <typename AnalysisT> |
195 | llvm::Expected<std::vector< |
196 | std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>> |
197 | runDataflowAnalysis( |
198 | const AdornedCFG &ACFG, AnalysisT &Analysis, const Environment &InitEnv, |
199 | std::function<void(const CFGElement &, const DataflowAnalysisState< |
200 | typename AnalysisT::Lattice> &)> |
201 | PostVisitCFG = nullptr, |
202 | std::int32_t MaxBlockVisits = 20'000) { |
203 | std::function<void(const CFGElement &, |
204 | const TypeErasedDataflowAnalysisState &)> |
205 | PostVisitCFGClosure = nullptr; |
206 | if (PostVisitCFG) { |
207 | PostVisitCFGClosure = [&PostVisitCFG]( |
208 | const CFGElement &Element, |
209 | const TypeErasedDataflowAnalysisState &State) { |
210 | auto *Lattice = |
211 | llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value); |
212 | // FIXME: we should not be copying the environment here! |
213 | // Ultimately the PostVisitCFG only gets a const reference anyway. |
214 | PostVisitCFG(Element, DataflowAnalysisState<typename AnalysisT::Lattice>{ |
215 | *Lattice, State.Env.fork()}); |
216 | }; |
217 | } |
218 | |
219 | auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis( |
220 | ACFG, Analysis, InitEnv, PostVisitCFGClosure, MaxBlockVisits); |
221 | if (!TypeErasedBlockStates) |
222 | return TypeErasedBlockStates.takeError(); |
223 | |
224 | std::vector<std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>> |
225 | BlockStates; |
226 | BlockStates.reserve(TypeErasedBlockStates->size()); |
227 | |
228 | llvm::transform( |
229 | std::move(*TypeErasedBlockStates), std::back_inserter(BlockStates), |
230 | [](auto &OptState) { |
231 | return llvm::transformOptional( |
232 | std::move(OptState), [](TypeErasedDataflowAnalysisState &&State) { |
233 | return DataflowAnalysisState<typename AnalysisT::Lattice>{ |
234 | llvm::any_cast<typename AnalysisT::Lattice>( |
235 | std::move(State.Lattice.Value)), |
236 | std::move(State.Env)}; |
237 | }); |
238 | }); |
239 | return std::move(BlockStates); |
240 | } |
241 | |
242 | // Create an analysis class that is derived from `DataflowAnalysis`. This is an |
243 | // SFINAE adapter that allows us to call two different variants of constructor |
244 | // (either with or without the optional `Environment` parameter). |
245 | // FIXME: Make all classes derived from `DataflowAnalysis` take an `Environment` |
246 | // parameter in their constructor so that we can get rid of this abomination. |
247 | template <typename AnalysisT> |
248 | auto createAnalysis(ASTContext &ASTCtx, Environment &Env) |
249 | -> decltype(AnalysisT(ASTCtx, Env)) { |
250 | return AnalysisT(ASTCtx, Env); |
251 | } |
252 | template <typename AnalysisT> |
253 | auto createAnalysis(ASTContext &ASTCtx, Environment &Env) |
254 | -> decltype(AnalysisT(ASTCtx)) { |
255 | return AnalysisT(ASTCtx); |
256 | } |
257 | |
258 | /// Runs a dataflow analysis over the given function and then runs `Diagnoser` |
259 | /// over the results. Returns a list of diagnostics for `FuncDecl` or an |
260 | /// error. Currently, errors can occur (at least) because the analysis requires |
261 | /// too many iterations over the CFG or the SAT solver times out. |
262 | /// |
263 | /// The default value of `MaxSATIterations` was chosen based on the following |
264 | /// observations: |
265 | /// - Non-pathological calls to the solver typically require only a few hundred |
266 | /// iterations. |
267 | /// - This limit is still low enough to keep runtimes acceptable (on typical |
268 | /// machines) in cases where we hit the limit. |
269 | /// |
270 | /// `MaxBlockVisits` caps the number of block visits during analysis. See |
271 | /// `runDataflowAnalysis` for a full description and explanation of the default |
272 | /// value. |
273 | template <typename AnalysisT, typename Diagnostic> |
274 | llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction( |
275 | const FunctionDecl &FuncDecl, ASTContext &ASTCtx, |
276 | llvm::function_ref<llvm::SmallVector<Diagnostic>( |
277 | const CFGElement &, ASTContext &, |
278 | const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)> |
279 | Diagnoser, |
280 | std::int64_t MaxSATIterations = 1'000'000'000, |
281 | std::int32_t MaxBlockVisits = 20'000) { |
282 | llvm::Expected<AdornedCFG> Context = AdornedCFG::build(Func: FuncDecl); |
283 | if (!Context) |
284 | return Context.takeError(); |
285 | |
286 | auto OwnedSolver = std::make_unique<WatchedLiteralsSolver>(args&: MaxSATIterations); |
287 | const WatchedLiteralsSolver *Solver = OwnedSolver.get(); |
288 | DataflowAnalysisContext AnalysisContext(std::move(OwnedSolver)); |
289 | Environment Env(AnalysisContext, FuncDecl); |
290 | AnalysisT Analysis = createAnalysis<AnalysisT>(ASTCtx, Env); |
291 | llvm::SmallVector<Diagnostic> Diagnostics; |
292 | if (llvm::Error Err = |
293 | runTypeErasedDataflowAnalysis( |
294 | *Context, Analysis, Env, |
295 | [&ASTCtx, &Diagnoser, &Diagnostics]( |
296 | const CFGElement &Elt, |
297 | const TypeErasedDataflowAnalysisState &State) mutable { |
298 | auto EltDiagnostics = Diagnoser( |
299 | Elt, ASTCtx, |
300 | TransferStateForDiagnostics<typename AnalysisT::Lattice>( |
301 | llvm::any_cast<const typename AnalysisT::Lattice &>( |
302 | State.Lattice.Value), |
303 | State.Env)); |
304 | llvm::move(EltDiagnostics, std::back_inserter(Diagnostics)); |
305 | }, |
306 | MaxBlockVisits) |
307 | .takeError()) |
308 | return std::move(Err); |
309 | |
310 | if (Solver->reachedLimit()) |
311 | return llvm::createStringError(EC: llvm::errc::interrupted, |
312 | Msg: "SAT solver timed out" ); |
313 | |
314 | return Diagnostics; |
315 | } |
316 | |
317 | /// Abstract base class for dataflow "models": reusable analysis components that |
318 | /// model a particular aspect of program semantics in the `Environment`. For |
319 | /// example, a model may capture a type and its related functions. |
320 | class DataflowModel : public Environment::ValueModel { |
321 | public: |
322 | /// Return value indicates whether the model processed the `Element`. |
323 | virtual bool transfer(const CFGElement &Element, Environment &Env) = 0; |
324 | }; |
325 | |
326 | } // namespace dataflow |
327 | } // namespace clang |
328 | |
329 | #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H |
330 | |