1//===- SymbolManager.h - Management of Symbolic Values ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines SymbolManager, a class that manages symbolic values
10// created for use by ExprEngine and related classes.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
15#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
16
17#include "clang/AST/Expr.h"
18#include "clang/AST/Type.h"
19#include "clang/Analysis/AnalysisDeclContext.h"
20#include "clang/Basic/LLVM.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
22#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
23#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/DenseSet.h"
26#include "llvm/ADT/FoldingSet.h"
27#include "llvm/Support/Allocator.h"
28#include <cassert>
29
30namespace clang {
31
32class ASTContext;
33class Stmt;
34
35namespace ento {
36
37class BasicValueFactory;
38class StoreManager;
39
40///A symbol representing the value stored at a MemRegion.
41class SymbolRegionValue : public SymbolData {
42 const TypedValueRegion *R;
43
44public:
45 SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
46 : SymbolData(SymbolRegionValueKind, sym), R(r) {
47 assert(r);
48 assert(isValidTypeForSymbol(r->getValueType()));
49 }
50
51 const TypedValueRegion* getRegion() const { return R; }
52
53 static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) {
54 profile.AddInteger((unsigned) SymbolRegionValueKind);
55 profile.AddPointer(R);
56 }
57
58 void Profile(llvm::FoldingSetNodeID& profile) override {
59 Profile(profile, R);
60 }
61
62 StringRef getKindStr() const override;
63
64 void dumpToStream(raw_ostream &os) const override;
65 const MemRegion *getOriginRegion() const override { return getRegion(); }
66
67 QualType getType() const override;
68
69 // Implement isa<T> support.
70 static bool classof(const SymExpr *SE) {
71 return SE->getKind() == SymbolRegionValueKind;
72 }
73};
74
75/// A symbol representing the result of an expression in the case when we do
76/// not know anything about what the expression is.
77class SymbolConjured : public SymbolData {
78 const Stmt *S;
79 QualType T;
80 unsigned Count;
81 const LocationContext *LCtx;
82 const void *SymbolTag;
83
84public:
85 SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
86 QualType t, unsigned count, const void *symbolTag)
87 : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count),
88 LCtx(lctx), SymbolTag(symbolTag) {
89 // FIXME: 's' might be a nullptr if we're conducting invalidation
90 // that was caused by a destructor call on a temporary object,
91 // which has no statement associated with it.
92 // Due to this, we might be creating the same invalidation symbol for
93 // two different invalidation passes (for two different temporaries).
94 assert(lctx);
95 assert(isValidTypeForSymbol(t));
96 }
97
98 const Stmt *getStmt() const { return S; }
99 unsigned getCount() const { return Count; }
100 const void *getTag() const { return SymbolTag; }
101
102 QualType getType() const override;
103
104 StringRef getKindStr() const override;
105
106 void dumpToStream(raw_ostream &os) const override;
107
108 static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S,
109 QualType T, unsigned Count, const LocationContext *LCtx,
110 const void *SymbolTag) {
111 profile.AddInteger((unsigned) SymbolConjuredKind);
112 profile.AddPointer(S);
113 profile.AddPointer(LCtx);
114 profile.Add(T);
115 profile.AddInteger(Count);
116 profile.AddPointer(SymbolTag);
117 }
118
119 void Profile(llvm::FoldingSetNodeID& profile) override {
120 Profile(profile, S, T, Count, LCtx, SymbolTag);
121 }
122
123 // Implement isa<T> support.
124 static bool classof(const SymExpr *SE) {
125 return SE->getKind() == SymbolConjuredKind;
126 }
127};
128
129/// A symbol representing the value of a MemRegion whose parent region has
130/// symbolic value.
131class SymbolDerived : public SymbolData {
132 SymbolRef parentSymbol;
133 const TypedValueRegion *R;
134
135public:
136 SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
137 : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {
138 assert(parent);
139 assert(r);
140 assert(isValidTypeForSymbol(r->getValueType()));
141 }
142
143 SymbolRef getParentSymbol() const { return parentSymbol; }
144 const TypedValueRegion *getRegion() const { return R; }
145
146 QualType getType() const override;
147
148 StringRef getKindStr() const override;
149
150 void dumpToStream(raw_ostream &os) const override;
151 const MemRegion *getOriginRegion() const override { return getRegion(); }
152
153 static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent,
154 const TypedValueRegion *r) {
155 profile.AddInteger((unsigned) SymbolDerivedKind);
156 profile.AddPointer(r);
157 profile.AddPointer(parent);
158 }
159
160 void Profile(llvm::FoldingSetNodeID& profile) override {
161 Profile(profile, parentSymbol, R);
162 }
163
164 // Implement isa<T> support.
165 static bool classof(const SymExpr *SE) {
166 return SE->getKind() == SymbolDerivedKind;
167 }
168};
169
170/// SymbolExtent - Represents the extent (size in bytes) of a bounded region.
171/// Clients should not ask the SymbolManager for a region's extent. Always use
172/// SubRegion::getExtent instead -- the value returned may not be a symbol.
173class SymbolExtent : public SymbolData {
174 const SubRegion *R;
175
176public:
177 SymbolExtent(SymbolID sym, const SubRegion *r)
178 : SymbolData(SymbolExtentKind, sym), R(r) {
179 assert(r);
180 }
181
182 const SubRegion *getRegion() const { return R; }
183
184 QualType getType() const override;
185
186 StringRef getKindStr() const override;
187
188 void dumpToStream(raw_ostream &os) const override;
189
190 static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) {
191 profile.AddInteger((unsigned) SymbolExtentKind);
192 profile.AddPointer(R);
193 }
194
195 void Profile(llvm::FoldingSetNodeID& profile) override {
196 Profile(profile, R);
197 }
198
199 // Implement isa<T> support.
200 static bool classof(const SymExpr *SE) {
201 return SE->getKind() == SymbolExtentKind;
202 }
203};
204
205/// SymbolMetadata - Represents path-dependent metadata about a specific region.
206/// Metadata symbols remain live as long as they are marked as in use before
207/// dead-symbol sweeping AND their associated regions are still alive.
208/// Intended for use by checkers.
209class SymbolMetadata : public SymbolData {
210 const MemRegion* R;
211 const Stmt *S;
212 QualType T;
213 const LocationContext *LCtx;
214 unsigned Count;
215 const void *Tag;
216
217public:
218 SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
219 const LocationContext *LCtx, unsigned count, const void *tag)
220 : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
221 Count(count), Tag(tag) {
222 assert(r);
223 assert(s);
224 assert(isValidTypeForSymbol(t));
225 assert(LCtx);
226 assert(tag);
227 }
228
229 const MemRegion *getRegion() const { return R; }
230 const Stmt *getStmt() const { return S; }
231 const LocationContext *getLocationContext() const { return LCtx; }
232 unsigned getCount() const { return Count; }
233 const void *getTag() const { return Tag; }
234
235 QualType getType() const override;
236
237 StringRef getKindStr() const override;
238
239 void dumpToStream(raw_ostream &os) const override;
240
241 static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R,
242 const Stmt *S, QualType T, const LocationContext *LCtx,
243 unsigned Count, const void *Tag) {
244 profile.AddInteger((unsigned) SymbolMetadataKind);
245 profile.AddPointer(R);
246 profile.AddPointer(S);
247 profile.Add(T);
248 profile.AddPointer(LCtx);
249 profile.AddInteger(Count);
250 profile.AddPointer(Tag);
251 }
252
253 void Profile(llvm::FoldingSetNodeID& profile) override {
254 Profile(profile, R, S, T, LCtx, Count, Tag);
255 }
256
257 // Implement isa<T> support.
258 static bool classof(const SymExpr *SE) {
259 return SE->getKind() == SymbolMetadataKind;
260 }
261};
262
263/// Represents a cast expression.
264class SymbolCast : public SymExpr {
265 const SymExpr *Operand;
266
267 /// Type of the operand.
268 QualType FromTy;
269
270 /// The type of the result.
271 QualType ToTy;
272
273public:
274 SymbolCast(const SymExpr *In, QualType From, QualType To)
275 : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
276 assert(In);
277 assert(isValidTypeForSymbol(From));
278 // FIXME: GenericTaintChecker creates symbols of void type.
279 // Otherwise, 'To' should also be a valid type.
280 }
281
282 unsigned computeComplexity() const override {
283 if (Complexity == 0)
284 Complexity = 1 + Operand->computeComplexity();
285 return Complexity;
286 }
287
288 QualType getType() const override { return ToTy; }
289
290 const SymExpr *getOperand() const { return Operand; }
291
292 void dumpToStream(raw_ostream &os) const override;
293
294 static void Profile(llvm::FoldingSetNodeID& ID,
295 const SymExpr *In, QualType From, QualType To) {
296 ID.AddInteger((unsigned) SymbolCastKind);
297 ID.AddPointer(In);
298 ID.Add(From);
299 ID.Add(To);
300 }
301
302 void Profile(llvm::FoldingSetNodeID& ID) override {
303 Profile(ID, Operand, FromTy, ToTy);
304 }
305
306 // Implement isa<T> support.
307 static bool classof(const SymExpr *SE) {
308 return SE->getKind() == SymbolCastKind;
309 }
310};
311
312/// Represents a symbolic expression involving a binary operator
313class BinarySymExpr : public SymExpr {
314 BinaryOperator::Opcode Op;
315 QualType T;
316
317protected:
318 BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t)
319 : SymExpr(k), Op(op), T(t) {
320 assert(classof(this));
321 // Binary expressions are results of arithmetic. Pointer arithmetic is not
322 // handled by binary expressions, but it is instead handled by applying
323 // sub-regions to regions.
324 assert(isValidTypeForSymbol(t) && !Loc::isLocType(t));
325 }
326
327public:
328 // FIXME: We probably need to make this out-of-line to avoid redundant
329 // generation of virtual functions.
330 QualType getType() const override { return T; }
331
332 BinaryOperator::Opcode getOpcode() const { return Op; }
333
334 // Implement isa<T> support.
335 static bool classof(const SymExpr *SE) {
336 Kind k = SE->getKind();
337 return k >= BEGIN_BINARYSYMEXPRS && k <= END_BINARYSYMEXPRS;
338 }
339
340protected:
341 static unsigned computeOperandComplexity(const SymExpr *Value) {
342 return Value->computeComplexity();
343 }
344 static unsigned computeOperandComplexity(const llvm::APSInt &Value) {
345 return 1;
346 }
347
348 static const llvm::APSInt *getPointer(const llvm::APSInt &Value) {
349 return &Value;
350 }
351 static const SymExpr *getPointer(const SymExpr *Value) { return Value; }
352
353 static void dumpToStreamImpl(raw_ostream &os, const SymExpr *Value);
354 static void dumpToStreamImpl(raw_ostream &os, const llvm::APSInt &Value);
355 static void dumpToStreamImpl(raw_ostream &os, BinaryOperator::Opcode op);
356};
357
358/// Template implementation for all binary symbolic expressions
359template <class LHSTYPE, class RHSTYPE, SymExpr::Kind ClassKind>
360class BinarySymExprImpl : public BinarySymExpr {
361 LHSTYPE LHS;
362 RHSTYPE RHS;
363
364public:
365 BinarySymExprImpl(LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs,
366 QualType t)
367 : BinarySymExpr(ClassKind, op, t), LHS(lhs), RHS(rhs) {
368 assert(getPointer(lhs));
369 assert(getPointer(rhs));
370 }
371
372 void dumpToStream(raw_ostream &os) const override {
373 dumpToStreamImpl(os, LHS);
374 dumpToStreamImpl(os, getOpcode());
375 dumpToStreamImpl(os, RHS);
376 }
377
378 LHSTYPE getLHS() const { return LHS; }
379 RHSTYPE getRHS() const { return RHS; }
380
381 unsigned computeComplexity() const override {
382 if (Complexity == 0)
383 Complexity =
384 computeOperandComplexity(RHS) + computeOperandComplexity(LHS);
385 return Complexity;
386 }
387
388 static void Profile(llvm::FoldingSetNodeID &ID, LHSTYPE lhs,
389 BinaryOperator::Opcode op, RHSTYPE rhs, QualType t) {
390 ID.AddInteger((unsigned)ClassKind);
391 ID.AddPointer(getPointer(lhs));
392 ID.AddInteger(op);
393 ID.AddPointer(getPointer(rhs));
394 ID.Add(t);
395 }
396
397 void Profile(llvm::FoldingSetNodeID &ID) override {
398 Profile(ID, LHS, getOpcode(), RHS, getType());
399 }
400
401 // Implement isa<T> support.
402 static bool classof(const SymExpr *SE) { return SE->getKind() == ClassKind; }
403};
404
405/// Represents a symbolic expression like 'x' + 3.
406using SymIntExpr = BinarySymExprImpl<const SymExpr *, const llvm::APSInt &,
407 SymExpr::Kind::SymIntExprKind>;
408
409/// Represents a symbolic expression like 3 - 'x'.
410using IntSymExpr = BinarySymExprImpl<const llvm::APSInt &, const SymExpr *,
411 SymExpr::Kind::IntSymExprKind>;
412
413/// Represents a symbolic expression like 'x' + 'y'.
414using SymSymExpr = BinarySymExprImpl<const SymExpr *, const SymExpr *,
415 SymExpr::Kind::SymSymExprKind>;
416
417class SymbolManager {
418 using DataSetTy = llvm::FoldingSet<SymExpr>;
419 using SymbolDependTy =
420 llvm::DenseMap<SymbolRef, std::unique_ptr<SymbolRefSmallVectorTy>>;
421
422 DataSetTy DataSet;
423
424 /// Stores the extra dependencies between symbols: the data should be kept
425 /// alive as long as the key is live.
426 SymbolDependTy SymbolDependencies;
427
428 unsigned SymbolCounter = 0;
429 llvm::BumpPtrAllocator& BPAlloc;
430 BasicValueFactory &BV;
431 ASTContext &Ctx;
432
433public:
434 SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
435 llvm::BumpPtrAllocator& bpalloc)
436 : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
437
438 static bool canSymbolicate(QualType T);
439
440 /// Make a unique symbol for MemRegion R according to its kind.
441 const SymbolRegionValue* getRegionValueSymbol(const TypedValueRegion* R);
442
443 const SymbolConjured* conjureSymbol(const Stmt *E,
444 const LocationContext *LCtx,
445 QualType T,
446 unsigned VisitCount,
447 const void *SymbolTag = nullptr);
448
449 const SymbolConjured* conjureSymbol(const Expr *E,
450 const LocationContext *LCtx,
451 unsigned VisitCount,
452 const void *SymbolTag = nullptr) {
453 return conjureSymbol(E, LCtx, E->getType(), VisitCount, SymbolTag);
454 }
455
456 const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol,
457 const TypedValueRegion *R);
458
459 const SymbolExtent *getExtentSymbol(const SubRegion *R);
460
461 /// Creates a metadata symbol associated with a specific region.
462 ///
463 /// VisitCount can be used to differentiate regions corresponding to
464 /// different loop iterations, thus, making the symbol path-dependent.
465 const SymbolMetadata *getMetadataSymbol(const MemRegion *R, const Stmt *S,
466 QualType T,
467 const LocationContext *LCtx,
468 unsigned VisitCount,
469 const void *SymbolTag = nullptr);
470
471 const SymbolCast* getCastSymbol(const SymExpr *Operand,
472 QualType From, QualType To);
473
474 const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
475 const llvm::APSInt& rhs, QualType t);
476
477 const SymIntExpr *getSymIntExpr(const SymExpr &lhs, BinaryOperator::Opcode op,
478 const llvm::APSInt& rhs, QualType t) {
479 return getSymIntExpr(&lhs, op, rhs, t);
480 }
481
482 const IntSymExpr *getIntSymExpr(const llvm::APSInt& lhs,
483 BinaryOperator::Opcode op,
484 const SymExpr *rhs, QualType t);
485
486 const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
487 const SymExpr *rhs, QualType t);
488
489 QualType getType(const SymExpr *SE) const {
490 return SE->getType();
491 }
492
493 /// Add artificial symbol dependency.
494 ///
495 /// The dependent symbol should stay alive as long as the primary is alive.
496 void addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent);
497
498 const SymbolRefSmallVectorTy *getDependentSymbols(const SymbolRef Primary);
499
500 ASTContext &getContext() { return Ctx; }
501 BasicValueFactory &getBasicVals() { return BV; }
502};
503
504/// A class responsible for cleaning up unused symbols.
505class SymbolReaper {
506 enum SymbolStatus {
507 NotProcessed,
508 HaveMarkedDependents
509 };
510
511 using SymbolSetTy = llvm::DenseSet<SymbolRef>;
512 using SymbolMapTy = llvm::DenseMap<SymbolRef, SymbolStatus>;
513 using RegionSetTy = llvm::DenseSet<const MemRegion *>;
514
515 SymbolMapTy TheLiving;
516 SymbolSetTy MetadataInUse;
517
518 RegionSetTy RegionRoots;
519
520 const StackFrameContext *LCtx;
521 const Stmt *Loc;
522 SymbolManager& SymMgr;
523 StoreRef reapedStore;
524 llvm::DenseMap<const MemRegion *, unsigned> includedRegionCache;
525
526public:
527 /// Construct a reaper object, which removes everything which is not
528 /// live before we execute statement s in the given location context.
529 ///
530 /// If the statement is NULL, everything is this and parent contexts is
531 /// considered live.
532 /// If the stack frame context is NULL, everything on stack is considered
533 /// dead.
534 SymbolReaper(const StackFrameContext *Ctx, const Stmt *s,
535 SymbolManager &symmgr, StoreManager &storeMgr)
536 : LCtx(Ctx), Loc(s), SymMgr(symmgr), reapedStore(nullptr, storeMgr) {}
537
538 const LocationContext *getLocationContext() const { return LCtx; }
539
540 bool isLive(SymbolRef sym);
541 bool isLiveRegion(const MemRegion *region);
542 bool isLive(const Expr *ExprVal, const LocationContext *LCtx) const;
543 bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const;
544
545 /// Unconditionally marks a symbol as live.
546 ///
547 /// This should never be
548 /// used by checkers, only by the state infrastructure such as the store and
549 /// environment. Checkers should instead use metadata symbols and markInUse.
550 void markLive(SymbolRef sym);
551
552 /// Marks a symbol as important to a checker.
553 ///
554 /// For metadata symbols,
555 /// this will keep the symbol alive as long as its associated region is also
556 /// live. For other symbols, this has no effect; checkers are not permitted
557 /// to influence the life of other symbols. This should be used before any
558 /// symbol marking has occurred, i.e. in the MarkLiveSymbols callback.
559 void markInUse(SymbolRef sym);
560
561 using region_iterator = RegionSetTy::const_iterator;
562
563 region_iterator region_begin() const { return RegionRoots.begin(); }
564 region_iterator region_end() const { return RegionRoots.end(); }
565
566 /// Returns whether or not a symbol has been confirmed dead.
567 ///
568 /// This should only be called once all marking of dead symbols has completed.
569 /// (For checkers, this means only in the checkDeadSymbols callback.)
570 bool isDead(SymbolRef sym) {
571 return !isLive(sym);
572 }
573
574 void markLive(const MemRegion *region);
575 void markElementIndicesLive(const MemRegion *region);
576
577 /// Set to the value of the symbolic store after
578 /// StoreManager::removeDeadBindings has been called.
579 void setReapedStore(StoreRef st) { reapedStore = st; }
580
581private:
582 /// Mark the symbols dependent on the input symbol as live.
583 void markDependentsLive(SymbolRef sym);
584};
585
586class SymbolVisitor {
587protected:
588 ~SymbolVisitor() = default;
589
590public:
591 SymbolVisitor() = default;
592 SymbolVisitor(const SymbolVisitor &) = default;
593 SymbolVisitor(SymbolVisitor &&) {}
594
595 /// A visitor method invoked by ProgramStateManager::scanReachableSymbols.
596 ///
597 /// The method returns \c true if symbols should continue be scanned and \c
598 /// false otherwise.
599 virtual bool VisitSymbol(SymbolRef sym) = 0;
600 virtual bool VisitMemRegion(const MemRegion *) { return true; }
601};
602
603} // namespace ento
604
605} // namespace clang
606
607#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
608