1//===- PoisonChecking.cpp - -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implements a transform pass which instruments IR such that poison semantics
10// are made explicit. That is, it provides a (possibly partial) executable
11// semantics for every instruction w.r.t. poison as specified in the LLVM
12// LangRef. There are obvious parallels to the sanitizer tools, but this pass
13// is focused purely on the semantics of LLVM IR, not any particular source
14// language. If you're looking for something to see if your C/C++ contains
15// UB, this is not it.
16//
17// The rewritten semantics of each instruction will include the following
18// components:
19//
20// 1) The original instruction, unmodified.
21// 2) A propagation rule which translates dynamic information about the poison
22// state of each input to whether the dynamic output of the instruction
23// produces poison.
24// 3) A creation rule which validates any poison producing flags on the
25// instruction itself (e.g. checks for overflow on nsw).
26// 4) A check rule which traps (to a handler function) if this instruction must
27// execute undefined behavior given the poison state of it's inputs.
28//
29// This is a must analysis based transform; that is, the resulting code may
30// produce a false negative result (not report UB when actually exists
31// according to the LangRef spec), but should never produce a false positive
32// (report UB where it doesn't exist).
33//
34// Use cases for this pass include:
35// - Understanding (and testing!) the implications of the definition of poison
36// from the LangRef.
37// - Validating the output of a IR fuzzer to ensure that all programs produced
38// are well defined on the specific input used.
39// - Finding/confirming poison specific miscompiles by checking the poison
40// status of an input/IR pair is the same before and after an optimization
41// transform.
42// - Checking that a bugpoint reduction does not introduce UB which didn't
43// exist in the original program being reduced.
44//
45// The major sources of inaccuracy are currently:
46// - Most validation rules not yet implemented for instructions with poison
47// relavant flags. At the moment, only nsw/nuw on add/sub are supported.
48// - UB which is control dependent on a branch on poison is not yet
49// reported. Currently, only data flow dependence is modeled.
50// - Poison which is propagated through memory is not modeled. As such,
51// storing poison to memory and then reloading it will cause a false negative
52// as we consider the reloaded value to not be poisoned.
53// - Poison propagation across function boundaries is not modeled. At the
54// moment, all arguments and return values are assumed not to be poison.
55// - Undef is not modeled. In particular, the optimizer's freedom to pick
56// concrete values for undef bits so as to maximize potential for producing
57// poison is not modeled.
58//
59//===----------------------------------------------------------------------===//
60
61#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
62#include "llvm/ADT/DenseMap.h"
63#include "llvm/Analysis/ValueTracking.h"
64#include "llvm/IR/IRBuilder.h"
65#include "llvm/Support/CommandLine.h"
66
67using namespace llvm;
68
69#define DEBUG_TYPE "poison-checking"
70
71static cl::opt<bool>
72LocalCheck("poison-checking-function-local",
73 cl::init(Val: false),
74 cl::desc("Check that returns are non-poison (for testing)"));
75
76
77static bool isConstantFalse(Value* V) {
78 assert(V->getType()->isIntegerTy(1));
79 if (auto *CI = dyn_cast<ConstantInt>(Val: V))
80 return CI->isZero();
81 return false;
82}
83
84static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
85 if (Ops.size() == 0)
86 return B.getFalse();
87 unsigned i = 0;
88 for (; i < Ops.size() && isConstantFalse(V: Ops[i]); i++) {}
89 if (i == Ops.size())
90 return B.getFalse();
91 Value *Accum = Ops[i++];
92 for (Value *Op : llvm::drop_begin(RangeOrContainer&: Ops, N: i))
93 if (!isConstantFalse(V: Op))
94 Accum = B.CreateOr(LHS: Accum, RHS: Op);
95 return Accum;
96}
97
98static void generateCreationChecksForBinOp(Instruction &I,
99 SmallVectorImpl<Value*> &Checks) {
100 assert(isa<BinaryOperator>(I));
101
102 IRBuilder<> B(&I);
103 Value *LHS = I.getOperand(i: 0);
104 Value *RHS = I.getOperand(i: 1);
105 switch (I.getOpcode()) {
106 default:
107 return;
108 case Instruction::Add: {
109 if (I.hasNoSignedWrap()) {
110 auto *OverflowOp =
111 B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
112 Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1));
113 }
114 if (I.hasNoUnsignedWrap()) {
115 auto *OverflowOp =
116 B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
117 Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1));
118 }
119 break;
120 }
121 case Instruction::Sub: {
122 if (I.hasNoSignedWrap()) {
123 auto *OverflowOp =
124 B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
125 Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1));
126 }
127 if (I.hasNoUnsignedWrap()) {
128 auto *OverflowOp =
129 B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
130 Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1));
131 }
132 break;
133 }
134 case Instruction::Mul: {
135 if (I.hasNoSignedWrap()) {
136 auto *OverflowOp =
137 B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
138 Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1));
139 }
140 if (I.hasNoUnsignedWrap()) {
141 auto *OverflowOp =
142 B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
143 Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1));
144 }
145 break;
146 }
147 case Instruction::UDiv: {
148 if (I.isExact()) {
149 auto *Check =
150 B.CreateICmp(P: ICmpInst::ICMP_NE, LHS: B.CreateURem(LHS, RHS),
151 RHS: ConstantInt::get(Ty: LHS->getType(), V: 0));
152 Checks.push_back(Elt: Check);
153 }
154 break;
155 }
156 case Instruction::SDiv: {
157 if (I.isExact()) {
158 auto *Check =
159 B.CreateICmp(P: ICmpInst::ICMP_NE, LHS: B.CreateSRem(LHS, RHS),
160 RHS: ConstantInt::get(Ty: LHS->getType(), V: 0));
161 Checks.push_back(Elt: Check);
162 }
163 break;
164 }
165 case Instruction::AShr:
166 case Instruction::LShr:
167 case Instruction::Shl: {
168 Value *ShiftCheck =
169 B.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: RHS,
170 RHS: ConstantInt::get(Ty: RHS->getType(),
171 V: LHS->getType()->getScalarSizeInBits()));
172 Checks.push_back(Elt: ShiftCheck);
173 break;
174 }
175 };
176}
177
178/// Given an instruction which can produce poison on non-poison inputs
179/// (i.e. canCreatePoison returns true), generate runtime checks to produce
180/// boolean indicators of when poison would result.
181static void generateCreationChecks(Instruction &I,
182 SmallVectorImpl<Value*> &Checks) {
183 IRBuilder<> B(&I);
184 if (isa<BinaryOperator>(Val: I) && !I.getType()->isVectorTy())
185 generateCreationChecksForBinOp(I, Checks);
186
187 // Handle non-binops separately
188 switch (I.getOpcode()) {
189 default:
190 // Note there are a couple of missing cases here, once implemented, this
191 // should become an llvm_unreachable.
192 break;
193 case Instruction::ExtractElement: {
194 Value *Vec = I.getOperand(i: 0);
195 auto *VecVTy = dyn_cast<FixedVectorType>(Val: Vec->getType());
196 if (!VecVTy)
197 break;
198 Value *Idx = I.getOperand(i: 1);
199 unsigned NumElts = VecVTy->getNumElements();
200 Value *Check =
201 B.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Idx,
202 RHS: ConstantInt::get(Ty: Idx->getType(), V: NumElts));
203 Checks.push_back(Elt: Check);
204 break;
205 }
206 case Instruction::InsertElement: {
207 Value *Vec = I.getOperand(i: 0);
208 auto *VecVTy = dyn_cast<FixedVectorType>(Val: Vec->getType());
209 if (!VecVTy)
210 break;
211 Value *Idx = I.getOperand(i: 2);
212 unsigned NumElts = VecVTy->getNumElements();
213 Value *Check =
214 B.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Idx,
215 RHS: ConstantInt::get(Ty: Idx->getType(), V: NumElts));
216 Checks.push_back(Elt: Check);
217 break;
218 }
219 };
220}
221
222static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
223 auto Itr = ValToPoison.find(Val: V);
224 if (Itr != ValToPoison.end())
225 return Itr->second;
226 if (isa<Constant>(Val: V)) {
227 return ConstantInt::getFalse(Context&: V->getContext());
228 }
229 // Return false for unknwon values - this implements a non-strict mode where
230 // unhandled IR constructs are simply considered to never produce poison. At
231 // some point in the future, we probably want a "strict mode" for testing if
232 // nothing else.
233 return ConstantInt::getFalse(Context&: V->getContext());
234}
235
236static void CreateAssert(IRBuilder<> &B, Value *Cond) {
237 assert(Cond->getType()->isIntegerTy(1));
238 if (auto *CI = dyn_cast<ConstantInt>(Val: Cond))
239 if (CI->isAllOnesValue())
240 return;
241
242 Module *M = B.GetInsertBlock()->getModule();
243 M->getOrInsertFunction(Name: "__poison_checker_assert",
244 RetTy: Type::getVoidTy(C&: M->getContext()),
245 Args: Type::getInt1Ty(C&: M->getContext()));
246 Function *TrapFunc = M->getFunction(Name: "__poison_checker_assert");
247 B.CreateCall(Callee: TrapFunc, Args: Cond);
248}
249
250static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
251 assert(Cond->getType()->isIntegerTy(1));
252 CreateAssert(B, Cond: B.CreateNot(V: Cond));
253}
254
255static bool rewrite(Function &F) {
256 auto * const Int1Ty = Type::getInt1Ty(C&: F.getContext());
257
258 DenseMap<Value *, Value *> ValToPoison;
259
260 for (BasicBlock &BB : F)
261 for (auto I = BB.begin(); isa<PHINode>(Val: &*I); I++) {
262 auto *OldPHI = cast<PHINode>(Val: &*I);
263 auto *NewPHI = PHINode::Create(Ty: Int1Ty, NumReservedValues: OldPHI->getNumIncomingValues());
264 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
265 NewPHI->addIncoming(V: UndefValue::get(T: Int1Ty),
266 BB: OldPHI->getIncomingBlock(i));
267 NewPHI->insertBefore(InsertPos: OldPHI);
268 ValToPoison[OldPHI] = NewPHI;
269 }
270
271 for (BasicBlock &BB : F)
272 for (Instruction &I : BB) {
273 if (isa<PHINode>(Val: I)) continue;
274
275 IRBuilder<> B(cast<Instruction>(Val: &I));
276
277 // Note: There are many more sources of documented UB, but this pass only
278 // attempts to find UB triggered by propagation of poison.
279 SmallVector<const Value *, 4> NonPoisonOps;
280 SmallPtrSet<const Value *, 4> SeenNonPoisonOps;
281 getGuaranteedNonPoisonOps(I: &I, Ops&: NonPoisonOps);
282 for (const Value *Op : NonPoisonOps)
283 if (SeenNonPoisonOps.insert(Ptr: Op).second)
284 CreateAssertNot(B,
285 Cond: getPoisonFor(ValToPoison, V: const_cast<Value *>(Op)));
286
287 if (LocalCheck)
288 if (auto *RI = dyn_cast<ReturnInst>(Val: &I))
289 if (RI->getNumOperands() != 0) {
290 Value *Op = RI->getOperand(i_nocapture: 0);
291 CreateAssertNot(B, Cond: getPoisonFor(ValToPoison, V: Op));
292 }
293
294 SmallVector<Value*, 4> Checks;
295 for (const Use &U : I.operands()) {
296 if (ValToPoison.count(Val: U) && propagatesPoison(PoisonOp: U))
297 Checks.push_back(Elt: getPoisonFor(ValToPoison, V: U));
298 }
299
300 if (canCreatePoison(Op: cast<Operator>(Val: &I)))
301 generateCreationChecks(I, Checks);
302 ValToPoison[&I] = buildOrChain(B, Ops: Checks);
303 }
304
305 for (BasicBlock &BB : F)
306 for (auto I = BB.begin(); isa<PHINode>(Val: &*I); I++) {
307 auto *OldPHI = cast<PHINode>(Val: &*I);
308 if (!ValToPoison.count(Val: OldPHI))
309 continue; // skip the newly inserted phis
310 auto *NewPHI = cast<PHINode>(Val: ValToPoison[OldPHI]);
311 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
312 auto *OldVal = OldPHI->getIncomingValue(i);
313 NewPHI->setIncomingValue(i, V: getPoisonFor(ValToPoison, V: OldVal));
314 }
315 }
316 return true;
317}
318
319
320PreservedAnalyses PoisonCheckingPass::run(Module &M,
321 ModuleAnalysisManager &AM) {
322 bool Changed = false;
323 for (auto &F : M)
324 Changed |= rewrite(F);
325
326 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
327}
328
329PreservedAnalyses PoisonCheckingPass::run(Function &F,
330 FunctionAnalysisManager &AM) {
331 return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
332}
333
334/* Major TODO Items:
335 - Control dependent poison UB
336 - Strict mode - (i.e. must analyze every operand)
337 - Poison through memory
338 - Function ABIs
339 - Full coverage of intrinsics, etc.. (ouch)
340
341 Instructions w/Unclear Semantics:
342 - shufflevector - It would seem reasonable for an out of bounds mask element
343 to produce poison, but the LangRef does not state.
344 - all binary ops w/vector operands - The likely interpretation would be that
345 any element overflowing should produce poison for the entire result, but
346 the LangRef does not state.
347 - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
348 strange that only certian flags should be documented as producing poison.
349
350 Cases of clear poison semantics not yet implemented:
351 - Exact flags on ashr/lshr produce poison
352 - NSW/NUW flags on shl produce poison
353 - Inbounds flag on getelementptr produce poison
354 - fptosi/fptoui (out of bounds input) produce poison
355 - Scalable vector types for insertelement/extractelement
356 - Floating point binary ops w/fmf nnan/noinfs flags produce poison
357 */
358

source code of llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp