1 | //===- PoisonChecking.cpp - -----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implements a transform pass which instruments IR such that poison semantics |
10 | // are made explicit. That is, it provides a (possibly partial) executable |
11 | // semantics for every instruction w.r.t. poison as specified in the LLVM |
12 | // LangRef. There are obvious parallels to the sanitizer tools, but this pass |
13 | // is focused purely on the semantics of LLVM IR, not any particular source |
14 | // language. If you're looking for something to see if your C/C++ contains |
15 | // UB, this is not it. |
16 | // |
17 | // The rewritten semantics of each instruction will include the following |
18 | // components: |
19 | // |
20 | // 1) The original instruction, unmodified. |
21 | // 2) A propagation rule which translates dynamic information about the poison |
22 | // state of each input to whether the dynamic output of the instruction |
23 | // produces poison. |
24 | // 3) A creation rule which validates any poison producing flags on the |
25 | // instruction itself (e.g. checks for overflow on nsw). |
26 | // 4) A check rule which traps (to a handler function) if this instruction must |
27 | // execute undefined behavior given the poison state of it's inputs. |
28 | // |
29 | // This is a must analysis based transform; that is, the resulting code may |
30 | // produce a false negative result (not report UB when actually exists |
31 | // according to the LangRef spec), but should never produce a false positive |
32 | // (report UB where it doesn't exist). |
33 | // |
34 | // Use cases for this pass include: |
35 | // - Understanding (and testing!) the implications of the definition of poison |
36 | // from the LangRef. |
37 | // - Validating the output of a IR fuzzer to ensure that all programs produced |
38 | // are well defined on the specific input used. |
39 | // - Finding/confirming poison specific miscompiles by checking the poison |
40 | // status of an input/IR pair is the same before and after an optimization |
41 | // transform. |
42 | // - Checking that a bugpoint reduction does not introduce UB which didn't |
43 | // exist in the original program being reduced. |
44 | // |
45 | // The major sources of inaccuracy are currently: |
46 | // - Most validation rules not yet implemented for instructions with poison |
47 | // relavant flags. At the moment, only nsw/nuw on add/sub are supported. |
48 | // - UB which is control dependent on a branch on poison is not yet |
49 | // reported. Currently, only data flow dependence is modeled. |
50 | // - Poison which is propagated through memory is not modeled. As such, |
51 | // storing poison to memory and then reloading it will cause a false negative |
52 | // as we consider the reloaded value to not be poisoned. |
53 | // - Poison propagation across function boundaries is not modeled. At the |
54 | // moment, all arguments and return values are assumed not to be poison. |
55 | // - Undef is not modeled. In particular, the optimizer's freedom to pick |
56 | // concrete values for undef bits so as to maximize potential for producing |
57 | // poison is not modeled. |
58 | // |
59 | //===----------------------------------------------------------------------===// |
60 | |
61 | #include "llvm/Transforms/Instrumentation/PoisonChecking.h" |
62 | #include "llvm/ADT/DenseMap.h" |
63 | #include "llvm/Analysis/ValueTracking.h" |
64 | #include "llvm/IR/IRBuilder.h" |
65 | #include "llvm/Support/CommandLine.h" |
66 | |
67 | using namespace llvm; |
68 | |
69 | #define DEBUG_TYPE "poison-checking" |
70 | |
71 | static cl::opt<bool> |
72 | LocalCheck("poison-checking-function-local" , |
73 | cl::init(Val: false), |
74 | cl::desc("Check that returns are non-poison (for testing)" )); |
75 | |
76 | |
77 | static bool isConstantFalse(Value* V) { |
78 | assert(V->getType()->isIntegerTy(1)); |
79 | if (auto *CI = dyn_cast<ConstantInt>(Val: V)) |
80 | return CI->isZero(); |
81 | return false; |
82 | } |
83 | |
84 | static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) { |
85 | if (Ops.size() == 0) |
86 | return B.getFalse(); |
87 | unsigned i = 0; |
88 | for (; i < Ops.size() && isConstantFalse(V: Ops[i]); i++) {} |
89 | if (i == Ops.size()) |
90 | return B.getFalse(); |
91 | Value *Accum = Ops[i++]; |
92 | for (Value *Op : llvm::drop_begin(RangeOrContainer&: Ops, N: i)) |
93 | if (!isConstantFalse(V: Op)) |
94 | Accum = B.CreateOr(LHS: Accum, RHS: Op); |
95 | return Accum; |
96 | } |
97 | |
98 | static void generateCreationChecksForBinOp(Instruction &I, |
99 | SmallVectorImpl<Value*> &Checks) { |
100 | assert(isa<BinaryOperator>(I)); |
101 | |
102 | IRBuilder<> B(&I); |
103 | Value *LHS = I.getOperand(i: 0); |
104 | Value *RHS = I.getOperand(i: 1); |
105 | switch (I.getOpcode()) { |
106 | default: |
107 | return; |
108 | case Instruction::Add: { |
109 | if (I.hasNoSignedWrap()) { |
110 | auto *OverflowOp = |
111 | B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS); |
112 | Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1)); |
113 | } |
114 | if (I.hasNoUnsignedWrap()) { |
115 | auto *OverflowOp = |
116 | B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS); |
117 | Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1)); |
118 | } |
119 | break; |
120 | } |
121 | case Instruction::Sub: { |
122 | if (I.hasNoSignedWrap()) { |
123 | auto *OverflowOp = |
124 | B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS); |
125 | Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1)); |
126 | } |
127 | if (I.hasNoUnsignedWrap()) { |
128 | auto *OverflowOp = |
129 | B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS); |
130 | Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1)); |
131 | } |
132 | break; |
133 | } |
134 | case Instruction::Mul: { |
135 | if (I.hasNoSignedWrap()) { |
136 | auto *OverflowOp = |
137 | B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS); |
138 | Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1)); |
139 | } |
140 | if (I.hasNoUnsignedWrap()) { |
141 | auto *OverflowOp = |
142 | B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS); |
143 | Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1)); |
144 | } |
145 | break; |
146 | } |
147 | case Instruction::UDiv: { |
148 | if (I.isExact()) { |
149 | auto *Check = |
150 | B.CreateICmp(P: ICmpInst::ICMP_NE, LHS: B.CreateURem(LHS, RHS), |
151 | RHS: ConstantInt::get(Ty: LHS->getType(), V: 0)); |
152 | Checks.push_back(Elt: Check); |
153 | } |
154 | break; |
155 | } |
156 | case Instruction::SDiv: { |
157 | if (I.isExact()) { |
158 | auto *Check = |
159 | B.CreateICmp(P: ICmpInst::ICMP_NE, LHS: B.CreateSRem(LHS, RHS), |
160 | RHS: ConstantInt::get(Ty: LHS->getType(), V: 0)); |
161 | Checks.push_back(Elt: Check); |
162 | } |
163 | break; |
164 | } |
165 | case Instruction::AShr: |
166 | case Instruction::LShr: |
167 | case Instruction::Shl: { |
168 | Value *ShiftCheck = |
169 | B.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: RHS, |
170 | RHS: ConstantInt::get(Ty: RHS->getType(), |
171 | V: LHS->getType()->getScalarSizeInBits())); |
172 | Checks.push_back(Elt: ShiftCheck); |
173 | break; |
174 | } |
175 | }; |
176 | } |
177 | |
178 | /// Given an instruction which can produce poison on non-poison inputs |
179 | /// (i.e. canCreatePoison returns true), generate runtime checks to produce |
180 | /// boolean indicators of when poison would result. |
181 | static void generateCreationChecks(Instruction &I, |
182 | SmallVectorImpl<Value*> &Checks) { |
183 | IRBuilder<> B(&I); |
184 | if (isa<BinaryOperator>(Val: I) && !I.getType()->isVectorTy()) |
185 | generateCreationChecksForBinOp(I, Checks); |
186 | |
187 | // Handle non-binops separately |
188 | switch (I.getOpcode()) { |
189 | default: |
190 | // Note there are a couple of missing cases here, once implemented, this |
191 | // should become an llvm_unreachable. |
192 | break; |
193 | case Instruction::ExtractElement: { |
194 | Value *Vec = I.getOperand(i: 0); |
195 | auto *VecVTy = dyn_cast<FixedVectorType>(Val: Vec->getType()); |
196 | if (!VecVTy) |
197 | break; |
198 | Value *Idx = I.getOperand(i: 1); |
199 | unsigned NumElts = VecVTy->getNumElements(); |
200 | Value *Check = |
201 | B.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Idx, |
202 | RHS: ConstantInt::get(Ty: Idx->getType(), V: NumElts)); |
203 | Checks.push_back(Elt: Check); |
204 | break; |
205 | } |
206 | case Instruction::InsertElement: { |
207 | Value *Vec = I.getOperand(i: 0); |
208 | auto *VecVTy = dyn_cast<FixedVectorType>(Val: Vec->getType()); |
209 | if (!VecVTy) |
210 | break; |
211 | Value *Idx = I.getOperand(i: 2); |
212 | unsigned NumElts = VecVTy->getNumElements(); |
213 | Value *Check = |
214 | B.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Idx, |
215 | RHS: ConstantInt::get(Ty: Idx->getType(), V: NumElts)); |
216 | Checks.push_back(Elt: Check); |
217 | break; |
218 | } |
219 | }; |
220 | } |
221 | |
222 | static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) { |
223 | auto Itr = ValToPoison.find(Val: V); |
224 | if (Itr != ValToPoison.end()) |
225 | return Itr->second; |
226 | if (isa<Constant>(Val: V)) { |
227 | return ConstantInt::getFalse(Context&: V->getContext()); |
228 | } |
229 | // Return false for unknwon values - this implements a non-strict mode where |
230 | // unhandled IR constructs are simply considered to never produce poison. At |
231 | // some point in the future, we probably want a "strict mode" for testing if |
232 | // nothing else. |
233 | return ConstantInt::getFalse(Context&: V->getContext()); |
234 | } |
235 | |
236 | static void CreateAssert(IRBuilder<> &B, Value *Cond) { |
237 | assert(Cond->getType()->isIntegerTy(1)); |
238 | if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) |
239 | if (CI->isAllOnesValue()) |
240 | return; |
241 | |
242 | Module *M = B.GetInsertBlock()->getModule(); |
243 | M->getOrInsertFunction(Name: "__poison_checker_assert" , |
244 | RetTy: Type::getVoidTy(C&: M->getContext()), |
245 | Args: Type::getInt1Ty(C&: M->getContext())); |
246 | Function *TrapFunc = M->getFunction(Name: "__poison_checker_assert" ); |
247 | B.CreateCall(Callee: TrapFunc, Args: Cond); |
248 | } |
249 | |
250 | static void CreateAssertNot(IRBuilder<> &B, Value *Cond) { |
251 | assert(Cond->getType()->isIntegerTy(1)); |
252 | CreateAssert(B, Cond: B.CreateNot(V: Cond)); |
253 | } |
254 | |
255 | static bool rewrite(Function &F) { |
256 | auto * const Int1Ty = Type::getInt1Ty(C&: F.getContext()); |
257 | |
258 | DenseMap<Value *, Value *> ValToPoison; |
259 | |
260 | for (BasicBlock &BB : F) |
261 | for (auto I = BB.begin(); isa<PHINode>(Val: &*I); I++) { |
262 | auto *OldPHI = cast<PHINode>(Val: &*I); |
263 | auto *NewPHI = PHINode::Create(Ty: Int1Ty, NumReservedValues: OldPHI->getNumIncomingValues()); |
264 | for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) |
265 | NewPHI->addIncoming(V: UndefValue::get(T: Int1Ty), |
266 | BB: OldPHI->getIncomingBlock(i)); |
267 | NewPHI->insertBefore(InsertPos: OldPHI); |
268 | ValToPoison[OldPHI] = NewPHI; |
269 | } |
270 | |
271 | for (BasicBlock &BB : F) |
272 | for (Instruction &I : BB) { |
273 | if (isa<PHINode>(Val: I)) continue; |
274 | |
275 | IRBuilder<> B(cast<Instruction>(Val: &I)); |
276 | |
277 | // Note: There are many more sources of documented UB, but this pass only |
278 | // attempts to find UB triggered by propagation of poison. |
279 | SmallVector<const Value *, 4> NonPoisonOps; |
280 | SmallPtrSet<const Value *, 4> SeenNonPoisonOps; |
281 | getGuaranteedNonPoisonOps(I: &I, Ops&: NonPoisonOps); |
282 | for (const Value *Op : NonPoisonOps) |
283 | if (SeenNonPoisonOps.insert(Ptr: Op).second) |
284 | CreateAssertNot(B, |
285 | Cond: getPoisonFor(ValToPoison, V: const_cast<Value *>(Op))); |
286 | |
287 | if (LocalCheck) |
288 | if (auto *RI = dyn_cast<ReturnInst>(Val: &I)) |
289 | if (RI->getNumOperands() != 0) { |
290 | Value *Op = RI->getOperand(i_nocapture: 0); |
291 | CreateAssertNot(B, Cond: getPoisonFor(ValToPoison, V: Op)); |
292 | } |
293 | |
294 | SmallVector<Value*, 4> Checks; |
295 | for (const Use &U : I.operands()) { |
296 | if (ValToPoison.count(Val: U) && propagatesPoison(PoisonOp: U)) |
297 | Checks.push_back(Elt: getPoisonFor(ValToPoison, V: U)); |
298 | } |
299 | |
300 | if (canCreatePoison(Op: cast<Operator>(Val: &I))) |
301 | generateCreationChecks(I, Checks); |
302 | ValToPoison[&I] = buildOrChain(B, Ops: Checks); |
303 | } |
304 | |
305 | for (BasicBlock &BB : F) |
306 | for (auto I = BB.begin(); isa<PHINode>(Val: &*I); I++) { |
307 | auto *OldPHI = cast<PHINode>(Val: &*I); |
308 | if (!ValToPoison.count(Val: OldPHI)) |
309 | continue; // skip the newly inserted phis |
310 | auto *NewPHI = cast<PHINode>(Val: ValToPoison[OldPHI]); |
311 | for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) { |
312 | auto *OldVal = OldPHI->getIncomingValue(i); |
313 | NewPHI->setIncomingValue(i, V: getPoisonFor(ValToPoison, V: OldVal)); |
314 | } |
315 | } |
316 | return true; |
317 | } |
318 | |
319 | |
320 | PreservedAnalyses PoisonCheckingPass::run(Module &M, |
321 | ModuleAnalysisManager &AM) { |
322 | bool Changed = false; |
323 | for (auto &F : M) |
324 | Changed |= rewrite(F); |
325 | |
326 | return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); |
327 | } |
328 | |
329 | PreservedAnalyses PoisonCheckingPass::run(Function &F, |
330 | FunctionAnalysisManager &AM) { |
331 | return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all(); |
332 | } |
333 | |
334 | /* Major TODO Items: |
335 | - Control dependent poison UB |
336 | - Strict mode - (i.e. must analyze every operand) |
337 | - Poison through memory |
338 | - Function ABIs |
339 | - Full coverage of intrinsics, etc.. (ouch) |
340 | |
341 | Instructions w/Unclear Semantics: |
342 | - shufflevector - It would seem reasonable for an out of bounds mask element |
343 | to produce poison, but the LangRef does not state. |
344 | - all binary ops w/vector operands - The likely interpretation would be that |
345 | any element overflowing should produce poison for the entire result, but |
346 | the LangRef does not state. |
347 | - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems |
348 | strange that only certian flags should be documented as producing poison. |
349 | |
350 | Cases of clear poison semantics not yet implemented: |
351 | - Exact flags on ashr/lshr produce poison |
352 | - NSW/NUW flags on shl produce poison |
353 | - Inbounds flag on getelementptr produce poison |
354 | - fptosi/fptoui (out of bounds input) produce poison |
355 | - Scalable vector types for insertelement/extractelement |
356 | - Floating point binary ops w/fmf nnan/noinfs flags produce poison |
357 | */ |
358 | |