1 | //===- GuardWidening.cpp - ---- Guard widening ----------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the guard widening pass. The semantics of the |
10 | // @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails |
11 | // more often than it did before the transform. This optimization is called |
12 | // "widening" and can be used to hoist and common runtime checks in situations |
13 | // these: |
14 | // |
15 | // %cmp0 = 7 u< Length |
16 | // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] |
17 | // call @unknown_side_effects() |
18 | // %cmp1 = 9 u< Length |
19 | // call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ] |
20 | // ... |
21 | // |
22 | // => |
23 | // |
24 | // %cmp0 = 9 u< Length |
25 | // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] |
26 | // call @unknown_side_effects() |
27 | // ... |
28 | // |
29 | // If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a |
30 | // generic implementation of the same function, which will have the correct |
31 | // semantics from that point onward. It is always _legal_ to deoptimize (so |
32 | // replacing %cmp0 with false is "correct"), though it may not always be |
33 | // profitable to do so. |
34 | // |
35 | // NB! This pass is a work in progress. It hasn't been tuned to be "production |
36 | // ready" yet. It is known to have quadriatic running time and will not scale |
37 | // to large numbers of guards |
38 | // |
39 | //===----------------------------------------------------------------------===// |
40 | |
41 | #include "llvm/Transforms/Scalar/GuardWidening.h" |
42 | #include "llvm/ADT/DenseMap.h" |
43 | #include "llvm/ADT/DepthFirstIterator.h" |
44 | #include "llvm/ADT/Statistic.h" |
45 | #include "llvm/Analysis/AssumptionCache.h" |
46 | #include "llvm/Analysis/GuardUtils.h" |
47 | #include "llvm/Analysis/LoopInfo.h" |
48 | #include "llvm/Analysis/MemorySSAUpdater.h" |
49 | #include "llvm/Analysis/PostDominators.h" |
50 | #include "llvm/Analysis/ValueTracking.h" |
51 | #include "llvm/IR/ConstantRange.h" |
52 | #include "llvm/IR/Dominators.h" |
53 | #include "llvm/IR/IRBuilder.h" |
54 | #include "llvm/IR/IntrinsicInst.h" |
55 | #include "llvm/IR/PatternMatch.h" |
56 | #include "llvm/Support/CommandLine.h" |
57 | #include "llvm/Support/Debug.h" |
58 | #include "llvm/Support/KnownBits.h" |
59 | #include "llvm/Transforms/Scalar.h" |
60 | #include "llvm/Transforms/Utils/GuardUtils.h" |
61 | #include "llvm/Transforms/Utils/LoopUtils.h" |
62 | #include <functional> |
63 | |
64 | using namespace llvm; |
65 | |
66 | #define DEBUG_TYPE "guard-widening" |
67 | |
// Pass statistics, reported under -stats.
STATISTIC(GuardsEliminated, "Number of eliminated guards");
STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");
STATISTIC(FreezeAdded, "Number of freeze instruction introduced");

// Command-line switch: when true (the default), branches on widenable
// conditions are treated as guards and may be widened, in addition to
// @llvm.experimental.guard intrinsic calls.
static cl::opt<bool>
    WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
                      cl::desc("Whether or not we should widen guards "
                               "expressed as branches by widenable conditions"),
                      cl::init(Val: true));
77 | |
78 | namespace { |
79 | |
80 | // Get the condition of \p I. It can either be a guard or a conditional branch. |
81 | static Value *getCondition(Instruction *I) { |
82 | if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(Val: I)) { |
83 | assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && |
84 | "Bad guard intrinsic?" ); |
85 | return GI->getArgOperand(i: 0); |
86 | } |
87 | Value *Cond, *WC; |
88 | BasicBlock *IfTrueBB, *IfFalseBB; |
89 | if (parseWidenableBranch(U: I, Condition&: Cond, WidenableCondition&: WC, IfTrueBB, IfFalseBB)) |
90 | return Cond; |
91 | |
92 | return cast<BranchInst>(Val: I)->getCondition(); |
93 | } |
94 | |
95 | // Set the condition for \p I to \p NewCond. \p I can either be a guard or a |
96 | // conditional branch. |
97 | static void setCondition(Instruction *I, Value *NewCond) { |
98 | if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(Val: I)) { |
99 | assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && |
100 | "Bad guard intrinsic?" ); |
101 | GI->setArgOperand(i: 0, v: NewCond); |
102 | return; |
103 | } |
104 | cast<BranchInst>(Val: I)->setCondition(NewCond); |
105 | } |
106 | |
107 | // Eliminates the guard instruction properly. |
108 | static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) { |
109 | GuardInst->eraseFromParent(); |
110 | if (MSSAU) |
111 | MSSAU->removeMemoryAccess(I: GuardInst); |
112 | ++GuardsEliminated; |
113 | } |
114 | |
115 | /// Find a point at which the widened condition of \p Guard should be inserted. |
116 | /// When it is represented as intrinsic call, we can do it right before the call |
117 | /// instruction. However, when we are dealing with widenable branch, we must |
118 | /// account for the following situation: widening should not turn a |
119 | /// loop-invariant condition into a loop-variant. It means that if |
120 | /// widenable.condition() call is invariant (w.r.t. any loop), the new wide |
121 | /// condition should stay invariant. Otherwise there can be a miscompile, like |
122 | /// the one described at https://github.com/llvm/llvm-project/issues/60234. The |
123 | /// safest way to do it is to expand the new condition at WC's block. |
124 | static std::optional<BasicBlock::iterator> |
125 | findInsertionPointForWideCondition(Instruction *WCOrGuard) { |
126 | if (isGuard(U: WCOrGuard)) |
127 | return WCOrGuard->getIterator(); |
128 | if (auto WC = extractWidenableCondition(U: WCOrGuard)) |
129 | return cast<Instruction>(Val: WC)->getIterator(); |
130 | return std::nullopt; |
131 | } |
132 | |
/// Implements guard widening over one region of a function: the dominator
/// subtree rooted at \c Root, restricted to blocks accepted by \c BlockFilter.
/// Driven by \c run().
class GuardWideningImpl {
  // Analyses used for legality/profitability queries.
  DominatorTree &DT;
  // Optional; when present, used to refine the hotter-block heuristic in
  // computeWideningScore.
  PostDominatorTree *PDT;
  LoopInfo &LI;
  AssumptionCache &AC;
  // Optional; kept up to date when guards are erased.
  MemorySSAUpdater *MSSAU;

  /// Together, these describe the region of interest. This might be all of
  /// the blocks within a function, or only a given loop's blocks and preheader.
  DomTreeNode *Root;
  std::function<bool(BasicBlock*)> BlockFilter;

  /// The set of guards and conditional branches whose conditions have been
  /// widened into dominating guards.
  SmallVector<Instruction *, 16> EliminatedGuardsAndBranches;

  /// The set of guards which have been widened to include conditions to other
  /// guards.
  DenseSet<Instruction *> WidenedGuards;

  /// Try to eliminate instruction \p Instr by widening it into an earlier
  /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that
  /// is currently visiting the block containing \p Guard, and \p GuardsPerBlock
  /// maps BasicBlocks to the set of guards seen in that block.
  bool eliminateInstrViaWidening(
      Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
      const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>>
          &GuardsPerBlock);

  /// Used to keep track of which widening potential is more effective.
  enum WideningScore {
    /// Don't widen.
    WS_IllegalOrNegative,

    /// Widening is performance neutral as far as the cycles spent in check
    /// conditions goes (but can still help, e.g., code layout, having less
    /// deopt state).
    WS_Neutral,

    /// Widening is profitable.
    WS_Positive,

    /// Widening is very profitable. Not significantly different from \c
    /// WS_Positive, except by the order.
    WS_VeryPositive
  };

  static StringRef scoreTypeToString(WideningScore WS);

  /// Compute the score for widening the condition in \p DominatedInstr
  /// into \p WideningPoint.
  WideningScore computeWideningScore(Instruction *DominatedInstr,
                                     Instruction *ToWiden,
                                     BasicBlock::iterator WideningPoint,
                                     SmallVectorImpl<Value *> &ChecksToHoist,
                                     SmallVectorImpl<Value *> &ChecksToWiden);

  /// Helper to check if \p V can be hoisted to \p InsertPos.
  bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos) const {
    SmallPtrSet<const Instruction *, 8> Visited;
    return canBeHoistedTo(V, InsertPos, Visited);
  }

  bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos,
                      SmallPtrSetImpl<const Instruction *> &Visited) const;

  // Overload: true iff every check in \p Checks can be hoisted to \p InsertPos.
  bool canBeHoistedTo(const SmallVectorImpl<Value *> &Checks,
                      BasicBlock::iterator InsertPos) const {
    return all_of(Range: Checks,
                  P: [&](const Value *V) { return canBeHoistedTo(V, InsertPos); });
  }
  /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
  /// canBeHoistedTo returned true.
  void makeAvailableAt(Value *V, BasicBlock::iterator InsertPos) const;

  // Overload: hoist every check in \p Checks to \p InsertPos.
  void makeAvailableAt(const SmallVectorImpl<Value *> &Checks,
                       BasicBlock::iterator InsertPos) const {
    for (Value *V : Checks)
      makeAvailableAt(V, InsertPos);
  }

  /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
  /// to generate an expression computing the logical AND of \p ChecksToHoist
  /// and \p ChecksToWiden. Returns an engaged optional iff the expression
  /// computing the AND is only as expensive as computing one of the set of
  /// expressions. If \p InsertPt is provided then actually generate the
  /// resulting expression, make it available at \p InsertPt and return it
  /// (else no change to the IR is made).
  std::optional<Value *>
  mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
              SmallVectorImpl<Value *> &ChecksToWiden,
              std::optional<BasicBlock::iterator> InsertPt);

  /// Generate the logical AND of \p ChecksToHoist and \p OldCondition and make
  /// it available at InsertPt
  Value *hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                     Value *OldCondition, BasicBlock::iterator InsertPt);

  /// Adds freeze to Orig and push it as far as possible very aggressively.
  /// Also replaces all uses of frozen instruction with frozen version.
  Value *freezeAndPush(Value *Orig, BasicBlock::iterator InsertPt);

  /// Represents a range check of the form \c Base + \c Offset u< \c Length,
  /// with the constraint that \c Length is not negative. \c CheckInst is the
  /// pre-existing instruction in the IR that computes the result of this range
  /// check.
  class RangeCheck {
    const Value *Base;
    const ConstantInt *Offset;
    const Value *Length;
    ICmpInst *CheckInst;

  public:
    explicit RangeCheck(const Value *Base, const ConstantInt *Offset,
                        const Value *Length, ICmpInst *CheckInst)
        : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}

    void setBase(const Value *NewBase) { Base = NewBase; }
    void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; }

    const Value *getBase() const { return Base; }
    const ConstantInt *getOffset() const { return Offset; }
    const APInt &getOffsetValue() const { return getOffset()->getValue(); }
    const Value *getLength() const { return Length; };
    ICmpInst *getCheckInst() const { return CheckInst; }

    // Debug printing helper; with PrintTypes the operands' types are shown too.
    void print(raw_ostream &OS, bool PrintTypes = false) {
      OS << "Base: ";
      Base->printAsOperand(O&: OS, PrintType: PrintTypes);
      OS << " Offset: ";
      Offset->printAsOperand(O&: OS, PrintType: PrintTypes);
      OS << " Length: ";
      Length->printAsOperand(O&: OS, PrintType: PrintTypes);
    }

    LLVM_DUMP_METHOD void dump() {
      print(OS&: dbgs());
      dbgs() << "\n";
    }
  };

  /// Parse \p ToParse into a conjunction (logical-and) of range checks; and
  /// append them to \p Checks. Returns true on success, may clobber \c Checks
  /// on failure.
  bool parseRangeChecks(SmallVectorImpl<Value *> &ToParse,
                        SmallVectorImpl<RangeCheck> &Checks) {
    // All conditions must parse; a single failure aborts the whole parse.
    for (auto CheckCond : ToParse) {
      if (!parseRangeChecks(CheckCond, Checks))
        return false;
    }
    return true;
  }

  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks);

  /// Combine the checks in \p Checks into a smaller set of checks and append
  /// them into \p CombinedChecks. Return true on success (i.e. all of checks
  /// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks
  /// and \p CombinedChecks on success and on failure.
  bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
                          SmallVectorImpl<RangeCheck> &CombinedChecks) const;

  /// Can we compute the logical AND of \p ChecksToHoist and \p ChecksToWiden
  /// for the price of computing only one of the set of expressions?
  bool isWideningCondProfitable(SmallVectorImpl<Value *> &ChecksToHoist,
                                SmallVectorImpl<Value *> &ChecksToWiden) {
    // Dry run of mergeChecks: no insertion point, so no IR is changed.
    return mergeChecks(ChecksToHoist, ChecksToWiden, /*InsertPt=*/std::nullopt)
        .has_value();
  }

  /// Widen \p ChecksToWiden to fail if any of \p ChecksToHoist is false
  void widenGuard(SmallVectorImpl<Value *> &ChecksToHoist,
                  SmallVectorImpl<Value *> &ChecksToWiden,
                  Instruction *ToWiden) {
    // NOTE: *InsertPt is safe to dereference here because candidates without
    // a widening point were filtered out before ToWiden was selected.
    auto InsertPt = findInsertionPointForWideCondition(WCOrGuard: ToWiden);
    auto MergedCheck = mergeChecks(ChecksToHoist, ChecksToWiden, InsertPt);
    // If the checks could not be merged cheaply, fall back to AND-ing the
    // hoisted checks onto ToWiden's existing condition.
    Value *Result = MergedCheck ? *MergedCheck
                                : hoistChecks(ChecksToHoist,
                                              OldCondition: getCondition(I: ToWiden), InsertPt: *InsertPt);

    if (isGuardAsWidenableBranch(U: ToWiden)) {
      setWidenableBranchCond(WidenableBR: cast<BranchInst>(Val: ToWiden), Cond: Result);
      return;
    }
    setCondition(I: ToWiden, NewCond: Result);
  }

public:
  explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
                             LoopInfo &LI, AssumptionCache &AC,
                             MemorySSAUpdater *MSSAU, DomTreeNode *Root,
                             std::function<bool(BasicBlock *)> BlockFilter)
      : DT(DT), PDT(PDT), LI(LI), AC(AC), MSSAU(MSSAU), Root(Root),
        BlockFilter(BlockFilter) {}

  /// The entry point for this pass.
  bool run();
};
331 | } |
332 | |
333 | static bool isSupportedGuardInstruction(const Instruction *Insn) { |
334 | if (isGuard(U: Insn)) |
335 | return true; |
336 | if (WidenBranchGuards && isGuardAsWidenableBranch(U: Insn)) |
337 | return true; |
338 | return false; |
339 | } |
340 | |
bool GuardWideningImpl::run() {
  DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
  bool Changed = false;
  // Walk the region in dominator-tree DFS order. This guarantees that when a
  // block is visited, every block on its root path has already had its guards
  // collected into GuardsInBlock, which eliminateInstrViaWidening relies on.
  for (auto DFI = df_begin(G: Root), DFE = df_end(G: Root);
       DFI != DFE; ++DFI) {
    auto *BB = (*DFI)->getBlock();
    if (!BlockFilter(BB))
      continue;

    auto &CurrentList = GuardsInBlock[BB];

    // Collect this block's guards (in program order) before trying to widen
    // any of them.
    for (auto &I : *BB)
      if (isSupportedGuardInstruction(Insn: &I))
        CurrentList.push_back(Elt: cast<Instruction>(Val: &I));

    for (auto *II : CurrentList)
      Changed |= eliminateInstrViaWidening(Instr: II, DFSI: DFI, GuardsPerBlock: GuardsInBlock);
  }

  assert(EliminatedGuardsAndBranches.empty() || Changed);
  // During the walk, widened-away conditions were only replaced with `true`
  // (see eliminateInstrViaWidening); do the actual cleanup now. Skip anything
  // that itself became the target of a later widening.
  for (auto *I : EliminatedGuardsAndBranches)
    if (!WidenedGuards.count(V: I)) {
      assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
      if (isSupportedGuardInstruction(Insn: I))
        eliminateGuard(GuardInst: I, MSSAU);
      else {
        assert(isa<BranchInst>(I) &&
               "Eliminated something other than guard or branch?");
        // Constant-condition branches are left for later CFG cleanup; just
        // account for them in the statistics.
        ++CondBranchEliminated;
      }
    }

  return Changed;
}
375 | |
bool GuardWideningImpl::eliminateInstrViaWidening(
    Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
    const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>>
        &GuardsInBlock) {
  SmallVector<Value *> ChecksToHoist;
  parseWidenableGuard(U: Instr, Checks&: ChecksToHoist);
  // Ignore trivial true or false conditions. These instructions will be
  // trivially eliminated by any cleanup pass. Do not erase them because other
  // guards can possibly be widened into them.
  if (ChecksToHoist.empty() ||
      (ChecksToHoist.size() == 1 && isa<ConstantInt>(Val: ChecksToHoist.front())))
    return false;

  Instruction *BestSoFar = nullptr;
  auto BestScoreSoFar = WS_IllegalOrNegative;

  // In the set of dominating guards, find the one we can merge GuardInst with
  // for the most profit. The DFS path from the root to Instr's block gives
  // exactly the blocks whose guards dominate Instr.
  for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) {
    auto *CurBB = DFSI.getPath(n: i)->getBlock();
    if (!BlockFilter(CurBB))
      break;
    assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
    const auto &GuardsInCurBB = GuardsInBlock.find(Val: CurBB)->second;

    // Within Instr's own block, only guards textually before Instr dominate
    // it; in strictly dominating blocks, all guards do.
    auto I = GuardsInCurBB.begin();
    auto E = Instr->getParent() == CurBB ? find(Range: GuardsInCurBB, Val: Instr)
                                         : GuardsInCurBB.end();

#ifndef NDEBUG
    // Debug-only sanity check: GuardsInCurBB must list its guards in program
    // order, since the [I, E) slicing above depends on it.
    {
      unsigned Index = 0;
      for (auto &I : *CurBB) {
        if (Index == GuardsInCurBB.size())
          break;
        if (GuardsInCurBB[Index] == &I)
          Index++;
      }
      assert(Index == GuardsInCurBB.size() &&
             "Guards expected to be in order!");
    }
#endif

    assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");

    for (auto *Candidate : make_range(x: I, y: E)) {
      // Candidates with no legal widening point cannot be widened into.
      auto WideningPoint = findInsertionPointForWideCondition(WCOrGuard: Candidate);
      if (!WideningPoint)
        continue;
      SmallVector<Value *> CandidateChecks;
      parseWidenableGuard(U: Candidate, Checks&: CandidateChecks);
      auto Score = computeWideningScore(DominatedInstr: Instr, ToWiden: Candidate, WideningPoint: *WideningPoint,
                                        ChecksToHoist, ChecksToWiden&: CandidateChecks);
      LLVM_DEBUG(dbgs() << "Score between " << *Instr << " and " << *Candidate
                        << " is " << scoreTypeToString(Score) << "\n");
      if (Score > BestScoreSoFar) {
        BestScoreSoFar = Score;
        BestSoFar = Candidate;
      }
    }
  }

  if (BestScoreSoFar == WS_IllegalOrNegative) {
    LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n");
    return false;
  }

  assert(BestSoFar != Instr && "Should have never visited same guard!");
  assert(DT.dominates(BestSoFar, Instr) && "Should be!");

  LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
                    << " with score " << scoreTypeToString(BestScoreSoFar)
                    << "\n");
  // Fold Instr's checks into the best dominating guard, then neutralize Instr
  // by setting its condition to `true`. Actual deletion is deferred to run(),
  // because Instr may still become a widening target for later guards.
  SmallVector<Value *> ChecksToWiden;
  parseWidenableGuard(U: BestSoFar, Checks&: ChecksToWiden);
  widenGuard(ChecksToHoist, ChecksToWiden, ToWiden: BestSoFar);
  auto NewGuardCondition = ConstantInt::getTrue(Context&: Instr->getContext());
  setCondition(I: Instr, NewCond: NewGuardCondition);
  EliminatedGuardsAndBranches.push_back(Elt: Instr);
  WidenedGuards.insert(V: BestSoFar);
  return true;
}
458 | |
GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
    Instruction *DominatedInstr, Instruction *ToWiden,
    BasicBlock::iterator WideningPoint, SmallVectorImpl<Value *> &ChecksToHoist,
    SmallVectorImpl<Value *> &ChecksToWiden) {
  Loop *DominatedInstrLoop = LI.getLoopFor(BB: DominatedInstr->getParent());
  Loop *DominatingGuardLoop = LI.getLoopFor(BB: WideningPoint->getParent());
  bool HoistingOutOfLoop = false;

  if (DominatingGuardLoop != DominatedInstrLoop) {
    // Be conservative and don't widen into a sibling loop. TODO: If the
    // sibling is colder, we should consider allowing this.
    if (DominatingGuardLoop &&
        !DominatingGuardLoop->contains(L: DominatedInstrLoop))
      return WS_IllegalOrNegative;

    HoistingOutOfLoop = true;
  }

  // Legality first: every check we want to hoist must be movable to the
  // widening point.
  if (!canBeHoistedTo(Checks: ChecksToHoist, InsertPos: WideningPoint))
    return WS_IllegalOrNegative;
  // Further in the GuardWideningImpl::hoistChecks the entire condition might be
  // widened, not the parsed list of checks. So we need to check the possibility
  // of that condition hoisting.
  if (!canBeHoistedTo(V: getCondition(I: ToWiden), InsertPos: WideningPoint))
    return WS_IllegalOrNegative;

  // If the guard was conditionally executed, it may never be reached
  // dynamically. There are two potential downsides to hoisting it out of the
  // conditionally executed region: 1) we may spuriously deopt without need and
  // 2) we have the extra cost of computing the guard condition in the common
  // case. At the moment, we really only consider the second in our heuristic
  // here. TODO: evaluate cost model for spurious deopt
  // NOTE: As written, this also lets us hoist right over another guard which
  // is essentially just another spelling for control flow.
  if (isWideningCondProfitable(ChecksToHoist, ChecksToWiden))
    return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;

  if (HoistingOutOfLoop)
    return WS_Positive;

  // For a given basic block \p BB, return its successor which is guaranteed or
  // highly likely will be taken as its successor.
  auto GetLikelySuccessor = [](const BasicBlock * BB)->const BasicBlock * {
    if (auto *UniqueSucc = BB->getUniqueSuccessor())
      return UniqueSucc;
    auto *Term = BB->getTerminator();
    Value *Cond = nullptr;
    const BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
    using namespace PatternMatch;
    if (!match(V: Term, P: m_Br(C: m_Value(V&: Cond), T: m_BasicBlock(V&: IfTrue),
                           F: m_BasicBlock(V&: IfFalse))))
      return nullptr;
    // For constant conditions, only one dynamic successor is possible
    if (auto *ConstCond = dyn_cast<ConstantInt>(Val: Cond))
      return ConstCond->isAllOnesValue() ? IfTrue : IfFalse;
    // If one of successors ends with deopt, another one is likely.
    if (IfFalse->getPostdominatingDeoptimizeCall())
      return IfTrue;
    if (IfTrue->getPostdominatingDeoptimizeCall())
      return IfFalse;
    // TODO: Use branch frequency metadata to allow hoisting through non-deopt
    // branches?
    return nullptr;
  };

  // Returns true if we might be hoisting above explicit control flow into a
  // considerably hotter block. Note that this completely ignores implicit
  // control flow (guards, calls which throw, etc...). That choice appears
  // arbitrary (we assume that implicit control flow exits are all rare).
  auto MaybeHoistingToHotterBlock = [&]() {
    const auto *DominatingBlock = WideningPoint->getParent();
    const auto *DominatedBlock = DominatedInstr->getParent();

    // Descend as low as we can, always taking the likely successor.
    assert(DT.isReachableFromEntry(DominatingBlock) && "Unreached code");
    assert(DT.isReachableFromEntry(DominatedBlock) && "Unreached code");
    assert(DT.dominates(DominatingBlock, DominatedBlock) && "No dominance");
    while (DominatedBlock != DominatingBlock) {
      auto *LikelySucc = GetLikelySuccessor(DominatingBlock);
      // No likely successor?
      if (!LikelySucc)
        break;
      // Only go down the dominator tree.
      if (!DT.properlyDominates(A: DominatingBlock, B: LikelySucc))
        break;
      DominatingBlock = LikelySucc;
    }

    // Found? The likely path reaches the dominated block, so hoisting does not
    // move the check into a hotter region.
    if (DominatedBlock == DominatingBlock)
      return false;
    // We followed the likely successor chain and went past the dominated
    // block. It means that the dominated guard is in dead/very cold code.
    if (!DT.dominates(A: DominatingBlock, B: DominatedBlock))
      return true;
    // TODO: diamond, triangle cases
    if (!PDT)
      return true;
    return !PDT->dominates(A: DominatedBlock, B: DominatingBlock);
  };

  return MaybeHoistingToHotterBlock() ? WS_IllegalOrNegative : WS_Neutral;
}
562 | |
563 | bool GuardWideningImpl::canBeHoistedTo( |
564 | const Value *V, BasicBlock::iterator Loc, |
565 | SmallPtrSetImpl<const Instruction *> &Visited) const { |
566 | auto *Inst = dyn_cast<Instruction>(Val: V); |
567 | if (!Inst || DT.dominates(Def: Inst, User: Loc) || Visited.count(Ptr: Inst)) |
568 | return true; |
569 | |
570 | if (!isSafeToSpeculativelyExecute(I: Inst, CtxI: Loc, AC: &AC, DT: &DT) || |
571 | Inst->mayReadFromMemory()) |
572 | return false; |
573 | |
574 | Visited.insert(Ptr: Inst); |
575 | |
576 | // We only want to go _up_ the dominance chain when recursing. |
577 | assert(!isa<PHINode>(Loc) && |
578 | "PHIs should return false for isSafeToSpeculativelyExecute" ); |
579 | assert(DT.isReachableFromEntry(Inst->getParent()) && |
580 | "We did a DFS from the block entry!" ); |
581 | return all_of(Range: Inst->operands(), |
582 | P: [&](Value *Op) { return canBeHoistedTo(V: Op, Loc, Visited); }); |
583 | } |
584 | |
585 | void GuardWideningImpl::makeAvailableAt(Value *V, |
586 | BasicBlock::iterator Loc) const { |
587 | auto *Inst = dyn_cast<Instruction>(Val: V); |
588 | if (!Inst || DT.dominates(Def: Inst, User: Loc)) |
589 | return; |
590 | |
591 | assert(isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) && |
592 | !Inst->mayReadFromMemory() && |
593 | "Should've checked with canBeHoistedTo!" ); |
594 | |
595 | for (Value *Op : Inst->operands()) |
596 | makeAvailableAt(V: Op, Loc); |
597 | |
598 | Inst->moveBefore(BB&: *Loc->getParent(), I: Loc); |
599 | } |
600 | |
// Return Instruction before which we can insert freeze for the value V as close
// to def as possible. If there is no place to add freeze, return std::nullopt.
static std::optional<BasicBlock::iterator>
getFreezeInsertPt(Value *V, const DominatorTree &DT) {
  auto *I = dyn_cast<Instruction>(Val: V);
  // Non-instructions (arguments, constants, globals) are frozen at the top of
  // the entry block, which dominates everything.
  if (!I)
    return DT.getRoot()->getFirstNonPHIOrDbgOrAlloca()->getIterator();

  std::optional<BasicBlock::iterator> Res = I->getInsertionPointAfterDef();
  // If there is no place to add freeze - bail out.
  if (!Res || !DT.dominates(Def: I, User: &**Res))
    return std::nullopt;

  Instruction *ResInst = &**Res;

  // If there is a User dominated by original I, then it should be dominated
  // by Freeze instruction as well.
  if (any_of(Range: I->users(), P: [&](User *U) {
        Instruction *User = cast<Instruction>(Val: U);
        return ResInst != User && DT.dominates(Def: I, User) &&
               !DT.dominates(Def: ResInst, User);
      }))
    return std::nullopt;
  return Res;
}
626 | |
Value *GuardWideningImpl::freezeAndPush(Value *Orig,
                                        BasicBlock::iterator InsertPt) {
  // Already known poison-free: no freeze needed at all.
  if (isGuaranteedNotToBePoison(V: Orig, AC: nullptr, CtxI: InsertPt, DT: &DT))
    return Orig;
  std::optional<BasicBlock::iterator> InsertPtAtDef =
      getFreezeInsertPt(V: Orig, DT);
  // No legal spot near the def: freeze at the use point instead.
  if (!InsertPtAtDef) {
    FreezeInst *FI = new FreezeInst(Orig, "gw.freeze");
    FI->insertBefore(BB&: *InsertPt->getParent(), InsertPos: InsertPt);
    return FI;
  }
  // Constants/globals: freeze once at the computed def-side point; there are
  // no operands to push the freeze through.
  if (isa<Constant>(Val: Orig) || isa<GlobalValue>(Val: Orig)) {
    BasicBlock::iterator InsertPt = *InsertPtAtDef;
    FreezeInst *FI = new FreezeInst(Orig, "gw.freeze");
    FI->insertBefore(BB&: *InsertPt->getParent(), InsertPos: InsertPt);
    return FI;
  }

  // General case: walk Orig's operand tree and push freezes to the leaves
  // that can actually produce poison, dropping poison-generating flags on the
  // instructions we pass through.
  SmallSet<Value *, 16> Visited;
  SmallVector<Value *, 16> Worklist;
  SmallSet<Instruction *, 16> DropPoisonFlags;
  SmallVector<Value *, 16> NeedFreeze;
  DenseMap<Value *, FreezeInst *> CacheOfFreezes;

  // A bit overloaded data structures. Visited contains constant/GV
  // if we already met it. In this case CacheOfFreezes has a freeze if it is
  // required.
  auto handleConstantOrGlobal = [&](Use &U) {
    Value *Def = U.get();
    // Only handles constants/globals; returns false so the caller enqueues
    // anything else on the worklist.
    if (!isa<Constant>(Val: Def) && !isa<GlobalValue>(Val: Def))
      return false;

    if (Visited.insert(Ptr: Def).second) {
      if (isGuaranteedNotToBePoison(V: Def, AC: nullptr, CtxI: InsertPt, DT: &DT))
        return true;
      // Constants always have a freeze insert point (entry block), so the
      // dereference is safe here.
      BasicBlock::iterator InsertPt = *getFreezeInsertPt(V: Def, DT);
      FreezeInst *FI = new FreezeInst(Def, Def->getName() + ".gw.fr");
      FI->insertBefore(BB&: *InsertPt->getParent(), InsertPos: InsertPt);
      CacheOfFreezes[Def] = FI;
    }

    // Redirect this use to the cached frozen value, if one was required.
    if (CacheOfFreezes.count(Val: Def))
      U.set(CacheOfFreezes[Def]);
    return true;
  };

  Worklist.push_back(Elt: Orig);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (!Visited.insert(Ptr: V).second)
      continue;

    if (isGuaranteedNotToBePoison(V, AC: nullptr, CtxI: InsertPt, DT: &DT))
      continue;

    // Stop pushing at non-instructions and at instructions that can introduce
    // poison themselves: these must be frozen directly.
    Instruction *I = dyn_cast<Instruction>(Val: V);
    if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I),
                                     /*ConsiderFlagsAndMetadata*/ false)) {
      NeedFreeze.push_back(Elt: V);
      continue;
    }
    // Check all operands. If for any of them we cannot insert Freeze,
    // stop here. Otherwise, iterate.
    if (any_of(Range: I->operands(), P: [&](Value *Op) {
          return isa<Instruction>(Val: Op) && !getFreezeInsertPt(V: Op, DT);
        })) {
      NeedFreeze.push_back(Elt: I);
      continue;
    }
    // This instruction only propagates poison from its operands (modulo
    // flags), so freeze its operands instead and strip the flags later.
    DropPoisonFlags.insert(Ptr: I);
    for (Use &U : I->operands())
      if (!handleConstantOrGlobal(U))
        Worklist.push_back(Elt: U.get());
  }
  for (Instruction *I : DropPoisonFlags)
    I->dropPoisonGeneratingAnnotations();

  // Insert the collected freezes and rewrite all uses (except the freeze's
  // own operand) to the frozen value.
  Value *Result = Orig;
  for (Value *V : NeedFreeze) {
    BasicBlock::iterator FreezeInsertPt = *getFreezeInsertPt(V, DT);
    FreezeInst *FI = new FreezeInst(V, V->getName() + ".gw.fr");
    FI->insertBefore(BB&: *FreezeInsertPt->getParent(), InsertPos: FreezeInsertPt);
    ++FreezeAdded;
    if (V == Orig)
      Result = FI;
    V->replaceUsesWithIf(
        New: FI, ShouldReplace: [&](const Use & U)->bool { return U.getUser() != FI; });
  }

  return Result;
}
718 | |
/// Try to merge \p ChecksToHoist and \p ChecksToWiden into a single condition
/// that subsumes both lists "for the price of one".  Returns std::nullopt if
/// no such merged check can be computed.  Otherwise returns the merged
/// condition: a newly materialized instruction at *InsertPt when InsertPt is
/// provided, or nullptr when the caller only asked whether merging is
/// possible (InsertPt == std::nullopt creates no IR).
std::optional<Value *>
GuardWideningImpl::mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                               SmallVectorImpl<Value *> &ChecksToWiden,
                               std::optional<BasicBlock::iterator> InsertPt) {
  using namespace llvm::PatternMatch;

  Value *Result = nullptr;
  {
    // L >u C0 && L >u C1 -> L >u max(C0, C1)
    ConstantInt *RHS0, *RHS1;
    Value *LHS;
    ICmpInst::Predicate Pred0, Pred1;
    // TODO: Support searching for pairs to merge from both whole lists of
    // ChecksToHoist and ChecksToWiden.
    if (ChecksToWiden.size() == 1 && ChecksToHoist.size() == 1 &&
        match(V: ChecksToWiden.front(),
              P: m_ICmp(Pred&: Pred0, L: m_Value(V&: LHS), R: m_ConstantInt(CI&: RHS0))) &&
        match(V: ChecksToHoist.front(),
              P: m_ICmp(Pred&: Pred1, L: m_Specific(V: LHS), R: m_ConstantInt(CI&: RHS1)))) {

      // Both compare the same LHS against a constant; translate each check
      // into the exact set of LHS values that passes it.
      ConstantRange CR0 =
          ConstantRange::makeExactICmpRegion(Pred: Pred0, Other: RHS0->getValue());
      ConstantRange CR1 =
          ConstantRange::makeExactICmpRegion(Pred: Pred1, Other: RHS1->getValue());

      // Given what we're doing here and the semantics of guards, it would
      // be correct to use a subset intersection, but that may be too
      // aggressive in cases we care about.
      if (std::optional<ConstantRange> Intersect =
              CR0.exactIntersectWith(CR: CR1)) {
        APInt NewRHSAP;
        CmpInst::Predicate Pred;
        // Only usable if the intersection is expressible as one icmp.
        if (Intersect->getEquivalentICmp(Pred, RHS&: NewRHSAP)) {
          if (InsertPt) {
            ConstantInt *NewRHS =
                ConstantInt::get(Context&: (*InsertPt)->getContext(), V: NewRHSAP);
            assert(canBeHoistedTo(LHS, *InsertPt) && "must be" );
            makeAvailableAt(V: LHS, Loc: *InsertPt);
            Result = new ICmpInst(*InsertPt, Pred, LHS, NewRHS, "wide.chk" );
          }
          return Result;
        }
      }
    }
  }

  {
    // Fall back to interpreting both lists as range checks of the form
    // "Base + Offset u< Length" and combining checks that share a base and
    // length.
    SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
    if (parseRangeChecks(ToParse&: ChecksToWiden, Checks) &&
        parseRangeChecks(ToParse&: ChecksToHoist, Checks) &&
        combineRangeChecks(Checks, CombinedChecks)) {
      if (InsertPt) {
        // Materialize the conjunction of the surviving checks at InsertPt.
        for (auto &RC : CombinedChecks) {
          makeAvailableAt(V: RC.getCheckInst(), Loc: *InsertPt);
          if (Result)
            Result = BinaryOperator::CreateAnd(V1: RC.getCheckInst(), V2: Result, Name: "" ,
                                               It: *InsertPt);
          else
            Result = RC.getCheckInst();
        }
        assert(Result && "Failed to find result value" );
        Result->setName("wide.chk" );
        // Freeze so that a poison condition cannot leak into the widened
        // guard (which may execute on paths the original did not).
        Result = freezeAndPush(Orig: Result, InsertPt: *InsertPt);
      }
      return Result;
    }
  }
  // We were not able to compute ChecksToHoist AND ChecksToWiden for the price
  // of one.
  return std::nullopt;
}
790 | |
791 | Value *GuardWideningImpl::hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist, |
792 | Value *OldCondition, |
793 | BasicBlock::iterator InsertPt) { |
794 | assert(!ChecksToHoist.empty()); |
795 | IRBuilder<> Builder(InsertPt->getParent(), InsertPt); |
796 | makeAvailableAt(Checks: ChecksToHoist, InsertPos: InsertPt); |
797 | makeAvailableAt(V: OldCondition, Loc: InsertPt); |
798 | Value *Result = Builder.CreateAnd(Ops: ChecksToHoist); |
799 | Result = freezeAndPush(Orig: Result, InsertPt); |
800 | Result = Builder.CreateAnd(LHS: OldCondition, RHS: Result); |
801 | Result->setName("wide.chk" ); |
802 | return Result; |
803 | } |
804 | |
/// Parse \p CheckCond as a single range check of the canonical form
///   Base + Offset u< Length
/// and append the parsed check to \p Checks.  Returns false (appending
/// nothing) if CheckCond is not an unsigned integer comparison that can be
/// interpreted this way, or if Length is not known non-negative.
bool GuardWideningImpl::parseRangeChecks(
    Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks) {
  using namespace llvm::PatternMatch;

  // Only integer u< / u> comparisons qualify as range checks.
  auto *IC = dyn_cast<ICmpInst>(Val: CheckCond);
  if (!IC || !IC->getOperand(i_nocapture: 0)->getType()->isIntegerTy() ||
      (IC->getPredicate() != ICmpInst::ICMP_ULT &&
       IC->getPredicate() != ICmpInst::ICMP_UGT))
    return false;

  // Canonicalize "Length u> Index" into "Index u< Length".
  const Value *CmpLHS = IC->getOperand(i_nocapture: 0), *CmpRHS = IC->getOperand(i_nocapture: 1);
  if (IC->getPredicate() == ICmpInst::ICMP_UGT)
    std::swap(a&: CmpLHS, b&: CmpRHS);

  auto &DL = IC->getModule()->getDataLayout();

  // Start with Base = CmpLHS, Offset = 0, Length = CmpRHS.
  GuardWideningImpl::RangeCheck Check(
      CmpLHS, cast<ConstantInt>(Val: ConstantInt::getNullValue(Ty: CmpRHS->getType())),
      CmpRHS, IC);

  if (!isKnownNonNegative(V: Check.getLength(), SQ: DL))
    return false;

  // What we have in \c Check now is a correct interpretation of \p CheckCond.
  // Try to see if we can move some constant offsets into the \c Offset field.

  bool Changed;
  auto &Ctx = CheckCond->getContext();

  // Iterate to a fixed point, peeling one constant addend off the base per
  // round.
  do {
    Value *OpLHS;
    ConstantInt *OpRHS;
    Changed = false;

#ifndef NDEBUG
    auto *BaseInst = dyn_cast<Instruction>(Val: Check.getBase());
    assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) &&
           "Unreachable instruction?" );
#endif

    // (OpLHS + C) + Offset u< L  ==>  OpLHS + (Offset + C) u< L
    if (match(V: Check.getBase(), P: m_Add(L: m_Value(V&: OpLHS), R: m_ConstantInt(CI&: OpRHS)))) {
      Check.setBase(OpLHS);
      APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
      Check.setOffset(ConstantInt::get(Context&: Ctx, V: NewOffset));
      Changed = true;
    } else if (match(V: Check.getBase(),
                     P: m_Or(L: m_Value(V&: OpLHS), R: m_ConstantInt(CI&: OpRHS)))) {
      // An 'or' whose constant has no bits in common with the known-set bits
      // of OpLHS behaves exactly like an 'add'; fold it the same way.
      KnownBits Known = computeKnownBits(V: OpLHS, DL);
      if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
        Check.setBase(OpLHS);
        APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
        Check.setOffset(ConstantInt::get(Context&: Ctx, V: NewOffset));
        Changed = true;
      }
    }
  } while (Changed);

  Checks.push_back(Elt: Check);
  return true;
}
865 | |
/// Combine the range checks in \p Checks into a (hopefully smaller)
/// equivalent set written to \p RangeChecksOut.  Returns true only if the
/// output set is strictly smaller than the input; on a false return the
/// contents of RangeChecksOut must not be used.
bool GuardWideningImpl::combineRangeChecks(
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
  unsigned OldCount = Checks.size();
  while (!Checks.empty()) {
    // Pick all of the range checks with a specific base and length, and try to
    // merge them.
    const Value *CurrentBase = Checks.front().getBase();
    const Value *CurrentLength = Checks.front().getLength();

    SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;

    auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) {
      return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
    };

    // Move this group out of Checks into CurrentChecks.
    copy_if(Range&: Checks, Out: std::back_inserter(x&: CurrentChecks), P: IsCurrentCheck);
    erase_if(C&: Checks, P: IsCurrentCheck);

    assert(CurrentChecks.size() != 0 && "We know we have at least one!" );

    // With fewer than three checks over the same (base, length) there is
    // nothing to shrink; pass them through unchanged.
    if (CurrentChecks.size() < 3) {
      llvm::append_range(C&: RangeChecksOut, R&: CurrentChecks);
      continue;
    }

    // CurrentChecks.size() will typically be 3 here, but so far there has been
    // no need to hard-code that fact.

    llvm::sort(C&: CurrentChecks, Comp: [&](const GuardWideningImpl::RangeCheck &LHS,
                                  const GuardWideningImpl::RangeCheck &RHS) {
      return LHS.getOffsetValue().slt(RHS: RHS.getOffsetValue());
    });

    // Note: sorting mutates CurrentChecks in place; Checks itself is no
    // longer referenced for this group.

    const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
    const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();

    // Bail if the offsets span at least half the unsigned range -- the
    // non-wrapping argument below (Precond_1) would not hold.
    unsigned BitWidth = MaxOffset->getValue().getBitWidth();
    if ((MaxOffset->getValue() - MinOffset->getValue())
            .ugt(RHS: APInt::getSignedMinValue(numBits: BitWidth)))
      return false;

    APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue();
    const APInt &HighOffset = MaxOffset->getValue();
    auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) {
      return (HighOffset - RC.getOffsetValue()).ult(RHS: MaxDiff);
    };

    // MaxDiff == 0 means all offsets are equal (Precond_2 below fails);
    // otherwise every non-minimal offset must satisfy Precond_0.
    if (MaxDiff.isMinValue() || !all_of(Range: drop_begin(RangeOrContainer&: CurrentChecks), P: OffsetOK))
      return false;

    // We have a series of f+1 checks as:
    //
    //   I+k_0 u< L   ... Chk_0
    //   I+k_1 u< L   ... Chk_1
    //   ...
    //   I+k_f u< L   ... Chk_f
    //
    //     with forall i in [0,f]: k_f-k_i u< k_f-k_0     ... Precond_0
    //                             k_f-k_0 u< INT_MIN+k_f ... Precond_1
    //                             k_f != k_0              ... Precond_2
    //
    // Claim:
    //   Chk_0 AND Chk_f implies all the other checks
    //
    // Informal proof sketch:
    //
    // We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap
    // (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and
    // thus I+k_f is the greatest unsigned value in that range.
    //
    // This combined with Chk_f shows that everything in that range is u< L.
    // Via Precond_0 we know that all of the indices in Chk_0 through Chk_f
    // lie in [I+k_0,I+k_f], thus proving our claim.
    //
    // To see that [I+k_0,I+k_f] is not a wrapping range, note that there are
    // two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal
    // since k_0 != k_f).  In the former case, [I+k_0,I+k_f] is not a wrapping
    // range by definition, and the latter case is impossible:
    //
    //   0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
    //   xxxxxx                                    xxxxxxxxxxxxxxxxxxxxxxxxx
    //
    // For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted
    // with 'x' above) to be at least >u INT_MIN.

    // Keep only the two extreme checks; they imply all the rest.
    RangeChecksOut.emplace_back(Args&: CurrentChecks.front());
    RangeChecksOut.emplace_back(Args&: CurrentChecks.back());
  }

  assert(RangeChecksOut.size() <= OldCount && "We pessimized!" );
  return RangeChecksOut.size() != OldCount;
}
961 | |
962 | #ifndef NDEBUG |
963 | StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) { |
964 | switch (WS) { |
965 | case WS_IllegalOrNegative: |
966 | return "IllegalOrNegative" ; |
967 | case WS_Neutral: |
968 | return "Neutral" ; |
969 | case WS_Positive: |
970 | return "Positive" ; |
971 | case WS_VeryPositive: |
972 | return "VeryPositive" ; |
973 | } |
974 | |
975 | llvm_unreachable("Fully covered switch above!" ); |
976 | } |
977 | #endif |
978 | |
979 | PreservedAnalyses GuardWideningPass::run(Function &F, |
980 | FunctionAnalysisManager &AM) { |
981 | // Avoid requesting analyses if there are no guards or widenable conditions. |
982 | auto *GuardDecl = F.getParent()->getFunction( |
983 | Intrinsic::getName(Intrinsic::experimental_guard)); |
984 | bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty(); |
985 | auto *WCDecl = F.getParent()->getFunction( |
986 | Intrinsic::getName(Intrinsic::experimental_widenable_condition)); |
987 | bool HasWidenableConditions = WCDecl && !WCDecl->use_empty(); |
988 | if (!HasIntrinsicGuards && !HasWidenableConditions) |
989 | return PreservedAnalyses::all(); |
990 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
991 | auto &LI = AM.getResult<LoopAnalysis>(IR&: F); |
992 | auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(IR&: F); |
993 | auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F); |
994 | auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(IR&: F); |
995 | std::unique_ptr<MemorySSAUpdater> MSSAU; |
996 | if (MSSAA) |
997 | MSSAU = std::make_unique<MemorySSAUpdater>(args: &MSSAA->getMSSA()); |
998 | if (!GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr, |
999 | DT.getRootNode(), [](BasicBlock *) { return true; }) |
1000 | .run()) |
1001 | return PreservedAnalyses::all(); |
1002 | |
1003 | PreservedAnalyses PA; |
1004 | PA.preserveSet<CFGAnalyses>(); |
1005 | PA.preserve<MemorySSAAnalysis>(); |
1006 | return PA; |
1007 | } |
1008 | |
1009 | PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM, |
1010 | LoopStandardAnalysisResults &AR, |
1011 | LPMUpdater &U) { |
1012 | BasicBlock *RootBB = L.getLoopPredecessor(); |
1013 | if (!RootBB) |
1014 | RootBB = L.getHeader(); |
1015 | auto BlockFilter = [&](BasicBlock *BB) { |
1016 | return BB == RootBB || L.contains(BB); |
1017 | }; |
1018 | std::unique_ptr<MemorySSAUpdater> MSSAU; |
1019 | if (AR.MSSA) |
1020 | MSSAU = std::make_unique<MemorySSAUpdater>(args&: AR.MSSA); |
1021 | if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.AC, |
1022 | MSSAU ? MSSAU.get() : nullptr, AR.DT.getNode(BB: RootBB), |
1023 | BlockFilter) |
1024 | .run()) |
1025 | return PreservedAnalyses::all(); |
1026 | |
1027 | auto PA = getLoopPassPreservedAnalyses(); |
1028 | if (AR.MSSA) |
1029 | PA.preserve<MemorySSAAnalysis>(); |
1030 | return PA; |
1031 | } |
1032 | |