DFAJumpThreading.cpp source code [llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp]

1	//===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Transform each threading path to effectively jump thread the DFA. For
10	// example, the CFG below could be transformed as follows, where the cloned
11	// blocks unconditionally branch to the next correct case based on what is
12	// identified in the analysis.
13	//
14	// sw.bb sw.bb
15	// / \| \ / \| \
16	// case1 case2 case3 case1 case2 case3
17	// \ \| / \| \| \|
18	// determinator det.2 det.3 det.1
19	// br sw.bb / \| \
20	// sw.bb.2 sw.bb.3 sw.bb.1
21	// br case2 br case3 br case1§
22	//
23	// Definitions and Terminology:
24	//
25	// Threading path:*
26	// a list of basic blocks, the exit state, and the block that determines
27	// the next state, for which the following notation will be used:
28	// < path of BBs that form a cycle > [ state, determinator ]
29	//
30	// Predictable switch:*
31	// The switch variable is always a known constant so that all conditional
32	// jumps based on switch variable can be converted to unconditional jump.
33	//
34	// Determinator:*
35	// The basic block that determines the next state of the DFA.
36	//
37	// Representing the optimization in C-like pseudocode: the code pattern on the
38	// left could functionally be transformed to the right pattern if the switch
39	// condition is predictable.
40	//
41	// X = A goto A
42	// for (...) A:
43	// switch (X) ...
44	// case A goto B
45	// X = B B:
46	// case B ...
47	// X = C goto C
48	//
49	// The pass first checks that switch variable X is decided by the control flow
50	// path taken in the loop; for example, in case B, the next value of X is
51	// decided to be C. It then enumerates through all paths in the loop and labels
52	// the basic blocks where the next state is decided.
53	//
54	// Using this information it creates new paths that unconditionally branch to
55	// the next case. This involves cloning code, so it only gets triggered if the
56	// amount of code duplicated is below a threshold.
57	//
58	//===----------------------------------------------------------------------===//
59
60	#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
61	#include "llvm/ADT/APInt.h"
62	#include "llvm/ADT/DenseMap.h"
63	#include "llvm/ADT/SmallSet.h"
64	#include "llvm/ADT/Statistic.h"
65	#include "llvm/Analysis/AssumptionCache.h"
66	#include "llvm/Analysis/CodeMetrics.h"
67	#include "llvm/Analysis/DomTreeUpdater.h"
68	#include "llvm/Analysis/LoopInfo.h"
69	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
70	#include "llvm/Analysis/TargetTransformInfo.h"
71	#include "llvm/IR/CFG.h"
72	#include "llvm/IR/Constants.h"
73	#include "llvm/IR/IntrinsicInst.h"
74	#include "llvm/Support/CommandLine.h"
75	#include "llvm/Support/Debug.h"
76	#include "llvm/Transforms/Utils/Cloning.h"
77	#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
78	#include "llvm/Transforms/Utils/ValueMapper.h"
79	#include <algorithm>
80	#include <deque>
81
82	#ifdef EXPENSIVE_CHECKS
83	#include "llvm/IR/Verifier.h"
84	#endif
85
86	using namespace llvm;
87
88	#define DEBUG_TYPE "dfa-jump-threading"
89
90	STATISTIC(NumTransforms, "Number of transformations done");
91	STATISTIC(NumCloned, "Number of blocks cloned");
92	STATISTIC(NumPaths, "Number of individual paths threaded");
93
94	static cl::opt<bool>
95	ClViewCfgBefore("dfa-jump-view-cfg-before",
96	cl::desc ("View the CFG before DFA Jump Threading"),
97	cl::Hidden, cl::init(Val: false));
98
99	static cl::opt<bool> EarlyExitHeuristic(
100	"dfa-early-exit-heuristic",
101	cl::desc ("Exit early if an unpredictable value come from the same loop"),
102	cl::Hidden, cl::init(Val: true));
103
104	static cl::opt<unsigned> MaxPathLength(
105	"dfa-max-path-length",
106	cl::desc ("Max number of blocks searched to find a threading path"),
107	cl::Hidden, cl::init(Val: `20`));
108
109	static cl::opt<unsigned>
110	MaxNumPaths("dfa-max-num-paths",
111	cl::desc ("Max number of paths enumerated around a switch"),
112	cl::Hidden, cl::init(Val: `200`));
113
114	static cl::opt<unsigned>
115	CostThreshold("dfa-cost-threshold",
116	cl::desc ("Maximum cost accepted for the transformation"),
117	cl::Hidden, cl::init(Val: `50`));
118
119	namespace {
120
121	class SelectInstToUnfold {
122	SelectInst *SI;
123	PHINode *SIUse;
124
125	public:
126	SelectInstToUnfold(SelectInst SI, PHINode SIUse) : SI(SI), SIUse(SIUse) {}
127
128	SelectInst getInst() { return* SI; }
129	PHINode getUse() { return* SIUse; }
130
131	explicit operator bool() const { return SI && SIUse; }
132	};
133
134	void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
135	std::vector<SelectInstToUnfold> *NewSIsToUnfold,
136	std::vector<BasicBlock > NewBBs);
137
138	class DFAJumpThreading {
139	public:
140	DFAJumpThreading(AssumptionCache AC, DominatorTree DT, LoopInfo *LI,
141	TargetTransformInfo TTI, OptimizationRemarkEmitter ORE)
142	: AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
143
144	bool run(Function &F);
145
146	private:
147	void
148	unfoldSelectInstrs(DominatorTree *DT,
149	const SmallVector<SelectInstToUnfold, `4`> &SelectInsts) {
150	DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
151	SmallVector<SelectInstToUnfold, `4`> Stack;
152	for (SelectInstToUnfold SIToUnfold : SelectInsts)
153	Stack.push_back(Elt: SIToUnfold);
154
155	while (!Stack.empty()) {
156	SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
157
158	std::vector<SelectInstToUnfold> NewSIsToUnfold;
159	std::vector<BasicBlock *> NewBBs;
160	unfold(DTU: &DTU, SIToUnfold, NewSIsToUnfold: &NewSIsToUnfold, NewBBs: &NewBBs);
161
162	// Put newly discovered select instructions into the work list.
163	for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold)
164	Stack.push_back(Elt: NewSIToUnfold);
165	}
166	}
167
168	AssumptionCache *AC;
169	DominatorTree *DT;
170	LoopInfo *LI;
171	TargetTransformInfo *TTI;
172	OptimizationRemarkEmitter *ORE;
173	};
174
175	} // end anonymous namespace
176
177	namespace {
178
179	/// Create a new basic block and sink \p SIToSink into it.
180	void createBasicBlockAndSinkSelectInst(
181	DomTreeUpdater DTU, SelectInst SI, PHINode SIUse, SelectInst SIToSink,
182	BasicBlock EndBlock, StringRef NewBBName, BasicBlock *NewBlock,
183	BranchInst *NewBranch, std::vector<SelectInstToUnfold> NewSIsToUnfold,
184	std::vector<BasicBlock > NewBBs) {
185	assert(SIToSink->hasOneUse());
186	assert(NewBlock);
187	assert(NewBranch);
188	*NewBlock = BasicBlock::Create(Context&: SI->getContext(), Name: NewBBName,
189	Parent: EndBlock->getParent(), InsertBefore: EndBlock);
190	NewBBs->push_back(x: *NewBlock);
191	NewBranch = BranchInst::Create(IfTrue: EndBlock, InsertAtEnd: NewBlock);
192	SIToSink->moveBefore(MovePos: *NewBranch);
193	NewSIsToUnfold->push_back(x: SelectInstToUnfold (SIToSink, SIUse));
194	DTU->applyUpdates(Updates: {{DominatorTree::Insert, *NewBlock, EndBlock}});
195	}
196
197	/// Unfold the select instruction held in \p SIToUnfold by replacing it with
198	/// control flow.
199	///
200	/// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly
201	/// created basic blocks into \p NewBBs.
202	///
203	/// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
204	void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
205	std::vector<SelectInstToUnfold> *NewSIsToUnfold,
206	std::vector<BasicBlock > NewBBs) {
207	SelectInst *SI = SIToUnfold.getInst();
208	PHINode *SIUse = SIToUnfold.getUse();
209	BasicBlock *StartBlock = SI->getParent();
210	BasicBlock *EndBlock = SIUse->getParent();
211	BranchInst *StartBlockTerm =
212	dyn_cast<BranchInst>(Val: StartBlock->getTerminator());
213
214	assert(StartBlockTerm && StartBlockTerm->isUnconditional());
215	assert(SI->hasOneUse());
216
217	// These are the new basic blocks for the conditional branch.
218	// At least one will become an actual new basic block.
219	BasicBlock TrueBlock = nullptr*;
220	BasicBlock FalseBlock = nullptr*;
221	BranchInst TrueBranch = nullptr*;
222	BranchInst FalseBranch = nullptr*;
223
224	// Sink select instructions to be able to unfold them later.
225	if (SelectInst *SIOp = dyn_cast<SelectInst>(Val: SI->getTrueValue())) {
226	createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIToSink: SIOp, EndBlock,
227	NewBBName: "si.unfold.true", NewBlock: &TrueBlock, NewBranch: &TrueBranch,
228	NewSIsToUnfold, NewBBs);
229	}
230	if (SelectInst *SIOp = dyn_cast<SelectInst>(Val: SI->getFalseValue())) {
231	createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIToSink: SIOp, EndBlock,
232	NewBBName: "si.unfold.false", NewBlock: &FalseBlock,
233	NewBranch: &FalseBranch, NewSIsToUnfold, NewBBs);
234	}
235
236	// If there was nothing to sink, then arbitrarily choose the 'false' side
237	// for a new input value to the PHI.
238	if (!TrueBlock && !FalseBlock) {
239	FalseBlock = BasicBlock::Create(Context&: SI->getContext(), Name: "si.unfold.false",
240	Parent: EndBlock->getParent(), InsertBefore: EndBlock);
241	NewBBs->push_back(x: FalseBlock);
242	BranchInst::Create(IfTrue: EndBlock, InsertAtEnd: FalseBlock);
243	DTU->applyUpdates(Updates: {{DominatorTree::Insert, FalseBlock, EndBlock}});
244	}
245
246	// Insert the real conditional branch based on the original condition.
247	// If we did not create a new block for one of the 'true' or 'false' paths
248	// of the condition, it means that side of the branch goes to the end block
249	// directly and the path originates from the start block from the point of
250	// view of the new PHI.
251	BasicBlock *TT = EndBlock;
252	BasicBlock *FT = EndBlock;
253	if (TrueBlock && FalseBlock) {
254	// A diamond.
255	TT = TrueBlock;
256	FT = FalseBlock;
257
258	// Update the phi node of SI.
259	SIUse->addIncoming(V: SI->getTrueValue(), BB: TrueBlock);
260	SIUse->addIncoming(V: SI->getFalseValue(), BB: FalseBlock);
261
262	// Update any other PHI nodes in EndBlock.
263	for (PHINode &Phi : EndBlock->phis()) {
264	if (&Phi != SIUse) {
265	Value *OrigValue = Phi.getIncomingValueForBlock(BB: StartBlock);
266	Phi.addIncoming(V: OrigValue, BB: TrueBlock);
267	Phi.addIncoming(V: OrigValue, BB: FalseBlock);
268	}
269
270	// Remove incoming place of original StartBlock, which comes in a indirect
271	// way (through TrueBlock and FalseBlock) now.
272	Phi.removeIncomingValue(BB: StartBlock, / DeletePHIIfEmpty = / false);
273	}
274	} else {
275	BasicBlock NewBlock = nullptr*;
276	Value *SIOp1 = SI->getTrueValue();
277	Value *SIOp2 = SI->getFalseValue();
278
279	// A triangle pointing right.
280	if (!TrueBlock) {
281	NewBlock = FalseBlock;
282	FT = FalseBlock;
283	}
284	// A triangle pointing left.
285	else {
286	NewBlock = TrueBlock;
287	TT = TrueBlock;
288	std::swap(a&: SIOp1, b&: SIOp2);
289	}
290
291	// Update the phi node of SI.
292	for (unsigned Idx = `0`; Idx < SIUse->getNumIncomingValues(); ++Idx) {
293	if (SIUse->getIncomingBlock(i: Idx) == StartBlock)
294	SIUse->setIncomingValue(i: Idx, V: SIOp1);
295	}
296	SIUse->addIncoming(V: SIOp2, BB: NewBlock);
297
298	// Update any other PHI nodes in EndBlock.
299	for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II);
300	++II) {
301	if (Phi != SIUse)
302	Phi->addIncoming(V: Phi->getIncomingValueForBlock(BB: StartBlock), BB: NewBlock);
303	}
304	}
305	StartBlockTerm->eraseFromParent();
306	BranchInst::Create(IfTrue: TT, IfFalse: FT, Cond: SI->getCondition(), InsertAtEnd: StartBlock);
307	DTU->applyUpdates(Updates: {{DominatorTree::Insert, StartBlock, TT},
308	{DominatorTree::Insert, StartBlock, FT}});
309
310	// The select is now dead.
311	assert(SI->use_empty() && "Select must be dead now");
312	SI->eraseFromParent();
313	}
314
315	struct ClonedBlock {
316	BasicBlock *BB;
317	APInt State; ///< \p State corresponds to the next value of a switch stmnt.
318	};
319
320	typedef std::deque<BasicBlock *> PathType;
321	typedef std::vector<PathType> PathsType;
322	typedef SmallPtrSet<const BasicBlock *, `8`> VisitedBlocks;
323	typedef std::vector<ClonedBlock> CloneList;
324
325	// This data structure keeps track of all blocks that have been cloned. If two
326	// different ThreadingPaths clone the same block for a certain state it should
327	// be reused, and it can be looked up in this map.
328	typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
329
330	// This map keeps track of all the new definitions for an instruction. This
331	// information is needed when restoring SSA form after cloning blocks.
332	typedef MapVector<Instruction , std::vector<Instruction >> DefMap;
333
334	inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
335	OS << "< ";
336	for (const BasicBlock *BB : Path) {
337	std::string BBName;
338	if (BB->hasName())
339	raw_string_ostream (BBName) << BB->getName();
340	else
341	raw_string_ostream (BBName) << BB;
342	OS << BBName << " ";
343	}
344	OS << ">";
345	return OS;
346	}
347
348	/// ThreadingPath is a path in the control flow of a loop that can be threaded
349	/// by cloning necessary basic blocks and replacing conditional branches with
350	/// unconditional ones. A threading path includes a list of basic blocks, the
351	/// exit state, and the block that determines the next state.
352	struct ThreadingPath {
353	/// Exit value is DFA's exit state for the given path.
354	APInt getExitValue() const { return ExitVal; }
355	void setExitValue(const ConstantInt *V) {
356	ExitVal = V->getValue();
357	IsExitValSet = true;
358	}
359	bool isExitValueSet() const { return IsExitValSet; }
360
361	/// Determinator is the basic block that determines the next state of the DFA.
362	const BasicBlock getDeterminatorBB() const* { return DBB; }
363	void setDeterminator(const BasicBlock *BB) { DBB = BB; }
364
365	/// Path is a list of basic blocks.
366	const PathType &getPath() const { return Path; }
367	void setPath(const PathType &NewPath) { Path = NewPath; }
368
369	void print(raw_ostream &OS) const {
370	OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]";
371	}
372
373	private:
374	PathType Path;
375	APInt ExitVal;
376	const BasicBlock DBB = nullptr*;
377	bool IsExitValSet = false;
378	};
379
380	#ifndef NDEBUG
381	inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
382	TPath.print(OS);
383	return OS;
384	}
385	#endif
386
387	struct MainSwitch {
388	MainSwitch(SwitchInst SI, LoopInfo LI, OptimizationRemarkEmitter *ORE)
389	: LI(LI) {
390	if (isCandidate(SI)) {
391	Instr = SI;
392	} else {
393	ORE->emit(RemarkBuilder: [&]() {
394	return OptimizationRemarkMissed (DEBUG_TYPE, "SwitchNotPredictable", SI)
395	<< "Switch instruction is not predictable.";
396	});
397	}
398	}
399
400	virtual ~MainSwitch() = default;
401
402	SwitchInst getInstr() const* { return Instr; }
403	const SmallVector<SelectInstToUnfold, `4`> getSelectInsts() {
404	return SelectInsts;
405	}
406
407	private:
408	/// Do a use-def chain traversal starting from the switch condition to see if
409	/// \p SI is a potential condidate.
410	///
411	/// Also, collect select instructions to unfold.
412	bool isCandidate(const SwitchInst *SI) {
413	std::deque<std::pair<Value , BasicBlock >> Q;
414	SmallSet<Value *, `16`> SeenValues;
415	SelectInsts.clear();
416
417	Value *SICond = SI->getCondition();
418	LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n");
419	if (!isa<PHINode>(Val: SICond))
420	return false;
421
422	// The switch must be in a loop.
423	const Loop *L = LI->getLoopFor(BB: SI->getParent());
424	if (!L)
425	return false;
426
427	addToQueue(Val: SICond, BB: nullptr, Q, SeenValues);
428
429	while (!Q.empty()) {
430	Value *Current = Q.front().first;
431	BasicBlock *CurrentIncomingBB = Q.front().second;
432	Q.pop_front();
433
434	if (auto *Phi = dyn_cast<PHINode>(Val: Current)) {
435	for (BasicBlock *IncomingBB : Phi->blocks()) {
436	Value *Incoming = Phi->getIncomingValueForBlock(BB: IncomingBB);
437	addToQueue(Val: Incoming, BB: IncomingBB, Q, SeenValues);
438	}
439	LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
440	} else if (SelectInst *SelI = dyn_cast<SelectInst>(Val: Current)) {
441	if (!isValidSelectInst(SI: SelI))
442	return false;
443	addToQueue(Val: SelI->getTrueValue(), BB: CurrentIncomingBB, Q, SeenValues);
444	addToQueue(Val: SelI->getFalseValue(), BB: CurrentIncomingBB, Q, SeenValues);
445	LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
446	if (auto *SelIUse = dyn_cast<PHINode>(Val: SelI->user_back()))
447	SelectInsts.push_back(Elt: SelectInstToUnfold (SelI, SelIUse));
448	} else if (isa<Constant>(Val: Current)) {
449	LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n");
450	continue;
451	} else {
452	LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n");
453	// Allow unpredictable values. The hope is that those will be the
454	// initial switch values that can be ignored (they will hit the
455	// unthreaded switch) but this assumption will get checked later after
456	// paths have been enumerated (in function getStateDefMap).
457
458	// If the unpredictable value comes from the same inner loop it is
459	// likely that it will also be on the enumerated paths, causing us to
460	// exit after we have enumerated all the paths. This heuristic save
461	// compile time because a search for all the paths can become expensive.
462	if (EarlyExitHeuristic &&
463	L->contains(L: LI->getLoopFor(BB: CurrentIncomingBB))) {
464	LLVM_DEBUG(dbgs()
465	<< "\tExiting early due to unpredictability heuristic.\n");
466	return false;
467	}
468
469	continue;
470	}
471	}
472
473	return true;
474	}
475
476	void addToQueue(Value Val, BasicBlock BB,
477	std::deque<std::pair<Value , BasicBlock >> &Q,
478	SmallSet<Value *, `16`> &SeenValues) {
479	if (SeenValues.contains(Ptr: Val))
480	return;
481	Q.push_back(x: {Val, BB});
482	SeenValues.insert(Ptr: Val);
483	}
484
485	bool isValidSelectInst(SelectInst *SI) {
486	if (!SI->hasOneUse())
487	return false;
488
489	Instruction *SIUse = dyn_cast<Instruction>(Val: SI->user_back());
490	// The use of the select inst should be either a phi or another select.
491	if (!SIUse && !(isa<PHINode>(Val: SIUse) \|\| isa<SelectInst>(Val: SIUse)))
492	return false;
493
494	BasicBlock *SIBB = SI->getParent();
495
496	// Currently, we can only expand select instructions in basic blocks with
497	// one successor.
498	BranchInst *SITerm = dyn_cast<BranchInst>(Val: SIBB->getTerminator());
499	if (!SITerm \|\| !SITerm->isUnconditional())
500	return false;
501
502	// Only fold the select coming from directly where it is defined.
503	PHINode *PHIUser = dyn_cast<PHINode>(Val: SIUse);
504	if (PHIUser && PHIUser->getIncomingBlock(U: *SI->use_begin()) != SIBB)
505	return false;
506
507	// If select will not be sunk during unfolding, and it is in the same basic
508	// block as another state defining select, then cannot unfold both.
509	for (SelectInstToUnfold SIToUnfold : SelectInsts) {
510	SelectInst *PrevSI = SIToUnfold.getInst();
511	if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI &&
512	PrevSI->getParent() == SI->getParent())
513	return false;
514	}
515
516	return true;
517	}
518
519	LoopInfo *LI;
520	SwitchInst Instr = nullptr*;
521	SmallVector<SelectInstToUnfold, `4`> SelectInsts;
522	};
523
524	struct AllSwitchPaths {
525	AllSwitchPaths(const MainSwitch MSwitch, OptimizationRemarkEmitter ORE)
526	: Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()),
527	ORE(ORE) {}
528
529	std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
530	unsigned getNumThreadingPaths() { return TPaths.size(); }
531	SwitchInst getSwitchInst() { return* Switch; }
532	BasicBlock getSwitchBlock() { return* SwitchBlock; }
533
534	void run() {
535	VisitedBlocks Visited;
536	PathsType LoopPaths = paths(BB: SwitchBlock, Visited, / PathDepth = / `1`);
537	StateDefMap StateDef = getStateDefMap(LoopPaths);
538
539	if (StateDef.empty()) {
540	ORE->emit(RemarkBuilder: [&]() {
541	return OptimizationRemarkMissed (DEBUG_TYPE, "SwitchNotPredictable",
542	Switch)
543	<< "Switch instruction is not predictable.";
544	});
545	return;
546	}
547
548	for (PathType Path : LoopPaths) {
549	ThreadingPath TPath;
550
551	const BasicBlock *PrevBB = Path.back();
552	for (const BasicBlock *BB : Path) {
553	if (StateDef.contains(Val: BB)) {
554	const PHINode *Phi = dyn_cast<PHINode>(Val: StateDef [BB]);
555	assert(Phi && "Expected a state-defining instr to be a phi node.");
556
557	const Value *V = Phi->getIncomingValueForBlock(BB: PrevBB);
558	if (const ConstantInt C = dyn_cast<const* ConstantInt>(Val: V)) {
559	TPath.setExitValue(C);
560	TPath.setDeterminator(BB);
561	TPath.setPath(Path);
562	}
563	}
564
565	// Switch block is the determinator, this is the final exit value.
566	if (TPath.isExitValueSet() && BB == Path.front())
567	break;
568
569	PrevBB = BB;
570	}
571
572	if (TPath.isExitValueSet() && isSupported(TPath))
573	TPaths.push_back(x: TPath);
574	}
575	}
576
577	private:
578	// Value: an instruction that defines a switch state;
579	// Key: the parent basic block of that instruction.
580	typedef DenseMap<const BasicBlock , const* PHINode *> StateDefMap;
581
582	PathsType paths(BasicBlock *BB, VisitedBlocks &Visited,
583	unsigned PathDepth) const {
584	PathsType Res;
585
586	// Stop exploring paths after visiting MaxPathLength blocks
587	if (PathDepth > MaxPathLength) {
588	ORE->emit(RemarkBuilder: [&]() {
589	return OptimizationRemarkAnalysis (DEBUG_TYPE, "MaxPathLengthReached",
590	Switch)
591	<< "Exploration stopped after visiting MaxPathLength="
592	<< ore::NV ("MaxPathLength", MaxPathLength) << " blocks.";
593	});
594	return Res;
595	}
596
597	Visited.insert(Ptr: BB);
598
599	// Some blocks have multiple edges to the same successor, and this set
600	// is used to prevent a duplicate path from being generated
601	SmallSet<BasicBlock *, `4`> Successors;
602	for (BasicBlock *Succ : successors(BB)) {
603	if (!Successors.insert(Ptr: Succ).second)
604	continue;
605
606	// Found a cycle through the SwitchBlock
607	if (Succ == SwitchBlock) {
608	Res.push_back(x: {BB});
609	continue;
610	}
611
612	// We have encountered a cycle, do not get caught in it
613	if (Visited.contains(Ptr: Succ))
614	continue;
615
616	PathsType SuccPaths = paths(BB: Succ, Visited, PathDepth: PathDepth + `1`);
617	for (const PathType &Path : SuccPaths) {
618	PathType NewPath(Path);
619	NewPath.push_front(x: BB);
620	Res.push_back(x: NewPath);
621	if (Res.size() >= MaxNumPaths) {
622	return Res;
623	}
624	}
625	}
626	// This block could now be visited again from a different predecessor. Note
627	// that this will result in exponential runtime. Subpaths could possibly be
628	// cached but it takes a lot of memory to store them.
629	Visited.erase(Ptr: BB);
630	return Res;
631	}
632
633	/// Walk the use-def chain and collect all the state-defining instructions.
634	///
635	/// Return an empty map if unpredictable values encountered inside the basic
636	/// blocks of \p LoopPaths.
637	StateDefMap getStateDefMap(const PathsType &LoopPaths) const {
638	StateDefMap Res;
639
640	// Basic blocks belonging to any of the loops around the switch statement.
641	SmallPtrSet<BasicBlock *, `16`> LoopBBs;
642	for (const PathType &Path : LoopPaths) {
643	for (BasicBlock *BB : Path)
644	LoopBBs.insert(Ptr: BB);
645	}
646
647	Value *FirstDef = Switch->getOperand(i_nocapture: `0`);
648
649	assert(isa<PHINode>(FirstDef) && "The first definition must be a phi.");
650
651	SmallVector<PHINode *, `8`> Stack;
652	Stack.push_back(Elt: dyn_cast<PHINode>(Val: FirstDef));
653	SmallSet<Value *, `16`> SeenValues;
654
655	while (!Stack.empty()) {
656	PHINode *CurPhi = Stack.pop_back_val();
657
658	Res [CurPhi->getParent()] = CurPhi;
659	SeenValues.insert(Ptr: CurPhi);
660
661	for (BasicBlock *IncomingBB : CurPhi->blocks()) {
662	Value *Incoming = CurPhi->getIncomingValueForBlock(BB: IncomingBB);
663	bool IsOutsideLoops = LoopBBs.count(Ptr: IncomingBB) == `0`;
664	if (Incoming == FirstDef \|\| isa<ConstantInt>(Val: Incoming) \|\|
665	SeenValues.contains(Ptr: Incoming) \|\| IsOutsideLoops) {
666	continue;
667	}
668
669	// Any unpredictable value inside the loops means we must bail out.
670	if (!isa<PHINode>(Val: Incoming))
671	return StateDefMap ();
672
673	Stack.push_back(Elt: cast<PHINode>(Val: Incoming));
674	}
675	}
676
677	return Res;
678	}
679
680	/// The determinator BB should precede the switch-defining BB.
681	///
682	/// Otherwise, it is possible that the state defined in the determinator block
683	/// defines the state for the next iteration of the loop, rather than for the
684	/// current one.
685	///
686	/// Currently supported paths:
687	/// \code
688	/// < switch bb1 determ def > [ 42, determ ]
689	/// < switch_and_def bb1 determ > [ 42, determ ]
690	/// < switch_and_def_and_determ bb1 > [ 42, switch_and_def_and_determ ]
691	/// \endcode
692	///
693	/// Unsupported paths:
694	/// \code
695	/// < switch bb1 def determ > [ 43, determ ]
696	/// < switch_and_determ bb1 def > [ 43, switch_and_determ ]
697	/// \endcode
698	bool isSupported(const ThreadingPath &TPath) {
699	Instruction *SwitchCondI = dyn_cast<Instruction>(Val: Switch->getCondition());
700	assert(SwitchCondI);
701	if (!SwitchCondI)
702	return false;
703
704	const BasicBlock *SwitchCondDefBB = SwitchCondI->getParent();
705	const BasicBlock *SwitchCondUseBB = Switch->getParent();
706	const BasicBlock *DeterminatorBB = TPath.getDeterminatorBB();
707
708	assert(
709	SwitchCondUseBB == TPath.getPath().front() &&
710	"The first BB in a threading path should have the switch instruction");
711	if (SwitchCondUseBB != TPath.getPath().front())
712	return false;
713
714	// Make DeterminatorBB the first element in Path.
715	PathType Path = TPath.getPath();
716	auto ItDet = llvm::find(Range&: Path, Val: DeterminatorBB);
717	std::rotate(first: Path.begin(), middle: ItDet, last: Path.end());
718
719	bool IsDetBBSeen = false;
720	bool IsDefBBSeen = false;
721	bool IsUseBBSeen = false;
722	for (BasicBlock *BB : Path) {
723	if (BB == DeterminatorBB)
724	IsDetBBSeen = true;
725	if (BB == SwitchCondDefBB)
726	IsDefBBSeen = true;
727	if (BB == SwitchCondUseBB)
728	IsUseBBSeen = true;
729	if (IsDetBBSeen && IsUseBBSeen && !IsDefBBSeen)
730	return false;
731	}
732
733	return true;
734	}
735
736	SwitchInst *Switch;
737	BasicBlock *SwitchBlock;
738	OptimizationRemarkEmitter *ORE;
739	std::vector<ThreadingPath> TPaths;
740	};
741
742	struct TransformDFA {
743	TransformDFA(AllSwitchPaths SwitchPaths, DominatorTree DT,
744	AssumptionCache AC, TargetTransformInfo TTI,
745	OptimizationRemarkEmitter *ORE,
746	SmallPtrSet<const Value *, `32`> EphValues)
747	: SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
748	EphValues (EphValues) {}
749
750	void run() {
751	if (isLegalAndProfitableToTransform()) {
752	createAllExitPaths();
753	NumTransforms ++;
754	}
755	}
756
757	private:
758	/// This function performs both a legality check and profitability check at
759	/// the same time since it is convenient to do so. It iterates through all
760	/// blocks that will be cloned, and keeps track of the duplication cost. It
761	/// also returns false if it is illegal to clone some required block.
762	bool isLegalAndProfitableToTransform() {
763	CodeMetrics Metrics;
764	SwitchInst *Switch = SwitchPaths->getSwitchInst();
765
766	// Don't thread switch without multiple successors.
767	if (Switch->getNumSuccessors() <= `1`)
768	return false;
769
770	// Note that DuplicateBlockMap is not being used as intended here. It is
771	// just being used to ensure (BB, State) pairs are only counted once.
772	DuplicateBlockMap DuplicateMap;
773
774	for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
775	PathType PathBBs = TPath.getPath();
776	APInt NextState = TPath.getExitValue();
777	const BasicBlock *Determinator = TPath.getDeterminatorBB();
778
779	// Update Metrics for the Switch block, this is always cloned
780	BasicBlock *BB = SwitchPaths->getSwitchBlock();
781	BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
782	if (!VisitedBB) {
783	Metrics.analyzeBasicBlock(BB, TTI: *TTI, EphValues);
784	DuplicateMap [BB].push_back(x: {.BB: BB, .State: NextState});
785	}
786
787	// If the Switch block is the Determinator, then we can continue since
788	// this is the only block that is cloned and we already counted for it.
789	if (PathBBs.front() == Determinator)
790	continue;
791
792	// Otherwise update Metrics for all blocks that will be cloned. If any
793	// block is already cloned and would be reused, don't double count it.
794	auto DetIt = llvm::find(Range&: PathBBs, Val: Determinator);
795	for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt ++) {
796	BB = *BBIt;
797	VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
798	if (VisitedBB)
799	continue;
800	Metrics.analyzeBasicBlock(BB, TTI: *TTI, EphValues);
801	DuplicateMap [BB].push_back(x: {.BB: BB, .State: NextState});
802	}
803
804	if (Metrics.notDuplicatable) {
805	LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
806	<< "non-duplicatable instructions.\n");
807	ORE->emit(RemarkBuilder: [&]() {
808	return OptimizationRemarkMissed (DEBUG_TYPE, "NonDuplicatableInst",
809	Switch)
810	<< "Contains non-duplicatable instructions.";
811	});
812	return false;
813	}
814
815	if (Metrics.convergent) {
816	LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
817	<< "convergent instructions.\n");
818	ORE->emit(RemarkBuilder: [&]() {
819	return OptimizationRemarkMissed (DEBUG_TYPE, "ConvergentInst", Switch)
820	<< "Contains convergent instructions.";
821	});
822	return false;
823	}
824
825	if (!Metrics.NumInsts.isValid()) {
826	LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
827	<< "instructions with invalid cost.\n");
828	ORE->emit(RemarkBuilder: [&]() {
829	return OptimizationRemarkMissed (DEBUG_TYPE, "ConvergentInst", Switch)
830	<< "Contains instructions with invalid cost.";
831	});
832	return false;
833	}
834	}
835
836	InstructionCost DuplicationCost = `0`;
837
838	unsigned JumpTableSize = `0`;
839	TTI->getEstimatedNumberOfCaseClusters(SI: Switch, JTSize&: JumpTableSize, PSI: nullptr*,
840	BFI: nullptr);
841	if (JumpTableSize == `0`) {
842	// Factor in the number of conditional branches reduced from jump
843	// threading. Assume that lowering the switch block is implemented by
844	// using binary search, hence the LogBase2().
845	unsigned CondBranches =
846	APInt (`32`, Switch->getNumSuccessors()).ceilLogBase2();
847	assert(CondBranches > `0` &&
848	"The threaded switch must have multiple branches");
849	DuplicationCost = Metrics.NumInsts / CondBranches;
850	} else {
851	// Compared with jump tables, the DFA optimizer removes an indirect branch
852	// on each loop iteration, thus making branch prediction more precise. The
853	// more branch targets there are, the more likely it is for the branch
854	// predictor to make a mistake, and the more benefit there is in the DFA
855	// optimizer. Thus, the more branch targets there are, the lower is the
856	// cost of the DFA opt.
857	DuplicationCost = Metrics.NumInsts / JumpTableSize;
858	}
859
860	LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
861	<< SwitchPaths->getSwitchBlock()->getName()
862	<< " is: " << DuplicationCost << "\n\n");
863
864	if (DuplicationCost > CostThreshold) {
865	LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the "
866	<< "cost threshold.\n");
867	ORE->emit(RemarkBuilder: [&]() {
868	return OptimizationRemarkMissed (DEBUG_TYPE, "NotProfitable", Switch)
869	<< "Duplication cost exceeds the cost threshold (cost="
870	<< ore::NV ("Cost", DuplicationCost)
871	<< ", threshold=" << ore::NV ("Threshold", CostThreshold) << ").";
872	});
873	return false;
874	}
875
876	ORE->emit(RemarkBuilder: [&]() {
877	return OptimizationRemark (DEBUG_TYPE, "JumpThreaded", Switch)
878	<< "Switch statement jump-threaded.";
879	});
880
881	return true;
882	}
883
884	/// Transform each threading path to effectively jump thread the DFA.
885	void createAllExitPaths() {
886	DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
887
888	// Move the switch block to the end of the path, since it will be duplicated
889	BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
890	for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
891	LLVM_DEBUG(dbgs() << TPath << "\n");
892	PathType NewPath(TPath.getPath());
893	NewPath.push_back(x: SwitchBlock);
894	TPath.setPath(NewPath);
895	}
896
897	// Transform the ThreadingPaths and keep track of the cloned values
898	DuplicateBlockMap DuplicateMap;
899	DefMap NewDefs;
900
901	SmallSet<BasicBlock *, `16`> BlocksToClean;
902	for (BasicBlock *BB : successors(BB: SwitchBlock))
903	BlocksToClean.insert(Ptr: BB);
904
905	for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
906	createExitPath(NewDefs, Path&: TPath, DuplicateMap, BlocksToClean, DTU: &DTU);
907	NumPaths ++;
908	}
909
910	// After all paths are cloned, now update the last successor of the cloned
911	// path so it skips over the switch statement
912	for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
913	updateLastSuccessor(TPath, DuplicateMap, DTU: &DTU);
914
915	// For each instruction that was cloned and used outside, update its uses
916	updateSSA(NewDefs);
917
918	// Clean PHI Nodes for the newly created blocks
919	for (BasicBlock *BB : BlocksToClean)
920	cleanPhiNodes(BB);
921	}
922
923	/// For a specific ThreadingPath \p Path, create an exit path starting from
924	/// the determinator block.
925	///
926	/// To remember the correct destination, we have to duplicate blocks
927	/// corresponding to each state. Also update the terminating instruction of
928	/// the predecessors, and phis in the successor blocks.
929	void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
930	DuplicateBlockMap &DuplicateMap,
931	SmallSet<BasicBlock *, `16`> &BlocksToClean,
932	DomTreeUpdater *DTU) {
933	APInt NextState = Path.getExitValue();
934	const BasicBlock *Determinator = Path.getDeterminatorBB();
935	PathType PathBBs = Path.getPath();
936
937	// Don't select the placeholder block in front
938	if (PathBBs.front() == Determinator)
939	PathBBs.pop_front();
940
941	auto DetIt = llvm::find(Range&: PathBBs, Val: Determinator);
942	// When there is only one BB in PathBBs, the determinator takes itself as a
943	// direct predecessor.
944	BasicBlock PrevBB = PathBBs.size() == `1` ? DetIt : *std::prev(x: DetIt);
945	for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt ++) {
946	BasicBlock BB = BBIt;
947	BlocksToClean.insert(Ptr: BB);
948
949	// We already cloned BB for this NextState, now just update the branch
950	// and continue.
951	BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);
952	if (NextBB) {
953	updatePredecessor(PrevBB, OldBB: BB, NewBB: NextBB, DTU);
954	PrevBB = NextBB;
955	continue;
956	}
957
958	// Clone the BB and update the successor of Prev to jump to the new block
959	BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(
960	BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);
961	DuplicateMap [BB].push_back(x: {.BB: NewBB, .State: NextState});
962	BlocksToClean.insert(Ptr: NewBB);
963	PrevBB = NewBB;
964	}
965	}
966
967	/// Restore SSA form after cloning blocks.
968	///
969	/// Each cloned block creates new defs for a variable, and the uses need to be
970	/// updated to reflect this. The uses may be replaced with a cloned value, or
971	/// some derived phi instruction. Note that all uses of a value defined in the
972	/// same block were already remapped when cloning the block.
973	void updateSSA(DefMap &NewDefs) {
974	SSAUpdaterBulk SSAUpdate;
975	SmallVector<Use *, `16`> UsesToRename;
976
977	for (const auto &KV : NewDefs) {
978	Instruction *I = KV.first;
979	BasicBlock *BB = I->getParent();
980	std::vector<Instruction *> Cloned = KV.second;
981
982	// Scan all uses of this instruction to see if it is used outside of its
983	// block, and if so, record them in UsesToRename.
984	for (Use &U : I->uses()) {
985	Instruction *User = cast<Instruction>(Val: U.getUser());
986	if (PHINode *UserPN = dyn_cast<PHINode>(Val: User)) {
987	if (UserPN->getIncomingBlock(U) == BB)
988	continue;
989	} else if (User->getParent() == BB) {
990	continue;
991	}
992
993	UsesToRename.push_back(Elt: &U);
994	}
995
996	// If there are no uses outside the block, we're done with this
997	// instruction.
998	if (UsesToRename.empty())
999	continue;
1000	LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I
1001	<< "\n");
1002
1003	// We found a use of I outside of BB. Rename all uses of I that are
1004	// outside its block to be uses of the appropriate PHI node etc. See
1005	// ValuesInBlocks with the values we know.
1006	unsigned VarNum = SSAUpdate.AddVariable(Name: I->getName(), Ty: I->getType());
1007	SSAUpdate.AddAvailableValue(Var: VarNum, BB, V: I);
1008	for (Instruction *New : Cloned)
1009	SSAUpdate.AddAvailableValue(Var: VarNum, BB: New->getParent(), V: New);
1010
1011	while (!UsesToRename.empty())
1012	SSAUpdate.AddUse(Var: VarNum, U: UsesToRename.pop_back_val());
1013
1014	LLVM_DEBUG(dbgs() << "\n");
1015	}
1016	// SSAUpdater handles phi placement and renaming uses with the appropriate
1017	// value.
1018	SSAUpdate.RewriteAllUses(DT);
1019	}
1020
1021	/// Clones a basic block, and adds it to the CFG.
1022	///
1023	/// This function also includes updating phi nodes in the successors of the
1024	/// BB, and remapping uses that were defined locally in the cloned BB.
1025	BasicBlock cloneBlockAndUpdatePredecessor(BasicBlock BB, BasicBlock *PrevBB,
1026	const APInt &NextState,
1027	DuplicateBlockMap &DuplicateMap,
1028	DefMap &NewDefs,
1029	DomTreeUpdater *DTU) {
1030	ValueToValueMapTy VMap;
1031	BasicBlock *NewBB = CloneBasicBlock(
1032	BB, VMap, NameSuffix: ".jt" + std::to_string(val: NextState.getLimitedValue()),
1033	F: BB->getParent());
1034	NewBB->moveAfter(MovePos: BB);
1035	NumCloned ++;
1036
1037	for (Instruction &I : *NewBB) {
1038	// Do not remap operands of PHINode in case a definition in BB is an
1039	// incoming value to a phi in the same block. This incoming value will
1040	// be renamed later while restoring SSA.
1041	if (isa<PHINode>(Val: &I))
1042	continue;
1043	RemapInstruction(I: &I, VM&: VMap,
1044	Flags: RF_IgnoreMissingLocals \| RF_NoModuleLevelChanges);
1045	if (AssumeInst *II = dyn_cast<AssumeInst>(Val: &I))
1046	AC->registerAssumption(CI: II);
1047	}
1048
1049	updateSuccessorPhis(BB, ClonedBB: NewBB, NextState, VMap, DuplicateMap);
1050	updatePredecessor(PrevBB, OldBB: BB, NewBB, DTU);
1051	updateDefMap(NewDefs, VMap);
1052
1053	// Add all successors to the DominatorTree
1054	SmallPtrSet<BasicBlock *, `4`> SuccSet;
1055	for (auto *SuccBB : successors(BB: NewBB)) {
1056	if (SuccSet.insert(Ptr: SuccBB).second)
1057	DTU->applyUpdates(Updates: {{DominatorTree::Insert, NewBB, SuccBB}});
1058	}
1059	SuccSet.clear();
1060	return NewBB;
1061	}
1062
1063	/// Update the phi nodes in BB's successors.
1064	///
1065	/// This means creating a new incoming value from NewBB with the new
1066	/// instruction wherever there is an incoming value from BB.
1067	void updateSuccessorPhis(BasicBlock BB, BasicBlock ClonedBB,
1068	const APInt &NextState, ValueToValueMapTy &VMap,
1069	DuplicateBlockMap &DuplicateMap) {
1070	std::vector<BasicBlock *> BlocksToUpdate;
1071
1072	// If BB is the last block in the path, we can simply update the one case
1073	// successor that will be reached.
1074	if (BB == SwitchPaths->getSwitchBlock()) {
1075	SwitchInst *Switch = SwitchPaths->getSwitchInst();
1076	BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1077	BlocksToUpdate.push_back(x: NextCase);
1078	BasicBlock *ClonedSucc = getClonedBB(BB: NextCase, NextState, DuplicateMap);
1079	if (ClonedSucc)
1080	BlocksToUpdate.push_back(x: ClonedSucc);
1081	}
1082	// Otherwise update phis in all successors.
1083	else {
1084	for (BasicBlock *Succ : successors(BB)) {
1085	BlocksToUpdate.push_back(x: Succ);
1086
1087	// Check if a successor has already been cloned for the particular exit
1088	// value. In this case if a successor was already cloned, the phi nodes
1089	// in the cloned block should be updated directly.
1090	BasicBlock *ClonedSucc = getClonedBB(BB: Succ, NextState, DuplicateMap);
1091	if (ClonedSucc)
1092	BlocksToUpdate.push_back(x: ClonedSucc);
1093	}
1094	}
1095
1096	// If there is a phi with an incoming value from BB, create a new incoming
1097	// value for the new predecessor ClonedBB. The value will either be the same
1098	// value from BB or a cloned value.
1099	for (BasicBlock *Succ : BlocksToUpdate) {
1100	for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II);
1101	++II) {
1102	Value *Incoming = Phi->getIncomingValueForBlock(BB);
1103	if (Incoming) {
1104	if (isa<Constant>(Val: Incoming)) {
1105	Phi->addIncoming(V: Incoming, BB: ClonedBB);
1106	continue;
1107	}
1108	Value *ClonedVal = VMap [Incoming];
1109	if (ClonedVal)
1110	Phi->addIncoming(V: ClonedVal, BB: ClonedBB);
1111	else
1112	Phi->addIncoming(V: Incoming, BB: ClonedBB);
1113	}
1114	}
1115	}
1116	}
1117
1118	/// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all
1119	/// other successors are kept as well.
1120	void updatePredecessor(BasicBlock PrevBB, BasicBlock OldBB,
1121	BasicBlock NewBB, DomTreeUpdater DTU) {
1122	// When a path is reused, there is a chance that predecessors were already
1123	// updated before. Check if the predecessor needs to be updated first.
1124	if (!isPredecessor(BB: OldBB, IncomingBB: PrevBB))
1125	return;
1126
1127	Instruction *PrevTerm = PrevBB->getTerminator();
1128	for (unsigned Idx = `0`; Idx < PrevTerm->getNumSuccessors(); Idx++) {
1129	if (PrevTerm->getSuccessor(Idx) == OldBB) {
1130	OldBB->removePredecessor(Pred: PrevBB, / KeepOneInputPHIs = / true);
1131	PrevTerm->setSuccessor(Idx, BB: NewBB);
1132	}
1133	}
1134	DTU->applyUpdates(Updates: {{DominatorTree::Delete, PrevBB, OldBB},
1135	{DominatorTree::Insert, PrevBB, NewBB}});
1136	}
1137
1138	/// Add new value mappings to the DefMap to keep track of all new definitions
1139	/// for a particular instruction. These will be used while updating SSA form.
1140	void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
1141	SmallVector<std::pair<Instruction , Instruction >> NewDefsVector;
1142	NewDefsVector.reserve(N: VMap.size());
1143
1144	for (auto Entry : VMap) {
1145	Instruction *Inst =
1146	dyn_cast<Instruction>(Val: const_cast<Value *>(Entry.first));
1147	if (!Inst \|\| !Entry.second \|\| isa<BranchInst>(Val: Inst) \|\|
1148	isa<SwitchInst>(Val: Inst)) {
1149	continue;
1150	}
1151
1152	Instruction *Cloned = dyn_cast<Instruction>(Val&: Entry.second);
1153	if (!Cloned)
1154	continue;
1155
1156	NewDefsVector.push_back(Elt: {Inst, Cloned});
1157	}
1158
1159	// Sort the defs to get deterministic insertion order into NewDefs.
1160	sort(C&: NewDefsVector, Comp: [](const auto &LHS, const auto &RHS) {
1161	if (LHS.first == RHS.first)
1162	return LHS.second->comesBefore(RHS.second);
1163	return LHS.first->comesBefore(RHS.first);
1164	});
1165
1166	for (const auto &KV : NewDefsVector)
1167	NewDefs [KV.first].push_back(x: KV.second);
1168	}
1169
1170	/// Update the last branch of a particular cloned path to point to the correct
1171	/// case successor.
1172	///
1173	/// Note that this is an optional step and would have been done in later
1174	/// optimizations, but it makes the CFG significantly easier to work with.
1175	void updateLastSuccessor(ThreadingPath &TPath,
1176	DuplicateBlockMap &DuplicateMap,
1177	DomTreeUpdater *DTU) {
1178	APInt NextState = TPath.getExitValue();
1179	BasicBlock *BB = TPath.getPath().back();
1180	BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
1181
1182	// Note multiple paths can end at the same block so check that it is not
1183	// updated yet
1184	if (!isa<SwitchInst>(Val: LastBlock->getTerminator()))
1185	return;
1186	SwitchInst *Switch = cast<SwitchInst>(Val: LastBlock->getTerminator());
1187	BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1188
1189	std::vector<DominatorTree::UpdateType> DTUpdates;
1190	SmallPtrSet<BasicBlock *, `4`> SuccSet;
1191	for (BasicBlock *Succ : successors(BB: LastBlock)) {
1192	if (Succ != NextCase && SuccSet.insert(Ptr: Succ).second)
1193	DTUpdates.push_back(x: {DominatorTree::Delete, LastBlock, Succ});
1194	}
1195
1196	Switch->eraseFromParent();
1197	BranchInst::Create(IfTrue: NextCase, InsertAtEnd: LastBlock);
1198
1199	DTU->applyUpdates(Updates: DTUpdates);
1200	}
1201
1202	/// After cloning blocks, some of the phi nodes have extra incoming values
1203	/// that are no longer used. This function removes them.
1204	void cleanPhiNodes(BasicBlock *BB) {
1205	// If BB is no longer reachable, remove any remaining phi nodes
1206	if (pred_empty(BB)) {
1207	std::vector<PHINode *> PhiToRemove;
1208	for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II); ++II) {
1209	PhiToRemove.push_back(x: Phi);
1210	}
1211	for (PHINode *PN : PhiToRemove) {
1212	PN->replaceAllUsesWith(V: PoisonValue::get(T: PN->getType()));
1213	PN->eraseFromParent();
1214	}
1215	return;
1216	}
1217
1218	// Remove any incoming values that come from an invalid predecessor
1219	for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II); ++II) {
1220	std::vector<BasicBlock *> BlocksToRemove;
1221	for (BasicBlock *IncomingBB : Phi->blocks()) {
1222	if (!isPredecessor(BB, IncomingBB))
1223	BlocksToRemove.push_back(x: IncomingBB);
1224	}
1225	for (BasicBlock *BB : BlocksToRemove)
1226	Phi->removeIncomingValue(BB);
1227	}
1228	}
1229
1230	/// Checks if BB was already cloned for a particular next state value. If it
1231	/// was then it returns this cloned block, and otherwise null.
1232	BasicBlock getClonedBB(BasicBlock BB, const APInt &NextState,
1233	DuplicateBlockMap &DuplicateMap) {
1234	CloneList ClonedBBs = DuplicateMap [BB];
1235
1236	// Find an entry in the CloneList with this NextState. If it exists then
1237	// return the corresponding BB
1238	auto It = llvm::find_if(Range&: ClonedBBs, P: [NextState](const ClonedBlock &C) {
1239	return C.State == NextState;
1240	});
1241	return It != ClonedBBs.end() ? (It).BB : nullptr*;
1242	}
1243
1244	/// Helper to get the successor corresponding to a particular case value for
1245	/// a switch statement.
1246	BasicBlock getNextCaseSuccessor(SwitchInst Switch, const APInt &NextState) {
1247	BasicBlock NextCase = nullptr*;
1248	for (auto Case : Switch->cases()) {
1249	if (Case.getCaseValue()->getValue() == NextState) {
1250	NextCase = Case.getCaseSuccessor();
1251	break;
1252	}
1253	}
1254	if (!NextCase)
1255	NextCase = Switch->getDefaultDest();
1256	return NextCase;
1257	}
1258
1259	/// Returns true if IncomingBB is a predecessor of BB.
1260	bool isPredecessor(BasicBlock BB, BasicBlock IncomingBB) {
1261	return llvm::is_contained(Range: predecessors(BB), Element: IncomingBB);
1262	}
1263
1264	AllSwitchPaths *SwitchPaths;
1265	DominatorTree *DT;
1266	AssumptionCache *AC;
1267	TargetTransformInfo *TTI;
1268	OptimizationRemarkEmitter *ORE;
1269	SmallPtrSet<const Value *, `32`> EphValues;
1270	std::vector<ThreadingPath> TPaths;
1271	};
1272
1273	bool DFAJumpThreading::run(Function &F) {
1274	LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
1275
1276	if (F.hasOptSize()) {
1277	LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n");
1278	return false;
1279	}
1280
1281	if (ClViewCfgBefore)
1282	F.viewCFG();
1283
1284	SmallVector<AllSwitchPaths, `2`> ThreadableLoops;
1285	bool MadeChanges = false;
1286
1287	for (BasicBlock &BB : F) {
1288	auto *SI = dyn_cast<SwitchInst>(Val: BB.getTerminator());
1289	if (!SI)
1290	continue;
1291
1292	LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
1293	<< " is a candidate\n");
1294	MainSwitch Switch(SI, LI, ORE);
1295
1296	if (!Switch.getInstr())
1297	continue;
1298
1299	LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a "
1300	<< "candidate for jump threading\n");
1301	LLVM_DEBUG(SI->dump());
1302
1303	unfoldSelectInstrs(DT, SelectInsts: Switch.getSelectInsts());
1304	if (!Switch.getSelectInsts().empty())
1305	MadeChanges = true;
1306
1307	AllSwitchPaths SwitchPaths(&Switch, ORE);
1308	SwitchPaths.run();
1309
1310	if (SwitchPaths.getNumThreadingPaths() > `0`) {
1311	ThreadableLoops.push_back(Elt: SwitchPaths);
1312
1313	// For the time being limit this optimization to occurring once in a
1314	// function since it can change the CFG significantly. This is not a
1315	// strict requirement but it can cause buggy behavior if there is an
1316	// overlap of blocks in different opportunities. There is a lot of room to
1317	// experiment with catching more opportunities here.
1318	break;
1319	}
1320	}
1321
1322	SmallPtrSet<const Value *, `32`> EphValues;
1323	if (ThreadableLoops.size() > `0`)
1324	CodeMetrics::collectEphemeralValues(L: &F, AC, EphValues);
1325
1326	for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
1327	TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
1328	Transform.run();
1329	MadeChanges = true;
1330	}
1331
1332	#ifdef EXPENSIVE_CHECKS
1333	assert(DT->verify(DominatorTree::VerificationLevel::Full));
1334	verifyFunction(F, &dbgs());
1335	#endif
1336
1337	return MadeChanges;
1338	}
1339
1340	} // end anonymous namespace
1341
1342	/// Integrate with the new Pass Manager
1343	PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
1344	FunctionAnalysisManager &AM) {
1345	AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
1346	DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
1347	LoopInfo &LI = AM.getResult<LoopAnalysis>(IR&: F);
1348	TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
1349	OptimizationRemarkEmitter ORE(&F);
1350
1351	if (!DFAJumpThreading (&AC, &DT, &LI, &TTI, &ORE).run(F))
1352	return PreservedAnalyses::all();
1353
1354	PreservedAnalyses PA;
1355	PA.preserve<DominatorTreeAnalysis>();
1356	return PA;
1357	}
1358

source code of llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp