DeadStoreElimination.cpp source code [llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp]

1	//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// The code below implements dead store elimination using MemorySSA. It uses
10	// the following general approach: given a MemoryDef, walk upwards to find
11	// clobbering MemoryDefs that may be killed by the starting def. Then check
12	// that there are no uses that may read the location of the original MemoryDef
13	// in between both MemoryDefs. A bit more concretely:
14	//
15	// For all MemoryDefs StartDef:
16	// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17	// upwards.
18	// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19	// checking all uses starting at MaybeDeadAccess and walking until we see
20	// StartDef.
21	// 3. For each found CurrentDef, check that:
22	// 1. There are no barrier instructions between CurrentDef and StartDef (like
23	// throws or stores with ordering constraints).
24	// 2. StartDef is executed whenever CurrentDef is executed.
25	// 3. StartDef completely overwrites CurrentDef.
26	// 4. Erase CurrentDef from the function and MemorySSA.
27	//
28	//===----------------------------------------------------------------------===//
29
30	#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
31	#include "llvm/ADT/APInt.h"
32	#include "llvm/ADT/DenseMap.h"
33	#include "llvm/ADT/MapVector.h"
34	#include "llvm/ADT/PostOrderIterator.h"
35	#include "llvm/ADT/SetVector.h"
36	#include "llvm/ADT/SmallPtrSet.h"
37	#include "llvm/ADT/SmallVector.h"
38	#include "llvm/ADT/Statistic.h"
39	#include "llvm/ADT/StringRef.h"
40	#include "llvm/Analysis/AliasAnalysis.h"
41	#include "llvm/Analysis/CaptureTracking.h"
42	#include "llvm/Analysis/GlobalsModRef.h"
43	#include "llvm/Analysis/LoopInfo.h"
44	#include "llvm/Analysis/MemoryBuiltins.h"
45	#include "llvm/Analysis/MemoryLocation.h"
46	#include "llvm/Analysis/MemorySSA.h"
47	#include "llvm/Analysis/MemorySSAUpdater.h"
48	#include "llvm/Analysis/MustExecute.h"
49	#include "llvm/Analysis/PostDominators.h"
50	#include "llvm/Analysis/TargetLibraryInfo.h"
51	#include "llvm/Analysis/ValueTracking.h"
52	#include "llvm/IR/Argument.h"
53	#include "llvm/IR/BasicBlock.h"
54	#include "llvm/IR/Constant.h"
55	#include "llvm/IR/Constants.h"
56	#include "llvm/IR/DataLayout.h"
57	#include "llvm/IR/DebugInfo.h"
58	#include "llvm/IR/Dominators.h"
59	#include "llvm/IR/Function.h"
60	#include "llvm/IR/IRBuilder.h"
61	#include "llvm/IR/InstIterator.h"
62	#include "llvm/IR/InstrTypes.h"
63	#include "llvm/IR/Instruction.h"
64	#include "llvm/IR/Instructions.h"
65	#include "llvm/IR/IntrinsicInst.h"
66	#include "llvm/IR/Module.h"
67	#include "llvm/IR/PassManager.h"
68	#include "llvm/IR/PatternMatch.h"
69	#include "llvm/IR/Value.h"
70	#include "llvm/Support/Casting.h"
71	#include "llvm/Support/CommandLine.h"
72	#include "llvm/Support/Debug.h"
73	#include "llvm/Support/DebugCounter.h"
74	#include "llvm/Support/ErrorHandling.h"
75	#include "llvm/Support/raw_ostream.h"
76	#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
77	#include "llvm/Transforms/Utils/BuildLibCalls.h"
78	#include "llvm/Transforms/Utils/Local.h"
79	#include <algorithm>
80	#include <cassert>
81	#include <cstdint>
82	#include <iterator>
83	#include <map>
84	#include <optional>
85	#include <utility>
86
87	using namespace llvm;
88	using namespace PatternMatch;
89
90	#define DEBUG_TYPE "dse"
91
92	STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
93	STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
94	STATISTIC(NumFastStores, "Number of stores deleted");
95	STATISTIC(NumFastOther, "Number of other instrs removed");
96	STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
97	STATISTIC(NumModifiedStores, "Number of stores modified");
98	STATISTIC(NumCFGChecks, "Number of stores modified");
99	STATISTIC(NumCFGTries, "Number of stores modified");
100	STATISTIC(NumCFGSuccess, "Number of stores modified");
101	STATISTIC(NumGetDomMemoryDefPassed,
102	"Number of times a valid candidate is returned from getDomMemoryDef");
103	STATISTIC(NumDomMemDefChecks,
104	"Number iterations check for reads in getDomMemoryDef");
105
106	DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
107	"Controls which MemoryDefs are eliminated.");
108
109	static cl::opt<bool>
110	EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
111	cl::init(Val: true), cl::Hidden,
112	cl::desc ("Enable partial-overwrite tracking in DSE"));
113
114	static cl::opt<bool>
115	EnablePartialStoreMerging("enable-dse-partial-store-merging",
116	cl::init(Val: true), cl::Hidden,
117	cl::desc ("Enable partial store merging in DSE"));
118
119	static cl::opt<unsigned>
120	MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(Val: `150`), cl::Hidden,
121	cl::desc ("The number of memory instructions to scan for "
122	"dead store elimination (default = 150)"));
123	static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
124	"dse-memoryssa-walklimit", cl::init(Val: `90`), cl::Hidden,
125	cl::desc ("The maximum number of steps while walking upwards to find "
126	"MemoryDefs that may be killed (default = 90)"));
127
128	static cl::opt<unsigned> MemorySSAPartialStoreLimit(
129	"dse-memoryssa-partial-store-limit", cl::init(Val: `5`), cl::Hidden,
130	cl::desc ("The maximum number candidates that only partially overwrite the "
131	"killing MemoryDef to consider"
132	" (default = 5)"));
133
134	static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
135	"dse-memoryssa-defs-per-block-limit", cl::init(Val: `5000`), cl::Hidden,
136	cl::desc ("The number of MemoryDefs we consider as candidates to eliminated "
137	"other stores per basic block (default = 5000)"));
138
139	static cl::opt<unsigned> MemorySSASameBBStepCost(
140	"dse-memoryssa-samebb-cost", cl::init(Val: `1`), cl::Hidden,
141	cl::desc (
142	"The cost of a step in the same basic block as the killing MemoryDef"
143	"(default = 1)"));
144
145	static cl::opt<unsigned>
146	MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(Val: `5`),
147	cl::Hidden,
148	cl::desc ("The cost of a step in a different basic "
149	"block than the killing MemoryDef"
150	"(default = 5)"));
151
152	static cl::opt<unsigned> MemorySSAPathCheckLimit(
153	"dse-memoryssa-path-check-limit", cl::init(Val: `50`), cl::Hidden,
154	cl::desc ("The maximum number of blocks to check when trying to prove that "
155	"all paths to an exit go through a killing block (default = 50)"));
156
157	// This flags allows or disallows DSE to optimize MemorySSA during its
158	// traversal. Note that DSE optimizing MemorySSA may impact other passes
159	// downstream of the DSE invocation and can lead to issues not being
160	// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
161	// those cases, the flag can be used to check if DSE's MemorySSA optimizations
162	// impact follow-up passes.
163	static cl::opt<bool>
164	OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(Val: true), cl::Hidden,
165	cl::desc ("Allow DSE to optimize memory accesses."));
166
167	//===----------------------------------------------------------------------===//
168	// Helper functions
169	//===----------------------------------------------------------------------===//
170	using OverlapIntervalsTy = std::map<int64_t, int64_t>;
171	using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
172
173	/// Returns true if the end of this instruction can be safely shortened in
174	/// length.
175	static bool isShortenableAtTheEnd(Instruction *I) {
176	// Don't shorten stores for now
177	if (isa<StoreInst>(Val: I))
178	return false;
179
180	if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
181	switch (II->getIntrinsicID()) {
182	default: return false;
183	case Intrinsic::memset:
184	case Intrinsic::memcpy:
185	case Intrinsic::memcpy_element_unordered_atomic:
186	case Intrinsic::memset_element_unordered_atomic:
187	// Do shorten memory intrinsics.
188	// FIXME: Add memmove if it's also safe to transform.
189	return true;
190	}
191	}
192
193	// Don't shorten libcalls calls for now.
194
195	return false;
196	}
197
198	/// Returns true if the beginning of this instruction can be safely shortened
199	/// in length.
200	static bool isShortenableAtTheBeginning(Instruction *I) {
201	// FIXME: Handle only memset for now. Supporting memcpy/memmove should be
202	// easily done by offsetting the source address.
203	return isa<AnyMemSetInst>(Val: I);
204	}
205
206	static std::optional<TypeSize> getPointerSize(const Value *V,
207	const DataLayout &DL,
208	const TargetLibraryInfo &TLI,
209	const Function *F) {
210	uint64_t Size;
211	ObjectSizeOpts Opts;
212	Opts.NullIsUnknownSize = NullPointerIsDefined(F);
213
214	if (getObjectSize(Ptr: V, Size, DL, TLI: &TLI, Opts))
215	return TypeSize::getFixed(ExactSize: Size);
216	return std::nullopt;
217	}
218
219	namespace {
220
221	enum OverwriteResult {
222	OW_Begin,
223	OW_Complete,
224	OW_End,
225	OW_PartialEarlierWithFullLater,
226	OW_MaybePartial,
227	OW_None,
228	OW_Unknown
229	};
230
231	} // end anonymous namespace
232
233	/// Check if two instruction are masked stores that completely
234	/// overwrite one another. More specifically, \p KillingI has to
235	/// overwrite \p DeadI.
236	static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
237	const Instruction *DeadI,
238	BatchAAResults &AA) {
239	const auto *KillingII = dyn_cast<IntrinsicInst>(Val: KillingI);
240	const auto *DeadII = dyn_cast<IntrinsicInst>(Val: DeadI);
241	if (KillingII == nullptr \|\| DeadII == nullptr)
242	return OW_Unknown;
243	if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
244	return OW_Unknown;
245	if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
246	// Type size.
247	VectorType *KillingTy =
248	cast<VectorType>(Val: KillingII->getArgOperand(i: `0`)->getType());
249	VectorType *DeadTy = cast<VectorType>(Val: DeadII->getArgOperand(i: `0`)->getType());
250	if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
251	return OW_Unknown;
252	// Element count.
253	if (KillingTy->getElementCount() != DeadTy->getElementCount())
254	return OW_Unknown;
255	// Pointers.
256	Value *KillingPtr = KillingII->getArgOperand(i: `1`)->stripPointerCasts();
257	Value *DeadPtr = DeadII->getArgOperand(i: `1`)->stripPointerCasts();
258	if (KillingPtr != DeadPtr && !AA.isMustAlias(V1: KillingPtr, V2: DeadPtr))
259	return OW_Unknown;
260	// Masks.
261	// TODO: check that KillingII's mask is a superset of the DeadII's mask.
262	if (KillingII->getArgOperand(i: `3`) != DeadII->getArgOperand(i: `3`))
263	return OW_Unknown;
264	return OW_Complete;
265	}
266	return OW_Unknown;
267	}
268
269	/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
270	/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
271	/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
272	/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
273	/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
274	/// overwritten by a killing (smaller) store which doesn't write outside the big
275	/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
276	/// NOTE: This function must only be called if both \p KillingLoc and \p
277	/// DeadLoc belong to the same underlying object with valid \p KillingOff and
278	/// \p DeadOff.
279	static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
280	const MemoryLocation &DeadLoc,
281	int64_t KillingOff, int64_t DeadOff,
282	Instruction *DeadI,
283	InstOverlapIntervalsTy &IOL) {
284	const uint64_t KillingSize = KillingLoc.Size.getValue();
285	const uint64_t DeadSize = DeadLoc.Size.getValue();
286	// We may now overlap, although the overlap is not complete. There might also
287	// be other incomplete overlaps, and together, they might cover the complete
288	// dead store.
289	// Note: The correctness of this logic depends on the fact that this function
290	// is not even called providing DepWrite when there are any intervening reads.
291	if (EnablePartialOverwriteTracking &&
292	KillingOff < int64_t(DeadOff + DeadSize) &&
293	int64_t(KillingOff + KillingSize) >= DeadOff) {
294
295	// Insert our part of the overlap into the map.
296	auto &IM = IOL [DeadI];
297	LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
298	<< int64_t(DeadOff + DeadSize) << ") KillingLoc ["
299	<< KillingOff << ", " << int64_t(KillingOff + KillingSize)
300	<< ")\n");
301
302	// Make sure that we only insert non-overlapping intervals and combine
303	// adjacent intervals. The intervals are stored in the map with the ending
304	// offset as the key (in the half-open sense) and the starting offset as
305	// the value.
306	int64_t KillingIntStart = KillingOff;
307	int64_t KillingIntEnd = KillingOff + KillingSize;
308
309	// Find any intervals ending at, or after, KillingIntStart which start
310	// before KillingIntEnd.
311	auto ILI = IM.lower_bound(x: KillingIntStart);
312	if (ILI != IM.end() && ILI ->second <= KillingIntEnd) {
313	// This existing interval is overlapped with the current store somewhere
314	// in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
315	// intervals and adjusting our start and end.
316	KillingIntStart = std::min(a: KillingIntStart, b: ILI ->second);
317	KillingIntEnd = std::max(a: KillingIntEnd, b: ILI ->first);
318	ILI = IM.erase(position: ILI);
319
320	// Continue erasing and adjusting our end in case other previous
321	// intervals are also overlapped with the current store.
322	//
323	// \|--- dead 1 ---\| \|--- dead 2 ---\|
324	// \|------- killing---------\|
325	//
326	while (ILI != IM.end() && ILI ->second <= KillingIntEnd) {
327	assert(ILI ->second > KillingIntStart && "Unexpected interval");
328	KillingIntEnd = std::max(a: KillingIntEnd, b: ILI ->first);
329	ILI = IM.erase(position: ILI);
330	}
331	}
332
333	IM [KillingIntEnd] = KillingIntStart;
334
335	ILI = IM.begin();
336	if (ILI ->second <= DeadOff && ILI ->first >= int64_t(DeadOff + DeadSize)) {
337	LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
338	<< DeadOff << ", " << int64_t(DeadOff + DeadSize)
339	<< ") Composite KillingLoc [" << ILI ->second << ", "
340	<< ILI ->first << ")\n");
341	++NumCompletePartials;
342	return OW_Complete;
343	}
344	}
345
346	// Check for a dead store which writes to all the memory locations that
347	// the killing store writes to.
348	if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
349	int64_t(DeadOff + DeadSize) > KillingOff &&
350	uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
351	LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
352	<< ", " << int64_t(DeadOff + DeadSize)
353	<< ") by a killing store [" << KillingOff << ", "
354	<< int64_t(KillingOff + KillingSize) << ")\n");
355	// TODO: Maybe come up with a better name?
356	return OW_PartialEarlierWithFullLater;
357	}
358
359	// Another interesting case is if the killing store overwrites the end of the
360	// dead store.
361	//
362	// \|--dead--\|
363	// \|-- killing --\|
364	//
365	// In this case we may want to trim the size of dead store to avoid
366	// generating stores to addresses which will definitely be overwritten killing
367	// store.
368	if (!EnablePartialOverwriteTracking &&
369	(KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
370	int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
371	return OW_End;
372
373	// Finally, we also need to check if the killing store overwrites the
374	// beginning of the dead store.
375	//
376	// \|--dead--\|
377	// \|-- killing --\|
378	//
379	// In this case we may want to move the destination address and trim the size
380	// of dead store to avoid generating stores to addresses which will definitely
381	// be overwritten killing store.
382	if (!EnablePartialOverwriteTracking &&
383	(KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
384	assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
385	"Expect to be handled as OW_Complete");
386	return OW_Begin;
387	}
388	// Otherwise, they don't completely overlap.
389	return OW_Unknown;
390	}
391
392	/// Returns true if the memory which is accessed by the second instruction is not
393	/// modified between the first and the second instruction.
394	/// Precondition: Second instruction must be dominated by the first
395	/// instruction.
396	static bool
397	memoryIsNotModifiedBetween(Instruction FirstI, Instruction SecondI,
398	BatchAAResults &AA, const DataLayout &DL,
399	DominatorTree *DT) {
400	// Do a backwards scan through the CFG from SecondI to FirstI. Look for
401	// instructions which can modify the memory location accessed by SecondI.
402	//
403	// While doing the walk keep track of the address to check. It might be
404	// different in different basic blocks due to PHI translation.
405	using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
406	SmallVector<BlockAddressPair, `16`> WorkList;
407	// Keep track of the address we visited each block with. Bail out if we
408	// visit a block with different addresses.
409	DenseMap<BasicBlock , Value > Visited;
410
411	BasicBlock::iterator FirstBBI(FirstI);
412	++FirstBBI;
413	BasicBlock::iterator SecondBBI(SecondI);
414	BasicBlock *FirstBB = FirstI->getParent();
415	BasicBlock *SecondBB = SecondI->getParent();
416	MemoryLocation MemLoc;
417	if (auto *MemSet = dyn_cast<MemSetInst>(Val: SecondI))
418	MemLoc = MemoryLocation::getForDest(MI: MemSet);
419	else
420	MemLoc = MemoryLocation::get(Inst: SecondI);
421
422	auto MemLocPtr = const_cast<Value >(MemLoc.Ptr);
423
424	// Start checking the SecondBB.
425	WorkList.push_back(
426	Elt: std::make_pair(x&: SecondBB, y: PHITransAddr (MemLocPtr, DL, nullptr)));
427	bool isFirstBlock = true;
428
429	// Check all blocks going backward until we reach the FirstBB.
430	while (!WorkList.empty()) {
431	BlockAddressPair Current = WorkList.pop_back_val();
432	BasicBlock *B = Current.first;
433	PHITransAddr &Addr = Current.second;
434	Value *Ptr = Addr.getAddr();
435
436	// Ignore instructions before FirstI if this is the FirstBB.
437	BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
438
439	BasicBlock::iterator EI;
440	if (isFirstBlock) {
441	// Ignore instructions after SecondI if this is the first visit of SecondBB.
442	assert(B == SecondBB && "first block is not the store block");
443	EI = SecondBBI;
444	isFirstBlock = false;
445	} else {
446	// It's not SecondBB or (in case of a loop) the second visit of SecondBB.
447	// In this case we also have to look at instructions after SecondI.
448	EI = B->end();
449	}
450	for (; BI != EI; ++BI) {
451	Instruction I = &BI;
452	if (I->mayWriteToMemory() && I != SecondI)
453	if (isModSet(MRI: AA.getModRefInfo(I, OptLoc: MemLoc.getWithNewPtr(NewPtr: Ptr))))
454	return false;
455	}
456	if (B != FirstBB) {
457	assert(B != &FirstBB->getParent()->getEntryBlock() &&
458	"Should not hit the entry block because SI must be dominated by LI");
459	for (BasicBlock *Pred : predecessors(BB: B)) {
460	PHITransAddr PredAddr = Addr;
461	if (PredAddr.needsPHITranslationFromBlock(BB: B)) {
462	if (!PredAddr.isPotentiallyPHITranslatable())
463	return false;
464	if (!PredAddr.translateValue(CurBB: B, PredBB: Pred, DT, MustDominate: false))
465	return false;
466	}
467	Value *TranslatedPtr = PredAddr.getAddr();
468	auto Inserted = Visited.insert(KV: std::make_pair(x&: Pred, y&: TranslatedPtr));
469	if (!Inserted.second) {
470	// We already visited this block before. If it was with a different
471	// address - bail out!
472	if (TranslatedPtr != Inserted.first ->second)
473	return false;
474	// ... otherwise just skip it.
475	continue;
476	}
477	WorkList.push_back(Elt: std::make_pair(x&: Pred, y&: PredAddr));
478	}
479	}
480	}
481	return true;
482	}
483
484	static void shortenAssignment(Instruction Inst, Value OriginalDest,
485	uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
486	uint64_t NewSizeInBits, bool IsOverwriteEnd) {
487	const DataLayout &DL = Inst->getModule()->getDataLayout();
488	uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
489	uint64_t DeadSliceOffsetInBits =
490	OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : `0`);
491	auto SetDeadFragExpr = [](auto *Assign,
492	DIExpression::FragmentInfo DeadFragment) {
493	// createFragmentExpression expects an offset relative to the existing
494	// fragment offset if there is one.
495	uint64_t RelativeOffset = DeadFragment.OffsetInBits -
496	Assign->getExpression()
497	->getFragmentInfo()
498	.value_or(DIExpression::FragmentInfo (`0`, `0`))
499	.OffsetInBits;
500	if (auto NewExpr = DIExpression::createFragmentExpression(
501	Expr: Assign->getExpression(), OffsetInBits: RelativeOffset, SizeInBits: DeadFragment.SizeInBits)) {
502	Assign->setExpression(*NewExpr);
503	return;
504	}
505	// Failed to create a fragment expression for this so discard the value,
506	// making this a kill location.
507	auto Expr = DIExpression::createFragmentExpression(
508	Expr: DIExpression::get(Context&: Assign->getContext(), Elements: std::nullopt),
509	OffsetInBits: DeadFragment.OffsetInBits, SizeInBits: DeadFragment.SizeInBits);
510	Assign->setExpression(Expr);
511	Assign->setKillLocation();
512	};
513
514	// A DIAssignID to use so that the inserted dbg.assign intrinsics do not
515	// link to any instructions. Created in the loop below (once).
516	DIAssignID LinkToNothing = nullptr*;
517	LLVMContext &Ctx = Inst->getContext();
518	auto GetDeadLink = [&Ctx, &LinkToNothing]() {
519	if (!LinkToNothing)
520	LinkToNothing = DIAssignID::getDistinct(Context&: Ctx);
521	return LinkToNothing;
522	};
523
524	// Insert an unlinked dbg.assign intrinsic for the dead fragment after each
525	// overlapping dbg.assign intrinsic. The loop invalidates the iterators
526	// returned by getAssignmentMarkers so save a copy of the markers to iterate
527	// over.
528	auto LinkedRange = at::getAssignmentMarkers(Inst);
529	SmallVector<DbgVariableRecord *> LinkedDVRAssigns =
530	at::getDVRAssignmentMarkers(Inst);
531	SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(),
532	LinkedRange.end());
533	auto InsertAssignForOverlap = [&](auto *Assign) {
534	std::optional<DIExpression::FragmentInfo> NewFragment;
535	if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
536	DeadSliceSizeInBits, Assign,
537	NewFragment) \|\|
538	!NewFragment) {
539	// We couldn't calculate the intersecting fragment for some reason. Be
540	// cautious and unlink the whole assignment from the store.
541	Assign->setKillAddress();
542	Assign->setAssignId(GetDeadLink ());
543	return;
544	}
545	// No intersect.
546	if (NewFragment ->SizeInBits == `0`)
547	return;
548
549	// Fragments overlap: insert a new dbg.assign for this dead part.
550	auto NewAssign = static_cast<decltype*(Assign)>(Assign->clone());
551	NewAssign->insertAfter(Assign);
552	NewAssign->setAssignId(GetDeadLink ());
553	if (NewFragment)
554	SetDeadFragExpr(NewAssign, *NewFragment);
555	NewAssign->setKillAddress();
556	};
557	for_each(Range&: Linked, F: InsertAssignForOverlap);
558	for_each(Range&: LinkedDVRAssigns, F: InsertAssignForOverlap);
559	}
560
561	static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
562	uint64_t &DeadSize, int64_t KillingStart,
563	uint64_t KillingSize, bool IsOverwriteEnd) {
564	auto *DeadIntrinsic = cast<AnyMemIntrinsic>(Val: DeadI);
565	Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
566
567	// We assume that memet/memcpy operates in chunks of the "largest" native
568	// type size and aligned on the same value. That means optimal start and size
569	// of memset/memcpy should be modulo of preferred alignment of that type. That
570	// is it there is no any sense in trying to reduce store size any further
571	// since any "extra" stores comes for free anyway.
572	// On the other hand, maximum alignment we can achieve is limited by alignment
573	// of initial store.
574
575	// TODO: Limit maximum alignment by preferred (or abi?) alignment of the
576	// "largest" native type.
577	// Note: What is the proper way to get that value?
578	// Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
579	// PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
580
581	int64_t ToRemoveStart = `0`;
582	uint64_t ToRemoveSize = `0`;
583	// Compute start and size of the region to remove. Make sure 'PrefAlign' is
584	// maintained on the remaining store.
585	if (IsOverwriteEnd) {
586	// Calculate required adjustment for 'KillingStart' in order to keep
587	// remaining store size aligned on 'PerfAlign'.
588	uint64_t Off =
589	offsetToAlignment(Value: uint64_t(KillingStart - DeadStart), Alignment: PrefAlign);
590	ToRemoveStart = KillingStart + Off;
591	if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
592	return false;
593	ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
594	} else {
595	ToRemoveStart = DeadStart;
596	assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
597	"Not overlapping accesses?");
598	ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
599	// Calculate required adjustment for 'ToRemoveSize'in order to keep
600	// start of the remaining store aligned on 'PerfAlign'.
601	uint64_t Off = offsetToAlignment(Value: ToRemoveSize, Alignment: PrefAlign);
602	if (Off != `0`) {
603	if (ToRemoveSize <= (PrefAlign.value() - Off))
604	return false;
605	ToRemoveSize -= PrefAlign.value() - Off;
606	}
607	assert(isAligned(PrefAlign, ToRemoveSize) &&
608	"Should preserve selected alignment");
609	}
610
611	assert(ToRemoveSize > `0` && "Shouldn't reach here if nothing to remove");
612	assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
613
614	uint64_t NewSize = DeadSize - ToRemoveSize;
615	if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(Val: DeadI)) {
616	// When shortening an atomic memory intrinsic, the newly shortened
617	// length must remain an integer multiple of the element size.
618	const uint32_t ElementSize = AMI->getElementSizeInBytes();
619	if (`0` != NewSize % ElementSize)
620	return false;
621	}
622
623	LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
624	<< (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
625	<< "\n KILLER [" << ToRemoveStart << ", "
626	<< int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
627
628	Value *DeadWriteLength = DeadIntrinsic->getLength();
629	Value *TrimmedLength = ConstantInt::get(Ty: DeadWriteLength->getType(), V: NewSize);
630	DeadIntrinsic->setLength(TrimmedLength);
631	DeadIntrinsic->setDestAlignment(PrefAlign);
632
633	Value *OrigDest = DeadIntrinsic->getRawDest();
634	if (!IsOverwriteEnd) {
635	Value *Indices[`1`] = {
636	ConstantInt::get(Ty: DeadWriteLength->getType(), V: ToRemoveSize)};
637	Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
638	PointeeType: Type::getInt8Ty(C&: DeadIntrinsic->getContext()), Ptr: OrigDest, IdxList: Indices, NameStr: "",
639	InsertBefore: DeadI->getIterator());
640	NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
641	DeadIntrinsic->setDest(NewDestGEP);
642	}
643
644	// Update attached dbg.assign intrinsics. Assume 8-bit byte.
645	shortenAssignment(Inst: DeadI, OriginalDest: OrigDest, OldOffsetInBits: DeadStart * `8`, OldSizeInBits: DeadSize * `8`, NewSizeInBits: NewSize * `8`,
646	IsOverwriteEnd);
647
648	// Finally update start and size of dead access.
649	if (!IsOverwriteEnd)
650	DeadStart += ToRemoveSize;
651	DeadSize = NewSize;
652
653	return true;
654	}
655
656	static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
657	int64_t &DeadStart, uint64_t &DeadSize) {
658	if (IntervalMap.empty() \|\| !isShortenableAtTheEnd(I: DeadI))
659	return false;
660
661	OverlapIntervalsTy::iterator OII = --IntervalMap.end();
662	int64_t KillingStart = OII ->second;
663	uint64_t KillingSize = OII ->first - KillingStart;
664
665	assert(OII ->first - KillingStart >= `0` && "Size expected to be positive");
666
667	if (KillingStart > DeadStart &&
668	// Note: "KillingStart - KillingStart" is known to be positive due to
669	// preceding check.
670	(uint64_t)(KillingStart - DeadStart) < DeadSize &&
671	// Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
672	// be non negative due to preceding checks.
673	KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
674	if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
675	IsOverwriteEnd: true)) {
676	IntervalMap.erase(position: OII);
677	return true;
678	}
679	}
680	return false;
681	}
682
683	static bool tryToShortenBegin(Instruction *DeadI,
684	OverlapIntervalsTy &IntervalMap,
685	int64_t &DeadStart, uint64_t &DeadSize) {
686	if (IntervalMap.empty() \|\| !isShortenableAtTheBeginning(I: DeadI))
687	return false;
688
689	OverlapIntervalsTy::iterator OII = IntervalMap.begin();
690	int64_t KillingStart = OII ->second;
691	uint64_t KillingSize = OII ->first - KillingStart;
692
693	assert(OII ->first - KillingStart >= `0` && "Size expected to be positive");
694
695	if (KillingStart <= DeadStart &&
696	// Note: "DeadStart - KillingStart" is known to be non negative due to
697	// preceding check.
698	KillingSize > (uint64_t)(DeadStart - KillingStart)) {
699	// Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
700	// be positive due to preceding checks.
701	assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
702	"Should have been handled as OW_Complete");
703	if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
704	IsOverwriteEnd: false)) {
705	IntervalMap.erase(position: OII);
706	return true;
707	}
708	}
709	return false;
710	}
711
712	static Constant *
713	tryToMergePartialOverlappingStores(StoreInst KillingI, StoreInst DeadI,
714	int64_t KillingOffset, int64_t DeadOffset,
715	const DataLayout &DL, BatchAAResults &AA,
716	DominatorTree *DT) {
717
718	if (DeadI && isa<ConstantInt>(Val: DeadI->getValueOperand()) &&
719	DL.typeSizeEqualsStoreSize(Ty: DeadI->getValueOperand()->getType()) &&
720	KillingI && isa<ConstantInt>(Val: KillingI->getValueOperand()) &&
721	DL.typeSizeEqualsStoreSize(Ty: KillingI->getValueOperand()->getType()) &&
722	memoryIsNotModifiedBetween(FirstI: DeadI, SecondI: KillingI, AA, DL, DT)) {
723	// If the store we find is:
724	// a) partially overwritten by the store to 'Loc'
725	// b) the killing store is fully contained in the dead one and
726	// c) they both have a constant value
727	// d) none of the two stores need padding
728	// Merge the two stores, replacing the dead store's value with a
729	// merge of both values.
730	// TODO: Deal with other constant types (vectors, etc), and probably
731	// some mem intrinsics (if needed)
732
733	APInt DeadValue = cast<ConstantInt>(Val: DeadI->getValueOperand())->getValue();
734	APInt KillingValue =
735	cast<ConstantInt>(Val: KillingI->getValueOperand())->getValue();
736	unsigned KillingBits = KillingValue.getBitWidth();
737	assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
738	KillingValue = KillingValue.zext(width: DeadValue.getBitWidth());
739
740	// Offset of the smaller store inside the larger store
741	unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * `8`;
742	unsigned LShiftAmount =
743	DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
744	: BitOffsetDiff;
745	APInt Mask = APInt::getBitsSet(numBits: DeadValue.getBitWidth(), loBit: LShiftAmount,
746	hiBit: LShiftAmount + KillingBits);
747	// Clear the bits we'll be replacing, then OR with the smaller
748	// store, shifted appropriately.
749	APInt Merged = (DeadValue & ~Mask) \| (KillingValue << LShiftAmount);
750	LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
751	<< "\n Killing: " << *KillingI
752	<< "\n Merged Value: " << Merged << `'\n'`);
753	return ConstantInt::get(Ty: DeadI->getValueOperand()->getType(), V: Merged);
754	}
755	return nullptr;
756	}
757
758	namespace {
759	// Returns true if \p I is an intrinsic that does not read or write memory.
760	bool isNoopIntrinsic(Instruction *I) {
761	if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
762	switch (II->getIntrinsicID()) {
763	case Intrinsic::lifetime_start:
764	case Intrinsic::lifetime_end:
765	case Intrinsic::invariant_end:
766	case Intrinsic::launder_invariant_group:
767	case Intrinsic::assume:
768	return true;
769	case Intrinsic::dbg_declare:
770	case Intrinsic::dbg_label:
771	case Intrinsic::dbg_value:
772	llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
773	default:
774	return false;
775	}
776	}
777	return false;
778	}
779
780	// Check if we can ignore \p D for DSE.
781	bool canSkipDef(MemoryDef D, bool* DefVisibleToCaller) {
782	Instruction *DI = D->getMemoryInst();
783	// Calls that only access inaccessible memory cannot read or write any memory
784	// locations we consider for elimination.
785	if (auto *CB = dyn_cast<CallBase>(Val: DI))
786	if (CB->onlyAccessesInaccessibleMemory())
787	return true;
788
789	// We can eliminate stores to locations not visible to the caller across
790	// throwing instructions.
791	if (DI->mayThrow() && !DefVisibleToCaller)
792	return true;
793
794	// We can remove the dead stores, irrespective of the fence and its ordering
795	// (release/acquire/seq_cst). Fences only constraints the ordering of
796	// already visible stores, it does not make a store visible to other
797	// threads. So, skipping over a fence does not change a store from being
798	// dead.
799	if (isa<FenceInst>(Val: DI))
800	return true;
801
802	// Skip intrinsics that do not really read or modify memory.
803	if (isNoopIntrinsic(I: DI))
804	return true;
805
806	return false;
807	}
808
809	struct DSEState {
810	Function &F;
811	AliasAnalysis &AA;
812	EarliestEscapeInfo EI;
813
814	/// The single BatchAA instance that is used to cache AA queries. It will
815	/// not be invalidated over the whole run. This is safe, because:
816	/// 1. Only memory writes are removed, so the alias cache for memory
817	/// locations remains valid.
818	/// 2. No new instructions are added (only instructions removed), so cached
819	/// information for a deleted value cannot be accessed by a re-used new
820	/// value pointer.
821	BatchAAResults BatchAA;
822
823	MemorySSA &MSSA;
824	DominatorTree &DT;
825	PostDominatorTree &PDT;
826	const TargetLibraryInfo &TLI;
827	const DataLayout &DL;
828	const LoopInfo &LI;
829
830	// Whether the function contains any irreducible control flow, useful for
831	// being accurately able to detect loops.
832	bool ContainsIrreducibleLoops;
833
834	// All MemoryDefs that potentially could kill other MemDefs.
835	SmallVector<MemoryDef *, `64`> MemDefs;
836	// Any that should be skipped as they are already deleted
837	SmallPtrSet<MemoryAccess *, `4`> SkipStores;
838	// Keep track whether a given object is captured before return or not.
839	DenseMap<const Value , bool*> CapturedBeforeReturn;
840	// Keep track of all of the objects that are invisible to the caller after
841	// the function returns.
842	DenseMap<const Value , bool*> InvisibleToCallerAfterRet;
843	// Keep track of blocks with throwing instructions not modeled in MemorySSA.
844	SmallPtrSet<BasicBlock *, `16`> ThrowingBlocks;
845	// Post-order numbers for each basic block. Used to figure out if memory
846	// accesses are executed before another access.
847	DenseMap<BasicBlock , unsigned*> PostOrderNumbers;
848
849	/// Keep track of instructions (partly) overlapping with killing MemoryDefs per
850	/// basic block.
851	MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
852	// Check if there are root nodes that are terminated by UnreachableInst.
853	// Those roots pessimize post-dominance queries. If there are such roots,
854	// fall back to CFG scan starting from all non-unreachable roots.
855	bool AnyUnreachableExit;
856
857	// Whether or not we should iterate on removing dead stores at the end of the
858	// function due to removing a store causing a previously captured pointer to
859	// no longer be captured.
860	bool ShouldIterateEndOfFunctionDSE;
861
862	/// Dead instructions to be removed at the end of DSE.
863	SmallVector<Instruction *> ToRemove;
864
865	// Class contains self-reference, make sure it's not copied/moved.
866	DSEState(const DSEState &) = delete;
867	DSEState &operator=(const DSEState &) = delete;
868
869	DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
870	PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
871	const LoopInfo &LI)
872	: F(F), AA(AA), EI (DT, &LI), BatchAA (AA, &EI), MSSA(MSSA), DT(DT),
873	PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
874	// Collect blocks with throwing instructions not modeled in MemorySSA and
875	// alloc-like objects.
876	unsigned PO = `0`;
877	for (BasicBlock *BB : post_order(G: &F)) {
878	PostOrderNumbers [BB] = PO++;
879	for (Instruction &I : *BB) {
880	MemoryAccess *MA = MSSA.getMemoryAccess(I: &I);
881	if (I.mayThrow() && !MA)
882	ThrowingBlocks.insert(Ptr: I.getParent());
883
884	auto *MD = dyn_cast_or_null<MemoryDef>(Val: MA);
885	if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
886	(getLocForWrite(I: &I) \|\| isMemTerminatorInst(I: &I)))
887	MemDefs.push_back(Elt: MD);
888	}
889	}
890
891	// Treat byval or inalloca arguments the same as Allocas, stores to them are
892	// dead at the end of the function.
893	for (Argument &AI : F.args())
894	if (AI.hasPassPointeeByValueCopyAttr())
895	InvisibleToCallerAfterRet.insert(KV: {&AI, true});
896
897	// Collect whether there is any irreducible control flow in the function.
898	ContainsIrreducibleLoops = mayContainIrreducibleControl(F, LI: &LI);
899
900	AnyUnreachableExit = any_of(Range: PDT.roots(), P: [](const BasicBlock *E) {
901	return isa<UnreachableInst>(Val: E->getTerminator());
902	});
903	}
904
905	LocationSize strengthenLocationSize(const Instruction *I,
906	LocationSize Size) const {
907	if (auto *CB = dyn_cast<CallBase>(Val: I)) {
908	LibFunc F;
909	if (TLI.getLibFunc(CB: *CB, F) && TLI.has(F) &&
910	(F == LibFunc_memset_chk \|\| F == LibFunc_memcpy_chk)) {
911	// Use the precise location size specified by the 3rd argument
912	// for determining KillingI overwrites DeadLoc if it is a memset_chk
913	// instruction. memset_chk will write either the amount specified as 3rd
914	// argument or the function will immediately abort and exit the program.
915	// NOTE: AA may determine NoAlias if it can prove that the access size
916	// is larger than the allocation size due to that being UB. To avoid
917	// returning potentially invalid NoAlias results by AA, limit the use of
918	// the precise location size to isOverwrite.
919	if (const auto *Len = dyn_cast<ConstantInt>(Val: CB->getArgOperand(i: `2`)))
920	return LocationSize::precise(Value: Len->getZExtValue());
921	}
922	}
923	return Size;
924	}
925
926	/// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
927	/// KillingI instruction) completely overwrites a store to the 'DeadLoc'
928	/// location (by \p DeadI instruction).
929	/// Return OW_MaybePartial if \p KillingI does not completely overwrite
930	/// \p DeadI, but they both write to the same underlying object. In that
931	/// case, use isPartialOverwrite to check if \p KillingI partially overwrites
932	/// \p DeadI. Returns 'OR_None' if \p KillingI is known to not overwrite the
933	/// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
934	OverwriteResult isOverwrite(const Instruction *KillingI,
935	const Instruction *DeadI,
936	const MemoryLocation &KillingLoc,
937	const MemoryLocation &DeadLoc,
938	int64_t &KillingOff, int64_t &DeadOff) {
939	// AliasAnalysis does not always account for loops. Limit overwrite checks
940	// to dependencies for which we can guarantee they are independent of any
941	// loops they are in.
942	if (!isGuaranteedLoopIndependent(Current: DeadI, KillingDef: KillingI, CurrentLoc: DeadLoc))
943	return OW_Unknown;
944
945	LocationSize KillingLocSize =
946	strengthenLocationSize(I: KillingI, Size: KillingLoc.Size);
947	const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
948	const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
949	const Value *DeadUndObj = getUnderlyingObject(V: DeadPtr);
950	const Value *KillingUndObj = getUnderlyingObject(V: KillingPtr);
951
952	// Check whether the killing store overwrites the whole object, in which
953	// case the size/offset of the dead store does not matter.
954	if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
955	isIdentifiedObject(V: KillingUndObj)) {
956	std::optional<TypeSize> KillingUndObjSize =
957	getPointerSize(V: KillingUndObj, DL, TLI, F: &F);
958	if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
959	return OW_Complete;
960	}
961
962	// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
963	// get imprecise values here, though (except for unknown sizes).
964	if (!KillingLocSize.isPrecise() \|\| !DeadLoc.Size.isPrecise()) {
965	// In case no constant size is known, try to an IR values for the number
966	// of bytes written and check if they match.
967	const auto *KillingMemI = dyn_cast<MemIntrinsic>(Val: KillingI);
968	const auto *DeadMemI = dyn_cast<MemIntrinsic>(Val: DeadI);
969	if (KillingMemI && DeadMemI) {
970	const Value *KillingV = KillingMemI->getLength();
971	const Value *DeadV = DeadMemI->getLength();
972	if (KillingV == DeadV && BatchAA.isMustAlias(LocA: DeadLoc, LocB: KillingLoc))
973	return OW_Complete;
974	}
975
976	// Masked stores have imprecise locations, but we can reason about them
977	// to some extent.
978	return isMaskedStoreOverwrite(KillingI, DeadI, AA&: BatchAA);
979	}
980
981	const TypeSize KillingSize = KillingLocSize.getValue();
982	const TypeSize DeadSize = DeadLoc.Size.getValue();
983	// Bail on doing Size comparison which depends on AA for now
984	// TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
985	const bool AnyScalable =
986	DeadSize.isScalable() \|\| KillingLocSize.isScalable();
987
988	if (AnyScalable)
989	return OW_Unknown;
990	// Query the alias information
991	AliasResult AAR = BatchAA.alias(LocA: KillingLoc, LocB: DeadLoc);
992
993	// If the start pointers are the same, we just have to compare sizes to see if
994	// the killing store was larger than the dead store.
995	if (AAR == AliasResult::MustAlias) {
996	// Make sure that the KillingSize size is >= the DeadSize size.
997	if (KillingSize >= DeadSize)
998	return OW_Complete;
999	}
1000
1001	// If we hit a partial alias we may have a full overwrite
1002	if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
1003	int32_t Off = AAR.getOffset();
1004	if (Off >= `0` && (uint64_t)Off + DeadSize <= KillingSize)
1005	return OW_Complete;
1006	}
1007
1008	// If we can't resolve the same pointers to the same object, then we can't
1009	// analyze them at all.
1010	if (DeadUndObj != KillingUndObj) {
1011	// Non aliasing stores to different objects don't overlap. Note that
1012	// if the killing store is known to overwrite whole object (out of
1013	// bounds access overwrites whole object as well) then it is assumed to
1014	// completely overwrite any store to the same object even if they don't
1015	// actually alias (see next check).
1016	if (AAR == AliasResult::NoAlias)
1017	return OW_None;
1018	return OW_Unknown;
1019	}
1020
1021	// Okay, we have stores to two completely different pointers. Try to
1022	// decompose the pointer into a "base + constant_offset" form. If the base
1023	// pointers are equal, then we can reason about the two stores.
1024	DeadOff = `0`;
1025	KillingOff = `0`;
1026	const Value *DeadBasePtr =
1027	GetPointerBaseWithConstantOffset(Ptr: DeadPtr, Offset&: DeadOff, DL);
1028	const Value *KillingBasePtr =
1029	GetPointerBaseWithConstantOffset(Ptr: KillingPtr, Offset&: KillingOff, DL);
1030
1031	// If the base pointers still differ, we have two completely different
1032	// stores.
1033	if (DeadBasePtr != KillingBasePtr)
1034	return OW_Unknown;
1035
1036	// The killing access completely overlaps the dead store if and only if
1037	// both start and end of the dead one is "inside" the killing one:
1038	// \|<->\|--dead--\|<->\|
1039	// \|-----killing------\|
1040	// Accesses may overlap if and only if start of one of them is "inside"
1041	// another one:
1042	// \|<->\|--dead--\|<-------->\|
1043	// \|-------killing--------\|
1044	// OR
1045	// \|-------dead-------\|
1046	// \|<->\|---killing---\|<----->\|
1047	//
1048	// We have to be careful here as Off is signed while .Size is unsigned.
1049
1050	// Check if the dead access starts "not before" the killing one.
1051	if (DeadOff >= KillingOff) {
1052	// If the dead access ends "not after" the killing access then the
1053	// dead one is completely overwritten by the killing one.
1054	if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1055	return OW_Complete;
1056	// If start of the dead access is "before" end of the killing access
1057	// then accesses overlap.
1058	else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1059	return OW_MaybePartial;
1060	}
1061	// If start of the killing access is "before" end of the dead access then
1062	// accesses overlap.
1063	else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1064	return OW_MaybePartial;
1065	}
1066
1067	// Can reach here only if accesses are known not to overlap.
1068	return OW_None;
1069	}
1070
1071	bool isInvisibleToCallerAfterRet(const Value *V) {
1072	if (isa<AllocaInst>(Val: V))
1073	return true;
1074	auto I = InvisibleToCallerAfterRet.insert(KV: {V, false});
1075	if (I.second) {
1076	if (!isInvisibleToCallerOnUnwind(V)) {
1077	I.first ->second = false;
1078	} else if (isNoAliasCall(V)) {
1079	I.first ->second = !PointerMayBeCaptured(V, ReturnCaptures: true, StoreCaptures: false);
1080	}
1081	}
1082	return I.first ->second;
1083	}
1084
1085	bool isInvisibleToCallerOnUnwind(const Value *V) {
1086	bool RequiresNoCaptureBeforeUnwind;
1087	if (!isNotVisibleOnUnwind(Object: V, RequiresNoCaptureBeforeUnwind))
1088	return false;
1089	if (!RequiresNoCaptureBeforeUnwind)
1090	return true;
1091
1092	auto I = CapturedBeforeReturn.insert(KV: {V, true});
1093	if (I.second)
1094	// NOTE: This could be made more precise by PointerMayBeCapturedBefore
1095	// with the killing MemoryDef. But we refrain from doing so for now to
1096	// limit compile-time and this does not cause any changes to the number
1097	// of stores removed on a large test set in practice.
1098	I.first ->second = PointerMayBeCaptured(V, ReturnCaptures: false, StoreCaptures: true);
1099	return !I.first ->second;
1100	}
1101
1102	std::optional<MemoryLocation> getLocForWrite(Instruction I) const* {
1103	if (!I->mayWriteToMemory())
1104	return std::nullopt;
1105
1106	if (auto *CB = dyn_cast<CallBase>(Val: I))
1107	return MemoryLocation::getForDest(CI: CB, TLI);
1108
1109	return MemoryLocation::getOrNone(Inst: I);
1110	}
1111
1112	/// Assuming this instruction has a dead analyzable write, can we delete
1113	/// this instruction?
1114	bool isRemovable(Instruction *I) {
1115	assert(getLocForWrite(I) && "Must have analyzable write");
1116
1117	// Don't remove volatile/atomic stores.
1118	if (StoreInst *SI = dyn_cast<StoreInst>(Val: I))
1119	return SI->isUnordered();
1120
1121	if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1122	// Don't remove volatile memory intrinsics.
1123	if (auto *MI = dyn_cast<MemIntrinsic>(Val: CB))
1124	return !MI->isVolatile();
1125
1126	// Never remove dead lifetime intrinsics, e.g. because they are followed
1127	// by a free.
1128	if (CB->isLifetimeStartOrEnd())
1129	return false;
1130
1131	return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1132	!CB->isTerminator();
1133	}
1134
1135	return false;
1136	}
1137
1138	/// Returns true if \p UseInst completely overwrites \p DefLoc
1139	/// (stored by \p DefInst).
1140	bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1141	Instruction *UseInst) {
1142	// UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1143	// MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1144	// MemoryDef.
1145	if (!UseInst->mayWriteToMemory())
1146	return false;
1147
1148	if (auto *CB = dyn_cast<CallBase>(Val: UseInst))
1149	if (CB->onlyAccessesInaccessibleMemory())
1150	return false;
1151
1152	int64_t InstWriteOffset, DepWriteOffset;
1153	if (auto CC = getLocForWrite(I: UseInst))
1154	return isOverwrite(KillingI: UseInst, DeadI: DefInst, KillingLoc: *CC, DeadLoc: DefLoc, KillingOff&: InstWriteOffset,
1155	DeadOff&: DepWriteOffset) == OW_Complete;
1156	return false;
1157	}
1158
1159	/// Returns true if \p Def is not read before returning from the function.
1160	bool isWriteAtEndOfFunction(MemoryDef *Def) {
1161	LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1162	<< *Def->getMemoryInst()
1163	<< ") is at the end the function \n");
1164
1165	auto MaybeLoc = getLocForWrite(I: Def->getMemoryInst());
1166	if (!MaybeLoc) {
1167	LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
1168	return false;
1169	}
1170
1171	SmallVector<MemoryAccess *, `4`> WorkList;
1172	SmallPtrSet<MemoryAccess *, `8`> Visited;
1173	auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
1174	if (!Visited.insert(Ptr: Acc).second)
1175	return;
1176	for (Use &U : Acc->uses())
1177	WorkList.push_back(Elt: cast<MemoryAccess>(Val: U.getUser()));
1178	};
1179	PushMemUses(Def);
1180	for (unsigned I = `0`; I < WorkList.size(); I++) {
1181	if (WorkList.size() >= MemorySSAScanLimit) {
1182	LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1183	return false;
1184	}
1185
1186	MemoryAccess *UseAccess = WorkList [I];
1187	if (isa<MemoryPhi>(Val: UseAccess)) {
1188	// AliasAnalysis does not account for loops. Limit elimination to
1189	// candidates for which we can guarantee they always store to the same
1190	// memory location.
1191	if (!isGuaranteedLoopInvariant(Ptr: MaybeLoc ->Ptr))
1192	return false;
1193
1194	PushMemUses(cast<MemoryPhi>(Val: UseAccess));
1195	continue;
1196	}
1197	// TODO: Checking for aliasing is expensive. Consider reducing the amount
1198	// of times this is called and/or caching it.
1199	Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst();
1200	if (isReadClobber(DefLoc: *MaybeLoc, UseInst)) {
1201	LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1202	return false;
1203	}
1204
1205	if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess))
1206	PushMemUses(UseDef);
1207	}
1208	return true;
1209	}
1210
1211	/// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1212	/// pair with the MemoryLocation terminated by \p I and a boolean flag
1213	/// indicating whether \p I is a free-like call.
1214	std::optional<std::pair<MemoryLocation, bool>>
1215	getLocForTerminator(Instruction I) const* {
1216	uint64_t Len;
1217	Value *Ptr;
1218	if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1219	m_Value(Ptr))))
1220	return {std::make_pair(x: MemoryLocation (Ptr, Len), y: false)};
1221
1222	if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1223	if (Value *FreedOp = getFreedOperand(CB, TLI: &TLI))
1224	return {std::make_pair(x: MemoryLocation::getAfter(Ptr: FreedOp), y: true)};
1225	}
1226
1227	return std::nullopt;
1228	}
1229
1230	/// Returns true if \p I is a memory terminator instruction like
1231	/// llvm.lifetime.end or free.
1232	bool isMemTerminatorInst(Instruction I) const* {
1233	auto *CB = dyn_cast<CallBase>(Val: I);
1234	return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end \|\|
1235	getFreedOperand(CB, TLI: &TLI) != nullptr);
1236	}
1237
1238	/// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1239	/// instruction \p AccessI.
1240	bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1241	Instruction *MaybeTerm) {
1242	std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1243	getLocForTerminator(I: MaybeTerm);
1244
1245	if (!MaybeTermLoc)
1246	return false;
1247
1248	// If the terminator is a free-like call, all accesses to the underlying
1249	// object can be considered terminated.
1250	if (getUnderlyingObject(V: Loc.Ptr) !=
1251	getUnderlyingObject(V: MaybeTermLoc ->first.Ptr))
1252	return false;
1253
1254	auto TermLoc = MaybeTermLoc ->first;
1255	if (MaybeTermLoc ->second) {
1256	const Value *LocUO = getUnderlyingObject(V: Loc.Ptr);
1257	return BatchAA.isMustAlias(V1: TermLoc.Ptr, V2: LocUO);
1258	}
1259	int64_t InstWriteOffset = `0`;
1260	int64_t DepWriteOffset = `0`;
1261	return isOverwrite(KillingI: MaybeTerm, DeadI: AccessI, KillingLoc: TermLoc, DeadLoc: Loc, KillingOff&: InstWriteOffset,
1262	DeadOff&: DepWriteOffset) == OW_Complete;
1263	}
1264
1265	// Returns true if \p Use may read from \p DefLoc.
1266	bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1267	if (isNoopIntrinsic(I: UseInst))
1268	return false;
1269
1270	// Monotonic or weaker atomic stores can be re-ordered and do not need to be
1271	// treated as read clobber.
1272	if (auto SI = dyn_cast<StoreInst>(Val: UseInst))
1273	return isStrongerThan(AO: SI->getOrdering(), Other: AtomicOrdering::Monotonic);
1274
1275	if (!UseInst->mayReadFromMemory())
1276	return false;
1277
1278	if (auto *CB = dyn_cast<CallBase>(Val: UseInst))
1279	if (CB->onlyAccessesInaccessibleMemory())
1280	return false;
1281
1282	return isRefSet(MRI: BatchAA.getModRefInfo(I: UseInst, OptLoc: DefLoc));
1283	}
1284
1285	/// Returns true if a dependency between \p Current and \p KillingDef is
1286	/// guaranteed to be loop invariant for the loops that they are in. Either
1287	/// because they are known to be in the same block, in the same loop level or
1288	/// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1289	/// during execution of the containing function.
1290	bool isGuaranteedLoopIndependent(const Instruction *Current,
1291	const Instruction *KillingDef,
1292	const MemoryLocation &CurrentLoc) {
1293	// If the dependency is within the same block or loop level (being careful
1294	// of irreducible loops), we know that AA will return a valid result for the
1295	// memory dependency. (Both at the function level, outside of any loop,
1296	// would also be valid but we currently disable that to limit compile time).
1297	if (Current->getParent() == KillingDef->getParent())
1298	return true;
1299	const Loop *CurrentLI = LI.getLoopFor(BB: Current->getParent());
1300	if (!ContainsIrreducibleLoops && CurrentLI &&
1301	CurrentLI == LI.getLoopFor(BB: KillingDef->getParent()))
1302	return true;
1303	// Otherwise check the memory location is invariant to any loops.
1304	return isGuaranteedLoopInvariant(Ptr: CurrentLoc.Ptr);
1305	}
1306
1307	/// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1308	/// loop. In particular, this guarantees that it only references a single
1309	/// MemoryLocation during execution of the containing function.
1310	bool isGuaranteedLoopInvariant(const Value *Ptr) {
1311	Ptr = Ptr->stripPointerCasts();
1312	if (auto *GEP = dyn_cast<GEPOperator>(Val: Ptr))
1313	if (GEP->hasAllConstantIndices())
1314	Ptr = GEP->getPointerOperand()->stripPointerCasts();
1315
1316	if (auto *I = dyn_cast<Instruction>(Val: Ptr)) {
1317	return I->getParent()->isEntryBlock() \|\|
1318	(!ContainsIrreducibleLoops && !LI.getLoopFor(BB: I->getParent()));
1319	}
1320	return true;
1321	}
1322
1323	// Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1324	// with no read access between them or on any other path to a function exit
1325	// block if \p KillingLoc is not accessible after the function returns. If
1326	// there is no such MemoryDef, return std::nullopt. The returned value may not
1327	// (completely) overwrite \p KillingLoc. Currently we bail out when we
1328	// encounter an aliasing MemoryUse (read).
1329	std::optional<MemoryAccess *>
1330	getDomMemoryDef(MemoryDef KillingDef, MemoryAccess StartAccess,
1331	const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1332	unsigned &ScanLimit, unsigned &WalkerStepLimit,
1333	bool IsMemTerm, unsigned &PartialLimit) {
1334	if (ScanLimit == `0` \|\| WalkerStepLimit == `0`) {
1335	LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1336	return std::nullopt;
1337	}
1338
1339	MemoryAccess *Current = StartAccess;
1340	Instruction *KillingI = KillingDef->getMemoryInst();
1341	LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1342
1343	// Only optimize defining access of KillingDef when directly starting at its
1344	// defining access. The defining access also must only access KillingLoc. At
1345	// the moment we only support instructions with a single write location, so
1346	// it should be sufficient to disable optimizations for instructions that
1347	// also read from memory.
1348	bool CanOptimize = OptimizeMemorySSA &&
1349	KillingDef->getDefiningAccess() == StartAccess &&
1350	!KillingI->mayReadFromMemory();
1351
1352	// Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1353	std::optional<MemoryLocation> CurrentLoc;
1354	for (;; Current = cast<MemoryDef>(Val: Current)->getDefiningAccess()) {
1355	LLVM_DEBUG({
1356	dbgs() << " visiting " << *Current;
1357	if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1358	dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1359	<< ")";
1360	dbgs() << "\n";
1361	});
1362
1363	// Reached TOP.
1364	if (MSSA.isLiveOnEntryDef(MA: Current)) {
1365	LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1366	if (CanOptimize && Current != KillingDef->getDefiningAccess())
1367	// The first clobbering def is... none.
1368	KillingDef->setOptimized(Current);
1369	return std::nullopt;
1370	}
1371
1372	// Cost of a step. Accesses in the same block are more likely to be valid
1373	// candidates for elimination, hence consider them cheaper.
1374	unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1375	? MemorySSASameBBStepCost
1376	: MemorySSAOtherBBStepCost;
1377	if (WalkerStepLimit <= StepCost) {
1378	LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1379	return std::nullopt;
1380	}
1381	WalkerStepLimit -= StepCost;
1382
1383	// Return for MemoryPhis. They cannot be eliminated directly and the
1384	// caller is responsible for traversing them.
1385	if (isa<MemoryPhi>(Val: Current)) {
1386	LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1387	return Current;
1388	}
1389
1390	// Below, check if CurrentDef is a valid candidate to be eliminated by
1391	// KillingDef. If it is not, check the next candidate.
1392	MemoryDef *CurrentDef = cast<MemoryDef>(Val: Current);
1393	Instruction *CurrentI = CurrentDef->getMemoryInst();
1394
1395	if (canSkipDef(D: CurrentDef, DefVisibleToCaller: !isInvisibleToCallerOnUnwind(V: KillingUndObj))) {
1396	CanOptimize = false;
1397	continue;
1398	}
1399
1400	// Before we try to remove anything, check for any extra throwing
1401	// instructions that block us from DSEing
1402	if (mayThrowBetween(KillingI, DeadI: CurrentI, KillingUndObj)) {
1403	LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1404	return std::nullopt;
1405	}
1406
1407	// Check for anything that looks like it will be a barrier to further
1408	// removal
1409	if (isDSEBarrier(KillingUndObj, DeadI: CurrentI)) {
1410	LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1411	return std::nullopt;
1412	}
1413
1414	// If Current is known to be on path that reads DefLoc or is a read
1415	// clobber, bail out, as the path is not profitable. We skip this check
1416	// for intrinsic calls, because the code knows how to handle memcpy
1417	// intrinsics.
1418	if (!isa<IntrinsicInst>(Val: CurrentI) && isReadClobber(DefLoc: KillingLoc, UseInst: CurrentI))
1419	return std::nullopt;
1420
1421	// Quick check if there are direct uses that are read-clobbers.
1422	if (any_of(Range: Current->uses(), P: [this, &KillingLoc, StartAccess](Use &U) {
1423	if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(Val: U.getUser()))
1424	return !MSSA.dominates(A: StartAccess, B: UseOrDef) &&
1425	isReadClobber(DefLoc: KillingLoc, UseInst: UseOrDef->getMemoryInst());
1426	return false;
1427	})) {
1428	LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1429	return std::nullopt;
1430	}
1431
1432	// If Current does not have an analyzable write location or is not
1433	// removable, skip it.
1434	CurrentLoc = getLocForWrite(I: CurrentI);
1435	if (!CurrentLoc \|\| !isRemovable(I: CurrentI)) {
1436	CanOptimize = false;
1437	continue;
1438	}
1439
1440	// AliasAnalysis does not account for loops. Limit elimination to
1441	// candidates for which we can guarantee they always store to the same
1442	// memory location and not located in different loops.
1443	if (!isGuaranteedLoopIndependent(Current: CurrentI, KillingDef: KillingI, CurrentLoc: *CurrentLoc)) {
1444	LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1445	CanOptimize = false;
1446	continue;
1447	}
1448
1449	if (IsMemTerm) {
1450	// If the killing def is a memory terminator (e.g. lifetime.end), check
1451	// the next candidate if the current Current does not write the same
1452	// underlying object as the terminator.
1453	if (!isMemTerminator(Loc: *CurrentLoc, AccessI: CurrentI, MaybeTerm: KillingI)) {
1454	CanOptimize = false;
1455	continue;
1456	}
1457	} else {
1458	int64_t KillingOffset = `0`;
1459	int64_t DeadOffset = `0`;
1460	auto OR = isOverwrite(KillingI, DeadI: CurrentI, KillingLoc, DeadLoc: *CurrentLoc,
1461	KillingOff&: KillingOffset, DeadOff&: DeadOffset);
1462	if (CanOptimize) {
1463	// CurrentDef is the earliest write clobber of KillingDef. Use it as
1464	// optimized access. Do not optimize if CurrentDef is already the
1465	// defining access of KillingDef.
1466	if (CurrentDef != KillingDef->getDefiningAccess() &&
1467	(OR == OW_Complete \|\| OR == OW_MaybePartial))
1468	KillingDef->setOptimized(CurrentDef);
1469
1470	// Once a may-aliasing def is encountered do not set an optimized
1471	// access.
1472	if (OR != OW_None)
1473	CanOptimize = false;
1474	}
1475
1476	// If Current does not write to the same object as KillingDef, check
1477	// the next candidate.
1478	if (OR == OW_Unknown \|\| OR == OW_None)
1479	continue;
1480	else if (OR == OW_MaybePartial) {
1481	// If KillingDef only partially overwrites Current, check the next
1482	// candidate if the partial step limit is exceeded. This aggressively
1483	// limits the number of candidates for partial store elimination,
1484	// which are less likely to be removable in the end.
1485	if (PartialLimit <= `1`) {
1486	WalkerStepLimit -= `1`;
1487	LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1488	continue;
1489	}
1490	PartialLimit -= `1`;
1491	}
1492	}
1493	break;
1494	};
1495
1496	// Accesses to objects accessible after the function returns can only be
1497	// eliminated if the access is dead along all paths to the exit. Collect
1498	// the blocks with killing (=completely overwriting MemoryDefs) and check if
1499	// they cover all paths from MaybeDeadAccess to any function exit.
1500	SmallPtrSet<Instruction *, `16`> KillingDefs;
1501	KillingDefs.insert(Ptr: KillingDef->getMemoryInst());
1502	MemoryAccess *MaybeDeadAccess = Current;
1503	MemoryLocation MaybeDeadLoc = *CurrentLoc;
1504	Instruction *MaybeDeadI = cast<MemoryDef>(Val: MaybeDeadAccess)->getMemoryInst();
1505	LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1506	<< *MaybeDeadI << ")\n");
1507
1508	SmallSetVector<MemoryAccess *, `32`> WorkList;
1509	auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
1510	for (Use &U : Acc->uses())
1511	WorkList.insert(X: cast<MemoryAccess>(Val: U.getUser()));
1512	};
1513	PushMemUses(MaybeDeadAccess);
1514
1515	// Check if DeadDef may be read.
1516	for (unsigned I = `0`; I < WorkList.size(); I++) {
1517	MemoryAccess *UseAccess = WorkList [I];
1518
1519	LLVM_DEBUG(dbgs() << " " << *UseAccess);
1520	// Bail out if the number of accesses to check exceeds the scan limit.
1521	if (ScanLimit < (WorkList.size() - I)) {
1522	LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1523	return std::nullopt;
1524	}
1525	--ScanLimit;
1526	NumDomMemDefChecks ++;
1527
1528	if (isa<MemoryPhi>(Val: UseAccess)) {
1529	if (any_of(Range&: KillingDefs, P: [this, UseAccess](Instruction *KI) {
1530	return DT.properlyDominates(A: KI->getParent(),
1531	B: UseAccess->getBlock());
1532	})) {
1533	LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1534	continue;
1535	}
1536	LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1537	PushMemUses(UseAccess);
1538	continue;
1539	}
1540
1541	Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst();
1542	LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1543
1544	if (any_of(Range&: KillingDefs, P: [this, UseInst](Instruction *KI) {
1545	return DT.dominates(Def: KI, User: UseInst);
1546	})) {
1547	LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1548	continue;
1549	}
1550
1551	// A memory terminator kills all preceeding MemoryDefs and all succeeding
1552	// MemoryAccesses. We do not have to check it's users.
1553	if (isMemTerminator(Loc: MaybeDeadLoc, AccessI: MaybeDeadI, MaybeTerm: UseInst)) {
1554	LLVM_DEBUG(
1555	dbgs()
1556	<< " ... skipping, memterminator invalidates following accesses\n");
1557	continue;
1558	}
1559
1560	if (isNoopIntrinsic(I: cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst())) {
1561	LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1562	PushMemUses(UseAccess);
1563	continue;
1564	}
1565
1566	if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj)) {
1567	LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1568	return std::nullopt;
1569	}
1570
1571	// Uses which may read the original MemoryDef mean we cannot eliminate the
1572	// original MD. Stop walk.
1573	if (isReadClobber(DefLoc: MaybeDeadLoc, UseInst)) {
1574	LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1575	return std::nullopt;
1576	}
1577
1578	// If this worklist walks back to the original memory access (and the
1579	// pointer is not guarenteed loop invariant) then we cannot assume that a
1580	// store kills itself.
1581	if (MaybeDeadAccess == UseAccess &&
1582	!isGuaranteedLoopInvariant(Ptr: MaybeDeadLoc.Ptr)) {
1583	LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1584	return std::nullopt;
1585	}
1586	// Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1587	// if it reads the memory location.
1588	// TODO: It would probably be better to check for self-reads before
1589	// calling the function.
1590	if (KillingDef == UseAccess \|\| MaybeDeadAccess == UseAccess) {
1591	LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1592	continue;
1593	}
1594
1595	// Check all uses for MemoryDefs, except for defs completely overwriting
1596	// the original location. Otherwise we have to check uses of all
1597	// MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1598	// miss cases like the following
1599	// 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1600	// 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1601	// Use(2) ; MayAlias 2 and* 1, loads [0, 3].*
1602	// (The Use points to the first* Def it may alias)*
1603	// 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1604	// stores [0,1]
1605	if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess)) {
1606	if (isCompleteOverwrite(DefLoc: MaybeDeadLoc, DefInst: MaybeDeadI, UseInst)) {
1607	BasicBlock *MaybeKillingBlock = UseInst->getParent();
1608	if (PostOrderNumbers.find(Val: MaybeKillingBlock)->second <
1609	PostOrderNumbers.find(Val: MaybeDeadAccess->getBlock())->second) {
1610	if (!isInvisibleToCallerAfterRet(V: KillingUndObj)) {
1611	LLVM_DEBUG(dbgs()
1612	<< " ... found killing def " << *UseInst << "\n");
1613	KillingDefs.insert(Ptr: UseInst);
1614	}
1615	} else {
1616	LLVM_DEBUG(dbgs()
1617	<< " ... found preceeding def " << *UseInst << "\n");
1618	return std::nullopt;
1619	}
1620	} else
1621	PushMemUses(UseDef);
1622	}
1623	}
1624
1625	// For accesses to locations visible after the function returns, make sure
1626	// that the location is dead (=overwritten) along all paths from
1627	// MaybeDeadAccess to the exit.
1628	if (!isInvisibleToCallerAfterRet(V: KillingUndObj)) {
1629	SmallPtrSet<BasicBlock *, `16`> KillingBlocks;
1630	for (Instruction *KD : KillingDefs)
1631	KillingBlocks.insert(Ptr: KD->getParent());
1632	assert(!KillingBlocks.empty() &&
1633	"Expected at least a single killing block");
1634
1635	// Find the common post-dominator of all killing blocks.
1636	BasicBlock CommonPred = KillingBlocks.begin();
1637	for (BasicBlock *BB : llvm::drop_begin(RangeOrContainer&: KillingBlocks)) {
1638	if (!CommonPred)
1639	break;
1640	CommonPred = PDT.findNearestCommonDominator(A: CommonPred, B: BB);
1641	}
1642
1643	// If the common post-dominator does not post-dominate MaybeDeadAccess,
1644	// there is a path from MaybeDeadAccess to an exit not going through a
1645	// killing block.
1646	if (!PDT.dominates(A: CommonPred, B: MaybeDeadAccess->getBlock())) {
1647	if (!AnyUnreachableExit)
1648	return std::nullopt;
1649
1650	// Fall back to CFG scan starting at all non-unreachable roots if not
1651	// all paths to the exit go through CommonPred.
1652	CommonPred = nullptr;
1653	}
1654
1655	// If CommonPred itself is in the set of killing blocks, we're done.
1656	if (KillingBlocks.count(Ptr: CommonPred))
1657	return {MaybeDeadAccess};
1658
1659	SetVector<BasicBlock *> WorkList;
1660	// If CommonPred is null, there are multiple exits from the function.
1661	// They all have to be added to the worklist.
1662	if (CommonPred)
1663	WorkList.insert(X: CommonPred);
1664	else
1665	for (BasicBlock *R : PDT.roots()) {
1666	if (!isa<UnreachableInst>(Val: R->getTerminator()))
1667	WorkList.insert(X: R);
1668	}
1669
1670	NumCFGTries ++;
1671	// Check if all paths starting from an exit node go through one of the
1672	// killing blocks before reaching MaybeDeadAccess.
1673	for (unsigned I = `0`; I < WorkList.size(); I++) {
1674	NumCFGChecks ++;
1675	BasicBlock *Current = WorkList [I];
1676	if (KillingBlocks.count(Ptr: Current))
1677	continue;
1678	if (Current == MaybeDeadAccess->getBlock())
1679	return std::nullopt;
1680
1681	// MaybeDeadAccess is reachable from the entry, so we don't have to
1682	// explore unreachable blocks further.
1683	if (!DT.isReachableFromEntry(A: Current))
1684	continue;
1685
1686	for (BasicBlock *Pred : predecessors(BB: Current))
1687	WorkList.insert(X: Pred);
1688
1689	if (WorkList.size() >= MemorySSAPathCheckLimit)
1690	return std::nullopt;
1691	}
1692	NumCFGSuccess ++;
1693	}
1694
1695	// No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1696	// potentially dead.
1697	return {MaybeDeadAccess};
1698	}
1699
1700	/// Delete dead memory defs and recursively add their operands to ToRemove if
1701	/// they became dead.
1702	void
1703	deleteDeadInstruction(Instruction *SI,
1704	SmallPtrSetImpl<MemoryAccess > Deleted = nullptr) {
1705	MemorySSAUpdater Updater(&MSSA);
1706	SmallVector<Instruction *, `32`> NowDeadInsts;
1707	NowDeadInsts.push_back(Elt: SI);
1708	--NumFastOther;
1709
1710	while (!NowDeadInsts.empty()) {
1711	Instruction *DeadInst = NowDeadInsts.pop_back_val();
1712	++NumFastOther;
1713
1714	// Try to preserve debug information attached to the dead instruction.
1715	salvageDebugInfo(I&: *DeadInst);
1716	salvageKnowledge(I: DeadInst);
1717
1718	// Remove the Instruction from MSSA.
1719	MemoryAccess *MA = MSSA.getMemoryAccess(I: DeadInst);
1720	bool IsMemDef = MA && isa<MemoryDef>(Val: MA);
1721	if (MA) {
1722	if (IsMemDef) {
1723	auto *MD = cast<MemoryDef>(Val: MA);
1724	SkipStores.insert(Ptr: MD);
1725	if (Deleted)
1726	Deleted->insert(Ptr: MD);
1727	if (auto *SI = dyn_cast<StoreInst>(Val: MD->getMemoryInst())) {
1728	if (SI->getValueOperand()->getType()->isPointerTy()) {
1729	const Value *UO = getUnderlyingObject(V: SI->getValueOperand());
1730	if (CapturedBeforeReturn.erase(Val: UO))
1731	ShouldIterateEndOfFunctionDSE = true;
1732	InvisibleToCallerAfterRet.erase(Val: UO);
1733	}
1734	}
1735	}
1736
1737	Updater.removeMemoryAccess(MA);
1738	}
1739
1740	auto I = IOLs.find(Key: DeadInst->getParent());
1741	if (I != IOLs.end())
1742	I->second.erase(Val: DeadInst);
1743	// Remove its operands
1744	for (Use &O : DeadInst->operands())
1745	if (Instruction *OpI = dyn_cast<Instruction>(Val&: O)) {
1746	O.set(PoisonValue::get(T: O ->getType()));
1747	if (isInstructionTriviallyDead(I: OpI, TLI: &TLI))
1748	NowDeadInsts.push_back(Elt: OpI);
1749	}
1750
1751	EI.removeInstruction(I: DeadInst);
1752	// Remove memory defs directly if they don't produce results, but only
1753	// queue other dead instructions for later removal. They may have been
1754	// used as memory locations that have been cached by BatchAA. Removing
1755	// them here may lead to newly created instructions to be allocated at the
1756	// same address, yielding stale cache entries.
1757	if (IsMemDef && DeadInst->getType()->isVoidTy())
1758	DeadInst->eraseFromParent();
1759	else
1760	ToRemove.push_back(Elt: DeadInst);
1761	}
1762	}
1763
1764	// Check for any extra throws between \p KillingI and \p DeadI that block
1765	// DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1766	// MemoryDef that may throw are handled during the walk from one def to the
1767	// next.
1768	bool mayThrowBetween(Instruction KillingI, Instruction DeadI,
1769	const Value *KillingUndObj) {
1770	// First see if we can ignore it by using the fact that KillingI is an
1771	// alloca/alloca like object that is not visible to the caller during
1772	// execution of the function.
1773	if (KillingUndObj && isInvisibleToCallerOnUnwind(V: KillingUndObj))
1774	return false;
1775
1776	if (KillingI->getParent() == DeadI->getParent())
1777	return ThrowingBlocks.count(Ptr: KillingI->getParent());
1778	return !ThrowingBlocks.empty();
1779	}
1780
1781	// Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1782	// instructions act as barriers:
1783	// A memory instruction that may throw and \p KillingI accesses a non-stack*
1784	// object.
1785	// Atomic stores stronger that monotonic.*
1786	bool isDSEBarrier(const Value KillingUndObj, Instruction DeadI) {
1787	// If DeadI may throw it acts as a barrier, unless we are to an
1788	// alloca/alloca like object that does not escape.
1789	if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj))
1790	return true;
1791
1792	// If DeadI is an atomic load/store stronger than monotonic, do not try to
1793	// eliminate/reorder it.
1794	if (DeadI->isAtomic()) {
1795	if (auto *LI = dyn_cast<LoadInst>(Val: DeadI))
1796	return isStrongerThanMonotonic(AO: LI->getOrdering());
1797	if (auto *SI = dyn_cast<StoreInst>(Val: DeadI))
1798	return isStrongerThanMonotonic(AO: SI->getOrdering());
1799	if (auto *ARMW = dyn_cast<AtomicRMWInst>(Val: DeadI))
1800	return isStrongerThanMonotonic(AO: ARMW->getOrdering());
1801	if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(Val: DeadI))
1802	return isStrongerThanMonotonic(AO: CmpXchg->getSuccessOrdering()) \|\|
1803	isStrongerThanMonotonic(AO: CmpXchg->getFailureOrdering());
1804	llvm_unreachable("other instructions should be skipped in MemorySSA");
1805	}
1806	return false;
1807	}
1808
1809	/// Eliminate writes to objects that are not visible in the caller and are not
1810	/// accessed before returning from the function.
1811	bool eliminateDeadWritesAtEndOfFunction() {
1812	bool MadeChange = false;
1813	LLVM_DEBUG(
1814	dbgs()
1815	<< "Trying to eliminate MemoryDefs at the end of the function\n");
1816	do {
1817	ShouldIterateEndOfFunctionDSE = false;
1818	for (MemoryDef *Def : llvm::reverse(C&: MemDefs)) {
1819	if (SkipStores.contains(Ptr: Def))
1820	continue;
1821
1822	Instruction *DefI = Def->getMemoryInst();
1823	auto DefLoc = getLocForWrite(I: DefI);
1824	if (!DefLoc \|\| !isRemovable(I: DefI))
1825	continue;
1826
1827	// NOTE: Currently eliminating writes at the end of a function is
1828	// limited to MemoryDefs with a single underlying object, to save
1829	// compile-time. In practice it appears the case with multiple
1830	// underlying objects is very uncommon. If it turns out to be important,
1831	// we can use getUnderlyingObjects here instead.
1832	const Value *UO = getUnderlyingObject(V: DefLoc ->Ptr);
1833	if (!isInvisibleToCallerAfterRet(V: UO))
1834	continue;
1835
1836	if (isWriteAtEndOfFunction(Def)) {
1837	// See through pointer-to-pointer bitcasts
1838	LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1839	"of the function\n");
1840	deleteDeadInstruction(SI: DefI);
1841	++NumFastStores;
1842	MadeChange = true;
1843	}
1844	}
1845	} while (ShouldIterateEndOfFunctionDSE);
1846	return MadeChange;
1847	}
1848
1849	/// If we have a zero initializing memset following a call to malloc,
1850	/// try folding it into a call to calloc.
1851	bool tryFoldIntoCalloc(MemoryDef Def, const* Value *DefUO) {
1852	Instruction *DefI = Def->getMemoryInst();
1853	MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI);
1854	if (!MemSet)
1855	// TODO: Could handle zero store to small allocation as well.
1856	return false;
1857	Constant *StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue());
1858	if (!StoredConstant \|\| !StoredConstant->isNullValue())
1859	return false;
1860
1861	if (!isRemovable(I: DefI))
1862	// The memset might be volatile..
1863	return false;
1864
1865	if (F.hasFnAttribute(Attribute::SanitizeMemory) \|\|
1866	F.hasFnAttribute(Attribute::SanitizeAddress) \|\|
1867	F.hasFnAttribute(Attribute::SanitizeHWAddress) \|\|
1868	F.getName() == "calloc")
1869	return false;
1870	auto Malloc = const_cast<CallInst >(dyn_cast<CallInst>(Val: DefUO));
1871	if (!Malloc)
1872	return false;
1873	auto *InnerCallee = Malloc->getCalledFunction();
1874	if (!InnerCallee)
1875	return false;
1876	LibFunc Func;
1877	if (!TLI.getLibFunc(FDecl: *InnerCallee, F&: Func) \|\| !TLI.has(F: Func) \|\|
1878	Func != LibFunc_malloc)
1879	return false;
1880	// Gracefully handle malloc with unexpected memory attributes.
1881	auto *MallocDef = dyn_cast_or_null<MemoryDef>(Val: MSSA.getMemoryAccess(I: Malloc));
1882	if (!MallocDef)
1883	return false;
1884
1885	auto shouldCreateCalloc = [](CallInst Malloc, CallInst Memset) {
1886	// Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
1887	// of malloc block
1888	auto *MallocBB = Malloc->getParent(),
1889	*MemsetBB = Memset->getParent();
1890	if (MallocBB == MemsetBB)
1891	return true;
1892	auto *Ptr = Memset->getArgOperand(i: `0`);
1893	auto *TI = MallocBB->getTerminator();
1894	ICmpInst::Predicate Pred;
1895	BasicBlock TrueBB, FalseBB;
1896	if (!match(V: TI, P: m_Br(C: m_ICmp(Pred, L: m_Specific(V: Ptr), R: m_Zero()), T&: TrueBB,
1897	F&: FalseBB)))
1898	return false;
1899	if (Pred != ICmpInst::ICMP_EQ \|\| MemsetBB != FalseBB)
1900	return false;
1901	return true;
1902	};
1903
1904	if (Malloc->getOperand(i_nocapture: `0`) != MemSet->getLength())
1905	return false;
1906	if (!shouldCreateCalloc(Malloc, MemSet) \|\|
1907	!DT.dominates(Def: Malloc, User: MemSet) \|\|
1908	!memoryIsNotModifiedBetween(FirstI: Malloc, SecondI: MemSet, AA&: BatchAA, DL, DT: &DT))
1909	return false;
1910	IRBuilder<> IRB(Malloc);
1911	Type *SizeTTy = Malloc->getArgOperand(i: `0`)->getType();
1912	auto *Calloc = emitCalloc(Num: ConstantInt::get(Ty: SizeTTy, V: `1`),
1913	Size: Malloc->getArgOperand(i: `0`), B&: IRB, TLI);
1914	if (!Calloc)
1915	return false;
1916
1917	MemorySSAUpdater Updater(&MSSA);
1918	auto *NewAccess =
1919	Updater.createMemoryAccessAfter(I: cast<Instruction>(Val: Calloc), Definition: nullptr,
1920	InsertPt: MallocDef);
1921	auto *NewAccessMD = cast<MemoryDef>(Val: NewAccess);
1922	Updater.insertDef(Def: NewAccessMD, /RenameUses=/true);
1923	Malloc->replaceAllUsesWith(V: Calloc);
1924	deleteDeadInstruction(SI: Malloc);
1925	return true;
1926	}
1927
1928	// Check if there is a dominating condition, that implies that the value
1929	// being stored in a ptr is already present in the ptr.
1930	bool dominatingConditionImpliesValue(MemoryDef *Def) {
1931	auto *StoreI = cast<StoreInst>(Val: Def->getMemoryInst());
1932	BasicBlock *StoreBB = StoreI->getParent();
1933	Value *StorePtr = StoreI->getPointerOperand();
1934	Value *StoreVal = StoreI->getValueOperand();
1935
1936	DomTreeNode *IDom = DT.getNode(BB: StoreBB)->getIDom();
1937	if (!IDom)
1938	return false;
1939
1940	auto *BI = dyn_cast<BranchInst>(Val: IDom->getBlock()->getTerminator());
1941	if (!BI \|\| !BI->isConditional())
1942	return false;
1943
1944	// In case both blocks are the same, it is not possible to determine
1945	// if optimization is possible. (We would not want to optimize a store
1946	// in the FalseBB if condition is true and vice versa.)
1947	if (BI->getSuccessor(i: `0`) == BI->getSuccessor(i: `1`))
1948	return false;
1949
1950	Instruction *ICmpL;
1951	ICmpInst::Predicate Pred;
1952	if (!match(V: BI->getCondition(),
1953	P: m_c_ICmp(Pred,
1954	L: m_CombineAnd(L: m_Load(Op: m_Specific(V: StorePtr)),
1955	R: m_Instruction(I&: ICmpL)),
1956	R: m_Specific(V: StoreVal))) \|\|
1957	!ICmpInst::isEquality(P: Pred))
1958	return false;
1959
1960	// In case the else blocks also branches to the if block or the other way
1961	// around it is not possible to determine if the optimization is possible.
1962	if (Pred == ICmpInst::ICMP_EQ &&
1963	!DT.dominates(BBE: BasicBlockEdge (BI->getParent(), BI->getSuccessor(i: `0`)),
1964	BB: StoreBB))
1965	return false;
1966
1967	if (Pred == ICmpInst::ICMP_NE &&
1968	!DT.dominates(BBE: BasicBlockEdge (BI->getParent(), BI->getSuccessor(i: `1`)),
1969	BB: StoreBB))
1970	return false;
1971
1972	MemoryAccess *LoadAcc = MSSA.getMemoryAccess(I: ICmpL);
1973	MemoryAccess *ClobAcc =
1974	MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA);
1975
1976	return MSSA.dominates(A: ClobAcc, B: LoadAcc);
1977	}
1978
1979	/// \returns true if \p Def is a no-op store, either because it
1980	/// directly stores back a loaded value or stores zero to a calloced object.
1981	bool storeIsNoop(MemoryDef Def, const* Value *DefUO) {
1982	Instruction *DefI = Def->getMemoryInst();
1983	StoreInst *Store = dyn_cast<StoreInst>(Val: DefI);
1984	MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI);
1985	Constant StoredConstant = nullptr*;
1986	if (Store)
1987	StoredConstant = dyn_cast<Constant>(Val: Store->getOperand(i_nocapture: `0`));
1988	else if (MemSet)
1989	StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue());
1990	else
1991	return false;
1992
1993	if (!isRemovable(I: DefI))
1994	return false;
1995
1996	if (StoredConstant) {
1997	Constant *InitC =
1998	getInitialValueOfAllocation(V: DefUO, TLI: &TLI, Ty: StoredConstant->getType());
1999	// If the clobbering access is LiveOnEntry, no instructions between them
2000	// can modify the memory location.
2001	if (InitC && InitC == StoredConstant)
2002	return MSSA.isLiveOnEntryDef(
2003	MA: MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA));
2004	}
2005
2006	if (!Store)
2007	return false;
2008
2009	if (dominatingConditionImpliesValue(Def))
2010	return true;
2011
2012	if (auto *LoadI = dyn_cast<LoadInst>(Val: Store->getOperand(i_nocapture: `0`))) {
2013	if (LoadI->getPointerOperand() == Store->getOperand(i_nocapture: `1`)) {
2014	// Get the defining access for the load.
2015	auto *LoadAccess = MSSA.getMemoryAccess(I: LoadI)->getDefiningAccess();
2016	// Fast path: the defining accesses are the same.
2017	if (LoadAccess == Def->getDefiningAccess())
2018	return true;
2019
2020	// Look through phi accesses. Recursively scan all phi accesses by
2021	// adding them to a worklist. Bail when we run into a memory def that
2022	// does not match LoadAccess.
2023	SetVector<MemoryAccess *> ToCheck;
2024	MemoryAccess *Current =
2025	MSSA.getWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA);
2026	// We don't want to bail when we run into the store memory def. But,
2027	// the phi access may point to it. So, pretend like we've already
2028	// checked it.
2029	ToCheck.insert(X: Def);
2030	ToCheck.insert(X: Current);
2031	// Start at current (1) to simulate already having checked Def.
2032	for (unsigned I = `1`; I < ToCheck.size(); ++I) {
2033	Current = ToCheck [I];
2034	if (auto PhiAccess = dyn_cast<MemoryPhi>(Val: Current)) {
2035	// Check all the operands.
2036	for (auto &Use : PhiAccess->incoming_values())
2037	ToCheck.insert(X: cast<MemoryAccess>(Val: &Use));
2038	continue;
2039	}
2040
2041	// If we found a memory def, bail. This happens when we have an
2042	// unrelated write in between an otherwise noop store.
2043	assert(isa<MemoryDef>(Current) &&
2044	"Only MemoryDefs should reach here.");
2045	// TODO: Skip no alias MemoryDefs that have no aliasing reads.
2046	// We are searching for the definition of the store's destination.
2047	// So, if that is the same definition as the load, then this is a
2048	// noop. Otherwise, fail.
2049	if (LoadAccess != Current)
2050	return false;
2051	}
2052	return true;
2053	}
2054	}
2055
2056	return false;
2057	}
2058
2059	bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2060	bool Changed = false;
2061	for (auto OI : IOL) {
2062	Instruction *DeadI = OI.first;
2063	MemoryLocation Loc = *getLocForWrite(I: DeadI);
2064	assert(isRemovable(DeadI) && "Expect only removable instruction");
2065
2066	const Value *Ptr = Loc.Ptr->stripPointerCasts();
2067	int64_t DeadStart = `0`;
2068	uint64_t DeadSize = Loc.Size.getValue();
2069	GetPointerBaseWithConstantOffset(Ptr, Offset&: DeadStart, DL);
2070	OverlapIntervalsTy &IntervalMap = OI.second;
2071	Changed \|= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2072	if (IntervalMap.empty())
2073	continue;
2074	Changed \|= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2075	}
2076	return Changed;
2077	}
2078
2079	/// Eliminates writes to locations where the value that is being written
2080	/// is already stored at the same location.
2081	bool eliminateRedundantStoresOfExistingValues() {
2082	bool MadeChange = false;
2083	LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2084	"already existing value\n");
2085	for (auto *Def : MemDefs) {
2086	if (SkipStores.contains(Ptr: Def) \|\| MSSA.isLiveOnEntryDef(MA: Def))
2087	continue;
2088
2089	Instruction *DefInst = Def->getMemoryInst();
2090	auto MaybeDefLoc = getLocForWrite(I: DefInst);
2091	if (!MaybeDefLoc \|\| !isRemovable(I: DefInst))
2092	continue;
2093
2094	MemoryDef *UpperDef;
2095	// To conserve compile-time, we avoid walking to the next clobbering def.
2096	// Instead, we just try to get the optimized access, if it exists. DSE
2097	// will try to optimize defs during the earlier traversal.
2098	if (Def->isOptimized())
2099	UpperDef = dyn_cast<MemoryDef>(Val: Def->getOptimized());
2100	else
2101	UpperDef = dyn_cast<MemoryDef>(Val: Def->getDefiningAccess());
2102	if (!UpperDef \|\| MSSA.isLiveOnEntryDef(MA: UpperDef))
2103	continue;
2104
2105	Instruction *UpperInst = UpperDef->getMemoryInst();
2106	auto IsRedundantStore = [&]() {
2107	if (DefInst->isIdenticalTo(I: UpperInst))
2108	return true;
2109	if (auto *MemSetI = dyn_cast<MemSetInst>(Val: UpperInst)) {
2110	if (auto *SI = dyn_cast<StoreInst>(Val: DefInst)) {
2111	// MemSetInst must have a write location.
2112	MemoryLocation UpperLoc = *getLocForWrite(I: UpperInst);
2113	int64_t InstWriteOffset = `0`;
2114	int64_t DepWriteOffset = `0`;
2115	auto OR = isOverwrite(KillingI: UpperInst, DeadI: DefInst, KillingLoc: UpperLoc, DeadLoc: *MaybeDefLoc,
2116	KillingOff&: InstWriteOffset, DeadOff&: DepWriteOffset);
2117	Value *StoredByte = isBytewiseValue(V: SI->getValueOperand(), DL);
2118	return StoredByte && StoredByte == MemSetI->getOperand(i_nocapture: `1`) &&
2119	OR == OW_Complete;
2120	}
2121	}
2122	return false;
2123	};
2124
2125	if (!IsRedundantStore() \|\| isReadClobber(DefLoc: *MaybeDefLoc, UseInst: DefInst))
2126	continue;
2127	LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2128	<< `'\n'`);
2129	deleteDeadInstruction(SI: DefInst);
2130	NumRedundantStores ++;
2131	MadeChange = true;
2132	}
2133	return MadeChange;
2134	}
2135	};
2136
2137	static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2138	DominatorTree &DT, PostDominatorTree &PDT,
2139	const TargetLibraryInfo &TLI,
2140	const LoopInfo &LI) {
2141	bool MadeChange = false;
2142
2143	DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2144	// For each store:
2145	for (unsigned I = `0`; I < State.MemDefs.size(); I++) {
2146	MemoryDef *KillingDef = State.MemDefs [I];
2147	if (State.SkipStores.count(Ptr: KillingDef))
2148	continue;
2149	Instruction *KillingI = KillingDef->getMemoryInst();
2150
2151	std::optional<MemoryLocation> MaybeKillingLoc;
2152	if (State.isMemTerminatorInst(I: KillingI)) {
2153	if (auto KillingLoc = State.getLocForTerminator(I: KillingI))
2154	MaybeKillingLoc = KillingLoc ->first;
2155	} else {
2156	MaybeKillingLoc = State.getLocForWrite(I: KillingI);
2157	}
2158
2159	if (!MaybeKillingLoc) {
2160	LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2161	<< *KillingI << "\n");
2162	continue;
2163	}
2164	MemoryLocation KillingLoc = *MaybeKillingLoc;
2165	assert(KillingLoc.Ptr && "KillingLoc should not be null");
2166	const Value *KillingUndObj = getUnderlyingObject(V: KillingLoc.Ptr);
2167	LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2168	<< KillingDef << " (" << KillingI << ")\n");
2169
2170	unsigned ScanLimit = MemorySSAScanLimit;
2171	unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2172	unsigned PartialLimit = MemorySSAPartialStoreLimit;
2173	// Worklist of MemoryAccesses that may be killed by KillingDef.
2174	SmallSetVector<MemoryAccess *, `8`> ToCheck;
2175	// Track MemoryAccesses that have been deleted in the loop below, so we can
2176	// skip them. Don't use SkipStores for this, which may contain reused
2177	// MemoryAccess addresses.
2178	SmallPtrSet<MemoryAccess *, `8`> Deleted;
2179	[[maybe_unused]] unsigned OrigNumSkipStores = State.SkipStores.size();
2180	ToCheck.insert(X: KillingDef->getDefiningAccess());
2181
2182	bool Shortend = false;
2183	bool IsMemTerm = State.isMemTerminatorInst(I: KillingI);
2184	// Check if MemoryAccesses in the worklist are killed by KillingDef.
2185	for (unsigned I = `0`; I < ToCheck.size(); I++) {
2186	MemoryAccess *Current = ToCheck [I];
2187	if (Deleted.contains(Ptr: Current))
2188	continue;
2189
2190	std::optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
2191	KillingDef, StartAccess: Current, KillingLoc, KillingUndObj, ScanLimit,
2192	WalkerStepLimit, IsMemTerm, PartialLimit);
2193
2194	if (!MaybeDeadAccess) {
2195	LLVM_DEBUG(dbgs() << " finished walk\n");
2196	continue;
2197	}
2198
2199	MemoryAccess DeadAccess = MaybeDeadAccess;
2200	LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2201	if (isa<MemoryPhi>(Val: DeadAccess)) {
2202	LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2203	for (Value *V : cast<MemoryPhi>(Val: DeadAccess)->incoming_values()) {
2204	MemoryAccess *IncomingAccess = cast<MemoryAccess>(Val: V);
2205	BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2206	BasicBlock *PhiBlock = DeadAccess->getBlock();
2207
2208	// We only consider incoming MemoryAccesses that come before the
2209	// MemoryPhi. Otherwise we could discover candidates that do not
2210	// strictly dominate our starting def.
2211	if (State.PostOrderNumbers [IncomingBlock] >
2212	State.PostOrderNumbers [PhiBlock])
2213	ToCheck.insert(X: IncomingAccess);
2214	}
2215	continue;
2216	}
2217	auto *DeadDefAccess = cast<MemoryDef>(Val: DeadAccess);
2218	Instruction *DeadI = DeadDefAccess->getMemoryInst();
2219	LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
2220	ToCheck.insert(X: DeadDefAccess->getDefiningAccess());
2221	NumGetDomMemoryDefPassed ++;
2222
2223	if (!DebugCounter::shouldExecute(CounterName: MemorySSACounter))
2224	continue;
2225
2226	MemoryLocation DeadLoc = *State.getLocForWrite(I: DeadI);
2227
2228	if (IsMemTerm) {
2229	const Value *DeadUndObj = getUnderlyingObject(V: DeadLoc.Ptr);
2230	if (KillingUndObj != DeadUndObj)
2231	continue;
2232	LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2233	<< "\n KILLER: " << *KillingI << `'\n'`);
2234	State.deleteDeadInstruction(SI: DeadI, Deleted: &Deleted);
2235	++NumFastStores;
2236	MadeChange = true;
2237	} else {
2238	// Check if DeadI overwrites KillingI.
2239	int64_t KillingOffset = `0`;
2240	int64_t DeadOffset = `0`;
2241	OverwriteResult OR = State.isOverwrite(
2242	KillingI, DeadI, KillingLoc, DeadLoc, KillingOff&: KillingOffset, DeadOff&: DeadOffset);
2243	if (OR == OW_MaybePartial) {
2244	auto Iter = State.IOLs.insert(
2245	KV: std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
2246	x: DeadI->getParent(), y: InstOverlapIntervalsTy ()));
2247	auto &IOL = Iter.first->second;
2248	OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOff: KillingOffset,
2249	DeadOff: DeadOffset, DeadI, IOL);
2250	}
2251
2252	if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2253	auto *DeadSI = dyn_cast<StoreInst>(Val: DeadI);
2254	auto *KillingSI = dyn_cast<StoreInst>(Val: KillingI);
2255	// We are re-using tryToMergePartialOverlappingStores, which requires
2256	// DeadSI to dominate KillingSI.
2257	// TODO: implement tryToMergeParialOverlappingStores using MemorySSA.
2258	if (DeadSI && KillingSI && DT.dominates(Def: DeadSI, User: KillingSI)) {
2259	if (Constant *Merged = tryToMergePartialOverlappingStores(
2260	KillingI: KillingSI, DeadI: DeadSI, KillingOffset, DeadOffset, DL: State.DL,
2261	AA&: State.BatchAA, DT: &DT)) {
2262
2263	// Update stored value of earlier store to merged constant.
2264	DeadSI->setOperand(i_nocapture: `0`, Val_nocapture: Merged);
2265	++NumModifiedStores;
2266	MadeChange = true;
2267
2268	Shortend = true;
2269	// Remove killing store and remove any outstanding overlap
2270	// intervals for the updated store.
2271	State.deleteDeadInstruction(SI: KillingSI, Deleted: &Deleted);
2272	auto I = State.IOLs.find(Key: DeadSI->getParent());
2273	if (I != State.IOLs.end())
2274	I->second.erase(Val: DeadSI);
2275	break;
2276	}
2277	}
2278	}
2279
2280	if (OR == OW_Complete) {
2281	LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2282	<< "\n KILLER: " << *KillingI << `'\n'`);
2283	State.deleteDeadInstruction(SI: DeadI, Deleted: &Deleted);
2284	++NumFastStores;
2285	MadeChange = true;
2286	}
2287	}
2288	}
2289
2290	assert(State.SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
2291	"SkipStores and Deleted out of sync?");
2292
2293	// Check if the store is a no-op.
2294	if (!Shortend && State.storeIsNoop(Def: KillingDef, DefUO: KillingUndObj)) {
2295	LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
2296	<< `'\n'`);
2297	State.deleteDeadInstruction(SI: KillingI);
2298	NumRedundantStores ++;
2299	MadeChange = true;
2300	continue;
2301	}
2302
2303	// Can we form a calloc from a memset/malloc pair?
2304	if (!Shortend && State.tryFoldIntoCalloc(Def: KillingDef, DefUO: KillingUndObj)) {
2305	LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2306	<< " DEAD: " << *KillingI << `'\n'`);
2307	State.deleteDeadInstruction(SI: KillingI);
2308	MadeChange = true;
2309	continue;
2310	}
2311	}
2312
2313	if (EnablePartialOverwriteTracking)
2314	for (auto &KV : State.IOLs)
2315	MadeChange \|= State.removePartiallyOverlappedStores(IOL&: KV.second);
2316
2317	MadeChange \|= State.eliminateRedundantStoresOfExistingValues();
2318	MadeChange \|= State.eliminateDeadWritesAtEndOfFunction();
2319
2320	while (!State.ToRemove.empty()) {
2321	Instruction *DeadInst = State.ToRemove.pop_back_val();
2322	DeadInst->eraseFromParent();
2323	}
2324
2325	return MadeChange;
2326	}
2327	} // end anonymous namespace
2328
2329	//===----------------------------------------------------------------------===//
2330	// DSE Pass
2331	//===----------------------------------------------------------------------===//
2332	PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2333	AliasAnalysis &AA = AM.getResult<AAManager>(IR&: F);
2334	const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
2335	DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
2336	MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(IR&: F).getMSSA();
2337	PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(IR&: F);
2338	LoopInfo &LI = AM.getResult<LoopAnalysis>(IR&: F);
2339
2340	bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2341
2342	#ifdef LLVM_ENABLE_STATS
2343	if (AreStatisticsEnabled())
2344	for (auto &I : instructions(F))
2345	NumRemainingStores += isa<StoreInst>(Val: &I);
2346	#endif
2347
2348	if (!Changed)
2349	return PreservedAnalyses::all();
2350
2351	PreservedAnalyses PA;
2352	PA.preserveSet<CFGAnalyses>();
2353	PA.preserve<MemorySSAAnalysis>();
2354	PA.preserve<LoopAnalysis>();
2355	return PA;
2356	}
2357

source code of llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp