LoadStoreOpt.cpp source code [llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp]

1	//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -- C++ --==//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	/// \file
9	/// This file implements the LoadStoreOpt optimization pass.
10	//===----------------------------------------------------------------------===//
11
12	#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
13	#include "llvm/ADT/STLExtras.h"
14	#include "llvm/ADT/SmallPtrSet.h"
15	#include "llvm/ADT/Statistic.h"
16	#include "llvm/Analysis/AliasAnalysis.h"
17	#include "llvm/Analysis/MemoryLocation.h"
18	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
19	#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20	#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
21	#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
22	#include "llvm/CodeGen/GlobalISel/Utils.h"
23	#include "llvm/CodeGen/LowLevelTypeUtils.h"
24	#include "llvm/CodeGen/MachineBasicBlock.h"
25	#include "llvm/CodeGen/MachineFrameInfo.h"
26	#include "llvm/CodeGen/MachineFunction.h"
27	#include "llvm/CodeGen/MachineInstr.h"
28	#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
29	#include "llvm/CodeGen/MachineRegisterInfo.h"
30	#include "llvm/CodeGen/Register.h"
31	#include "llvm/CodeGen/TargetLowering.h"
32	#include "llvm/CodeGen/TargetOpcodes.h"
33	#include "llvm/IR/DebugInfoMetadata.h"
34	#include "llvm/InitializePasses.h"
35	#include "llvm/Support/AtomicOrdering.h"
36	#include "llvm/Support/Casting.h"
37	#include "llvm/Support/Debug.h"
38	#include "llvm/Support/ErrorHandling.h"
39	#include "llvm/Support/MathExtras.h"
40	#include <algorithm>
41
42	#define DEBUG_TYPE "loadstore-opt"
43
44	using namespace llvm;
45	using namespace ore;
46	using namespace MIPatternMatch;
47
48	STATISTIC(NumStoresMerged, "Number of stores merged");
49
50	const unsigned MaxStoreSizeToForm = `128`;
51
52	char LoadStoreOpt::ID = `0`;
53	INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
54	false, false)
55	INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
56	false, false)
57
58	LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F)
59	: MachineFunctionPass (ID), DoNotRunPass (F) {}
60
61	LoadStoreOpt::LoadStoreOpt()
62	: LoadStoreOpt ([](const MachineFunction &) { return false; }) {}
63
64	void LoadStoreOpt::init(MachineFunction &MF) {
65	this->MF = &MF;
66	MRI = &MF.getRegInfo();
67	AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
68	TLI = MF.getSubtarget().getTargetLowering();
69	LI = MF.getSubtarget().getLegalizerInfo();
70	Builder.setMF(MF);
71	IsPreLegalizer = !MF.getProperties().hasProperty(
72	P: MachineFunctionProperties::Property::Legalized);
73	InstsToErase.clear();
74	}
75
76	void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
77	AU.addRequired<AAResultsWrapperPass>();
78	AU.setPreservesAll();
79	getSelectionDAGFallbackAnalysisUsage(AU);
80	MachineFunctionPass::getAnalysisUsage(AU);
81	}
82
83	BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
84	MachineRegisterInfo &MRI) {
85	BaseIndexOffset Info;
86	Register PtrAddRHS;
87	if (!mi_match(R: Ptr, MRI, P: m_GPtrAdd(L: m_Reg(R&: Info.BaseReg), R: m_Reg(R&: PtrAddRHS)))) {
88	Info.BaseReg = Ptr;
89	Info.IndexReg = Register ();
90	Info.IsIndexSignExt = false;
91	return Info;
92	}
93
94	auto RHSCst = getIConstantVRegValWithLookThrough(VReg: PtrAddRHS, MRI);
95	if (RHSCst)
96	Info.Offset = RHSCst ->Value.getSExtValue();
97
98	// Just recognize a simple case for now. In future we'll need to match
99	// indexing patterns for base + index + constant.
100	Info.IndexReg = PtrAddRHS;
101	Info.IsIndexSignExt = false;
102	return Info;
103	}
104
105	bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
106	const MachineInstr &MI2,
107	bool &IsAlias,
108	MachineRegisterInfo &MRI) {
109	auto *LdSt1 = dyn_cast<GLoadStore>(Val: &MI1);
110	auto *LdSt2 = dyn_cast<GLoadStore>(Val: &MI2);
111	if (!LdSt1 \|\| !LdSt2)
112	return false;
113
114	BaseIndexOffset BasePtr0 = getPointerInfo(Ptr: LdSt1->getPointerReg(), MRI);
115	BaseIndexOffset BasePtr1 = getPointerInfo(Ptr: LdSt2->getPointerReg(), MRI);
116
117	if (!BasePtr0.BaseReg.isValid() \|\| !BasePtr1.BaseReg.isValid())
118	return false;
119
120	int64_t Size1 = LdSt1->getMemSize();
121	int64_t Size2 = LdSt2->getMemSize();
122
123	int64_t PtrDiff;
124	if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
125	PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
126	// If the size of memory access is unknown, do not use it to do analysis.
127	// One example of unknown size memory access is to load/store scalable
128	// vector objects on the stack.
129	// BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
130	// following situations arise:
131	if (PtrDiff >= `0` &&
132	Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
133	// [----BasePtr0----]
134	// [---BasePtr1--]
135	// ========PtrDiff========>
136	IsAlias = !(Size1 <= PtrDiff);
137	return true;
138	}
139	if (PtrDiff < `0` &&
140	Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
141	// [----BasePtr0----]
142	// [---BasePtr1--]
143	// =====(-PtrDiff)====>
144	IsAlias = !((PtrDiff + Size2) <= `0`);
145	return true;
146	}
147	return false;
148	}
149
150	// If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
151	// able to calculate their relative offset if at least one arises
152	// from an alloca. However, these allocas cannot overlap and we
153	// can infer there is no alias.
154	auto *Base0Def = getDefIgnoringCopies(Reg: BasePtr0.BaseReg, MRI);
155	auto *Base1Def = getDefIgnoringCopies(Reg: BasePtr1.BaseReg, MRI);
156	if (!Base0Def \|\| !Base1Def)
157	return false; // Couldn't tell anything.
158
159
160	if (Base0Def->getOpcode() != Base1Def->getOpcode())
161	return false;
162
163	if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
164	MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
165	// If the bases have the same frame index but we couldn't find a
166	// constant offset, (indices are different) be conservative.
167	if (Base0Def != Base1Def &&
168	(!MFI.isFixedObjectIndex(ObjectIdx: Base0Def->getOperand(i: `1`).getIndex()) \|\|
169	!MFI.isFixedObjectIndex(ObjectIdx: Base1Def->getOperand(i: `1`).getIndex()))) {
170	IsAlias = false;
171	return true;
172	}
173	}
174
175	// This implementation is a lot more primitive than the SDAG one for now.
176	// FIXME: what about constant pools?
177	if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
178	auto GV0 = Base0Def->getOperand(i: `1`).getGlobal();
179	auto GV1 = Base1Def->getOperand(i: `1`).getGlobal();
180	if (GV0 != GV1) {
181	IsAlias = false;
182	return true;
183	}
184	}
185
186	// Can't tell anything about aliasing.
187	return false;
188	}
189
190	bool GISelAddressing::instMayAlias(const MachineInstr &MI,
191	const MachineInstr &Other,
192	MachineRegisterInfo &MRI,
193	AliasAnalysis *AA) {
194	struct MemUseCharacteristics {
195	bool IsVolatile;
196	bool IsAtomic;
197	Register BasePtr;
198	int64_t Offset;
199	uint64_t NumBytes;
200	MachineMemOperand *MMO;
201	};
202
203	auto getCharacteristics =
204	[&](const MachineInstr *MI) -> MemUseCharacteristics {
205	if (const auto *LS = dyn_cast<GLoadStore>(Val: MI)) {
206	Register BaseReg;
207	int64_t Offset = `0`;
208	// No pre/post-inc addressing modes are considered here, unlike in SDAG.
209	if (!mi_match(R: LS->getPointerReg(), MRI,
210	P: m_GPtrAdd(L: m_Reg(R&: BaseReg), R: m_ICst(Cst&: Offset)))) {
211	BaseReg = LS->getPointerReg();
212	Offset = `0`;
213	}
214
215	uint64_t Size = MemoryLocation::getSizeOrUnknown(
216	T: LS->getMMO().getMemoryType().getSizeInBytes());
217	return {.IsVolatile: LS->isVolatile(), .IsAtomic: LS->isAtomic(), .BasePtr: BaseReg,
218	.Offset: Offset /base offset/, .NumBytes: Size, .MMO: &LS->getMMO()};
219	}
220	// FIXME: support recognizing lifetime instructions.
221	// Default.
222	return {.IsVolatile: false /isvolatile/,
223	/isAtomic/ .IsAtomic: false, .BasePtr: Register (),
224	.Offset: (int64_t)`0` /offset/, .NumBytes: `0` /size/,
225	.MMO: (MachineMemOperand )nullptr*};
226	};
227	MemUseCharacteristics MUC0 = getCharacteristics (&MI),
228	MUC1 = getCharacteristics (&Other);
229
230	// If they are to the same address, then they must be aliases.
231	if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
232	MUC0.Offset == MUC1.Offset)
233	return true;
234
235	// If they are both volatile then they cannot be reordered.
236	if (MUC0.IsVolatile && MUC1.IsVolatile)
237	return true;
238
239	// Be conservative about atomics for the moment
240	// TODO: This is way overconservative for unordered atomics (see D66309)
241	if (MUC0.IsAtomic && MUC1.IsAtomic)
242	return true;
243
244	// If one operation reads from invariant memory, and the other may store, they
245	// cannot alias.
246	if (MUC0.MMO && MUC1.MMO) {
247	if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) \|\|
248	(MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
249	return false;
250	}
251
252	// Try to prove that there is aliasing, or that there is no aliasing. Either
253	// way, we can return now. If nothing can be proved, proceed with more tests.
254	bool IsAlias;
255	if (GISelAddressing::aliasIsKnownForLoadStore(MI1: MI, MI2: Other, IsAlias, MRI))
256	return IsAlias;
257
258	// The following all rely on MMO0 and MMO1 being valid.
259	if (!MUC0.MMO \|\| !MUC1.MMO)
260	return true;
261
262	// FIXME: port the alignment based alias analysis from SDAG's isAlias().
263	int64_t SrcValOffset0 = MUC0.MMO->getOffset();
264	int64_t SrcValOffset1 = MUC1.MMO->getOffset();
265	uint64_t Size0 = MUC0.NumBytes;
266	uint64_t Size1 = MUC1.NumBytes;
267	if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
268	Size0 != MemoryLocation::UnknownSize &&
269	Size1 != MemoryLocation::UnknownSize) {
270	// Use alias analysis information.
271	int64_t MinOffset = std::min(a: SrcValOffset0, b: SrcValOffset1);
272	int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
273	int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
274	if (AA->isNoAlias(LocA: MemoryLocation (MUC0.MMO->getValue(), Overlap0,
275	MUC0.MMO->getAAInfo()),
276	LocB: MemoryLocation (MUC1.MMO->getValue(), Overlap1,
277	MUC1.MMO->getAAInfo())))
278	return false;
279	}
280
281	// Otherwise we have to assume they alias.
282	return true;
283	}
284
285	/// Returns true if the instruction creates an unavoidable hazard that
286	/// forces a boundary between store merge candidates.
287	static bool isInstHardMergeHazard(MachineInstr &MI) {
288	return MI.hasUnmodeledSideEffects() \|\| MI.hasOrderedMemoryRef();
289	}
290
291	bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
292	// Try to merge all the stores in the vector, splitting into separate segments
293	// as necessary.
294	assert(StoresToMerge.size() > `1` && "Expected multiple stores to merge");
295	LLT OrigTy = MRI->getType(Reg: StoresToMerge [`0`]->getValueReg());
296	LLT PtrTy = MRI->getType(Reg: StoresToMerge [`0`]->getPointerReg());
297	unsigned AS = PtrTy.getAddressSpace();
298	// Ensure the legal store info is computed for this address space.
299	initializeStoreMergeTargetInfo(AddrSpace: AS);
300	const auto &LegalSizes = LegalStoreSizes [AS];
301
302	#ifndef NDEBUG
303	for (auto *StoreMI : StoresToMerge)
304	assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
305	#endif
306
307	const auto &DL = MF->getFunction().getParent()->getDataLayout();
308	bool AnyMerged = false;
309	do {
310	unsigned NumPow2 = llvm::bit_floor(Value: StoresToMerge.size());
311	unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue();
312	// Compute the biggest store we can generate to handle the number of stores.
313	unsigned MergeSizeBits;
314	for (MergeSizeBits = MaxSizeBits; MergeSizeBits > `1`; MergeSizeBits /= `2`) {
315	LLT StoreTy = LLT::scalar(SizeInBits: MergeSizeBits);
316	EVT StoreEVT =
317	getApproximateEVTForLLT(Ty: StoreTy, DL, Ctx&: MF->getFunction().getContext());
318	if (LegalSizes.size() > MergeSizeBits && LegalSizes [MergeSizeBits] &&
319	TLI->canMergeStoresTo(AS, MemVT: StoreEVT, MF: *MF) &&
320	(TLI->isTypeLegal(VT: StoreEVT)))
321	break; // We can generate a MergeSize bits store.
322	}
323	if (MergeSizeBits <= OrigTy.getSizeInBits())
324	return AnyMerged; // No greater merge.
325
326	unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
327	// Perform the actual merging.
328	SmallVector<GStore *, `8`> SingleMergeStores(
329	StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
330	AnyMerged \|= doSingleStoreMerge(Stores&: SingleMergeStores);
331	StoresToMerge.erase(CS: StoresToMerge.begin(),
332	CE: StoresToMerge.begin() + NumStoresToMerge);
333	} while (StoresToMerge.size() > `1`);
334	return AnyMerged;
335	}
336
337	bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
338	MachineFunction &MF) const {
339	auto Action = LI->getAction(Query).Action;
340	// If the instruction is unsupported, it can't be legalized at all.
341	if (Action == LegalizeActions::Unsupported)
342	return false;
343	return IsPreLegalizer \|\| Action == LegalizeAction::Legal;
344	}
345
346	bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
347	assert(Stores.size() > `1`);
348	// We know that all the stores are consecutive and there are no aliasing
349	// operations in the range. However, the values that are being stored may be
350	// generated anywhere before each store. To ensure we have the values
351	// available, we materialize the wide value and new store at the place of the
352	// final store in the merge sequence.
353	GStore *FirstStore = Stores [`0`];
354	const unsigned NumStores = Stores.size();
355	LLT SmallTy = MRI->getType(Reg: FirstStore->getValueReg());
356	LLT WideValueTy =
357	LLT::scalar(SizeInBits: NumStores * SmallTy.getSizeInBits().getFixedValue());
358
359	// For each store, compute pairwise merged debug locs.
360	DebugLoc MergedLoc = Stores.front()->getDebugLoc();
361	for (auto *Store : drop_begin(RangeOrContainer&: Stores))
362	MergedLoc = DILocation::getMergedLocation(LocA: MergedLoc, LocB: Store->getDebugLoc());
363
364	Builder.setInstr(*Stores.back());
365	Builder.setDebugLoc(MergedLoc);
366
367	// If all of the store values are constants, then create a wide constant
368	// directly. Otherwise, we need to generate some instructions to merge the
369	// existing values together into a wider type.
370	SmallVector<APInt, `8`> ConstantVals;
371	for (auto *Store : Stores) {
372	auto MaybeCst =
373	getIConstantVRegValWithLookThrough(VReg: Store->getValueReg(), MRI: *MRI);
374	if (!MaybeCst) {
375	ConstantVals.clear();
376	break;
377	}
378	ConstantVals.emplace_back(Args&: MaybeCst ->Value);
379	}
380
381	Register WideReg;
382	auto *WideMMO =
383	MF->getMachineMemOperand(MMO: &FirstStore->getMMO(), Offset: `0`, Ty: WideValueTy);
384	if (ConstantVals.empty()) {
385	// Mimic the SDAG behaviour here and don't try to do anything for unknown
386	// values. In future, we should also support the cases of loads and
387	// extracted vector elements.
388	return false;
389	}
390
391	assert(ConstantVals.size() == NumStores);
392	// Check if our wide constant is legal.
393	if (!isLegalOrBeforeLegalizer(Query: {TargetOpcode::G_CONSTANT, {WideValueTy}}, MF&: *MF))
394	return false;
395	APInt WideConst(WideValueTy.getSizeInBits(), `0`);
396	for (unsigned Idx = `0`; Idx < ConstantVals.size(); ++Idx) {
397	// Insert the smaller constant into the corresponding position in the
398	// wider one.
399	WideConst.insertBits(SubBits: ConstantVals [Idx], bitPosition: Idx * SmallTy.getSizeInBits());
400	}
401	WideReg = Builder.buildConstant(Res: WideValueTy, Val: WideConst).getReg(Idx: `0`);
402	auto NewStore =
403	Builder.buildStore(Val: WideReg, Addr: FirstStore->getPointerReg(), MMO&: *WideMMO);
404	(void) NewStore;
405	LLVM_DEBUG(dbgs() << "Merged " << Stores.size()
406	<< " stores into merged store: " << *NewStore);
407	LLVM_DEBUG(for (auto MI : Stores) dbgs() << " " << MI;);
408	NumStoresMerged += Stores.size();
409
410	MachineOptimizationRemarkEmitter MORE(MF, nullptr*);
411	MORE.emit(RemarkBuilder: [&]() {
412	MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore",
413	FirstStore->getDebugLoc(),
414	FirstStore->getParent());
415	R << "Merged " << NV ("NumMerged", Stores.size()) << " stores of "
416	<< NV ("OrigWidth", SmallTy.getSizeInBytes())
417	<< " bytes into a single store of "
418	<< NV ("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
419	return R;
420	});
421
422	for (auto *MI : Stores)
423	InstsToErase.insert(Ptr: MI);
424	return true;
425	}
426
427	bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
428	if (C.Stores.size() < `2`) {
429	C.reset();
430	return false;
431	}
432
433	LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
434	<< " stores, starting with " << *C.Stores[`0`]);
435	// We know that the stores in the candidate are adjacent.
436	// Now we need to check if any potential aliasing instructions recorded
437	// during the search alias with load/stores added to the candidate after.
438	// For example, if we have the candidate:
439	// C.Stores = [ST1, ST2, ST3, ST4]
440	// and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
441	// ST2, then we would have recorded it into the PotentialAliases structure
442	// with the associated index value of "1". Then we see ST3 and ST4 and add
443	// them to the candidate group. We know that LD1 does not alias with ST1 or
444	// ST2, since we already did that check. However we don't yet know if it
445	// may alias ST3 and ST4, so we perform those checks now.
446	SmallVector<GStore *> StoresToMerge;
447
448	auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
449	for (auto AliasInfo : reverse(C&: C.PotentialAliases)) {
450	MachineInstr *PotentialAliasOp = AliasInfo.first;
451	unsigned PreCheckedIdx = AliasInfo.second;
452	if (static_cast<unsigned>(Idx) < PreCheckedIdx) {
453	// Once our store index is lower than the index associated with the
454	// potential alias, we know that we've already checked for this alias
455	// and all of the earlier potential aliases too.
456	return false;
457	}
458	// Need to check this alias.
459	if (GISelAddressing::instMayAlias(MI: CheckStore, Other: PotentialAliasOp, MRI&: MRI,
460	AA)) {
461	LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
462	<< " detected\n");
463	return true;
464	}
465	}
466	return false;
467	};
468	// Start from the last store in the group, and check if it aliases with any
469	// of the potential aliasing operations in the list.
470	for (int StoreIdx = C.Stores.size() - `1`; StoreIdx >= `0`; --StoreIdx) {
471	auto *CheckStore = C.Stores [StoreIdx];
472	if (DoesStoreAliasWithPotential (StoreIdx, *CheckStore))
473	continue;
474	StoresToMerge.emplace_back(Args&: CheckStore);
475	}
476
477	LLVM_DEBUG(dbgs() << StoresToMerge.size()
478	<< " stores remaining after alias checks. Merging...\n");
479
480	// Now we've checked for aliasing hazards, merge any stores left.
481	C.reset();
482	if (StoresToMerge.size() < `2`)
483	return false;
484	return mergeStores(StoresToMerge);
485	}
486
487	bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
488	StoreMergeCandidate &C) {
489	if (C.Stores.empty())
490	return false;
491	return llvm::any_of(Range&: C.Stores, P: [&](MachineInstr *OtherMI) {
492	return instMayAlias(MI, Other: OtherMI, MRI&: MRI, AA);
493	});
494	}
495
496	void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
497	PotentialAliases.emplace_back(Args: std::make_pair(x: &MI, y: Stores.size() - `1`));
498	}
499
500	bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
501	StoreMergeCandidate &C) {
502	// Check if the given store writes to an adjacent address, and other
503	// requirements.
504	LLT ValueTy = MRI->getType(Reg: StoreMI.getValueReg());
505	LLT PtrTy = MRI->getType(Reg: StoreMI.getPointerReg());
506
507	// Only handle scalars.
508	if (!ValueTy.isScalar())
509	return false;
510
511	// Don't allow truncating stores for now.
512	if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
513	return false;
514
515	// Avoid adding volatile or ordered stores to the candidate. We already have a
516	// check for this in instMayAlias() but that only get's called later between
517	// potential aliasing hazards.
518	if (!StoreMI.isSimple())
519	return false;
520
521	Register StoreAddr = StoreMI.getPointerReg();
522	auto BIO = getPointerInfo(Ptr: StoreAddr, MRI&: *MRI);
523	Register StoreBase = BIO.BaseReg;
524	uint64_t StoreOffCst = BIO.Offset;
525	if (C.Stores.empty()) {
526	// This is the first store of the candidate.
527	// If the offset can't possibly allow for a lower addressed store with the
528	// same base, don't bother adding it.
529	if (StoreOffCst < ValueTy.getSizeInBytes())
530	return false;
531	C.BasePtr = StoreBase;
532	C.CurrentLowestOffset = StoreOffCst;
533	C.Stores.emplace_back(Args: &StoreMI);
534	LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
535	<< StoreMI);
536	return true;
537	}
538
539	// Check the store is the same size as the existing ones in the candidate.
540	if (MRI->getType(Reg: C.Stores [`0`]->getValueReg()).getSizeInBits() !=
541	ValueTy.getSizeInBits())
542	return false;
543
544	if (MRI->getType(Reg: C.Stores [`0`]->getPointerReg()).getAddressSpace() !=
545	PtrTy.getAddressSpace())
546	return false;
547
548	// There are other stores in the candidate. Check that the store address
549	// writes to the next lowest adjacent address.
550	if (C.BasePtr != StoreBase)
551	return false;
552	if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
553	static_cast<uint64_t>(StoreOffCst))
554	return false;
555
556	// This writes to an adjacent address. Allow it.
557	C.Stores.emplace_back(Args: &StoreMI);
558	C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
559	LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
560	return true;
561	}
562
563	bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
564	bool Changed = false;
565	// Walk through the block bottom-up, looking for merging candidates.
566	StoreMergeCandidate Candidate;
567	for (MachineInstr &MI : llvm::reverse(C&: MBB)) {
568	if (InstsToErase.contains(Ptr: &MI))
569	continue;
570
571	if (auto *StoreMI = dyn_cast<GStore>(Val: &MI)) {
572	// We have a G_STORE. Add it to the candidate if it writes to an adjacent
573	// address.
574	if (!addStoreToCandidate(StoreMI&: *StoreMI, C&: Candidate)) {
575	// Store wasn't eligible to be added. May need to record it as a
576	// potential alias.
577	if (operationAliasesWithCandidate(MI&: *StoreMI, C&: Candidate)) {
578	Changed \|= processMergeCandidate(C&: Candidate);
579	continue;
580	}
581	Candidate.addPotentialAlias(MI&: *StoreMI);
582	}
583	continue;
584	}
585
586	// If we don't have any stores yet, this instruction can't pose a problem.
587	if (Candidate.Stores.empty())
588	continue;
589
590	// We're dealing with some other kind of instruction.
591	if (isInstHardMergeHazard(MI)) {
592	Changed \|= processMergeCandidate(C&: Candidate);
593	Candidate.Stores.clear();
594	continue;
595	}
596
597	if (!MI.mayLoadOrStore())
598	continue;
599
600	if (operationAliasesWithCandidate(MI, C&: Candidate)) {
601	// We have a potential alias, so process the current candidate if we can
602	// and then continue looking for a new candidate.
603	Changed \|= processMergeCandidate(C&: Candidate);
604	continue;
605	}
606
607	// Record this instruction as a potential alias for future stores that are
608	// added to the candidate.
609	Candidate.addPotentialAlias(MI);
610	}
611
612	// Process any candidate left after finishing searching the entire block.
613	Changed \|= processMergeCandidate(C&: Candidate);
614
615	// Erase instructions now that we're no longer iterating over the block.
616	for (auto *MI : InstsToErase)
617	MI->eraseFromParent();
618	InstsToErase.clear();
619	return Changed;
620	}
621
622	/// Check if the store \p Store is a truncstore that can be merged. That is,
623	/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
624	/// Register then it does not need to match and SrcVal is set to the source
625	/// value found.
626	/// On match, returns the start byte offset of the \p SrcVal that is being
627	/// stored.
628	static std::optional<int64_t>
629	getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
630	MachineRegisterInfo &MRI) {
631	Register TruncVal;
632	if (!mi_match(R: Store.getValueReg(), MRI, P: m_GTrunc(Src: m_Reg(R&: TruncVal))))
633	return std::nullopt;
634
635	// The shift amount must be a constant multiple of the narrow type.
636	// It is translated to the offset address in the wide source value "y".
637	//
638	// x = G_LSHR y, ShiftAmtC
639	// s8 z = G_TRUNC x
640	// store z, ...
641	Register FoundSrcVal;
642	int64_t ShiftAmt;
643	if (!mi_match(R: TruncVal, MRI,
644	P: m_any_of(preds: m_GLShr(L: m_Reg(R&: FoundSrcVal), R: m_ICst(Cst&: ShiftAmt)),
645	preds: m_GAShr(L: m_Reg(R&: FoundSrcVal), R: m_ICst(Cst&: ShiftAmt))))) {
646	if (!SrcVal.isValid() \|\| TruncVal == SrcVal) {
647	if (!SrcVal.isValid())
648	SrcVal = TruncVal;
649	return `0`; // If it's the lowest index store.
650	}
651	return std::nullopt;
652	}
653
654	unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
655	if (ShiftAmt % NarrowBits != `0`)
656	return std::nullopt;
657	const unsigned Offset = ShiftAmt / NarrowBits;
658
659	if (SrcVal.isValid() && FoundSrcVal != SrcVal)
660	return std::nullopt;
661
662	if (!SrcVal.isValid())
663	SrcVal = FoundSrcVal;
664	else if (MRI.getType(Reg: SrcVal) != MRI.getType(Reg: FoundSrcVal))
665	return std::nullopt;
666	return Offset;
667	}
668
669	/// Match a pattern where a wide type scalar value is stored by several narrow
670	/// stores. Fold it into a single store or a BSWAP and a store if the targets
671	/// supports it.
672	///
673	/// Assuming little endian target:
674	/// i8 p = ...*
675	/// i32 val = ...
676	/// p[0] = (val >> 0) & 0xFF;
677	/// p[1] = (val >> 8) & 0xFF;
678	/// p[2] = (val >> 16) & 0xFF;
679	/// p[3] = (val >> 24) & 0xFF;
680	/// =>
681	/// ((i32)p) = val;*
682	///
683	/// i8 p = ...*
684	/// i32 val = ...
685	/// p[0] = (val >> 24) & 0xFF;
686	/// p[1] = (val >> 16) & 0xFF;
687	/// p[2] = (val >> 8) & 0xFF;
688	/// p[3] = (val >> 0) & 0xFF;
689	/// =>
690	/// ((i32)p) = BSWAP(val);*
691	bool LoadStoreOpt::mergeTruncStore(GStore &StoreMI,
692	SmallPtrSetImpl<GStore *> &DeletedStores) {
693	LLT MemTy = StoreMI.getMMO().getMemoryType();
694
695	// We only handle merging simple stores of 1-4 bytes.
696	if (!MemTy.isScalar())
697	return false;
698	switch (MemTy.getSizeInBits()) {
699	case `8`:
700	case `16`:
701	case `32`:
702	break;
703	default:
704	return false;
705	}
706	if (!StoreMI.isSimple())
707	return false;
708
709	// We do a simple search for mergeable stores prior to this one.
710	// Any potential alias hazard along the way terminates the search.
711	SmallVector<GStore *> FoundStores;
712
713	// We're looking for:
714	// 1) a (store(trunc(...)))
715	// 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
716	// the partial value stored.
717	// 3) where the offsets form either a little or big-endian sequence.
718
719	auto &LastStore = StoreMI;
720
721	// The single base pointer that all stores must use.
722	Register BaseReg;
723	int64_t LastOffset;
724	if (!mi_match(R: LastStore.getPointerReg(), MRI: *MRI,
725	P: m_GPtrAdd(L: m_Reg(R&: BaseReg), R: m_ICst(Cst&: LastOffset)))) {
726	BaseReg = LastStore.getPointerReg();
727	LastOffset = `0`;
728	}
729
730	GStore *LowestIdxStore = &LastStore;
731	int64_t LowestIdxOffset = LastOffset;
732
733	Register WideSrcVal;
734	auto LowestShiftAmt = getTruncStoreByteOffset(Store&: LastStore, SrcVal&: WideSrcVal, MRI&: *MRI);
735	if (!LowestShiftAmt)
736	return false; // Didn't match a trunc.
737	assert(WideSrcVal.isValid());
738
739	LLT WideStoreTy = MRI->getType(Reg: WideSrcVal);
740	// The wide type might not be a multiple of the memory type, e.g. s48 and s32.
741	if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != `0`)
742	return false;
743	const unsigned NumStoresRequired =
744	WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
745
746	SmallVector<int64_t, `8`> OffsetMap(NumStoresRequired, INT64_MAX);
747	OffsetMap [*LowestShiftAmt] = LastOffset;
748	FoundStores.emplace_back(Args: &LastStore);
749
750	const int MaxInstsToCheck = `10`;
751	int NumInstsChecked = `0`;
752	for (auto II = ++LastStore.getReverseIterator();
753	II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
754	++II) {
755	NumInstsChecked++;
756	GStore *NewStore;
757	if ((NewStore = dyn_cast<GStore>(Val: &*II))) {
758	if (NewStore->getMMO().getMemoryType() != MemTy \|\| !NewStore->isSimple())
759	break;
760	} else if (II ->isLoadFoldBarrier() \|\| II ->mayLoad()) {
761	break;
762	} else {
763	continue; // This is a safe instruction we can look past.
764	}
765
766	Register NewBaseReg;
767	int64_t MemOffset;
768	// Check we're storing to the same base + some offset.
769	if (!mi_match(R: NewStore->getPointerReg(), MRI: *MRI,
770	P: m_GPtrAdd(L: m_Reg(R&: NewBaseReg), R: m_ICst(Cst&: MemOffset)))) {
771	NewBaseReg = NewStore->getPointerReg();
772	MemOffset = `0`;
773	}
774	if (BaseReg != NewBaseReg)
775	break;
776
777	auto ShiftByteOffset = getTruncStoreByteOffset(Store&: NewStore, SrcVal&: WideSrcVal, MRI&: MRI);
778	if (!ShiftByteOffset)
779	break;
780	if (MemOffset < LowestIdxOffset) {
781	LowestIdxOffset = MemOffset;
782	LowestIdxStore = NewStore;
783	}
784
785	// Map the offset in the store and the offset in the combined value, and
786	// early return if it has been set before.
787	if (ShiftByteOffset < `0` \|\| ShiftByteOffset >= NumStoresRequired \|\|
788	OffsetMap [*ShiftByteOffset] != INT64_MAX)
789	break;
790	OffsetMap [*ShiftByteOffset] = MemOffset;
791
792	FoundStores.emplace_back(Args&: NewStore);
793	// Reset counter since we've found a matching inst.
794	NumInstsChecked = `0`;
795	if (FoundStores.size() == NumStoresRequired)
796	break;
797	}
798
799	if (FoundStores.size() != NumStoresRequired) {
800	if (FoundStores.size() == `1`)
801	return false;
802	// We didn't find enough stores to merge into the size of the original
803	// source value, but we may be able to generate a smaller store if we
804	// truncate the source value.
805	WideStoreTy = LLT::scalar(SizeInBits: FoundStores.size() * MemTy.getScalarSizeInBits());
806	}
807
808	unsigned NumStoresFound = FoundStores.size();
809
810	const auto &DL = LastStore.getMF()->getDataLayout();
811	auto &C = LastStore.getMF()->getFunction().getContext();
812	// Check that a store of the wide type is both allowed and fast on the target
813	unsigned Fast = `0`;
814	bool Allowed = TLI->allowsMemoryAccess(
815	Context&: C, DL, Ty: WideStoreTy, MMO: LowestIdxStore->getMMO(), Fast: &Fast);
816	if (!Allowed \|\| !Fast)
817	return false;
818
819	// Check if the pieces of the value are going to the expected places in memory
820	// to merge the stores.
821	unsigned NarrowBits = MemTy.getScalarSizeInBits();
822	auto checkOffsets = [&](bool MatchLittleEndian) {
823	if (MatchLittleEndian) {
824	for (unsigned i = `0`; i != NumStoresFound; ++i)
825	if (OffsetMap [i] != i * (NarrowBits / `8`) + LowestIdxOffset)
826	return false;
827	} else { // MatchBigEndian by reversing loop counter.
828	for (unsigned i = `0`, j = NumStoresFound - `1`; i != NumStoresFound;
829	++i, --j)
830	if (OffsetMap [j] != i * (NarrowBits / `8`) + LowestIdxOffset)
831	return false;
832	}
833	return true;
834	};
835
836	// Check if the offsets line up for the native data layout of this target.
837	bool NeedBswap = false;
838	bool NeedRotate = false;
839	if (!checkOffsets (DL.isLittleEndian())) {
840	// Special-case: check if byte offsets line up for the opposite endian.
841	if (NarrowBits == `8` && checkOffsets (DL.isBigEndian()))
842	NeedBswap = true;
843	else if (NumStoresFound == `2` && checkOffsets (DL.isBigEndian()))
844	NeedRotate = true;
845	else
846	return false;
847	}
848
849	if (NeedBswap &&
850	!isLegalOrBeforeLegalizer(Query: {TargetOpcode::G_BSWAP, {WideStoreTy}}, MF&: *MF))
851	return false;
852	if (NeedRotate &&
853	!isLegalOrBeforeLegalizer(
854	Query: {TargetOpcode::G_ROTR, {WideStoreTy, WideStoreTy}}, MF&: *MF))
855	return false;
856
857	Builder.setInstrAndDebugLoc(StoreMI);
858
859	if (WideStoreTy != MRI->getType(Reg: WideSrcVal))
860	WideSrcVal = Builder.buildTrunc(Res: WideStoreTy, Op: WideSrcVal).getReg(Idx: `0`);
861
862	if (NeedBswap) {
863	WideSrcVal = Builder.buildBSwap(Dst: WideStoreTy, Src0: WideSrcVal).getReg(Idx: `0`);
864	} else if (NeedRotate) {
865	assert(WideStoreTy.getSizeInBits() % `2` == `0` &&
866	"Unexpected type for rotate");
867	auto RotAmt =
868	Builder.buildConstant(Res: WideStoreTy, Val: WideStoreTy.getSizeInBits() / `2`);
869	WideSrcVal =
870	Builder.buildRotateRight(Dst: WideStoreTy, Src: WideSrcVal, Amt: RotAmt).getReg(Idx: `0`);
871	}
872
873	Builder.buildStore(Val: WideSrcVal, Addr: LowestIdxStore->getPointerReg(),
874	PtrInfo: LowestIdxStore->getMMO().getPointerInfo(),
875	Alignment: LowestIdxStore->getMMO().getAlign());
876
877	// Erase the old stores.
878	for (auto *ST : FoundStores) {
879	ST->eraseFromParent();
880	DeletedStores.insert(Ptr: ST);
881	}
882	return true;
883	}
884
885	bool LoadStoreOpt::mergeTruncStoresBlock(MachineBasicBlock &BB) {
886	bool Changed = false;
887	SmallVector<GStore *, `16`> Stores;
888	SmallPtrSet<GStore *, `8`> DeletedStores;
889	// Walk up the block so we can see the most eligible stores.
890	for (MachineInstr &MI : llvm::reverse(C&: BB))
891	if (auto *StoreMI = dyn_cast<GStore>(Val: &MI))
892	Stores.emplace_back(Args&: StoreMI);
893
894	for (auto *StoreMI : Stores) {
895	if (DeletedStores.count(Ptr: StoreMI))
896	continue;
897	if (mergeTruncStore(StoreMI&: *StoreMI, DeletedStores))
898	Changed = true;
899	}
900	return Changed;
901	}
902
903	bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
904	bool Changed = false;
905	for (auto &BB : MF){
906	Changed \|= mergeBlockStores(MBB&: BB);
907	Changed \|= mergeTruncStoresBlock(BB);
908	}
909
910	// Erase all dead instructions left over by the merging.
911	if (Changed) {
912	for (auto &BB : MF) {
913	for (auto &I : make_early_inc_range(Range: make_range(x: BB.rbegin(), y: BB.rend()))) {
914	if (isTriviallyDead(MI: I, MRI: *MRI))
915	I.eraseFromParent();
916	}
917	}
918	}
919
920	return Changed;
921	}
922
923	void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
924	// Query the legalizer info to record what store types are legal.
925	// We record this because we don't want to bother trying to merge stores into
926	// illegal ones, which would just result in being split again.
927
928	if (LegalStoreSizes.count(Val: AddrSpace)) {
929	assert(LegalStoreSizes[AddrSpace].any());
930	return; // Already cached sizes for this address space.
931	}
932
933	// Need to reserve at least MaxStoreSizeToForm + 1 bits.
934	BitVector LegalSizes(MaxStoreSizeToForm * `2`);
935	const auto &LI = *MF->getSubtarget().getLegalizerInfo();
936	const auto &DL = MF->getFunction().getParent()->getDataLayout();
937	Type *IRPtrTy = PointerType::get(C&: MF->getFunction().getContext(), AddressSpace: AddrSpace);
938	LLT PtrTy = getLLTForType(Ty&: *IRPtrTy, DL);
939	// We assume that we're not going to be generating any stores wider than
940	// MaxStoreSizeToForm bits for now.
941	for (unsigned Size = `2`; Size <= MaxStoreSizeToForm; Size *= `2`) {
942	LLT Ty = LLT::scalar(SizeInBits: Size);
943	SmallVector<LegalityQuery::MemDesc, `2`> MemDescrs(
944	{{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}});
945	SmallVector<LLT> StoreTys({Ty, PtrTy});
946	LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
947	LegalizeActionStep ActionStep = LI.getAction(Query: Q);
948	if (ActionStep.Action == LegalizeActions::Legal)
949	LegalSizes.set(Size);
950	}
951	assert(LegalSizes.any() && "Expected some store sizes to be legal!");
952	LegalStoreSizes [AddrSpace] = LegalSizes;
953	}
954
955	bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) {
956	// If the ISel pipeline failed, do not bother running that pass.
957	if (MF.getProperties().hasProperty(
958	P: MachineFunctionProperties::Property::FailedISel))
959	return false;
960
961	LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
962	<< `'\n'`);
963
964	init(MF);
965	bool Changed = false;
966	Changed \|= mergeFunctionStores(MF);
967
968	LegalStoreSizes.clear();
969	return Changed;
970	}
971

source code of llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp