1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
15#include "AArch64FrameLowering.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PointerAuth.h"
18#include "AArch64Subtarget.h"
19#include "MCTargetDesc/AArch64AddressingModes.h"
20#include "Utils/AArch64BaseInfo.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/CodeGen/LivePhysRegs.h"
25#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineCombinerPattern.h"
27#include "llvm/CodeGen/MachineFrameInfo.h"
28#include "llvm/CodeGen/MachineFunction.h"
29#include "llvm/CodeGen/MachineInstr.h"
30#include "llvm/CodeGen/MachineInstrBuilder.h"
31#include "llvm/CodeGen/MachineMemOperand.h"
32#include "llvm/CodeGen/MachineModuleInfo.h"
33#include "llvm/CodeGen/MachineOperand.h"
34#include "llvm/CodeGen/MachineRegisterInfo.h"
35#include "llvm/CodeGen/RegisterScavenging.h"
36#include "llvm/CodeGen/StackMaps.h"
37#include "llvm/CodeGen/TargetRegisterInfo.h"
38#include "llvm/CodeGen/TargetSubtargetInfo.h"
39#include "llvm/IR/DebugInfoMetadata.h"
40#include "llvm/IR/DebugLoc.h"
41#include "llvm/IR/GlobalValue.h"
42#include "llvm/MC/MCAsmInfo.h"
43#include "llvm/MC/MCInst.h"
44#include "llvm/MC/MCInstBuilder.h"
45#include "llvm/MC/MCInstrDesc.h"
46#include "llvm/Support/Casting.h"
47#include "llvm/Support/CodeGen.h"
48#include "llvm/Support/CommandLine.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Support/LEB128.h"
51#include "llvm/Support/MathExtras.h"
52#include "llvm/Target/TargetMachine.h"
53#include "llvm/Target/TargetOptions.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57#include <utility>
58
59using namespace llvm;
60
61#define GET_INSTRINFO_CTOR_DTOR
62#include "AArch64GenInstrInfo.inc"
63
static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));

static cl::opt<unsigned>
    BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
                      cl::desc("Restrict range of B instructions (DEBUG)"));
79
80AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
81 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
82 AArch64::CATCHRET),
83 RI(STI.getTargetTriple()), Subtarget(STI) {}
84
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may occupy. This is an upper bound.
87unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
88 const MachineBasicBlock &MBB = *MI.getParent();
89 const MachineFunction *MF = MBB.getParent();
90 const Function &F = MF->getFunction();
91 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
92
93 {
94 auto Op = MI.getOpcode();
95 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
96 return getInlineAsmLength(MI.getOperand(i: 0).getSymbolName(), *MAI);
97 }
98
99 // Meta-instructions emit no code.
100 if (MI.isMetaInstruction())
101 return 0;
102
103 // FIXME: We currently only handle pseudoinstructions that don't get expanded
104 // before the assembly printer.
105 unsigned NumBytes = 0;
106 const MCInstrDesc &Desc = MI.getDesc();
107
  // The size should preferably be set in
  // llvm/lib/Target/AArch64/AArch64InstrInfo.td (the default case).
  // The specific cases below handle instructions of variable size.
111 switch (Desc.getOpcode()) {
112 default:
113 if (Desc.getSize())
114 return Desc.getSize();
115
116 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
117 // with fixed constant size but not specified in .td file) is a normal
118 // 4-byte insn.
119 NumBytes = 4;
120 break;
121 case TargetOpcode::STACKMAP:
122 // The upper bound for a stackmap intrinsic is the full length of its shadow
123 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
124 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
125 break;
126 case TargetOpcode::PATCHPOINT:
127 // The size of the patchpoint intrinsic is the number of bytes requested
128 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
129 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
130 break;
131 case TargetOpcode::STATEPOINT:
132 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
133 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
134 // No patch bytes means a normal call inst is emitted
135 if (NumBytes == 0)
136 NumBytes = 4;
137 break;
138 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
139 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
140 // instructions are expanded to the specified number of NOPs. Otherwise,
141 // they are expanded to 36-byte XRay sleds.
142 NumBytes =
143 F.getFnAttributeAsParsedInteger(Kind: "patchable-function-entry", Default: 9) * 4;
144 break;
145 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
146 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
147 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
148 NumBytes = 36;
149 break;
150 case TargetOpcode::PATCHABLE_EVENT_CALL:
151 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
152 NumBytes = 24;
153 break;
154
155 case AArch64::SPACE:
156 NumBytes = MI.getOperand(i: 1).getImm();
157 break;
158 case TargetOpcode::BUNDLE:
159 NumBytes = getInstBundleLength(MI);
160 break;
161 }
162
163 return NumBytes;
164}
165
166unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
167 unsigned Size = 0;
168 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
169 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
170 while (++I != E && I->isInsideBundle()) {
171 assert(!I->isBundle() && "No nested bundle!");
172 Size += getInstSizeInBytes(MI: *I);
173 }
174 return Size;
175}
176
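// parseCondBranch() below encodes the parsed condition into Cond. For
// reference, a sketch of the layouts it produces:
//   Bcc:        Cond = { <cc> }
//   CBZ/CBNZ:   Cond = { -1, <opcode>, <reg> }
//   TBZ/TBNZ:   Cond = { -1, <opcode>, <reg>, <bit #> }
// instantiateCondBranch() and insertSelect() rely on these layouts.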
177static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
178 SmallVectorImpl<MachineOperand> &Cond) {
179 // Block ends with fall-through condbranch.
180 switch (LastInst->getOpcode()) {
181 default:
182 llvm_unreachable("Unknown branch instruction?");
183 case AArch64::Bcc:
184 Target = LastInst->getOperand(i: 1).getMBB();
185 Cond.push_back(Elt: LastInst->getOperand(i: 0));
186 break;
187 case AArch64::CBZW:
188 case AArch64::CBZX:
189 case AArch64::CBNZW:
190 case AArch64::CBNZX:
191 Target = LastInst->getOperand(i: 1).getMBB();
192 Cond.push_back(Elt: MachineOperand::CreateImm(Val: -1));
193 Cond.push_back(Elt: MachineOperand::CreateImm(Val: LastInst->getOpcode()));
194 Cond.push_back(Elt: LastInst->getOperand(i: 0));
195 break;
196 case AArch64::TBZW:
197 case AArch64::TBZX:
198 case AArch64::TBNZW:
199 case AArch64::TBNZX:
200 Target = LastInst->getOperand(i: 2).getMBB();
201 Cond.push_back(Elt: MachineOperand::CreateImm(Val: -1));
202 Cond.push_back(Elt: MachineOperand::CreateImm(Val: LastInst->getOpcode()));
203 Cond.push_back(Elt: LastInst->getOperand(i: 0));
204 Cond.push_back(Elt: LastInst->getOperand(i: 1));
205 }
206}
207
208static unsigned getBranchDisplacementBits(unsigned Opc) {
209 switch (Opc) {
210 default:
211 llvm_unreachable("unexpected opcode!");
212 case AArch64::B:
213 return BDisplacementBits;
214 case AArch64::TBNZW:
215 case AArch64::TBZW:
216 case AArch64::TBNZX:
217 case AArch64::TBZX:
218 return TBZDisplacementBits;
219 case AArch64::CBNZW:
220 case AArch64::CBZW:
221 case AArch64::CBNZX:
222 case AArch64::CBZX:
223 return CBZDisplacementBits;
224 case AArch64::Bcc:
225 return BCCDisplacementBits;
226 }
227}
228
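// For example, with the default settings above the branch ranges work out
// roughly as follows (offsets are counted in instructions, i.e. BrOffset / 4):
//   TB[N]Z : 14-bit signed word offset -> +/- 32 KiB
//   CB[N]Z : 19-bit signed word offset -> +/-  1 MiB
//   Bcc    : 19-bit signed word offset -> +/-  1 MiB
//   B      : 26-bit signed word offset -> +/- 128 MiB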
229bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
230 int64_t BrOffset) const {
231 unsigned Bits = getBranchDisplacementBits(Opc: BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
234 return isIntN(N: Bits, x: BrOffset / 4);
235}
236
237MachineBasicBlock *
238AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
239 switch (MI.getOpcode()) {
240 default:
241 llvm_unreachable("unexpected opcode!");
242 case AArch64::B:
243 return MI.getOperand(i: 0).getMBB();
244 case AArch64::TBZW:
245 case AArch64::TBNZW:
246 case AArch64::TBZX:
247 case AArch64::TBNZX:
248 return MI.getOperand(i: 2).getMBB();
249 case AArch64::CBZW:
250 case AArch64::CBNZW:
251 case AArch64::CBZX:
252 case AArch64::CBNZX:
253 case AArch64::Bcc:
254 return MI.getOperand(i: 1).getMBB();
255 }
256}
257
258void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
259 MachineBasicBlock &NewDestBB,
260 MachineBasicBlock &RestoreBB,
261 const DebugLoc &DL,
262 int64_t BrOffset,
263 RegScavenger *RS) const {
264 assert(RS && "RegScavenger required for long branching");
265 assert(MBB.empty() &&
266 "new block should be inserted for expanding unconditional branch");
267 assert(MBB.pred_size() == 1);
268 assert(RestoreBB.empty() &&
269 "restore block should be inserted for restoring clobbered registers");
270
271 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
272 // Offsets outside of the signed 33-bit range are not supported for ADRP +
273 // ADD.
274 if (!isInt<33>(x: BrOffset))
275 report_fatal_error(
276 reason: "Branch offsets outside of the signed 33-bit range not supported");
277
278 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
279 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
280 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
281 .addReg(Reg)
282 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
283 .addImm(0);
284 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
285 };
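  // For a destination within ADRP+ADD range, the lambda above emits a
  // sequence along these lines (register chosen by the caller; sketch only):
  //   adrp x16, <DestBB>             ; page address of the destination
  //   add  x16, x16, :lo12:<DestBB>  ; low 12 bits of the destination
  //   br   x16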
286
287 RS->enterBasicBlockEnd(MBB);
288 // If X16 is unused, we can rely on the linker to insert a range extension
289 // thunk if NewDestBB is out of range of a single B instruction.
290 constexpr Register Reg = AArch64::X16;
291 if (!RS->isRegUsed(Reg)) {
292 insertUnconditionalBranch(MBB, &NewDestBB, DL);
293 RS->setRegUsed(Reg);
294 return;
295 }
296
297 // If there's a free register and it's worth inflating the code size,
298 // manually insert the indirect branch.
299 Register Scavenged = RS->FindUnusedReg(RC: &AArch64::GPR64RegClass);
300 if (Scavenged != AArch64::NoRegister &&
301 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
302 buildIndirectBranch(Scavenged, NewDestBB);
303 RS->setRegUsed(Reg: Scavenged);
304 return;
305 }
306
307 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
308 // with red zones.
309 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
310 if (!AFI || AFI->hasRedZone().value_or(u: true))
311 report_fatal_error(
312 reason: "Unable to insert indirect branch inside function that has red zone");
313
314 // Otherwise, spill X16 and defer range extension to the linker.
315 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
316 .addReg(AArch64::SP, RegState::Define)
317 .addReg(Reg)
318 .addReg(AArch64::SP)
319 .addImm(-16);
320
321 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
322
323 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
324 .addReg(AArch64::SP, RegState::Define)
325 .addReg(Reg, RegState::Define)
326 .addReg(AArch64::SP)
327 .addImm(16);
328}
329
330// Branch analysis.
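//
// As an illustration of the cases handled below, a block ending in
//   b.lt %bb.true
//   b    %bb.false
// is reported with TBB = %bb.true, FBB = %bb.false and Cond = { LT }, while a
// block ending in a lone "cbz x0, %bb.target" that falls through is reported
// with TBB = %bb.target, no FBB, and Cond = { -1, CBZX, x0 }.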
331bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
332 MachineBasicBlock *&TBB,
333 MachineBasicBlock *&FBB,
334 SmallVectorImpl<MachineOperand> &Cond,
335 bool AllowModify) const {
336 // If the block has no terminators, it just falls into the block after it.
337 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
338 if (I == MBB.end())
339 return false;
340
341 // Skip over SpeculationBarrierEndBB terminators
342 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
343 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
344 --I;
345 }
346
347 if (!isUnpredicatedTerminator(*I))
348 return false;
349
350 // Get the last instruction in the block.
351 MachineInstr *LastInst = &*I;
352
353 // If there is only one terminator instruction, process it.
354 unsigned LastOpc = LastInst->getOpcode();
355 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
356 if (isUncondBranchOpcode(Opc: LastOpc)) {
357 TBB = LastInst->getOperand(i: 0).getMBB();
358 return false;
359 }
360 if (isCondBranchOpcode(Opc: LastOpc)) {
361 // Block ends with fall-through condbranch.
362 parseCondBranch(LastInst, Target&: TBB, Cond);
363 return false;
364 }
365 return true; // Can't handle indirect branch.
366 }
367
368 // Get the instruction before it if it is a terminator.
369 MachineInstr *SecondLastInst = &*I;
370 unsigned SecondLastOpc = SecondLastInst->getOpcode();
371
372 // If AllowModify is true and the block ends with two or more unconditional
373 // branches, delete all but the first unconditional branch.
374 if (AllowModify && isUncondBranchOpcode(Opc: LastOpc)) {
375 while (isUncondBranchOpcode(Opc: SecondLastOpc)) {
376 LastInst->eraseFromParent();
377 LastInst = SecondLastInst;
378 LastOpc = LastInst->getOpcode();
379 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
381 TBB = LastInst->getOperand(i: 0).getMBB();
382 return false;
383 }
384 SecondLastInst = &*I;
385 SecondLastOpc = SecondLastInst->getOpcode();
386 }
387 }
388
  // If we're allowed to modify and the block ends in an unconditional branch
  // which could simply fall through, remove the branch. (Note: This case only
  // matters when we can't understand the whole sequence; otherwise it's also
  // handled by BranchFolding.cpp.)
393 if (AllowModify && isUncondBranchOpcode(Opc: LastOpc) &&
394 MBB.isLayoutSuccessor(MBB: getBranchDestBlock(MI: *LastInst))) {
395 LastInst->eraseFromParent();
396 LastInst = SecondLastInst;
397 LastOpc = LastInst->getOpcode();
398 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
399 assert(!isUncondBranchOpcode(LastOpc) &&
400 "unreachable unconditional branches removed above");
401
402 if (isCondBranchOpcode(Opc: LastOpc)) {
403 // Block ends with fall-through condbranch.
404 parseCondBranch(LastInst, Target&: TBB, Cond);
405 return false;
406 }
407 return true; // Can't handle indirect branch.
408 }
409 SecondLastInst = &*I;
410 SecondLastOpc = SecondLastInst->getOpcode();
411 }
412
413 // If there are three terminators, we don't know what sort of block this is.
414 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
415 return true;
416
417 // If the block ends with a B and a Bcc, handle it.
418 if (isCondBranchOpcode(Opc: SecondLastOpc) && isUncondBranchOpcode(Opc: LastOpc)) {
419 parseCondBranch(LastInst: SecondLastInst, Target&: TBB, Cond);
420 FBB = LastInst->getOperand(i: 0).getMBB();
421 return false;
422 }
423
424 // If the block ends with two unconditional branches, handle it. The second
425 // one is not executed, so remove it.
426 if (isUncondBranchOpcode(Opc: SecondLastOpc) && isUncondBranchOpcode(Opc: LastOpc)) {
427 TBB = SecondLastInst->getOperand(i: 0).getMBB();
428 I = LastInst;
429 if (AllowModify)
430 I->eraseFromParent();
431 return false;
432 }
433
434 // ...likewise if it ends with an indirect branch followed by an unconditional
435 // branch.
436 if (isIndirectBranchOpcode(Opc: SecondLastOpc) && isUncondBranchOpcode(Opc: LastOpc)) {
437 I = LastInst;
438 if (AllowModify)
439 I->eraseFromParent();
440 return true;
441 }
442
443 // Otherwise, can't handle this.
444 return true;
445}
446
447bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
448 MachineBranchPredicate &MBP,
449 bool AllowModify) const {
  // For the moment, handle only a block which ends with a cb(n)z followed by
  // a fall-through. Why this? Because it is a common form.
  // TODO: Should we handle b.cc?
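  //
  // For example (illustrative registers), a block ending in
  //   cbnz x0, %bb.true
  //   ; fall through to the next block
  // is reported as LHS = x0, RHS = #0, Predicate = PRED_NE, with TrueDest set
  // to %bb.true and FalseDest set to the fall-through block.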
453
454 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
455 if (I == MBB.end())
456 return true;
457
458 // Skip over SpeculationBarrierEndBB terminators
459 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
460 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
461 --I;
462 }
463
464 if (!isUnpredicatedTerminator(*I))
465 return true;
466
467 // Get the last instruction in the block.
468 MachineInstr *LastInst = &*I;
469 unsigned LastOpc = LastInst->getOpcode();
470 if (!isCondBranchOpcode(Opc: LastOpc))
471 return true;
472
473 switch (LastOpc) {
474 default:
475 return true;
476 case AArch64::CBZW:
477 case AArch64::CBZX:
478 case AArch64::CBNZW:
479 case AArch64::CBNZX:
480 break;
481 };
482
483 MBP.TrueDest = LastInst->getOperand(i: 1).getMBB();
484 assert(MBP.TrueDest && "expected!");
485 MBP.FalseDest = MBB.getNextNode();
486
487 MBP.ConditionDef = nullptr;
488 MBP.SingleUseCondition = false;
489
490 MBP.LHS = LastInst->getOperand(i: 0);
491 MBP.RHS = MachineOperand::CreateImm(Val: 0);
492 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
493 : MachineBranchPredicate::PRED_EQ;
494 return false;
495}
496
497bool AArch64InstrInfo::reverseBranchCondition(
498 SmallVectorImpl<MachineOperand> &Cond) const {
499 if (Cond[0].getImm() != -1) {
500 // Regular Bcc
501 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
502 Cond[0].setImm(AArch64CC::getInvertedCondCode(Code: CC));
503 } else {
504 // Folded compare-and-branch
505 switch (Cond[1].getImm()) {
506 default:
507 llvm_unreachable("Unknown conditional branch!");
508 case AArch64::CBZW:
509 Cond[1].setImm(AArch64::CBNZW);
510 break;
511 case AArch64::CBNZW:
512 Cond[1].setImm(AArch64::CBZW);
513 break;
514 case AArch64::CBZX:
515 Cond[1].setImm(AArch64::CBNZX);
516 break;
517 case AArch64::CBNZX:
518 Cond[1].setImm(AArch64::CBZX);
519 break;
520 case AArch64::TBZW:
521 Cond[1].setImm(AArch64::TBNZW);
522 break;
523 case AArch64::TBNZW:
524 Cond[1].setImm(AArch64::TBZW);
525 break;
526 case AArch64::TBZX:
527 Cond[1].setImm(AArch64::TBNZX);
528 break;
529 case AArch64::TBNZX:
530 Cond[1].setImm(AArch64::TBZX);
531 break;
532 }
533 }
534
535 return false;
536}
537
538unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
539 int *BytesRemoved) const {
540 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
541 if (I == MBB.end())
542 return 0;
543
544 if (!isUncondBranchOpcode(Opc: I->getOpcode()) &&
545 !isCondBranchOpcode(Opc: I->getOpcode()))
546 return 0;
547
548 // Remove the branch.
549 I->eraseFromParent();
550
551 I = MBB.end();
552
553 if (I == MBB.begin()) {
554 if (BytesRemoved)
555 *BytesRemoved = 4;
556 return 1;
557 }
558 --I;
559 if (!isCondBranchOpcode(Opc: I->getOpcode())) {
560 if (BytesRemoved)
561 *BytesRemoved = 4;
562 return 1;
563 }
564
565 // Remove the branch.
566 I->eraseFromParent();
567 if (BytesRemoved)
568 *BytesRemoved = 8;
569
570 return 2;
571}
572
573void AArch64InstrInfo::instantiateCondBranch(
574 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
575 ArrayRef<MachineOperand> Cond) const {
576 if (Cond[0].getImm() != -1) {
577 // Regular Bcc
578 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
579 } else {
580 // Folded compare-and-branch
581 // Note that we use addOperand instead of addReg to keep the flags.
582 const MachineInstrBuilder MIB =
583 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
584 if (Cond.size() > 3)
585 MIB.addImm(Val: Cond[3].getImm());
586 MIB.addMBB(MBB: TBB);
587 }
588}
589
590unsigned AArch64InstrInfo::insertBranch(
591 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
592 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
593 // Shouldn't be a fall through.
594 assert(TBB && "insertBranch must not be told to insert a fallthrough");
595
596 if (!FBB) {
597 if (Cond.empty()) // Unconditional branch?
598 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
599 else
600 instantiateCondBranch(MBB, DL, TBB, Cond);
601
602 if (BytesAdded)
603 *BytesAdded = 4;
604
605 return 1;
606 }
607
608 // Two-way conditional branch.
609 instantiateCondBranch(MBB, DL, TBB, Cond);
610 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
611
612 if (BytesAdded)
613 *BytesAdded = 8;
614
615 return 2;
616}
617
618// Find the original register that VReg is copied from.
619static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
620 while (Register::isVirtualRegister(Reg: VReg)) {
621 const MachineInstr *DefMI = MRI.getVRegDef(Reg: VReg);
622 if (!DefMI->isFullCopy())
623 return VReg;
624 VReg = DefMI->getOperand(i: 1).getReg();
625 }
626 return VReg;
627}
628
// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode and, if NewVReg is
// non-null, the replacement source register.
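//
// Illustrative example (register numbers are arbitrary): the pair
//   add  w8, w0, #1
//   csel w9, w8, w1, eq
// can be folded into the single instruction
//   csinc w9, w1, w0, ne      ; w9 = ne ? w1 : (w0 + 1)
// which is what canFoldIntoCSel() enables insertSelect() to do.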
632static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
633 unsigned *NewVReg = nullptr) {
634 VReg = removeCopies(MRI, VReg);
635 if (!Register::isVirtualRegister(Reg: VReg))
636 return 0;
637
638 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
639 const MachineInstr *DefMI = MRI.getVRegDef(Reg: VReg);
640 unsigned Opc = 0;
641 unsigned SrcOpNum = 0;
642 switch (DefMI->getOpcode()) {
643 case AArch64::ADDSXri:
644 case AArch64::ADDSWri:
645 // if NZCV is used, do not fold.
646 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
647 true) == -1)
648 return 0;
649 // fall-through to ADDXri and ADDWri.
650 [[fallthrough]];
651 case AArch64::ADDXri:
652 case AArch64::ADDWri:
653 // add x, 1 -> csinc.
654 if (!DefMI->getOperand(i: 2).isImm() || DefMI->getOperand(i: 2).getImm() != 1 ||
655 DefMI->getOperand(i: 3).getImm() != 0)
656 return 0;
657 SrcOpNum = 1;
658 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
659 break;
660
661 case AArch64::ORNXrr:
662 case AArch64::ORNWrr: {
663 // not x -> csinv, represented as orn dst, xzr, src.
664 unsigned ZReg = removeCopies(MRI, VReg: DefMI->getOperand(i: 1).getReg());
665 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
666 return 0;
667 SrcOpNum = 2;
668 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
669 break;
670 }
671
672 case AArch64::SUBSXrr:
673 case AArch64::SUBSWrr:
674 // if NZCV is used, do not fold.
675 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
676 true) == -1)
677 return 0;
678 // fall-through to SUBXrr and SUBWrr.
679 [[fallthrough]];
680 case AArch64::SUBXrr:
681 case AArch64::SUBWrr: {
682 // neg x -> csneg, represented as sub dst, xzr, src.
683 unsigned ZReg = removeCopies(MRI, VReg: DefMI->getOperand(i: 1).getReg());
684 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
685 return 0;
686 SrcOpNum = 2;
687 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
688 break;
689 }
690 default:
691 return 0;
692 }
693 assert(Opc && SrcOpNum && "Missing parameters");
694
695 if (NewVReg)
696 *NewVReg = DefMI->getOperand(i: SrcOpNum).getReg();
697 return Opc;
698}
699
700bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
701 ArrayRef<MachineOperand> Cond,
702 Register DstReg, Register TrueReg,
703 Register FalseReg, int &CondCycles,
704 int &TrueCycles,
705 int &FalseCycles) const {
706 // Check register classes.
707 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
708 const TargetRegisterClass *RC =
709 RI.getCommonSubClass(MRI.getRegClass(Reg: TrueReg), MRI.getRegClass(Reg: FalseReg));
710 if (!RC)
711 return false;
712
713 // Also need to check the dest regclass, in case we're trying to optimize
714 // something like:
715 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
716 if (!RI.getCommonSubClass(RC, MRI.getRegClass(Reg: DstReg)))
717 return false;
718
719 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
720 unsigned ExtraCondLat = Cond.size() != 1;
721
722 // GPRs are handled by csel.
723 // FIXME: Fold in x+1, -x, and ~x when applicable.
724 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
725 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
726 // Single-cycle csel, csinc, csinv, and csneg.
727 CondCycles = 1 + ExtraCondLat;
728 TrueCycles = FalseCycles = 1;
729 if (canFoldIntoCSel(MRI, VReg: TrueReg))
730 TrueCycles = 0;
731 else if (canFoldIntoCSel(MRI, VReg: FalseReg))
732 FalseCycles = 0;
733 return true;
734 }
735
736 // Scalar floating point is handled by fcsel.
737 // FIXME: Form fabs, fmin, and fmax when applicable.
738 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
739 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
740 CondCycles = 5 + ExtraCondLat;
741 TrueCycles = FalseCycles = 2;
742 return true;
743 }
744
745 // Can't do vectors.
746 return false;
747}
748
749void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
750 MachineBasicBlock::iterator I,
751 const DebugLoc &DL, Register DstReg,
752 ArrayRef<MachineOperand> Cond,
753 Register TrueReg, Register FalseReg) const {
754 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
755
756 // Parse the condition code, see parseCondBranch() above.
757 AArch64CC::CondCode CC;
758 switch (Cond.size()) {
759 default:
760 llvm_unreachable("Unknown condition opcode in Cond");
761 case 1: // b.cc
762 CC = AArch64CC::CondCode(Cond[0].getImm());
763 break;
764 case 3: { // cbz/cbnz
765 // We must insert a compare against 0.
766 bool Is64Bit;
767 switch (Cond[1].getImm()) {
768 default:
769 llvm_unreachable("Unknown branch opcode in Cond");
770 case AArch64::CBZW:
771 Is64Bit = false;
772 CC = AArch64CC::EQ;
773 break;
774 case AArch64::CBZX:
775 Is64Bit = true;
776 CC = AArch64CC::EQ;
777 break;
778 case AArch64::CBNZW:
779 Is64Bit = false;
780 CC = AArch64CC::NE;
781 break;
782 case AArch64::CBNZX:
783 Is64Bit = true;
784 CC = AArch64CC::NE;
785 break;
786 }
787 Register SrcReg = Cond[2].getReg();
788 if (Is64Bit) {
789 // cmp reg, #0 is actually subs xzr, reg, #0.
790 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
791 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
792 .addReg(SrcReg)
793 .addImm(0)
794 .addImm(0);
795 } else {
796 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
797 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
798 .addReg(SrcReg)
799 .addImm(0)
800 .addImm(0);
801 }
802 break;
803 }
804 case 4: { // tbz/tbnz
805 // We must insert a tst instruction.
806 switch (Cond[1].getImm()) {
807 default:
808 llvm_unreachable("Unknown branch opcode in Cond");
809 case AArch64::TBZW:
810 case AArch64::TBZX:
811 CC = AArch64CC::EQ;
812 break;
813 case AArch64::TBNZW:
814 case AArch64::TBNZX:
815 CC = AArch64CC::NE;
816 break;
817 }
818 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
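    // For instance, a TBNZ on bit 3 of a W register is lowered here to
    //   ands wzr, w<src>, #0x8
    // and the csel built below then uses the NE condition.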
819 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
820 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
821 .addReg(Cond[2].getReg())
822 .addImm(
823 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
824 else
825 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
826 .addReg(Cond[2].getReg())
827 .addImm(
828 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
829 break;
830 }
831 }
832
833 unsigned Opc = 0;
834 const TargetRegisterClass *RC = nullptr;
835 bool TryFold = false;
836 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
837 RC = &AArch64::GPR64RegClass;
838 Opc = AArch64::CSELXr;
839 TryFold = true;
840 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
841 RC = &AArch64::GPR32RegClass;
842 Opc = AArch64::CSELWr;
843 TryFold = true;
844 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
845 RC = &AArch64::FPR64RegClass;
846 Opc = AArch64::FCSELDrrr;
847 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
848 RC = &AArch64::FPR32RegClass;
849 Opc = AArch64::FCSELSrrr;
850 }
851 assert(RC && "Unsupported regclass");
852
853 // Try folding simple instructions into the csel.
854 if (TryFold) {
855 unsigned NewVReg = 0;
856 unsigned FoldedOpc = canFoldIntoCSel(MRI, VReg: TrueReg, NewVReg: &NewVReg);
857 if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
860 CC = AArch64CC::getInvertedCondCode(Code: CC);
861 TrueReg = FalseReg;
862 } else
863 FoldedOpc = canFoldIntoCSel(MRI, VReg: FalseReg, NewVReg: &NewVReg);
864
865 // Fold the operation. Leave any dead instructions for DCE to clean up.
866 if (FoldedOpc) {
867 FalseReg = NewVReg;
868 Opc = FoldedOpc;
      // This extends the live range of NewVReg.
870 MRI.clearKillFlags(Reg: NewVReg);
871 }
872 }
873
  // Pull all virtual registers into the appropriate class.
875 MRI.constrainRegClass(Reg: TrueReg, RC);
876 MRI.constrainRegClass(Reg: FalseReg, RC);
877
878 // Insert the csel.
879 BuildMI(MBB, I, DL, get(Opc), DstReg)
880 .addReg(TrueReg)
881 .addReg(FalseReg)
882 .addImm(CC);
883}
884
885// Return true if Imm can be loaded into a register by a "cheap" sequence of
886// instructions. For now, "cheap" means at most two instructions.
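//
// For instance, a constant such as 0x0000ffff0000ffff is cheap (it is
// encodable as a single ORR with a logical immediate), whereas an arbitrary
// 64-bit constant like 0x123456789abcdef0 needs a MOVZ plus three MOVKs and
// is therefore not considered cheap here.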
887static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
888 if (BitSize == 32)
889 return true;
890
891 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
892 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(i: 1).getImm());
893 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
894 AArch64_IMM::expandMOVImm(Imm, BitSize, Insn&: Is);
895
896 return Is.size() <= 2;
897}
898
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in the future.
901bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
902 if (Subtarget.hasExynosCheapAsMoveHandling()) {
903 if (isExynosCheapAsMove(MI))
904 return true;
905 return MI.isAsCheapAsAMove();
906 }
907
908 switch (MI.getOpcode()) {
909 default:
910 return MI.isAsCheapAsAMove();
911
912 case AArch64::ADDWrs:
913 case AArch64::ADDXrs:
914 case AArch64::SUBWrs:
915 case AArch64::SUBXrs:
916 return Subtarget.hasALULSLFast() && MI.getOperand(i: 3).getImm() <= 4;
917
918 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
919 // ORRXri, it is as cheap as MOV.
920 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
921 case AArch64::MOVi32imm:
922 return isCheapImmediate(MI, BitSize: 32);
923 case AArch64::MOVi64imm:
924 return isCheapImmediate(MI, BitSize: 64);
925 }
926}
927
928bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
929 switch (MI.getOpcode()) {
930 default:
931 return false;
932
933 case AArch64::ADDWrs:
934 case AArch64::ADDXrs:
935 case AArch64::ADDSWrs:
936 case AArch64::ADDSXrs: {
937 unsigned Imm = MI.getOperand(i: 3).getImm();
938 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
939 if (ShiftVal == 0)
940 return true;
941 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
942 }
943
944 case AArch64::ADDWrx:
945 case AArch64::ADDXrx:
946 case AArch64::ADDXrx64:
947 case AArch64::ADDSWrx:
948 case AArch64::ADDSXrx:
949 case AArch64::ADDSXrx64: {
950 unsigned Imm = MI.getOperand(i: 3).getImm();
951 switch (AArch64_AM::getArithExtendType(Imm)) {
952 default:
953 return false;
954 case AArch64_AM::UXTB:
955 case AArch64_AM::UXTH:
956 case AArch64_AM::UXTW:
957 case AArch64_AM::UXTX:
958 return AArch64_AM::getArithShiftValue(Imm) <= 4;
959 }
960 }
961
962 case AArch64::SUBWrs:
963 case AArch64::SUBSWrs: {
964 unsigned Imm = MI.getOperand(i: 3).getImm();
965 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
966 return ShiftVal == 0 ||
967 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
968 }
969
970 case AArch64::SUBXrs:
971 case AArch64::SUBSXrs: {
972 unsigned Imm = MI.getOperand(i: 3).getImm();
973 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
974 return ShiftVal == 0 ||
975 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
976 }
977
978 case AArch64::SUBWrx:
979 case AArch64::SUBXrx:
980 case AArch64::SUBXrx64:
981 case AArch64::SUBSWrx:
982 case AArch64::SUBSXrx:
983 case AArch64::SUBSXrx64: {
984 unsigned Imm = MI.getOperand(i: 3).getImm();
985 switch (AArch64_AM::getArithExtendType(Imm)) {
986 default:
987 return false;
988 case AArch64_AM::UXTB:
989 case AArch64_AM::UXTH:
990 case AArch64_AM::UXTW:
991 case AArch64_AM::UXTX:
992 return AArch64_AM::getArithShiftValue(Imm) == 0;
993 }
994 }
995
996 case AArch64::LDRBBroW:
997 case AArch64::LDRBBroX:
998 case AArch64::LDRBroW:
999 case AArch64::LDRBroX:
1000 case AArch64::LDRDroW:
1001 case AArch64::LDRDroX:
1002 case AArch64::LDRHHroW:
1003 case AArch64::LDRHHroX:
1004 case AArch64::LDRHroW:
1005 case AArch64::LDRHroX:
1006 case AArch64::LDRQroW:
1007 case AArch64::LDRQroX:
1008 case AArch64::LDRSBWroW:
1009 case AArch64::LDRSBWroX:
1010 case AArch64::LDRSBXroW:
1011 case AArch64::LDRSBXroX:
1012 case AArch64::LDRSHWroW:
1013 case AArch64::LDRSHWroX:
1014 case AArch64::LDRSHXroW:
1015 case AArch64::LDRSHXroX:
1016 case AArch64::LDRSWroW:
1017 case AArch64::LDRSWroX:
1018 case AArch64::LDRSroW:
1019 case AArch64::LDRSroX:
1020 case AArch64::LDRWroW:
1021 case AArch64::LDRWroX:
1022 case AArch64::LDRXroW:
1023 case AArch64::LDRXroX:
1024 case AArch64::PRFMroW:
1025 case AArch64::PRFMroX:
1026 case AArch64::STRBBroW:
1027 case AArch64::STRBBroX:
1028 case AArch64::STRBroW:
1029 case AArch64::STRBroX:
1030 case AArch64::STRDroW:
1031 case AArch64::STRDroX:
1032 case AArch64::STRHHroW:
1033 case AArch64::STRHHroX:
1034 case AArch64::STRHroW:
1035 case AArch64::STRHroX:
1036 case AArch64::STRQroW:
1037 case AArch64::STRQroX:
1038 case AArch64::STRSroW:
1039 case AArch64::STRSroX:
1040 case AArch64::STRWroW:
1041 case AArch64::STRWroX:
1042 case AArch64::STRXroW:
1043 case AArch64::STRXroX: {
1044 unsigned IsSigned = MI.getOperand(i: 3).getImm();
1045 return !IsSigned;
1046 }
1047 }
1048}
1049
1050bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1051 unsigned Opc = MI.getOpcode();
1052 switch (Opc) {
1053 default:
1054 return false;
1055 case AArch64::SEH_StackAlloc:
1056 case AArch64::SEH_SaveFPLR:
1057 case AArch64::SEH_SaveFPLR_X:
1058 case AArch64::SEH_SaveReg:
1059 case AArch64::SEH_SaveReg_X:
1060 case AArch64::SEH_SaveRegP:
1061 case AArch64::SEH_SaveRegP_X:
1062 case AArch64::SEH_SaveFReg:
1063 case AArch64::SEH_SaveFReg_X:
1064 case AArch64::SEH_SaveFRegP:
1065 case AArch64::SEH_SaveFRegP_X:
1066 case AArch64::SEH_SetFP:
1067 case AArch64::SEH_AddFP:
1068 case AArch64::SEH_Nop:
1069 case AArch64::SEH_PrologEnd:
1070 case AArch64::SEH_EpilogStart:
1071 case AArch64::SEH_EpilogEnd:
1072 case AArch64::SEH_PACSignLR:
1073 case AArch64::SEH_SaveAnyRegQP:
1074 case AArch64::SEH_SaveAnyRegQPX:
1075 return true;
1076 }
1077}
1078
1079bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1080 Register &SrcReg, Register &DstReg,
1081 unsigned &SubIdx) const {
1082 switch (MI.getOpcode()) {
1083 default:
1084 return false;
1085 case AArch64::SBFMXri: // aka sxtw
1086 case AArch64::UBFMXri: // aka uxtw
1087 // Check for the 32 -> 64 bit extension case, these instructions can do
1088 // much more.
1089 if (MI.getOperand(i: 2).getImm() != 0 || MI.getOperand(i: 3).getImm() != 31)
1090 return false;
1091 // This is a signed or unsigned 32 -> 64 bit extension.
1092 SrcReg = MI.getOperand(i: 1).getReg();
1093 DstReg = MI.getOperand(i: 0).getReg();
1094 SubIdx = AArch64::sub_32;
1095 return true;
1096 }
1097}
1098
1099bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1100 const MachineInstr &MIa, const MachineInstr &MIb) const {
1101 const TargetRegisterInfo *TRI = &getRegisterInfo();
1102 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1103 int64_t OffsetA = 0, OffsetB = 0;
1104 TypeSize WidthA(0, false), WidthB(0, false);
1105 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1106
1107 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1108 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1109
1110 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1111 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1112 return false;
1113
  // Retrieve the base register, the offset from the base, and the width.
  // Width is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).
  // If the bases are identical, and the offset of the lower memory access plus
  // its width does not exceed the offset of the higher memory access, then the
  // memory accesses are disjoint.
1119 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1120 // are assumed to have the same scale (vscale).
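  //
  // For example, two stores off the same base register at offsets #0 and #8,
  // each 8 bytes wide, are disjoint because 0 + 8 <= 8; the same stores at
  // offsets #0 and #4 would not be.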
1121 if (getMemOperandWithOffsetWidth(MI: MIa, BaseOp&: BaseOpA, Offset&: OffsetA, OffsetIsScalable&: OffsetAIsScalable,
1122 Width&: WidthA, TRI) &&
1123 getMemOperandWithOffsetWidth(MI: MIb, BaseOp&: BaseOpB, Offset&: OffsetB, OffsetIsScalable&: OffsetBIsScalable,
1124 Width&: WidthB, TRI)) {
1125 if (BaseOpA->isIdenticalTo(Other: *BaseOpB) &&
1126 OffsetAIsScalable == OffsetBIsScalable) {
1127 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1128 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1129 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1130 if (LowWidth.isScalable() == OffsetAIsScalable &&
1131 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1132 return true;
1133 }
1134 }
1135 return false;
1136}
1137
1138bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1139 const MachineBasicBlock *MBB,
1140 const MachineFunction &MF) const {
1141 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1142 return true;
1143
1144 // Do not move an instruction that can be recognized as a branch target.
1145 if (hasBTISemantics(MI))
1146 return true;
1147
1148 switch (MI.getOpcode()) {
1149 case AArch64::HINT:
1150 // CSDB hints are scheduling barriers.
1151 if (MI.getOperand(i: 0).getImm() == 0x14)
1152 return true;
1153 break;
1154 case AArch64::DSB:
1155 case AArch64::ISB:
1156 // DSB and ISB also are scheduling barriers.
1157 return true;
1158 case AArch64::MSRpstatesvcrImm1:
1159 // SMSTART and SMSTOP are also scheduling barriers.
1160 return true;
1161 default:;
1162 }
1163 if (isSEHInstruction(MI))
1164 return true;
1165 auto Next = std::next(x: MI.getIterator());
1166 return Next != MBB->end() && Next->isCFIInstruction();
1167}
1168
1169/// analyzeCompare - For a comparison instruction, return the source registers
1170/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1171/// Return true if the comparison instruction can be analyzed.
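///
/// For example, "subs wzr, w1, #42" (i.e. cmp w1, #42) is reported as
/// SrcReg = w1, SrcReg2 = 0, CmpMask = ~0, CmpValue = 42, while
/// "subs w0, w1, w2" is reported as SrcReg = w1, SrcReg2 = w2, CmpValue = 0.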
1172bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1173 Register &SrcReg2, int64_t &CmpMask,
1174 int64_t &CmpValue) const {
1175 // The first operand can be a frame index where we'd normally expect a
1176 // register.
1177 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1178 if (!MI.getOperand(i: 1).isReg())
1179 return false;
1180
1181 switch (MI.getOpcode()) {
1182 default:
1183 break;
1184 case AArch64::PTEST_PP:
1185 case AArch64::PTEST_PP_ANY:
1186 SrcReg = MI.getOperand(i: 0).getReg();
1187 SrcReg2 = MI.getOperand(i: 1).getReg();
1188 // Not sure about the mask and value for now...
1189 CmpMask = ~0;
1190 CmpValue = 0;
1191 return true;
1192 case AArch64::SUBSWrr:
1193 case AArch64::SUBSWrs:
1194 case AArch64::SUBSWrx:
1195 case AArch64::SUBSXrr:
1196 case AArch64::SUBSXrs:
1197 case AArch64::SUBSXrx:
1198 case AArch64::ADDSWrr:
1199 case AArch64::ADDSWrs:
1200 case AArch64::ADDSWrx:
1201 case AArch64::ADDSXrr:
1202 case AArch64::ADDSXrs:
1203 case AArch64::ADDSXrx:
1204 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1205 SrcReg = MI.getOperand(i: 1).getReg();
1206 SrcReg2 = MI.getOperand(i: 2).getReg();
1207 CmpMask = ~0;
1208 CmpValue = 0;
1209 return true;
1210 case AArch64::SUBSWri:
1211 case AArch64::ADDSWri:
1212 case AArch64::SUBSXri:
1213 case AArch64::ADDSXri:
1214 SrcReg = MI.getOperand(i: 1).getReg();
1215 SrcReg2 = 0;
1216 CmpMask = ~0;
1217 CmpValue = MI.getOperand(i: 2).getImm();
1218 return true;
1219 case AArch64::ANDSWri:
1220 case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
1223 SrcReg = MI.getOperand(i: 1).getReg();
1224 SrcReg2 = 0;
1225 CmpMask = ~0;
1226 CmpValue = AArch64_AM::decodeLogicalImmediate(
1227 MI.getOperand(2).getImm(),
1228 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1229 return true;
1230 }
1231
1232 return false;
1233}
1234
1235static bool UpdateOperandRegClass(MachineInstr &Instr) {
1236 MachineBasicBlock *MBB = Instr.getParent();
1237 assert(MBB && "Can't get MachineBasicBlock here");
1238 MachineFunction *MF = MBB->getParent();
1239 assert(MF && "Can't get MachineFunction here");
1240 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1241 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1242 MachineRegisterInfo *MRI = &MF->getRegInfo();
1243
1244 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1245 ++OpIdx) {
1246 MachineOperand &MO = Instr.getOperand(i: OpIdx);
1247 const TargetRegisterClass *OpRegCstraints =
1248 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1249
1250 // If there's no constraint, there's nothing to do.
1251 if (!OpRegCstraints)
1252 continue;
1253 // If the operand is a frame index, there's nothing to do here.
1254 // A frame index operand will resolve correctly during PEI.
1255 if (MO.isFI())
1256 continue;
1257
1258 assert(MO.isReg() &&
1259 "Operand has register constraints without being a register!");
1260
1261 Register Reg = MO.getReg();
1262 if (Reg.isPhysical()) {
1263 if (!OpRegCstraints->contains(Reg))
1264 return false;
1265 } else if (!OpRegCstraints->hasSubClassEq(RC: MRI->getRegClass(Reg)) &&
1266 !MRI->constrainRegClass(Reg, RC: OpRegCstraints))
1267 return false;
1268 }
1269
1270 return true;
1271}
1272
1273/// Return the opcode that does not set flags when possible - otherwise
1274/// return the original opcode. The caller is responsible to do the actual
1275/// substitution and legality checking.
1276static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions: for some of them the encoding of
  // the zero register in the destination would become the sp register.
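  // For example, "subs wzr, w0, #1" (i.e. cmp w0, #1) must keep its
  // flag-setting form: in the non-flag-setting SUBWri encoding, register 31
  // in the destination slot means WSP rather than WZR, so dropping the S
  // would effectively produce "sub wsp, w0, #1".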
1279 bool MIDefinesZeroReg = false;
1280 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1281 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1282 MIDefinesZeroReg = true;
1283
1284 switch (MI.getOpcode()) {
1285 default:
1286 return MI.getOpcode();
1287 case AArch64::ADDSWrr:
1288 return AArch64::ADDWrr;
1289 case AArch64::ADDSWri:
1290 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1291 case AArch64::ADDSWrs:
1292 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1293 case AArch64::ADDSWrx:
1294 return AArch64::ADDWrx;
1295 case AArch64::ADDSXrr:
1296 return AArch64::ADDXrr;
1297 case AArch64::ADDSXri:
1298 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1299 case AArch64::ADDSXrs:
1300 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1301 case AArch64::ADDSXrx:
1302 return AArch64::ADDXrx;
1303 case AArch64::SUBSWrr:
1304 return AArch64::SUBWrr;
1305 case AArch64::SUBSWri:
1306 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1307 case AArch64::SUBSWrs:
1308 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1309 case AArch64::SUBSWrx:
1310 return AArch64::SUBWrx;
1311 case AArch64::SUBSXrr:
1312 return AArch64::SUBXrr;
1313 case AArch64::SUBSXri:
1314 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1315 case AArch64::SUBSXrs:
1316 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1317 case AArch64::SUBSXrx:
1318 return AArch64::SUBXrx;
1319 }
1320}
1321
1322enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1323
1324/// True when condition flags are accessed (either by writing or reading)
1325/// on the instruction trace starting at From and ending at To.
1326///
/// Note: If From and To are from different blocks it's assumed the condition
/// flags are accessed on the path.
1329static bool areCFlagsAccessedBetweenInstrs(
1330 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1331 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1332 // Early exit if To is at the beginning of the BB.
1333 if (To == To->getParent()->begin())
1334 return true;
1335
1336 // Check whether the instructions are in the same basic block
1337 // If not, assume the condition flags might get modified somewhere.
1338 if (To->getParent() != From->getParent())
1339 return true;
1340
1341 // From must be above To.
1342 assert(std::any_of(
1343 ++To.getReverse(), To->getParent()->rend(),
1344 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1345
1346 // We iterate backward starting at \p To until we hit \p From.
1347 for (const MachineInstr &Instr :
1348 instructionsWithoutDebug(It: ++To.getReverse(), End: From.getReverse())) {
1349 if (((AccessToCheck & AK_Write) &&
1350 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1351 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1352 return true;
1353 }
1354 return false;
1355}
1356
1357/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1358/// operation which could set the flags in an identical manner
1359bool AArch64InstrInfo::optimizePTestInstr(
1360 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1361 const MachineRegisterInfo *MRI) const {
1362 auto *Mask = MRI->getUniqueVRegDef(Reg: MaskReg);
1363 auto *Pred = MRI->getUniqueVRegDef(Reg: PredReg);
1364 auto NewOp = Pred->getOpcode();
1365 bool OpChanged = false;
1366
1367 unsigned MaskOpcode = Mask->getOpcode();
1368 unsigned PredOpcode = Pred->getOpcode();
1369 bool PredIsPTestLike = isPTestLikeOpcode(Opc: PredOpcode);
1370 bool PredIsWhileLike = isWhileOpcode(Opc: PredOpcode);
1371
1372 if (isPTrueOpcode(Opc: MaskOpcode) && (PredIsPTestLike || PredIsWhileLike) &&
1373 getElementSizeForOpcode(Opc: MaskOpcode) ==
1374 getElementSizeForOpcode(Opc: PredOpcode) &&
1375 Mask->getOperand(i: 1).getImm() == 31) {
1376 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1377 // redundant since WHILE performs an implicit PTEST with an all active
1378 // mask. Must be an all active predicate of matching element size.
1379
1380 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1381 // PTEST_LIKE instruction uses the same all active mask and the element
1382 // size matches. If the PTEST has a condition of any then it is always
1383 // redundant.
1384 if (PredIsPTestLike) {
1385 auto PTestLikeMask = MRI->getUniqueVRegDef(Reg: Pred->getOperand(i: 1).getReg());
1386 if (Mask != PTestLikeMask && PTest->getOpcode() != AArch64::PTEST_PP_ANY)
1387 return false;
1388 }
1389
    // Fallthrough to simply remove the PTEST.
1391 } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike) &&
1392 PTest->getOpcode() == AArch64::PTEST_PP_ANY) {
1393 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1394 // instruction that sets the flags as PTEST would. This is only valid when
1395 // the condition is any.
1396
    // Fallthrough to simply remove the PTEST.
1398 } else if (PredIsPTestLike) {
1399 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1400 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1401 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1402 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1403 // performed by the compare could consider fewer lanes for these element
1404 // sizes.
1405 //
1406 // For example, consider
1407 //
1408 // ptrue p0.b ; P0=1111-1111-1111-1111
1409 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1410 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1411 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1412 // ; ^ last active
1413 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1414 // ; ^ last active
1415 //
1416 // where the compare generates a canonical all active 32-bit predicate
1417 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1418 // active flag, whereas the PTEST instruction with the same mask doesn't.
1419 // For PTEST_ANY this doesn't apply as the flags in this case would be
1420 // identical regardless of element size.
1421 auto PTestLikeMask = MRI->getUniqueVRegDef(Reg: Pred->getOperand(i: 1).getReg());
1422 uint64_t PredElementSize = getElementSizeForOpcode(Opc: PredOpcode);
1423 if ((Mask != PTestLikeMask) ||
1424 (PredElementSize != AArch64::ElementSizeB &&
1425 PTest->getOpcode() != AArch64::PTEST_PP_ANY))
1426 return false;
1427
    // Fallthrough to simply remove the PTEST.
1429 } else {
1430 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1431 // opcode so the PTEST becomes redundant.
1432 switch (PredOpcode) {
1433 case AArch64::AND_PPzPP:
1434 case AArch64::BIC_PPzPP:
1435 case AArch64::EOR_PPzPP:
1436 case AArch64::NAND_PPzPP:
1437 case AArch64::NOR_PPzPP:
1438 case AArch64::ORN_PPzPP:
1439 case AArch64::ORR_PPzPP:
1440 case AArch64::BRKA_PPzP:
1441 case AArch64::BRKPA_PPzPP:
1442 case AArch64::BRKB_PPzP:
1443 case AArch64::BRKPB_PPzPP:
1444 case AArch64::RDFFR_PPz: {
1445 // Check to see if our mask is the same. If not the resulting flag bits
1446 // may be different and we can't remove the ptest.
1447 auto *PredMask = MRI->getUniqueVRegDef(Reg: Pred->getOperand(i: 1).getReg());
1448 if (Mask != PredMask)
1449 return false;
1450 break;
1451 }
1452 case AArch64::BRKN_PPzP: {
1453 // BRKN uses an all active implicit mask to set flags unlike the other
1454 // flag-setting instructions.
1455 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1456 if ((MaskOpcode != AArch64::PTRUE_B) ||
1457 (Mask->getOperand(1).getImm() != 31))
1458 return false;
1459 break;
1460 }
1461 case AArch64::PTRUE_B:
1462 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1463 break;
1464 default:
1465 // Bail out if we don't recognize the input
1466 return false;
1467 }
1468
1469 NewOp = convertToFlagSettingOpc(Opc: PredOpcode);
1470 OpChanged = true;
1471 }
1472
1473 const TargetRegisterInfo *TRI = &getRegisterInfo();
1474
1475 // If another instruction between Pred and PTest accesses flags, don't remove
1476 // the ptest or update the earlier instruction to modify them.
1477 if (areCFlagsAccessedBetweenInstrs(From: Pred, To: PTest, TRI))
1478 return false;
1479
1480 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1481 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1482 // operand to be replaced with an equivalent instruction that also sets the
1483 // flags.
1484 Pred->setDesc(get(NewOp));
1485 PTest->eraseFromParent();
1486 if (OpChanged) {
1487 bool succeeded = UpdateOperandRegClass(Instr&: *Pred);
1488 (void)succeeded;
1489 assert(succeeded && "Operands have incompatible register classes!");
1490 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1491 }
1492
1493 // Ensure that the flags def is live.
1494 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1495 unsigned i = 0, e = Pred->getNumOperands();
1496 for (; i != e; ++i) {
1497 MachineOperand &MO = Pred->getOperand(i);
1498 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1499 MO.setIsDead(false);
1500 break;
1501 }
1502 }
1503 }
1504 return true;
1505}
1506
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be treated as a true
/// compare instruction when there are no uses of its destination register.
1511///
1512/// The following steps are tried in order:
1513/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if there is an instruction above it that produces a
///    needed condition code, or one that can be converted into such an
///    instruction.
1517/// Only comparison with zero is supported.
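///
/// A sketch of step 2:
///   and  w8, w9, #0xff
///   cmp  w8, #0          ; subs wzr, w8, #0
///   b.ne ...
/// Here the compare can be removed by rewriting the AND as ANDS, which already
/// sets the N and Z flags exactly as the compare against zero would.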
1518bool AArch64InstrInfo::optimizeCompareInstr(
1519 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1520 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1521 assert(CmpInstr.getParent());
1522 assert(MRI);
1523
1524 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1525 int DeadNZCVIdx =
1526 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1527 if (DeadNZCVIdx != -1) {
1528 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1529 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1530 CmpInstr.eraseFromParent();
1531 return true;
1532 }
1533 unsigned Opc = CmpInstr.getOpcode();
1534 unsigned NewOpc = convertToNonFlagSettingOpc(MI: CmpInstr);
1535 if (NewOpc == Opc)
1536 return false;
1537 const MCInstrDesc &MCID = get(NewOpc);
1538 CmpInstr.setDesc(MCID);
1539 CmpInstr.removeOperand(OpNo: DeadNZCVIdx);
1540 bool succeeded = UpdateOperandRegClass(Instr&: CmpInstr);
1541 (void)succeeded;
1542 assert(succeeded && "Some operands reg class are incompatible!");
1543 return true;
1544 }
1545
1546 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1547 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1548 return optimizePTestInstr(PTest: &CmpInstr, MaskReg: SrcReg, PredReg: SrcReg2, MRI);
1549
1550 if (SrcReg2 != 0)
1551 return false;
1552
1553 // CmpInstr is a Compare instruction if destination register is not used.
1554 if (!MRI->use_nodbg_empty(RegNo: CmpInstr.getOperand(i: 0).getReg()))
1555 return false;
1556
1557 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, MRI: *MRI))
1558 return true;
1559 return (CmpValue == 0 || CmpValue == 1) &&
1560 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, MRI: *MRI);
1561}
1562
1563/// Get opcode of S version of Instr.
1564/// If Instr is S version its opcode is returned.
1565/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1566/// or we are not interested in it.
1567static unsigned sForm(MachineInstr &Instr) {
1568 switch (Instr.getOpcode()) {
1569 default:
1570 return AArch64::INSTRUCTION_LIST_END;
1571
1572 case AArch64::ADDSWrr:
1573 case AArch64::ADDSWri:
1574 case AArch64::ADDSXrr:
1575 case AArch64::ADDSXri:
1576 case AArch64::SUBSWrr:
1577 case AArch64::SUBSWri:
1578 case AArch64::SUBSXrr:
1579 case AArch64::SUBSXri:
1580 return Instr.getOpcode();
1581
1582 case AArch64::ADDWrr:
1583 return AArch64::ADDSWrr;
1584 case AArch64::ADDWri:
1585 return AArch64::ADDSWri;
1586 case AArch64::ADDXrr:
1587 return AArch64::ADDSXrr;
1588 case AArch64::ADDXri:
1589 return AArch64::ADDSXri;
1590 case AArch64::ADCWr:
1591 return AArch64::ADCSWr;
1592 case AArch64::ADCXr:
1593 return AArch64::ADCSXr;
1594 case AArch64::SUBWrr:
1595 return AArch64::SUBSWrr;
1596 case AArch64::SUBWri:
1597 return AArch64::SUBSWri;
1598 case AArch64::SUBXrr:
1599 return AArch64::SUBSXrr;
1600 case AArch64::SUBXri:
1601 return AArch64::SUBSXri;
1602 case AArch64::SBCWr:
1603 return AArch64::SBCSWr;
1604 case AArch64::SBCXr:
1605 return AArch64::SBCSXr;
1606 case AArch64::ANDWri:
1607 return AArch64::ANDSWri;
1608 case AArch64::ANDXri:
1609 return AArch64::ANDSXri;
1610 }
1611}
1612
1613/// Check if AArch64::NZCV should be alive in successors of MBB.
1614static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1615 for (auto *BB : MBB->successors())
1616 if (BB->isLiveIn(AArch64::NZCV))
1617 return true;
1618 return false;
1619}
1620
1621/// \returns The condition code operand index for \p Instr if it is a branch
1622/// or select and -1 otherwise.
1623static int
1624findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1625 switch (Instr.getOpcode()) {
1626 default:
1627 return -1;
1628
1629 case AArch64::Bcc: {
1630 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1631 assert(Idx >= 2);
1632 return Idx - 2;
1633 }
1634
1635 case AArch64::CSINVWr:
1636 case AArch64::CSINVXr:
1637 case AArch64::CSINCWr:
1638 case AArch64::CSINCXr:
1639 case AArch64::CSELWr:
1640 case AArch64::CSELXr:
1641 case AArch64::CSNEGWr:
1642 case AArch64::CSNEGXr:
1643 case AArch64::FCSELSrrr:
1644 case AArch64::FCSELDrrr: {
1645 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1646 assert(Idx >= 1);
1647 return Idx - 1;
1648 }
1649 }
1650}
1651
1652/// Find a condition code used by the instruction.
1653/// Returns AArch64CC::Invalid if either the instruction does not use condition
1654/// codes or we don't optimize CmpInstr in the presence of such instructions.
1655static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1656 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1657 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1658 Instr.getOperand(i: CCIdx).getImm())
1659 : AArch64CC::Invalid;
1660}
1661
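/// Map a condition code to the set of NZCV flags it reads. For example,
/// AArch64CC::HI (unsigned higher) reads the C and Z flags, while
/// AArch64CC::LT (signed less than) reads the N and V flags.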
1662static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1663 assert(CC != AArch64CC::Invalid);
1664 UsedNZCV UsedFlags;
1665 switch (CC) {
1666 default:
1667 break;
1668
1669 case AArch64CC::EQ: // Z set
1670 case AArch64CC::NE: // Z clear
1671 UsedFlags.Z = true;
1672 break;
1673
1674 case AArch64CC::HI: // Z clear and C set
1675 case AArch64CC::LS: // Z set or C clear
1676 UsedFlags.Z = true;
1677 [[fallthrough]];
1678 case AArch64CC::HS: // C set
1679 case AArch64CC::LO: // C clear
1680 UsedFlags.C = true;
1681 break;
1682
1683 case AArch64CC::MI: // N set
1684 case AArch64CC::PL: // N clear
1685 UsedFlags.N = true;
1686 break;
1687
1688 case AArch64CC::VS: // V set
1689 case AArch64CC::VC: // V clear
1690 UsedFlags.V = true;
1691 break;
1692
1693 case AArch64CC::GT: // Z clear, N and V the same
1694 case AArch64CC::LE: // Z set, N and V differ
1695 UsedFlags.Z = true;
1696 [[fallthrough]];
1697 case AArch64CC::GE: // N and V the same
1698 case AArch64CC::LT: // N and V differ
1699 UsedFlags.N = true;
1700 UsedFlags.V = true;
1701 break;
1702 }
1703 return UsedFlags;
1704}
1705
/// \returns The condition flags used after \p CmpInstr in its MachineBB if
/// the NZCV flags are not alive in the successors of the block containing
/// both \p CmpInstr and \p MI.
/// \returns std::nullopt otherwise.
///
/// Also collects the instructions using those flags in \p CCUseInstrs if
/// provided.
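///
/// For example (illustrative sequence, not taken from a real test):
/// \code
///   subs wzr, w8, #0        ; CmpInstr
///   csel w0, w1, w2, eq     ; reads Z
///   adds w3, w3, #1         ; redefines NZCV, the scan stops here
/// \endcode
/// would return a UsedNZCV value with only the Z flag set.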
1711std::optional<UsedNZCV>
1712llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1713 const TargetRegisterInfo &TRI,
1714 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1715 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1716 if (MI.getParent() != CmpParent)
1717 return std::nullopt;
1718
1719 if (areCFlagsAliveInSuccessors(MBB: CmpParent))
1720 return std::nullopt;
1721
1722 UsedNZCV NZCVUsedAfterCmp;
1723 for (MachineInstr &Instr : instructionsWithoutDebug(
1724 It: std::next(x: CmpInstr.getIterator()), End: CmpParent->instr_end())) {
1725 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1726 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1727 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1728 return std::nullopt;
1729 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1730 if (CCUseInstrs)
1731 CCUseInstrs->push_back(Elt: &Instr);
1732 }
1733 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1734 break;
1735 }
1736 return NZCVUsedAfterCmp;
1737}
1738
1739static bool isADDSRegImm(unsigned Opcode) {
1740 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1741}
1742
1743static bool isSUBSRegImm(unsigned Opcode) {
1744 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1745}
1746
1747/// Check if CmpInstr can be substituted by MI.
1748///
1749/// CmpInstr can be substituted:
1750/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1751/// - and, MI and CmpInstr are from the same MachineBB
1752/// - and, condition flags are not alive in successors of the CmpInstr parent
1753/// - and, if MI opcode is the S form there must be no defs of flags between
1754/// MI and CmpInstr
1755/// or if MI opcode is not the S form there must be neither defs of flags
1756/// nor uses of flags between MI and CmpInstr.
/// - and, the C flag is not used after CmpInstr
/// - and, the V flag is not used after CmpInstr, or MI has the no-signed-wrap
///        flag and therefore produces a poison value when signed overflow
///        occurs, so the difference in V cannot be observed.
1760static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1761 const TargetRegisterInfo &TRI) {
  // NOTE: this assertion guarantees that MI.getOpcode() is an add or a
  // subtraction, in either its flag-setting or non-flag-setting form.
1764 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1765
1766 const unsigned CmpOpcode = CmpInstr.getOpcode();
1767 if (!isADDSRegImm(Opcode: CmpOpcode) && !isSUBSRegImm(Opcode: CmpOpcode))
1768 return false;
1769
1770 assert((CmpInstr.getOperand(2).isImm() &&
1771 CmpInstr.getOperand(2).getImm() == 0) &&
1772 "Caller guarantees that CmpInstr compares with constant 0");
1773
1774 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1775 if (!NZVCUsed || NZVCUsed->C)
1776 return false;
1777
1778 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1779 // '%vreg = add ...' or '%vreg = sub ...'.
1780 // Condition flag V is used to indicate signed overflow.
1781 // 1) MI and CmpInstr set N and V to the same value.
1782 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1783 // signed overflow occurs, so CmpInstr could still be simplified away.
1784 if (NZVCUsed->V && !MI.getFlag(Flag: MachineInstr::NoSWrap))
1785 return false;
1786
1787 AccessKind AccessToCheck = AK_Write;
1788 if (sForm(Instr&: MI) != MI.getOpcode())
1789 AccessToCheck = AK_All;
1790 return !areCFlagsAccessedBetweenInstrs(From: &MI, To: &CmpInstr, TRI: &TRI, AccessToCheck);
1791}
1792
1793/// Substitute an instruction comparing to zero with another instruction
1794/// which produces needed condition flags.
1795///
1796/// Return true on success.
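///
/// For example (illustrative):
/// \code
///   sub w8, w9, w10
///   cmp w8, #0
///   b.eq .Lexit
/// \endcode
/// becomes
/// \code
///   subs w8, w9, w10
///   b.eq .Lexit
/// \endcode
/// since the SUBS sets the Z flag exactly as the removed compare would.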
1797bool AArch64InstrInfo::substituteCmpToZero(
1798 MachineInstr &CmpInstr, unsigned SrcReg,
1799 const MachineRegisterInfo &MRI) const {
1800 // Get the unique definition of SrcReg.
1801 MachineInstr *MI = MRI.getUniqueVRegDef(Reg: SrcReg);
1802 if (!MI)
1803 return false;
1804
1805 const TargetRegisterInfo &TRI = getRegisterInfo();
1806
1807 unsigned NewOpc = sForm(Instr&: *MI);
1808 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1809 return false;
1810
1811 if (!canInstrSubstituteCmpInstr(MI&: *MI, CmpInstr, TRI))
1812 return false;
1813
1814 // Update the instruction to set NZCV.
1815 MI->setDesc(get(NewOpc));
1816 CmpInstr.eraseFromParent();
1817 bool succeeded = UpdateOperandRegClass(Instr&: *MI);
1818 (void)succeeded;
1819 assert(succeeded && "Some operands reg class are incompatible!");
1820 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1821 return true;
1822}
1823
1824/// \returns True if \p CmpInstr can be removed.
1825///
1826/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1827/// codes used in \p CCUseInstrs must be inverted.
1828static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1829 int CmpValue, const TargetRegisterInfo &TRI,
1830 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1831 bool &IsInvertCC) {
1832 assert((CmpValue == 0 || CmpValue == 1) &&
1833 "Only comparisons to 0 or 1 considered for removal!");
1834
1835 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1836 unsigned MIOpc = MI.getOpcode();
1837 if (MIOpc == AArch64::CSINCWr) {
1838 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1839 MI.getOperand(2).getReg() != AArch64::WZR)
1840 return false;
1841 } else if (MIOpc == AArch64::CSINCXr) {
1842 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1843 MI.getOperand(2).getReg() != AArch64::XZR)
1844 return false;
1845 } else {
1846 return false;
1847 }
1848 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(Instr: MI);
1849 if (MICC == AArch64CC::Invalid)
1850 return false;
1851
1852 // NZCV needs to be defined
1853 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
1854 return false;
1855
1856 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1857 const unsigned CmpOpcode = CmpInstr.getOpcode();
1858 bool IsSubsRegImm = isSUBSRegImm(Opcode: CmpOpcode);
1859 if (CmpValue && !IsSubsRegImm)
1860 return false;
1861 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(Opcode: CmpOpcode))
1862 return false;
1863
1864 // MI conditions allowed: eq, ne, mi, pl
1865 UsedNZCV MIUsedNZCV = getUsedNZCV(CC: MICC);
1866 if (MIUsedNZCV.C || MIUsedNZCV.V)
1867 return false;
1868
1869 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1870 examineCFlagsUse(MI, CmpInstr, TRI, CCUseInstrs: &CCUseInstrs);
  // Condition flags must not be used in the successors of CmpInstr's basic
  // block, and only the Z or N flags may be used after CmpInstr within its
  // basic block.
1873 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1874 return false;
1875 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1876 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1877 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1878 return false;
  // If CmpInstr is a comparison to zero, the MI condition is limited to eq/ne.
1880 if (MIUsedNZCV.N && !CmpValue)
1881 return false;
1882
1883 // There must be no defs of flags between MI and CmpInstr
1884 if (areCFlagsAccessedBetweenInstrs(From: &MI, To: &CmpInstr, TRI: &TRI, AccessToCheck: AK_Write))
1885 return false;
1886
1887 // Condition code is inverted in the following cases:
1888 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1889 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1890 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1891 (!CmpValue && MICC == AArch64CC::NE);
1892 return true;
1893}
1894
1895/// Remove comparison in csinc-cmp sequence
1896///
1897/// Examples:
1898/// 1. \code
1899/// csinc w9, wzr, wzr, ne
1900/// cmp w9, #0
1901/// b.eq
1902/// \endcode
1903/// to
1904/// \code
1905/// csinc w9, wzr, wzr, ne
1906/// b.ne
1907/// \endcode
1908///
1909/// 2. \code
1910/// csinc x2, xzr, xzr, mi
1911/// cmp x2, #1
1912/// b.pl
1913/// \endcode
1914/// to
1915/// \code
1916/// csinc x2, xzr, xzr, mi
1917/// b.pl
1918/// \endcode
1919///
1920/// \param CmpInstr comparison instruction
1921/// \return True when comparison removed
1922bool AArch64InstrInfo::removeCmpToZeroOrOne(
1923 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1924 const MachineRegisterInfo &MRI) const {
1925 MachineInstr *MI = MRI.getUniqueVRegDef(Reg: SrcReg);
1926 if (!MI)
1927 return false;
1928 const TargetRegisterInfo &TRI = getRegisterInfo();
1929 SmallVector<MachineInstr *, 4> CCUseInstrs;
1930 bool IsInvertCC = false;
1931 if (!canCmpInstrBeRemoved(MI&: *MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1932 IsInvertCC))
1933 return false;
1934 // Make transformation
1935 CmpInstr.eraseFromParent();
1936 if (IsInvertCC) {
1937 // Invert condition codes in CmpInstr CC users
1938 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1939 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(Instr: *CCUseInstr);
1940 assert(Idx >= 0 && "Unexpected instruction using CC.");
1941 MachineOperand &CCOperand = CCUseInstr->getOperand(i: Idx);
1942 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1943 Code: static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1944 CCOperand.setImm(CCUse);
1945 }
1946 }
1947 return true;
1948}
1949
1950bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1951 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1952 MI.getOpcode() != AArch64::CATCHRET)
1953 return false;
1954
1955 MachineBasicBlock &MBB = *MI.getParent();
1956 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1957 auto TRI = Subtarget.getRegisterInfo();
1958 DebugLoc DL = MI.getDebugLoc();
1959
1960 if (MI.getOpcode() == AArch64::CATCHRET) {
1961 // Skip to the first instruction before the epilog.
1962 const TargetInstrInfo *TII =
1963 MBB.getParent()->getSubtarget().getInstrInfo();
1964 MachineBasicBlock *TargetMBB = MI.getOperand(i: 0).getMBB();
1965 auto MBBI = MachineBasicBlock::iterator(MI);
1966 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(x: MBBI);
1967 while (FirstEpilogSEH->getFlag(Flag: MachineInstr::FrameDestroy) &&
1968 FirstEpilogSEH != MBB.begin())
1969 FirstEpilogSEH = std::prev(x: FirstEpilogSEH);
1970 if (FirstEpilogSEH != MBB.begin())
1971 FirstEpilogSEH = std::next(x: FirstEpilogSEH);
1972 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1973 .addReg(AArch64::X0, RegState::Define)
1974 .addMBB(TargetMBB);
1975 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1976 .addReg(AArch64::X0, RegState::Define)
1977 .addReg(AArch64::X0)
1978 .addMBB(TargetMBB)
1979 .addImm(0);
1980 return true;
1981 }
1982
1983 Register Reg = MI.getOperand(i: 0).getReg();
1984 Module &M = *MBB.getParent()->getFunction().getParent();
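  // When the guard lives in a system register, the pseudo expands to a
  // sysreg read followed by a load, e.g. (illustrative, assuming guard
  // register SP_EL0 and an offset of 8):
  //   mrs x0, SP_EL0
  //   ldr x0, [x0, #8]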
1985 if (M.getStackProtectorGuard() == "sysreg") {
1986 const AArch64SysReg::SysReg *SrcReg =
1987 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
1988 if (!SrcReg)
1989 report_fatal_error(reason: "Unknown SysReg for Stack Protector Guard Register");
1990
1991 // mrs xN, sysreg
1992 BuildMI(MBB, MI, DL, get(AArch64::MRS))
1993 .addDef(Reg, RegState::Renamable)
1994 .addImm(SrcReg->Encoding);
1995 int Offset = M.getStackProtectorGuardOffset();
1996 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
1997 // ldr xN, [xN, #offset]
1998 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
1999 .addDef(Reg)
2000 .addUse(Reg, RegState::Kill)
2001 .addImm(Offset / 8);
2002 } else if (Offset >= -256 && Offset <= 255) {
2003 // ldur xN, [xN, #offset]
2004 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2005 .addDef(Reg)
2006 .addUse(Reg, RegState::Kill)
2007 .addImm(Offset);
2008 } else if (Offset >= -4095 && Offset <= 4095) {
2009 if (Offset > 0) {
2010 // add xN, xN, #offset
2011 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2012 .addDef(Reg)
2013 .addUse(Reg, RegState::Kill)
2014 .addImm(Offset)
2015 .addImm(0);
2016 } else {
2017 // sub xN, xN, #offset
2018 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2019 .addDef(Reg)
2020 .addUse(Reg, RegState::Kill)
2021 .addImm(-Offset)
2022 .addImm(0);
2023 }
2024 // ldr xN, [xN]
2025 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2026 .addDef(Reg)
2027 .addUse(Reg, RegState::Kill)
2028 .addImm(0);
2029 } else {
      // Cases that are larger than +/- 4095 and either not a multiple of 8 or
      // larger than 32760.
2032 // It might be nice to use AArch64::MOVi32imm here, which would get
2033 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2034 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2035 // AArch64FrameLowering might help us find such a scratch register
2036 // though. If we failed to find a scratch register, we could emit a
2037 // stream of add instructions to build up the immediate. Or, we could try
2038 // to insert a AArch64::MOVi32imm before register allocation so that we
2039 // didn't need to scavenge for a scratch register.
2040 report_fatal_error(reason: "Unable to encode Stack Protector Guard Offset");
2041 }
2042 MBB.erase(I: MI);
2043 return true;
2044 }
2045
2046 const GlobalValue *GV =
2047 cast<GlobalValue>(Val: (*MI.memoperands_begin())->getValue());
2048 const TargetMachine &TM = MBB.getParent()->getTarget();
2049 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2050 const unsigned char MO_NC = AArch64II::MO_NC;
2051
2052 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2053 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2054 .addGlobalAddress(GV, 0, OpFlags);
2055 if (Subtarget.isTargetILP32()) {
2056 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2057 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2058 .addDef(Reg32, RegState::Dead)
2059 .addUse(Reg, RegState::Kill)
2060 .addImm(0)
2061 .addMemOperand(*MI.memoperands_begin())
2062 .addDef(Reg, RegState::Implicit);
2063 } else {
2064 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2065 .addReg(Reg, RegState::Kill)
2066 .addImm(0)
2067 .addMemOperand(*MI.memoperands_begin());
2068 }
2069 } else if (TM.getCodeModel() == CodeModel::Large) {
2070 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2071 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2072 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2073 .addImm(0);
2074 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2075 .addReg(Reg, RegState::Kill)
2076 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2077 .addImm(16);
2078 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2079 .addReg(Reg, RegState::Kill)
2080 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2081 .addImm(32);
2082 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2083 .addReg(Reg, RegState::Kill)
2084 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2085 .addImm(48);
2086 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2087 .addReg(Reg, RegState::Kill)
2088 .addImm(0)
2089 .addMemOperand(*MI.memoperands_begin());
2090 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2091 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2092 .addGlobalAddress(GV, 0, OpFlags);
2093 } else {
2094 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2095 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2096 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2097 if (Subtarget.isTargetILP32()) {
2098 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2099 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2100 .addDef(Reg32, RegState::Dead)
2101 .addUse(Reg, RegState::Kill)
2102 .addGlobalAddress(GV, 0, LoFlags)
2103 .addMemOperand(*MI.memoperands_begin())
2104 .addDef(Reg, RegState::Implicit);
2105 } else {
2106 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2107 .addReg(Reg, RegState::Kill)
2108 .addGlobalAddress(GV, 0, LoFlags)
2109 .addMemOperand(*MI.memoperands_begin());
2110 }
2111 }
2112
2113 MBB.erase(I: MI);
2114
2115 return true;
2116}
2117
2118// Return true if this instruction simply sets its single destination register
2119// to zero. This is equivalent to a register rename of the zero-register.
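// For example 'movz w0, #0', 'and w0, wzr, #imm' and a COPY from WZR all
// qualify.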
2120bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2121 switch (MI.getOpcode()) {
2122 default:
2123 break;
2124 case AArch64::MOVZWi:
2125 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2126 if (MI.getOperand(i: 1).isImm() && MI.getOperand(i: 1).getImm() == 0) {
2127 assert(MI.getDesc().getNumOperands() == 3 &&
2128 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2129 return true;
2130 }
2131 break;
2132 case AArch64::ANDWri: // and Rd, Rzr, #imm
2133 return MI.getOperand(1).getReg() == AArch64::WZR;
2134 case AArch64::ANDXri:
2135 return MI.getOperand(1).getReg() == AArch64::XZR;
2136 case TargetOpcode::COPY:
2137 return MI.getOperand(1).getReg() == AArch64::WZR;
2138 }
2139 return false;
2140}
2141
2142// Return true if this instruction simply renames a general register without
2143// modifying bits.
2144bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2145 switch (MI.getOpcode()) {
2146 default:
2147 break;
2148 case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
2150 Register DstReg = MI.getOperand(i: 0).getReg();
2151 return (AArch64::GPR32RegClass.contains(DstReg) ||
2152 AArch64::GPR64RegClass.contains(DstReg));
2153 }
2154 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2155 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2156 assert(MI.getDesc().getNumOperands() == 4 &&
2157 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2158 return true;
2159 }
2160 break;
2161 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2162 if (MI.getOperand(i: 2).getImm() == 0) {
2163 assert(MI.getDesc().getNumOperands() == 4 &&
2164 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2165 return true;
2166 }
2167 break;
2168 }
2169 return false;
2170}
2171
2172// Return true if this instruction simply renames a general register without
2173// modifying bits.
2174bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2175 switch (MI.getOpcode()) {
2176 default:
2177 break;
2178 case TargetOpcode::COPY: {
2179 Register DstReg = MI.getOperand(i: 0).getReg();
2180 return AArch64::FPR128RegClass.contains(DstReg);
2181 }
2182 case AArch64::ORRv16i8:
2183 if (MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg()) {
2184 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2185 "invalid ORRv16i8 operands");
2186 return true;
2187 }
2188 break;
2189 }
2190 return false;
2191}
2192
2193Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2194 int &FrameIndex) const {
2195 switch (MI.getOpcode()) {
2196 default:
2197 break;
2198 case AArch64::LDRWui:
2199 case AArch64::LDRXui:
2200 case AArch64::LDRBui:
2201 case AArch64::LDRHui:
2202 case AArch64::LDRSui:
2203 case AArch64::LDRDui:
2204 case AArch64::LDRQui:
2205 case AArch64::LDR_PXI:
2206 if (MI.getOperand(i: 0).getSubReg() == 0 && MI.getOperand(i: 1).isFI() &&
2207 MI.getOperand(i: 2).isImm() && MI.getOperand(i: 2).getImm() == 0) {
2208 FrameIndex = MI.getOperand(i: 1).getIndex();
2209 return MI.getOperand(i: 0).getReg();
2210 }
2211 break;
2212 }
2213
2214 return 0;
2215}
2216
2217Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2218 int &FrameIndex) const {
2219 switch (MI.getOpcode()) {
2220 default:
2221 break;
2222 case AArch64::STRWui:
2223 case AArch64::STRXui:
2224 case AArch64::STRBui:
2225 case AArch64::STRHui:
2226 case AArch64::STRSui:
2227 case AArch64::STRDui:
2228 case AArch64::STRQui:
2229 case AArch64::STR_PXI:
2230 if (MI.getOperand(i: 0).getSubReg() == 0 && MI.getOperand(i: 1).isFI() &&
2231 MI.getOperand(i: 2).isImm() && MI.getOperand(i: 2).getImm() == 0) {
2232 FrameIndex = MI.getOperand(i: 1).getIndex();
2233 return MI.getOperand(i: 0).getReg();
2234 }
2235 break;
2236 }
2237 return 0;
2238}
2239
2240/// Check all MachineMemOperands for a hint to suppress pairing.
2241bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2242 return llvm::any_of(Range: MI.memoperands(), P: [](MachineMemOperand *MMO) {
2243 return MMO->getFlags() & MOSuppressPair;
2244 });
2245}
2246
2247/// Set a flag on the first MachineMemOperand to suppress pairing.
2248void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2249 if (MI.memoperands_empty())
2250 return;
2251 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2252}
2253
2254/// Check all MachineMemOperands for a hint that the load/store is strided.
2255bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2256 return llvm::any_of(Range: MI.memoperands(), P: [](MachineMemOperand *MMO) {
2257 return MMO->getFlags() & MOStridedAccess;
2258 });
2259}
2260
2261bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2262 switch (Opc) {
2263 default:
2264 return false;
2265 case AArch64::STURSi:
2266 case AArch64::STRSpre:
2267 case AArch64::STURDi:
2268 case AArch64::STRDpre:
2269 case AArch64::STURQi:
2270 case AArch64::STRQpre:
2271 case AArch64::STURBBi:
2272 case AArch64::STURHHi:
2273 case AArch64::STURWi:
2274 case AArch64::STRWpre:
2275 case AArch64::STURXi:
2276 case AArch64::STRXpre:
2277 case AArch64::LDURSi:
2278 case AArch64::LDRSpre:
2279 case AArch64::LDURDi:
2280 case AArch64::LDRDpre:
2281 case AArch64::LDURQi:
2282 case AArch64::LDRQpre:
2283 case AArch64::LDURWi:
2284 case AArch64::LDRWpre:
2285 case AArch64::LDURXi:
2286 case AArch64::LDRXpre:
2287 case AArch64::LDRSWpre:
2288 case AArch64::LDURSWi:
2289 case AArch64::LDURHHi:
2290 case AArch64::LDURBBi:
2291 case AArch64::LDURSBWi:
2292 case AArch64::LDURSHWi:
2293 return true;
2294 }
2295}
2296
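// Map a scaled immediate-offset load/store opcode to its unscaled
// (LDUR/STUR-style) counterpart, e.g. AArch64::LDRXui -> AArch64::LDURXi.
// Returns an empty optional if there is no unscaled counterpart.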
2297std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2298 switch (Opc) {
2299 default: return {};
2300 case AArch64::PRFMui: return AArch64::PRFUMi;
2301 case AArch64::LDRXui: return AArch64::LDURXi;
2302 case AArch64::LDRWui: return AArch64::LDURWi;
2303 case AArch64::LDRBui: return AArch64::LDURBi;
2304 case AArch64::LDRHui: return AArch64::LDURHi;
2305 case AArch64::LDRSui: return AArch64::LDURSi;
2306 case AArch64::LDRDui: return AArch64::LDURDi;
2307 case AArch64::LDRQui: return AArch64::LDURQi;
2308 case AArch64::LDRBBui: return AArch64::LDURBBi;
2309 case AArch64::LDRHHui: return AArch64::LDURHHi;
2310 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2311 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2312 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2313 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2314 case AArch64::LDRSWui: return AArch64::LDURSWi;
2315 case AArch64::STRXui: return AArch64::STURXi;
2316 case AArch64::STRWui: return AArch64::STURWi;
2317 case AArch64::STRBui: return AArch64::STURBi;
2318 case AArch64::STRHui: return AArch64::STURHi;
2319 case AArch64::STRSui: return AArch64::STURSi;
2320 case AArch64::STRDui: return AArch64::STURDi;
2321 case AArch64::STRQui: return AArch64::STURQi;
2322 case AArch64::STRBBui: return AArch64::STURBBi;
2323 case AArch64::STRHHui: return AArch64::STURHHi;
2324 }
2325}
2326
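// Return the operand index of the immediate offset for a load/store opcode.
// Most forms keep it at operand 2 (e.g. 'LDRXui $Rt, $Rn, $imm'), while the
// paired and SVE structured forms listed below carry it at operand 3
// (e.g. 'LDPXi $Rt, $Rt2, $Rn, $imm').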
2327unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2328 switch (Opc) {
2329 default:
2330 return 2;
2331 case AArch64::LDPXi:
2332 case AArch64::LDPDi:
2333 case AArch64::STPXi:
2334 case AArch64::STPDi:
2335 case AArch64::LDNPXi:
2336 case AArch64::LDNPDi:
2337 case AArch64::STNPXi:
2338 case AArch64::STNPDi:
2339 case AArch64::LDPQi:
2340 case AArch64::STPQi:
2341 case AArch64::LDNPQi:
2342 case AArch64::STNPQi:
2343 case AArch64::LDPWi:
2344 case AArch64::LDPSi:
2345 case AArch64::STPWi:
2346 case AArch64::STPSi:
2347 case AArch64::LDNPWi:
2348 case AArch64::LDNPSi:
2349 case AArch64::STNPWi:
2350 case AArch64::STNPSi:
2351 case AArch64::LDG:
2352 case AArch64::STGPi:
2353
2354 case AArch64::LD1B_IMM:
2355 case AArch64::LD1B_H_IMM:
2356 case AArch64::LD1B_S_IMM:
2357 case AArch64::LD1B_D_IMM:
2358 case AArch64::LD1SB_H_IMM:
2359 case AArch64::LD1SB_S_IMM:
2360 case AArch64::LD1SB_D_IMM:
2361 case AArch64::LD1H_IMM:
2362 case AArch64::LD1H_S_IMM:
2363 case AArch64::LD1H_D_IMM:
2364 case AArch64::LD1SH_S_IMM:
2365 case AArch64::LD1SH_D_IMM:
2366 case AArch64::LD1W_IMM:
2367 case AArch64::LD1W_D_IMM:
2368 case AArch64::LD1SW_D_IMM:
2369 case AArch64::LD1D_IMM:
2370
2371 case AArch64::LD2B_IMM:
2372 case AArch64::LD2H_IMM:
2373 case AArch64::LD2W_IMM:
2374 case AArch64::LD2D_IMM:
2375 case AArch64::LD3B_IMM:
2376 case AArch64::LD3H_IMM:
2377 case AArch64::LD3W_IMM:
2378 case AArch64::LD3D_IMM:
2379 case AArch64::LD4B_IMM:
2380 case AArch64::LD4H_IMM:
2381 case AArch64::LD4W_IMM:
2382 case AArch64::LD4D_IMM:
2383
2384 case AArch64::ST1B_IMM:
2385 case AArch64::ST1B_H_IMM:
2386 case AArch64::ST1B_S_IMM:
2387 case AArch64::ST1B_D_IMM:
2388 case AArch64::ST1H_IMM:
2389 case AArch64::ST1H_S_IMM:
2390 case AArch64::ST1H_D_IMM:
2391 case AArch64::ST1W_IMM:
2392 case AArch64::ST1W_D_IMM:
2393 case AArch64::ST1D_IMM:
2394
2395 case AArch64::ST2B_IMM:
2396 case AArch64::ST2H_IMM:
2397 case AArch64::ST2W_IMM:
2398 case AArch64::ST2D_IMM:
2399 case AArch64::ST3B_IMM:
2400 case AArch64::ST3H_IMM:
2401 case AArch64::ST3W_IMM:
2402 case AArch64::ST3D_IMM:
2403 case AArch64::ST4B_IMM:
2404 case AArch64::ST4H_IMM:
2405 case AArch64::ST4W_IMM:
2406 case AArch64::ST4D_IMM:
2407
2408 case AArch64::LD1RB_IMM:
2409 case AArch64::LD1RB_H_IMM:
2410 case AArch64::LD1RB_S_IMM:
2411 case AArch64::LD1RB_D_IMM:
2412 case AArch64::LD1RSB_H_IMM:
2413 case AArch64::LD1RSB_S_IMM:
2414 case AArch64::LD1RSB_D_IMM:
2415 case AArch64::LD1RH_IMM:
2416 case AArch64::LD1RH_S_IMM:
2417 case AArch64::LD1RH_D_IMM:
2418 case AArch64::LD1RSH_S_IMM:
2419 case AArch64::LD1RSH_D_IMM:
2420 case AArch64::LD1RW_IMM:
2421 case AArch64::LD1RW_D_IMM:
2422 case AArch64::LD1RSW_IMM:
2423 case AArch64::LD1RD_IMM:
2424
2425 case AArch64::LDNT1B_ZRI:
2426 case AArch64::LDNT1H_ZRI:
2427 case AArch64::LDNT1W_ZRI:
2428 case AArch64::LDNT1D_ZRI:
2429 case AArch64::STNT1B_ZRI:
2430 case AArch64::STNT1H_ZRI:
2431 case AArch64::STNT1W_ZRI:
2432 case AArch64::STNT1D_ZRI:
2433
2434 case AArch64::LDNF1B_IMM:
2435 case AArch64::LDNF1B_H_IMM:
2436 case AArch64::LDNF1B_S_IMM:
2437 case AArch64::LDNF1B_D_IMM:
2438 case AArch64::LDNF1SB_H_IMM:
2439 case AArch64::LDNF1SB_S_IMM:
2440 case AArch64::LDNF1SB_D_IMM:
2441 case AArch64::LDNF1H_IMM:
2442 case AArch64::LDNF1H_S_IMM:
2443 case AArch64::LDNF1H_D_IMM:
2444 case AArch64::LDNF1SH_S_IMM:
2445 case AArch64::LDNF1SH_D_IMM:
2446 case AArch64::LDNF1W_IMM:
2447 case AArch64::LDNF1W_D_IMM:
2448 case AArch64::LDNF1SW_D_IMM:
2449 case AArch64::LDNF1D_IMM:
2450 return 3;
2451 case AArch64::ADDG:
2452 case AArch64::STGi:
2453 case AArch64::LDR_PXI:
2454 case AArch64::STR_PXI:
2455 return 2;
2456 }
2457}
2458
2459bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2460 switch (MI.getOpcode()) {
2461 default:
2462 return false;
2463 // Scaled instructions.
2464 case AArch64::STRSui:
2465 case AArch64::STRDui:
2466 case AArch64::STRQui:
2467 case AArch64::STRXui:
2468 case AArch64::STRWui:
2469 case AArch64::LDRSui:
2470 case AArch64::LDRDui:
2471 case AArch64::LDRQui:
2472 case AArch64::LDRXui:
2473 case AArch64::LDRWui:
2474 case AArch64::LDRSWui:
2475 // Unscaled instructions.
2476 case AArch64::STURSi:
2477 case AArch64::STRSpre:
2478 case AArch64::STURDi:
2479 case AArch64::STRDpre:
2480 case AArch64::STURQi:
2481 case AArch64::STRQpre:
2482 case AArch64::STURWi:
2483 case AArch64::STRWpre:
2484 case AArch64::STURXi:
2485 case AArch64::STRXpre:
2486 case AArch64::LDURSi:
2487 case AArch64::LDRSpre:
2488 case AArch64::LDURDi:
2489 case AArch64::LDRDpre:
2490 case AArch64::LDURQi:
2491 case AArch64::LDRQpre:
2492 case AArch64::LDURWi:
2493 case AArch64::LDRWpre:
2494 case AArch64::LDURXi:
2495 case AArch64::LDRXpre:
2496 case AArch64::LDURSWi:
2497 case AArch64::LDRSWpre:
2498 return true;
2499 }
2500}
2501
2502bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2503 switch (MI.getOpcode()) {
2504 default:
2505 assert((!MI.isCall() || !MI.isReturn()) &&
2506 "Unexpected instruction - was a new tail call opcode introduced?");
2507 return false;
2508 case AArch64::TCRETURNdi:
2509 case AArch64::TCRETURNri:
2510 case AArch64::TCRETURNrix16x17:
2511 case AArch64::TCRETURNrix17:
2512 case AArch64::TCRETURNrinotx16:
2513 case AArch64::TCRETURNriALL:
2514 return true;
2515 }
2516}
2517
2518unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2519 switch (Opc) {
2520 default:
2521 llvm_unreachable("Opcode has no flag setting equivalent!");
2522 // 32-bit cases:
2523 case AArch64::ADDWri:
2524 return AArch64::ADDSWri;
2525 case AArch64::ADDWrr:
2526 return AArch64::ADDSWrr;
2527 case AArch64::ADDWrs:
2528 return AArch64::ADDSWrs;
2529 case AArch64::ADDWrx:
2530 return AArch64::ADDSWrx;
2531 case AArch64::ANDWri:
2532 return AArch64::ANDSWri;
2533 case AArch64::ANDWrr:
2534 return AArch64::ANDSWrr;
2535 case AArch64::ANDWrs:
2536 return AArch64::ANDSWrs;
2537 case AArch64::BICWrr:
2538 return AArch64::BICSWrr;
2539 case AArch64::BICWrs:
2540 return AArch64::BICSWrs;
2541 case AArch64::SUBWri:
2542 return AArch64::SUBSWri;
2543 case AArch64::SUBWrr:
2544 return AArch64::SUBSWrr;
2545 case AArch64::SUBWrs:
2546 return AArch64::SUBSWrs;
2547 case AArch64::SUBWrx:
2548 return AArch64::SUBSWrx;
2549 // 64-bit cases:
2550 case AArch64::ADDXri:
2551 return AArch64::ADDSXri;
2552 case AArch64::ADDXrr:
2553 return AArch64::ADDSXrr;
2554 case AArch64::ADDXrs:
2555 return AArch64::ADDSXrs;
2556 case AArch64::ADDXrx:
2557 return AArch64::ADDSXrx;
2558 case AArch64::ANDXri:
2559 return AArch64::ANDSXri;
2560 case AArch64::ANDXrr:
2561 return AArch64::ANDSXrr;
2562 case AArch64::ANDXrs:
2563 return AArch64::ANDSXrs;
2564 case AArch64::BICXrr:
2565 return AArch64::BICSXrr;
2566 case AArch64::BICXrs:
2567 return AArch64::BICSXrs;
2568 case AArch64::SUBXri:
2569 return AArch64::SUBSXri;
2570 case AArch64::SUBXrr:
2571 return AArch64::SUBSXrr;
2572 case AArch64::SUBXrs:
2573 return AArch64::SUBSXrs;
2574 case AArch64::SUBXrx:
2575 return AArch64::SUBSXrx;
2576 // SVE instructions:
2577 case AArch64::AND_PPzPP:
2578 return AArch64::ANDS_PPzPP;
2579 case AArch64::BIC_PPzPP:
2580 return AArch64::BICS_PPzPP;
2581 case AArch64::EOR_PPzPP:
2582 return AArch64::EORS_PPzPP;
2583 case AArch64::NAND_PPzPP:
2584 return AArch64::NANDS_PPzPP;
2585 case AArch64::NOR_PPzPP:
2586 return AArch64::NORS_PPzPP;
2587 case AArch64::ORN_PPzPP:
2588 return AArch64::ORNS_PPzPP;
2589 case AArch64::ORR_PPzPP:
2590 return AArch64::ORRS_PPzPP;
2591 case AArch64::BRKA_PPzP:
2592 return AArch64::BRKAS_PPzP;
2593 case AArch64::BRKPA_PPzPP:
2594 return AArch64::BRKPAS_PPzPP;
2595 case AArch64::BRKB_PPzP:
2596 return AArch64::BRKBS_PPzP;
2597 case AArch64::BRKPB_PPzPP:
2598 return AArch64::BRKPBS_PPzPP;
2599 case AArch64::BRKN_PPzP:
2600 return AArch64::BRKNS_PPzP;
2601 case AArch64::RDFFR_PPz:
2602 return AArch64::RDFFRS_PPz;
2603 case AArch64::PTRUE_B:
2604 return AArch64::PTRUES_B;
2605 }
2606}
2607
2608// Is this a candidate for ld/st merging or pairing? For example, we don't
2609// touch volatiles or load/stores that have a hint to avoid pair formation.
2610bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2611
2612 bool IsPreLdSt = isPreLdSt(MI);
2613
2614 // If this is a volatile load/store, don't mess with it.
2615 if (MI.hasOrderedMemoryRef())
2616 return false;
2617
2618 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2619 // For Pre-inc LD/ST, the operand is shifted by one.
2620 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2621 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2622 "Expected a reg or frame index operand.");
2623
2624 // For Pre-indexed addressing quadword instructions, the third operand is the
2625 // immediate value.
2626 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(i: 3).isImm();
2627
2628 if (!MI.getOperand(i: 2).isImm() && !IsImmPreLdSt)
2629 return false;
2630
2631 // Can't merge/pair if the instruction modifies the base register.
2632 // e.g., ldr x0, [x0]
2633 // This case will never occur with an FI base.
2634 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2635 // STR<S,D,Q,W,X>pre, it can be merged.
2636 // For example:
2637 // ldr q0, [x11, #32]!
2638 // ldr q1, [x11, #16]
2639 // to
2640 // ldp q0, q1, [x11, #32]!
2641 if (MI.getOperand(i: 1).isReg() && !IsPreLdSt) {
2642 Register BaseReg = MI.getOperand(i: 1).getReg();
2643 const TargetRegisterInfo *TRI = &getRegisterInfo();
2644 if (MI.modifiesRegister(Reg: BaseReg, TRI))
2645 return false;
2646 }
2647
2648 // Check if this load/store has a hint to avoid pair formation.
2649 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2650 if (isLdStPairSuppressed(MI))
2651 return false;
2652
  // Do not pair any callee-save store/reload instructions in the
  // prologue/epilogue if the CFI information encoded the operations as separate
  // instructions, as that would cause the size of the actual prologue to differ
  // from the prologue size recorded in the Windows CFI.
2657 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2658 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2659 MI.getMF()->getFunction().needsUnwindTableEntry();
2660 if (NeedsWinCFI && (MI.getFlag(Flag: MachineInstr::FrameSetup) ||
2661 MI.getFlag(Flag: MachineInstr::FrameDestroy)))
2662 return false;
2663
2664 // On some CPUs quad load/store pairs are slower than two single load/stores.
2665 if (Subtarget.isPaired128Slow()) {
2666 switch (MI.getOpcode()) {
2667 default:
2668 break;
2669 case AArch64::LDURQi:
2670 case AArch64::STURQi:
2671 case AArch64::LDRQui:
2672 case AArch64::STRQui:
2673 return false;
2674 }
2675 }
2676
2677 return true;
2678}
2679
2680bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2681 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2682 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2683 const TargetRegisterInfo *TRI) const {
2684 if (!LdSt.mayLoadOrStore())
2685 return false;
2686
2687 const MachineOperand *BaseOp;
2688 TypeSize WidthN(0, false);
2689 if (!getMemOperandWithOffsetWidth(MI: LdSt, BaseOp, Offset, OffsetIsScalable,
2690 Width&: WidthN, TRI))
2691 return false;
  // The maximum vscale is 16 under AArch64; return the maximal extent for the
  // vector.
2694 Width = LocationSize::precise(Value: WidthN);
2695 BaseOps.push_back(Elt: BaseOp);
2696 return true;
2697}
2698
2699std::optional<ExtAddrMode>
2700AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2701 const TargetRegisterInfo *TRI) const {
2702 const MachineOperand *Base; // Filled with the base operand of MI.
2703 int64_t Offset; // Filled with the offset of MI.
2704 bool OffsetIsScalable;
2705 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2706 return std::nullopt;
2707
2708 if (!Base->isReg())
2709 return std::nullopt;
2710 ExtAddrMode AM;
2711 AM.BaseReg = Base->getReg();
2712 AM.Displacement = Offset;
2713 AM.ScaledReg = 0;
2714 AM.Scale = 0;
2715 return AM;
2716}
2717
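// Check whether AddrI, an address-computing instruction whose result Reg is
// used by the memory access MemI, can be folded into MemI's addressing mode,
// and if so describe the combined addressing mode in AM. For example
// (illustrative):
//   add x1, x0, #32
//   ldr x2, [x1, #8]
// folds to the addressing mode [x0, #40], allowing the load to be re-emitted
// as 'ldr x2, [x0, #40]'.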
2718bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2719 Register Reg,
2720 const MachineInstr &AddrI,
2721 ExtAddrMode &AM) const {
2722 // Filter out instructions into which we cannot fold.
2723 unsigned NumBytes;
2724 int64_t OffsetScale = 1;
2725 switch (MemI.getOpcode()) {
2726 default:
2727 return false;
2728
2729 case AArch64::LDURQi:
2730 case AArch64::STURQi:
2731 NumBytes = 16;
2732 break;
2733
2734 case AArch64::LDURDi:
2735 case AArch64::STURDi:
2736 case AArch64::LDURXi:
2737 case AArch64::STURXi:
2738 NumBytes = 8;
2739 break;
2740
2741 case AArch64::LDURWi:
2742 case AArch64::LDURSWi:
2743 case AArch64::STURWi:
2744 NumBytes = 4;
2745 break;
2746
2747 case AArch64::LDURHi:
2748 case AArch64::STURHi:
2749 case AArch64::LDURHHi:
2750 case AArch64::STURHHi:
2751 case AArch64::LDURSHXi:
2752 case AArch64::LDURSHWi:
2753 NumBytes = 2;
2754 break;
2755
2756 case AArch64::LDRBroX:
2757 case AArch64::LDRBBroX:
2758 case AArch64::LDRSBXroX:
2759 case AArch64::LDRSBWroX:
2760 case AArch64::STRBroX:
2761 case AArch64::STRBBroX:
2762 case AArch64::LDURBi:
2763 case AArch64::LDURBBi:
2764 case AArch64::LDURSBXi:
2765 case AArch64::LDURSBWi:
2766 case AArch64::STURBi:
2767 case AArch64::STURBBi:
2768 case AArch64::LDRBui:
2769 case AArch64::LDRBBui:
2770 case AArch64::LDRSBXui:
2771 case AArch64::LDRSBWui:
2772 case AArch64::STRBui:
2773 case AArch64::STRBBui:
2774 NumBytes = 1;
2775 break;
2776
2777 case AArch64::LDRQroX:
2778 case AArch64::STRQroX:
2779 case AArch64::LDRQui:
2780 case AArch64::STRQui:
2781 NumBytes = 16;
2782 OffsetScale = 16;
2783 break;
2784
2785 case AArch64::LDRDroX:
2786 case AArch64::STRDroX:
2787 case AArch64::LDRXroX:
2788 case AArch64::STRXroX:
2789 case AArch64::LDRDui:
2790 case AArch64::STRDui:
2791 case AArch64::LDRXui:
2792 case AArch64::STRXui:
2793 NumBytes = 8;
2794 OffsetScale = 8;
2795 break;
2796
2797 case AArch64::LDRWroX:
2798 case AArch64::LDRSWroX:
2799 case AArch64::STRWroX:
2800 case AArch64::LDRWui:
2801 case AArch64::LDRSWui:
2802 case AArch64::STRWui:
2803 NumBytes = 4;
2804 OffsetScale = 4;
2805 break;
2806
2807 case AArch64::LDRHroX:
2808 case AArch64::STRHroX:
2809 case AArch64::LDRHHroX:
2810 case AArch64::STRHHroX:
2811 case AArch64::LDRSHXroX:
2812 case AArch64::LDRSHWroX:
2813 case AArch64::LDRHui:
2814 case AArch64::STRHui:
2815 case AArch64::LDRHHui:
2816 case AArch64::STRHHui:
2817 case AArch64::LDRSHXui:
2818 case AArch64::LDRSHWui:
2819 NumBytes = 2;
2820 OffsetScale = 2;
2821 break;
2822 }
2823
2824 // Check the fold operand is not the loaded/stored value.
2825 const MachineOperand &BaseRegOp = MemI.getOperand(i: 0);
2826 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2827 return false;
2828
2829 // Handle memory instructions with a [Reg, Reg] addressing mode.
2830 if (MemI.getOperand(i: 2).isReg()) {
2831 // Bail if the addressing mode already includes extension of the offset
2832 // register.
2833 if (MemI.getOperand(i: 3).getImm())
2834 return false;
2835
2836 // Check if we actually have a scaled offset.
2837 if (MemI.getOperand(i: 4).getImm() == 0)
2838 OffsetScale = 1;
2839
    // If the address-computing instruction feeds the base register, the
    // addressing mode must not have a scale; only then can we swap the base
    // and the scaled offset registers.
2843 if (MemI.getOperand(i: 1).getReg() == Reg && OffsetScale != 1)
2844 return false;
2845
2846 switch (AddrI.getOpcode()) {
2847 default:
2848 return false;
2849
2850 case AArch64::SBFMXri:
2851 // sxtw Xa, Wm
2852 // ldr Xd, [Xn, Xa, lsl #N]
2853 // ->
2854 // ldr Xd, [Xn, Wm, sxtw #N]
2855 if (AddrI.getOperand(i: 2).getImm() != 0 ||
2856 AddrI.getOperand(i: 3).getImm() != 31)
2857 return false;
2858
2859 AM.BaseReg = MemI.getOperand(i: 1).getReg();
2860 if (AM.BaseReg == Reg)
2861 AM.BaseReg = MemI.getOperand(i: 2).getReg();
2862 AM.ScaledReg = AddrI.getOperand(i: 1).getReg();
2863 AM.Scale = OffsetScale;
2864 AM.Displacement = 0;
2865 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
2866 return true;
2867
2868 case TargetOpcode::SUBREG_TO_REG: {
2869 // mov Wa, Wm
2870 // ldr Xd, [Xn, Xa, lsl #N]
2871 // ->
2872 // ldr Xd, [Xn, Wm, uxtw #N]
2873
2874 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
2875 if (AddrI.getOperand(1).getImm() != 0 ||
2876 AddrI.getOperand(3).getImm() != AArch64::sub_32)
2877 return false;
2878
2879 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
2880 Register OffsetReg = AddrI.getOperand(i: 2).getReg();
2881 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(RegNo: OffsetReg))
2882 return false;
2883
2884 const MachineInstr &DefMI = *MRI.getVRegDef(Reg: OffsetReg);
2885 if (DefMI.getOpcode() != AArch64::ORRWrs ||
2886 DefMI.getOperand(1).getReg() != AArch64::WZR ||
2887 DefMI.getOperand(3).getImm() != 0)
2888 return false;
2889
2890 AM.BaseReg = MemI.getOperand(i: 1).getReg();
2891 if (AM.BaseReg == Reg)
2892 AM.BaseReg = MemI.getOperand(i: 2).getReg();
2893 AM.ScaledReg = DefMI.getOperand(i: 2).getReg();
2894 AM.Scale = OffsetScale;
2895 AM.Displacement = 0;
2896 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
2897 return true;
2898 }
2899 }
2900 }
2901
2902 // Handle memory instructions with a [Reg, #Imm] addressing mode.
2903
2904 // Check we are not breaking a potential conversion to an LDP.
2905 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
2906 int64_t NewOffset) -> bool {
2907 int64_t MinOffset, MaxOffset;
2908 switch (NumBytes) {
2909 default:
2910 return true;
2911 case 4:
2912 MinOffset = -256;
2913 MaxOffset = 252;
2914 break;
2915 case 8:
2916 MinOffset = -512;
2917 MaxOffset = 504;
2918 break;
2919 case 16:
2920 MinOffset = -1024;
2921 MaxOffset = 1008;
2922 break;
2923 }
2924 return OldOffset < MinOffset || OldOffset > MaxOffset ||
2925 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
2926 };
2927 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
2928 int64_t OldOffset = MemI.getOperand(i: 2).getImm() * OffsetScale;
2929 int64_t NewOffset = OldOffset + Disp;
2930 if (!isLegalAddressingMode(NumBytes, Offset: NewOffset, /* Scale */ 0))
2931 return false;
2932 // If the old offset would fit into an LDP, but the new offset wouldn't,
2933 // bail out.
2934 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
2935 return false;
2936 AM.BaseReg = AddrI.getOperand(i: 1).getReg();
2937 AM.ScaledReg = 0;
2938 AM.Scale = 0;
2939 AM.Displacement = NewOffset;
2940 AM.Form = ExtAddrMode::Formula::Basic;
2941 return true;
2942 };
2943
2944 auto canFoldAddRegIntoAddrMode =
2945 [&](int64_t Scale,
2946 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
2947 if (MemI.getOperand(i: 2).getImm() != 0)
2948 return false;
2949 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
2950 return false;
2951 AM.BaseReg = AddrI.getOperand(i: 1).getReg();
2952 AM.ScaledReg = AddrI.getOperand(i: 2).getReg();
2953 AM.Scale = Scale;
2954 AM.Displacement = 0;
2955 AM.Form = Form;
2956 return true;
2957 };
2958
2959 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
2960 unsigned Opcode = MemI.getOpcode();
2961 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
2962 Subtarget.isSTRQroSlow();
2963 };
2964
2965 int64_t Disp = 0;
2966 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
2967 switch (AddrI.getOpcode()) {
2968 default:
2969 return false;
2970
2971 case AArch64::ADDXri:
2972 // add Xa, Xn, #N
2973 // ldr Xd, [Xa, #M]
2974 // ->
2975 // ldr Xd, [Xn, #N'+M]
2976 Disp = AddrI.getOperand(i: 2).getImm() << AddrI.getOperand(i: 3).getImm();
2977 return canFoldAddSubImmIntoAddrMode(Disp);
2978
2979 case AArch64::SUBXri:
2980 // sub Xa, Xn, #N
2981 // ldr Xd, [Xa, #M]
2982 // ->
2983 // ldr Xd, [Xn, #N'+M]
2984 Disp = AddrI.getOperand(i: 2).getImm() << AddrI.getOperand(i: 3).getImm();
2985 return canFoldAddSubImmIntoAddrMode(-Disp);
2986
2987 case AArch64::ADDXrs: {
2988 // add Xa, Xn, Xm, lsl #N
2989 // ldr Xd, [Xa]
2990 // ->
2991 // ldr Xd, [Xn, Xm, lsl #N]
2992
2993 // Don't fold the add if the result would be slower, unless optimising for
2994 // size.
2995 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(i: 3).getImm());
2996 if (AArch64_AM::getShiftType(Imm: Shift) != AArch64_AM::ShiftExtendType::LSL)
2997 return false;
2998 Shift = AArch64_AM::getShiftValue(Imm: Shift);
2999 if (!OptSize) {
3000 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3001 return false;
3002 if (avoidSlowSTRQ(MemI))
3003 return false;
3004 }
3005 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3006 }
3007
3008 case AArch64::ADDXrr:
3009 // add Xa, Xn, Xm
3010 // ldr Xd, [Xa]
3011 // ->
3012 // ldr Xd, [Xn, Xm, lsl #0]
3013
3014 // Don't fold the add if the result would be slower, unless optimising for
3015 // size.
3016 if (!OptSize && avoidSlowSTRQ(MemI))
3017 return false;
3018 return canFoldAddRegIntoAddrMode(1);
3019
3020 case AArch64::ADDXrx:
3021 // add Xa, Xn, Wm, {s,u}xtw #N
3022 // ldr Xd, [Xa]
3023 // ->
3024 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3025
3026 // Don't fold the add if the result would be slower, unless optimising for
3027 // size.
3028 if (!OptSize && avoidSlowSTRQ(MemI))
3029 return false;
3030
3031 // Can fold only sign-/zero-extend of a word.
3032 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(i: 3).getImm());
3033 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3034 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3035 return false;
3036
3037 return canFoldAddRegIntoAddrMode(
3038 1ULL << AArch64_AM::getArithShiftValue(Imm),
3039 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3040 : ExtAddrMode::Formula::ZExtScaledReg);
3041 }
3042}
3043
3044// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3045// return the opcode of an instruction performing the same operation, but using
3046// the [Reg, Reg] addressing mode.
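// For example, both AArch64::LDRXui and AArch64::LDURXi map to
// AArch64::LDRXroX.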
3047static unsigned regOffsetOpcode(unsigned Opcode) {
3048 switch (Opcode) {
3049 default:
3050 llvm_unreachable("Address folding not implemented for instruction");
3051
3052 case AArch64::LDURQi:
3053 case AArch64::LDRQui:
3054 return AArch64::LDRQroX;
3055 case AArch64::STURQi:
3056 case AArch64::STRQui:
3057 return AArch64::STRQroX;
3058 case AArch64::LDURDi:
3059 case AArch64::LDRDui:
3060 return AArch64::LDRDroX;
3061 case AArch64::STURDi:
3062 case AArch64::STRDui:
3063 return AArch64::STRDroX;
3064 case AArch64::LDURXi:
3065 case AArch64::LDRXui:
3066 return AArch64::LDRXroX;
3067 case AArch64::STURXi:
3068 case AArch64::STRXui:
3069 return AArch64::STRXroX;
3070 case AArch64::LDURWi:
3071 case AArch64::LDRWui:
3072 return AArch64::LDRWroX;
3073 case AArch64::LDURSWi:
3074 case AArch64::LDRSWui:
3075 return AArch64::LDRSWroX;
3076 case AArch64::STURWi:
3077 case AArch64::STRWui:
3078 return AArch64::STRWroX;
3079 case AArch64::LDURHi:
3080 case AArch64::LDRHui:
3081 return AArch64::LDRHroX;
3082 case AArch64::STURHi:
3083 case AArch64::STRHui:
3084 return AArch64::STRHroX;
3085 case AArch64::LDURHHi:
3086 case AArch64::LDRHHui:
3087 return AArch64::LDRHHroX;
3088 case AArch64::STURHHi:
3089 case AArch64::STRHHui:
3090 return AArch64::STRHHroX;
3091 case AArch64::LDURSHXi:
3092 case AArch64::LDRSHXui:
3093 return AArch64::LDRSHXroX;
3094 case AArch64::LDURSHWi:
3095 case AArch64::LDRSHWui:
3096 return AArch64::LDRSHWroX;
3097 case AArch64::LDURBi:
3098 case AArch64::LDRBui:
3099 return AArch64::LDRBroX;
3100 case AArch64::LDURBBi:
3101 case AArch64::LDRBBui:
3102 return AArch64::LDRBBroX;
3103 case AArch64::LDURSBXi:
3104 case AArch64::LDRSBXui:
3105 return AArch64::LDRSBXroX;
3106 case AArch64::LDURSBWi:
3107 case AArch64::LDRSBWui:
3108 return AArch64::LDRSBWroX;
3109 case AArch64::STURBi:
3110 case AArch64::STRBui:
3111 return AArch64::STRBroX;
3112 case AArch64::STURBBi:
3113 case AArch64::STRBBui:
3114 return AArch64::STRBBroX;
3115 }
3116}
3117
3118// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3119// the opcode of an instruction performing the same operation, but using the
3120// [Reg, #Imm] addressing mode with scaled offset.
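// For example, AArch64::LDURXi maps to AArch64::LDRXui with Scale set to 8.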
3121unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3122 switch (Opcode) {
3123 default:
3124 llvm_unreachable("Address folding not implemented for instruction");
3125
3126 case AArch64::LDURQi:
3127 Scale = 16;
3128 return AArch64::LDRQui;
3129 case AArch64::STURQi:
3130 Scale = 16;
3131 return AArch64::STRQui;
3132 case AArch64::LDURDi:
3133 Scale = 8;
3134 return AArch64::LDRDui;
3135 case AArch64::STURDi:
3136 Scale = 8;
3137 return AArch64::STRDui;
3138 case AArch64::LDURXi:
3139 Scale = 8;
3140 return AArch64::LDRXui;
3141 case AArch64::STURXi:
3142 Scale = 8;
3143 return AArch64::STRXui;
3144 case AArch64::LDURWi:
3145 Scale = 4;
3146 return AArch64::LDRWui;
3147 case AArch64::LDURSWi:
3148 Scale = 4;
3149 return AArch64::LDRSWui;
3150 case AArch64::STURWi:
3151 Scale = 4;
3152 return AArch64::STRWui;
3153 case AArch64::LDURHi:
3154 Scale = 2;
3155 return AArch64::LDRHui;
3156 case AArch64::STURHi:
3157 Scale = 2;
3158 return AArch64::STRHui;
3159 case AArch64::LDURHHi:
3160 Scale = 2;
3161 return AArch64::LDRHHui;
3162 case AArch64::STURHHi:
3163 Scale = 2;
3164 return AArch64::STRHHui;
3165 case AArch64::LDURSHXi:
3166 Scale = 2;
3167 return AArch64::LDRSHXui;
3168 case AArch64::LDURSHWi:
3169 Scale = 2;
3170 return AArch64::LDRSHWui;
3171 case AArch64::LDURBi:
3172 Scale = 1;
3173 return AArch64::LDRBui;
3174 case AArch64::LDURBBi:
3175 Scale = 1;
3176 return AArch64::LDRBBui;
3177 case AArch64::LDURSBXi:
3178 Scale = 1;
3179 return AArch64::LDRSBXui;
3180 case AArch64::LDURSBWi:
3181 Scale = 1;
3182 return AArch64::LDRSBWui;
3183 case AArch64::STURBi:
3184 Scale = 1;
3185 return AArch64::STRBui;
3186 case AArch64::STURBBi:
3187 Scale = 1;
3188 return AArch64::STRBBui;
3189 case AArch64::LDRQui:
3190 case AArch64::STRQui:
3191 Scale = 16;
3192 return Opcode;
3193 case AArch64::LDRDui:
3194 case AArch64::STRDui:
3195 case AArch64::LDRXui:
3196 case AArch64::STRXui:
3197 Scale = 8;
3198 return Opcode;
3199 case AArch64::LDRWui:
3200 case AArch64::LDRSWui:
3201 case AArch64::STRWui:
3202 Scale = 4;
3203 return Opcode;
3204 case AArch64::LDRHui:
3205 case AArch64::STRHui:
3206 case AArch64::LDRHHui:
3207 case AArch64::STRHHui:
3208 case AArch64::LDRSHXui:
3209 case AArch64::LDRSHWui:
3210 Scale = 2;
3211 return Opcode;
3212 case AArch64::LDRBui:
3213 case AArch64::LDRBBui:
3214 case AArch64::LDRSBXui:
3215 case AArch64::LDRSBWui:
3216 case AArch64::STRBui:
3217 case AArch64::STRBBui:
3218 Scale = 1;
3219 return Opcode;
3220 }
3221}
3222
3223// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3224// the opcode of an instruction performing the same operation, but using the
3225// [Reg, #Imm] addressing mode with unscaled offset.
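// For example, AArch64::LDRXui maps to AArch64::LDURXi, while opcodes that
// are already unscaled are returned unchanged.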
3226unsigned unscaledOffsetOpcode(unsigned Opcode) {
3227 switch (Opcode) {
3228 default:
3229 llvm_unreachable("Address folding not implemented for instruction");
3230
3231 case AArch64::LDURQi:
3232 case AArch64::STURQi:
3233 case AArch64::LDURDi:
3234 case AArch64::STURDi:
3235 case AArch64::LDURXi:
3236 case AArch64::STURXi:
3237 case AArch64::LDURWi:
3238 case AArch64::LDURSWi:
3239 case AArch64::STURWi:
3240 case AArch64::LDURHi:
3241 case AArch64::STURHi:
3242 case AArch64::LDURHHi:
3243 case AArch64::STURHHi:
3244 case AArch64::LDURSHXi:
3245 case AArch64::LDURSHWi:
3246 case AArch64::LDURBi:
3247 case AArch64::STURBi:
3248 case AArch64::LDURBBi:
3249 case AArch64::STURBBi:
3250 case AArch64::LDURSBWi:
3251 case AArch64::LDURSBXi:
3252 return Opcode;
3253 case AArch64::LDRQui:
3254 return AArch64::LDURQi;
3255 case AArch64::STRQui:
3256 return AArch64::STURQi;
3257 case AArch64::LDRDui:
3258 return AArch64::LDURDi;
3259 case AArch64::STRDui:
3260 return AArch64::STURDi;
3261 case AArch64::LDRXui:
3262 return AArch64::LDURXi;
3263 case AArch64::STRXui:
3264 return AArch64::STURXi;
3265 case AArch64::LDRWui:
3266 return AArch64::LDURWi;
3267 case AArch64::LDRSWui:
3268 return AArch64::LDURSWi;
3269 case AArch64::STRWui:
3270 return AArch64::STURWi;
3271 case AArch64::LDRHui:
3272 return AArch64::LDURHi;
3273 case AArch64::STRHui:
3274 return AArch64::STURHi;
3275 case AArch64::LDRHHui:
3276 return AArch64::LDURHHi;
3277 case AArch64::STRHHui:
3278 return AArch64::STURHHi;
3279 case AArch64::LDRSHXui:
3280 return AArch64::LDURSHXi;
3281 case AArch64::LDRSHWui:
3282 return AArch64::LDURSHWi;
3283 case AArch64::LDRBBui:
3284 return AArch64::LDURBBi;
3285 case AArch64::LDRBui:
3286 return AArch64::LDURBi;
3287 case AArch64::STRBBui:
3288 return AArch64::STURBBi;
3289 case AArch64::STRBui:
3290 return AArch64::STURBi;
3291 case AArch64::LDRSBWui:
3292 return AArch64::LDURSBWi;
3293 case AArch64::LDRSBXui:
3294 return AArch64::LDURSBXi;
3295 }
3296}
3297
3298// Given the opcode of a memory load/store instruction, return the opcode of an
3299// instruction performing the same operation, but using
3300// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3301// offset register.
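// For example, AArch64::LDRXroX, AArch64::LDRXui and AArch64::LDURXi all map
// to AArch64::LDRXroW.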
3302static unsigned offsetExtendOpcode(unsigned Opcode) {
3303 switch (Opcode) {
3304 default:
3305 llvm_unreachable("Address folding not implemented for instruction");
3306
3307 case AArch64::LDRQroX:
3308 case AArch64::LDURQi:
3309 case AArch64::LDRQui:
3310 return AArch64::LDRQroW;
3311 case AArch64::STRQroX:
3312 case AArch64::STURQi:
3313 case AArch64::STRQui:
3314 return AArch64::STRQroW;
3315 case AArch64::LDRDroX:
3316 case AArch64::LDURDi:
3317 case AArch64::LDRDui:
3318 return AArch64::LDRDroW;
3319 case AArch64::STRDroX:
3320 case AArch64::STURDi:
3321 case AArch64::STRDui:
3322 return AArch64::STRDroW;
3323 case AArch64::LDRXroX:
3324 case AArch64::LDURXi:
3325 case AArch64::LDRXui:
3326 return AArch64::LDRXroW;
3327 case AArch64::STRXroX:
3328 case AArch64::STURXi:
3329 case AArch64::STRXui:
3330 return AArch64::STRXroW;
3331 case AArch64::LDRWroX:
3332 case AArch64::LDURWi:
3333 case AArch64::LDRWui:
3334 return AArch64::LDRWroW;
3335 case AArch64::LDRSWroX:
3336 case AArch64::LDURSWi:
3337 case AArch64::LDRSWui:
3338 return AArch64::LDRSWroW;
3339 case AArch64::STRWroX:
3340 case AArch64::STURWi:
3341 case AArch64::STRWui:
3342 return AArch64::STRWroW;
3343 case AArch64::LDRHroX:
3344 case AArch64::LDURHi:
3345 case AArch64::LDRHui:
3346 return AArch64::LDRHroW;
3347 case AArch64::STRHroX:
3348 case AArch64::STURHi:
3349 case AArch64::STRHui:
3350 return AArch64::STRHroW;
3351 case AArch64::LDRHHroX:
3352 case AArch64::LDURHHi:
3353 case AArch64::LDRHHui:
3354 return AArch64::LDRHHroW;
3355 case AArch64::STRHHroX:
3356 case AArch64::STURHHi:
3357 case AArch64::STRHHui:
3358 return AArch64::STRHHroW;
3359 case AArch64::LDRSHXroX:
3360 case AArch64::LDURSHXi:
3361 case AArch64::LDRSHXui:
3362 return AArch64::LDRSHXroW;
3363 case AArch64::LDRSHWroX:
3364 case AArch64::LDURSHWi:
3365 case AArch64::LDRSHWui:
3366 return AArch64::LDRSHWroW;
3367 case AArch64::LDRBroX:
3368 case AArch64::LDURBi:
3369 case AArch64::LDRBui:
3370 return AArch64::LDRBroW;
3371 case AArch64::LDRBBroX:
3372 case AArch64::LDURBBi:
3373 case AArch64::LDRBBui:
3374 return AArch64::LDRBBroW;
3375 case AArch64::LDRSBXroX:
3376 case AArch64::LDURSBXi:
3377 case AArch64::LDRSBXui:
3378 return AArch64::LDRSBXroW;
3379 case AArch64::LDRSBWroX:
3380 case AArch64::LDURSBWi:
3381 case AArch64::LDRSBWui:
3382 return AArch64::LDRSBWroW;
3383 case AArch64::STRBroX:
3384 case AArch64::STURBi:
3385 case AArch64::STRBui:
3386 return AArch64::STRBroW;
3387 case AArch64::STRBBroX:
3388 case AArch64::STURBBi:
3389 case AArch64::STRBBui:
3390 return AArch64::STRBBroW;
3391 }
3392}
3393
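// Re-emit the memory access MemI using the addressing mode AM, as computed
// by canFoldIntoAddrMode. For example (illustrative), with
//   AM = { BaseReg = x0, ScaledReg = x2, Scale = 8, Form = Basic }
// an 'ldr x3, [x1]' is rewritten as 'ldr x3, [x0, x2, lsl #3]'.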
3394MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3395 const ExtAddrMode &AM) const {
3396
3397 const DebugLoc &DL = MemI.getDebugLoc();
3398 MachineBasicBlock &MBB = *MemI.getParent();
3399 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3400
3401 if (AM.Form == ExtAddrMode::Formula::Basic) {
3402 if (AM.ScaledReg) {
3403 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3404 unsigned Opcode = regOffsetOpcode(Opcode: MemI.getOpcode());
3405 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3406 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3407 .addReg(MemI.getOperand(i: 0).getReg(),
3408 MemI.mayLoad() ? RegState::Define : 0)
3409 .addReg(AM.BaseReg)
3410 .addReg(AM.ScaledReg)
3411 .addImm(0)
3412 .addImm(AM.Scale > 1)
3413 .setMemRefs(MemI.memoperands())
3414 .setMIFlags(MemI.getFlags());
3415 return B.getInstr();
3416 }
3417
3418 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3419 "Addressing mode not supported for folding");
3420
3421 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3422 unsigned Scale = 1;
3423 unsigned Opcode = MemI.getOpcode();
3424 if (isInt<9>(x: AM.Displacement))
3425 Opcode = unscaledOffsetOpcode(Opcode);
3426 else
3427 Opcode = scaledOffsetOpcode(Opcode, Scale);
3428
3429 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3430 .addReg(MemI.getOperand(i: 0).getReg(),
3431 MemI.mayLoad() ? RegState::Define : 0)
3432 .addReg(AM.BaseReg)
3433 .addImm(AM.Displacement / Scale)
3434 .setMemRefs(MemI.memoperands())
3435 .setMIFlags(MemI.getFlags());
3436 return B.getInstr();
3437 }
3438
3439 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3440 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3441 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3442 assert(AM.ScaledReg && !AM.Displacement &&
3443 "Address offset can be a register or an immediate, but not both");
3444 unsigned Opcode = offsetExtendOpcode(Opcode: MemI.getOpcode());
3445 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3446 // Make sure the offset register is in the correct register class.
3447 Register OffsetReg = AM.ScaledReg;
3448 const TargetRegisterClass *RC = MRI.getRegClass(Reg: OffsetReg);
3449 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3450 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3451 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3452 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3453 }
3454 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3455 .addReg(MemI.getOperand(i: 0).getReg(),
3456 MemI.mayLoad() ? RegState::Define : 0)
3457 .addReg(AM.BaseReg)
3458 .addReg(OffsetReg)
3459 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3460 .addImm(AM.Scale != 1)
3461 .setMemRefs(MemI.memoperands())
3462 .setMIFlags(MemI.getFlags());
3463
3464 return B.getInstr();
3465 }
3466
3467 llvm_unreachable(
3468 "Function must not be called with an addressing mode it can't handle");
3469}
3470
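// Decompose a base+immediate load/store into its base operand, byte offset
// and access width. The returned offset is the immediate multiplied by the
// opcode's scale (possibly a scalable quantity); returns false for addressing
// forms this function does not handle.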
3471bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3472 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3473 bool &OffsetIsScalable, TypeSize &Width,
3474 const TargetRegisterInfo *TRI) const {
3475 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3476 // Handle only loads/stores with base register followed by immediate offset.
3477 if (LdSt.getNumExplicitOperands() == 3) {
3478 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3479 if ((!LdSt.getOperand(i: 1).isReg() && !LdSt.getOperand(i: 1).isFI()) ||
3480 !LdSt.getOperand(i: 2).isImm())
3481 return false;
3482 } else if (LdSt.getNumExplicitOperands() == 4) {
3483 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3484 if (!LdSt.getOperand(i: 1).isReg() ||
3485 (!LdSt.getOperand(i: 2).isReg() && !LdSt.getOperand(i: 2).isFI()) ||
3486 !LdSt.getOperand(i: 3).isImm())
3487 return false;
3488 } else
3489 return false;
3490
3491 // Get the scaling factor for the instruction and set the width for the
3492 // instruction.
3493 TypeSize Scale(0U, false);
3494 int64_t Dummy1, Dummy2;
3495
3496 // If this returns false, then it's an instruction we don't want to handle.
3497 if (!getMemOpInfo(Opcode: LdSt.getOpcode(), Scale, Width, MinOffset&: Dummy1, MaxOffset&: Dummy2))
3498 return false;
3499
3500 // Compute the offset. Offset is calculated as the immediate operand
3501 // multiplied by the scaling factor. Unscaled instructions have scaling factor
3502 // set to 1.
3503 if (LdSt.getNumExplicitOperands() == 3) {
3504 BaseOp = &LdSt.getOperand(i: 1);
3505 Offset = LdSt.getOperand(i: 2).getImm() * Scale.getKnownMinValue();
3506 } else {
3507 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3508 BaseOp = &LdSt.getOperand(i: 2);
3509 Offset = LdSt.getOperand(i: 3).getImm() * Scale.getKnownMinValue();
3510 }
3511 OffsetIsScalable = Scale.isScalable();
3512
3513 if (!BaseOp->isReg() && !BaseOp->isFI())
3514 return false;
3515
3516 return true;
3517}
3518
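// Return the immediate offset operand of a base+immediate load/store; it is
// always the last explicit operand.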
3519MachineOperand &
3520AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
3521 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3522 MachineOperand &OfsOp = LdSt.getOperand(i: LdSt.getNumExplicitOperands() - 1);
3523 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3524 return OfsOp;
3525}
3526
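// For the given load/store opcode, return the offset scaling factor, the
// access width, and the legal [MinOffset, MaxOffset] range of the immediate.
// MinOffset/MaxOffset are in units of Scale; e.g. LDPXi has Scale = 8 and an
// immediate range of [-64, 63], i.e. byte offsets in [-512, 504]. Returns
// false for opcodes that are not handled here.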
3527bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3528 TypeSize &Width, int64_t &MinOffset,
3529 int64_t &MaxOffset) {
3530 switch (Opcode) {
3531  // Not a memory operation, or not something we want to handle.
3532 default:
3533 Scale = TypeSize::getFixed(ExactSize: 0);
3534 Width = TypeSize::getFixed(ExactSize: 0);
3535 MinOffset = MaxOffset = 0;
3536 return false;
3537 case AArch64::STRWpost:
3538 case AArch64::LDRWpost:
3539 Width = TypeSize::getFixed(ExactSize: 32);
3540 Scale = TypeSize::getFixed(ExactSize: 4);
3541 MinOffset = -256;
3542 MaxOffset = 255;
3543 break;
3544 case AArch64::LDURQi:
3545 case AArch64::STURQi:
3546 Width = TypeSize::getFixed(ExactSize: 16);
3547 Scale = TypeSize::getFixed(ExactSize: 1);
3548 MinOffset = -256;
3549 MaxOffset = 255;
3550 break;
3551 case AArch64::PRFUMi:
3552 case AArch64::LDURXi:
3553 case AArch64::LDURDi:
3554 case AArch64::LDAPURXi:
3555 case AArch64::STURXi:
3556 case AArch64::STURDi:
3557 case AArch64::STLURXi:
3558 Width = TypeSize::getFixed(ExactSize: 8);
3559 Scale = TypeSize::getFixed(ExactSize: 1);
3560 MinOffset = -256;
3561 MaxOffset = 255;
3562 break;
3563 case AArch64::LDURWi:
3564 case AArch64::LDURSi:
3565 case AArch64::LDURSWi:
3566 case AArch64::LDAPURi:
3567 case AArch64::LDAPURSWi:
3568 case AArch64::STURWi:
3569 case AArch64::STURSi:
3570 case AArch64::STLURWi:
3571 Width = TypeSize::getFixed(ExactSize: 4);
3572 Scale = TypeSize::getFixed(ExactSize: 1);
3573 MinOffset = -256;
3574 MaxOffset = 255;
3575 break;
3576 case AArch64::LDURHi:
3577 case AArch64::LDURHHi:
3578 case AArch64::LDURSHXi:
3579 case AArch64::LDURSHWi:
3580 case AArch64::LDAPURHi:
3581 case AArch64::LDAPURSHWi:
3582 case AArch64::LDAPURSHXi:
3583 case AArch64::STURHi:
3584 case AArch64::STURHHi:
3585 case AArch64::STLURHi:
3586 Width = TypeSize::getFixed(ExactSize: 2);
3587 Scale = TypeSize::getFixed(ExactSize: 1);
3588 MinOffset = -256;
3589 MaxOffset = 255;
3590 break;
3591 case AArch64::LDURBi:
3592 case AArch64::LDURBBi:
3593 case AArch64::LDURSBXi:
3594 case AArch64::LDURSBWi:
3595 case AArch64::LDAPURBi:
3596 case AArch64::LDAPURSBWi:
3597 case AArch64::LDAPURSBXi:
3598 case AArch64::STURBi:
3599 case AArch64::STURBBi:
3600 case AArch64::STLURBi:
3601 Width = TypeSize::getFixed(ExactSize: 1);
3602 Scale = TypeSize::getFixed(ExactSize: 1);
3603 MinOffset = -256;
3604 MaxOffset = 255;
3605 break;
3606 case AArch64::LDPQi:
3607 case AArch64::LDNPQi:
3608 case AArch64::STPQi:
3609 case AArch64::STNPQi:
3610 Scale = TypeSize::getFixed(ExactSize: 16);
3611 Width = TypeSize::getFixed(ExactSize: 32);
3612 MinOffset = -64;
3613 MaxOffset = 63;
3614 break;
3615 case AArch64::LDRQui:
3616 case AArch64::STRQui:
3617 Scale = TypeSize::getFixed(ExactSize: 16);
3618 Width = TypeSize::getFixed(ExactSize: 16);
3619 MinOffset = 0;
3620 MaxOffset = 4095;
3621 break;
3622 case AArch64::LDPXi:
3623 case AArch64::LDPDi:
3624 case AArch64::LDNPXi:
3625 case AArch64::LDNPDi:
3626 case AArch64::STPXi:
3627 case AArch64::STPDi:
3628 case AArch64::STNPXi:
3629 case AArch64::STNPDi:
3630 Scale = TypeSize::getFixed(ExactSize: 8);
3631 Width = TypeSize::getFixed(ExactSize: 16);
3632 MinOffset = -64;
3633 MaxOffset = 63;
3634 break;
3635 case AArch64::PRFMui:
3636 case AArch64::LDRXui:
3637 case AArch64::LDRDui:
3638 case AArch64::STRXui:
3639 case AArch64::STRDui:
3640 Scale = TypeSize::getFixed(ExactSize: 8);
3641 Width = TypeSize::getFixed(ExactSize: 8);
3642 MinOffset = 0;
3643 MaxOffset = 4095;
3644 break;
3645 case AArch64::StoreSwiftAsyncContext:
3646 // Store is an STRXui, but there might be an ADDXri in the expansion too.
3647 Scale = TypeSize::getFixed(ExactSize: 1);
3648 Width = TypeSize::getFixed(ExactSize: 8);
3649 MinOffset = 0;
3650 MaxOffset = 4095;
3651 break;
3652 case AArch64::LDPWi:
3653 case AArch64::LDPSi:
3654 case AArch64::LDNPWi:
3655 case AArch64::LDNPSi:
3656 case AArch64::STPWi:
3657 case AArch64::STPSi:
3658 case AArch64::STNPWi:
3659 case AArch64::STNPSi:
3660 Scale = TypeSize::getFixed(ExactSize: 4);
3661 Width = TypeSize::getFixed(ExactSize: 8);
3662 MinOffset = -64;
3663 MaxOffset = 63;
3664 break;
3665 case AArch64::LDRWui:
3666 case AArch64::LDRSui:
3667 case AArch64::LDRSWui:
3668 case AArch64::STRWui:
3669 case AArch64::STRSui:
3670 Scale = TypeSize::getFixed(ExactSize: 4);
3671 Width = TypeSize::getFixed(ExactSize: 4);
3672 MinOffset = 0;
3673 MaxOffset = 4095;
3674 break;
3675 case AArch64::LDRHui:
3676 case AArch64::LDRHHui:
3677 case AArch64::LDRSHWui:
3678 case AArch64::LDRSHXui:
3679 case AArch64::STRHui:
3680 case AArch64::STRHHui:
3681 Scale = TypeSize::getFixed(ExactSize: 2);
3682 Width = TypeSize::getFixed(ExactSize: 2);
3683 MinOffset = 0;
3684 MaxOffset = 4095;
3685 break;
3686 case AArch64::LDRBui:
3687 case AArch64::LDRBBui:
3688 case AArch64::LDRSBWui:
3689 case AArch64::LDRSBXui:
3690 case AArch64::STRBui:
3691 case AArch64::STRBBui:
3692 Scale = TypeSize::getFixed(ExactSize: 1);
3693 Width = TypeSize::getFixed(ExactSize: 1);
3694 MinOffset = 0;
3695 MaxOffset = 4095;
3696 break;
3697 case AArch64::STPXpre:
3698 case AArch64::LDPXpost:
3699 case AArch64::STPDpre:
3700 case AArch64::LDPDpost:
3701 Scale = TypeSize::getFixed(ExactSize: 8);
3702 Width = TypeSize::getFixed(ExactSize: 8);
3703 MinOffset = -512;
3704 MaxOffset = 504;
3705 break;
3706 case AArch64::STPQpre:
3707 case AArch64::LDPQpost:
3708 Scale = TypeSize::getFixed(ExactSize: 16);
3709 Width = TypeSize::getFixed(ExactSize: 16);
3710 MinOffset = -1024;
3711 MaxOffset = 1008;
3712 break;
3713 case AArch64::STRXpre:
3714 case AArch64::STRDpre:
3715 case AArch64::LDRXpost:
3716 case AArch64::LDRDpost:
3717 Scale = TypeSize::getFixed(ExactSize: 1);
3718 Width = TypeSize::getFixed(ExactSize: 8);
3719 MinOffset = -256;
3720 MaxOffset = 255;
3721 break;
3722 case AArch64::STRQpre:
3723 case AArch64::LDRQpost:
3724 Scale = TypeSize::getFixed(ExactSize: 1);
3725 Width = TypeSize::getFixed(ExactSize: 16);
3726 MinOffset = -256;
3727 MaxOffset = 255;
3728 break;
3729 case AArch64::ADDG:
3730 Scale = TypeSize::getFixed(ExactSize: 16);
3731 Width = TypeSize::getFixed(ExactSize: 0);
3732 MinOffset = 0;
3733 MaxOffset = 63;
3734 break;
3735 case AArch64::TAGPstack:
3736 Scale = TypeSize::getFixed(ExactSize: 16);
3737 Width = TypeSize::getFixed(ExactSize: 0);
3738 // TAGP with a negative offset turns into SUBP, which has a maximum offset
3739 // of 63 (not 64!).
3740 MinOffset = -63;
3741 MaxOffset = 63;
3742 break;
3743 case AArch64::LDG:
3744 case AArch64::STGi:
3745 case AArch64::STZGi:
3746 Scale = TypeSize::getFixed(ExactSize: 16);
3747 Width = TypeSize::getFixed(ExactSize: 16);
3748 MinOffset = -256;
3749 MaxOffset = 255;
3750 break;
3751 case AArch64::STR_ZZZZXI:
3752 case AArch64::LDR_ZZZZXI:
3753 Scale = TypeSize::getScalable(MinimumSize: 16);
3754 Width = TypeSize::getScalable(MinimumSize: 16 * 4);
3755 MinOffset = -256;
3756 MaxOffset = 252;
3757 break;
3758 case AArch64::STR_ZZZXI:
3759 case AArch64::LDR_ZZZXI:
3760 Scale = TypeSize::getScalable(MinimumSize: 16);
3761 Width = TypeSize::getScalable(MinimumSize: 16 * 3);
3762 MinOffset = -256;
3763 MaxOffset = 253;
3764 break;
3765 case AArch64::STR_ZZXI:
3766 case AArch64::LDR_ZZXI:
3767 Scale = TypeSize::getScalable(MinimumSize: 16);
3768 Width = TypeSize::getScalable(MinimumSize: 16 * 2);
3769 MinOffset = -256;
3770 MaxOffset = 254;
3771 break;
3772 case AArch64::LDR_PXI:
3773 case AArch64::STR_PXI:
3774 Scale = TypeSize::getScalable(MinimumSize: 2);
3775 Width = TypeSize::getScalable(MinimumSize: 2);
3776 MinOffset = -256;
3777 MaxOffset = 255;
3778 break;
3779 case AArch64::LDR_PPXI:
3780 case AArch64::STR_PPXI:
3781 Scale = TypeSize::getScalable(MinimumSize: 2);
3782 Width = TypeSize::getScalable(MinimumSize: 2 * 2);
3783 MinOffset = -256;
3784 MaxOffset = 254;
3785 break;
3786 case AArch64::LDR_ZXI:
3787 case AArch64::STR_ZXI:
3788 Scale = TypeSize::getScalable(MinimumSize: 16);
3789 Width = TypeSize::getScalable(MinimumSize: 16);
3790 MinOffset = -256;
3791 MaxOffset = 255;
3792 break;
3793 case AArch64::LD1B_IMM:
3794 case AArch64::LD1H_IMM:
3795 case AArch64::LD1W_IMM:
3796 case AArch64::LD1D_IMM:
3797 case AArch64::LDNT1B_ZRI:
3798 case AArch64::LDNT1H_ZRI:
3799 case AArch64::LDNT1W_ZRI:
3800 case AArch64::LDNT1D_ZRI:
3801 case AArch64::ST1B_IMM:
3802 case AArch64::ST1H_IMM:
3803 case AArch64::ST1W_IMM:
3804 case AArch64::ST1D_IMM:
3805 case AArch64::STNT1B_ZRI:
3806 case AArch64::STNT1H_ZRI:
3807 case AArch64::STNT1W_ZRI:
3808 case AArch64::STNT1D_ZRI:
3809 case AArch64::LDNF1B_IMM:
3810 case AArch64::LDNF1H_IMM:
3811 case AArch64::LDNF1W_IMM:
3812 case AArch64::LDNF1D_IMM:
3813    // A full vector's worth of data
3814 // Width = mbytes * elements
3815 Scale = TypeSize::getScalable(MinimumSize: 16);
3816 Width = TypeSize::getScalable(MinimumSize: 16);
3817 MinOffset = -8;
3818 MaxOffset = 7;
3819 break;
3820 case AArch64::LD2B_IMM:
3821 case AArch64::LD2H_IMM:
3822 case AArch64::LD2W_IMM:
3823 case AArch64::LD2D_IMM:
3824 case AArch64::ST2B_IMM:
3825 case AArch64::ST2H_IMM:
3826 case AArch64::ST2W_IMM:
3827 case AArch64::ST2D_IMM:
3828 Scale = TypeSize::getScalable(MinimumSize: 32);
3829 Width = TypeSize::getScalable(MinimumSize: 16 * 2);
3830 MinOffset = -8;
3831 MaxOffset = 7;
3832 break;
3833 case AArch64::LD3B_IMM:
3834 case AArch64::LD3H_IMM:
3835 case AArch64::LD3W_IMM:
3836 case AArch64::LD3D_IMM:
3837 case AArch64::ST3B_IMM:
3838 case AArch64::ST3H_IMM:
3839 case AArch64::ST3W_IMM:
3840 case AArch64::ST3D_IMM:
3841 Scale = TypeSize::getScalable(MinimumSize: 48);
3842 Width = TypeSize::getScalable(MinimumSize: 16 * 3);
3843 MinOffset = -8;
3844 MaxOffset = 7;
3845 break;
3846 case AArch64::LD4B_IMM:
3847 case AArch64::LD4H_IMM:
3848 case AArch64::LD4W_IMM:
3849 case AArch64::LD4D_IMM:
3850 case AArch64::ST4B_IMM:
3851 case AArch64::ST4H_IMM:
3852 case AArch64::ST4W_IMM:
3853 case AArch64::ST4D_IMM:
3854 Scale = TypeSize::getScalable(MinimumSize: 64);
3855 Width = TypeSize::getScalable(MinimumSize: 16 * 4);
3856 MinOffset = -8;
3857 MaxOffset = 7;
3858 break;
3859 case AArch64::LD1B_H_IMM:
3860 case AArch64::LD1SB_H_IMM:
3861 case AArch64::LD1H_S_IMM:
3862 case AArch64::LD1SH_S_IMM:
3863 case AArch64::LD1W_D_IMM:
3864 case AArch64::LD1SW_D_IMM:
3865 case AArch64::ST1B_H_IMM:
3866 case AArch64::ST1H_S_IMM:
3867 case AArch64::ST1W_D_IMM:
3868 case AArch64::LDNF1B_H_IMM:
3869 case AArch64::LDNF1SB_H_IMM:
3870 case AArch64::LDNF1H_S_IMM:
3871 case AArch64::LDNF1SH_S_IMM:
3872 case AArch64::LDNF1W_D_IMM:
3873 case AArch64::LDNF1SW_D_IMM:
3874    // A half vector's worth of data
3875 // Width = mbytes * elements
3876 Scale = TypeSize::getScalable(MinimumSize: 8);
3877 Width = TypeSize::getScalable(MinimumSize: 8);
3878 MinOffset = -8;
3879 MaxOffset = 7;
3880 break;
3881 case AArch64::LD1B_S_IMM:
3882 case AArch64::LD1SB_S_IMM:
3883 case AArch64::LD1H_D_IMM:
3884 case AArch64::LD1SH_D_IMM:
3885 case AArch64::ST1B_S_IMM:
3886 case AArch64::ST1H_D_IMM:
3887 case AArch64::LDNF1B_S_IMM:
3888 case AArch64::LDNF1SB_S_IMM:
3889 case AArch64::LDNF1H_D_IMM:
3890 case AArch64::LDNF1SH_D_IMM:
3891    // A quarter vector's worth of data
3892 // Width = mbytes * elements
3893 Scale = TypeSize::getScalable(MinimumSize: 4);
3894 Width = TypeSize::getScalable(MinimumSize: 4);
3895 MinOffset = -8;
3896 MaxOffset = 7;
3897 break;
3898 case AArch64::LD1B_D_IMM:
3899 case AArch64::LD1SB_D_IMM:
3900 case AArch64::ST1B_D_IMM:
3901 case AArch64::LDNF1B_D_IMM:
3902 case AArch64::LDNF1SB_D_IMM:
3903    // An eighth vector's worth of data
3904 // Width = mbytes * elements
3905 Scale = TypeSize::getScalable(MinimumSize: 2);
3906 Width = TypeSize::getScalable(MinimumSize: 2);
3907 MinOffset = -8;
3908 MaxOffset = 7;
3909 break;
3910 case AArch64::ST2Gi:
3911 case AArch64::STZ2Gi:
3912 Scale = TypeSize::getFixed(ExactSize: 16);
3913 Width = TypeSize::getFixed(ExactSize: 32);
3914 MinOffset = -256;
3915 MaxOffset = 255;
3916 break;
3917 case AArch64::STGPi:
3918 Scale = TypeSize::getFixed(ExactSize: 16);
3919 Width = TypeSize::getFixed(ExactSize: 16);
3920 MinOffset = -64;
3921 MaxOffset = 63;
3922 break;
3923 case AArch64::LD1RB_IMM:
3924 case AArch64::LD1RB_H_IMM:
3925 case AArch64::LD1RB_S_IMM:
3926 case AArch64::LD1RB_D_IMM:
3927 case AArch64::LD1RSB_H_IMM:
3928 case AArch64::LD1RSB_S_IMM:
3929 case AArch64::LD1RSB_D_IMM:
3930 Scale = TypeSize::getFixed(ExactSize: 1);
3931 Width = TypeSize::getFixed(ExactSize: 1);
3932 MinOffset = 0;
3933 MaxOffset = 63;
3934 break;
3935 case AArch64::LD1RH_IMM:
3936 case AArch64::LD1RH_S_IMM:
3937 case AArch64::LD1RH_D_IMM:
3938 case AArch64::LD1RSH_S_IMM:
3939 case AArch64::LD1RSH_D_IMM:
3940 Scale = TypeSize::getFixed(ExactSize: 2);
3941 Width = TypeSize::getFixed(ExactSize: 2);
3942 MinOffset = 0;
3943 MaxOffset = 63;
3944 break;
3945 case AArch64::LD1RW_IMM:
3946 case AArch64::LD1RW_D_IMM:
3947 case AArch64::LD1RSW_IMM:
3948 Scale = TypeSize::getFixed(ExactSize: 4);
3949 Width = TypeSize::getFixed(ExactSize: 4);
3950 MinOffset = 0;
3951 MaxOffset = 63;
3952 break;
3953 case AArch64::LD1RD_IMM:
3954 Scale = TypeSize::getFixed(ExactSize: 8);
3955 Width = TypeSize::getFixed(ExactSize: 8);
3956 MinOffset = 0;
3957 MaxOffset = 63;
3958 break;
3959 }
3960
3961 return true;
3962}
3963
3964// Memory access size in bytes per transferred register; this is also the
// scaling factor applied to the byte offsets of the unscaled forms.
3965int AArch64InstrInfo::getMemScale(unsigned Opc) {
3966 switch (Opc) {
3967 default:
3968 llvm_unreachable("Opcode has unknown scale!");
3969 case AArch64::LDRBBui:
3970 case AArch64::LDURBBi:
3971 case AArch64::LDRSBWui:
3972 case AArch64::LDURSBWi:
3973 case AArch64::STRBBui:
3974 case AArch64::STURBBi:
3975 return 1;
3976 case AArch64::LDRHHui:
3977 case AArch64::LDURHHi:
3978 case AArch64::LDRSHWui:
3979 case AArch64::LDURSHWi:
3980 case AArch64::STRHHui:
3981 case AArch64::STURHHi:
3982 return 2;
3983 case AArch64::LDRSui:
3984 case AArch64::LDURSi:
3985 case AArch64::LDRSpre:
3986 case AArch64::LDRSWui:
3987 case AArch64::LDURSWi:
3988 case AArch64::LDRSWpre:
3989 case AArch64::LDRWpre:
3990 case AArch64::LDRWui:
3991 case AArch64::LDURWi:
3992 case AArch64::STRSui:
3993 case AArch64::STURSi:
3994 case AArch64::STRSpre:
3995 case AArch64::STRWui:
3996 case AArch64::STURWi:
3997 case AArch64::STRWpre:
3998 case AArch64::LDPSi:
3999 case AArch64::LDPSWi:
4000 case AArch64::LDPWi:
4001 case AArch64::STPSi:
4002 case AArch64::STPWi:
4003 return 4;
4004 case AArch64::LDRDui:
4005 case AArch64::LDURDi:
4006 case AArch64::LDRDpre:
4007 case AArch64::LDRXui:
4008 case AArch64::LDURXi:
4009 case AArch64::LDRXpre:
4010 case AArch64::STRDui:
4011 case AArch64::STURDi:
4012 case AArch64::STRDpre:
4013 case AArch64::STRXui:
4014 case AArch64::STURXi:
4015 case AArch64::STRXpre:
4016 case AArch64::LDPDi:
4017 case AArch64::LDPXi:
4018 case AArch64::STPDi:
4019 case AArch64::STPXi:
4020 return 8;
4021 case AArch64::LDRQui:
4022 case AArch64::LDURQi:
4023 case AArch64::STRQui:
4024 case AArch64::STURQi:
4025 case AArch64::STRQpre:
4026 case AArch64::LDPQi:
4027 case AArch64::LDRQpre:
4028 case AArch64::STPQi:
4029 case AArch64::STGi:
4030 case AArch64::STZGi:
4031 case AArch64::ST2Gi:
4032 case AArch64::STZ2Gi:
4033 case AArch64::STGPi:
4034 return 16;
4035 }
4036}
4037
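// Return true if MI is a pre-indexed (writeback) load.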
4038bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4039 switch (MI.getOpcode()) {
4040 default:
4041 return false;
4042 case AArch64::LDRWpre:
4043 case AArch64::LDRXpre:
4044 case AArch64::LDRSWpre:
4045 case AArch64::LDRSpre:
4046 case AArch64::LDRDpre:
4047 case AArch64::LDRQpre:
4048 return true;
4049 }
4050}
4051
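// Return true if MI is a pre-indexed (writeback) store.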
4052bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4053 switch (MI.getOpcode()) {
4054 default:
4055 return false;
4056 case AArch64::STRWpre:
4057 case AArch64::STRXpre:
4058 case AArch64::STRSpre:
4059 case AArch64::STRDpre:
4060 case AArch64::STRQpre:
4061 return true;
4062 }
4063}
4064
4065bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4066 return isPreLd(MI) || isPreSt(MI);
4067}
4068
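// Return true if MI is a paired load/store (LDP/STP/STGP).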
4069bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4070 switch (MI.getOpcode()) {
4071 default:
4072 return false;
4073 case AArch64::LDPSi:
4074 case AArch64::LDPSWi:
4075 case AArch64::LDPDi:
4076 case AArch64::LDPQi:
4077 case AArch64::LDPWi:
4078 case AArch64::LDPXi:
4079 case AArch64::STPSi:
4080 case AArch64::STPDi:
4081 case AArch64::STPQi:
4082 case AArch64::STPWi:
4083 case AArch64::STPXi:
4084 case AArch64::STGPi:
4085 return true;
4086 }
4087}
4088
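// Return the base register operand of a load/store: operand 2 for paired and
// pre-indexed forms, operand 1 otherwise.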
4089const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4090 unsigned Idx =
4091 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4092 : 1;
4093 return MI.getOperand(i: Idx);
4094}
4095
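// Return the immediate offset operand of a load/store: operand 3 for paired
// and pre-indexed forms, operand 2 otherwise.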
4096const MachineOperand &
4097AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4098 unsigned Idx =
4099 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4100 : 2;
4101 return MI.getOperand(i: Idx);
4102}
4103
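// Return the register class assigned to the virtual register Reg in the
// enclosing function, or nullptr if it cannot be determined.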
4104static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4105 Register Reg) {
4106 if (MI.getParent() == nullptr)
4107 return nullptr;
4108 const MachineFunction *MF = MI.getParent()->getParent();
4109 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4110}
4111
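// Return true if any operand of MI is a 16-bit FPR (H register).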
4112bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4113 auto IsHFPR = [&](const MachineOperand &Op) {
4114 if (!Op.isReg())
4115 return false;
4116 auto Reg = Op.getReg();
4117 if (Reg.isPhysical())
4118 return AArch64::FPR16RegClass.contains(Reg);
4119 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4120 return TRC == &AArch64::FPR16RegClass ||
4121 TRC == &AArch64::FPR16_loRegClass;
4122 };
4123 return llvm::any_of(Range: MI.operands(), P: IsHFPR);
4124}
4125
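// Return true if any operand of MI is a 128-bit FPR (Q register).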
4126bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4127 auto IsQFPR = [&](const MachineOperand &Op) {
4128 if (!Op.isReg())
4129 return false;
4130 auto Reg = Op.getReg();
4131 if (Reg.isPhysical())
4132 return AArch64::FPR128RegClass.contains(Reg);
4133 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4134 return TRC == &AArch64::FPR128RegClass ||
4135 TRC == &AArch64::FPR128_loRegClass;
4136 };
4137 return llvm::any_of(Range: MI.operands(), P: IsQFPR);
4138}
4139
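// Return true if MI provides BTI landing-pad semantics: explicit BTI hints,
// PACI(A|B)SP (directly, as a HINT alias, or via the PAUTH_PROLOGUE pseudo),
// and trapping instructions such as BRK and HLT.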
4140bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
4141 switch (MI.getOpcode()) {
4142 case AArch64::BRK:
4143 case AArch64::HLT:
4144 case AArch64::PACIASP:
4145 case AArch64::PACIBSP:
4146 // Implicit BTI behavior.
4147 return true;
4148 case AArch64::PAUTH_PROLOGUE:
4149 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4150 return true;
4151 case AArch64::HINT: {
4152 unsigned Imm = MI.getOperand(i: 0).getImm();
4153 // Explicit BTI instruction.
4154 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4155 return true;
4156 // PACI(A|B)SP instructions.
4157 if (Imm == 25 || Imm == 27)
4158 return true;
4159 return false;
4160 }
4161 default:
4162 return false;
4163 }
4164}
4165
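// Return true if any operand of MI is an FP/NEON register (B, H, S, D or Q
// form).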
4166bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4167 auto IsFPR = [&](const MachineOperand &Op) {
4168 if (!Op.isReg())
4169 return false;
4170 auto Reg = Op.getReg();
4171 if (Reg.isPhysical())
4172 return AArch64::FPR128RegClass.contains(Reg) ||
4173 AArch64::FPR64RegClass.contains(Reg) ||
4174 AArch64::FPR32RegClass.contains(Reg) ||
4175 AArch64::FPR16RegClass.contains(Reg) ||
4176 AArch64::FPR8RegClass.contains(Reg);
4177
4178 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4179 return TRC == &AArch64::FPR128RegClass ||
4180 TRC == &AArch64::FPR128_loRegClass ||
4181 TRC == &AArch64::FPR64RegClass ||
4182 TRC == &AArch64::FPR64_loRegClass ||
4183 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4184 TRC == &AArch64::FPR8RegClass;
4185 };
4186 return llvm::any_of(Range: MI.operands(), P: IsFPR);
4187}
4188
4189// Scale the unscaled offset. Returns false if the unscaled offset can't be
4190// scaled.
4191static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4192 int Scale = AArch64InstrInfo::getMemScale(Opc);
4193
4194 // If the byte-offset isn't a multiple of the stride, we can't scale this
4195 // offset.
4196 if (Offset % Scale != 0)
4197 return false;
4198
4199 // Convert the byte-offset used by unscaled into an "element" offset used
4200 // by the scaled pair load/store instructions.
4201 Offset /= Scale;
4202 return true;
4203}
4204
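// Return true if the two load/store opcodes may form an LDP/STP pair:
// identical opcodes, the scaled and unscaled variants of the same access, or
// a mix of 32-bit zero- and sign-extending loads.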
4205static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4206 if (FirstOpc == SecondOpc)
4207 return true;
4208 // We can also pair sign-ext and zero-ext instructions.
4209 switch (FirstOpc) {
4210 default:
4211 return false;
4212 case AArch64::STRSui:
4213 case AArch64::STURSi:
4214 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4215 case AArch64::STRDui:
4216 case AArch64::STURDi:
4217 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4218 case AArch64::STRQui:
4219 case AArch64::STURQi:
4220 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4221 case AArch64::STRWui:
4222 case AArch64::STURWi:
4223 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4224 case AArch64::STRXui:
4225 case AArch64::STURXi:
4226 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4227 case AArch64::LDRSui:
4228 case AArch64::LDURSi:
4229 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4230 case AArch64::LDRDui:
4231 case AArch64::LDURDi:
4232 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4233 case AArch64::LDRQui:
4234 case AArch64::LDURQi:
4235 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4236 case AArch64::LDRWui:
4237 case AArch64::LDURWi:
4238 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4239 case AArch64::LDRSWui:
4240 case AArch64::LDURSWi:
4241 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4242 case AArch64::LDRXui:
4243 case AArch64::LDURXi:
4244 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4245 }
4246 // These instructions can't be paired based on their opcodes.
4247 return false;
4248}
4249
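// Decide whether two frame-index based memory operations address adjacent
// slots and may therefore be clustered; fixed stack objects are compared by
// their scaled object offsets plus the instruction offsets.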
4250static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4251 int64_t Offset1, unsigned Opcode1, int FI2,
4252 int64_t Offset2, unsigned Opcode2) {
4253 // Accesses through fixed stack object frame indices may access a different
4254 // fixed stack slot. Check that the object offsets + offsets match.
4255 if (MFI.isFixedObjectIndex(ObjectIdx: FI1) && MFI.isFixedObjectIndex(ObjectIdx: FI2)) {
4256 int64_t ObjectOffset1 = MFI.getObjectOffset(ObjectIdx: FI1);
4257 int64_t ObjectOffset2 = MFI.getObjectOffset(ObjectIdx: FI2);
4258 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4259 // Convert to scaled object offsets.
4260 int Scale1 = AArch64InstrInfo::getMemScale(Opc: Opcode1);
4261 if (ObjectOffset1 % Scale1 != 0)
4262 return false;
4263 ObjectOffset1 /= Scale1;
4264 int Scale2 = AArch64InstrInfo::getMemScale(Opc: Opcode2);
4265 if (ObjectOffset2 % Scale2 != 0)
4266 return false;
4267 ObjectOffset2 /= Scale2;
4268 ObjectOffset1 += Offset1;
4269 ObjectOffset2 += Offset2;
4270 return ObjectOffset1 + 1 == ObjectOffset2;
4271 }
4272
4273 return FI1 == FI2;
4274}
4275
4276/// Detect opportunities for ldp/stp formation.
4277///
4278/// Only called for LdSt for which getMemOperandWithOffset returns true.
4279bool AArch64InstrInfo::shouldClusterMemOps(
4280 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4281 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4282 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4283 unsigned NumBytes) const {
4284 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4285 const MachineOperand &BaseOp1 = *BaseOps1.front();
4286 const MachineOperand &BaseOp2 = *BaseOps2.front();
4287 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4288 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4289 if (BaseOp1.getType() != BaseOp2.getType())
4290 return false;
4291
4292 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4293 "Only base registers and frame indices are supported.");
4294
4295 // Check for both base regs and base FI.
4296 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4297 return false;
4298
4299 // Only cluster up to a single pair.
4300 if (ClusterSize > 2)
4301 return false;
4302
4303 if (!isPairableLdStInst(MI: FirstLdSt) || !isPairableLdStInst(MI: SecondLdSt))
4304 return false;
4305
4306 // Can we pair these instructions based on their opcodes?
4307 unsigned FirstOpc = FirstLdSt.getOpcode();
4308 unsigned SecondOpc = SecondLdSt.getOpcode();
4309 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4310 return false;
4311
4312 // Can't merge volatiles or load/stores that have a hint to avoid pair
4313 // formation, for example.
4314 if (!isCandidateToMergeOrPair(MI: FirstLdSt) ||
4315 !isCandidateToMergeOrPair(MI: SecondLdSt))
4316 return false;
4317
4318 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4319 int64_t Offset1 = FirstLdSt.getOperand(i: 2).getImm();
4320 if (hasUnscaledLdStOffset(Opc: FirstOpc) && !scaleOffset(Opc: FirstOpc, Offset&: Offset1))
4321 return false;
4322
4323 int64_t Offset2 = SecondLdSt.getOperand(i: 2).getImm();
4324 if (hasUnscaledLdStOffset(Opc: SecondOpc) && !scaleOffset(Opc: SecondOpc, Offset&: Offset2))
4325 return false;
4326
4327 // Pairwise instructions have a 7-bit signed offset field.
4328 if (Offset1 > 63 || Offset1 < -64)
4329 return false;
4330
4331  // The caller should already have ordered First/SecondLdSt by offset.
4332  // Note: this does not necessarily hold for non-equal frame index bases.
4333 if (BaseOp1.isFI()) {
4334 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4335 "Caller should have ordered offsets.");
4336
4337 const MachineFrameInfo &MFI =
4338 FirstLdSt.getParent()->getParent()->getFrameInfo();
4339 return shouldClusterFI(MFI, FI1: BaseOp1.getIndex(), Offset1, Opcode1: FirstOpc,
4340 FI2: BaseOp2.getIndex(), Offset2, Opcode2: SecondOpc);
4341 }
4342
4343 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4344
4345 return Offset1 + 1 == Offset2;
4346}
4347
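// Append Reg (restricted to SubIdx, if any) to MIB with the given register
// state; physical registers are narrowed to the concrete sub-register, while
// virtual registers keep the sub-register index on the operand.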
4348static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4349 unsigned Reg, unsigned SubIdx,
4350 unsigned State,
4351 const TargetRegisterInfo *TRI) {
4352 if (!SubIdx)
4353 return MIB.addReg(RegNo: Reg, flags: State);
4354
4355 if (Register::isPhysicalRegister(Reg))
4356 return MIB.addReg(RegNo: TRI->getSubReg(Reg, Idx: SubIdx), flags: State);
4357 return MIB.addReg(RegNo: Reg, flags: State, SubReg: SubIdx);
4358}
4359
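// Return true if copying a register tuple in increasing sub-register order
// would overwrite a source sub-register before it has been read, i.e. the
// destination range overlaps the source range "ahead" of it (register
// encodings wrap mod 32).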
4360static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4361 unsigned NumRegs) {
4362  // We really want the positive remainder mod 32 here; that happens to be
4363  // easily obtainable with a mask.
4364 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4365}
4366
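// Copy a NEON/SVE register tuple sub-register by sub-register using the given
// ORR opcode, iterating in reverse when a forward copy would clobber
// not-yet-read source sub-registers.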
4367void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
4368 MachineBasicBlock::iterator I,
4369 const DebugLoc &DL, MCRegister DestReg,
4370 MCRegister SrcReg, bool KillSrc,
4371 unsigned Opcode,
4372 ArrayRef<unsigned> Indices) const {
4373 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4374 const TargetRegisterInfo *TRI = &getRegisterInfo();
4375 uint16_t DestEncoding = TRI->getEncodingValue(RegNo: DestReg);
4376 uint16_t SrcEncoding = TRI->getEncodingValue(RegNo: SrcReg);
4377 unsigned NumRegs = Indices.size();
4378
4379 int SubReg = 0, End = NumRegs, Incr = 1;
4380 if (forwardCopyWillClobberTuple(DestReg: DestEncoding, SrcReg: SrcEncoding, NumRegs)) {
4381 SubReg = NumRegs - 1;
4382 End = -1;
4383 Incr = -1;
4384 }
4385
4386 for (; SubReg != End; SubReg += Incr) {
4387 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4388 AddSubReg(MIB, Reg: DestReg, SubIdx: Indices[SubReg], State: RegState::Define, TRI);
4389 AddSubReg(MIB, Reg: SrcReg, SubIdx: Indices[SubReg], State: 0, TRI);
4390 AddSubReg(MIB, Reg: SrcReg, SubIdx: Indices[SubReg], State: getKillRegState(B: KillSrc), TRI);
4391 }
4392}
4393
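// Copy a GPR register sequence (X/W pair) sub-register by sub-register using
// ORR with the zero register; such sequences are aligned and cannot partially
// overlap, so a forward copy is always safe.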
4394void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
4395 MachineBasicBlock::iterator I,
4396 DebugLoc DL, unsigned DestReg,
4397 unsigned SrcReg, bool KillSrc,
4398 unsigned Opcode, unsigned ZeroReg,
4399 llvm::ArrayRef<unsigned> Indices) const {
4400 const TargetRegisterInfo *TRI = &getRegisterInfo();
4401 unsigned NumRegs = Indices.size();
4402
4403#ifndef NDEBUG
4404 uint16_t DestEncoding = TRI->getEncodingValue(RegNo: DestReg);
4405 uint16_t SrcEncoding = TRI->getEncodingValue(RegNo: SrcReg);
4406 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4407 "GPR reg sequences should not be able to overlap");
4408#endif
4409
4410 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4411 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4412 AddSubReg(MIB, Reg: DestReg, SubIdx: Indices[SubReg], State: RegState::Define, TRI);
4413 MIB.addReg(RegNo: ZeroReg);
4414 AddSubReg(MIB, Reg: SrcReg, SubIdx: Indices[SubReg], State: getKillRegState(B: KillSrc), TRI);
4415 MIB.addImm(Val: 0);
4416 }
4417}
4418
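// Emit a register-to-register copy, selecting ORR, FMOV, ADD #0, MOVZ,
// MSR/MRS, or a per-sub-register tuple copy depending on the source and
// destination register classes.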
4419void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4420 MachineBasicBlock::iterator I,
4421 const DebugLoc &DL, MCRegister DestReg,
4422 MCRegister SrcReg, bool KillSrc) const {
4423 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4424 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4425 const TargetRegisterInfo *TRI = &getRegisterInfo();
4426
4427 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4428 // If either operand is WSP, expand to ADD #0.
4429 if (Subtarget.hasZeroCycleRegMove()) {
4430 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4431 MCRegister DestRegX = TRI->getMatchingSuperReg(
4432 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4433 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4434 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4435 // This instruction is reading and writing X registers. This may upset
4436 // the register scavenger and machine verifier, so we need to indicate
4437 // that we are reading an undefined value from SrcRegX, but a proper
4438 // value from SrcReg.
4439 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4440 .addReg(SrcRegX, RegState::Undef)
4441 .addImm(0)
4442 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
4443 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4444 } else {
4445 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4446 .addReg(SrcReg, getKillRegState(KillSrc))
4447 .addImm(0)
4448 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4449 }
4450 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4451 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4452 .addImm(0)
4453 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4454 } else {
4455 if (Subtarget.hasZeroCycleRegMove()) {
4456 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4457 MCRegister DestRegX = TRI->getMatchingSuperReg(
4458 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4459 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4460 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4461 // This instruction is reading and writing X registers. This may upset
4462 // the register scavenger and machine verifier, so we need to indicate
4463 // that we are reading an undefined value from SrcRegX, but a proper
4464 // value from SrcReg.
4465 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4466 .addReg(AArch64::XZR)
4467 .addReg(SrcRegX, RegState::Undef)
4468 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4469 } else {
4470 // Otherwise, expand to ORR WZR.
4471 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4472 .addReg(AArch64::WZR)
4473 .addReg(SrcReg, getKillRegState(KillSrc));
4474 }
4475 }
4476 return;
4477 }
4478
4479 // Copy a Predicate register by ORRing with itself.
4480 if (AArch64::PPRRegClass.contains(DestReg) &&
4481 AArch64::PPRRegClass.contains(SrcReg)) {
4482 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4483 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4484 .addReg(SrcReg) // Pg
4485 .addReg(SrcReg)
4486 .addReg(SrcReg, getKillRegState(KillSrc));
4487 return;
4488 }
4489
4490 // Copy a predicate-as-counter register by ORRing with itself as if it
4491 // were a regular predicate (mask) register.
4492 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4493 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4494 if (DestIsPNR || SrcIsPNR) {
4495 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4496 "Unexpected predicate-as-counter register.");
4497 auto ToPPR = [](MCRegister R) -> MCRegister {
4498 return (R - AArch64::PN0) + AArch64::P0;
4499 };
4500 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4501 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4502
4503 if (PPRSrcReg != PPRDestReg) {
4504 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4505 .addReg(PPRSrcReg) // Pg
4506 .addReg(PPRSrcReg)
4507 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4508 if (DestIsPNR)
4509 NewMI.addDef(DestReg, RegState::Implicit);
4510 }
4511 return;
4512 }
4513
4514 // Copy a Z register by ORRing with itself.
4515 if (AArch64::ZPRRegClass.contains(DestReg) &&
4516 AArch64::ZPRRegClass.contains(SrcReg)) {
4517 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4518 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4519 .addReg(SrcReg)
4520 .addReg(SrcReg, getKillRegState(KillSrc));
4521 return;
4522 }
4523
4524 // Copy a Z register pair by copying the individual sub-registers.
4525 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
4526 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
4527 (AArch64::ZPR2RegClass.contains(SrcReg) ||
4528 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
4529 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4530 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
4531 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4532 Indices);
4533 return;
4534 }
4535
4536 // Copy a Z register triple by copying the individual sub-registers.
4537 if (AArch64::ZPR3RegClass.contains(DestReg) &&
4538 AArch64::ZPR3RegClass.contains(SrcReg)) {
4539 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4540 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4541 AArch64::zsub2};
4542 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4543 Indices);
4544 return;
4545 }
4546
4547 // Copy a Z register quad by copying the individual sub-registers.
4548 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
4549 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
4550 (AArch64::ZPR4RegClass.contains(SrcReg) ||
4551 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
4552 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4553 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4554 AArch64::zsub2, AArch64::zsub3};
4555 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4556 Indices);
4557 return;
4558 }
4559
4560 if (AArch64::GPR64spRegClass.contains(DestReg) &&
4561 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
4562 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
4563 // If either operand is SP, expand to ADD #0.
4564 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
4565 .addReg(SrcReg, getKillRegState(KillSrc))
4566 .addImm(0)
4567 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4568 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
4569 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
4570 .addImm(0)
4571 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4572 } else {
4573 // Otherwise, expand to ORR XZR.
4574 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
4575 .addReg(AArch64::XZR)
4576 .addReg(SrcReg, getKillRegState(KillSrc));
4577 }
4578 return;
4579 }
4580
4581 // Copy a DDDD register quad by copying the individual sub-registers.
4582 if (AArch64::DDDDRegClass.contains(DestReg) &&
4583 AArch64::DDDDRegClass.contains(SrcReg)) {
4584 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4585 AArch64::dsub2, AArch64::dsub3};
4586 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4587 Indices);
4588 return;
4589 }
4590
4591 // Copy a DDD register triple by copying the individual sub-registers.
4592 if (AArch64::DDDRegClass.contains(DestReg) &&
4593 AArch64::DDDRegClass.contains(SrcReg)) {
4594 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4595 AArch64::dsub2};
4596 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4597 Indices);
4598 return;
4599 }
4600
4601 // Copy a DD register pair by copying the individual sub-registers.
4602 if (AArch64::DDRegClass.contains(DestReg) &&
4603 AArch64::DDRegClass.contains(SrcReg)) {
4604 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
4605 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4606 Indices);
4607 return;
4608 }
4609
4610 // Copy a QQQQ register quad by copying the individual sub-registers.
4611 if (AArch64::QQQQRegClass.contains(DestReg) &&
4612 AArch64::QQQQRegClass.contains(SrcReg)) {
4613 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4614 AArch64::qsub2, AArch64::qsub3};
4615 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4616 Indices);
4617 return;
4618 }
4619
4620 // Copy a QQQ register triple by copying the individual sub-registers.
4621 if (AArch64::QQQRegClass.contains(DestReg) &&
4622 AArch64::QQQRegClass.contains(SrcReg)) {
4623 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4624 AArch64::qsub2};
4625 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4626 Indices);
4627 return;
4628 }
4629
4630 // Copy a QQ register pair by copying the individual sub-registers.
4631 if (AArch64::QQRegClass.contains(DestReg) &&
4632 AArch64::QQRegClass.contains(SrcReg)) {
4633 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
4634 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4635 Indices);
4636 return;
4637 }
4638
4639 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
4640 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
4641 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
4642 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
4643 AArch64::XZR, Indices);
4644 return;
4645 }
4646
4647 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
4648 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
4649 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
4650 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
4651 AArch64::WZR, Indices);
4652 return;
4653 }
4654
4655 if (AArch64::FPR128RegClass.contains(DestReg) &&
4656 AArch64::FPR128RegClass.contains(SrcReg)) {
4657 if (Subtarget.hasSVEorSME() && !Subtarget.isNeonAvailable())
4658 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
4659 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
4660 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
4661 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
4662 else if (Subtarget.hasNEON())
4663 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
4664 .addReg(SrcReg)
4665 .addReg(SrcReg, getKillRegState(KillSrc));
4666 else {
4667 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
4668 .addReg(AArch64::SP, RegState::Define)
4669 .addReg(SrcReg, getKillRegState(KillSrc))
4670 .addReg(AArch64::SP)
4671 .addImm(-16);
4672 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
4673 .addReg(AArch64::SP, RegState::Define)
4674 .addReg(DestReg, RegState::Define)
4675 .addReg(AArch64::SP)
4676 .addImm(16);
4677 }
4678 return;
4679 }
4680
4681 if (AArch64::FPR64RegClass.contains(DestReg) &&
4682 AArch64::FPR64RegClass.contains(SrcReg)) {
4683 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
4684 .addReg(SrcReg, getKillRegState(KillSrc));
4685 return;
4686 }
4687
4688 if (AArch64::FPR32RegClass.contains(DestReg) &&
4689 AArch64::FPR32RegClass.contains(SrcReg)) {
4690 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4691 .addReg(SrcReg, getKillRegState(KillSrc));
4692 return;
4693 }
4694
4695 if (AArch64::FPR16RegClass.contains(DestReg) &&
4696 AArch64::FPR16RegClass.contains(SrcReg)) {
4697 DestReg =
4698 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
4699 SrcReg =
4700 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
4701 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4702 .addReg(SrcReg, getKillRegState(KillSrc));
4703 return;
4704 }
4705
4706 if (AArch64::FPR8RegClass.contains(DestReg) &&
4707 AArch64::FPR8RegClass.contains(SrcReg)) {
4708 DestReg =
4709 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
4710 SrcReg =
4711 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
4712 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4713 .addReg(SrcReg, getKillRegState(KillSrc));
4714 return;
4715 }
4716
4717 // Copies between GPR64 and FPR64.
4718 if (AArch64::FPR64RegClass.contains(DestReg) &&
4719 AArch64::GPR64RegClass.contains(SrcReg)) {
4720 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
4721 .addReg(SrcReg, getKillRegState(KillSrc));
4722 return;
4723 }
4724 if (AArch64::GPR64RegClass.contains(DestReg) &&
4725 AArch64::FPR64RegClass.contains(SrcReg)) {
4726 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
4727 .addReg(SrcReg, getKillRegState(KillSrc));
4728 return;
4729 }
4730 // Copies between GPR32 and FPR32.
4731 if (AArch64::FPR32RegClass.contains(DestReg) &&
4732 AArch64::GPR32RegClass.contains(SrcReg)) {
4733 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
4734 .addReg(SrcReg, getKillRegState(KillSrc));
4735 return;
4736 }
4737 if (AArch64::GPR32RegClass.contains(DestReg) &&
4738 AArch64::FPR32RegClass.contains(SrcReg)) {
4739 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
4740 .addReg(SrcReg, getKillRegState(KillSrc));
4741 return;
4742 }
4743
4744 if (DestReg == AArch64::NZCV) {
4745 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
4746 BuildMI(MBB, I, DL, get(AArch64::MSR))
4747 .addImm(AArch64SysReg::NZCV)
4748 .addReg(SrcReg, getKillRegState(KillSrc))
4749 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
4750 return;
4751 }
4752
4753 if (SrcReg == AArch64::NZCV) {
4754 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
4755 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
4756 .addImm(AArch64SysReg::NZCV)
4757 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
4758 return;
4759 }
4760
4761#ifndef NDEBUG
4762 const TargetRegisterInfo &TRI = getRegisterInfo();
4763 errs() << TRI.getRegAsmName(Reg: DestReg) << " = COPY "
4764 << TRI.getRegAsmName(Reg: SrcReg) << "\n";
4765#endif
4766 llvm_unreachable("unimplemented reg-to-reg copy");
4767}
4768
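// Emit a store-pair (e.g. STPWi/STPXi) spilling both halves of the register
// sequence SrcReg to frame index FI.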
4769static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
4770 MachineBasicBlock &MBB,
4771 MachineBasicBlock::iterator InsertBefore,
4772 const MCInstrDesc &MCID,
4773 Register SrcReg, bool IsKill,
4774 unsigned SubIdx0, unsigned SubIdx1, int FI,
4775 MachineMemOperand *MMO) {
4776 Register SrcReg0 = SrcReg;
4777 Register SrcReg1 = SrcReg;
4778 if (SrcReg.isPhysical()) {
4779 SrcReg0 = TRI.getSubReg(Reg: SrcReg, Idx: SubIdx0);
4780 SubIdx0 = 0;
4781 SrcReg1 = TRI.getSubReg(Reg: SrcReg, Idx: SubIdx1);
4782 SubIdx1 = 0;
4783 }
4784 BuildMI(BB&: MBB, I: InsertBefore, MIMD: DebugLoc(), MCID)
4785 .addReg(RegNo: SrcReg0, flags: getKillRegState(B: IsKill), SubReg: SubIdx0)
4786 .addReg(RegNo: SrcReg1, flags: getKillRegState(B: IsKill), SubReg: SubIdx1)
4787 .addFrameIndex(Idx: FI)
4788 .addImm(Val: 0)
4789 .addMemOperand(MMO);
4790}
4791
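// Spill SrcReg of register class RC to stack slot FI, choosing a scaled
// store, an ST1 multi-vector store, an SVE STR, or a store-pair depending on
// the class; scalable classes also switch the slot to the ScalableVector
// stack ID.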
4792void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
4793 MachineBasicBlock::iterator MBBI,
4794 Register SrcReg, bool isKill, int FI,
4795 const TargetRegisterClass *RC,
4796 const TargetRegisterInfo *TRI,
4797 Register VReg) const {
4798 MachineFunction &MF = *MBB.getParent();
4799 MachineFrameInfo &MFI = MF.getFrameInfo();
4800
4801 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
4802 MachineMemOperand *MMO =
4803 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOStore,
4804 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
4805 unsigned Opc = 0;
4806 bool Offset = true;
4807 MCRegister PNRReg = MCRegister::NoRegister;
4808 unsigned StackID = TargetStackID::Default;
4809 switch (TRI->getSpillSize(RC: *RC)) {
4810 case 1:
4811 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4812 Opc = AArch64::STRBui;
4813 break;
4814 case 2: {
4815 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
4816 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4817 Opc = AArch64::STRHui;
4818 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
4819 assert(Subtarget.hasSVEorSME() &&
4820 "Unexpected register store without SVE store instructions");
4821 assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4822 "Unexpected register store without SVE2p1 or SME2");
4823 Opc = AArch64::STR_PXI;
4824 StackID = TargetStackID::ScalableVector;
4825 }
4826 break;
4827 }
4828 case 4:
4829 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
4830 Opc = AArch64::STRWui;
4831 if (SrcReg.isVirtual())
4832 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
4833 else
4834 assert(SrcReg != AArch64::WSP);
4835 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
4836 Opc = AArch64::STRSui;
4837 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
4838 Opc = AArch64::STR_PPXI;
4839 StackID = TargetStackID::ScalableVector;
4840 }
4841 break;
4842 case 8:
4843 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
4844 Opc = AArch64::STRXui;
4845 if (SrcReg.isVirtual())
4846 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
4847 else
4848 assert(SrcReg != AArch64::SP);
4849 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
4850 Opc = AArch64::STRDui;
4851 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
4852 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
4853 get(AArch64::STPWi), SrcReg, isKill,
4854 AArch64::sube32, AArch64::subo32, FI, MMO);
4855 return;
4856 }
4857 break;
4858 case 16:
4859 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
4860 Opc = AArch64::STRQui;
4861 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
4862 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4863 Opc = AArch64::ST1Twov1d;
4864 Offset = false;
4865 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
4866 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
4867 get(AArch64::STPXi), SrcReg, isKill,
4868 AArch64::sube64, AArch64::subo64, FI, MMO);
4869 return;
4870 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
4871 assert(Subtarget.hasSVEorSME() &&
4872 "Unexpected register store without SVE store instructions");
4873 Opc = AArch64::STR_ZXI;
4874 StackID = TargetStackID::ScalableVector;
4875 }
4876 break;
4877 case 24:
4878 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
4879 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4880 Opc = AArch64::ST1Threev1d;
4881 Offset = false;
4882 }
4883 break;
4884 case 32:
4885 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
4886 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4887 Opc = AArch64::ST1Fourv1d;
4888 Offset = false;
4889 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
4890 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4891 Opc = AArch64::ST1Twov2d;
4892 Offset = false;
4893 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
4894 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4895 assert(Subtarget.hasSVEorSME() &&
4896 "Unexpected register store without SVE store instructions");
4897 Opc = AArch64::STR_ZZXI;
4898 StackID = TargetStackID::ScalableVector;
4899 }
4900 break;
4901 case 48:
4902 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
4903 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4904 Opc = AArch64::ST1Threev2d;
4905 Offset = false;
4906 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
4907 assert(Subtarget.hasSVEorSME() &&
4908 "Unexpected register store without SVE store instructions");
4909 Opc = AArch64::STR_ZZZXI;
4910 StackID = TargetStackID::ScalableVector;
4911 }
4912 break;
4913 case 64:
4914 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
4915 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4916 Opc = AArch64::ST1Fourv2d;
4917 Offset = false;
4918 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
4919 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4920 assert(Subtarget.hasSVEorSME() &&
4921 "Unexpected register store without SVE store instructions");
4922 Opc = AArch64::STR_ZZZZXI;
4923 StackID = TargetStackID::ScalableVector;
4924 }
4925 break;
4926 }
4927 assert(Opc && "Unknown register class");
4928 MFI.setStackID(ObjectIdx: FI, ID: StackID);
4929
4930 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
4931 .addReg(SrcReg, getKillRegState(B: isKill))
4932 .addFrameIndex(FI);
4933
4934 if (Offset)
4935 MI.addImm(Val: 0);
4936 if (PNRReg.isValid())
4937 MI.addDef(RegNo: PNRReg, Flags: RegState::Implicit);
4938 MI.addMemOperand(MMO);
4939}
4940
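// Emit a load-pair (e.g. LDPWi/LDPXi) refilling both halves of the register
// sequence DestReg from frame index FI.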
4941static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
4942 MachineBasicBlock &MBB,
4943 MachineBasicBlock::iterator InsertBefore,
4944 const MCInstrDesc &MCID,
4945 Register DestReg, unsigned SubIdx0,
4946 unsigned SubIdx1, int FI,
4947 MachineMemOperand *MMO) {
4948 Register DestReg0 = DestReg;
4949 Register DestReg1 = DestReg;
4950 bool IsUndef = true;
4951 if (DestReg.isPhysical()) {
4952 DestReg0 = TRI.getSubReg(Reg: DestReg, Idx: SubIdx0);
4953 SubIdx0 = 0;
4954 DestReg1 = TRI.getSubReg(Reg: DestReg, Idx: SubIdx1);
4955 SubIdx1 = 0;
4956 IsUndef = false;
4957 }
4958 BuildMI(BB&: MBB, I: InsertBefore, MIMD: DebugLoc(), MCID)
4959 .addReg(RegNo: DestReg0, flags: RegState::Define | getUndefRegState(B: IsUndef), SubReg: SubIdx0)
4960 .addReg(RegNo: DestReg1, flags: RegState::Define | getUndefRegState(B: IsUndef), SubReg: SubIdx1)
4961 .addFrameIndex(Idx: FI)
4962 .addImm(Val: 0)
4963 .addMemOperand(MMO);
4964}
4965
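// Reload DestReg of register class RC from stack slot FI, mirroring the
// opcode selection in storeRegToStackSlot: a scaled load, an LD1 multi-vector
// load, an SVE LDR, or a load-pair; scalable classes also switch the slot to
// the ScalableVector stack ID.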
4966void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
4967 MachineBasicBlock::iterator MBBI,
4968 Register DestReg, int FI,
4969 const TargetRegisterClass *RC,
4970 const TargetRegisterInfo *TRI,
4971 Register VReg) const {
4972 MachineFunction &MF = *MBB.getParent();
4973 MachineFrameInfo &MFI = MF.getFrameInfo();
4974 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
4975 MachineMemOperand *MMO =
4976 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOLoad,
4977 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
4978
4979 unsigned Opc = 0;
4980 bool Offset = true;
4981 unsigned StackID = TargetStackID::Default;
4982 Register PNRReg = MCRegister::NoRegister;
4983 switch (TRI->getSpillSize(RC: *RC)) {
4984 case 1:
4985 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4986 Opc = AArch64::LDRBui;
4987 break;
4988 case 2: {
4989 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
4990 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4991 Opc = AArch64::LDRHui;
4992 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
4993 assert(Subtarget.hasSVEorSME() &&
4994 "Unexpected register load without SVE load instructions");
4995 assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4996 "Unexpected register load without SVE2p1 or SME2");
4997 if (IsPNR)
4998 PNRReg = DestReg;
4999 Opc = AArch64::LDR_PXI;
5000 StackID = TargetStackID::ScalableVector;
5001 }
5002 break;
5003 }
5004 case 4:
5005 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5006 Opc = AArch64::LDRWui;
5007 if (DestReg.isVirtual())
5008 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5009 else
5010 assert(DestReg != AArch64::WSP);
5011 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5012 Opc = AArch64::LDRSui;
5013 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5014 Opc = AArch64::LDR_PPXI;
5015 StackID = TargetStackID::ScalableVector;
5016 }
5017 break;
5018 case 8:
5019 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5020 Opc = AArch64::LDRXui;
5021 if (DestReg.isVirtual())
5022 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5023 else
5024 assert(DestReg != AArch64::SP);
5025 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5026 Opc = AArch64::LDRDui;
5027 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5028 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
5029 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5030 AArch64::subo32, FI, MMO);
5031 return;
5032 }
5033 break;
5034 case 16:
5035 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5036 Opc = AArch64::LDRQui;
5037 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5038 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5039 Opc = AArch64::LD1Twov1d;
5040 Offset = false;
5041 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5042 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
5043 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5044 AArch64::subo64, FI, MMO);
5045 return;
5046 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5047 assert(Subtarget.hasSVEorSME() &&
5048 "Unexpected register load without SVE load instructions");
5049 Opc = AArch64::LDR_ZXI;
5050 StackID = TargetStackID::ScalableVector;
5051 }
5052 break;
5053 case 24:
5054 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5055 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5056 Opc = AArch64::LD1Threev1d;
5057 Offset = false;
5058 }
5059 break;
5060 case 32:
5061 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5062 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5063 Opc = AArch64::LD1Fourv1d;
5064 Offset = false;
5065 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5066 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5067 Opc = AArch64::LD1Twov2d;
5068 Offset = false;
5069 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5070 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5071 assert(Subtarget.hasSVEorSME() &&
5072 "Unexpected register load without SVE load instructions");
5073 Opc = AArch64::LDR_ZZXI;
5074 StackID = TargetStackID::ScalableVector;
5075 }
5076 break;
5077 case 48:
5078 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5079 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5080 Opc = AArch64::LD1Threev2d;
5081 Offset = false;
5082 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5083 assert(Subtarget.hasSVEorSME() &&
5084 "Unexpected register load without SVE load instructions");
5085 Opc = AArch64::LDR_ZZZXI;
5086 StackID = TargetStackID::ScalableVector;
5087 }
5088 break;
5089 case 64:
5090 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5091 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5092 Opc = AArch64::LD1Fourv2d;
5093 Offset = false;
5094 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5095 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5096 assert(Subtarget.hasSVEorSME() &&
5097 "Unexpected register load without SVE load instructions");
5098 Opc = AArch64::LDR_ZZZZXI;
5099 StackID = TargetStackID::ScalableVector;
5100 }
5101 break;
5102 }
5103
5104 assert(Opc && "Unknown register class");
5105 MFI.setStackID(ObjectIdx: FI, ID: StackID);
5106
5107 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5108 .addReg(DestReg, getDefRegState(B: true))
5109 .addFrameIndex(FI);
5110 if (Offset)
5111 MI.addImm(Val: 0);
5112 if (PNRReg.isValid() && !PNRReg.isVirtual())
5113 MI.addDef(RegNo: PNRReg, Flags: RegState::Implicit);
5114 MI.addMemOperand(MMO);
5115
5116 if (PNRReg.isValid() && PNRReg.isVirtual())
5117 BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
5118 .addReg(DestReg);
5119}
5120
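// Returns true if any instruction strictly between \p DefMI and \p UseMI
// (ignoring debug instructions) reads or modifies the NZCV flags.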
5121bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
5122 const MachineInstr &UseMI,
5123 const TargetRegisterInfo *TRI) {
5124 return any_of(Range: instructionsWithoutDebug(It: std::next(x: DefMI.getIterator()),
5125 End: UseMI.getIterator()),
5126 P: [TRI](const MachineInstr &I) {
5127 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5128 I.readsRegister(AArch64::NZCV, TRI);
5129 });
5130}
5131
5132void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5133 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5134 // The smallest scalable elements supported by scaled SVE addressing
5135 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5136 // byte offset must always be a multiple of 2.
5137 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5138
5139 // VGSized offsets are divided by '2', because the VG register holds the
5140 // number of 64-bit granules, as opposed to the 128-bit vector chunks in
5141 // terms of which the 'n' in e.g. MVT::nxv1i8 is modelled.
5142 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5143 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5144 ByteSized = Offset.getFixed();
5145 VGSized = Offset.getScalable() / 2;
5146}
5147
5148/// Decomposes the given frame offset into the parts needed to describe it:
5149/// a fixed number of bytes plus counts of SVE predicate and data vectors.
5150/// For non-scalable offsets only the byte part is non-zero.
5151void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5152 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5153 int64_t &NumDataVectors) {
5154 // The smallest scalable elements supported by scaled SVE addressing
5155 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5156 // byte offset must always be a multiple of 2.
5157 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5158
5159 NumBytes = Offset.getFixed();
5160 NumDataVectors = 0;
5161 NumPredicateVectors = Offset.getScalable() / 2;
5162 // This method computes the offsets used to adjust the frame offset.
5163 // If materialising the offset would require more than two ADDPL
5164 // instructions, part of it is folded into NumDataVectors so that ADDVL
5165 // can be used for that part, reducing the number of ADDPL instructions.
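 // For example (illustrative numbers): a scalable offset of 160 bytes gives
 // NumPredicateVectors == 80, which is a multiple of 8 and so is folded to
 // NumDataVectors == 10 with no remainder (a single ADDVL); 140 bytes gives
 // 70, folded to 8 data vectors plus 6 predicate vectors (one ADDVL and one
 // ADDPL).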
5166 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5167 NumPredicateVectors > 62) {
5168 NumDataVectors = NumPredicateVectors / 8;
5169 NumPredicateVectors -= NumDataVectors * 8;
5170 }
5171}
5172
5173// Convenience function to create a DWARF expression for
5174// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
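// Roughly, the appended DWARF opcodes are (each half omitted when its value
// is zero):
//   DW_OP_consts <NumBytes>, DW_OP_plus,
//   DW_OP_consts <NumVGScaledBytes>, DW_OP_bregx <VG> 0, DW_OP_mul, DW_OP_plus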
5175static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5176 int NumVGScaledBytes, unsigned VG,
5177 llvm::raw_string_ostream &Comment) {
5178 uint8_t buffer[16];
5179
5180 if (NumBytes) {
5181 Expr.push_back(Elt: dwarf::DW_OP_consts);
5182 Expr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: NumBytes, p: buffer));
5183 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_plus);
5184 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(x: NumBytes);
5185 }
5186
5187 if (NumVGScaledBytes) {
5188 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_consts);
5189 Expr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: NumVGScaledBytes, p: buffer));
5190
5191 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_bregx);
5192 Expr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: VG, p: buffer));
5193 Expr.push_back(Elt: 0);
5194
5195 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_mul);
5196 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_plus);
5197
5198 Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
5199 << std::abs(x: NumVGScaledBytes) << " * VG";
5200 }
5201}
5202
5203// Creates an MCCFIInstruction:
5204// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5205static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
5206 unsigned Reg,
5207 const StackOffset &Offset) {
5208 int64_t NumBytes, NumVGScaledBytes;
5209 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, ByteSized&: NumBytes,
5210 VGSized&: NumVGScaledBytes);
5211 std::string CommentBuffer;
5212 llvm::raw_string_ostream Comment(CommentBuffer);
5213
5214 if (Reg == AArch64::SP)
5215 Comment << "sp";
5216 else if (Reg == AArch64::FP)
5217 Comment << "fp";
5218 else
5219 Comment << printReg(Reg, TRI: &TRI);
5220
5221 // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
5222 SmallString<64> Expr;
5223 unsigned DwarfReg = TRI.getDwarfRegNum(RegNum: Reg, isEH: true);
5224 Expr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
5225 Expr.push_back(Elt: 0);
5226 appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
5227 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5228
5229 // Wrap this into a DW_CFA_def_cfa_expression.
5230 SmallString<64> DefCfaExpr;
5231 DefCfaExpr.push_back(Elt: dwarf::DW_CFA_def_cfa_expression);
5232 uint8_t buffer[16];
5233 DefCfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: Expr.size(), p: buffer));
5234 DefCfaExpr.append(RHS: Expr.str());
5235 return MCCFIInstruction::createEscape(L: nullptr, Vals: DefCfaExpr.str(), Loc: SMLoc(),
5236 Comment: Comment.str());
5237}
5238
5239MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
5240 unsigned FrameReg, unsigned Reg,
5241 const StackOffset &Offset,
5242 bool LastAdjustmentWasScalable) {
5243 if (Offset.getScalable())
5244 return createDefCFAExpression(TRI, Reg, Offset);
5245
5246 if (FrameReg == Reg && !LastAdjustmentWasScalable)
5247 return MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: int(Offset.getFixed()));
5248
5249 unsigned DwarfReg = TRI.getDwarfRegNum(RegNum: Reg, isEH: true);
5250 return MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfReg, Offset: (int)Offset.getFixed());
5251}
5252
5253MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
5254 unsigned Reg,
5255 const StackOffset &OffsetFromDefCFA) {
5256 int64_t NumBytes, NumVGScaledBytes;
5257 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5258 Offset: OffsetFromDefCFA, ByteSized&: NumBytes, VGSized&: NumVGScaledBytes);
5259
5260 unsigned DwarfReg = TRI.getDwarfRegNum(RegNum: Reg, isEH: true);
5261
5262 // Non-scalable offsets can use DW_CFA_offset directly.
5263 if (!NumVGScaledBytes)
5264 return MCCFIInstruction::createOffset(L: nullptr, Register: DwarfReg, Offset: NumBytes);
5265
5266 std::string CommentBuffer;
5267 llvm::raw_string_ostream Comment(CommentBuffer);
5268 Comment << printReg(Reg, TRI: &TRI) << " @ cfa";
5269
5270 // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
5271 SmallString<64> OffsetExpr;
5272 appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
5273 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5274
5275 // Wrap this into DW_CFA_expression
5276 SmallString<64> CfaExpr;
5277 CfaExpr.push_back(Elt: dwarf::DW_CFA_expression);
5278 uint8_t buffer[16];
5279 CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: DwarfReg, p: buffer));
5280 CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: OffsetExpr.size(), p: buffer));
5281 CfaExpr.append(RHS: OffsetExpr.str());
5282
5283 return MCCFIInstruction::createEscape(L: nullptr, Vals: CfaExpr.str(), Loc: SMLoc(),
5284 Comment: Comment.str());
5285}
5286
5287// Helper function to emit a frame offset adjustment from a given
5288// pointer (SrcReg), storing the result into DestReg. This function is
5289// explicit in that it requires the caller to supply the opcode.
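// Illustrative example: with Opc == ADDXri and Offset == 0x100010, the loop
// below emits two instructions:
//   add Xd, Xn, #0x100, lsl #12
//   add Xd, Xd, #0x10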
5290static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
5291 MachineBasicBlock::iterator MBBI,
5292 const DebugLoc &DL, unsigned DestReg,
5293 unsigned SrcReg, int64_t Offset, unsigned Opc,
5294 const TargetInstrInfo *TII,
5295 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
5296 bool *HasWinCFI, bool EmitCFAOffset,
5297 StackOffset CFAOffset, unsigned FrameReg) {
5298 int Sign = 1;
5299 unsigned MaxEncoding, ShiftSize;
5300 switch (Opc) {
5301 case AArch64::ADDXri:
5302 case AArch64::ADDSXri:
5303 case AArch64::SUBXri:
5304 case AArch64::SUBSXri:
5305 MaxEncoding = 0xfff;
5306 ShiftSize = 12;
5307 break;
5308 case AArch64::ADDVL_XXI:
5309 case AArch64::ADDPL_XXI:
5310 case AArch64::ADDSVL_XXI:
5311 case AArch64::ADDSPL_XXI:
5312 MaxEncoding = 31;
5313 ShiftSize = 0;
5314 if (Offset < 0) {
5315 MaxEncoding = 32;
5316 Sign = -1;
5317 Offset = -Offset;
5318 }
5319 break;
5320 default:
5321 llvm_unreachable("Unsupported opcode");
5322 }
5323
5324 // `Offset` can be in bytes or in "scalable bytes".
5325 int VScale = 1;
5326 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
5327 VScale = 16;
5328 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
5329 VScale = 2;
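 // (ADDVL's immediate counts whole SVE vectors, VL == 16 bytes * vscale,
 // while ADDPL's counts predicate-sized units, PL == VL / 8 == 2 bytes *
 // vscale; hence the factors of 16 and 2 above.)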
5330
5331 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
5332 // scratch register. If DestReg is a virtual register, use it as the
5333 // scratch register; otherwise, create a new virtual register (to be
5334 // replaced by the scavenger at the end of PEI). That case can be optimized
5335 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
5336 // register can be loaded with offset%8 and the add/sub can use an extending
5337 // instruction with LSL#3.
5338 // Currently the function handles any offsets but generates a poor sequence
5339 // of code.
5340 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
5341
5342 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
5343 Register TmpReg = DestReg;
5344 if (TmpReg == AArch64::XZR)
5345 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
5346 &AArch64::GPR64RegClass);
5347 do {
5348 uint64_t ThisVal = std::min<uint64_t>(a: Offset, b: MaxEncodableValue);
5349 unsigned LocalShiftSize = 0;
5350 if (ThisVal > MaxEncoding) {
5351 ThisVal = ThisVal >> ShiftSize;
5352 LocalShiftSize = ShiftSize;
5353 }
5354 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
5355 "Encoding cannot handle value that big");
5356
5357 Offset -= ThisVal << LocalShiftSize;
5358 if (Offset == 0)
5359 TmpReg = DestReg;
5360 auto MBI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc), DestReg: TmpReg)
5361 .addReg(RegNo: SrcReg)
5362 .addImm(Val: Sign * (int)ThisVal);
5363 if (ShiftSize)
5364 MBI = MBI.addImm(
5365 Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: LocalShiftSize));
5366 MBI = MBI.setMIFlag(Flag);
5367
5368 auto Change =
5369 VScale == 1
5370 ? StackOffset::getFixed(Fixed: ThisVal << LocalShiftSize)
5371 : StackOffset::getScalable(Scalable: VScale * (ThisVal << LocalShiftSize));
5372 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
5373 CFAOffset += Change;
5374 else
5375 CFAOffset -= Change;
5376 if (EmitCFAOffset && DestReg == TmpReg) {
5377 MachineFunction &MF = *MBB.getParent();
5378 const TargetSubtargetInfo &STI = MF.getSubtarget();
5379 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
5380
5381 unsigned CFIIndex = MF.addFrameInst(
5382 Inst: createDefCFA(TRI, FrameReg, Reg: DestReg, Offset: CFAOffset, LastAdjustmentWasScalable: VScale != 1));
5383 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::CFI_INSTRUCTION))
5384 .addCFIIndex(CFIIndex)
5385 .setMIFlags(Flag);
5386 }
5387
5388 if (NeedsWinCFI) {
5389 assert(Sign == 1 && "SEH directives should always have a positive sign");
5390 int Imm = (int)(ThisVal << LocalShiftSize);
5391 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
5392 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
5393 if (HasWinCFI)
5394 *HasWinCFI = true;
5395 if (Imm == 0)
5396 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
5397 else
5398 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
5399 .addImm(Imm)
5400 .setMIFlag(Flag);
5401 assert(Offset == 0 && "Expected remaining offset to be zero to "
5402 "emit a single SEH directive");
5403 } else if (DestReg == AArch64::SP) {
5404 if (HasWinCFI)
5405 *HasWinCFI = true;
5406 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
5407 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
5408 .addImm(Imm)
5409 .setMIFlag(Flag);
5410 }
5411 }
5412
5413 SrcReg = TmpReg;
5414 } while (Offset);
5415}
5416
5417void llvm::emitFrameOffset(MachineBasicBlock &MBB,
5418 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
5419 unsigned DestReg, unsigned SrcReg,
5420 StackOffset Offset, const TargetInstrInfo *TII,
5421 MachineInstr::MIFlag Flag, bool SetNZCV,
5422 bool NeedsWinCFI, bool *HasWinCFI,
5423 bool EmitCFAOffset, StackOffset CFAOffset,
5424 unsigned FrameReg) {
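 // Illustrative example: a StackOffset of (fixed: -32, scalable: -32) from SP
 // to SP is emitted as 'sub sp, sp, #32' followed by 'addvl sp, sp, #-2'
 // (or 'addsvl' in a locally-streaming body, see below), since one SVE data
 // vector is 16 scalable bytes.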
5425 // If a function is marked as arm_locally_streaming, then the runtime value of
5426 // vscale in the prologue/epilogue is different from the runtime value of vscale
5427 // in the function's body. To avoid having to consider multiple vscales,
5428 // we can use `addsvl` to allocate any scalable stack-slots, which under
5429 // most circumstances will be only locals, not callee-save slots.
5430 const Function &F = MBB.getParent()->getFunction();
5431 bool UseSVL = F.hasFnAttribute(Kind: "aarch64_pstate_sm_body");
5432
5433 int64_t Bytes, NumPredicateVectors, NumDataVectors;
5434 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5435 Offset, NumBytes&: Bytes, NumPredicateVectors, NumDataVectors);
5436
5437 // First emit non-scalable frame offsets, or a simple 'mov'.
5438 if (Bytes || (!Offset && SrcReg != DestReg)) {
5439 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
5440 "SP increment/decrement not 8-byte aligned");
5441 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
5442 if (Bytes < 0) {
5443 Bytes = -Bytes;
5444 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
5445 }
5446 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Offset: Bytes, Opc, TII, Flag,
5447 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
5448 FrameReg);
5449 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
5450 ? StackOffset::getFixed(-Bytes)
5451 : StackOffset::getFixed(Bytes);
5452 SrcReg = DestReg;
5453 FrameReg = DestReg;
5454 }
5455
5456 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
5457 "SetNZCV not supported with SVE vectors");
5458 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
5459 "WinCFI not supported with SVE vectors");
5460
5461 if (NumDataVectors) {
5462 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
5463 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI,
5464 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5465 CFAOffset, FrameReg);
5466 CFAOffset += StackOffset::getScalable(Scalable: -NumDataVectors * 16);
5467 SrcReg = DestReg;
5468 }
5469
5470 if (NumPredicateVectors) {
5471 assert(DestReg != AArch64::SP && "Unaligned access to SP");
5472 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
5473 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI,
5474 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5475 CFAOffset, FrameReg);
5476 }
5477}
5478
5479MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
5480 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
5481 MachineBasicBlock::iterator InsertPt, int FrameIndex,
5482 LiveIntervals *LIS, VirtRegMap *VRM) const {
5483 // This is a bit of a hack. Consider this instruction:
5484 //
5485 // %0 = COPY %sp; GPR64all:%0
5486 //
5487 // We explicitly chose GPR64all for the virtual register so such a copy might
5488 // be eliminated by RegisterCoalescer. However, that may not be possible, and
5489 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
5490 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
5491 //
5492 // To prevent that, we are going to constrain the %0 register class here.
5493 if (MI.isFullCopy()) {
5494 Register DstReg = MI.getOperand(i: 0).getReg();
5495 Register SrcReg = MI.getOperand(i: 1).getReg();
5496 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
5497 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
5498 return nullptr;
5499 }
5500 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
5501 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5502 return nullptr;
5503 }
5504 // Nothing can be folded with a copy from/to NZCV.
5505 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
5506 return nullptr;
5507 }
5508
5509 // Handle the case where a copy is being spilled or filled but the source
5510 // and destination register classes don't match. For example:
5511 //
5512 // %0 = COPY %xzr; GPR64common:%0
5513 //
5514 // In this case we can still safely fold away the COPY and generate the
5515 // following spill code:
5516 //
5517 // STRXui %xzr, %stack.0
5518 //
5519 // This also eliminates spilled cross-register-class COPYs (e.g. between x and
5520 // d regs) of the same size. For example:
5521 //
5522 // %0 = COPY %1; GPR64:%0, FPR64:%1
5523 //
5524 // will be filled as
5525 //
5526 // LDRDui %0, fi<#0>
5527 //
5528 // instead of
5529 //
5530 // LDRXui %Temp, fi<#0>
5531 // %0 = FMOV %Temp
5532 //
5533 if (MI.isCopy() && Ops.size() == 1 &&
5534 // Make sure we're only folding the explicit COPY defs/uses.
5535 (Ops[0] == 0 || Ops[0] == 1)) {
5536 bool IsSpill = Ops[0] == 0;
5537 bool IsFill = !IsSpill;
5538 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
5539 const MachineRegisterInfo &MRI = MF.getRegInfo();
5540 MachineBasicBlock &MBB = *MI.getParent();
5541 const MachineOperand &DstMO = MI.getOperand(i: 0);
5542 const MachineOperand &SrcMO = MI.getOperand(i: 1);
5543 Register DstReg = DstMO.getReg();
5544 Register SrcReg = SrcMO.getReg();
5545 // This is slightly expensive to compute for physical regs since
5546 // getMinimalPhysRegClass is slow.
5547 auto getRegClass = [&](unsigned Reg) {
5548 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
5549 : TRI.getMinimalPhysRegClass(Reg);
5550 };
5551
5552 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
5553 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
5554 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
5555 "Mismatched register size in non subreg COPY");
5556 if (IsSpill)
5557 storeRegToStackSlot(MBB, MBBI: InsertPt, SrcReg, isKill: SrcMO.isKill(), FI: FrameIndex,
5558 RC: getRegClass(SrcReg), TRI: &TRI, VReg: Register());
5559 else
5560 loadRegFromStackSlot(MBB, MBBI: InsertPt, DestReg: DstReg, FI: FrameIndex,
5561 RC: getRegClass(DstReg), TRI: &TRI, VReg: Register());
5562 return &*--InsertPt;
5563 }
5564
5565 // Handle cases like spilling def of:
5566 //
5567 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
5568 //
5569 // where the physical register source can be widened and stored to the full
5570 // virtual reg destination stack slot, in this case producing:
5571 //
5572 // STRXui %xzr, %stack.0
5573 //
5574 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
5575 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
5576 assert(SrcMO.getSubReg() == 0 &&
5577 "Unexpected subreg on physical register");
5578 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
5579 FrameIndex, &AArch64::GPR64RegClass, &TRI,
5580 Register());
5581 return &*--InsertPt;
5582 }
5583
5584 // Handle cases like filling use of:
5585 //
5586 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
5587 //
5588 // where we can load the full virtual reg source stack slot, into the subreg
5589 // destination, in this case producing:
5590 //
5591 // LDRWui %0:sub_32<def,read-undef>, %stack.0
5592 //
5593 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
5594 const TargetRegisterClass *FillRC;
5595 switch (DstMO.getSubReg()) {
5596 default:
5597 FillRC = nullptr;
5598 break;
5599 case AArch64::sub_32:
5600 FillRC = &AArch64::GPR32RegClass;
5601 break;
5602 case AArch64::ssub:
5603 FillRC = &AArch64::FPR32RegClass;
5604 break;
5605 case AArch64::dsub:
5606 FillRC = &AArch64::FPR64RegClass;
5607 break;
5608 }
5609
5610 if (FillRC) {
5611 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
5612 TRI.getRegSizeInBits(*FillRC) &&
5613 "Mismatched regclass size on folded subreg COPY");
5614 loadRegFromStackSlot(MBB, MBBI: InsertPt, DestReg: DstReg, FI: FrameIndex, RC: FillRC, TRI: &TRI,
5615 VReg: Register());
5616 MachineInstr &LoadMI = *--InsertPt;
5617 MachineOperand &LoadDst = LoadMI.getOperand(i: 0);
5618 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
5619 LoadDst.setSubReg(DstMO.getSubReg());
5620 LoadDst.setIsUndef();
5621 return &LoadMI;
5622 }
5623 }
5624 }
5625
5626 // Cannot fold.
5627 return nullptr;
5628}
5629
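// Illustrative examples for the logic below, assuming an LDRXui (scale 8,
// immediate range [0, 4095]) whose unscaled counterpart is LDURXi (scale 1,
// range [-256, 255]): a byte offset of 40 is a multiple of 8 and in range, so
// the emittable offset is 5 and no offset remains; a byte offset of 4 is not
// a multiple of 8, so the instruction can be rewritten to LDURXi with an
// emittable offset of 4.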
5630int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
5631 StackOffset &SOffset,
5632 bool *OutUseUnscaledOp,
5633 unsigned *OutUnscaledOp,
5634 int64_t *EmittableOffset) {
5635 // Set output values in case of early exit.
5636 if (EmittableOffset)
5637 *EmittableOffset = 0;
5638 if (OutUseUnscaledOp)
5639 *OutUseUnscaledOp = false;
5640 if (OutUnscaledOp)
5641 *OutUnscaledOp = 0;
5642
5643 // Exit early for structured vector spills/fills as they can't take an
5644 // immediate offset.
5645 switch (MI.getOpcode()) {
5646 default:
5647 break;
5648 case AArch64::LD1Rv1d:
5649 case AArch64::LD1Rv2s:
5650 case AArch64::LD1Rv2d:
5651 case AArch64::LD1Rv4h:
5652 case AArch64::LD1Rv4s:
5653 case AArch64::LD1Rv8b:
5654 case AArch64::LD1Rv8h:
5655 case AArch64::LD1Rv16b:
5656 case AArch64::LD1Twov2d:
5657 case AArch64::LD1Threev2d:
5658 case AArch64::LD1Fourv2d:
5659 case AArch64::LD1Twov1d:
5660 case AArch64::LD1Threev1d:
5661 case AArch64::LD1Fourv1d:
5662 case AArch64::ST1Twov2d:
5663 case AArch64::ST1Threev2d:
5664 case AArch64::ST1Fourv2d:
5665 case AArch64::ST1Twov1d:
5666 case AArch64::ST1Threev1d:
5667 case AArch64::ST1Fourv1d:
5668 case AArch64::ST1i8:
5669 case AArch64::ST1i16:
5670 case AArch64::ST1i32:
5671 case AArch64::ST1i64:
5672 case AArch64::IRG:
5673 case AArch64::IRGstack:
5674 case AArch64::STGloop:
5675 case AArch64::STZGloop:
5676 return AArch64FrameOffsetCannotUpdate;
5677 }
5678
5679 // Get the min/max offset and the scale.
5680 TypeSize ScaleValue(0U, false), Width(0U, false);
5681 int64_t MinOff, MaxOff;
5682 if (!AArch64InstrInfo::getMemOpInfo(Opcode: MI.getOpcode(), Scale&: ScaleValue, Width, MinOffset&: MinOff,
5683 MaxOffset&: MaxOff))
5684 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5685
5686 // Construct the complete offset.
5687 bool IsMulVL = ScaleValue.isScalable();
5688 unsigned Scale = ScaleValue.getKnownMinValue();
5689 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
5690
5691 const MachineOperand &ImmOpnd =
5692 MI.getOperand(i: AArch64InstrInfo::getLoadStoreImmIdx(Opc: MI.getOpcode()));
5693 Offset += ImmOpnd.getImm() * Scale;
5694
5695 // If the offset doesn't match the scale, we rewrite the instruction to
5696 // use the unscaled instruction instead. Likewise, we do so if we have a
5697 // negative offset and there is an unscaled op to use.
5698 std::optional<unsigned> UnscaledOp =
5699 AArch64InstrInfo::getUnscaledLdSt(Opc: MI.getOpcode());
5700 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
5701 if (useUnscaledOp &&
5702 !AArch64InstrInfo::getMemOpInfo(Opcode: *UnscaledOp, Scale&: ScaleValue, Width, MinOffset&: MinOff,
5703 MaxOffset&: MaxOff))
5704 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5705
5706 Scale = ScaleValue.getKnownMinValue();
5707 assert(IsMulVL == ScaleValue.isScalable() &&
5708 "Unscaled opcode has different value for scalable");
5709
5710 int64_t Remainder = Offset % Scale;
5711 assert(!(Remainder && useUnscaledOp) &&
5712 "Cannot have remainder when using unscaled op");
5713
5714 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
5715 int64_t NewOffset = Offset / Scale;
5716 if (MinOff <= NewOffset && NewOffset <= MaxOff)
5717 Offset = Remainder;
5718 else {
5719 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
5720 Offset = Offset - NewOffset * Scale;
5721 }
5722
5723 if (EmittableOffset)
5724 *EmittableOffset = NewOffset;
5725 if (OutUseUnscaledOp)
5726 *OutUseUnscaledOp = useUnscaledOp;
5727 if (OutUnscaledOp && UnscaledOp)
5728 *OutUnscaledOp = *UnscaledOp;
5729
5730 if (IsMulVL)
5731 SOffset = StackOffset::get(Fixed: SOffset.getFixed(), Scalable: Offset);
5732 else
5733 SOffset = StackOffset::get(Fixed: Offset, Scalable: SOffset.getScalable());
5734 return AArch64FrameOffsetCanUpdate |
5735 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
5736}
5737
5738bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
5739 unsigned FrameReg, StackOffset &Offset,
5740 const AArch64InstrInfo *TII) {
5741 unsigned Opcode = MI.getOpcode();
5742 unsigned ImmIdx = FrameRegIdx + 1;
5743
5744 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
5745 Offset += StackOffset::getFixed(Fixed: MI.getOperand(i: ImmIdx).getImm());
5746 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
5747 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
5748 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
5749 MI.eraseFromParent();
5750 Offset = StackOffset();
5751 return true;
5752 }
5753
5754 int64_t NewOffset;
5755 unsigned UnscaledOp;
5756 bool UseUnscaledOp;
5757 int Status = isAArch64FrameOffsetLegal(MI, SOffset&: Offset, OutUseUnscaledOp: &UseUnscaledOp,
5758 OutUnscaledOp: &UnscaledOp, EmittableOffset: &NewOffset);
5759 if (Status & AArch64FrameOffsetCanUpdate) {
5760 if (Status & AArch64FrameOffsetIsLegal)
5761 // Replace the FrameIndex with FrameReg.
5762 MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
5763 if (UseUnscaledOp)
5764 MI.setDesc(TII->get(UnscaledOp));
5765
5766 MI.getOperand(i: ImmIdx).ChangeToImmediate(ImmVal: NewOffset);
5767 return !Offset;
5768 }
5769
5770 return false;
5771}
5772
5773void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
5774 MachineBasicBlock::iterator MI) const {
5775 DebugLoc DL;
5776 BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
5777}
5778
5779MCInst AArch64InstrInfo::getNop() const {
5780 return MCInstBuilder(AArch64::HINT).addImm(0);
5781}
5782
5783// AArch64 supports MachineCombiner.
5784bool AArch64InstrInfo::useMachineCombiner() const { return true; }
5785
5786// True when Opc sets the NZCV flags.
5787static bool isCombineInstrSettingFlag(unsigned Opc) {
5788 switch (Opc) {
5789 case AArch64::ADDSWrr:
5790 case AArch64::ADDSWri:
5791 case AArch64::ADDSXrr:
5792 case AArch64::ADDSXri:
5793 case AArch64::SUBSWrr:
5794 case AArch64::SUBSXrr:
5795 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5796 case AArch64::SUBSWri:
5797 case AArch64::SUBSXri:
5798 return true;
5799 default:
5800 break;
5801 }
5802 return false;
5803}
5804
5805// 32b Opcodes that can be combined with a MUL
5806static bool isCombineInstrCandidate32(unsigned Opc) {
5807 switch (Opc) {
5808 case AArch64::ADDWrr:
5809 case AArch64::ADDWri:
5810 case AArch64::SUBWrr:
5811 case AArch64::ADDSWrr:
5812 case AArch64::ADDSWri:
5813 case AArch64::SUBSWrr:
5814 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5815 case AArch64::SUBWri:
5816 case AArch64::SUBSWri:
5817 return true;
5818 default:
5819 break;
5820 }
5821 return false;
5822}
5823
5824// 64b Opcodes that can be combined with a MUL
5825static bool isCombineInstrCandidate64(unsigned Opc) {
5826 switch (Opc) {
5827 case AArch64::ADDXrr:
5828 case AArch64::ADDXri:
5829 case AArch64::SUBXrr:
5830 case AArch64::ADDSXrr:
5831 case AArch64::ADDSXri:
5832 case AArch64::SUBSXrr:
5833 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5834 case AArch64::SUBXri:
5835 case AArch64::SUBSXri:
5836 case AArch64::ADDv8i8:
5837 case AArch64::ADDv16i8:
5838 case AArch64::ADDv4i16:
5839 case AArch64::ADDv8i16:
5840 case AArch64::ADDv2i32:
5841 case AArch64::ADDv4i32:
5842 case AArch64::SUBv8i8:
5843 case AArch64::SUBv16i8:
5844 case AArch64::SUBv4i16:
5845 case AArch64::SUBv8i16:
5846 case AArch64::SUBv2i32:
5847 case AArch64::SUBv4i32:
5848 return true;
5849 default:
5850 break;
5851 }
5852 return false;
5853}
5854
5855// FP Opcodes that can be combined with an FMUL.
5856static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
5857 switch (Inst.getOpcode()) {
5858 default:
5859 break;
5860 case AArch64::FADDHrr:
5861 case AArch64::FADDSrr:
5862 case AArch64::FADDDrr:
5863 case AArch64::FADDv4f16:
5864 case AArch64::FADDv8f16:
5865 case AArch64::FADDv2f32:
5866 case AArch64::FADDv2f64:
5867 case AArch64::FADDv4f32:
5868 case AArch64::FSUBHrr:
5869 case AArch64::FSUBSrr:
5870 case AArch64::FSUBDrr:
5871 case AArch64::FSUBv4f16:
5872 case AArch64::FSUBv8f16:
5873 case AArch64::FSUBv2f32:
5874 case AArch64::FSUBv2f64:
5875 case AArch64::FSUBv4f32:
5876 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
5877 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
5878 // the target options or if FADD/FSUB has the contract fast-math flag.
5879 return Options.UnsafeFPMath ||
5880 Options.AllowFPOpFusion == FPOpFusion::Fast ||
5881 Inst.getFlag(Flag: MachineInstr::FmContract);
5882 return true;
5883 }
5884 return false;
5885}
5886
5887// Opcodes that can be combined with a MUL
5888static bool isCombineInstrCandidate(unsigned Opc) {
5889 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
5890}
5891
5892//
5893// Utility routine that checks whether \p MO is defined by a
5894// \p CombineOpc instruction in the basic block \p MBB.
5895static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
5896 unsigned CombineOpc, unsigned ZeroReg = 0,
5897 bool CheckZeroReg = false) {
5898 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
5899 MachineInstr *MI = nullptr;
5900
5901 if (MO.isReg() && MO.getReg().isVirtual())
5902 MI = MRI.getUniqueVRegDef(Reg: MO.getReg());
5903 // And it needs to be in the trace (otherwise, it won't have a depth).
5904 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
5905 return false;
5906 // Its result must only be used by the instruction we combine with.
5907 if (!MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
5908 return false;
5909
5910 if (CheckZeroReg) {
5911 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
5912 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
5913 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
5914 // The third input reg must be zero.
5915 if (MI->getOperand(i: 3).getReg() != ZeroReg)
5916 return false;
5917 }
5918
5919 if (isCombineInstrSettingFlag(CombineOpc) &&
5920 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
5921 return false;
5922
5923 return true;
5924}
5925
5926//
5927// Is \p MO defined by an integer multiply, and can it be combined?
5928static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
5929 unsigned MulOpc, unsigned ZeroReg) {
5930 return canCombine(MBB, MO, CombineOpc: MulOpc, ZeroReg, CheckZeroReg: true);
5931}
5932
5933//
5934// Is \p MO defined by a floating-point multiply, and can it be combined?
5935static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
5936 unsigned MulOpc) {
5937 return canCombine(MBB, MO, CombineOpc: MulOpc);
5938}
5939
5940// TODO: There are many more machine instruction opcodes to match:
5941// 1. Other data types (integer, vectors)
5942// 2. Other math / logic operations (xor, or)
5943// 3. Other forms of the same operation (intrinsics and other variants)
5944bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
5945 bool Invert) const {
5946 if (Invert)
5947 return false;
5948 switch (Inst.getOpcode()) {
5949 // == Floating-point types ==
5950 // -- Floating-point instructions --
5951 case AArch64::FADDHrr:
5952 case AArch64::FADDSrr:
5953 case AArch64::FADDDrr:
5954 case AArch64::FMULHrr:
5955 case AArch64::FMULSrr:
5956 case AArch64::FMULDrr:
5957 case AArch64::FMULX16:
5958 case AArch64::FMULX32:
5959 case AArch64::FMULX64:
5960 // -- Advanced SIMD instructions --
5961 case AArch64::FADDv4f16:
5962 case AArch64::FADDv8f16:
5963 case AArch64::FADDv2f32:
5964 case AArch64::FADDv4f32:
5965 case AArch64::FADDv2f64:
5966 case AArch64::FMULv4f16:
5967 case AArch64::FMULv8f16:
5968 case AArch64::FMULv2f32:
5969 case AArch64::FMULv4f32:
5970 case AArch64::FMULv2f64:
5971 case AArch64::FMULXv4f16:
5972 case AArch64::FMULXv8f16:
5973 case AArch64::FMULXv2f32:
5974 case AArch64::FMULXv4f32:
5975 case AArch64::FMULXv2f64:
5976 // -- SVE instructions --
5977 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
5978 // in the SVE instruction set (though there are predicated ones).
5979 case AArch64::FADD_ZZZ_H:
5980 case AArch64::FADD_ZZZ_S:
5981 case AArch64::FADD_ZZZ_D:
5982 case AArch64::FMUL_ZZZ_H:
5983 case AArch64::FMUL_ZZZ_S:
5984 case AArch64::FMUL_ZZZ_D:
5985 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
5986 (Inst.getFlag(Flag: MachineInstr::MIFlag::FmReassoc) &&
5987 Inst.getFlag(Flag: MachineInstr::MIFlag::FmNsz));
5988
5989 // == Integer types ==
5990 // -- Base instructions --
5991 // Opcodes MULWrr and MULXrr don't exist because
5992 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
5993 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
5994 // The machine-combiner does not support three-source-operand machine
5995 // instructions, so we cannot reassociate MULs.
5996 case AArch64::ADDWrr:
5997 case AArch64::ADDXrr:
5998 case AArch64::ANDWrr:
5999 case AArch64::ANDXrr:
6000 case AArch64::ORRWrr:
6001 case AArch64::ORRXrr:
6002 case AArch64::EORWrr:
6003 case AArch64::EORXrr:
6004 case AArch64::EONWrr:
6005 case AArch64::EONXrr:
6006 // -- Advanced SIMD instructions --
6007 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6008 // in the Advanced SIMD instruction set.
6009 case AArch64::ADDv8i8:
6010 case AArch64::ADDv16i8:
6011 case AArch64::ADDv4i16:
6012 case AArch64::ADDv8i16:
6013 case AArch64::ADDv2i32:
6014 case AArch64::ADDv4i32:
6015 case AArch64::ADDv1i64:
6016 case AArch64::ADDv2i64:
6017 case AArch64::MULv8i8:
6018 case AArch64::MULv16i8:
6019 case AArch64::MULv4i16:
6020 case AArch64::MULv8i16:
6021 case AArch64::MULv2i32:
6022 case AArch64::MULv4i32:
6023 case AArch64::ANDv8i8:
6024 case AArch64::ANDv16i8:
6025 case AArch64::ORRv8i8:
6026 case AArch64::ORRv16i8:
6027 case AArch64::EORv8i8:
6028 case AArch64::EORv16i8:
6029 // -- SVE instructions --
6030 case AArch64::ADD_ZZZ_B:
6031 case AArch64::ADD_ZZZ_H:
6032 case AArch64::ADD_ZZZ_S:
6033 case AArch64::ADD_ZZZ_D:
6034 case AArch64::MUL_ZZZ_B:
6035 case AArch64::MUL_ZZZ_H:
6036 case AArch64::MUL_ZZZ_S:
6037 case AArch64::MUL_ZZZ_D:
6038 case AArch64::AND_ZZZ:
6039 case AArch64::ORR_ZZZ:
6040 case AArch64::EOR_ZZZ:
6041 return true;
6042
6043 default:
6044 return false;
6045 }
6046}
6047
6048/// Find instructions that can be turned into madd.
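/// For example (illustrative), an ADDWrr whose first source operand is
/// produced by a MADDWrrr with a WZR accumulator (i.e. a plain MUL) matches
/// MULADDW_OP1, so the machine combiner can later fuse the multiply and the
/// add into a single MADD.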
6049static bool getMaddPatterns(MachineInstr &Root,
6050 SmallVectorImpl<unsigned> &Patterns) {
6051 unsigned Opc = Root.getOpcode();
6052 MachineBasicBlock &MBB = *Root.getParent();
6053 bool Found = false;
6054
6055 if (!isCombineInstrCandidate(Opc))
6056 return false;
6057 if (isCombineInstrSettingFlag(Opc)) {
6058 int Cmp_NZCV =
6059 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6060 // When NZCV is live, bail out.
6061 if (Cmp_NZCV == -1)
6062 return false;
6063 unsigned NewOpc = convertToNonFlagSettingOpc(MI: Root);
6064 // When the opcode can't change, bail out.
6065 // CHECKME: do we miss any cases for opcode conversion?
6066 if (NewOpc == Opc)
6067 return false;
6068 Opc = NewOpc;
6069 }
6070
6071 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6072 unsigned Pattern) {
6073 if (canCombineWithMUL(MBB, MO&: Root.getOperand(i: Operand), MulOpc: Opcode, ZeroReg)) {
6074 Patterns.push_back(Elt: Pattern);
6075 Found = true;
6076 }
6077 };
6078
6079 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6080 if (canCombine(MBB, MO&: Root.getOperand(i: Operand), CombineOpc: Opcode)) {
6081 Patterns.push_back(Elt: Pattern);
6082 Found = true;
6083 }
6084 };
6085
6086 typedef AArch64MachineCombinerPattern MCP;
6087
6088 switch (Opc) {
6089 default:
6090 break;
6091 case AArch64::ADDWrr:
6092 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6093 "ADDWrr does not have register operands");
6094 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6095 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6096 break;
6097 case AArch64::ADDXrr:
6098 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6099 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6100 break;
6101 case AArch64::SUBWrr:
6102 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6103 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6104 break;
6105 case AArch64::SUBXrr:
6106 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6107 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6108 break;
6109 case AArch64::ADDWri:
6110 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6111 break;
6112 case AArch64::ADDXri:
6113 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6114 break;
6115 case AArch64::SUBWri:
6116 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6117 break;
6118 case AArch64::SUBXri:
6119 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6120 break;
6121 case AArch64::ADDv8i8:
6122 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6123 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6124 break;
6125 case AArch64::ADDv16i8:
6126 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6127 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6128 break;
6129 case AArch64::ADDv4i16:
6130 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6131 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6132 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6133 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6134 break;
6135 case AArch64::ADDv8i16:
6136 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6137 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6138 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6139 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6140 break;
6141 case AArch64::ADDv2i32:
6142 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6143 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6144 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6145 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6146 break;
6147 case AArch64::ADDv4i32:
6148 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6149 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6150 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6151 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6152 break;
6153 case AArch64::SUBv8i8:
6154 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6155 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6156 break;
6157 case AArch64::SUBv16i8:
6158 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6159 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6160 break;
6161 case AArch64::SUBv4i16:
6162 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6163 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
6164 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
6165 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
6166 break;
6167 case AArch64::SUBv8i16:
6168 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
6169 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
6170 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
6171 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
6172 break;
6173 case AArch64::SUBv2i32:
6174 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
6175 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
6176 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
6177 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
6178 break;
6179 case AArch64::SUBv4i32:
6180 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
6181 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
6182 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
6183 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
6184 break;
6185 }
6186 return Found;
6187}
6188/// Floating-Point Support
6189
6190/// Find floating-point instructions that can be turned into fmadd/fmsub/fmla/fmls.
6191static bool getFMAPatterns(MachineInstr &Root,
6192 SmallVectorImpl<unsigned> &Patterns) {
6193
6194 if (!isCombineInstrCandidateFP(Inst: Root))
6195 return false;
6196
6197 MachineBasicBlock &MBB = *Root.getParent();
6198 bool Found = false;
6199
6200 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
6201 if (canCombineWithFMUL(MBB, MO&: Root.getOperand(i: Operand), MulOpc: Opcode)) {
6202 Patterns.push_back(Elt: Pattern);
6203 return true;
6204 }
6205 return false;
6206 };
6207
6208 typedef AArch64MachineCombinerPattern MCP;
6209
6210 switch (Root.getOpcode()) {
6211 default:
6212 assert(false && "Unsupported FP instruction in combiner\n");
6213 break;
6214 case AArch64::FADDHrr:
6215 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6216 "FADDHrr does not have register operands");
6217
6218 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
6219 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
6220 break;
6221 case AArch64::FADDSrr:
6222 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6223 "FADDSrr does not have register operands");
6224
6225 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
6226 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
6227
6228 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
6229 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
6230 break;
6231 case AArch64::FADDDrr:
6232 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
6233 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
6234
6235 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
6236 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
6237 break;
6238 case AArch64::FADDv4f16:
6239 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
6240 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
6241
6242 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
6243 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
6244 break;
6245 case AArch64::FADDv8f16:
6246 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
6247 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
6248
6249 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
6250 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
6251 break;
6252 case AArch64::FADDv2f32:
6253 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
6254 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
6255
6256 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
6257 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
6258 break;
6259 case AArch64::FADDv2f64:
6260 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
6261 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
6262
6263 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
6264 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
6265 break;
6266 case AArch64::FADDv4f32:
6267 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
6268 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
6269
6270 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
6271 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
6272 break;
6273 case AArch64::FSUBHrr:
6274 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
6275 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
6276 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
6277 break;
6278 case AArch64::FSUBSrr:
6279 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
6280
6281 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
6282 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
6283
6284 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
6285 break;
6286 case AArch64::FSUBDrr:
6287 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
6288
6289 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
6290 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
6291
6292 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
6293 break;
6294 case AArch64::FSUBv4f16:
6295 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
6296 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
6297
6298 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
6299 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
6300 break;
6301 case AArch64::FSUBv8f16:
6302 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
6303 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
6304
6305 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
6306 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
6307 break;
6308 case AArch64::FSUBv2f32:
6309 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
6310 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
6311
6312 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
6313 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
6314 break;
6315 case AArch64::FSUBv2f64:
6316 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
6317 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
6318
6319 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
6320 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
6321 break;
6322 case AArch64::FSUBv4f32:
6323 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
6324 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
6325
6326 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
6327 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
6328 break;
6329 }
6330 return Found;
6331}
6332
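// Find FMULs fed by a lane DUP that can be rewritten as indexed FMULs, e.g.
// (illustrative) FMULv2f32 %x, (DUPv2i32lane %q, lane) can later become
// FMULv2i32_indexed %x, %q, lane, saving the DUP.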
6333static bool getFMULPatterns(MachineInstr &Root,
6334 SmallVectorImpl<unsigned> &Patterns) {
6335 MachineBasicBlock &MBB = *Root.getParent();
6336 bool Found = false;
6337
6338 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
6339 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6340 MachineOperand &MO = Root.getOperand(i: Operand);
6341 MachineInstr *MI = nullptr;
6342 if (MO.isReg() && MO.getReg().isVirtual())
6343 MI = MRI.getUniqueVRegDef(Reg: MO.getReg());
6344 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
6345 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
6346 MI->getOperand(i: 1).getReg().isVirtual())
6347 MI = MRI.getUniqueVRegDef(Reg: MI->getOperand(i: 1).getReg());
6348 if (MI && MI->getOpcode() == Opcode) {
6349 Patterns.push_back(Elt: Pattern);
6350 return true;
6351 }
6352 return false;
6353 };
6354
6355 typedef AArch64MachineCombinerPattern MCP;
6356
6357 switch (Root.getOpcode()) {
6358 default:
6359 return false;
6360 case AArch64::FMULv2f32:
6361 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
6362 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
6363 break;
6364 case AArch64::FMULv2f64:
6365 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
6366 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
6367 break;
6368 case AArch64::FMULv4f16:
6369 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
6370 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
6371 break;
6372 case AArch64::FMULv4f32:
6373 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
6374 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
6375 break;
6376 case AArch64::FMULv8f16:
6377 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
6378 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
6379 break;
6380 }
6381
6382 return Found;
6383}
6384
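// Match an FNEG of an FMADD (when both carry the contract and nsz fast-math
// flags) so that the pair can later be rewritten as a single FNMADD, which
// computes -(a * b + c).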
6385static bool getFNEGPatterns(MachineInstr &Root,
6386 SmallVectorImpl<unsigned> &Patterns) {
6387 unsigned Opc = Root.getOpcode();
6388 MachineBasicBlock &MBB = *Root.getParent();
6389 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6390
6391 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
6392 MachineOperand &MO = Root.getOperand(i: 1);
6393 MachineInstr *MI = MRI.getUniqueVRegDef(Reg: MO.getReg());
6394 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
6395 MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()) &&
6396 Root.getFlag(Flag: MachineInstr::MIFlag::FmContract) &&
6397 Root.getFlag(Flag: MachineInstr::MIFlag::FmNsz) &&
6398 MI->getFlag(Flag: MachineInstr::MIFlag::FmContract) &&
6399 MI->getFlag(Flag: MachineInstr::MIFlag::FmNsz)) {
6400 Patterns.push_back(Elt: Pattern);
6401 return true;
6402 }
6403 return false;
6404 };
6405
6406 switch (Opc) {
6407 default:
6408 break;
6409 case AArch64::FNEGDr:
6410 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
6411 case AArch64::FNEGSr:
6412 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
6413 }
6414
6415 return false;
6416}
6417
6418/// Return true when a code sequence can improve throughput. It
6419/// should be called only for instructions in loops.
6420/// \param Pattern - combiner pattern
6421bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
6422 switch (Pattern) {
6423 default:
6424 break;
6425 case AArch64MachineCombinerPattern::FMULADDH_OP1:
6426 case AArch64MachineCombinerPattern::FMULADDH_OP2:
6427 case AArch64MachineCombinerPattern::FMULSUBH_OP1:
6428 case AArch64MachineCombinerPattern::FMULSUBH_OP2:
6429 case AArch64MachineCombinerPattern::FMULADDS_OP1:
6430 case AArch64MachineCombinerPattern::FMULADDS_OP2:
6431 case AArch64MachineCombinerPattern::FMULSUBS_OP1:
6432 case AArch64MachineCombinerPattern::FMULSUBS_OP2:
6433 case AArch64MachineCombinerPattern::FMULADDD_OP1:
6434 case AArch64MachineCombinerPattern::FMULADDD_OP2:
6435 case AArch64MachineCombinerPattern::FMULSUBD_OP1:
6436 case AArch64MachineCombinerPattern::FMULSUBD_OP2:
6437 case AArch64MachineCombinerPattern::FNMULSUBH_OP1:
6438 case AArch64MachineCombinerPattern::FNMULSUBS_OP1:
6439 case AArch64MachineCombinerPattern::FNMULSUBD_OP1:
6440 case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1:
6441 case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2:
6442 case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1:
6443 case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2:
6444 case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1:
6445 case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2:
6446 case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1:
6447 case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2:
6448 case AArch64MachineCombinerPattern::FMLAv4f16_OP2:
6449 case AArch64MachineCombinerPattern::FMLAv4f16_OP1:
6450 case AArch64MachineCombinerPattern::FMLAv8f16_OP1:
6451 case AArch64MachineCombinerPattern::FMLAv8f16_OP2:
6452 case AArch64MachineCombinerPattern::FMLAv2f32_OP2:
6453 case AArch64MachineCombinerPattern::FMLAv2f32_OP1:
6454 case AArch64MachineCombinerPattern::FMLAv2f64_OP1:
6455 case AArch64MachineCombinerPattern::FMLAv2f64_OP2:
6456 case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1:
6457 case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2:
6458 case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1:
6459 case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2:
6460 case AArch64MachineCombinerPattern::FMLAv4f32_OP1:
6461 case AArch64MachineCombinerPattern::FMLAv4f32_OP2:
6462 case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1:
6463 case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2:
6464 case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1:
6465 case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2:
6466 case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1:
6467 case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2:
6468 case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2:
6469 case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2:
6470 case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2:
6471 case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2:
6472 case AArch64MachineCombinerPattern::FMLSv4f16_OP1:
6473 case AArch64MachineCombinerPattern::FMLSv4f16_OP2:
6474 case AArch64MachineCombinerPattern::FMLSv8f16_OP1:
6475 case AArch64MachineCombinerPattern::FMLSv8f16_OP2:
6476 case AArch64MachineCombinerPattern::FMLSv2f32_OP2:
6477 case AArch64MachineCombinerPattern::FMLSv2f64_OP2:
6478 case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2:
6479 case AArch64MachineCombinerPattern::FMLSv4f32_OP2:
6480 case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1:
6481 case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2:
6482 case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1:
6483 case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2:
6484 case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1:
6485 case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2:
6486 case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1:
6487 case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2:
6488 case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1:
6489 case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2:
6490 case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
6491 case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
6492 case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
6493 case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
6494 case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
6495 case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
6496 case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
6497 case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
6498 case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
6499 case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
6500 case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
6501 case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
6502 case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
6503 case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
6504 case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
6505 case AArch64MachineCombinerPattern::MULSUBv16i8_OP2:
6506 case AArch64MachineCombinerPattern::MULSUBv4i16_OP1:
6507 case AArch64MachineCombinerPattern::MULSUBv4i16_OP2:
6508 case AArch64MachineCombinerPattern::MULSUBv8i16_OP1:
6509 case AArch64MachineCombinerPattern::MULSUBv8i16_OP2:
6510 case AArch64MachineCombinerPattern::MULSUBv2i32_OP1:
6511 case AArch64MachineCombinerPattern::MULSUBv2i32_OP2:
6512 case AArch64MachineCombinerPattern::MULSUBv4i32_OP1:
6513 case AArch64MachineCombinerPattern::MULSUBv4i32_OP2:
6514 case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1:
6515 case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2:
6516 case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1:
6517 case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2:
6518 case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1:
6519 case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2:
6520 case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1:
6521 case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2:
6522 case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
6523 case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
6524 case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
6525 case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
6526 case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
6527 case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
6528 case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
6529 case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
6530 return true;
6531 } // end switch (Pattern)
6532 return false;
6533}
6534
6535/// Find other MI combine patterns.
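/// Currently this recognizes a subtract whose second operand is produced by
/// an add, e.g. (illustrative operands):
///   ADD I, B, C
///   SUB R, A, I
/// and records the SUBADD_OP1/SUBADD_OP2 patterns so the add can be folded
/// into two dependent subtracts (see genSubAdd2SubSub below).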
6536static bool getMiscPatterns(MachineInstr &Root,
6537 SmallVectorImpl<unsigned> &Patterns) {
6538 // A - (B + C) ==> (A - B) - C or (A - C) - B
6539 unsigned Opc = Root.getOpcode();
6540 MachineBasicBlock &MBB = *Root.getParent();
6541
6542 switch (Opc) {
6543 case AArch64::SUBWrr:
6544 case AArch64::SUBSWrr:
6545 case AArch64::SUBXrr:
6546 case AArch64::SUBSXrr:
6547 // Found candidate root.
6548 break;
6549 default:
6550 return false;
6551 }
6552
6553 if (isCombineInstrSettingFlag(Opc) &&
6554 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
6555 -1)
6556 return false;
6557
6558 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
6559 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
6560 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
6561 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
6562 Patterns.push_back(Elt: AArch64MachineCombinerPattern::SUBADD_OP1);
6563 Patterns.push_back(Elt: AArch64MachineCombinerPattern::SUBADD_OP2);
6564 return true;
6565 }
6566
6567 return false;
6568}
6569
6570CombinerObjective
6571AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
6572 switch (Pattern) {
6573 case AArch64MachineCombinerPattern::SUBADD_OP1:
6574 case AArch64MachineCombinerPattern::SUBADD_OP2:
6575 return CombinerObjective::MustReduceDepth;
6576 default:
6577 return TargetInstrInfo::getCombinerObjective(Pattern);
6578 }
6579}
6580
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Patterns vector. Patterns should be sorted in priority order, since
/// the pattern evaluator stops checking as soon as it finds a faster sequence.
6586bool AArch64InstrInfo::getMachineCombinerPatterns(
6587 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
6588 bool DoRegPressureReduce) const {
6589 // Integer patterns
6590 if (getMaddPatterns(Root, Patterns))
6591 return true;
6592 // Floating point patterns
6593 if (getFMULPatterns(Root, Patterns))
6594 return true;
6595 if (getFMAPatterns(Root, Patterns))
6596 return true;
6597 if (getFNEGPatterns(Root, Patterns))
6598 return true;
6599
6600 // Other patterns
6601 if (getMiscPatterns(Root, Patterns))
6602 return true;
6603
6604 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
6605 DoRegPressureReduce);
6606}
6607
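// FMAInstKind describes the operand order used by genFusedMultiply below:
// Default emits the scalar MADD/FMADD operand order (mul operands first, then
// the addend), while Indexed and Accumulator place the accumulator first and
// Indexed additionally appends the lane immediate of the original multiply.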
6608enum class FMAInstKind { Default, Indexed, Accumulator };
6609/// genFusedMultiply - Generate fused multiply instructions.
6610/// This function supports both integer and floating point instructions.
6611/// A typical example:
6612/// F|MUL I=A,B,0
6613/// F|ADD R,I,C
6614/// ==> F|MADD R,A,B,C
6615/// \param MF Containing MachineFunction
6616/// \param MRI Register information
6617/// \param TII Target information
6618/// \param Root is the F|ADD instruction
6619/// \param [out] InsInstrs is a vector of machine instructions and will
6620/// contain the generated madd instruction
6621/// \param IdxMulOpd is index of operand in Root that is the result of
6622/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind The kind of FMA instruction (addressing mode) to be generated
6626/// \param ReplacedAddend is the result register from the instruction
6627/// replacing the non-combined operand, if any.
6628static MachineInstr *
6629genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
6630 const TargetInstrInfo *TII, MachineInstr &Root,
6631 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
6632 unsigned MaddOpc, const TargetRegisterClass *RC,
6633 FMAInstKind kind = FMAInstKind::Default,
6634 const Register *ReplacedAddend = nullptr) {
6635 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6636
6637 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
6638 MachineInstr *MUL = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: IdxMulOpd).getReg());
6639 Register ResultReg = Root.getOperand(i: 0).getReg();
6640 Register SrcReg0 = MUL->getOperand(i: 1).getReg();
6641 bool Src0IsKill = MUL->getOperand(i: 1).isKill();
6642 Register SrcReg1 = MUL->getOperand(i: 2).getReg();
6643 bool Src1IsKill = MUL->getOperand(i: 2).isKill();
6644
6645 Register SrcReg2;
6646 bool Src2IsKill;
6647 if (ReplacedAddend) {
// If we just generated a new addend, this is its only use.
6649 SrcReg2 = *ReplacedAddend;
6650 Src2IsKill = true;
6651 } else {
6652 SrcReg2 = Root.getOperand(i: IdxOtherOpd).getReg();
6653 Src2IsKill = Root.getOperand(i: IdxOtherOpd).isKill();
6654 }
6655
6656 if (ResultReg.isVirtual())
6657 MRI.constrainRegClass(Reg: ResultReg, RC);
6658 if (SrcReg0.isVirtual())
6659 MRI.constrainRegClass(Reg: SrcReg0, RC);
6660 if (SrcReg1.isVirtual())
6661 MRI.constrainRegClass(Reg: SrcReg1, RC);
6662 if (SrcReg2.isVirtual())
6663 MRI.constrainRegClass(Reg: SrcReg2, RC);
6664
6665 MachineInstrBuilder MIB;
6666 if (kind == FMAInstKind::Default)
6667 MIB = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MaddOpc), DestReg: ResultReg)
6668 .addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
6669 .addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill))
6670 .addReg(RegNo: SrcReg2, flags: getKillRegState(B: Src2IsKill));
6671 else if (kind == FMAInstKind::Indexed)
6672 MIB = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MaddOpc), DestReg: ResultReg)
6673 .addReg(RegNo: SrcReg2, flags: getKillRegState(B: Src2IsKill))
6674 .addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
6675 .addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill))
6676 .addImm(Val: MUL->getOperand(i: 3).getImm());
6677 else if (kind == FMAInstKind::Accumulator)
6678 MIB = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MaddOpc), DestReg: ResultReg)
6679 .addReg(RegNo: SrcReg2, flags: getKillRegState(B: Src2IsKill))
6680 .addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
6681 .addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill));
6682 else
6683 assert(false && "Invalid FMA instruction kind \n");
// Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
6685 InsInstrs.push_back(Elt: MIB);
6686 return MUL;
6687}
6688
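/// genFNegatedMAD - Fold the negation of an FMADD result into a single
/// FNMADD, e.g. (illustrative, scalar double):
///   FMADD I, A, B, C
///   FNEG  R, I
///   ==> FNMADD R, A, B, C
/// Returns the matched FMADD so it can be marked for deletion, or nullptr if
/// the register class is neither FPR32 nor FPR64.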
6689static MachineInstr *
6690genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
6691 const TargetInstrInfo *TII, MachineInstr &Root,
6692 SmallVectorImpl<MachineInstr *> &InsInstrs) {
6693 MachineInstr *MAD = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: 1).getReg());
6694
6695 unsigned Opc = 0;
6696 const TargetRegisterClass *RC = MRI.getRegClass(Reg: MAD->getOperand(i: 0).getReg());
6697 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6698 Opc = AArch64::FNMADDSrrr;
6699 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
6700 Opc = AArch64::FNMADDDrrr;
6701 else
6702 return nullptr;
6703
6704 Register ResultReg = Root.getOperand(i: 0).getReg();
6705 Register SrcReg0 = MAD->getOperand(i: 1).getReg();
6706 Register SrcReg1 = MAD->getOperand(i: 2).getReg();
6707 Register SrcReg2 = MAD->getOperand(i: 3).getReg();
6708 bool Src0IsKill = MAD->getOperand(i: 1).isKill();
6709 bool Src1IsKill = MAD->getOperand(i: 2).isKill();
6710 bool Src2IsKill = MAD->getOperand(i: 3).isKill();
6711 if (ResultReg.isVirtual())
6712 MRI.constrainRegClass(Reg: ResultReg, RC);
6713 if (SrcReg0.isVirtual())
6714 MRI.constrainRegClass(Reg: SrcReg0, RC);
6715 if (SrcReg1.isVirtual())
6716 MRI.constrainRegClass(Reg: SrcReg1, RC);
6717 if (SrcReg2.isVirtual())
6718 MRI.constrainRegClass(Reg: SrcReg2, RC);
6719
6720 MachineInstrBuilder MIB =
6721 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: Opc), DestReg: ResultReg)
6722 .addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
6723 .addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill))
6724 .addReg(RegNo: SrcReg2, flags: getKillRegState(B: Src2IsKill));
6725 InsInstrs.push_back(Elt: MIB);
6726
6727 return MAD;
6728}
6729
6730/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
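/// For example (illustrative operands):
///   DUP  D, Y, lane
///   FMUL R, X, D
///   ==> FMUL_indexed R, X, Y, lane
/// A COPY between the DUP and the FMUL is looked through.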
6731static MachineInstr *
6732genIndexedMultiply(MachineInstr &Root,
6733 SmallVectorImpl<MachineInstr *> &InsInstrs,
6734 unsigned IdxDupOp, unsigned MulOpc,
6735 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
6736 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
6737 "Invalid index of FMUL operand");
6738
6739 MachineFunction &MF = *Root.getMF();
6740 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
6741
6742 MachineInstr *Dup =
6743 MF.getRegInfo().getUniqueVRegDef(Reg: Root.getOperand(i: IdxDupOp).getReg());
6744
6745 if (Dup->getOpcode() == TargetOpcode::COPY)
6746 Dup = MRI.getUniqueVRegDef(Reg: Dup->getOperand(i: 1).getReg());
6747
6748 Register DupSrcReg = Dup->getOperand(i: 1).getReg();
6749 MRI.clearKillFlags(Reg: DupSrcReg);
6750 MRI.constrainRegClass(Reg: DupSrcReg, RC);
6751
6752 unsigned DupSrcLane = Dup->getOperand(i: 2).getImm();
6753
6754 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
6755 MachineOperand &MulOp = Root.getOperand(i: IdxMulOp);
6756
6757 Register ResultReg = Root.getOperand(i: 0).getReg();
6758
6759 MachineInstrBuilder MIB;
6760 MIB = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MulOpc), DestReg: ResultReg)
6761 .add(MO: MulOp)
6762 .addReg(RegNo: DupSrcReg)
6763 .addImm(Val: DupSrcLane);
6764
6765 InsInstrs.push_back(Elt: MIB);
6766 return &Root;
6767}
6768
6769/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
6770/// instructions.
6771///
6772/// \see genFusedMultiply
6773static MachineInstr *genFusedMultiplyAcc(
6774 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6775 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6776 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6777 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6778 kind: FMAInstKind::Accumulator);
6779}
6780
6781/// genNeg - Helper to generate an intermediate negation of the second operand
6782/// of Root
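/// The negated value is later used as the replaced addend of an accumulating
/// multiply, e.g. (illustrative, for a MULSUB*_OP1 pattern):
///   MUL I, A, B
///   SUB R, I, C
///   ==> NEG V, C
///   ==> MLA R, V, A, B   // R = -C + A*B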
6783static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
6784 const TargetInstrInfo *TII, MachineInstr &Root,
6785 SmallVectorImpl<MachineInstr *> &InsInstrs,
6786 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
6787 unsigned MnegOpc, const TargetRegisterClass *RC) {
6788 Register NewVR = MRI.createVirtualRegister(RegClass: RC);
6789 MachineInstrBuilder MIB =
6790 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MnegOpc), DestReg: NewVR)
6791 .add(MO: Root.getOperand(i: 2));
6792 InsInstrs.push_back(Elt: MIB);
6793
6794 assert(InstrIdxForVirtReg.empty());
6795 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
6796
6797 return NewVR;
6798}
6799
6800/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
6801/// instructions with an additional negation of the accumulator
6802static MachineInstr *genFusedMultiplyAccNeg(
6803 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6804 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6805 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6806 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6807 assert(IdxMulOpd == 1);
6808
6809 Register NewVR =
6810 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6811 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6812 kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
6813}
6814
/// genFusedMultiplyIdx - Helper to generate fused multiply indexed
/// instructions.
6817///
6818/// \see genFusedMultiply
6819static MachineInstr *genFusedMultiplyIdx(
6820 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6821 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6822 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6823 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6824 kind: FMAInstKind::Indexed);
6825}
6826
/// genFusedMultiplyIdxNeg - Helper to generate fused multiply indexed
/// instructions with an additional negation of the accumulator
6829static MachineInstr *genFusedMultiplyIdxNeg(
6830 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6831 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6832 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6833 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6834 assert(IdxMulOpd == 1);
6835
6836 Register NewVR =
6837 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6838
6839 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6840 kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
6841}
6842
6843/// genMaddR - Generate madd instruction and combine mul and add using
6844/// an extra virtual register
/// Example - an ADD immediate needs to be materialized in a register:
6846/// MUL I=A,B,0
6847/// ADD R,I,Imm
6848/// ==> ORR V, ZR, Imm
6849/// ==> MADD R,A,B,V
6850/// \param MF Containing MachineFunction
6851/// \param MRI Register information
6852/// \param TII Target information
6853/// \param Root is the ADD instruction
6854/// \param [out] InsInstrs is a vector of machine instructions and will
6855/// contain the generated madd instruction
6856/// \param IdxMulOpd is index of operand in Root that is the result of
6857/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
6859/// \param VR is a virtual register that holds the value of an ADD operand
6860/// (V in the example above).
6861/// \param RC Register class of operands
6862static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
6863 const TargetInstrInfo *TII, MachineInstr &Root,
6864 SmallVectorImpl<MachineInstr *> &InsInstrs,
6865 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
6866 const TargetRegisterClass *RC) {
6867 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6868
6869 MachineInstr *MUL = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: IdxMulOpd).getReg());
6870 Register ResultReg = Root.getOperand(i: 0).getReg();
6871 Register SrcReg0 = MUL->getOperand(i: 1).getReg();
6872 bool Src0IsKill = MUL->getOperand(i: 1).isKill();
6873 Register SrcReg1 = MUL->getOperand(i: 2).getReg();
6874 bool Src1IsKill = MUL->getOperand(i: 2).isKill();
6875
6876 if (ResultReg.isVirtual())
6877 MRI.constrainRegClass(Reg: ResultReg, RC);
6878 if (SrcReg0.isVirtual())
6879 MRI.constrainRegClass(Reg: SrcReg0, RC);
6880 if (SrcReg1.isVirtual())
6881 MRI.constrainRegClass(Reg: SrcReg1, RC);
6882 if (Register::isVirtualRegister(Reg: VR))
6883 MRI.constrainRegClass(Reg: VR, RC);
6884
6885 MachineInstrBuilder MIB =
6886 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MaddOpc), DestReg: ResultReg)
6887 .addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
6888 .addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill))
6889 .addReg(RegNo: VR);
6890 // Insert the MADD
6891 InsInstrs.push_back(Elt: MIB);
6892 return MUL;
6893}
6894
6895/// Do the following transformation
6896/// A - (B + C) ==> (A - B) - C
6897/// A - (B + C) ==> (A - C) - B
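/// IdxOpd1 selects which operand of the add becomes B, e.g. (illustrative,
/// IdxOpd1 == 1):
///   ADD I, B, C
///   SUB R, A, I
///   ==> SUB V, A, B
///   ==> SUB R, V, C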
6898static void
6899genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
6900 const TargetInstrInfo *TII, MachineInstr &Root,
6901 SmallVectorImpl<MachineInstr *> &InsInstrs,
6902 SmallVectorImpl<MachineInstr *> &DelInstrs,
6903 unsigned IdxOpd1,
6904 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
6905 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
6906 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
6907 MachineInstr *AddMI = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: 2).getReg());
6908
6909 Register ResultReg = Root.getOperand(i: 0).getReg();
6910 Register RegA = Root.getOperand(i: 1).getReg();
6911 bool RegAIsKill = Root.getOperand(i: 1).isKill();
6912 Register RegB = AddMI->getOperand(i: IdxOpd1).getReg();
6913 bool RegBIsKill = AddMI->getOperand(i: IdxOpd1).isKill();
6914 Register RegC = AddMI->getOperand(i: IdxOtherOpd).getReg();
6915 bool RegCIsKill = AddMI->getOperand(i: IdxOtherOpd).isKill();
6916 Register NewVR = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: RegA));
6917
6918 unsigned Opcode = Root.getOpcode();
6919 if (Opcode == AArch64::SUBSWrr)
6920 Opcode = AArch64::SUBWrr;
6921 else if (Opcode == AArch64::SUBSXrr)
6922 Opcode = AArch64::SUBXrr;
6923 else
6924 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
6925 "Unexpected instruction opcode.");
6926
6927 MachineInstrBuilder MIB1 =
6928 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode), DestReg: NewVR)
6929 .addReg(RegNo: RegA, flags: getKillRegState(B: RegAIsKill))
6930 .addReg(RegNo: RegB, flags: getKillRegState(B: RegBIsKill));
6931 MachineInstrBuilder MIB2 =
6932 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode), DestReg: ResultReg)
6933 .addReg(RegNo: NewVR, flags: getKillRegState(B: true))
6934 .addReg(RegNo: RegC, flags: getKillRegState(B: RegCIsKill));
6935
6936 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
6937 InsInstrs.push_back(Elt: MIB1);
6938 InsInstrs.push_back(Elt: MIB2);
6939 DelInstrs.push_back(Elt: AddMI);
6940}
6941
/// When getMachineCombinerPatterns() finds potential patterns, this function
/// generates the instructions that could replace the original code sequence.
6945void AArch64InstrInfo::genAlternativeCodeSequence(
6946 MachineInstr &Root, unsigned Pattern,
6947 SmallVectorImpl<MachineInstr *> &InsInstrs,
6948 SmallVectorImpl<MachineInstr *> &DelInstrs,
6949 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
6950 MachineBasicBlock &MBB = *Root.getParent();
6951 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6952 MachineFunction &MF = *MBB.getParent();
6953 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
6954
6955 MachineInstr *MUL = nullptr;
6956 const TargetRegisterClass *RC;
6957 unsigned Opc;
6958 switch (Pattern) {
6959 default:
6960 // Reassociate instructions.
6961 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
6962 DelInstrs, InstrIdxForVirtReg);
6963 return;
6964 case AArch64MachineCombinerPattern::SUBADD_OP1:
6965 // A - (B + C)
6966 // ==> (A - B) - C
6967 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, IdxOpd1: 1,
6968 InstrIdxForVirtReg);
6969 break;
6970 case AArch64MachineCombinerPattern::SUBADD_OP2:
6971 // A - (B + C)
6972 // ==> (A - C) - B
6973 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, IdxOpd1: 2,
6974 InstrIdxForVirtReg);
6975 break;
6976 case AArch64MachineCombinerPattern::MULADDW_OP1:
6977 case AArch64MachineCombinerPattern::MULADDX_OP1:
6978 // MUL I=A,B,0
6979 // ADD R,I,C
6980 // ==> MADD R,A,B,C
6981 // --- Create(MADD);
6982 if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP1) {
6983 Opc = AArch64::MADDWrrr;
6984 RC = &AArch64::GPR32RegClass;
6985 } else {
6986 Opc = AArch64::MADDXrrr;
6987 RC = &AArch64::GPR64RegClass;
6988 }
6989 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
6990 break;
6991 case AArch64MachineCombinerPattern::MULADDW_OP2:
6992 case AArch64MachineCombinerPattern::MULADDX_OP2:
6993 // MUL I=A,B,0
6994 // ADD R,C,I
6995 // ==> MADD R,A,B,C
6996 // --- Create(MADD);
6997 if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP2) {
6998 Opc = AArch64::MADDWrrr;
6999 RC = &AArch64::GPR32RegClass;
7000 } else {
7001 Opc = AArch64::MADDXrrr;
7002 RC = &AArch64::GPR64RegClass;
7003 }
7004 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7005 break;
7006 case AArch64MachineCombinerPattern::MULADDWI_OP1:
7007 case AArch64MachineCombinerPattern::MULADDXI_OP1: {
7008 // MUL I=A,B,0
7009 // ADD R,I,Imm
7010 // ==> MOV V, Imm
7011 // ==> MADD R,A,B,V
7012 // --- Create(MADD);
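// For example (illustrative): for ADD R, I, #1, lsl #12 the shifted immediate
// 0x1000 fits a single MOVZ, which is emitted into V before the MADD.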
7013 const TargetRegisterClass *OrrRC;
7014 unsigned BitSize, OrrOpc, ZeroReg;
7015 if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) {
7016 OrrOpc = AArch64::ORRWri;
7017 OrrRC = &AArch64::GPR32spRegClass;
7018 BitSize = 32;
7019 ZeroReg = AArch64::WZR;
7020 Opc = AArch64::MADDWrrr;
7021 RC = &AArch64::GPR32RegClass;
7022 } else {
7023 OrrOpc = AArch64::ORRXri;
7024 OrrRC = &AArch64::GPR64spRegClass;
7025 BitSize = 64;
7026 ZeroReg = AArch64::XZR;
7027 Opc = AArch64::MADDXrrr;
7028 RC = &AArch64::GPR64RegClass;
7029 }
7030 Register NewVR = MRI.createVirtualRegister(RegClass: OrrRC);
7031 uint64_t Imm = Root.getOperand(i: 2).getImm();
7032
7033 if (Root.getOperand(i: 3).isImm()) {
7034 unsigned Val = Root.getOperand(i: 3).getImm();
7035 Imm = Imm << Val;
7036 }
7037 uint64_t UImm = SignExtend64(X: Imm, B: BitSize);
// The transformation is only valid if the immediate can be composed via a
// single instruction.
7039 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
7040 AArch64_IMM::expandMOVImm(Imm: UImm, BitSize, Insn);
7041 if (Insn.size() != 1)
7042 return;
7043 auto MovI = Insn.begin();
7044 MachineInstrBuilder MIB1;
7045 // MOV is an alias for one of three instructions: movz, movn, and orr.
7046 if (MovI->Opcode == OrrOpc)
7047 MIB1 = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: OrrOpc), DestReg: NewVR)
7048 .addReg(RegNo: ZeroReg)
7049 .addImm(Val: MovI->Op2);
7050 else {
7051 if (BitSize == 32)
7052 assert((MovI->Opcode == AArch64::MOVNWi ||
7053 MovI->Opcode == AArch64::MOVZWi) &&
7054 "Expected opcode");
7055 else
7056 assert((MovI->Opcode == AArch64::MOVNXi ||
7057 MovI->Opcode == AArch64::MOVZXi) &&
7058 "Expected opcode");
7059 MIB1 = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MovI->Opcode), DestReg: NewVR)
7060 .addImm(Val: MovI->Op1)
7061 .addImm(Val: MovI->Op2);
7062 }
7063 InsInstrs.push_back(Elt: MIB1);
7064 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
7065 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, VR: NewVR, RC);
7066 break;
7067 }
7068 case AArch64MachineCombinerPattern::MULSUBW_OP1:
7069 case AArch64MachineCombinerPattern::MULSUBX_OP1: {
7070 // MUL I=A,B,0
// SUB R,I,C
7072 // ==> SUB V, 0, C
7073 // ==> MADD R,A,B,V // = -C + A*B
7074 // --- Create(MADD);
7075 const TargetRegisterClass *SubRC;
7076 unsigned SubOpc, ZeroReg;
7077 if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP1) {
7078 SubOpc = AArch64::SUBWrr;
7079 SubRC = &AArch64::GPR32spRegClass;
7080 ZeroReg = AArch64::WZR;
7081 Opc = AArch64::MADDWrrr;
7082 RC = &AArch64::GPR32RegClass;
7083 } else {
7084 SubOpc = AArch64::SUBXrr;
7085 SubRC = &AArch64::GPR64spRegClass;
7086 ZeroReg = AArch64::XZR;
7087 Opc = AArch64::MADDXrrr;
7088 RC = &AArch64::GPR64RegClass;
7089 }
7090 Register NewVR = MRI.createVirtualRegister(RegClass: SubRC);
7091 // SUB NewVR, 0, C
7092 MachineInstrBuilder MIB1 =
7093 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: SubOpc), DestReg: NewVR)
7094 .addReg(RegNo: ZeroReg)
7095 .add(MO: Root.getOperand(i: 2));
7096 InsInstrs.push_back(Elt: MIB1);
7097 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
7098 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, VR: NewVR, RC);
7099 break;
7100 }
7101 case AArch64MachineCombinerPattern::MULSUBW_OP2:
7102 case AArch64MachineCombinerPattern::MULSUBX_OP2:
7103 // MUL I=A,B,0
7104 // SUB R,C,I
7105 // ==> MSUB R,A,B,C (computes C - A*B)
7106 // --- Create(MSUB);
7107 if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP2) {
7108 Opc = AArch64::MSUBWrrr;
7109 RC = &AArch64::GPR32RegClass;
7110 } else {
7111 Opc = AArch64::MSUBXrrr;
7112 RC = &AArch64::GPR64RegClass;
7113 }
7114 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7115 break;
7116 case AArch64MachineCombinerPattern::MULSUBWI_OP1:
7117 case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
7118 // MUL I=A,B,0
// SUB R,I,Imm
7120 // ==> MOV V, -Imm
7121 // ==> MADD R,A,B,V // = -Imm + A*B
7122 // --- Create(MADD);
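// For example (illustrative): SUB R, I, #3 materializes #-3 (a single MOVN)
// into V and emits MADD R, A, B, V, i.e. R = A*B - 3.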
7123 const TargetRegisterClass *OrrRC;
7124 unsigned BitSize, OrrOpc, ZeroReg;
7125 if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
7126 OrrOpc = AArch64::ORRWri;
7127 OrrRC = &AArch64::GPR32spRegClass;
7128 BitSize = 32;
7129 ZeroReg = AArch64::WZR;
7130 Opc = AArch64::MADDWrrr;
7131 RC = &AArch64::GPR32RegClass;
7132 } else {
7133 OrrOpc = AArch64::ORRXri;
7134 OrrRC = &AArch64::GPR64spRegClass;
7135 BitSize = 64;
7136 ZeroReg = AArch64::XZR;
7137 Opc = AArch64::MADDXrrr;
7138 RC = &AArch64::GPR64RegClass;
7139 }
7140 Register NewVR = MRI.createVirtualRegister(RegClass: OrrRC);
7141 uint64_t Imm = Root.getOperand(i: 2).getImm();
7142 if (Root.getOperand(i: 3).isImm()) {
7143 unsigned Val = Root.getOperand(i: 3).getImm();
7144 Imm = Imm << Val;
7145 }
7146 uint64_t UImm = SignExtend64(X: -Imm, B: BitSize);
// The transformation is only valid if the immediate can be composed via a
// single instruction.
7148 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
7149 AArch64_IMM::expandMOVImm(Imm: UImm, BitSize, Insn);
7150 if (Insn.size() != 1)
7151 return;
7152 auto MovI = Insn.begin();
7153 MachineInstrBuilder MIB1;
7154 // MOV is an alias for one of three instructions: movz, movn, and orr.
7155 if (MovI->Opcode == OrrOpc)
7156 MIB1 = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: OrrOpc), DestReg: NewVR)
7157 .addReg(RegNo: ZeroReg)
7158 .addImm(Val: MovI->Op2);
7159 else {
7160 if (BitSize == 32)
7161 assert((MovI->Opcode == AArch64::MOVNWi ||
7162 MovI->Opcode == AArch64::MOVZWi) &&
7163 "Expected opcode");
7164 else
7165 assert((MovI->Opcode == AArch64::MOVNXi ||
7166 MovI->Opcode == AArch64::MOVZXi) &&
7167 "Expected opcode");
7168 MIB1 = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MovI->Opcode), DestReg: NewVR)
7169 .addImm(Val: MovI->Op1)
7170 .addImm(Val: MovI->Op2);
7171 }
7172 InsInstrs.push_back(Elt: MIB1);
7173 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
7174 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, VR: NewVR, RC);
7175 break;
7176 }
7177
7178 case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
7179 Opc = AArch64::MLAv8i8;
7180 RC = &AArch64::FPR64RegClass;
7181 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7182 break;
7183 case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
7184 Opc = AArch64::MLAv8i8;
7185 RC = &AArch64::FPR64RegClass;
7186 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7187 break;
7188 case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
7189 Opc = AArch64::MLAv16i8;
7190 RC = &AArch64::FPR128RegClass;
7191 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7192 break;
7193 case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
7194 Opc = AArch64::MLAv16i8;
7195 RC = &AArch64::FPR128RegClass;
7196 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7197 break;
7198 case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
7199 Opc = AArch64::MLAv4i16;
7200 RC = &AArch64::FPR64RegClass;
7201 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7202 break;
7203 case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
7204 Opc = AArch64::MLAv4i16;
7205 RC = &AArch64::FPR64RegClass;
7206 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7207 break;
7208 case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
7209 Opc = AArch64::MLAv8i16;
7210 RC = &AArch64::FPR128RegClass;
7211 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7212 break;
7213 case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
7214 Opc = AArch64::MLAv8i16;
7215 RC = &AArch64::FPR128RegClass;
7216 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7217 break;
7218 case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
7219 Opc = AArch64::MLAv2i32;
7220 RC = &AArch64::FPR64RegClass;
7221 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7222 break;
7223 case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
7224 Opc = AArch64::MLAv2i32;
7225 RC = &AArch64::FPR64RegClass;
7226 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7227 break;
7228 case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
7229 Opc = AArch64::MLAv4i32;
7230 RC = &AArch64::FPR128RegClass;
7231 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7232 break;
7233 case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
7234 Opc = AArch64::MLAv4i32;
7235 RC = &AArch64::FPR128RegClass;
7236 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7237 break;
7238
7239 case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
7240 Opc = AArch64::MLAv8i8;
7241 RC = &AArch64::FPR64RegClass;
7242 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7243 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
7244 RC);
7245 break;
7246 case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
7247 Opc = AArch64::MLSv8i8;
7248 RC = &AArch64::FPR64RegClass;
7249 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7250 break;
7251 case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
7252 Opc = AArch64::MLAv16i8;
7253 RC = &AArch64::FPR128RegClass;
7254 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7255 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
7256 RC);
7257 break;
7258 case AArch64MachineCombinerPattern::MULSUBv16i8_OP2:
7259 Opc = AArch64::MLSv16i8;
7260 RC = &AArch64::FPR128RegClass;
7261 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7262 break;
7263 case AArch64MachineCombinerPattern::MULSUBv4i16_OP1:
7264 Opc = AArch64::MLAv4i16;
7265 RC = &AArch64::FPR64RegClass;
7266 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7267 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7268 RC);
7269 break;
7270 case AArch64MachineCombinerPattern::MULSUBv4i16_OP2:
7271 Opc = AArch64::MLSv4i16;
7272 RC = &AArch64::FPR64RegClass;
7273 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7274 break;
7275 case AArch64MachineCombinerPattern::MULSUBv8i16_OP1:
7276 Opc = AArch64::MLAv8i16;
7277 RC = &AArch64::FPR128RegClass;
7278 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7279 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7280 RC);
7281 break;
7282 case AArch64MachineCombinerPattern::MULSUBv8i16_OP2:
7283 Opc = AArch64::MLSv8i16;
7284 RC = &AArch64::FPR128RegClass;
7285 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7286 break;
7287 case AArch64MachineCombinerPattern::MULSUBv2i32_OP1:
7288 Opc = AArch64::MLAv2i32;
7289 RC = &AArch64::FPR64RegClass;
7290 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7291 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7292 RC);
7293 break;
7294 case AArch64MachineCombinerPattern::MULSUBv2i32_OP2:
7295 Opc = AArch64::MLSv2i32;
7296 RC = &AArch64::FPR64RegClass;
7297 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7298 break;
7299 case AArch64MachineCombinerPattern::MULSUBv4i32_OP1:
7300 Opc = AArch64::MLAv4i32;
7301 RC = &AArch64::FPR128RegClass;
7302 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7303 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7304 RC);
7305 break;
7306 case AArch64MachineCombinerPattern::MULSUBv4i32_OP2:
7307 Opc = AArch64::MLSv4i32;
7308 RC = &AArch64::FPR128RegClass;
7309 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7310 break;
7311
7312 case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1:
7313 Opc = AArch64::MLAv4i16_indexed;
7314 RC = &AArch64::FPR64RegClass;
7315 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7316 break;
7317 case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2:
7318 Opc = AArch64::MLAv4i16_indexed;
7319 RC = &AArch64::FPR64RegClass;
7320 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7321 break;
7322 case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1:
7323 Opc = AArch64::MLAv8i16_indexed;
7324 RC = &AArch64::FPR128RegClass;
7325 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7326 break;
7327 case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2:
7328 Opc = AArch64::MLAv8i16_indexed;
7329 RC = &AArch64::FPR128RegClass;
7330 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7331 break;
7332 case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1:
7333 Opc = AArch64::MLAv2i32_indexed;
7334 RC = &AArch64::FPR64RegClass;
7335 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7336 break;
7337 case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2:
7338 Opc = AArch64::MLAv2i32_indexed;
7339 RC = &AArch64::FPR64RegClass;
7340 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7341 break;
7342 case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1:
7343 Opc = AArch64::MLAv4i32_indexed;
7344 RC = &AArch64::FPR128RegClass;
7345 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7346 break;
7347 case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2:
7348 Opc = AArch64::MLAv4i32_indexed;
7349 RC = &AArch64::FPR128RegClass;
7350 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7351 break;
7352
7353 case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
7354 Opc = AArch64::MLAv4i16_indexed;
7355 RC = &AArch64::FPR64RegClass;
7356 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7357 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7358 RC);
7359 break;
7360 case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
7361 Opc = AArch64::MLSv4i16_indexed;
7362 RC = &AArch64::FPR64RegClass;
7363 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7364 break;
7365 case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
7366 Opc = AArch64::MLAv8i16_indexed;
7367 RC = &AArch64::FPR128RegClass;
7368 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7369 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7370 RC);
7371 break;
7372 case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
7373 Opc = AArch64::MLSv8i16_indexed;
7374 RC = &AArch64::FPR128RegClass;
7375 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7376 break;
7377 case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
7378 Opc = AArch64::MLAv2i32_indexed;
7379 RC = &AArch64::FPR64RegClass;
7380 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7381 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7382 RC);
7383 break;
7384 case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
7385 Opc = AArch64::MLSv2i32_indexed;
7386 RC = &AArch64::FPR64RegClass;
7387 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7388 break;
7389 case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
7390 Opc = AArch64::MLAv4i32_indexed;
7391 RC = &AArch64::FPR128RegClass;
7392 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7393 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7394 RC);
7395 break;
7396 case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
7397 Opc = AArch64::MLSv4i32_indexed;
7398 RC = &AArch64::FPR128RegClass;
7399 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7400 break;
7401
7402 // Floating Point Support
7403 case AArch64MachineCombinerPattern::FMULADDH_OP1:
7404 Opc = AArch64::FMADDHrrr;
7405 RC = &AArch64::FPR16RegClass;
7406 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7407 break;
7408 case AArch64MachineCombinerPattern::FMULADDS_OP1:
7409 Opc = AArch64::FMADDSrrr;
7410 RC = &AArch64::FPR32RegClass;
7411 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7412 break;
7413 case AArch64MachineCombinerPattern::FMULADDD_OP1:
7414 Opc = AArch64::FMADDDrrr;
7415 RC = &AArch64::FPR64RegClass;
7416 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7417 break;
7418
7419 case AArch64MachineCombinerPattern::FMULADDH_OP2:
7420 Opc = AArch64::FMADDHrrr;
7421 RC = &AArch64::FPR16RegClass;
7422 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7423 break;
7424 case AArch64MachineCombinerPattern::FMULADDS_OP2:
7425 Opc = AArch64::FMADDSrrr;
7426 RC = &AArch64::FPR32RegClass;
7427 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7428 break;
7429 case AArch64MachineCombinerPattern::FMULADDD_OP2:
7430 Opc = AArch64::FMADDDrrr;
7431 RC = &AArch64::FPR64RegClass;
7432 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7433 break;
7434
7435 case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1:
7436 Opc = AArch64::FMLAv1i32_indexed;
7437 RC = &AArch64::FPR32RegClass;
7438 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7439 kind: FMAInstKind::Indexed);
7440 break;
7441 case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2:
7442 Opc = AArch64::FMLAv1i32_indexed;
7443 RC = &AArch64::FPR32RegClass;
7444 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7445 kind: FMAInstKind::Indexed);
7446 break;
7447
7448 case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1:
7449 Opc = AArch64::FMLAv1i64_indexed;
7450 RC = &AArch64::FPR64RegClass;
7451 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7452 kind: FMAInstKind::Indexed);
7453 break;
7454 case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2:
7455 Opc = AArch64::FMLAv1i64_indexed;
7456 RC = &AArch64::FPR64RegClass;
7457 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7458 kind: FMAInstKind::Indexed);
7459 break;
7460
7461 case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1:
7462 RC = &AArch64::FPR64RegClass;
7463 Opc = AArch64::FMLAv4i16_indexed;
7464 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7465 kind: FMAInstKind::Indexed);
7466 break;
7467 case AArch64MachineCombinerPattern::FMLAv4f16_OP1:
7468 RC = &AArch64::FPR64RegClass;
7469 Opc = AArch64::FMLAv4f16;
7470 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7471 kind: FMAInstKind::Accumulator);
7472 break;
7473 case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2:
7474 RC = &AArch64::FPR64RegClass;
7475 Opc = AArch64::FMLAv4i16_indexed;
7476 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7477 kind: FMAInstKind::Indexed);
7478 break;
7479 case AArch64MachineCombinerPattern::FMLAv4f16_OP2:
7480 RC = &AArch64::FPR64RegClass;
7481 Opc = AArch64::FMLAv4f16;
7482 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7483 kind: FMAInstKind::Accumulator);
7484 break;
7485
7486 case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1:
7487 case AArch64MachineCombinerPattern::FMLAv2f32_OP1:
7488 RC = &AArch64::FPR64RegClass;
7489 if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
7490 Opc = AArch64::FMLAv2i32_indexed;
7491 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7492 kind: FMAInstKind::Indexed);
7493 } else {
7494 Opc = AArch64::FMLAv2f32;
7495 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7496 kind: FMAInstKind::Accumulator);
7497 }
7498 break;
7499 case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2:
7500 case AArch64MachineCombinerPattern::FMLAv2f32_OP2:
7501 RC = &AArch64::FPR64RegClass;
7502 if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
7503 Opc = AArch64::FMLAv2i32_indexed;
7504 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7505 kind: FMAInstKind::Indexed);
7506 } else {
7507 Opc = AArch64::FMLAv2f32;
7508 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7509 kind: FMAInstKind::Accumulator);
7510 }
7511 break;
7512
7513 case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1:
7514 RC = &AArch64::FPR128RegClass;
7515 Opc = AArch64::FMLAv8i16_indexed;
7516 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7517 kind: FMAInstKind::Indexed);
7518 break;
7519 case AArch64MachineCombinerPattern::FMLAv8f16_OP1:
7520 RC = &AArch64::FPR128RegClass;
7521 Opc = AArch64::FMLAv8f16;
7522 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7523 kind: FMAInstKind::Accumulator);
7524 break;
7525 case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2:
7526 RC = &AArch64::FPR128RegClass;
7527 Opc = AArch64::FMLAv8i16_indexed;
7528 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7529 kind: FMAInstKind::Indexed);
7530 break;
7531 case AArch64MachineCombinerPattern::FMLAv8f16_OP2:
7532 RC = &AArch64::FPR128RegClass;
7533 Opc = AArch64::FMLAv8f16;
7534 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7535 kind: FMAInstKind::Accumulator);
7536 break;
7537
7538 case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1:
7539 case AArch64MachineCombinerPattern::FMLAv2f64_OP1:
7540 RC = &AArch64::FPR128RegClass;
7541 if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
7542 Opc = AArch64::FMLAv2i64_indexed;
7543 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7544 kind: FMAInstKind::Indexed);
7545 } else {
7546 Opc = AArch64::FMLAv2f64;
7547 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7548 kind: FMAInstKind::Accumulator);
7549 }
7550 break;
7551 case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2:
7552 case AArch64MachineCombinerPattern::FMLAv2f64_OP2:
7553 RC = &AArch64::FPR128RegClass;
7554 if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
7555 Opc = AArch64::FMLAv2i64_indexed;
7556 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7557 kind: FMAInstKind::Indexed);
7558 } else {
7559 Opc = AArch64::FMLAv2f64;
7560 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7561 kind: FMAInstKind::Accumulator);
7562 }
7563 break;
7564
7565 case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1:
7566 case AArch64MachineCombinerPattern::FMLAv4f32_OP1:
7567 RC = &AArch64::FPR128RegClass;
7568 if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
7569 Opc = AArch64::FMLAv4i32_indexed;
7570 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7571 kind: FMAInstKind::Indexed);
7572 } else {
7573 Opc = AArch64::FMLAv4f32;
7574 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7575 kind: FMAInstKind::Accumulator);
7576 }
7577 break;
7578
7579 case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2:
7580 case AArch64MachineCombinerPattern::FMLAv4f32_OP2:
7581 RC = &AArch64::FPR128RegClass;
7582 if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
7583 Opc = AArch64::FMLAv4i32_indexed;
7584 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7585 kind: FMAInstKind::Indexed);
7586 } else {
7587 Opc = AArch64::FMLAv4f32;
7588 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7589 kind: FMAInstKind::Accumulator);
7590 }
7591 break;
7592
7593 case AArch64MachineCombinerPattern::FMULSUBH_OP1:
7594 Opc = AArch64::FNMSUBHrrr;
7595 RC = &AArch64::FPR16RegClass;
7596 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7597 break;
7598 case AArch64MachineCombinerPattern::FMULSUBS_OP1:
7599 Opc = AArch64::FNMSUBSrrr;
7600 RC = &AArch64::FPR32RegClass;
7601 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7602 break;
7603 case AArch64MachineCombinerPattern::FMULSUBD_OP1:
7604 Opc = AArch64::FNMSUBDrrr;
7605 RC = &AArch64::FPR64RegClass;
7606 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7607 break;
7608
7609 case AArch64MachineCombinerPattern::FNMULSUBH_OP1:
7610 Opc = AArch64::FNMADDHrrr;
7611 RC = &AArch64::FPR16RegClass;
7612 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7613 break;
7614 case AArch64MachineCombinerPattern::FNMULSUBS_OP1:
7615 Opc = AArch64::FNMADDSrrr;
7616 RC = &AArch64::FPR32RegClass;
7617 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7618 break;
7619 case AArch64MachineCombinerPattern::FNMULSUBD_OP1:
7620 Opc = AArch64::FNMADDDrrr;
7621 RC = &AArch64::FPR64RegClass;
7622 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7623 break;
7624
7625 case AArch64MachineCombinerPattern::FMULSUBH_OP2:
7626 Opc = AArch64::FMSUBHrrr;
7627 RC = &AArch64::FPR16RegClass;
7628 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7629 break;
7630 case AArch64MachineCombinerPattern::FMULSUBS_OP2:
7631 Opc = AArch64::FMSUBSrrr;
7632 RC = &AArch64::FPR32RegClass;
7633 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7634 break;
7635 case AArch64MachineCombinerPattern::FMULSUBD_OP2:
7636 Opc = AArch64::FMSUBDrrr;
7637 RC = &AArch64::FPR64RegClass;
7638 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7639 break;
7640
7641 case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2:
7642 Opc = AArch64::FMLSv1i32_indexed;
7643 RC = &AArch64::FPR32RegClass;
7644 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7645 kind: FMAInstKind::Indexed);
7646 break;
7647
7648 case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2:
7649 Opc = AArch64::FMLSv1i64_indexed;
7650 RC = &AArch64::FPR64RegClass;
7651 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7652 kind: FMAInstKind::Indexed);
7653 break;
7654
7655 case AArch64MachineCombinerPattern::FMLSv4f16_OP1:
7656 case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
7657 RC = &AArch64::FPR64RegClass;
7658 Register NewVR = MRI.createVirtualRegister(RegClass: RC);
7659 MachineInstrBuilder MIB1 =
7660 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
7661 .add(Root.getOperand(2));
7662 InsInstrs.push_back(Elt: MIB1);
7663 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
7664 if (Pattern == AArch64MachineCombinerPattern::FMLSv4f16_OP1) {
7665 Opc = AArch64::FMLAv4f16;
7666 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7667 kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
7668 } else {
7669 Opc = AArch64::FMLAv4i16_indexed;
7670 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7671 kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
7672 }
7673 break;
7674 }
7675 case AArch64MachineCombinerPattern::FMLSv4f16_OP2:
7676 RC = &AArch64::FPR64RegClass;
7677 Opc = AArch64::FMLSv4f16;
7678 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7679 kind: FMAInstKind::Accumulator);
7680 break;
7681 case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2:
7682 RC = &AArch64::FPR64RegClass;
7683 Opc = AArch64::FMLSv4i16_indexed;
7684 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7685 kind: FMAInstKind::Indexed);
7686 break;
7687
7688 case AArch64MachineCombinerPattern::FMLSv2f32_OP2:
7689 case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2:
7690 RC = &AArch64::FPR64RegClass;
7691 if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
7692 Opc = AArch64::FMLSv2i32_indexed;
7693 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7694 kind: FMAInstKind::Indexed);
7695 } else {
7696 Opc = AArch64::FMLSv2f32;
7697 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7698 kind: FMAInstKind::Accumulator);
7699 }
7700 break;
7701
7702 case AArch64MachineCombinerPattern::FMLSv8f16_OP1:
7703 case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
7704 RC = &AArch64::FPR128RegClass;
7705 Register NewVR = MRI.createVirtualRegister(RegClass: RC);
7706 MachineInstrBuilder MIB1 =
7707 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
7708 .add(Root.getOperand(2));
7709 InsInstrs.push_back(Elt: MIB1);
7710 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
7711 if (Pattern == AArch64MachineCombinerPattern::FMLSv8f16_OP1) {
7712 Opc = AArch64::FMLAv8f16;
7713 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7714 kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
7715 } else {
7716 Opc = AArch64::FMLAv8i16_indexed;
7717 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7718 kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
7719 }
7720 break;
7721 }
7722 case AArch64MachineCombinerPattern::FMLSv8f16_OP2:
7723 RC = &AArch64::FPR128RegClass;
7724 Opc = AArch64::FMLSv8f16;
7725 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7726 kind: FMAInstKind::Accumulator);
7727 break;
7728 case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2:
7729 RC = &AArch64::FPR128RegClass;
7730 Opc = AArch64::FMLSv8i16_indexed;
7731 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7732 kind: FMAInstKind::Indexed);
7733 break;
7734
7735 case AArch64MachineCombinerPattern::FMLSv2f64_OP2:
7736 case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2:
7737 RC = &AArch64::FPR128RegClass;
7738 if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
7739 Opc = AArch64::FMLSv2i64_indexed;
7740 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7741 kind: FMAInstKind::Indexed);
7742 } else {
7743 Opc = AArch64::FMLSv2f64;
7744 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7745 kind: FMAInstKind::Accumulator);
7746 }
7747 break;
7748
7749 case AArch64MachineCombinerPattern::FMLSv4f32_OP2:
7750 case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2:
7751 RC = &AArch64::FPR128RegClass;
7752 if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
7753 Opc = AArch64::FMLSv4i32_indexed;
7754 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7755 kind: FMAInstKind::Indexed);
7756 } else {
7757 Opc = AArch64::FMLSv4f32;
7758 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
7759 kind: FMAInstKind::Accumulator);
7760 }
7761 break;
7762 case AArch64MachineCombinerPattern::FMLSv2f32_OP1:
7763 case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
7764 RC = &AArch64::FPR64RegClass;
7765 Register NewVR = MRI.createVirtualRegister(RegClass: RC);
7766 MachineInstrBuilder MIB1 =
7767 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
7768 .add(Root.getOperand(2));
7769 InsInstrs.push_back(Elt: MIB1);
7770 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
7771 if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
7772 Opc = AArch64::FMLAv2i32_indexed;
7773 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7774 kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
7775 } else {
7776 Opc = AArch64::FMLAv2f32;
7777 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7778 kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
7779 }
7780 break;
7781 }
7782 case AArch64MachineCombinerPattern::FMLSv4f32_OP1:
7783 case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
7784 RC = &AArch64::FPR128RegClass;
7785 Register NewVR = MRI.createVirtualRegister(RegClass: RC);
7786 MachineInstrBuilder MIB1 =
7787 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
7788 .add(Root.getOperand(2));
7789 InsInstrs.push_back(Elt: MIB1);
7790 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
7791 if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
7792 Opc = AArch64::FMLAv4i32_indexed;
7793 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7794 kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
7795 } else {
7796 Opc = AArch64::FMLAv4f32;
7797 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7798 kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
7799 }
7800 break;
7801 }
7802 case AArch64MachineCombinerPattern::FMLSv2f64_OP1:
7803 case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
7804 RC = &AArch64::FPR128RegClass;
7805 Register NewVR = MRI.createVirtualRegister(RegClass: RC);
7806 MachineInstrBuilder MIB1 =
7807 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
7808 .add(Root.getOperand(2));
7809 InsInstrs.push_back(Elt: MIB1);
7810 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
7811 if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
7812 Opc = AArch64::FMLAv2i64_indexed;
7813 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7814 kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
7815 } else {
7816 Opc = AArch64::FMLAv2f64;
7817 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
7818 kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
7819 }
7820 break;
7821 }
7822 case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1:
7823 case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2: {
7824 unsigned IdxDupOp =
7825 (Pattern == AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1
7826 : 2;
7827 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
7828 &AArch64::FPR128RegClass, MRI);
7829 break;
7830 }
7831 case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1:
7832 case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2: {
7833 unsigned IdxDupOp =
7834 (Pattern == AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1
7835 : 2;
7836 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
7837 &AArch64::FPR128RegClass, MRI);
7838 break;
7839 }
7840 case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1:
7841 case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2: {
7842 unsigned IdxDupOp =
7843 (Pattern == AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1
7844 : 2;
7845 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
7846 &AArch64::FPR128_loRegClass, MRI);
7847 break;
7848 }
7849 case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1:
7850 case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2: {
7851 unsigned IdxDupOp =
7852 (Pattern == AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1
7853 : 2;
7854 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
7855 &AArch64::FPR128RegClass, MRI);
7856 break;
7857 }
7858 case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1:
7859 case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2: {
7860 unsigned IdxDupOp =
7861 (Pattern == AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1
7862 : 2;
7863 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
7864 &AArch64::FPR128_loRegClass, MRI);
7865 break;
7866 }
7867 case AArch64MachineCombinerPattern::FNMADD: {
7868 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
7869 break;
7870 }
7871
7872 } // end switch (Pattern)
7873 // Record MUL and ADD/SUB for deletion
7874 if (MUL)
7875 DelInstrs.push_back(Elt: MUL);
7876 DelInstrs.push_back(Elt: &Root);
7877
7878 // Set the flags on the inserted instructions to be the merged flags of the
7879 // instructions that we have combined.
7880 uint32_t Flags = Root.getFlags();
7881 if (MUL)
7882 Flags = Root.mergeFlagsWith(Other: *MUL);
7883 for (auto *MI : InsInstrs)
7884 MI->setFlags(Flags);
7885}
7886
7887/// Replace a csinc-branch sequence with a simple conditional branch
7888///
7889/// Examples:
7890/// 1. \code
7891/// csinc w9, wzr, wzr, <condition code>
7892/// tbnz w9, #0, 0x44
7893/// \endcode
7894/// to
7895/// \code
7896/// b.<inverted condition code>
7897/// \endcode
7898///
7899/// 2. \code
7900/// csinc w9, wzr, wzr, <condition code>
7901/// tbz w9, #0, 0x44
7902/// \endcode
7903/// to
7904/// \code
7905/// b.<condition code>
7906/// \endcode
7907///
7908/// Replace a compare-and-branch sequence with a TBZ/TBNZ instruction when the
7909/// compare's constant operand is a power of 2.
7910///
7911/// Examples:
7912/// \code
7913/// and w8, w8, #0x400
7914/// cbnz w8, L1
7915/// \endcode
7916/// to
7917/// \code
7918/// tbnz w8, #10, L1
7919/// \endcode
7920///
7921/// \param MI Conditional Branch
7922/// \return True when the simple conditional branch is generated
7923///
7924bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
7925 bool IsNegativeBranch = false;
7926 bool IsTestAndBranch = false;
7927 unsigned TargetBBInMI = 0;
7928 switch (MI.getOpcode()) {
7929 default:
7930 llvm_unreachable("Unknown branch instruction?");
7931 case AArch64::Bcc:
7932 return false;
7933 case AArch64::CBZW:
7934 case AArch64::CBZX:
7935 TargetBBInMI = 1;
7936 break;
7937 case AArch64::CBNZW:
7938 case AArch64::CBNZX:
7939 TargetBBInMI = 1;
7940 IsNegativeBranch = true;
7941 break;
7942 case AArch64::TBZW:
7943 case AArch64::TBZX:
7944 TargetBBInMI = 2;
7945 IsTestAndBranch = true;
7946 break;
7947 case AArch64::TBNZW:
7948 case AArch64::TBNZX:
7949 TargetBBInMI = 2;
7950 IsNegativeBranch = true;
7951 IsTestAndBranch = true;
7952 break;
7953 }
7954 // So we increment a zero register and test for bits other
7955 // than bit 0? Conservatively bail out in case the verifier
7956 // missed this case.
7957 if (IsTestAndBranch && MI.getOperand(i: 1).getImm())
7958 return false;
7959
7960 // Find Definition.
7961 assert(MI.getParent() && "Incomplete machine instruction\n");
7962 MachineBasicBlock *MBB = MI.getParent();
7963 MachineFunction *MF = MBB->getParent();
7964 MachineRegisterInfo *MRI = &MF->getRegInfo();
7965 Register VReg = MI.getOperand(i: 0).getReg();
7966 if (!VReg.isVirtual())
7967 return false;
7968
7969 MachineInstr *DefMI = MRI->getVRegDef(Reg: VReg);
7970
7971 // Look through COPY instructions to find definition.
7972 while (DefMI->isCopy()) {
7973 Register CopyVReg = DefMI->getOperand(i: 1).getReg();
7974 if (!MRI->hasOneNonDBGUse(RegNo: CopyVReg))
7975 return false;
7976 if (!MRI->hasOneDef(RegNo: CopyVReg))
7977 return false;
7978 DefMI = MRI->getVRegDef(Reg: CopyVReg);
7979 }
7980
7981 switch (DefMI->getOpcode()) {
7982 default:
7983 return false;
7984 // Fold AND into a TBZ/TBNZ if the constant operand is a power of 2.
7985 case AArch64::ANDWri:
7986 case AArch64::ANDXri: {
7987 if (IsTestAndBranch)
7988 return false;
7989 if (DefMI->getParent() != MBB)
7990 return false;
7991 if (!MRI->hasOneNonDBGUse(RegNo: VReg))
7992 return false;
7993
7994 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
7995 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
7996 val: DefMI->getOperand(i: 2).getImm(), regSize: Is32Bit ? 32 : 64);
7997 if (!isPowerOf2_64(Value: Mask))
7998 return false;
7999
8000 MachineOperand &MO = DefMI->getOperand(i: 1);
8001 Register NewReg = MO.getReg();
8002 if (!NewReg.isVirtual())
8003 return false;
8004
8005 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
8006
8007 MachineBasicBlock &RefToMBB = *MBB;
8008 MachineBasicBlock *TBB = MI.getOperand(i: 1).getMBB();
8009 DebugLoc DL = MI.getDebugLoc();
8010 unsigned Imm = Log2_64(Value: Mask);
8011 unsigned Opc = (Imm < 32)
8012 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
8013 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
8014 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
8015 .addReg(NewReg)
8016 .addImm(Imm)
8017 .addMBB(TBB);
8018 // Register lives on to the TB(N)Z now.
8019 MO.setIsKill(false);
8020
8021 // For immediates smaller than 32, we need to use the 32-bit (W)
8022 // variant in all cases, because the 64-bit (X) variant cannot
8023 // encode them.
8024 // Therefore, if the input register is 64-bit, we need to take its
8025 // 32-bit sub-register.
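 // For illustration (registers are hypothetical), the overall AND + CB(N)Z
 // fold with a 64-bit input and a bit index below 32 ends up as a W-register
 // TB(N)Z:
 // \code
 //   and  x8, x9, #0x400
 //   cbnz x8, L1
 // \endcode
 // becomes
 // \code
 //   tbnz w9, #10, L1
 // \endcode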
8026 if (!Is32Bit && Imm < 32)
8027 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
8028 MI.eraseFromParent();
8029 return true;
8030 }
8031 // Look for CSINC
8032 case AArch64::CSINCWr:
8033 case AArch64::CSINCXr: {
8034 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
8035 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
8036 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
8037 DefMI->getOperand(2).getReg() == AArch64::XZR))
8038 return false;
8039
8040 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
8041 true) != -1)
8042 return false;
8043
8044 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(i: 3).getImm();
8045 // Convert only when the condition code is not modified between
8046 // the CSINC and the branch. The CC may be used by other
8047 // instructions in between.
8048 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
8049 return false;
8050 MachineBasicBlock &RefToMBB = *MBB;
8051 MachineBasicBlock *TBB = MI.getOperand(i: TargetBBInMI).getMBB();
8052 DebugLoc DL = MI.getDebugLoc();
8053 if (IsNegativeBranch)
8054 CC = AArch64CC::getInvertedCondCode(Code: CC);
8055 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
8056 MI.eraseFromParent();
8057 return true;
8058 }
8059 }
8060}
8061
8062std::pair<unsigned, unsigned>
8063AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
8064 const unsigned Mask = AArch64II::MO_FRAGMENT;
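 // For example, a flag value of (MO_PAGEOFF | MO_NC) decomposes into the
 // direct part MO_PAGEOFF and the bitmask part MO_NC.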
8065 return std::make_pair(x: TF & Mask, y: TF & ~Mask);
8066}
8067
8068ArrayRef<std::pair<unsigned, const char *>>
8069AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
8070 using namespace AArch64II;
8071
8072 static const std::pair<unsigned, const char *> TargetFlags[] = {
8073 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
8074 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
8075 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
8076 {MO_HI12, "aarch64-hi12"}};
8077 return ArrayRef(TargetFlags);
8078}
8079
8080ArrayRef<std::pair<unsigned, const char *>>
8081AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
8082 using namespace AArch64II;
8083
8084 static const std::pair<unsigned, const char *> TargetFlags[] = {
8085 {MO_COFFSTUB, "aarch64-coffstub"},
8086 {MO_GOT, "aarch64-got"},
8087 {MO_NC, "aarch64-nc"},
8088 {MO_S, "aarch64-s"},
8089 {MO_TLS, "aarch64-tls"},
8090 {MO_DLLIMPORT, "aarch64-dllimport"},
8091 {MO_PREL, "aarch64-prel"},
8092 {MO_TAGGED, "aarch64-tagged"},
8093 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
8094 };
8095 return ArrayRef(TargetFlags);
8096}
8097
8098ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
8099AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
8100 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8101 {{MOSuppressPair, "aarch64-suppress-pair"},
8102 {MOStridedAccess, "aarch64-strided-access"}};
8103 return ArrayRef(TargetFlags);
8104}
8105
8106/// Constants defining how certain sequences should be outlined.
8107/// This encompasses how an outlined function should be called, and what kind of
8108/// frame should be emitted for that outlined function.
8109///
8110/// \p MachineOutlinerDefault implies that the function should be called with
8111/// a save and restore of LR to the stack.
8112///
8113/// That is,
8114///
8115/// I1 Save LR OUTLINED_FUNCTION:
8116/// I2 --> BL OUTLINED_FUNCTION I1
8117/// I3 Restore LR I2
8118/// I3
8119/// RET
8120///
8121/// * Call construction overhead: 3 (save + BL + restore)
8122/// * Frame construction overhead: 1 (ret)
8123/// * Requires stack fixups? Yes
8124///
8125/// \p MachineOutlinerTailCall implies that the function is being created from
8126/// a sequence of instructions ending in a return.
8127///
8128/// That is,
8129///
8130/// I1 OUTLINED_FUNCTION:
8131/// I2 --> B OUTLINED_FUNCTION I1
8132/// RET I2
8133/// RET
8134///
8135/// * Call construction overhead: 1 (B)
8136/// * Frame construction overhead: 0 (Return included in sequence)
8137/// * Requires stack fixups? No
8138///
8139/// \p MachineOutlinerNoLRSave implies that the function should be called using
8140/// a BL instruction, but doesn't require LR to be saved and restored. This
8141/// happens when LR is known to be dead.
8142///
8143/// That is,
8144///
8145/// I1 OUTLINED_FUNCTION:
8146/// I2 --> BL OUTLINED_FUNCTION I1
8147/// I3 I2
8148/// I3
8149/// RET
8150///
8151/// * Call construction overhead: 1 (BL)
8152/// * Frame construction overhead: 1 (RET)
8153/// * Requires stack fixups? No
8154///
8155/// \p MachineOutlinerThunk implies that the function is being created from
8156/// a sequence of instructions ending in a call. The outlined function is
8157/// called with a BL instruction, and the outlined function tail-calls the
8158/// original call destination.
8159///
8160/// That is,
8161///
8162/// I1 OUTLINED_FUNCTION:
8163/// I2 --> BL OUTLINED_FUNCTION I1
8164/// BL f I2
8165/// B f
8166/// * Call construction overhead: 1 (BL)
8167/// * Frame construction overhead: 0
8168/// * Requires stack fixups? No
8169///
8170/// \p MachineOutlinerRegSave implies that the function should be called with a
8171/// save and restore of LR to an available register. This allows us to avoid
8172/// stack fixups. Note that this outlining variant is compatible with the
8173/// NoLRSave case.
8174///
8175/// That is,
8176///
8177/// I1 Save LR OUTLINED_FUNCTION:
8178/// I2 --> BL OUTLINED_FUNCTION I1
8179/// I3 Restore LR I2
8180/// I3
8181/// RET
8182///
8183/// * Call construction overhead: 3 (save + BL + restore)
8184/// * Frame construction overhead: 1 (ret)
8185/// * Requires stack fixups? No
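///
/// For instance (illustrative only; the actual register is chosen by
/// findRegisterToSaveLRTo), a RegSave call site might look like
/// \code
///   mov x9, lr
///   bl  OUTLINED_FUNCTION
///   mov lr, x9
/// \endcode
/// assuming x9 happens to be free across the sequence.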
8186enum MachineOutlinerClass {
8187 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
8188 MachineOutlinerTailCall, /// Only emit a branch.
8189 MachineOutlinerNoLRSave, /// Emit a call and return.
8190 MachineOutlinerThunk, /// Emit a call and tail-call.
8191 MachineOutlinerRegSave /// Same as default, but save to a register.
8192};
8193
8194enum MachineOutlinerMBBFlags {
8195 LRUnavailableSomewhere = 0x2,
8196 HasCalls = 0x4,
8197 UnsafeRegsDead = 0x8
8198};
8199
8200Register
8201AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
8202 MachineFunction *MF = C.getMF();
8203 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
8204 const AArch64RegisterInfo *ARI =
8205 static_cast<const AArch64RegisterInfo *>(&TRI);
8206 // Check if there is an available register across the sequence that we can
8207 // use.
8208 for (unsigned Reg : AArch64::GPR64RegClass) {
8209 if (!ARI->isReservedReg(*MF, Reg) &&
8210 Reg != AArch64::LR && // LR is not reserved, but don't use it.
8211 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
8212 Reg != AArch64::X17 && // Ditto for X17.
8213 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
8214 C.isAvailableInsideSeq(Reg, TRI))
8215 return Reg;
8216 }
8217 return Register();
8218}
8219
8220static bool
8221outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
8222 const outliner::Candidate &b) {
8223 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8224 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8225
8226 return MFIa->shouldSignReturnAddress(SpillsLR: false) == MFIb->shouldSignReturnAddress(SpillsLR: false) &&
8227 MFIa->shouldSignReturnAddress(SpillsLR: true) == MFIb->shouldSignReturnAddress(SpillsLR: true);
8228}
8229
8230static bool
8231outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
8232 const outliner::Candidate &b) {
8233 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8234 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8235
8236 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
8237}
8238
8239static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
8240 const outliner::Candidate &b) {
8241 const AArch64Subtarget &SubtargetA =
8242 a.getMF()->getSubtarget<AArch64Subtarget>();
8243 const AArch64Subtarget &SubtargetB =
8244 b.getMF()->getSubtarget<AArch64Subtarget>();
8245 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
8246}
8247
8248std::optional<outliner::OutlinedFunction>
8249AArch64InstrInfo::getOutliningCandidateInfo(
8250 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
8251 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
8252
8253 unsigned SequenceSize = 0;
8254 for (auto &MI : FirstCand)
8255 SequenceSize += getInstSizeInBytes(MI);
8256
8257 unsigned NumBytesToCreateFrame = 0;
8258
8259 // We only allow outlining for functions having exactly matching return
8260 // address signing attributes, i.e., all share the same value for the
8261 // attribute "sign-return-address" and all share the same type of key they
8262 // are signed with.
8263 // Additionally, we require that either all functions support v8.3a features
8264 // or none do. Otherwise an outlined function could get signed using
8265 // dedicated v8.3 instructions, and a call from a function that doesn't
8266 // support v8.3 instructions would therefore be invalid.
8267 if (std::adjacent_find(
8268 first: RepeatedSequenceLocs.begin(), last: RepeatedSequenceLocs.end(),
8269 binary_pred: [](const outliner::Candidate &a, const outliner::Candidate &b) {
8270 // Return true if a and b are non-equal w.r.t. return address
8271 // signing or support of v8.3a features
8272 if (outliningCandidatesSigningScopeConsensus(a, b) &&
8273 outliningCandidatesSigningKeyConsensus(a, b) &&
8274 outliningCandidatesV8_3OpsConsensus(a, b)) {
8275 return false;
8276 }
8277 return true;
8278 }) != RepeatedSequenceLocs.end()) {
8279 return std::nullopt;
8280 }
8281
8282 // Since at this point all candidates agree on their return address signing,
8283 // picking just one is fine. If the candidate functions potentially sign their
8284 // return addresses, the outlined function should do the same. Note that in
8285 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
8286 // not certainly true that the outlined function will have to sign its return
8287 // address but this decision is made later, when the decision to outline
8288 // has already been made.
8289 // The same holds for the number of additional instructions we need: On
8290 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
8291 // necessary. However, at this point we don't know if the outlined function
8292 // will have a RET instruction so we assume the worst.
8293 const TargetRegisterInfo &TRI = getRegisterInfo();
8294 // Performing a tail call may require extra checks when PAuth is enabled.
8295 // If PAuth is disabled, set it to zero for uniformity.
8296 unsigned NumBytesToCheckLRInTCEpilogue = 0;
8297 if (FirstCand.getMF()
8298 ->getInfo<AArch64FunctionInfo>()
8299 ->shouldSignReturnAddress(SpillsLR: true)) {
8300 // One PAC and one AUT instructions
8301 NumBytesToCreateFrame += 8;
8302
8303 // PAuth is enabled - set extra tail call cost, if any.
8304 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod();
8305 NumBytesToCheckLRInTCEpilogue =
8306 AArch64PAuth::getCheckerSizeInBytes(Method: LRCheckMethod);
8307 // Checking the authenticated LR value may significantly impact
8308 // SequenceSize, so account for it for more precise results.
8309 if (isTailCallReturnInst(MI: RepeatedSequenceLocs[0].back()))
8310 SequenceSize += NumBytesToCheckLRInTCEpilogue;
8311
8312 // We have to check whether sp-modifying instructions would get outlined.
8313 // If so, we only allow outlining if sp is unchanged overall; matching
8314 // sub and add instructions are okay to outline, but all other sp
8315 // modifications are not.
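 // For example (hypothetical sequence), outlining
 // \code
 //   sub sp, sp, #16
 //   ...
 //   add sp, sp, #16
 // \endcode
 // is acceptable because sp is unchanged overall, whereas a lone
 // "add sp, sp, #16" in the candidate would be rejected.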
8316 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
8317 int SPValue = 0;
8318 for (auto &MI : C) {
8319 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
8320 switch (MI.getOpcode()) {
8321 case AArch64::ADDXri:
8322 case AArch64::ADDWri:
8323 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8324 assert(MI.getOperand(2).isImm() &&
8325 "Expected operand to be immediate");
8326 assert(MI.getOperand(1).isReg() &&
8327 "Expected operand to be a register");
8328 // Check if the add just increments sp. If so, we search for
8329 // matching sub instructions that decrement sp. If not, the
8330 // modification is illegal
8331 if (MI.getOperand(1).getReg() == AArch64::SP)
8332 SPValue += MI.getOperand(i: 2).getImm();
8333 else
8334 return true;
8335 break;
8336 case AArch64::SUBXri:
8337 case AArch64::SUBWri:
8338 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8339 assert(MI.getOperand(2).isImm() &&
8340 "Expected operand to be immediate");
8341 assert(MI.getOperand(1).isReg() &&
8342 "Expected operand to be a register");
8343 // Check if the sub just decrements sp. If so, we search for
8344 // matching add instructions that increment sp. If not, the
8345 // modification is illegal
8346 if (MI.getOperand(1).getReg() == AArch64::SP)
8347 SPValue -= MI.getOperand(i: 2).getImm();
8348 else
8349 return true;
8350 break;
8351 default:
8352 return true;
8353 }
8354 }
8355 }
8356 if (SPValue)
8357 return true;
8358 return false;
8359 };
8360 // Remove candidates with illegal stack modifying instructions
8361 llvm::erase_if(C&: RepeatedSequenceLocs, P: hasIllegalSPModification);
8362
8363 // If the sequence doesn't have enough candidates left, then we're done.
8364 if (RepeatedSequenceLocs.size() < 2)
8365 return std::nullopt;
8366 }
8367
8368 // Properties about candidate MBBs that hold for all of them.
8369 unsigned FlagsSetInAll = 0xF;
8370
8371 // Compute liveness information for each candidate, and set FlagsSetInAll.
8372 for (outliner::Candidate &C : RepeatedSequenceLocs)
8373 FlagsSetInAll &= C.Flags;
8374
8375 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
8376
8377 // Helper lambda which sets call information for every candidate.
8378 auto SetCandidateCallInfo =
8379 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
8380 for (outliner::Candidate &C : RepeatedSequenceLocs)
8381 C.setCallInfo(CID: CallID, CO: NumBytesForCall);
8382 };
8383
8384 unsigned FrameID = MachineOutlinerDefault;
8385 NumBytesToCreateFrame += 4;
8386
8387 bool HasBTI = any_of(Range&: RepeatedSequenceLocs, P: [](outliner::Candidate &C) {
8388 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
8389 });
8390
8391 // We check to see if CFI Instructions are present, and if they are
8392 // we find the number of CFI Instructions in the candidates.
8393 unsigned CFICount = 0;
8394 for (auto &I : RepeatedSequenceLocs[0]) {
8395 if (I.isCFIInstruction())
8396 CFICount++;
8397 }
8398
8399 // We compare the number of found CFI Instructions to the number of CFI
8400 // instructions in the parent function for each candidate. We must check this
8401 // since if we outline one of the CFI instructions in a function, we have to
8402 // outline them all for correctness. If we do not, the address offsets will be
8403 // incorrect between the two sections of the program.
8404 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8405 std::vector<MCCFIInstruction> CFIInstructions =
8406 C.getMF()->getFrameInstructions();
8407
8408 if (CFICount > 0 && CFICount != CFIInstructions.size())
8409 return std::nullopt;
8410 }
8411
8412 // Returns true if an instruction is safe to fix up, false otherwise.
8413 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
8414 if (MI.isCall())
8415 return true;
8416
8417 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
8418 !MI.readsRegister(AArch64::SP, &TRI))
8419 return true;
8420
8421 // Any modification of SP will break our code to save/restore LR.
8422 // FIXME: We could handle some instructions which add a constant
8423 // offset to SP, with a bit more work.
8424 if (MI.modifiesRegister(AArch64::SP, &TRI))
8425 return false;
8426
8427 // At this point, we have a stack instruction that we might need to
8428 // fix up. We'll handle it if it's a load or store.
8429 if (MI.mayLoadOrStore()) {
8430 const MachineOperand *Base; // Filled with the base operand of MI.
8431 int64_t Offset; // Filled with the offset of MI.
8432 bool OffsetIsScalable;
8433
8434 // Does it allow us to offset the base operand and is the base the
8435 // register SP?
8436 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
8437 !Base->isReg() || Base->getReg() != AArch64::SP)
8438 return false;
8439
8440 // The fix-up code below assumes byte offsets.
8441 if (OffsetIsScalable)
8442 return false;
8443
8444 // Find the minimum/maximum offset for this instruction and check
8445 // if fixing it up would be in range.
8446 int64_t MinOffset,
8447 MaxOffset; // Unscaled offsets for the instruction.
8448 // The scale to multiply the offsets by.
8449 TypeSize Scale(0U, false), DummyWidth(0U, false);
8450 getMemOpInfo(Opcode: MI.getOpcode(), Scale, Width&: DummyWidth, MinOffset, MaxOffset);
8451
8452 Offset += 16; // Update the offset to what it would be if we outlined.
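 // For example (hypothetical instruction), an "ldr x0, [sp, #8]" in the
 // candidate becomes "ldr x0, [sp, #24]" once LR has been spilled in the
 // outlined function, so the adjusted offset must still be encodable for
 // the load/store opcode.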
8453 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
8454 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
8455 return false;
8456
8457 // It's in range, so we can outline it.
8458 return true;
8459 }
8460
8461 // FIXME: Add handling for instructions like "add x0, sp, #8".
8462
8463 // We can't fix it up, so don't outline it.
8464 return false;
8465 };
8466
8467 // True if it's possible to fix up each stack instruction in this sequence.
8468 // Important for frames/call variants that modify the stack.
8469 bool AllStackInstrsSafe = llvm::all_of(Range&: FirstCand, P: IsSafeToFixup);
8470
8471 // If the last instruction in any candidate is a terminator, then we should
8472 // tail call all of the candidates.
8473 if (RepeatedSequenceLocs[0].back().isTerminator()) {
8474 FrameID = MachineOutlinerTailCall;
8475 NumBytesToCreateFrame = 0;
8476 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
8477 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
8478 }
8479
8480 else if (LastInstrOpcode == AArch64::BL ||
8481 ((LastInstrOpcode == AArch64::BLR ||
8482 LastInstrOpcode == AArch64::BLRNoIP) &&
8483 !HasBTI)) {
8484 // FIXME: Do we need to check if the code after this uses the value of LR?
8485 FrameID = MachineOutlinerThunk;
8486 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
8487 SetCandidateCallInfo(MachineOutlinerThunk, 4);
8488 }
8489
8490 else {
8491 // We need to decide how to emit calls + frames. We can always emit the same
8492 // frame if we don't need to save to the stack. If we have to save to the
8493 // stack, then we need a different frame.
8494 unsigned NumBytesNoStackCalls = 0;
8495 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
8496
8497 // Check if we have to save LR.
8498 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8499 bool LRAvailable =
8500 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
8501 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
8502 : true;
8503 // If we have a noreturn caller, then we're going to be conservative and
8504 // say that we have to save LR. If we don't have a ret at the end of the
8505 // block, then we can't reason about liveness accurately.
8506 //
8507 // FIXME: We can probably do better than always disabling this in
8508 // noreturn functions by fixing up the liveness info.
8509 bool IsNoReturn =
8510 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
8511
8512 // Is LR available? If so, we don't need a save.
8513 if (LRAvailable && !IsNoReturn) {
8514 NumBytesNoStackCalls += 4;
8515 C.setCallInfo(CID: MachineOutlinerNoLRSave, CO: 4);
8516 CandidatesWithoutStackFixups.push_back(x: C);
8517 }
8518
8519 // Is an unused register available? If so, we won't modify the stack, so
8520 // we can outline with the same frame type as those that don't save LR.
8521 else if (findRegisterToSaveLRTo(C)) {
8522 NumBytesNoStackCalls += 12;
8523 C.setCallInfo(CID: MachineOutlinerRegSave, CO: 12);
8524 CandidatesWithoutStackFixups.push_back(x: C);
8525 }
8526
8527 // Is SP used in the sequence at all? If not, we don't have to modify
8528 // the stack, so we are guaranteed to get the same frame.
8529 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
8530 NumBytesNoStackCalls += 12;
8531 C.setCallInfo(CID: MachineOutlinerDefault, CO: 12);
8532 CandidatesWithoutStackFixups.push_back(x: C);
8533 }
8534
8535 // If we outline this, we need to modify the stack. Pretend we don't
8536 // outline this by saving all of its bytes.
8537 else {
8538 NumBytesNoStackCalls += SequenceSize;
8539 }
8540 }
8541
8542 // If there are no places where we have to save LR, then note that we
8543 // don't have to update the stack. Otherwise, give every candidate the
8544 // default call type, as long as it's safe to do so.
8545 if (!AllStackInstrsSafe ||
8546 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
8547 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
8548 FrameID = MachineOutlinerNoLRSave;
8549 } else {
8550 SetCandidateCallInfo(MachineOutlinerDefault, 12);
8551
8552 // Bugzilla ID: 46767
8553 // TODO: Check if fixing up the stack more than once is safe so we can
8554 // outline these.
8555 //
8556 // An outline resulting in a caller that requires stack fixups at the
8557 // callsite to a callee that also requires stack fixups can happen when
8558 // there are no available registers at the candidate callsite for a
8559 // candidate that itself also has calls.
8560 //
8561 // In other words, if function_containing_sequence in the following pseudo
8562 // assembly requires that we save LR at the point of the call, but there
8563 // are no available registers, then we save using SP, and as a
8564 // result the SP offsets require stack fixups by multiples of 16.
8565 //
8566 // function_containing_sequence:
8567 // ...
8568 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8569 // call OUTLINED_FUNCTION_N
8570 // restore LR from SP
8571 // ...
8572 //
8573 // OUTLINED_FUNCTION_N:
8574 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8575 // ...
8576 // bl foo
8577 // restore LR from SP
8578 // ret
8579 //
8580 // Because the code to handle more than one stack fixup does not
8581 // currently have the proper checks for legality, these cases will assert
8582 // in the AArch64 MachineOutliner. This is because the code to do this
8583 // needs more hardening, testing, and better checks that the generated code
8584 // is legal, etc., and because it is only verified to handle a single pass
8585 // of stack fixup.
8586 //
8587 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
8588 // these cases until they are known to be handled. Bugzilla 46767 is
8589 // referenced in comments at the assert site.
8590 //
8591 // To avoid asserting (or generating non-legal code on noassert builds)
8592 // we remove all candidates which would need more than one stack fixup by
8593 // pruning the cases where the candidate has calls while also having no
8594 // available LR and no available general-purpose register to copy
8595 // LR to (i.e. one extra stack save/restore).
8596 //
8597 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8598 erase_if(C&: RepeatedSequenceLocs, P: [this, &TRI](outliner::Candidate &C) {
8599 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
8600 return (llvm::any_of(C, IsCall)) &&
8601 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
8602 !findRegisterToSaveLRTo(C));
8603 });
8604 }
8605 }
8606
8607 // If we dropped all of the candidates, bail out here.
8608 if (RepeatedSequenceLocs.size() < 2) {
8609 RepeatedSequenceLocs.clear();
8610 return std::nullopt;
8611 }
8612 }
8613
8614 // Does every candidate's MBB contain a call? If so, then we might have a call
8615 // in the range.
8616 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8617 // Check if the range contains a call. These require a save + restore of the
8618 // link register.
8619 bool ModStackToSaveLR = false;
8620 if (std::any_of(first: FirstCand.begin(), last: std::prev(x: FirstCand.end()),
8621 pred: [](const MachineInstr &MI) { return MI.isCall(); }))
8622 ModStackToSaveLR = true;
8623
8624 // Handle the last instruction separately. If this is a tail call, then the
8625 // last instruction is a call. We don't want to save + restore in this case.
8626 // However, it is possible that the last instruction is a call without it
8627 // being valid to tail call this sequence. We should consider this case as
8628 // well.
8629 else if (FrameID != MachineOutlinerThunk &&
8630 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
8631 ModStackToSaveLR = true;
8632
8633 if (ModStackToSaveLR) {
8634 // We can't fix up the stack. Bail out.
8635 if (!AllStackInstrsSafe) {
8636 RepeatedSequenceLocs.clear();
8637 return std::nullopt;
8638 }
8639
8640 // Save + restore LR.
8641 NumBytesToCreateFrame += 8;
8642 }
8643 }
8644
8645 // If we have CFI instructions, we can only outline if the outlined section
8646 // can be a tail call
8647 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
8648 return std::nullopt;
8649
8650 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
8651 NumBytesToCreateFrame, FrameID);
8652}
8653
8654void AArch64InstrInfo::mergeOutliningCandidateAttributes(
8655 Function &F, std::vector<outliner::Candidate> &Candidates) const {
8656 // If a bunch of candidates reach this point, they must agree on their return
8657 // address signing. It is therefore enough to just consider the signing
8658 // behaviour of one of them.
8659 const auto &CFn = Candidates.front().getMF()->getFunction();
8660
8661 // Since all candidates belong to the same module, just copy the
8662 // function-level attributes of an arbitrary function.
8663 if (CFn.hasFnAttribute(Kind: "sign-return-address"))
8664 F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "sign-return-address"));
8665 if (CFn.hasFnAttribute(Kind: "sign-return-address-key"))
8666 F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "sign-return-address-key"));
8667
8668 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
8669}
8670
8671bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
8672 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
8673 const Function &F = MF.getFunction();
8674
8675 // Can F be deduplicated by the linker? If it can, don't outline from it.
8676 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
8677 return false;
8678
8679 // Don't outline from functions with section markings; the program could
8680 // expect that all the code is in the named section.
8681 // FIXME: Allow outlining from multiple functions with the same section
8682 // marking.
8683 if (F.hasSection())
8684 return false;
8685
8686 // Outlining from functions with redzones is unsafe since the outliner may
8687 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
8688 // outline from it.
8689 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
8690 if (!AFI || AFI->hasRedZone().value_or(u: true))
8691 return false;
8692
8693 // FIXME: Teach the outliner to generate/handle Windows unwind info.
8694 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
8695 return false;
8696
8697 // It's safe to outline from MF.
8698 return true;
8699}
8700
8701SmallVector<std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
8702AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
8703 unsigned &Flags) const {
8704 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
8705 "Must track liveness!");
8706 SmallVector<
8707 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
8708 Ranges;
8709 // According to the AArch64 Procedure Call Standard, the following are
8710 // undefined on entry/exit from a function call:
8711 //
8712 // * Registers x16, x17, (and thus w16, w17)
8713 // * Condition codes (and thus the NZCV register)
8714 //
8715 // If any of these registers are used inside or live across an outlined
8716 // function, then they may be modified later, either by the compiler or
8717 // some other tool (like the linker).
8718 //
8719 // To avoid outlining in these situations, partition each block into ranges
8720 // where these registers are dead. We will only outline from those ranges.
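 // For example (illustrative), if a cmp/csel pair keeps NZCV live across a
 // few instructions in the middle of a block, those instructions are excluded
 // and we only consider outlining from the ranges before and after them.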
8721 LiveRegUnits LRU(getRegisterInfo());
8722 auto AreAllUnsafeRegsDead = [&LRU]() {
8723 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
8724 LRU.available(AArch64::NZCV);
8725 };
8726
8727 // We need to know if LR is live across an outlining boundary later on in
8728 // order to decide how we'll create the outlined call, frame, etc.
8729 //
8730 // It's pretty expensive to check this for *every candidate* within a block.
8731 // That's some potentially n^2 behaviour, since in the worst case, we'd need
8732 // to compute liveness from the end of the block for O(n) candidates within
8733 // the block.
8734 //
8735 // So, to improve the average case, let's keep track of liveness from the end
8736 // of the block to the beginning of *every outlinable range*. If we know that
8737 // LR is available in every range we could outline from, then we know that
8738 // we don't need to check liveness for any candidate within that range.
8739 bool LRAvailableEverywhere = true;
8740 // Compute liveness bottom-up.
8741 LRU.addLiveOuts(MBB);
8742 // Update flags that require info about the entire MBB.
8743 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
8744 if (MI.isCall() && !MI.isTerminator())
8745 Flags |= MachineOutlinerMBBFlags::HasCalls;
8746 };
8747 // Range: [RangeBegin, RangeEnd)
8748 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
8749 unsigned RangeLen;
8750 auto CreateNewRangeStartingAt =
8751 [&RangeBegin, &RangeEnd,
8752 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
8753 RangeBegin = NewBegin;
8754 RangeEnd = std::next(x: RangeBegin);
8755 RangeLen = 0;
8756 };
8757 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
8758 // At least one unsafe register is not dead. We do not want to outline at
8759 // this point. If it is long enough to outline from, save the range
8760 // [RangeBegin, RangeEnd).
8761 if (RangeLen > 1)
8762 Ranges.push_back(Elt: std::make_pair(x&: RangeBegin, y&: RangeEnd));
8763 };
8764 // Find the first point where all unsafe registers are dead.
8765 // FIND: <safe instr> <-- end of first potential range
8766 // SKIP: <unsafe def>
8767 // SKIP: ... everything between ...
8768 // SKIP: <unsafe use>
8769 auto FirstPossibleEndPt = MBB.instr_rbegin();
8770 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
8771 LRU.stepBackward(MI: *FirstPossibleEndPt);
8772 // Update flags that impact how we outline across the entire block,
8773 // regardless of safety.
8774 UpdateWholeMBBFlags(*FirstPossibleEndPt);
8775 if (AreAllUnsafeRegsDead())
8776 break;
8777 }
8778 // If we exhausted the entire block, we have no safe ranges to outline.
8779 if (FirstPossibleEndPt == MBB.instr_rend())
8780 return Ranges;
8781 // Current range.
8782 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
8783 // FirstPossibleEndPt points to the first place where all unsafe registers
8784 // are dead (if there is any such point). Begin partitioning the MBB into
8785 // ranges.
8786 for (auto &MI : make_range(x: FirstPossibleEndPt, y: MBB.instr_rend())) {
8787 LRU.stepBackward(MI);
8788 UpdateWholeMBBFlags(MI);
8789 if (!AreAllUnsafeRegsDead()) {
8790 SaveRangeIfNonEmpty();
8791 CreateNewRangeStartingAt(MI.getIterator());
8792 continue;
8793 }
8794 LRAvailableEverywhere &= LRU.available(AArch64::LR);
8795 RangeBegin = MI.getIterator();
8796 ++RangeLen;
8797 }
8798 // Above loop misses the last (or only) range. If we are still safe, then
8799 // let's save the range.
8800 if (AreAllUnsafeRegsDead())
8801 SaveRangeIfNonEmpty();
8802 if (Ranges.empty())
8803 return Ranges;
8804 // We found the ranges bottom-up, but the mapping expects them top-down.
8805 // Reverse the order.
8806 std::reverse(first: Ranges.begin(), last: Ranges.end());
8807 // If there is at least one outlinable range where LR is unavailable
8808 // somewhere, remember that.
8809 if (!LRAvailableEverywhere)
8810 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
8811 return Ranges;
8812}
8813
8814outliner::InstrType
8815AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
8816 unsigned Flags) const {
8817 MachineInstr &MI = *MIT;
8818 MachineBasicBlock *MBB = MI.getParent();
8819 MachineFunction *MF = MBB->getParent();
8820 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
8821
8822 // Don't outline anything used for return address signing. The outlined
8823 // function will get signed later if needed
8824 switch (MI.getOpcode()) {
8825 case AArch64::PACM:
8826 case AArch64::PACIASP:
8827 case AArch64::PACIBSP:
8828 case AArch64::PACIASPPC:
8829 case AArch64::PACIBSPPC:
8830 case AArch64::AUTIASP:
8831 case AArch64::AUTIBSP:
8832 case AArch64::AUTIASPPCi:
8833 case AArch64::AUTIASPPCr:
8834 case AArch64::AUTIBSPPCi:
8835 case AArch64::AUTIBSPPCr:
8836 case AArch64::RETAA:
8837 case AArch64::RETAB:
8838 case AArch64::RETAASPPCi:
8839 case AArch64::RETAASPPCr:
8840 case AArch64::RETABSPPCi:
8841 case AArch64::RETABSPPCr:
8842 case AArch64::EMITBKEY:
8843 case AArch64::PAUTH_PROLOGUE:
8844 case AArch64::PAUTH_EPILOGUE:
8845 return outliner::InstrType::Illegal;
8846 }
8847
8848 // Don't outline LOHs.
8849 if (FuncInfo->getLOHRelated().count(Ptr: &MI))
8850 return outliner::InstrType::Illegal;
8851
8852 // We can only outline these if we will tail call the outlined function, or
8853 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
8854 // they appear in a tail call.
8855 //
8856 // FIXME: If the proper fixups for the offset are implemented, this should be
8857 // possible.
8858 if (MI.isCFIInstruction())
8859 return outliner::InstrType::Legal;
8860
8861 // Is this a terminator for a basic block?
8862 if (MI.isTerminator())
8863 // TargetInstrInfo::getOutliningType has already filtered out anything
8864 // that would break this, so we can allow it here.
8865 return outliner::InstrType::Legal;
8866
8867 // Make sure none of the operands are un-outlinable.
8868 for (const MachineOperand &MOP : MI.operands()) {
8869 // A check preventing CFI indices was here before, but only CFI
8870 // instructions should have those.
8871 assert(!MOP.isCFIIndex());
8872
8873 // If it uses LR or W30 explicitly, then don't touch it.
8874 if (MOP.isReg() && !MOP.isImplicit() &&
8875 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
8876 return outliner::InstrType::Illegal;
8877 }
8878
8879 // Special cases for instructions that can always be outlined, but will fail
8880 // the later tests, e.g. ADRPs, which are PC-relative and use LR, but can
8881 // always be outlined because they don't require a *specific* value to be in LR.
8882 if (MI.getOpcode() == AArch64::ADRP)
8883 return outliner::InstrType::Legal;
8884
8885 // If MI is a call we might be able to outline it. We don't want to outline
8886 // any calls that rely on the position of items on the stack. When we outline
8887 // something containing a call, we have to emit a save and restore of LR in
8888 // the outlined function. Currently, this always happens by saving LR to the
8889 // stack. Thus, if we outline, say, half the parameters for a function call
8890 // plus the call, then we'll break the callee's expectations for the layout
8891 // of the stack.
8892 //
8893 // FIXME: Allow calls to functions which construct a stack frame, as long
8894 // as they don't access arguments on the stack.
8895 // FIXME: Figure out some way to analyze functions defined in other modules.
8896 // We should be able to compute the memory usage based on the IR calling
8897 // convention, even if we can't see the definition.
8898 if (MI.isCall()) {
8899 // Get the function associated with the call. Look at each operand and find
8900 // the one that represents the callee and get its name.
8901 const Function *Callee = nullptr;
8902 for (const MachineOperand &MOP : MI.operands()) {
8903 if (MOP.isGlobal()) {
8904 Callee = dyn_cast<Function>(Val: MOP.getGlobal());
8905 break;
8906 }
8907 }
8908
8909 // Never outline calls to mcount. There isn't any rule that would require
8910 // this, but the Linux kernel's "ftrace" feature depends on it.
8911 if (Callee && Callee->getName() == "\01_mcount")
8912 return outliner::InstrType::Illegal;
8913
8914 // If we don't know anything about the callee, assume it depends on the
8915 // stack layout of the caller. In that case, it's only legal to outline
8916 // as a tail-call. Explicitly list the call instructions we know about so we
8917 // don't get unexpected results with call pseudo-instructions.
8918 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
8919 if (MI.getOpcode() == AArch64::BLR ||
8920 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
8921 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
8922
8923 if (!Callee)
8924 return UnknownCallOutlineType;
8925
8926 // We have a function we have information about. Check whether it's
8927 // something we can safely outline.
8928 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(F: *Callee);
8929
8930 // We don't know what's going on with the callee at all. Don't touch it.
8931 if (!CalleeMF)
8932 return UnknownCallOutlineType;
8933
8934 // Check if we know anything about the callee saves on the function. If we
8935 // don't, then don't touch it, since that implies that we haven't
8936 // computed anything about its stack frame yet.
8937 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
8938 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
8939 MFI.getNumObjects() > 0)
8940 return UnknownCallOutlineType;
8941
8942 // At this point, we can say that CalleeMF ought to not pass anything on the
8943 // stack. Therefore, we can outline it.
8944 return outliner::InstrType::Legal;
8945 }
8946
8947 // Don't touch the link register or W30.
8948 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
8949 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
8950 return outliner::InstrType::Illegal;
8951
8952 // Don't outline BTI instructions, because that will prevent the outlining
8953 // site from being indirectly callable.
8954 if (hasBTISemantics(MI))
8955 return outliner::InstrType::Illegal;
8956
8957 return outliner::InstrType::Legal;
8958}
8959
8960void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
8961 for (MachineInstr &MI : MBB) {
8962 const MachineOperand *Base;
8963 TypeSize Width(0, false);
8964 int64_t Offset;
8965 bool OffsetIsScalable;
8966
8967 // Is this a load or store with an immediate offset with SP as the base?
8968 if (!MI.mayLoadOrStore() ||
8969 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
8970 &RI) ||
8971 (Base->isReg() && Base->getReg() != AArch64::SP))
8972 continue;
8973
8974 // It is, so we have to fix it up.
8975 TypeSize Scale(0U, false);
8976 int64_t Dummy1, Dummy2;
8977
8978 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(LdSt&: MI);
8979 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
8980 getMemOpInfo(Opcode: MI.getOpcode(), Scale, Width, MinOffset&: Dummy1, MaxOffset&: Dummy2);
8981 assert(Scale != 0 && "Unexpected opcode!");
8982 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
8983
8984 // We've pushed the return address to the stack, so add 16 to the offset.
8985 // This is safe, since we already checked if it would overflow when we
8986 // checked if this instruction was legal to outline.
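 // For example (illustrative), an LDRXui "ldr x0, [sp, #8]" has Scale 8, so
 // its scaled immediate becomes (8 + 16) / 8 = 3, i.e. "ldr x0, [sp, #24]".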
8987 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
8988 StackOffsetOperand.setImm(NewImm);
8989 }
8990}
8991
8992static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
8993 const AArch64InstrInfo *TII,
8994 bool ShouldSignReturnAddr) {
8995 if (!ShouldSignReturnAddr)
8996 return;
8997
8998 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
8999 .setMIFlag(MachineInstr::FrameSetup);
9000 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
9001 TII->get(AArch64::PAUTH_EPILOGUE))
9002 .setMIFlag(MachineInstr::FrameDestroy);
9003}
9004
9005void AArch64InstrInfo::buildOutlinedFrame(
9006 MachineBasicBlock &MBB, MachineFunction &MF,
9007 const outliner::OutlinedFunction &OF) const {
9008
9009 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
9010
9011 if (OF.FrameConstructionID == MachineOutlinerTailCall)
9012 FI->setOutliningStyle("Tail Call");
9013 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
9014 // For thunk outlining, rewrite the last instruction from a call to a
9015 // tail-call.
9016 MachineInstr *Call = &*--MBB.instr_end();
9017 unsigned TailOpcode;
9018 if (Call->getOpcode() == AArch64::BL) {
9019 TailOpcode = AArch64::TCRETURNdi;
9020 } else {
9021 assert(Call->getOpcode() == AArch64::BLR ||
9022 Call->getOpcode() == AArch64::BLRNoIP);
9023 TailOpcode = AArch64::TCRETURNriALL;
9024 }
9025 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
9026 .add(Call->getOperand(i: 0))
9027 .addImm(0);
9028 MBB.insert(I: MBB.end(), MI: TC);
9029 Call->eraseFromParent();
9030
9031 FI->setOutliningStyle("Thunk");
9032 }
9033
9034 bool IsLeafFunction = true;
9035
9036 // Is there a call in the outlined range?
9037 auto IsNonTailCall = [](const MachineInstr &MI) {
9038 return MI.isCall() && !MI.isReturn();
9039 };
9040
9041 if (llvm::any_of(Range: MBB.instrs(), P: IsNonTailCall)) {
9042 // Fix up the instructions in the range, since we're going to modify the
9043 // stack.
9044
9045 // Bugzilla ID: 46767
9046 // TODO: Check if fixing up twice is safe so we can outline these.
9047 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
9048 "Can only fix up stack references once");
9049 fixupPostOutline(MBB);
9050
9051 IsLeafFunction = false;
9052
9053 // LR has to be a live in so that we can save it.
9054 if (!MBB.isLiveIn(AArch64::LR))
9055 MBB.addLiveIn(AArch64::LR);
9056
9057 MachineBasicBlock::iterator It = MBB.begin();
9058 MachineBasicBlock::iterator Et = MBB.end();
9059
9060 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9061 OF.FrameConstructionID == MachineOutlinerThunk)
9062 Et = std::prev(x: MBB.end());
9063
9064 // Insert a save before the outlined region
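 // With these operands, STRXpre corresponds to "str x30, [sp, #-16]!", a
 // pre-indexed spill of LR that also moves SP down by 16 bytes.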
9065 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9066 .addReg(AArch64::SP, RegState::Define)
9067 .addReg(AArch64::LR)
9068 .addReg(AArch64::SP)
9069 .addImm(-16);
9070 It = MBB.insert(I: It, MI: STRXpre);
9071
9072 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
9073 const TargetSubtargetInfo &STI = MF.getSubtarget();
9074 const MCRegisterInfo *MRI = STI.getRegisterInfo();
9075 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
9076
9077 // Add a CFI saying the stack was moved 16 B down.
9078 int64_t StackPosEntry =
9079 MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: 16));
9080 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9081 .addCFIIndex(StackPosEntry)
9082 .setMIFlags(MachineInstr::FrameSetup);
9083
9084 // Add a CFI saying that the LR that we want to find is now 16 B higher
9085 // than before.
9086 int64_t LRPosEntry = MF.addFrameInst(
9087 Inst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfReg, Offset: -16));
9088 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9089 .addCFIIndex(LRPosEntry)
9090 .setMIFlags(MachineInstr::FrameSetup);
9091 }
9092
9093 // Insert a restore before the terminator for the function.
9094 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9095 .addReg(AArch64::SP, RegState::Define)
9096 .addReg(AArch64::LR, RegState::Define)
9097 .addReg(AArch64::SP)
9098 .addImm(16);
9099 Et = MBB.insert(I: Et, MI: LDRXpost);
9100 }
9101
9102 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(SpillsLR: !IsLeafFunction);
9103
9104 // If this is a tail call outlined function, then there's already a return.
9105 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9106 OF.FrameConstructionID == MachineOutlinerThunk) {
9107 signOutlinedFunction(MF, MBB, TII: this, ShouldSignReturnAddr);
9108 return;
9109 }
9110
9111 // It's not a tail call, so we have to insert the return ourselves.
9112
9113 // LR has to be a live in so that we can return to it.
9114 if (!MBB.isLiveIn(AArch64::LR))
9115 MBB.addLiveIn(AArch64::LR);
9116
9117 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
9118 .addReg(AArch64::LR);
9119 MBB.insert(I: MBB.end(), MI: ret);
9120
9121 signOutlinedFunction(MF, MBB, TII: this, ShouldSignReturnAddr);
9122
9123 FI->setOutliningStyle("Function");
9124
9125 // Did we have to modify the stack by saving the link register?
9126 if (OF.FrameConstructionID != MachineOutlinerDefault)
9127 return;
9128
9129 // We modified the stack.
9130 // Walk over the basic block and fix up all the stack accesses.
9131 fixupPostOutline(MBB);
9132}
9133
9134MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
9135 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
9136 MachineFunction &MF, outliner::Candidate &C) const {
9137
9138 // Are we tail calling?
9139 if (C.CallConstructionID == MachineOutlinerTailCall) {
9140 // If yes, then we can just branch to the label.
9141 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
9142 .addGlobalAddress(M.getNamedValue(MF.getName()))
9143 .addImm(0));
9144 return It;
9145 }
9146
9147 // Are we saving the link register?
9148 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
9149 C.CallConstructionID == MachineOutlinerThunk) {
9150 // No, so just insert the call.
9151 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9152 .addGlobalAddress(M.getNamedValue(MF.getName())));
9153 return It;
9154 }
9155
9156 // We want to return the spot where we inserted the call.
9157 MachineBasicBlock::iterator CallPt;
9158
9159 // Instructions for saving and restoring LR around the call instruction we're
9160 // going to insert.
9161 MachineInstr *Save;
9162 MachineInstr *Restore;
9163 // Can we save to a register?
9164 if (C.CallConstructionID == MachineOutlinerRegSave) {
9165 // FIXME: This logic should be sunk into a target-specific interface so that
9166 // we don't have to recompute the register.
9167 Register Reg = findRegisterToSaveLRTo(C);
9168 assert(Reg && "No callee-saved register available?");
9169
9170 // LR has to be a live in so that we can save it.
9171 if (!MBB.isLiveIn(AArch64::LR))
9172 MBB.addLiveIn(AArch64::LR);
9173
9174 // Save and restore LR from Reg.
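 // ORRXrs with an XZR operand is the canonical "mov" alias, so this emits
 // roughly "mov <Reg>, lr" before the call and "mov lr, <Reg>" after it.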
9175 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
9176 .addReg(AArch64::XZR)
9177 .addReg(AArch64::LR)
9178 .addImm(0);
9179 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
9180 .addReg(AArch64::XZR)
9181 .addReg(Reg)
9182 .addImm(0);
9183 } else {
9184 // We have the default case. Save and restore from SP.
9185 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9186 .addReg(AArch64::SP, RegState::Define)
9187 .addReg(AArch64::LR)
9188 .addReg(AArch64::SP)
9189 .addImm(-16);
9190 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9191 .addReg(AArch64::SP, RegState::Define)
9192 .addReg(AArch64::LR, RegState::Define)
9193 .addReg(AArch64::SP)
9194 .addImm(16);
9195 }
9196
9197 It = MBB.insert(I: It, MI: Save);
9198 It++;
9199
9200 // Insert the call.
9201 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9202 .addGlobalAddress(M.getNamedValue(MF.getName())));
9203 CallPt = It;
9204 It++;
9205
9206 It = MBB.insert(I: It, MI: Restore);
9207 return CallPt;
9208}
9209
9210bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
9211 MachineFunction &MF) const {
9212 return MF.getFunction().hasMinSize();
9213}
9214
9215void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
9216 MachineBasicBlock::iterator Iter,
9217 DebugLoc &DL,
9218 bool AllowSideEffects) const {
9219 const MachineFunction &MF = *MBB.getParent();
9220 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
9221 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
9222
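 // Depending on the register class and available features, this emits
 // roughly: a MOVZ of #0 for a general-purpose register, an SVE DUP of #0
 // when SVE is available, or a NEON MOVI of #0 otherwise.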
9223 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
9224 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
9225 } else if (STI.hasSVE()) {
9226 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
9227 .addImm(0)
9228 .addImm(0);
9229 } else {
9230 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
9231 .addImm(0);
9232 }
9233}
9234
9235std::optional<DestSourcePair>
9236AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
9237
9238 // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register
9239 // and a zero immediate operand are used as an alias for the mov instruction.
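 // For example, "orr w0, wzr, w1, lsl #0" is the canonical encoding of
 // "mov w0, w1" and is reported here as a copy from w1 to w0
 // (registers are purely illustrative).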
9240 if (MI.getOpcode() == AArch64::ORRWrs &&
9241 MI.getOperand(1).getReg() == AArch64::WZR &&
9242 MI.getOperand(3).getImm() == 0x0 &&
9243 // Check that the w->w move is not a zero-extending w->x mov.
9244 (!MI.getOperand(0).getReg().isVirtual() ||
9245 MI.getOperand(0).getSubReg() == 0) &&
9246 (!MI.getOperand(0).getReg().isPhysical() ||
9247 MI.findRegisterDefOperandIdx(MI.getOperand(0).getReg() - AArch64::W0 +
9248 AArch64::X0,
9249 /*TRI=*/nullptr) == -1))
9250 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 2)};
9251
9252 if (MI.getOpcode() == AArch64::ORRXrs &&
9253 MI.getOperand(1).getReg() == AArch64::XZR &&
9254 MI.getOperand(3).getImm() == 0x0)
9255 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 2)};
9256
9257 return std::nullopt;
9258}
9259
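/// Unlike isCopyInstrImpl, treat an ORRWrs from WZR with a zero shift as a
/// copy even when the w->w move also acts as a zero-extending w->x mov.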
std::optional<DestSourcePair>
AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
  if (MI.getOpcode() == AArch64::ORRWrs &&
      MI.getOperand(1).getReg() == AArch64::WZR &&
      MI.getOperand(3).getImm() == 0x0)
    return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
  return std::nullopt;
}

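/// If \p MI is an ADD/SUB-immediate instruction that defines \p Reg, return
/// the base register together with the signed, shift-adjusted immediate.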
std::optional<RegImmPair>
AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
  int Sign = 1;
  int64_t Offset = 0;

  // TODO: Handle cases where Reg is a super- or sub-register of the
  // destination register.
  const MachineOperand &Op0 = MI.getOperand(0);
  if (!Op0.isReg() || Reg != Op0.getReg())
    return std::nullopt;

  switch (MI.getOpcode()) {
  default:
    return std::nullopt;
  case AArch64::SUBWri:
  case AArch64::SUBXri:
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    Sign *= -1;
    [[fallthrough]];
  case AArch64::ADDSWri:
  case AArch64::ADDSXri:
  case AArch64::ADDWri:
  case AArch64::ADDXri: {
    // TODO: Third operand can be global address (usually some string).
    if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
        !MI.getOperand(2).isImm())
      return std::nullopt;
    int Shift = MI.getOperand(3).getImm();
    assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
    Offset = Sign * (MI.getOperand(2).getImm() << Shift);
  }
  }
  return RegImmPair{MI.getOperand(1).getReg(), Offset};
}

/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
/// the destination register then, if possible, describe the value in terms of
/// the source register.
static std::optional<ParamLoadedValue>
describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
                       const TargetInstrInfo *TII,
                       const TargetRegisterInfo *TRI) {
  auto DestSrc = TII->isCopyLikeInstr(MI);
  if (!DestSrc)
    return std::nullopt;

  Register DestReg = DestSrc->Destination->getReg();
  Register SrcReg = DestSrc->Source->getReg();

  auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});

  // If the described register is the destination, just return the source.
  if (DestReg == DescribedReg)
    return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);

  // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
  if (MI.getOpcode() == AArch64::ORRWrs &&
      TRI->isSuperRegister(DestReg, DescribedReg))
    return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);

  // We may need to describe the lower part of a ORRXrs move.
  if (MI.getOpcode() == AArch64::ORRXrs &&
      TRI->isSubRegister(DestReg, DescribedReg)) {
    Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
    return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
  }

  assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
         "Unhandled ORR[XW]rs copy case");

  return std::nullopt;
}

bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
  // Functions cannot be split to different sections on AArch64 if they have
  // a red zone. This is because relaxing a cross-section branch may require
  // incrementing the stack pointer to spill a register, which would overwrite
  // the red zone.
  if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
    return false;

  return TargetInstrInfo::isFunctionSafeToSplit(MF);
}

bool AArch64InstrInfo::isMBBSafeToSplitToCold(
    const MachineBasicBlock &MBB) const {
  // Asm Goto blocks can contain conditional branches to goto labels, which can
  // get moved out of range of the branch instruction.
  auto isAsmGoto = [](const MachineInstr &MI) {
    return MI.getOpcode() == AArch64::INLINEASM_BR;
  };
  if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
    return false;

  // Because jump tables are label-relative instead of table-relative, they all
  // must be in the same section or relocation fixup handling will fail.

  // Check if MBB is a jump table target
  const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
  auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
    return llvm::is_contained(JTE.MBBs, &MBB);
  };
  if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
    return false;

  // Check if MBB contains a jump table lookup
  for (const MachineInstr &MI : MBB) {
    switch (MI.getOpcode()) {
    case TargetOpcode::G_BRJT:
    case AArch64::JumpTableDest32:
    case AArch64::JumpTableDest16:
    case AArch64::JumpTableDest8:
      return false;
    default:
      continue;
    }
  }

  // MBB isn't a special case, so it's safe to be split to the cold section.
  return true;
}

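/// Describe, for debug info, the value loaded into \p Reg by \p MI: MOVZ
/// becomes an immediate (shifted into place), ORR register moves are handled
/// by describeORRLoadedValue, and everything else falls back to the generic
/// implementation.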
std::optional<ParamLoadedValue>
AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
                                      Register Reg) const {
  const MachineFunction *MF = MI.getMF();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  switch (MI.getOpcode()) {
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: {
    // MOVZWi may be used for producing zero-extended 32-bit immediates in
    // 64-bit parameters, so we need to consider super-registers.
    if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
      return std::nullopt;

    if (!MI.getOperand(1).isImm())
      return std::nullopt;
    int64_t Immediate = MI.getOperand(1).getImm();
    int Shift = MI.getOperand(2).getImm();
    return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
                            nullptr);
  }
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    return describeORRLoadedValue(MI, Reg, this, TRI);
  }

  return TargetInstrInfo::describeLoadedValue(MI, Reg);
}

bool AArch64InstrInfo::isExtendLikelyToBeFolded(
    MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
  assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
         ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
         ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);

  // Anyexts are nops.
  if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
    return true;

  Register DefReg = ExtMI.getOperand(0).getReg();
  if (!MRI.hasOneNonDBGUse(DefReg))
    return false;

  // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
  // addressing mode.
  auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
  return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
}

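// The following queries decode target-specific TSFlags that TableGen attaches
// to each AArch64 instruction description.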
uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
  return get(Opc).TSFlags & AArch64::ElementSizeMask;
}

bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
  return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
}

bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
  return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
}

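/// Allow a larger tail-duplication limit when optimizing aggressively.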
unsigned int
AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
  return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
}

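/// With \p Scale == 0 this checks the reg+imm forms (9-bit signed unscaled or
/// 12-bit unsigned scaled by the access size); otherwise it checks the
/// reg+reg forms, where the index register may be scaled by the access size.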
bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
                                             unsigned Scale) const {
  if (Offset && Scale)
    return false;

  // Check Reg + Imm
  if (!Scale) {
    // 9-bit signed offset
    if (isInt<9>(Offset))
      return true;

    // 12-bit unsigned offset
    unsigned Shift = Log2_64(NumBytes);
    if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
        // Must be a multiple of NumBytes (NumBytes is a power of 2)
        (Offset >> Shift) << Shift == Offset)
      return true;
    return false;
  }

  // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
  return Scale == 1 || (Scale > 0 && Scale == NumBytes);
}

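/// Return the opcode to use for an indirect call: the SLS-hardened BLRNoIP
/// pseudo when BLR hardening is enabled, plain BLR otherwise.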
unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
  if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
    return AArch64::BLRNoIP;
  else
    return AArch64::BLR;
}

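/// Emit a stack-probing loop that lowers SP down to \p TargetReg: the test
/// block decrements SP by the probe size and compares it against \p TargetReg,
/// the body block stores XZR to the new top of stack, and the exit block moves
/// SP to \p TargetReg and probes the final page. Returns an iterator at the
/// start of the exit block.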
MachineBasicBlock::iterator
AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
                                   Register TargetReg, bool FrameSetup) const {
  assert(TargetReg != AArch64::SP &&
         "New top of stack cannot already be in SP");

  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineFunction &MF = *MBB.getParent();
  const AArch64InstrInfo *TII =
      MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
  int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
  MachineBasicBlock *LoopTestMBB =
      MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  MF.insert(MBBInsertPoint, LoopTestMBB);
  MachineBasicBlock *LoopBodyMBB =
      MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  MF.insert(MBBInsertPoint, LoopBodyMBB);
  MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  MF.insert(MBBInsertPoint, ExitMBB);
  MachineInstr::MIFlag Flags =
      FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;

  // LoopTest:
  // SUB SP, SP, #ProbeSize
  emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
                  AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);

  // CMP SP, TargetReg
  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
          AArch64::XZR)
      .addReg(AArch64::SP)
      .addReg(TargetReg)
      .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
      .setMIFlags(Flags);

  // B.<Cond> LoopExit
  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::LE)
      .addMBB(ExitMBB)
      .setMIFlags(Flags);

  // STR XZR, [SP]
  BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
      .addReg(AArch64::XZR)
      .addReg(AArch64::SP)
      .addImm(0)
      .setMIFlags(Flags);

  // B loop
  BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
      .addMBB(LoopTestMBB)
      .setMIFlags(Flags);

  // LoopExit:
  // MOV SP, TargetReg
  BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
      .addReg(TargetReg)
      .addImm(0)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
      .setMIFlags(Flags);

  // LDR XZR, [SP]
  BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
      .addReg(AArch64::XZR, RegState::Define)
      .addReg(AArch64::SP)
      .addImm(0)
      .setMIFlags(Flags);

  ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
  ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  LoopTestMBB->addSuccessor(ExitMBB);
  LoopTestMBB->addSuccessor(LoopBodyMBB);
  LoopBodyMBB->addSuccessor(LoopTestMBB);
  MBB.addSuccessor(LoopTestMBB);

  // Update liveins.
  if (MF.getRegInfo().reservedRegsFrozen())
    fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});

  return ExitMBB->begin();
}

namespace {
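/// Loop-control information handed back to the MachinePipeliner: it remembers
/// the instruction that feeds the loop's conditional branch and the
/// normalized branch condition used when guarding the epilogue.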
class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
  MachineInstr *PredBranch;
  SmallVector<MachineOperand, 4> Cond;

public:
  AArch64PipelinerLoopInfo(MachineInstr *PredBranch,
                           const SmallVectorImpl<MachineOperand> &Cond)
      : PredBranch(PredBranch), Cond(Cond.begin(), Cond.end()) {}

  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
    // Make the instructions for loop control be placed in stage 0.
    // The predecessors of PredBranch are considered by the caller.
    return MI == PredBranch;
  }

  std::optional<bool> createTripCountGreaterCondition(
      int TC, MachineBasicBlock &MBB,
      SmallVectorImpl<MachineOperand> &CondParam) override {
    // A branch instruction will be inserted as "if (Cond) goto epilogue".
    // Cond is normalized for such use.
    // The predecessors of the branch are assumed to have already been inserted.
    CondParam = Cond;
    return {};
  }

  void setPreheader(MachineBasicBlock *NewPreheader) override {}

  void adjustTripCount(int TripCountAdjust) override {}

  void disposed() override {}
};
} // namespace

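/// Return true for the compare-and-branch (CBZ/CBNZ) and test-and-branch
/// (TBZ/TBNZ) opcodes.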
static bool isCompareAndBranch(unsigned Opcode) {
  switch (Opcode) {
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    return true;
  }
  return false;
}

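/// Analyze a single-block loop for software pipelining: locate the conditional
/// back-branch and the in-loop instruction that produces its condition (the
/// last NZCV def for Bcc, or the defining instruction of the tested register
/// for CBZ/TBZ-style branches); bail out if no such instruction is found.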
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
    return nullptr;

  // Infinite loops are not supported
  if (TBB == LoopBB && FBB == LoopBB)
    return nullptr;

  // Must be conditional branch
  if (FBB == nullptr)
    return nullptr;

  assert((TBB == LoopBB || FBB == LoopBB) &&
         "The Loop must be a single-basic-block loop");

  // Normalization for createTripCountGreaterCondition()
  if (TBB == LoopBB)
    reverseBranchCondition(Cond);

  MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
  const TargetRegisterInfo &TRI = getRegisterInfo();

  // Find the immediate predecessor of the conditional branch
  MachineInstr *PredBranch = nullptr;
  if (CondBranch->getOpcode() == AArch64::Bcc) {
    for (MachineInstr &MI : reverse(*LoopBB)) {
      if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
        PredBranch = &MI;
        break;
      }
    }
    if (!PredBranch)
      return nullptr;
  } else if (isCompareAndBranch(CondBranch->getOpcode())) {
    const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
    Register Reg = CondBranch->getOperand(0).getReg();
    if (!Reg.isVirtual())
      return nullptr;
    PredBranch = MRI.getVRegDef(Reg);

    // MachinePipeliner does not expect that the immediate predecessor is a Phi
    if (PredBranch->isPHI())
      return nullptr;

    if (PredBranch->getParent() != LoopBB)
      return nullptr;
  } else {
    return nullptr;
  }

  return std::make_unique<AArch64PipelinerLoopInfo>(PredBranch, Cond);
}

#define GET_INSTRINFO_HELPERS
#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"

