//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64GlobalISelUtils.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

namespace llvm {
class BlockFrequencyInfo;
class ProfileSummaryInfo;
}

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  void setupMF(MachineFunction &MF, GISelKnownBits *KB,
               CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
               BlockFrequencyInfo *BFI) override {
    InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
    MIB.setMF(MF);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    MFReturnAddr = Register();

    processPHIs(MF);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  // Returns true if the instruction was modified.
  bool preISelLower(MachineInstr &I);

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I);

  /// Save state that is shared between select calls, call select on \p I and
  /// then restore the saved state. This can be used to recursively call select
  /// within a select call.
  bool selectAndRestoreState(MachineInstr &I);

  // Do some preprocessing of G_PHIs before we begin selection.
  void processPHIs(MachineFunction &MF);

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  bool contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI);

  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  ///@{
  /// Helper functions for selectCompareBranch.
  bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
                                    MachineIRBuilder &MIB) const;
  bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB) const;
  ///@}

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI);

  bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;
  /// Helper to narrow vector that was widened by emitScalarToVector.
  /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
  /// vector, correspondingly.
  MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
                                 MachineIRBuilder &MIRBuilder,
                                 MachineRegisterInfo &MRI) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if
  /// std::nullopt is provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;

  /// Emit a sequence of instructions representing a constant \p CV for a
  /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
  ///
  /// \returns the last instruction in the sequence on success, and nullptr
  /// otherwise.
  MachineInstr *emitConstantVector(Register Dst, Constant *CV,
                                   MachineIRBuilder &MIRBuilder,
                                   MachineRegisterInfo &MRI);

  MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
                                  MachineIRBuilder &MIRBuilder);

  MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
                                   MachineIRBuilder &MIRBuilder, bool Inv);

  MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
                                   MachineIRBuilder &MIRBuilder, bool Inv);
  MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
                                   MachineIRBuilder &MIRBuilder);
  MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
                                     MachineIRBuilder &MIRBuilder, bool Inv);
  MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
                                   MachineIRBuilder &MIRBuilder);

  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                              MachineRegisterInfo &MRI);
  /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
  /// SUBREG_TO_REG.
  bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);

  /// Helper function to select vector load intrinsics like
  /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
  /// \p Opc is the opcode that the selected instruction should use.
  /// \p NumVecs is the number of vector destinations for the instruction.
  /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
                                 MachineInstr &I);
  bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
                                     MachineInstr &I);
  void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
                                  unsigned Opc);
  bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
                                      unsigned Opc);
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI);
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);

  unsigned emitConstantPoolEntry(const Constant *CPVal,
                                 MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;

  // Emit an integer compare between LHS and RHS, which checks for Predicate.
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a floating point comparison between \p LHS and \p RHS.
  /// \p Pred if given is the intended predicate to use.
  MachineInstr *
  emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
                std::optional<CmpInst::Predicate> = std::nullopt) const;

  MachineInstr *
  emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
            std::initializer_list<llvm::SrcOp> SrcOps,
            MachineIRBuilder &MIRBuilder,
            const ComplexRendererFns &RenderFns = std::nullopt) const;
  /// Helper function to emit an add or sub instruction.
  ///
  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
  /// in a specific order.
  ///
  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
  ///
  /// \code
  /// const std::array<std::array<unsigned, 2>, 5> Table {
  ///  {{AArch64::ADDXri, AArch64::ADDWri},
  ///   {AArch64::ADDXrs, AArch64::ADDWrs},
  ///   {AArch64::ADDXrr, AArch64::ADDWrr},
  ///   {AArch64::SUBXri, AArch64::SUBWri},
  ///   {AArch64::ADDXrx, AArch64::ADDWrx}}};
  /// \endcode
  ///
  /// Each row in the table corresponds to a different addressing mode. Each
  /// column corresponds to a different register size.
  ///
  /// \attention Rows must be structured as follows:
  ///   - Row 0: The ri opcode variants
  ///   - Row 1: The rs opcode variants
  ///   - Row 2: The rr opcode variants
  ///   - Row 3: The ri opcode variants for negative immediates
  ///   - Row 4: The rx opcode variants
  ///
  /// \attention Columns must be structured as follows:
  ///   - Column 0: The 64-bit opcode variants
  ///   - Column 1: The 32-bit opcode variants
  ///
  /// \p Dst is the destination register of the binop to emit.
  /// \p LHS is the left-hand operand of the binop to emit.
  /// \p RHS is the right-hand operand of the binop to emit.
  MachineInstr *emitAddSub(
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
      MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
                           AArch64CC::CondCode CC,
                           MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
                          AArch64CC::CondCode Pred,
                          MachineIRBuilder &MIRBuilder) const;
  /// Emit a CSet for a FP compare.
  ///
  /// \p Dst is expected to be a 32-bit scalar register.
  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                MachineIRBuilder &MIRBuilder) const;

  /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
  /// Might elide the instruction if the previous instruction already sets NZCV
  /// correctly.
  MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);

  /// Emit the overflow op for \p Opcode.
  ///
  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
  /// G_USUBO, etc.
  std::pair<MachineInstr *, AArch64CC::CondCode>
  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;

  bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);

  /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
  /// In some cases this is even possible with OR operations in the expression.
  MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
                                MachineIRBuilder &MIB) const;
  MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
                                          CmpInst::Predicate CC,
                                          AArch64CC::CondCode Predicate,
                                          AArch64CC::CondCode OutCC,
                                          MachineIRBuilder &MIB) const;
  MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
                                   bool Negate, Register CCOp,
                                   AArch64CC::CondCode Predicate,
                                   MachineIRBuilder &MIB) const;

  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
  /// \p IsNegative is true if the test should be "not zero".
  /// This will also optimize the test bit instruction when possible.
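  /// When \p IsNegative is true a TBNZ is emitted; otherwise a TBZ is emitted.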
  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
                            MachineBasicBlock *DstMBB,
                            MachineIRBuilder &MIB) const;

  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
                        MachineBasicBlock *DestMBB,
                        MachineIRBuilder &MIB) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
  /// from complex pattern matchers like selectAddrModeIndexed().
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
                                          MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;

  /// Returns a \p ComplexRendererFns which contains a base, offset, and
  /// whether or not a shift + extend should be folded into an addressing mode.
  /// Returns std::nullopt when this is not profitable or possible.
  ComplexRendererFns
  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                    MachineOperand &Offset, unsigned SizeInBytes,
                    bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
                                           bool AllowROR = false) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, true);
  }

  /// Given an extend instruction, determine the correct shift-extend type for
  /// that instruction.
  ///
  /// If the instruction is going to be used in a load or store, pass
  /// \p IsLoadStore = true.
  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
  ///
  /// \returns Either \p Reg if no change was necessary, or the new register
  /// created by moving \p Reg.
  ///
  /// Note: This uses emitCopy right now.
  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
                              MachineIRBuilder &MIB) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
                       int OpIdx) const;
  void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
                                    const MachineInstr &MI,
                                    int OpIdx = -1) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);

  // Optimization methods.
  bool tryOptSelect(GSelect &Sel);
  bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  bool ProduceNonFlagSettingCondBr = false;

  // Some cached values used during selection.
  // We use LR as a live-in register, and we keep track of it here as it can be
  // clobbered by calls.
  Register MFReturnAddr;

  MachineIRBuilder MIB;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
      RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
//
/// Given a register bank, and a type, return the smallest register class that
/// can represent that combination.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::XSeqPairsClassRegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    switch (Ty.getSizeInBits()) {
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC != &AArch64::FPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Returns the minimum size the given register bank can hold.
static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
    return 32;
  case AArch64::FPRRegBankID:
    return 8;
  default:
    llvm_unreachable("Tried to get minimum size for unknown register bank.");
  }
}

/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
/// Helper function for functions like createDTuple and createQTuple.
///
/// \p RegClassIDs - The list of register class IDs available for some tuple of
/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
/// expected to contain between 2 and 4 tuple classes.
///
/// \p SubRegs - The list of subregister classes associated with each register
/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
/// subregister class. The index of each subregister class is expected to
/// correspond with the index of each register class.
///
/// \returns Either the destination register of REG_SEQUENCE instruction that
/// was created, or the 0th element of \p Regs if \p Regs contains a single
/// element.
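///
/// For illustration only, a sketch of a typical call through one of the
/// wrappers below (the source register names here are hypothetical):
/// \code
///   // Combine two Q registers into a QQ tuple, e.g. for a multi-vector
///   // store like ST2.
///   Register Tuple = createQTuple({Src0, Src1}, MIB);
/// \endcode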
static Register createTuple(ArrayRef<Register> Regs,
                            const unsigned RegClassIDs[],
                            const unsigned SubRegs[], MachineIRBuilder &MIB) {
  unsigned NumRegs = Regs.size();
  if (NumRegs == 1)
    return Regs[0];
  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between two and 4 registers in a tuple!");
  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
  auto RegSequence =
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  }
  return RegSequence.getReg(0);
}

/// Create a tuple of D-registers using the registers in \p Regs.
static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}

/// Create a tuple of Q-registers using the registers in \p Regs.
static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}

static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
  auto &MI = *Root.getParent();
  auto &MBB = *MI.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();
  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    auto ValAndVReg =
        getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
    if (!ValAndVReg)
      return std::nullopt;
    Immed = ValAndVReg->Value.getSExtValue();
  } else
    return std::nullopt;
  return Immed;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - not all operands are on the same register bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical registers operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!MO.getReg().isVirtual()) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
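      // Note: there is no scalar FPR logical instruction, so a 64-bit OR on
      // the FPR bank is selected to the 64-bit vector ORR, which operates on
      // the same D register.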
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    case 128:
      return isStore ? AArch64::STRQui : AArch64::LDRQui;
    }
    break;
  }
  return GenericOpc;
}

/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g "To = COPY SrcReg:SubReg"
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  assert(SubReg && "Expected a valid subregister");

  MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!I.getOperand(0).getReg().isPhysical())
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

// FIXME: We need some sort of API in RBI/TRI to allow generic code to
// constrain operands of simple instructions given a TargetRegisterClass
// and LLT
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
                             const RegisterBankInfo &RBI) {
  for (MachineOperand &MO : I.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isPhysical())
      continue;
    LLT Ty = MRI.getType(Reg);
    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
    const TargetRegisterClass *RC =
        RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
    if (!RC) {
      const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
      RC = getRegClassForTypeOnBank(Ty, RB);
      if (!RC) {
        LLVM_DEBUG(
            dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
        break;
      }
    }
    RBI.constrainGenericRegister(Reg, *RC, MRI);
  }

  return true;
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // Is this a copy? If so, then we may need to insert a subregister copy.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (DstReg.isPhysical())
      return true;
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // If this is a GPR ZEXT that we want to just reduce down into a copy.
  // The sizes will be mismatched with the source < 32b but that's ok.
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  I.setDesc(TII.get(AArch64::COPY));
  return true;
}

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}

MachineInstr *
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
                                       Register False, AArch64CC::CondCode CC,
                                       MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
  if (Ty.isVector())
    return nullptr;
  const unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
    constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
    return &*FCSel;
  }

  // By default, we'll try and emit a CSEL.
  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  bool Optimized = false;
  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
                                 &Optimized](Register &Reg, Register &OtherReg,
                                             bool Invert) {
    if (Optimized)
      return false;

    // Attempt to fold:
    //
    // %sub = G_SUB 0, %x
    // %select = G_SELECT cc, %reg, %sub
    //
    // Into:
    // %select = CSNEG %reg, %x, cc
    Register MatchReg;
    if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %xor = G_XOR %x, -1
    // %select = G_SELECT cc, %reg, %xor
    //
    // Into:
    // %select = CSINV %reg, %x, cc
    if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %add = G_ADD %x, 1
    // %select = G_SELECT cc, %reg, %add
    //
    // Into:
    // %select = CSINC %reg, %x, cc
    if (mi_match(Reg, MRI,
                 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
                          m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    return false;
  };

  // Helper lambda which tries to use CSINC/CSINV for the instruction when its
  // true/false values are constants.
  // FIXME: All of these patterns already exist in tablegen. We should be
  // able to import these.
  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
                          &Optimized]() {
    if (Optimized)
      return false;
    auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
    auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
    if (!TrueCst && !FalseCst)
      return false;

    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();

      if (T == 0 && F == 1) {
        // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = ZReg;
        False = ZReg;
        return true;
      }

      if (T == 0 && F == -1) {
        // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = ZReg;
        False = ZReg;
        return true;
      }
    }

    if (TrueCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      if (T == 1) {
        // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
        return true;
      }

      if (T == -1) {
        // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
        return true;
      }
    }

    if (FalseCst) {
      int64_t F = FalseCst->Value.getSExtValue();
      if (F == 1) {
        // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        False = ZReg;
        return true;
      }

      if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        False = ZReg;
        return true;
      }
    }
    return false;
  };

  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
  Optimized |= TryOptSelectCst();
  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
  return &*SelectInst;
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
                                    AArch64CC::CondCode &CondCode,
                                    AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (CC) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

/// Convert an IR fp condition code to an AArch64 CC.
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
/// should be AND'ed instead of OR'ed.
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
                                     AArch64CC::CondCode &CondCode,
                                     AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (CC) {
  default:
    changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
    assert(CondCode2 == AArch64CC::AL);
    break;
  case CmpInst::FCMP_ONE:
    // (a one b)
    // == ((a olt b) || (a ogt b))
    // == ((a ord b) && (a une b))
    CondCode = AArch64CC::VC;
    CondCode2 = AArch64CC::NE;
    break;
  case CmpInst::FCMP_UEQ:
    // (a ueq b)
    // == ((a uno b) || (a oeq b))
    // == ((a ule b) && (a uge b))
    CondCode = AArch64CC::PL;
    CondCode2 = AArch64CC::LE;
    break;
  }
}

/// Return a register which can be used as a bit to test in a TB(N)Z.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
                              MachineRegisterInfo &MRI) {
  assert(Reg.isValid() && "Expected valid register!");
  bool HasZext = false;
  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
    unsigned Opc = MI->getOpcode();

    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      break;

    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
    //
    // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
    // on the truncated x is the same as the bit number on x.
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      if (Opc == TargetOpcode::G_ZEXT)
        HasZext = true;

      Register NextReg = MI->getOperand(1).getReg();
      // Did we find something worth folding?
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
        break;

      // NextReg is worth folding. Keep looking.
      Reg = NextReg;
      continue;
    }

    // Attempt to find a suitable operation with a constant on one side.
    std::optional<uint64_t> C;
    Register TestReg;
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
      auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
      if (!VRegAndVal) {
        // AND commutes, check the other side for a constant.
        // FIXME: Can we canonicalize the constant so that it's always on the
        // same side at some point earlier?
        std::swap(ConstantReg, TestReg);
        VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
      }
      if (VRegAndVal) {
        if (HasZext)
          C = VRegAndVal->Value.getZExtValue();
        else
          C = VRegAndVal->Value.getSExtValue();
      }
      break;
    }
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
      auto VRegAndVal =
          getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
      if (VRegAndVal)
        C = VRegAndVal->Value.getSExtValue();
      break;
    }
    }

    // Didn't find a constant or viable register. Bail out of the loop.
    if (!C || !TestReg.isValid())
      break;

    // We found a suitable instruction with a constant. Check to see if we can
    // walk through the instruction.
    Register NextReg;
    unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
      // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
      if ((*C >> Bit) & 1)
        NextReg = TestReg;
      break;
    case TargetOpcode::G_SHL:
      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
      // the type of the register.
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit - *C;
      }
      break;
    case TargetOpcode::G_ASHR:
      // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
      // in x
      NextReg = TestReg;
      Bit = Bit + *C;
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
      break;
    case TargetOpcode::G_LSHR:
      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
      if ((Bit + *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit + *C;
      }
      break;
    case TargetOpcode::G_XOR:
      // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
      // appropriate.
      //
      // e.g. If x' = xor x, c, and the b-th bit is set in c then
      //
      // tbz x', b -> tbnz x, b
      //
      // Because x' only has the b-th bit set if x does not.
      if ((*C >> Bit) & 1)
        Invert = !Invert;
      NextReg = TestReg;
      break;
    }

    // Check if we found anything worth folding.
    if (!NextReg.isValid())
      return Reg;
    Reg = NextReg;
  }

  return Reg;
}

1572MachineInstr *AArch64InstructionSelector::emitTestBit(
1573 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1574 MachineIRBuilder &MIB) const {
1575 assert(TestReg.isValid());
1576 assert(ProduceNonFlagSettingCondBr &&
1577 "Cannot emit TB(N)Z with speculation tracking!");
1578 MachineRegisterInfo &MRI = *MIB.getMRI();
1579
1580 // Attempt to optimize the test bit by walking over instructions.
1581 TestReg = getTestBitReg(Reg: TestReg, Bit, Invert&: IsNegative, MRI);
1582 LLT Ty = MRI.getType(Reg: TestReg);
1583 unsigned Size = Ty.getSizeInBits();
1584 assert(!Ty.isVector() && "Expected a scalar!");
1585 assert(Bit < 64 && "Bit is too large!");
1586
1587  // When the bit to test fits in 32 bits we use the W form of TB(N)Z;
1588  // otherwise we use the X form. Narrow or widen the test register to match.
1589 bool UseWReg = Bit < 32;
1590 unsigned NecessarySize = UseWReg ? 32 : 64;
1591 if (Size != NecessarySize)
1592 TestReg = moveScalarRegClass(
1593 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1594 MIB);
1595
1596 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1597 {AArch64::TBZW, AArch64::TBNZW}};
1598 unsigned Opc = OpcTable[UseWReg][IsNegative];
1599 auto TestBitMI =
1600 MIB.buildInstr(Opcode: Opc).addReg(RegNo: TestReg).addImm(Val: Bit).addMBB(MBB: DstMBB);
1601 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1602 return &*TestBitMI;
1603}
1604
1605bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1606 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1607 MachineIRBuilder &MIB) const {
1608 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1609 // Given something like this:
1610 //
1611 // %x = ...Something...
1612 // %one = G_CONSTANT i64 1
1613 // %zero = G_CONSTANT i64 0
1614 // %and = G_AND %x, %one
1615 // %cmp = G_ICMP intpred(ne), %and, %zero
1616 // %cmp_trunc = G_TRUNC %cmp
1617 // G_BRCOND %cmp_trunc, %bb.3
1618 //
1619 // We want to try and fold the AND into the G_BRCOND and produce either a
1620 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1621 //
1622 // In this case, we'd get
1623 //
1624 // TBNZ %x %bb.3
1625 //
1626
1627 // Check if the AND has a constant on its RHS which we can use as a mask.
1628 // If it's a power of 2, then it's the same as checking a specific bit.
1629  // (e.g., ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1630 auto MaybeBit = getIConstantVRegValWithLookThrough(
1631 VReg: AndInst.getOperand(i: 2).getReg(), MRI: *MIB.getMRI());
1632 if (!MaybeBit)
1633 return false;
1634
1635 int32_t Bit = MaybeBit->Value.exactLogBase2();
1636 if (Bit < 0)
1637 return false;
1638
1639 Register TestReg = AndInst.getOperand(i: 1).getReg();
1640
1641 // Emit a TB(N)Z.
1642 emitTestBit(TestReg, Bit, IsNegative: Invert, DstMBB, MIB);
1643 return true;
1644}
1645
1646MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1647 bool IsNegative,
1648 MachineBasicBlock *DestMBB,
1649 MachineIRBuilder &MIB) const {
1650 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1651 MachineRegisterInfo &MRI = *MIB.getMRI();
1652 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1653 AArch64::GPRRegBankID &&
1654 "Expected GPRs only?");
1655 auto Ty = MRI.getType(Reg: CompareReg);
1656 unsigned Width = Ty.getSizeInBits();
1657 assert(!Ty.isVector() && "Expected scalar only?");
1658 assert(Width <= 64 && "Expected width to be at most 64?");
1659 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1660 {AArch64::CBNZW, AArch64::CBNZX}};
1661 unsigned Opc = OpcTable[IsNegative][Width == 64];
1662 auto BranchMI = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {CompareReg}).addMBB(MBB: DestMBB);
1663 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1664 return &*BranchMI;
1665}
1666
1667bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1668 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1669 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1670 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1671 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1672 // totally clean. Some of them require two branches to implement.
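  // For example, an "one" (ordered not-equal) predicate has no single AArch64
  // condition code, so changeFCMPPredToAArch64CC returns a second code (CC2 !=
  // AL below) and we emit two Bcc instructions to the same destination.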
1673 auto Pred = (CmpInst::Predicate)FCmp.getOperand(i: 1).getPredicate();
1674 emitFPCompare(LHS: FCmp.getOperand(i: 2).getReg(), RHS: FCmp.getOperand(i: 3).getReg(), MIRBuilder&: MIB,
1675 Pred);
1676 AArch64CC::CondCode CC1, CC2;
1677 changeFCMPPredToAArch64CC(P: static_cast<CmpInst::Predicate>(Pred), CondCode&: CC1, CondCode2&: CC2);
1678 MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
1679 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1680 if (CC2 != AArch64CC::AL)
1681 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1682 I.eraseFromParent();
1683 return true;
1684}
1685
1686bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1687 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1688 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1689 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1690 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1691 //
1692 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1693 // instructions will not be produced, as they are conditional branch
1694 // instructions that do not set flags.
1695 if (!ProduceNonFlagSettingCondBr)
1696 return false;
1697
1698 MachineRegisterInfo &MRI = *MIB.getMRI();
1699 MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
1700 auto Pred =
1701 static_cast<CmpInst::Predicate>(ICmp.getOperand(i: 1).getPredicate());
1702 Register LHS = ICmp.getOperand(i: 2).getReg();
1703 Register RHS = ICmp.getOperand(i: 3).getReg();
1704
1705 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1706 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
1707 MachineInstr *AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);
1708
1709 // When we can emit a TB(N)Z, prefer that.
1710 //
1711 // Handle non-commutative condition codes first.
1712 // Note that we don't want to do this when we have a G_AND because it can
1713 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1714 if (VRegAndVal && !AndInst) {
1715 int64_t C = VRegAndVal->Value.getSExtValue();
1716
1717    // When we have a signed greater-than comparison against -1, we can just
1718    // test if the msb is zero.
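    // (x > -1 (signed) holds iff x >= 0, i.e. iff the sign bit of x is clear,
    // so a TBZ on the most significant bit suffices.)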
1719 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1720 uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
1721 emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
1722 I.eraseFromParent();
1723 return true;
1724 }
1725
1726    // When we have a signed less-than comparison against zero, we can just
1727    // test if the msb is not zero.
1728 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1729 uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
1730 emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ true, DstMBB: DestMBB, MIB);
1731 I.eraseFromParent();
1732 return true;
1733 }
1734
1735 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1736 // we can test if the msb is zero.
1737 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1738 uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
1739 emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
1740 I.eraseFromParent();
1741 return true;
1742 }
1743 }
1744
1745 // Attempt to handle commutative condition codes. Right now, that's only
1746 // eq/ne.
1747 if (ICmpInst::isEquality(P: Pred)) {
1748 if (!VRegAndVal) {
1749 std::swap(a&: RHS, b&: LHS);
1750 VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
1751 AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);
1752 }
1753
1754 if (VRegAndVal && VRegAndVal->Value == 0) {
1755 // If there's a G_AND feeding into this branch, try to fold it away by
1756 // emitting a TB(N)Z instead.
1757 //
1758 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1759 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1760 // would be redundant.
1761 if (AndInst &&
1762 tryOptAndIntoCompareBranch(
1763 AndInst&: *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DstMBB: DestMBB, MIB)) {
1764 I.eraseFromParent();
1765 return true;
1766 }
1767
1768 // Otherwise, try to emit a CB(N)Z instead.
1769 auto LHSTy = MRI.getType(Reg: LHS);
1770 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1771 emitCBZ(CompareReg: LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1772 I.eraseFromParent();
1773 return true;
1774 }
1775 }
1776 }
1777
1778 return false;
1779}
1780
1781bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1782 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1783 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1784 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1785 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1786 return true;
1787
1788 // Couldn't optimize. Emit a compare + a Bcc.
1789 MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
1790 auto PredOp = ICmp.getOperand(i: 1);
1791 emitIntegerCompare(LHS&: ICmp.getOperand(i: 2), RHS&: ICmp.getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB);
1792 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1793 P: static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1794 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1795 I.eraseFromParent();
1796 return true;
1797}
1798
1799bool AArch64InstructionSelector::selectCompareBranch(
1800 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1801 Register CondReg = I.getOperand(i: 0).getReg();
1802 MachineInstr *CCMI = MRI.getVRegDef(Reg: CondReg);
1803 // Try to select the G_BRCOND using whatever is feeding the condition if
1804 // possible.
1805 unsigned CCMIOpc = CCMI->getOpcode();
1806 if (CCMIOpc == TargetOpcode::G_FCMP)
1807 return selectCompareBranchFedByFCmp(I, FCmp&: *CCMI, MIB);
1808 if (CCMIOpc == TargetOpcode::G_ICMP)
1809 return selectCompareBranchFedByICmp(I, ICmp&: *CCMI, MIB);
1810
1811 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1812 // instructions will not be produced, as they are conditional branch
1813 // instructions that do not set flags.
1814 if (ProduceNonFlagSettingCondBr) {
1815 emitTestBit(TestReg: CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1816 DstMBB: I.getOperand(i: 1).getMBB(), MIB);
1817 I.eraseFromParent();
1818 return true;
1819 }
1820
1821 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1822 auto TstMI =
1823 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1824 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1825 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1826 .addImm(AArch64CC::NE)
1827 .addMBB(I.getOperand(1).getMBB());
1828 I.eraseFromParent();
1829 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1830}
1831
1832/// Returns the element immediate value of a vector shift operand if found.
1833/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1834static std::optional<int64_t> getVectorShiftImm(Register Reg,
1835 MachineRegisterInfo &MRI) {
1836 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1837 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1838 return getAArch64VectorSplatScalar(MI: *OpMI, MRI);
1839}
1840
1841/// Matches and returns the shift immediate value for a SHL instruction given
1842/// a shift operand.
1843static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1844 MachineRegisterInfo &MRI) {
1845 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1846 if (!ShiftImm)
1847 return std::nullopt;
1848 // Check the immediate is in range for a SHL.
1849 int64_t Imm = *ShiftImm;
1850 if (Imm < 0)
1851 return std::nullopt;
1852 switch (SrcTy.getElementType().getSizeInBits()) {
1853 default:
1854 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1855 return std::nullopt;
1856 case 8:
1857 if (Imm > 7)
1858 return std::nullopt;
1859 break;
1860 case 16:
1861 if (Imm > 15)
1862 return std::nullopt;
1863 break;
1864 case 32:
1865 if (Imm > 31)
1866 return std::nullopt;
1867 break;
1868 case 64:
1869 if (Imm > 63)
1870 return std::nullopt;
1871 break;
1872 }
1873 return Imm;
1874}
1875
1876bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1877 MachineRegisterInfo &MRI) {
1878 assert(I.getOpcode() == TargetOpcode::G_SHL);
1879 Register DstReg = I.getOperand(i: 0).getReg();
1880 const LLT Ty = MRI.getType(Reg: DstReg);
1881 Register Src1Reg = I.getOperand(i: 1).getReg();
1882 Register Src2Reg = I.getOperand(i: 2).getReg();
1883
1884 if (!Ty.isVector())
1885 return false;
1886
1887 // Check if we have a vector of constants on RHS that we can select as the
1888 // immediate form.
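  // For example (illustrative), shifting a <4 x s32> vector left by a
  // G_BUILD_VECTOR splat of 3 selects to SHLv4i32_shift with immediate 3,
  // while a non-constant shift amount selects to USHLv4i32 with the register
  // operand.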
1889 std::optional<int64_t> ImmVal = getVectorSHLImm(SrcTy: Ty, Reg: Src2Reg, MRI);
1890
1891 unsigned Opc = 0;
1892 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
1893 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1894 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
1895 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1896 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
1897 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1898 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
1899 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1900 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
1901 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1902 } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) {
1903 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1904 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) {
1905 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1906 } else {
1907 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1908 return false;
1909 }
1910
1911 auto Shl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg});
1912 if (ImmVal)
1913 Shl.addImm(Val: *ImmVal);
1914 else
1915 Shl.addUse(RegNo: Src2Reg);
1916 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1917 I.eraseFromParent();
1918 return true;
1919}
1920
1921bool AArch64InstructionSelector::selectVectorAshrLshr(
1922 MachineInstr &I, MachineRegisterInfo &MRI) {
1923 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1924 I.getOpcode() == TargetOpcode::G_LSHR);
1925 Register DstReg = I.getOperand(i: 0).getReg();
1926 const LLT Ty = MRI.getType(Reg: DstReg);
1927 Register Src1Reg = I.getOperand(i: 1).getReg();
1928 Register Src2Reg = I.getOperand(i: 2).getReg();
1929
1930 if (!Ty.isVector())
1931 return false;
1932
1933 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1934
1935 // We expect the immediate case to be lowered in the PostLegalCombiner to
1936 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1937
1938  // There is no shift-right-by-register instruction; instead, the
1939  // shift-left-by-register instruction takes a signed shift amount, where
1940  // negative values specify a right shift.
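  // For example (illustrative), a <2 x s64> G_ASHR by %amt is selected as
  //   %neg = NEGv2i64 %amt
  //   %dst = SSHLv2i64 %src, %neg
  // while a G_LSHR uses USHLv2i64 instead.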
1941
1942 unsigned Opc = 0;
1943 unsigned NegOpc = 0;
1944 const TargetRegisterClass *RC =
1945 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1946 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
1947 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1948 NegOpc = AArch64::NEGv2i64;
1949 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
1950 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1951 NegOpc = AArch64::NEGv4i32;
1952 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
1953 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1954 NegOpc = AArch64::NEGv2i32;
1955 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
1956 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1957 NegOpc = AArch64::NEGv4i16;
1958 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
1959 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1960 NegOpc = AArch64::NEGv8i16;
1961 } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) {
1962 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1963 NegOpc = AArch64::NEGv16i8;
1964 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) {
1965 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1966 NegOpc = AArch64::NEGv8i8;
1967 } else {
1968 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1969 return false;
1970 }
1971
1972 auto Neg = MIB.buildInstr(Opc: NegOpc, DstOps: {RC}, SrcOps: {Src2Reg});
1973 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1974 auto SShl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg, Neg});
1975 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1976 I.eraseFromParent();
1977 return true;
1978}
1979
1980bool AArch64InstructionSelector::selectVaStartAAPCS(
1981 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1982 return false;
1983}
1984
1985bool AArch64InstructionSelector::selectVaStartDarwin(
1986 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1987 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1988 Register ListReg = I.getOperand(i: 0).getReg();
1989
1990 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1991
1992 int FrameIdx = FuncInfo->getVarArgsStackIndex();
1993 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
1994 CC: MF.getFunction().getCallingConv())) {
1995 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
1996 ? FuncInfo->getVarArgsGPRIndex()
1997 : FuncInfo->getVarArgsStackIndex();
1998 }
1999
2000 auto MIB =
2001 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2002 .addDef(ArgsAddrReg)
2003 .addFrameIndex(FrameIdx)
2004 .addImm(0)
2005 .addImm(0);
2006
2007 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2008
2009 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2010 .addUse(ArgsAddrReg)
2011 .addUse(ListReg)
2012 .addImm(0)
2013 .addMemOperand(*I.memoperands_begin());
2014
2015 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2016 I.eraseFromParent();
2017 return true;
2018}
2019
2020void AArch64InstructionSelector::materializeLargeCMVal(
2021 MachineInstr &I, const Value *V, unsigned OpFlags) {
2022 MachineBasicBlock &MBB = *I.getParent();
2023 MachineFunction &MF = *MBB.getParent();
2024 MachineRegisterInfo &MRI = MF.getRegInfo();
2025
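  // The emitted sequence is roughly: MOVZ materializes bits [15:0] of the
  // address (MO_G0), then successive MOVKs patch in bits [31:16], [47:32] and
  // [63:48] (MO_G1..MO_G3, with MO_NC on all but the last fragment).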
2026 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2027 MovZ->addOperand(MF, I.getOperand(i: 1));
2028 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2029 AArch64II::MO_NC);
2030 MovZ->addOperand(MF, MachineOperand::CreateImm(Val: 0));
2031 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2032
2033 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2034 Register ForceDstReg) {
2035 Register DstReg = ForceDstReg
2036 ? ForceDstReg
2037 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2038 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2039 if (auto *GV = dyn_cast<GlobalValue>(Val: V)) {
2040 MovI->addOperand(MF, MachineOperand::CreateGA(
2041 GV, Offset: MovZ->getOperand(1).getOffset(), TargetFlags: Flags));
2042 } else {
2043 MovI->addOperand(
2044 MF, MachineOperand::CreateBA(BA: cast<BlockAddress>(Val: V),
2045 Offset: MovZ->getOperand(1).getOffset(), TargetFlags: Flags));
2046 }
2047 MovI->addOperand(MF, MachineOperand::CreateImm(Val: Offset));
2048 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2049 return DstReg;
2050 };
2051 Register DstReg = BuildMovK(MovZ.getReg(0),
2052 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2053 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2054 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(i: 0).getReg());
2055}
2056
2057bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2058 MachineBasicBlock &MBB = *I.getParent();
2059 MachineFunction &MF = *MBB.getParent();
2060 MachineRegisterInfo &MRI = MF.getRegInfo();
2061
2062 switch (I.getOpcode()) {
2063 case TargetOpcode::G_STORE: {
2064 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2065 MachineOperand &SrcOp = I.getOperand(i: 0);
2066 if (MRI.getType(Reg: SrcOp.getReg()).isPointer()) {
2067 // Allow matching with imported patterns for stores of pointers. Unlike
2068 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2069 // and constrain.
2070 auto Copy = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: SrcOp);
2071 Register NewSrc = Copy.getReg(Idx: 0);
2072 SrcOp.setReg(NewSrc);
2073 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2074 Changed = true;
2075 }
2076 return Changed;
2077 }
2078 case TargetOpcode::G_PTR_ADD:
2079 return convertPtrAddToAdd(I, MRI);
2080 case TargetOpcode::G_LOAD: {
2081 // For scalar loads of pointers, we try to convert the dest type from p0
2082 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2083 // conversion, this should be ok because all users should have been
2084 // selected already, so the type doesn't matter for them.
2085 Register DstReg = I.getOperand(i: 0).getReg();
2086 const LLT DstTy = MRI.getType(Reg: DstReg);
2087 if (!DstTy.isPointer())
2088 return false;
2089 MRI.setType(VReg: DstReg, Ty: LLT::scalar(SizeInBits: 64));
2090 return true;
2091 }
2092 case AArch64::G_DUP: {
2093 // Convert the type from p0 to s64 to help selection.
2094 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2095 if (!DstTy.isPointerVector())
2096 return false;
2097 auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: I.getOperand(i: 1).getReg());
2098 MRI.setType(VReg: I.getOperand(i: 0).getReg(),
2099 Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64)));
2100 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2101 I.getOperand(i: 1).setReg(NewSrc.getReg(Idx: 0));
2102 return true;
2103 }
2104 case TargetOpcode::G_UITOFP:
2105 case TargetOpcode::G_SITOFP: {
2106    // If both source and destination regbanks are FPR, then convert the opcode
2107    // to G_SITOF/G_UITOF so that the importer can select it to an fpr variant.
2108 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2109 // copy.
2110 Register SrcReg = I.getOperand(i: 1).getReg();
2111 LLT SrcTy = MRI.getType(Reg: SrcReg);
2112 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2113 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2114 return false;
2115
2116 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2117 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2118 I.setDesc(TII.get(AArch64::G_SITOF));
2119 else
2120 I.setDesc(TII.get(AArch64::G_UITOF));
2121 return true;
2122 }
2123 return false;
2124 }
2125 default:
2126 return false;
2127 }
2128}
2129
2130/// This lowering tries to look for G_PTR_ADD instructions and then converts
2131/// them to a standard G_ADD with a COPY on the source.
2132///
2133/// The motivation behind this is to expose the add semantics to the imported
2134/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2135/// because the selector works bottom up, uses before defs. By the time we
2136/// end up trying to select a G_PTR_ADD, we should have already attempted to
2137/// fold this into addressing modes and were therefore unsuccessful.
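/// Illustrative example (hypothetical virtual registers):
///   %dst:gpr(p0) = G_PTR_ADD %base(p0), %off(s64)
/// becomes
///   %intbase:gpr(s64) = G_PTRTOINT %base(p0)
///   %dst:gpr(s64) = G_ADD %intbase, %off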
2138bool AArch64InstructionSelector::convertPtrAddToAdd(
2139 MachineInstr &I, MachineRegisterInfo &MRI) {
2140 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2141 Register DstReg = I.getOperand(i: 0).getReg();
2142 Register AddOp1Reg = I.getOperand(i: 1).getReg();
2143 const LLT PtrTy = MRI.getType(Reg: DstReg);
2144 if (PtrTy.getAddressSpace() != 0)
2145 return false;
2146
2147 const LLT CastPtrTy =
2148 PtrTy.isVector() ? LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) : LLT::scalar(SizeInBits: 64);
2149 auto PtrToInt = MIB.buildPtrToInt(Dst: CastPtrTy, Src: AddOp1Reg);
2150 // Set regbanks on the registers.
2151 if (PtrTy.isVector())
2152 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2153 else
2154 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2155
2156 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2157 // %dst(intty) = G_ADD %intbase, off
2158 I.setDesc(TII.get(TargetOpcode::G_ADD));
2159 MRI.setType(VReg: DstReg, Ty: CastPtrTy);
2160 I.getOperand(i: 1).setReg(PtrToInt.getReg(Idx: 0));
2161 if (!select(I&: *PtrToInt)) {
2162 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2163 return false;
2164 }
2165
2166 // Also take the opportunity here to try to do some optimization.
2167 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2168 Register NegatedReg;
2169 if (!mi_match(R: I.getOperand(i: 2).getReg(), MRI, P: m_Neg(Src: m_Reg(R&: NegatedReg))))
2170 return true;
2171 I.getOperand(i: 2).setReg(NegatedReg);
2172 I.setDesc(TII.get(TargetOpcode::G_SUB));
2173 return true;
2174}
2175
2176bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2177 MachineRegisterInfo &MRI) {
2178 // We try to match the immediate variant of LSL, which is actually an alias
2179 // for a special case of UBFM. Otherwise, we fall back to the imported
2180 // selector which will match the register variant.
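  // For example (illustrative), a 64-bit left shift by 3 renders as
  // UBFMXri %dst, %src, 61, 60, which is the "lsl xd, xn, #3" alias of
  // "ubfm xd, xn, #61, #60".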
2181 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2182 const auto &MO = I.getOperand(i: 2);
2183 auto VRegAndVal = getIConstantVRegVal(VReg: MO.getReg(), MRI);
2184 if (!VRegAndVal)
2185 return false;
2186
2187 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2188 if (DstTy.isVector())
2189 return false;
2190 bool Is64Bit = DstTy.getSizeInBits() == 64;
2191 auto Imm1Fn = Is64Bit ? selectShiftA_64(Root: MO) : selectShiftA_32(Root: MO);
2192 auto Imm2Fn = Is64Bit ? selectShiftB_64(Root: MO) : selectShiftB_32(Root: MO);
2193
2194 if (!Imm1Fn || !Imm2Fn)
2195 return false;
2196
2197 auto NewI =
2198 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2199 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2200
2201 for (auto &RenderFn : *Imm1Fn)
2202 RenderFn(NewI);
2203 for (auto &RenderFn : *Imm2Fn)
2204 RenderFn(NewI);
2205
2206 I.eraseFromParent();
2207 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2208}
2209
2210bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2211 MachineInstr &I, MachineRegisterInfo &MRI) {
2212 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2213 // If we're storing a scalar, it doesn't matter what register bank that
2214 // scalar is on. All that matters is the size.
2215 //
2216 // So, if we see something like this (with a 32-bit scalar as an example):
2217 //
2218 // %x:gpr(s32) = ... something ...
2219 // %y:fpr(s32) = COPY %x:gpr(s32)
2220 // G_STORE %y:fpr(s32)
2221 //
2222 // We can fix this up into something like this:
2223 //
2224 // G_STORE %x:gpr(s32)
2225 //
2226 // And then continue the selection process normally.
2227 Register DefDstReg = getSrcRegIgnoringCopies(Reg: I.getOperand(i: 0).getReg(), MRI);
2228 if (!DefDstReg.isValid())
2229 return false;
2230 LLT DefDstTy = MRI.getType(Reg: DefDstReg);
2231 Register StoreSrcReg = I.getOperand(i: 0).getReg();
2232 LLT StoreSrcTy = MRI.getType(Reg: StoreSrcReg);
2233
2234 // If we get something strange like a physical register, then we shouldn't
2235 // go any further.
2236 if (!DefDstTy.isValid())
2237 return false;
2238
2239 // Are the source and dst types the same size?
2240 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2241 return false;
2242
2243 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2244 RBI.getRegBank(DefDstReg, MRI, TRI))
2245 return false;
2246
2247 // We have a cross-bank copy, which is entering a store. Let's fold it.
2248 I.getOperand(i: 0).setReg(DefDstReg);
2249 return true;
2250}
2251
2252bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2253 assert(I.getParent() && "Instruction should be in a basic block!");
2254 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2255
2256 MachineBasicBlock &MBB = *I.getParent();
2257 MachineFunction &MF = *MBB.getParent();
2258 MachineRegisterInfo &MRI = MF.getRegInfo();
2259
2260 switch (I.getOpcode()) {
2261 case AArch64::G_DUP: {
2262 // Before selecting a DUP instruction, check if it is better selected as a
2263 // MOV or load from a constant pool.
2264 Register Src = I.getOperand(i: 1).getReg();
2265 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(VReg: Src, MRI);
2266 if (!ValAndVReg)
2267 return false;
2268 LLVMContext &Ctx = MF.getFunction().getContext();
2269 Register Dst = I.getOperand(i: 0).getReg();
2270 auto *CV = ConstantDataVector::getSplat(
2271 NumElts: MRI.getType(Reg: Dst).getNumElements(),
2272 Elt: ConstantInt::get(Ty: Type::getIntNTy(C&: Ctx, N: MRI.getType(Reg: Src).getSizeInBits()),
2273 V: ValAndVReg->Value));
2274 if (!emitConstantVector(Dst, CV, MIRBuilder&: MIB, MRI))
2275 return false;
2276 I.eraseFromParent();
2277 return true;
2278 }
2279 case TargetOpcode::G_SEXT:
2280 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2281 // over a normal extend.
2282 if (selectUSMovFromExtend(I, MRI))
2283 return true;
2284 return false;
2285 case TargetOpcode::G_BR:
2286 return false;
2287 case TargetOpcode::G_SHL:
2288 return earlySelectSHL(I, MRI);
2289 case TargetOpcode::G_CONSTANT: {
2290 bool IsZero = false;
2291 if (I.getOperand(i: 1).isCImm())
2292 IsZero = I.getOperand(i: 1).getCImm()->isZero();
2293 else if (I.getOperand(i: 1).isImm())
2294 IsZero = I.getOperand(i: 1).getImm() == 0;
2295
2296 if (!IsZero)
2297 return false;
2298
2299 Register DefReg = I.getOperand(i: 0).getReg();
2300 LLT Ty = MRI.getType(Reg: DefReg);
2301 if (Ty.getSizeInBits() == 64) {
2302 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2303 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2304 } else if (Ty.getSizeInBits() == 32) {
2305 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2306 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2307 } else
2308 return false;
2309
2310 I.setDesc(TII.get(TargetOpcode::COPY));
2311 return true;
2312 }
2313
2314 case TargetOpcode::G_ADD: {
2315 // Check if this is being fed by a G_ICMP on either side.
2316 //
2317 // (cmp pred, x, y) + z
2318 //
2319 // In the above case, when the cmp is true, we increment z by 1. So, we can
2320 // fold the add into the cset for the cmp by using cinc.
2321 //
2322 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
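    // For example (illustrative):
    //   %c = G_ICMP intpred(eq), %x, %y
    //   %add = G_ADD %z, %c
    // becomes a compare followed by CSINC %add, %z, %z, ne: with the inverted
    // condition, CSINC yields %z + 1 exactly when the original predicate
    // holds, and %z otherwise.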
2323 Register AddDst = I.getOperand(i: 0).getReg();
2324 Register AddLHS = I.getOperand(i: 1).getReg();
2325 Register AddRHS = I.getOperand(i: 2).getReg();
2326 // Only handle scalars.
2327 LLT Ty = MRI.getType(Reg: AddLHS);
2328 if (Ty.isVector())
2329 return false;
2330 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2331 // bits.
2332 unsigned Size = Ty.getSizeInBits();
2333 if (Size != 32 && Size != 64)
2334 return false;
2335 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2336 if (!MRI.hasOneNonDBGUse(RegNo: Reg))
2337 return nullptr;
2338 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2339 // compare.
2340 if (Size == 32)
2341 return getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg, MRI);
2342 // We model scalar compares using 32-bit destinations right now.
2343 // If it's a 64-bit compare, it'll have 64-bit sources.
2344 Register ZExt;
2345 if (!mi_match(R: Reg, MRI,
2346 P: m_OneNonDBGUse(SP: m_GZExt(Src: m_OneNonDBGUse(SP: m_Reg(R&: ZExt))))))
2347 return nullptr;
2348 auto *Cmp = getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg: ZExt, MRI);
2349 if (!Cmp ||
2350 MRI.getType(Reg: Cmp->getOperand(i: 2).getReg()).getSizeInBits() != 64)
2351 return nullptr;
2352 return Cmp;
2353 };
2354 // Try to match
2355 // z + (cmp pred, x, y)
2356 MachineInstr *Cmp = MatchCmp(AddRHS);
2357 if (!Cmp) {
2358 // (cmp pred, x, y) + z
2359 std::swap(a&: AddLHS, b&: AddRHS);
2360 Cmp = MatchCmp(AddRHS);
2361 if (!Cmp)
2362 return false;
2363 }
2364 auto &PredOp = Cmp->getOperand(i: 1);
2365 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2366 const AArch64CC::CondCode InvCC =
2367 changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred));
2368 MIB.setInstrAndDebugLoc(I);
2369 emitIntegerCompare(/*LHS=*/Cmp->getOperand(i: 2),
2370 /*RHS=*/Cmp->getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB);
2371 emitCSINC(/*Dst=*/AddDst, /*Src =*/Src1: AddLHS, /*Src2=*/AddLHS, Pred: InvCC, MIRBuilder&: MIB);
2372 I.eraseFromParent();
2373 return true;
2374 }
2375 case TargetOpcode::G_OR: {
2376 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2377 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2378 // shifting and masking that we can replace with a BFI (encoded as a BFM).
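    // For example (illustrative), with a 32-bit type and ShiftImm = 16:
    //   %dst = G_OR (G_SHL %hi, 16), (G_AND %lo, 0xffff)
    // is selected as BFMWri %lo, %hi, 16, 15 (the BFI alias with lsb 16 and
    // width 16), inserting the low 16 bits of %hi above the masked %lo.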
2379 Register Dst = I.getOperand(i: 0).getReg();
2380 LLT Ty = MRI.getType(Reg: Dst);
2381
2382 if (!Ty.isScalar())
2383 return false;
2384
2385 unsigned Size = Ty.getSizeInBits();
2386 if (Size != 32 && Size != 64)
2387 return false;
2388
2389 Register ShiftSrc;
2390 int64_t ShiftImm;
2391 Register MaskSrc;
2392 int64_t MaskImm;
2393 if (!mi_match(
2394 R: Dst, MRI,
2395 P: m_GOr(L: m_OneNonDBGUse(SP: m_GShl(L: m_Reg(R&: ShiftSrc), R: m_ICst(Cst&: ShiftImm))),
2396 R: m_OneNonDBGUse(SP: m_GAnd(L: m_Reg(R&: MaskSrc), R: m_ICst(Cst&: MaskImm))))))
2397 return false;
2398
2399 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2400 return false;
2401
2402 int64_t Immr = Size - ShiftImm;
2403 int64_t Imms = Size - ShiftImm - 1;
2404 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2405 emitInstr(Opcode: Opc, DstOps: {Dst}, SrcOps: {MaskSrc, ShiftSrc, Immr, Imms}, MIRBuilder&: MIB);
2406 I.eraseFromParent();
2407 return true;
2408 }
2409 case TargetOpcode::G_FENCE: {
2410 if (I.getOperand(i: 1).getImm() == 0)
2411 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2412 else
2413 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2414 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2415 I.eraseFromParent();
2416 return true;
2417 }
2418 default:
2419 return false;
2420 }
2421}
2422
2423bool AArch64InstructionSelector::select(MachineInstr &I) {
2424 assert(I.getParent() && "Instruction should be in a basic block!");
2425 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2426
2427 MachineBasicBlock &MBB = *I.getParent();
2428 MachineFunction &MF = *MBB.getParent();
2429 MachineRegisterInfo &MRI = MF.getRegInfo();
2430
2431 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2432 if (Subtarget->requiresStrictAlign()) {
2433 // We don't support this feature yet.
2434 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2435 return false;
2436 }
2437
2438 MIB.setInstrAndDebugLoc(I);
2439
2440 unsigned Opcode = I.getOpcode();
2441 // G_PHI requires same handling as PHI
2442 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2443 // Certain non-generic instructions also need some special handling.
2444
2445 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2446 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2447
2448 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2449 const Register DefReg = I.getOperand(i: 0).getReg();
2450 const LLT DefTy = MRI.getType(Reg: DefReg);
2451
2452 const RegClassOrRegBank &RegClassOrBank =
2453 MRI.getRegClassOrRegBank(Reg: DefReg);
2454
2455 const TargetRegisterClass *DefRC
2456 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2457 if (!DefRC) {
2458 if (!DefTy.isValid()) {
2459 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2460 return false;
2461 }
2462 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2463 DefRC = getRegClassForTypeOnBank(Ty: DefTy, RB);
2464 if (!DefRC) {
2465 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2466 return false;
2467 }
2468 }
2469
2470 I.setDesc(TII.get(TargetOpcode::PHI));
2471
2472 return RBI.constrainGenericRegister(Reg: DefReg, RC: *DefRC, MRI);
2473 }
2474
2475 if (I.isCopy())
2476 return selectCopy(I, TII, MRI, TRI, RBI);
2477
2478 if (I.isDebugInstr())
2479 return selectDebugInstr(I, MRI, RBI);
2480
2481 return true;
2482 }
2483
2484
2485 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2486 LLVM_DEBUG(
2487 dbgs() << "Generic instruction has unexpected implicit operands\n");
2488 return false;
2489 }
2490
2491 // Try to do some lowering before we start instruction selecting. These
2492 // lowerings are purely transformations on the input G_MIR and so selection
2493 // must continue after any modification of the instruction.
2494 if (preISelLower(I)) {
2495 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2496 }
2497
2498  // There may be patterns that the importer can't handle optimally but still
2499  // selects into a suboptimal sequence, so our custom C++ selection code never
2500  // gets a chance to improve on them. Therefore, we make an early selection
2501  // attempt here to give priority to certain selection routines over the
2502  // imported ones.
2503 if (earlySelect(I))
2504 return true;
2505
2506 if (selectImpl(I, CoverageInfo&: *CoverageInfo))
2507 return true;
2508
2509 LLT Ty =
2510 I.getOperand(i: 0).isReg() ? MRI.getType(Reg: I.getOperand(i: 0).getReg()) : LLT{};
2511
2512 switch (Opcode) {
2513 case TargetOpcode::G_SBFX:
2514 case TargetOpcode::G_UBFX: {
2515 static const unsigned OpcTable[2][2] = {
2516 {AArch64::UBFMWri, AArch64::UBFMXri},
2517 {AArch64::SBFMWri, AArch64::SBFMXri}};
2518 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2519 unsigned Size = Ty.getSizeInBits();
2520 unsigned Opc = OpcTable[IsSigned][Size == 64];
2521 auto Cst1 =
2522 getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 2).getReg(), MRI);
2523 assert(Cst1 && "Should have gotten a constant for src 1?");
2524 auto Cst2 =
2525 getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 3).getReg(), MRI);
2526 assert(Cst2 && "Should have gotten a constant for src 2?");
2527 auto LSB = Cst1->Value.getZExtValue();
2528 auto Width = Cst2->Value.getZExtValue();
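    // UBFM/SBFM take first/last bit positions rather than (lsb, width). For
    // example (illustrative), a 32-bit G_UBFX with LSB 8 and Width 4 becomes
    // UBFMWri %src, 8, 11, i.e. the "ubfx #8, #4" alias.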
2529 auto BitfieldInst =
2530 MIB.buildInstr(Opc, DstOps: {I.getOperand(i: 0)}, SrcOps: {I.getOperand(i: 1)})
2531 .addImm(Val: LSB)
2532 .addImm(Val: LSB + Width - 1);
2533 I.eraseFromParent();
2534 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2535 }
2536 case TargetOpcode::G_BRCOND:
2537 return selectCompareBranch(I, MF, MRI);
2538
2539 case TargetOpcode::G_BRINDIRECT: {
2540 I.setDesc(TII.get(AArch64::BR));
2541 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2542 }
2543
2544 case TargetOpcode::G_BRJT:
2545 return selectBrJT(I, MRI);
2546
2547 case AArch64::G_ADD_LOW: {
2548    // This op may have been separated from its ADRP companion by the localizer
2549 // or some other code motion pass. Given that many CPUs will try to
2550 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2551 // which will later be expanded into an ADRP+ADD pair after scheduling.
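    // After expansion this becomes roughly (assembly sketch):
    //   adrp xN, sym
    //   add  xN, xN, :lo12:sym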
2552 MachineInstr *BaseMI = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg());
2553 if (BaseMI->getOpcode() != AArch64::ADRP) {
2554 I.setDesc(TII.get(AArch64::ADDXri));
2555 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2556 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2557 }
2558 assert(TM.getCodeModel() == CodeModel::Small &&
2559 "Expected small code model");
2560 auto Op1 = BaseMI->getOperand(i: 1);
2561 auto Op2 = I.getOperand(i: 2);
2562 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2563 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2564 Op1.getTargetFlags())
2565 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2566 Op2.getTargetFlags());
2567 I.eraseFromParent();
2568 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2569 }
2570
2571 case TargetOpcode::G_FCONSTANT:
2572 case TargetOpcode::G_CONSTANT: {
2573 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2574
2575 const LLT s8 = LLT::scalar(SizeInBits: 8);
2576 const LLT s16 = LLT::scalar(SizeInBits: 16);
2577 const LLT s32 = LLT::scalar(SizeInBits: 32);
2578 const LLT s64 = LLT::scalar(SizeInBits: 64);
2579 const LLT s128 = LLT::scalar(SizeInBits: 128);
2580 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
2581
2582 const Register DefReg = I.getOperand(i: 0).getReg();
2583 const LLT DefTy = MRI.getType(Reg: DefReg);
2584 const unsigned DefSize = DefTy.getSizeInBits();
2585 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2586
2587 // FIXME: Redundant check, but even less readable when factored out.
2588 if (isFP) {
2589 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2590 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2591 << " constant, expected: " << s16 << " or " << s32
2592 << " or " << s64 << " or " << s128 << '\n');
2593 return false;
2594 }
2595
2596 if (RB.getID() != AArch64::FPRRegBankID) {
2597 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2598 << " constant on bank: " << RB
2599 << ", expected: FPR\n");
2600 return false;
2601 }
2602
2603 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2604 // can be sure tablegen works correctly and isn't rescued by this code.
2605 // 0.0 is not covered by tablegen for FP128. So we will handle this
2606 // scenario in the code here.
2607 if (DefSize != 128 && I.getOperand(i: 1).getFPImm()->isExactlyValue(V: 0.0))
2608 return false;
2609 } else {
2610 // s32 and s64 are covered by tablegen.
2611 if (Ty != p0 && Ty != s8 && Ty != s16) {
2612 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2613 << " constant, expected: " << s32 << ", " << s64
2614 << ", or " << p0 << '\n');
2615 return false;
2616 }
2617
2618 if (RB.getID() != AArch64::GPRRegBankID) {
2619 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2620 << " constant on bank: " << RB
2621 << ", expected: GPR\n");
2622 return false;
2623 }
2624 }
2625
2626 if (isFP) {
2627 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(Ty: DefTy, RB);
2628      // For 16b and 128b values, and illegal 32b/64b fpimms, use a constant pool load.
2629 switch (DefSize) {
2630 default:
2631 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2632 case 32:
2633 case 64: {
2634 bool OptForSize = shouldOptForSize(MF: &MF);
2635 const auto &TLI = MF.getSubtarget().getTargetLowering();
2636 // If TLI says that this fpimm is illegal, then we'll expand to a
2637 // constant pool load.
2638 if (TLI->isFPImmLegal(I.getOperand(i: 1).getFPImm()->getValueAPF(),
2639 EVT::getFloatingPointVT(BitWidth: DefSize), ForCodeSize: OptForSize))
2640 break;
2641 [[fallthrough]];
2642 }
2643 case 16:
2644 case 128: {
2645 auto *FPImm = I.getOperand(i: 1).getFPImm();
2646 auto *LoadMI = emitLoadFromConstantPool(CPVal: FPImm, MIRBuilder&: MIB);
2647 if (!LoadMI) {
2648 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2649 return false;
2650 }
2651 MIB.buildCopy(Res: {DefReg}, Op: {LoadMI->getOperand(i: 0).getReg()});
2652 I.eraseFromParent();
2653 return RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI);
2654 }
2655 }
2656
2657 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2658      // Materialize into a GPR with a normal mov, then copy to the FPR (possibly an FMOV).
2659 const Register DefGPRReg = MRI.createVirtualRegister(
2660 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2661 MachineOperand &RegOp = I.getOperand(i: 0);
2662 RegOp.setReg(DefGPRReg);
2663 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator()));
2664 MIB.buildCopy(Res: {DefReg}, Op: {DefGPRReg});
2665
2666 if (!RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI)) {
2667 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2668 return false;
2669 }
2670
2671 MachineOperand &ImmOp = I.getOperand(i: 1);
2672 // FIXME: Is going through int64_t always correct?
2673 ImmOp.ChangeToImmediate(
2674 ImmVal: ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2675 } else if (I.getOperand(i: 1).isCImm()) {
2676 uint64_t Val = I.getOperand(i: 1).getCImm()->getZExtValue();
2677 I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val);
2678 } else if (I.getOperand(i: 1).isImm()) {
2679 uint64_t Val = I.getOperand(i: 1).getImm();
2680 I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val);
2681 }
2682
2683 const unsigned MovOpc =
2684 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2685 I.setDesc(TII.get(MovOpc));
2686 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2687 return true;
2688 }
2689 case TargetOpcode::G_EXTRACT: {
2690 Register DstReg = I.getOperand(i: 0).getReg();
2691 Register SrcReg = I.getOperand(i: 1).getReg();
2692 LLT SrcTy = MRI.getType(Reg: SrcReg);
2693 LLT DstTy = MRI.getType(Reg: DstReg);
2694 (void)DstTy;
2695 unsigned SrcSize = SrcTy.getSizeInBits();
2696
2697 if (SrcTy.getSizeInBits() > 64) {
2698 // This should be an extract of an s128, which is like a vector extract.
2699 if (SrcTy.getSizeInBits() != 128)
2700 return false;
2701 // Only support extracting 64 bits from an s128 at the moment.
2702 if (DstTy.getSizeInBits() != 64)
2703 return false;
2704
2705 unsigned Offset = I.getOperand(i: 2).getImm();
2706 if (Offset % 64 != 0)
2707 return false;
2708
2709 // Check we have the right regbank always.
2710 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2711 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2712 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2713
2714 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2715 auto NewI =
2716 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2717 .addUse(SrcReg, 0,
2718 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2719 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2720 AArch64::GPR64RegClass, NewI->getOperand(0));
2721 I.eraseFromParent();
2722 return true;
2723 }
2724
2725 // Emit the same code as a vector extract.
2726 // Offset must be a multiple of 64.
2727 unsigned LaneIdx = Offset / 64;
2728 MachineInstr *Extract = emitExtractVectorElt(
2729 DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: 64), VecReg: SrcReg, LaneIdx, MIRBuilder&: MIB);
2730 if (!Extract)
2731 return false;
2732 I.eraseFromParent();
2733 return true;
2734 }
2735
2736 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2737 MachineInstrBuilder(MF, I).addImm(Val: I.getOperand(i: 2).getImm() +
2738 Ty.getSizeInBits() - 1);
2739
2740 if (SrcSize < 64) {
2741 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2742 "unexpected G_EXTRACT types");
2743 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2744 }
2745
2746 DstReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
2747 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator()));
2748 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2749 .addReg(DstReg, 0, AArch64::sub_32);
2750 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2751 AArch64::GPR32RegClass, MRI);
2752 I.getOperand(i: 0).setReg(DstReg);
2753
2754 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2755 }
2756
2757 case TargetOpcode::G_INSERT: {
2758 LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 2).getReg());
2759 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2760 unsigned DstSize = DstTy.getSizeInBits();
2761 // Larger inserts are vectors, same-size ones should be something else by
2762 // now (split up or turned into COPYs).
2763 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2764 return false;
2765
2766 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2767 unsigned LSB = I.getOperand(i: 3).getImm();
2768 unsigned Width = MRI.getType(Reg: I.getOperand(i: 2).getReg()).getSizeInBits();
2769 I.getOperand(i: 3).setImm((DstSize - LSB) % DstSize);
2770 MachineInstrBuilder(MF, I).addImm(Val: Width - 1);
2771
2772 if (DstSize < 64) {
2773 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2774 "unexpected G_INSERT types");
2775 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2776 }
2777
2778 Register SrcReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
2779 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2780 TII.get(AArch64::SUBREG_TO_REG))
2781 .addDef(SrcReg)
2782 .addImm(0)
2783 .addUse(I.getOperand(2).getReg())
2784 .addImm(AArch64::sub_32);
2785 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2786 AArch64::GPR32RegClass, MRI);
2787 I.getOperand(i: 2).setReg(SrcReg);
2788
2789 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2790 }
2791 case TargetOpcode::G_FRAME_INDEX: {
2792 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2793 if (Ty != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) {
2794 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2795 << ", expected: " << LLT::pointer(0, 64) << '\n');
2796 return false;
2797 }
2798 I.setDesc(TII.get(AArch64::ADDXri));
2799
2800 // MOs for a #0 shifted immediate.
2801 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2802 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2803
2804 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2805 }
2806
2807 case TargetOpcode::G_GLOBAL_VALUE: {
2808 const GlobalValue *GV = nullptr;
2809 unsigned OpFlags;
2810 if (I.getOperand(i: 1).isSymbol()) {
2811 OpFlags = I.getOperand(i: 1).getTargetFlags();
2812 // Currently only used by "RtLibUseGOT".
2813 assert(OpFlags == AArch64II::MO_GOT);
2814 } else {
2815 GV = I.getOperand(i: 1).getGlobal();
2816 if (GV->isThreadLocal())
2817 return selectTLSGlobalValue(I, MRI);
2818 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2819 }
2820
2821 if (OpFlags & AArch64II::MO_GOT) {
2822 I.setDesc(TII.get(AArch64::LOADgot));
2823 I.getOperand(i: 1).setTargetFlags(OpFlags);
2824 } else if (TM.getCodeModel() == CodeModel::Large &&
2825 !TM.isPositionIndependent()) {
2826 // Materialize the global using movz/movk instructions.
2827 materializeLargeCMVal(I, V: GV, OpFlags);
2828 I.eraseFromParent();
2829 return true;
2830 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2831 I.setDesc(TII.get(AArch64::ADR));
2832 I.getOperand(i: 1).setTargetFlags(OpFlags);
2833 } else {
2834 I.setDesc(TII.get(AArch64::MOVaddr));
2835 I.getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2836 MachineInstrBuilder MIB(MF, I);
2837 MIB.addGlobalAddress(GV, Offset: I.getOperand(i: 1).getOffset(),
2838 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2839 }
2840 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2841 }
2842
2843 case TargetOpcode::G_ZEXTLOAD:
2844 case TargetOpcode::G_LOAD:
2845 case TargetOpcode::G_STORE: {
2846 GLoadStore &LdSt = cast<GLoadStore>(Val&: I);
2847 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2848 LLT PtrTy = MRI.getType(Reg: LdSt.getPointerReg());
2849
2850 if (PtrTy != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) {
2851 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2852 << ", expected: " << LLT::pointer(0, 64) << '\n');
2853 return false;
2854 }
2855
2856 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2857 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2858 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2859
2860 // Need special instructions for atomics that affect ordering.
2861 if (Order != AtomicOrdering::NotAtomic &&
2862 Order != AtomicOrdering::Unordered &&
2863 Order != AtomicOrdering::Monotonic) {
2864 assert(!isa<GZExtLoad>(LdSt));
2865 if (MemSizeInBytes > 64)
2866 return false;
2867
2868 if (isa<GLoad>(Val: LdSt)) {
2869 static constexpr unsigned LDAPROpcodes[] = {
2870 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2871 static constexpr unsigned LDAROpcodes[] = {
2872 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2873 ArrayRef<unsigned> Opcodes =
2874 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2875 ? LDAPROpcodes
2876 : LDAROpcodes;
2877 I.setDesc(TII.get(Opcodes[Log2_32(Value: MemSizeInBytes)]));
2878 } else {
2879 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2880 AArch64::STLRW, AArch64::STLRX};
2881 Register ValReg = LdSt.getReg(Idx: 0);
2882 if (MRI.getType(Reg: ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2883 // Emit a subreg copy of 32 bits.
2884 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2885 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2886 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2887 I.getOperand(i: 0).setReg(NewVal);
2888 }
2889 I.setDesc(TII.get(Opcodes[Log2_32(Value: MemSizeInBytes)]));
2890 }
2891 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2892 return true;
2893 }
2894
2895#ifndef NDEBUG
2896 const Register PtrReg = LdSt.getPointerReg();
2897 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2898 // Check that the pointer register is valid.
2899 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2900 "Load/Store pointer operand isn't a GPR");
2901 assert(MRI.getType(PtrReg).isPointer() &&
2902 "Load/Store pointer operand isn't a pointer");
2903#endif
2904
2905 const Register ValReg = LdSt.getReg(Idx: 0);
2906 const LLT ValTy = MRI.getType(Reg: ValReg);
2907 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2908
2909 // The code below doesn't support truncating stores, so we need to split it
2910 // again.
2911 if (isa<GStore>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2912 unsigned SubReg;
2913 LLT MemTy = LdSt.getMMO().getMemoryType();
2914 auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB);
2915 if (!getSubRegForClass(RC, TRI, SubReg))
2916 return false;
2917
2918 // Generate a subreg copy.
2919 auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {MemTy}, SrcOps: {})
2920 .addReg(RegNo: ValReg, flags: 0, SubReg)
2921 .getReg(Idx: 0);
2922 RBI.constrainGenericRegister(Reg: Copy, RC: *RC, MRI);
2923 LdSt.getOperand(i: 0).setReg(Copy);
2924 } else if (isa<GLoad>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2925 // If this is an any-extending load from the FPR bank, split it into a regular
2926 // load + extend.
2927 if (RB.getID() == AArch64::FPRRegBankID) {
2928 unsigned SubReg;
2929 LLT MemTy = LdSt.getMMO().getMemoryType();
2930 auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB);
2931 if (!getSubRegForClass(RC, TRI, SubReg))
2932 return false;
2933 Register OldDst = LdSt.getReg(Idx: 0);
2934 Register NewDst =
2935 MRI.createGenericVirtualRegister(Ty: LdSt.getMMO().getMemoryType());
2936 LdSt.getOperand(i: 0).setReg(NewDst);
2937 MRI.setRegBank(Reg: NewDst, RegBank: RB);
2938 // Generate a SUBREG_TO_REG to extend it.
2939 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LdSt.getIterator()));
2940 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2941 .addImm(0)
2942 .addUse(NewDst)
2943 .addImm(SubReg);
2944 auto SubRegRC = getRegClassForTypeOnBank(Ty: MRI.getType(Reg: OldDst), RB);
2945 RBI.constrainGenericRegister(Reg: OldDst, RC: *SubRegRC, MRI);
2946 MIB.setInstr(LdSt);
2947 }
2948 }
2949
2950 // Helper lambda for partially selecting I. Either returns the original
2951 // instruction with an updated opcode, or a new instruction.
2952 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2953 bool IsStore = isa<GStore>(Val: I);
2954 const unsigned NewOpc =
2955 selectLoadStoreUIOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize: MemSizeInBits);
2956 if (NewOpc == I.getOpcode())
2957 return nullptr;
2958 // Check if we can fold anything into the addressing mode.
2959 auto AddrModeFns =
2960 selectAddrModeIndexed(Root&: I.getOperand(i: 1), Size: MemSizeInBytes);
2961 if (!AddrModeFns) {
2962 // Can't fold anything. Use the original instruction.
2963 I.setDesc(TII.get(NewOpc));
2964 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2965 return &I;
2966 }
2967
2968 // Folded something. Create a new instruction and return it.
2969 auto NewInst = MIB.buildInstr(Opc: NewOpc, DstOps: {}, SrcOps: {}, Flags: I.getFlags());
2970 Register CurValReg = I.getOperand(i: 0).getReg();
2971 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2972 NewInst.cloneMemRefs(I);
2973 for (auto &Fn : *AddrModeFns)
2974 Fn(NewInst);
2975 I.eraseFromParent();
2976 return &*NewInst;
2977 };
2978
2979 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2980 if (!LoadStore)
2981 return false;
2982
2983 // If we're storing a 0, use WZR/XZR.
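    // Using the zero register avoids materializing a constant 0 just to store
    // it.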
2984 if (Opcode == TargetOpcode::G_STORE) {
2985 auto CVal = getIConstantVRegValWithLookThrough(
2986 VReg: LoadStore->getOperand(i: 0).getReg(), MRI);
2987 if (CVal && CVal->Value == 0) {
2988 switch (LoadStore->getOpcode()) {
2989 case AArch64::STRWui:
2990 case AArch64::STRHHui:
2991 case AArch64::STRBBui:
2992 LoadStore->getOperand(0).setReg(AArch64::WZR);
2993 break;
2994 case AArch64::STRXui:
2995 LoadStore->getOperand(0).setReg(AArch64::XZR);
2996 break;
2997 }
2998 }
2999 }
3000
3001 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3002 ValTy == LLT::scalar(SizeInBits: 64) && MemSizeInBits == 32)) {
3003 // The any/zextload from a smaller type to i32 should be handled by the
3004 // importer.
3005 if (MRI.getType(Reg: LoadStore->getOperand(i: 0).getReg()).getSizeInBits() != 64)
3006 return false;
3007 // If we have an extending load then change the load's type to be a
3008 // narrower reg and zero_extend with SUBREG_TO_REG.
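    // Writes to a W register implicitly zero the upper 32 bits of the X
    // register, so a SUBREG_TO_REG is enough to model the zero-extension to
    // 64 bits.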
3009 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3010 Register DstReg = LoadStore->getOperand(i: 0).getReg();
3011 LoadStore->getOperand(i: 0).setReg(LdReg);
3012
3013 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LoadStore->getIterator()));
3014 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3015 .addImm(0)
3016 .addUse(LdReg)
3017 .addImm(AArch64::sub_32);
3018 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3019 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3020 MRI);
3021 }
3022 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3023 }
3024
3025 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3026 case TargetOpcode::G_INDEXED_SEXTLOAD:
3027 return selectIndexedExtLoad(I, MRI);
3028 case TargetOpcode::G_INDEXED_LOAD:
3029 return selectIndexedLoad(I, MRI);
3030 case TargetOpcode::G_INDEXED_STORE:
3031 return selectIndexedStore(I&: cast<GIndexedStore>(Val&: I), MRI);
3032
3033 case TargetOpcode::G_LSHR:
3034 case TargetOpcode::G_ASHR:
3035 if (MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector())
3036 return selectVectorAshrLshr(I, MRI);
3037 [[fallthrough]];
3038 case TargetOpcode::G_SHL:
3039 if (Opcode == TargetOpcode::G_SHL &&
3040 MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector())
3041 return selectVectorSHL(I, MRI);
3042
3043    // These shifts were legalized to have 64-bit shift amounts because we
3044    // want to take advantage of the selection patterns that assume the
3045    // immediates are s64s. However, selectBinaryOp assumes both operands
3046    // have the same bit size.
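    // When the shifted value is only 32 bits wide, truncate the 64-bit shift
    // amount back to 32 bits below via a sub_32 subregister copy.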
3047 {
3048 Register SrcReg = I.getOperand(i: 1).getReg();
3049 Register ShiftReg = I.getOperand(i: 2).getReg();
3050 const LLT ShiftTy = MRI.getType(Reg: ShiftReg);
3051 const LLT SrcTy = MRI.getType(Reg: SrcReg);
3052 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3053 ShiftTy.getSizeInBits() == 64) {
3054 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3055 // Insert a subregister copy to implement a 64->32 trunc
3056 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3057 .addReg(ShiftReg, 0, AArch64::sub_32);
3058 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3059 I.getOperand(i: 2).setReg(Trunc.getReg(0));
3060 }
3061 }
3062 [[fallthrough]];
3063 case TargetOpcode::G_OR: {
3064 // Reject the various things we don't support yet.
3065 if (unsupportedBinOp(I, RBI, MRI, TRI))
3066 return false;
3067
3068 const unsigned OpSize = Ty.getSizeInBits();
3069
3070 const Register DefReg = I.getOperand(i: 0).getReg();
3071 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3072
3073 const unsigned NewOpc = selectBinaryOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize);
3074 if (NewOpc == I.getOpcode())
3075 return false;
3076
3077 I.setDesc(TII.get(NewOpc));
3078 // FIXME: Should the type be always reset in setDesc?
3079
3080 // Now that we selected an opcode, we need to constrain the register
3081 // operands to use appropriate classes.
3082 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3083 }
3084
3085 case TargetOpcode::G_PTR_ADD: {
3086 emitADD(DefReg: I.getOperand(i: 0).getReg(), LHS&: I.getOperand(i: 1), RHS&: I.getOperand(i: 2), MIRBuilder&: MIB);
3087 I.eraseFromParent();
3088 return true;
3089 }
3090
3091 case TargetOpcode::G_SADDE:
3092 case TargetOpcode::G_UADDE:
3093 case TargetOpcode::G_SSUBE:
3094 case TargetOpcode::G_USUBE:
3095 case TargetOpcode::G_SADDO:
3096 case TargetOpcode::G_UADDO:
3097 case TargetOpcode::G_SSUBO:
3098 case TargetOpcode::G_USUBO:
3099 return selectOverflowOp(I, MRI);
3100
3101 case TargetOpcode::G_PTRMASK: {
3102 Register MaskReg = I.getOperand(i: 2).getReg();
3103 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(VReg: MaskReg, MRI);
3104 // TODO: Implement arbitrary cases
3105 if (!MaskVal || !isShiftedMask_64(Value: *MaskVal))
3106 return false;
3107
3108 uint64_t Mask = *MaskVal;
3109 I.setDesc(TII.get(AArch64::ANDXri));
3110 I.getOperand(i: 2).ChangeToImmediate(
3111 ImmVal: AArch64_AM::encodeLogicalImmediate(imm: Mask, regSize: 64));
3112
3113 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3114 }
3115 case TargetOpcode::G_PTRTOINT:
3116 case TargetOpcode::G_TRUNC: {
3117 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3118 const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
3119
3120 const Register DstReg = I.getOperand(i: 0).getReg();
3121 const Register SrcReg = I.getOperand(i: 1).getReg();
3122
3123 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3124 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3125
3126 if (DstRB.getID() != SrcRB.getID()) {
3127 LLVM_DEBUG(
3128 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3129 return false;
3130 }
3131
3132 if (DstRB.getID() == AArch64::GPRRegBankID) {
3133 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB);
3134 if (!DstRC)
3135 return false;
3136
3137 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(Ty: SrcTy, RB: SrcRB);
3138 if (!SrcRC)
3139 return false;
3140
3141 if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: *SrcRC, MRI) ||
3142 !RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) {
3143 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3144 return false;
3145 }
3146
3147 if (DstRC == SrcRC) {
3148 // Nothing to be done
3149 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(SizeInBits: 32) &&
3150 SrcTy == LLT::scalar(SizeInBits: 64)) {
3151 llvm_unreachable("TableGen can import this case");
3152 return false;
3153 } else if (DstRC == &AArch64::GPR32RegClass &&
3154 SrcRC == &AArch64::GPR64RegClass) {
3155 I.getOperand(1).setSubReg(AArch64::sub_32);
3156 } else {
3157 LLVM_DEBUG(
3158 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3159 return false;
3160 }
3161
3162 I.setDesc(TII.get(TargetOpcode::COPY));
3163 return true;
3164 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3165 if (DstTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) &&
3166 SrcTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
3167 I.setDesc(TII.get(AArch64::XTNv4i16));
3168 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3169 return true;
3170 }
3171
3172 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3173 MachineInstr *Extract = emitExtractVectorElt(
3174 DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: DstTy.getSizeInBits()), VecReg: SrcReg, LaneIdx: 0, MIRBuilder&: MIB);
3175 if (!Extract)
3176 return false;
3177 I.eraseFromParent();
3178 return true;
3179 }
3180
3181 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3182 if (Opcode == TargetOpcode::G_PTRTOINT) {
3183 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3184 I.setDesc(TII.get(TargetOpcode::COPY));
3185 return selectCopy(I, TII, MRI, TRI, RBI);
3186 }
3187 }
3188
3189 return false;
3190 }
3191
3192 case TargetOpcode::G_ANYEXT: {
3193 if (selectUSMovFromExtend(I, MRI))
3194 return true;
3195
3196 const Register DstReg = I.getOperand(i: 0).getReg();
3197 const Register SrcReg = I.getOperand(i: 1).getReg();
3198
3199 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3200 if (RBDst.getID() != AArch64::GPRRegBankID) {
3201 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3202 << ", expected: GPR\n");
3203 return false;
3204 }
3205
3206 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3207 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3208 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3209 << ", expected: GPR\n");
3210 return false;
3211 }
3212
3213 const unsigned DstSize = MRI.getType(Reg: DstReg).getSizeInBits();
3214
3215 if (DstSize == 0) {
3216 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3217 return false;
3218 }
3219
3220 if (DstSize != 64 && DstSize > 32) {
3221 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3222 << ", expected: 32 or 64\n");
3223 return false;
3224 }
3225 // At this point G_ANYEXT is just like a plain COPY, but we need
3226 // to explicitly form the 64-bit value if any.
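    // For a 64-bit destination, first wrap the narrower source in a
    // SUBREG_TO_REG so the COPY below sees a full 64-bit register.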
3227 if (DstSize > 32) {
3228 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3229 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3230 .addDef(ExtSrc)
3231 .addImm(0)
3232 .addUse(SrcReg)
3233 .addImm(AArch64::sub_32);
3234 I.getOperand(i: 1).setReg(ExtSrc);
3235 }
3236 return selectCopy(I, TII, MRI, TRI, RBI);
3237 }
3238
3239 case TargetOpcode::G_ZEXT:
3240 case TargetOpcode::G_SEXT_INREG:
3241 case TargetOpcode::G_SEXT: {
3242 if (selectUSMovFromExtend(I, MRI))
3243 return true;
3244
3245 unsigned Opcode = I.getOpcode();
3246 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3247 const Register DefReg = I.getOperand(i: 0).getReg();
3248 Register SrcReg = I.getOperand(i: 1).getReg();
3249 const LLT DstTy = MRI.getType(Reg: DefReg);
3250 const LLT SrcTy = MRI.getType(Reg: SrcReg);
3251 unsigned DstSize = DstTy.getSizeInBits();
3252 unsigned SrcSize = SrcTy.getSizeInBits();
3253
3254 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3255 // extended is encoded in the imm.
3256 if (Opcode == TargetOpcode::G_SEXT_INREG)
3257 SrcSize = I.getOperand(i: 2).getImm();
3258
3259 if (DstTy.isVector())
3260 return false; // Should be handled by imported patterns.
3261
3262 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3263 AArch64::GPRRegBankID &&
3264 "Unexpected ext regbank");
3265
3266 MachineInstr *ExtI;
3267
3268    // First, check whether we're extending the result of a load whose dest type
3269    // is smaller than 32 bits; if so, this zext is redundant. GPR32 is the
3270    // smallest GPR register on AArch64, and all smaller loads automatically
3271    // zero-extend the upper bits. E.g.
3272 // %v(s8) = G_LOAD %p, :: (load 1)
3273 // %v2(s32) = G_ZEXT %v(s8)
3274 if (!IsSigned) {
3275 auto *LoadMI = getOpcodeDef(Opcode: TargetOpcode::G_LOAD, Reg: SrcReg, MRI);
3276 bool IsGPR =
3277 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3278 if (LoadMI && IsGPR) {
3279 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3280 unsigned BytesLoaded = MemOp->getSize().getValue();
3281 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3282 return selectCopy(I, TII, MRI, TRI, RBI);
3283 }
3284
3285 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3286 // + SUBREG_TO_REG.
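      // ORRWrs with WZR is the canonical 32-bit register move; writing a W
      // register implicitly zeroes the upper 32 bits, so SUBREG_TO_REG then
      // reassembles the 64-bit result.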
3287 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3288 Register SubregToRegSrc =
3289 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3290 const Register ZReg = AArch64::WZR;
3291 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3292 .addImm(0);
3293
3294 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3295 .addImm(0)
3296 .addUse(SubregToRegSrc)
3297 .addImm(AArch64::sub_32);
3298
3299 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3300 MRI)) {
3301 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3302 return false;
3303 }
3304
3305 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3306 MRI)) {
3307 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3308 return false;
3309 }
3310
3311 I.eraseFromParent();
3312 return true;
3313 }
3314 }
3315
3316 if (DstSize == 64) {
3317 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3318 // FIXME: Can we avoid manually doing this?
3319 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3320 MRI)) {
3321 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3322 << " operand\n");
3323 return false;
3324 }
3325 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3326 {&AArch64::GPR64RegClass}, {})
3327 .addImm(0)
3328 .addUse(SrcReg)
3329 .addImm(AArch64::sub_32)
3330 .getReg(0);
3331 }
3332
3333 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3334 {DefReg}, {SrcReg})
3335 .addImm(0)
3336 .addImm(SrcSize - 1);
3337 } else if (DstSize <= 32) {
3338 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3339 {DefReg}, {SrcReg})
3340 .addImm(0)
3341 .addImm(SrcSize - 1);
3342 } else {
3343 return false;
3344 }
3345
3346 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3347 I.eraseFromParent();
3348 return true;
3349 }
3350
3351 case TargetOpcode::G_SITOFP:
3352 case TargetOpcode::G_UITOFP:
3353 case TargetOpcode::G_FPTOSI:
3354 case TargetOpcode::G_FPTOUI: {
3355 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()),
3356 SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
3357 const unsigned NewOpc = selectFPConvOpc(GenericOpc: Opcode, DstTy, SrcTy);
3358 if (NewOpc == Opcode)
3359 return false;
3360
3361 I.setDesc(TII.get(NewOpc));
3362 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3363 I.setFlags(MachineInstr::NoFPExcept);
3364
3365 return true;
3366 }
3367
3368 case TargetOpcode::G_FREEZE:
3369 return selectCopy(I, TII, MRI, TRI, RBI);
3370
3371 case TargetOpcode::G_INTTOPTR:
3372 // The importer is currently unable to import pointer types since they
3373 // didn't exist in SelectionDAG.
3374 return selectCopy(I, TII, MRI, TRI, RBI);
3375
3376 case TargetOpcode::G_BITCAST:
3377 // Imported SelectionDAG rules can handle every bitcast except those that
3378 // bitcast from a type to the same type. Ideally, these shouldn't occur
3379 // but we might not run an optimizer that deletes them. The other exception
3380 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3381 // of them.
3382 return selectCopy(I, TII, MRI, TRI, RBI);
3383
3384 case TargetOpcode::G_SELECT: {
3385 auto &Sel = cast<GSelect>(Val&: I);
3386 const Register CondReg = Sel.getCondReg();
3387 const Register TReg = Sel.getTrueReg();
3388 const Register FReg = Sel.getFalseReg();
3389
3390 if (tryOptSelect(Sel))
3391 return true;
3392
3393    // Make sure to use an unused vreg rather than WZR as the destination, so
3394    // that later peephole optimizations can still optimize this compare.
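    // ANDSWri against bit 0 sets NZCV from the boolean condition; emitSelect
    // below then picks the true value on NE (i.e. when the low bit is set).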
3395 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3396 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3397 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3398 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3399 if (!emitSelect(Dst: Sel.getReg(Idx: 0), True: TReg, False: FReg, CC: AArch64CC::NE, MIB))
3400 return false;
3401 Sel.eraseFromParent();
3402 return true;
3403 }
3404 case TargetOpcode::G_ICMP: {
3405 if (Ty.isVector())
3406 return selectVectorICmp(I, MRI);
3407
3408 if (Ty != LLT::scalar(SizeInBits: 32)) {
3409 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3410 << ", expected: " << LLT::scalar(32) << '\n');
3411 return false;
3412 }
3413
3414 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate());
3415 const AArch64CC::CondCode InvCC =
3416 changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred));
3417 emitIntegerCompare(LHS&: I.getOperand(i: 2), RHS&: I.getOperand(i: 3), Predicate&: I.getOperand(i: 1), MIRBuilder&: MIB);
3418 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3419 /*Src2=*/AArch64::WZR, InvCC, MIB);
3420 I.eraseFromParent();
3421 return true;
3422 }
3423
3424 case TargetOpcode::G_FCMP: {
3425 CmpInst::Predicate Pred =
3426 static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate());
3427 if (!emitFPCompare(LHS: I.getOperand(i: 2).getReg(), RHS: I.getOperand(i: 3).getReg(), MIRBuilder&: MIB,
3428 Pred) ||
3429 !emitCSetForFCmp(Dst: I.getOperand(i: 0).getReg(), Pred, MIRBuilder&: MIB))
3430 return false;
3431 I.eraseFromParent();
3432 return true;
3433 }
3434 case TargetOpcode::G_VASTART:
3435 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3436 : selectVaStartAAPCS(I, MF, MRI);
3437 case TargetOpcode::G_INTRINSIC:
3438 return selectIntrinsic(I, MRI);
3439 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3440 return selectIntrinsicWithSideEffects(I, MRI);
3441 case TargetOpcode::G_IMPLICIT_DEF: {
3442 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3443 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3444 const Register DstReg = I.getOperand(i: 0).getReg();
3445 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3446 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB);
3447 RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI);
3448 return true;
3449 }
3450 case TargetOpcode::G_BLOCK_ADDR: {
3451 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3452 materializeLargeCMVal(I, V: I.getOperand(i: 1).getBlockAddress(), OpFlags: 0);
3453 I.eraseFromParent();
3454 return true;
3455 } else {
3456 I.setDesc(TII.get(AArch64::MOVaddrBA));
3457 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3458 I.getOperand(0).getReg())
3459 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3460 /* Offset */ 0, AArch64II::MO_PAGE)
3461 .addBlockAddress(
3462 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3463 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3464 I.eraseFromParent();
3465 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3466 }
3467 }
3468 case AArch64::G_DUP: {
3469    // When the scalar of G_DUP is an s8/s16 GPR, it can't be selected by the
3470    // imported patterns, so do it manually here. Avoiding the s16 GPR entirely
3471    // is difficult: if we added an anyextend at register bank selection to fix
3472    // this, we might end up pessimizing the FPR case. Manual selection is the
3473    // most robust solution for now.
3474 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3475 AArch64::GPRRegBankID)
3476 return false; // We expect the fpr regbank case to be imported.
3477 LLT VecTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3478 if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8))
3479 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3480 else if (VecTy == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8))
3481 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3482 else if (VecTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16))
3483 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3484 else if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16))
3485 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3486 else
3487 return false;
3488 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3489 }
3490 case TargetOpcode::G_BUILD_VECTOR:
3491 return selectBuildVector(I, MRI);
3492 case TargetOpcode::G_MERGE_VALUES:
3493 return selectMergeValues(I, MRI);
3494 case TargetOpcode::G_UNMERGE_VALUES:
3495 return selectUnmergeValues(I, MRI);
3496 case TargetOpcode::G_SHUFFLE_VECTOR:
3497 return selectShuffleVector(I, MRI);
3498 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3499 return selectExtractElt(I, MRI);
3500 case TargetOpcode::G_CONCAT_VECTORS:
3501 return selectConcatVectors(I, MRI);
3502 case TargetOpcode::G_JUMP_TABLE:
3503 return selectJumpTable(I, MRI);
3504 case TargetOpcode::G_MEMCPY:
3505 case TargetOpcode::G_MEMCPY_INLINE:
3506 case TargetOpcode::G_MEMMOVE:
3507 case TargetOpcode::G_MEMSET:
3508 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3509 return selectMOPS(I, MRI);
3510 }
3511
3512 return false;
3513}
3514
3515bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3516 MachineIRBuilderState OldMIBState = MIB.getState();
3517 bool Success = select(I);
3518 MIB.setState(OldMIBState);
3519 return Success;
3520}
3521
3522bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3523 MachineRegisterInfo &MRI) {
3524 unsigned Mopcode;
3525 switch (GI.getOpcode()) {
3526 case TargetOpcode::G_MEMCPY:
3527 case TargetOpcode::G_MEMCPY_INLINE:
3528 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3529 break;
3530 case TargetOpcode::G_MEMMOVE:
3531 Mopcode = AArch64::MOPSMemoryMovePseudo;
3532 break;
3533 case TargetOpcode::G_MEMSET:
3534 // For tagged memset see llvm.aarch64.mops.memset.tag
3535 Mopcode = AArch64::MOPSMemorySetPseudo;
3536 break;
3537 }
3538
3539 auto &DstPtr = GI.getOperand(i: 0);
3540 auto &SrcOrVal = GI.getOperand(i: 1);
3541 auto &Size = GI.getOperand(i: 2);
3542
3543 // Create copies of the registers that can be clobbered.
3544 const Register DstPtrCopy = MRI.cloneVirtualRegister(VReg: DstPtr.getReg());
3545 const Register SrcValCopy = MRI.cloneVirtualRegister(VReg: SrcOrVal.getReg());
3546 const Register SizeCopy = MRI.cloneVirtualRegister(VReg: Size.getReg());
3547
3548 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3549 const auto &SrcValRegClass =
3550 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
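  // GPR64 includes XZR while GPR64common does not; presumably this is so a
  // zero memset value can use XZR directly, whereas the pointer operands
  // cannot.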
3551
3552 // Constrain to specific registers
3553 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3554 RBI.constrainGenericRegister(Reg: SrcValCopy, RC: SrcValRegClass, MRI);
3555 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3556
3557 MIB.buildCopy(Res: DstPtrCopy, Op: DstPtr);
3558 MIB.buildCopy(Res: SrcValCopy, Op: SrcOrVal);
3559 MIB.buildCopy(Res: SizeCopy, Op: Size);
3560
3561 // New instruction uses the copied registers because it must update them.
3562 // The defs are not used since they don't exist in G_MEM*. They are still
3563 // tied.
3564 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3565 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3566 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3567 if (IsSet) {
3568 MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSize},
3569 SrcOps: {DstPtrCopy, SizeCopy, SrcValCopy});
3570 } else {
3571 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3572 MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSrcPtr, DefSize},
3573 SrcOps: {DstPtrCopy, SrcValCopy, SizeCopy});
3574 }
3575
3576 GI.eraseFromParent();
3577 return true;
3578}
3579
3580bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3581 MachineRegisterInfo &MRI) {
3582 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3583 Register JTAddr = I.getOperand(i: 0).getReg();
3584 unsigned JTI = I.getOperand(i: 1).getIndex();
3585 Register Index = I.getOperand(i: 2).getReg();
3586
3587 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3588 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3589
3590 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(Idx: JTI, Size: 4, PCRelSym: nullptr);
3591 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3592 {TargetReg, ScratchReg}, {JTAddr, Index})
3593 .addJumpTableIndex(JTI);
3594 // Save the jump table info.
3595 MIB.buildInstr(Opc: TargetOpcode::JUMP_TABLE_DEBUG_INFO, DstOps: {},
3596 SrcOps: {static_cast<int64_t>(JTI)});
3597 // Build the indirect branch.
3598 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3599 I.eraseFromParent();
3600 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3601}
3602
3603bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3604 MachineRegisterInfo &MRI) {
3605 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3606 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3607
3608 Register DstReg = I.getOperand(i: 0).getReg();
3609 unsigned JTI = I.getOperand(i: 1).getIndex();
3610 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
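  // The two jump-table-index operands carry the ADRP page (MO_PAGE) and the
  // ADD low-12 offset (MO_PAGEOFF | MO_NC) halves of the address.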
3611 auto MovMI =
3612 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3613 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3614 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3615 I.eraseFromParent();
3616 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3617}
3618
3619bool AArch64InstructionSelector::selectTLSGlobalValue(
3620 MachineInstr &I, MachineRegisterInfo &MRI) {
3621 if (!STI.isTargetMachO())
3622 return false;
3623 MachineFunction &MF = *I.getParent()->getParent();
3624 MF.getFrameInfo().setAdjustsStack(true);
3625
3626 const auto &GlobalOp = I.getOperand(i: 1);
3627 assert(GlobalOp.getOffset() == 0 &&
3628 "Shouldn't have an offset on TLS globals!");
3629 const GlobalValue &GV = *GlobalOp.getGlobal();
3630
3631 auto LoadGOT =
3632 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3633 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3634
3635 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3636 {LoadGOT.getReg(0)})
3637 .addImm(0);
3638
3639 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3640 // TLS calls preserve all registers except those that absolutely must be
3641 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3642 // silly).
3643 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3644 .addUse(AArch64::X0, RegState::Implicit)
3645 .addDef(AArch64::X0, RegState::Implicit)
3646 .addRegMask(TRI.getTLSCallPreservedMask());
3647
3648 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3649 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3650 MRI);
3651 I.eraseFromParent();
3652 return true;
3653}
3654
3655bool AArch64InstructionSelector::selectVectorICmp(
3656 MachineInstr &I, MachineRegisterInfo &MRI) {
3657 Register DstReg = I.getOperand(i: 0).getReg();
3658 LLT DstTy = MRI.getType(Reg: DstReg);
3659 Register SrcReg = I.getOperand(i: 2).getReg();
3660 Register Src2Reg = I.getOperand(i: 3).getReg();
3661 LLT SrcTy = MRI.getType(Reg: SrcReg);
3662
3663 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3664 unsigned NumElts = DstTy.getNumElements();
3665
3666 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3667 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3668 // Third index is cc opcode:
3669 // 0 == eq
3670 // 1 == ugt
3671 // 2 == uge
3672 // 3 == ult
3673 // 4 == ule
3674 // 5 == sgt
3675 // 6 == sge
3676 // 7 == slt
3677 // 8 == sle
3678 // ne is done by negating 'eq' result.
3679
3680  // The table below assumes that for some comparisons the operands will be
3681 // commuted.
3682 // ult op == commute + ugt op
3683 // ule op == commute + uge op
3684 // slt op == commute + sgt op
3685 // sle op == commute + sge op
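  // For example, a v4s32 'slt' compare maps to CMGTv4i32 with its operands
  // swapped (illustrative; see OpcTable below).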
3686 unsigned PredIdx = 0;
3687 bool SwapOperands = false;
3688 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(i: 1).getPredicate();
3689 switch (Pred) {
3690 case CmpInst::ICMP_NE:
3691 case CmpInst::ICMP_EQ:
3692 PredIdx = 0;
3693 break;
3694 case CmpInst::ICMP_UGT:
3695 PredIdx = 1;
3696 break;
3697 case CmpInst::ICMP_UGE:
3698 PredIdx = 2;
3699 break;
3700 case CmpInst::ICMP_ULT:
3701 PredIdx = 3;
3702 SwapOperands = true;
3703 break;
3704 case CmpInst::ICMP_ULE:
3705 PredIdx = 4;
3706 SwapOperands = true;
3707 break;
3708 case CmpInst::ICMP_SGT:
3709 PredIdx = 5;
3710 break;
3711 case CmpInst::ICMP_SGE:
3712 PredIdx = 6;
3713 break;
3714 case CmpInst::ICMP_SLT:
3715 PredIdx = 7;
3716 SwapOperands = true;
3717 break;
3718 case CmpInst::ICMP_SLE:
3719 PredIdx = 8;
3720 SwapOperands = true;
3721 break;
3722 default:
3723 llvm_unreachable("Unhandled icmp predicate");
3724 return false;
3725 }
3726
3727 // This table obviously should be tablegen'd when we have our GISel native
3728 // tablegen selector.
3729
3730 static const unsigned OpcTable[4][4][9] = {
3731 {
3732 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3733 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3734 0 /* invalid */},
3735 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3736 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3737 0 /* invalid */},
3738 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3739 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3740 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3741 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3742 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3743 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3744 },
3745 {
3746 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3747 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3748 0 /* invalid */},
3749 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3750 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3751 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3752 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3753 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3754 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3755 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3756 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3757 0 /* invalid */}
3758 },
3759 {
3760 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3761 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3762 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3763 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3764 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3765 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3766 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3767 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3768 0 /* invalid */},
3769 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3770 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3771 0 /* invalid */}
3772 },
3773 {
3774 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3775 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3776 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3777 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3778 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3779 0 /* invalid */},
3780 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3781 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3782 0 /* invalid */},
3783 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3784 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3785 0 /* invalid */}
3786 },
3787 };
3788 unsigned EltIdx = Log2_32(Value: SrcEltSize / 8);
3789 unsigned NumEltsIdx = Log2_32(Value: NumElts / 2);
3790 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3791 if (!Opc) {
3792 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3793 return false;
3794 }
3795
3796 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3797 const TargetRegisterClass *SrcRC =
3798 getRegClassForTypeOnBank(Ty: SrcTy, RB: VecRB, GetAllRegSet: true);
3799 if (!SrcRC) {
3800 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3801 return false;
3802 }
3803
3804 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3805 if (SrcTy.getSizeInBits() == 128)
3806 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3807
3808 if (SwapOperands)
3809 std::swap(a&: SrcReg, b&: Src2Reg);
3810
3811 auto Cmp = MIB.buildInstr(Opc, DstOps: {SrcRC}, SrcOps: {SrcReg, Src2Reg});
3812 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3813
3814 // Invert if we had a 'ne' cc.
3815 if (NotOpc) {
3816 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3817 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3818 } else {
3819 MIB.buildCopy(Res: DstReg, Op: Cmp.getReg(0));
3820 }
3821 RBI.constrainGenericRegister(Reg: DstReg, RC: *SrcRC, MRI);
3822 I.eraseFromParent();
3823 return true;
3824}
3825
3826MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3827 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3828 MachineIRBuilder &MIRBuilder) const {
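  // Build an IMPLICIT_DEF of DstRC and INSERT_SUBREG the scalar into it, so
  // the scalar ends up as lane 0 of a full vector register.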
3829 auto Undef = MIRBuilder.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstRC}, SrcOps: {});
3830
3831 auto BuildFn = [&](unsigned SubregIndex) {
3832 auto Ins =
3833 MIRBuilder
3834 .buildInstr(Opc: TargetOpcode::INSERT_SUBREG, DstOps: {DstRC}, SrcOps: {Undef, Scalar})
3835 .addImm(Val: SubregIndex);
3836 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3837 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3838 return &*Ins;
3839 };
3840
3841 switch (EltSize) {
3842 case 8:
3843 return BuildFn(AArch64::bsub);
3844 case 16:
3845 return BuildFn(AArch64::hsub);
3846 case 32:
3847 return BuildFn(AArch64::ssub);
3848 case 64:
3849 return BuildFn(AArch64::dsub);
3850 default:
3851 return nullptr;
3852 }
3853}
3854
3855MachineInstr *
3856AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3857 MachineIRBuilder &MIB,
3858 MachineRegisterInfo &MRI) const {
3859 LLT DstTy = MRI.getType(Reg: DstReg);
3860 const TargetRegisterClass *RC =
3861 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3862 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3863 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3864 return nullptr;
3865 }
3866 unsigned SubReg = 0;
3867 if (!getSubRegForClass(RC, TRI, SubReg))
3868 return nullptr;
3869 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3870 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3871                      << DstTy.getSizeInBits() << ")\n");
3872 return nullptr;
3873 }
3874 auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {})
3875 .addReg(RegNo: SrcReg, flags: 0, SubReg);
3876 RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
3877 return Copy;
3878}
3879
3880bool AArch64InstructionSelector::selectMergeValues(
3881 MachineInstr &I, MachineRegisterInfo &MRI) {
3882 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3883 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3884 const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
3885 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3886 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(i: 1).getReg(), MRI, TRI);
3887
3888 if (I.getNumOperands() != 3)
3889 return false;
3890
3891 // Merging 2 s64s into an s128.
3892 if (DstTy == LLT::scalar(SizeInBits: 128)) {
3893 if (SrcTy.getSizeInBits() != 64)
3894 return false;
3895 Register DstReg = I.getOperand(i: 0).getReg();
3896 Register Src1Reg = I.getOperand(i: 1).getReg();
3897 Register Src2Reg = I.getOperand(i: 2).getReg();
3898 auto Tmp = MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstTy}, SrcOps: {});
3899 MachineInstr *InsMI = emitLaneInsert(DstReg: std::nullopt, SrcReg: Tmp.getReg(Idx: 0), EltReg: Src1Reg,
3900 /* LaneIdx */ 0, RB, MIRBuilder&: MIB);
3901 if (!InsMI)
3902 return false;
3903 MachineInstr *Ins2MI = emitLaneInsert(DstReg, SrcReg: InsMI->getOperand(i: 0).getReg(),
3904 EltReg: Src2Reg, /* LaneIdx */ 1, RB, MIRBuilder&: MIB);
3905 if (!Ins2MI)
3906 return false;
3907 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3908 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3909 I.eraseFromParent();
3910 return true;
3911 }
3912
3913 if (RB.getID() != AArch64::GPRRegBankID)
3914 return false;
3915
3916 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3917 return false;
3918
3919 auto *DstRC = &AArch64::GPR64RegClass;
3920 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3921 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3922 TII.get(TargetOpcode::SUBREG_TO_REG))
3923 .addDef(SubToRegDef)
3924 .addImm(0)
3925 .addUse(I.getOperand(1).getReg())
3926 .addImm(AArch64::sub_32);
3927 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3928 // Need to anyext the second scalar before we can use bfm
3929 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3930 TII.get(TargetOpcode::SUBREG_TO_REG))
3931 .addDef(SubToRegDef2)
3932 .addImm(0)
3933 .addUse(I.getOperand(2).getReg())
3934 .addImm(AArch64::sub_32);
3935 MachineInstr &BFM =
3936 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3937 .addDef(I.getOperand(0).getReg())
3938 .addUse(SubToRegDef)
3939 .addUse(SubToRegDef2)
3940 .addImm(32)
3941 .addImm(31);
3942 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3943 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3944 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3945 I.eraseFromParent();
3946 return true;
3947}
3948
3949static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3950 const unsigned EltSize) {
3951 // Choose a lane copy opcode and subregister based off of the size of the
3952 // vector's elements.
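  // The DUPi* instructions copy a single lane of a 128-bit vector register
  // into a scalar FPR of the matching width.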
3953 switch (EltSize) {
3954 case 8:
3955 CopyOpc = AArch64::DUPi8;
3956 ExtractSubReg = AArch64::bsub;
3957 break;
3958 case 16:
3959 CopyOpc = AArch64::DUPi16;
3960 ExtractSubReg = AArch64::hsub;
3961 break;
3962 case 32:
3963 CopyOpc = AArch64::DUPi32;
3964 ExtractSubReg = AArch64::ssub;
3965 break;
3966 case 64:
3967 CopyOpc = AArch64::DUPi64;
3968 ExtractSubReg = AArch64::dsub;
3969 break;
3970 default:
3971 // Unknown size, bail out.
3972 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3973 return false;
3974 }
3975 return true;
3976}
3977
3978MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3979 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3980 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3981 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3982 unsigned CopyOpc = 0;
3983 unsigned ExtractSubReg = 0;
3984 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: ScalarTy.getSizeInBits())) {
3985 LLVM_DEBUG(
3986 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3987 return nullptr;
3988 }
3989
3990 const TargetRegisterClass *DstRC =
3991 getRegClassForTypeOnBank(Ty: ScalarTy, RB: DstRB, GetAllRegSet: true);
3992 if (!DstRC) {
3993 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3994 return nullptr;
3995 }
3996
3997 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3998 const LLT &VecTy = MRI.getType(Reg: VecReg);
3999 const TargetRegisterClass *VecRC =
4000 getRegClassForTypeOnBank(Ty: VecTy, RB: VecRB, GetAllRegSet: true);
4001 if (!VecRC) {
4002 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4003 return nullptr;
4004 }
4005
4006 // The register that we're going to copy into.
4007 Register InsertReg = VecReg;
4008 if (!DstReg)
4009 DstReg = MRI.createVirtualRegister(RegClass: DstRC);
4010 // If the lane index is 0, we just use a subregister COPY.
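  // Lane 0 aliases the bsub/hsub/ssub/dsub subregister of the vector register,
  // so a plain subregister COPY suffices.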
4011 if (LaneIdx == 0) {
4012 auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {*DstReg}, SrcOps: {})
4013 .addReg(RegNo: VecReg, flags: 0, SubReg: ExtractSubReg);
4014 RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI);
4015 return &*Copy;
4016 }
4017
4018 // Lane copies require 128-bit wide registers. If we're dealing with an
4019 // unpacked vector, then we need to move up to that width. Insert an implicit
4020 // def and a subregister insert to get us there.
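  // E.g. (illustrative) extracting lane 1 of a v2s32: the 64-bit source is
  // first inserted into an undef FPR128 (via dsub), then DUPi32 reads lane 1.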
4021 if (VecTy.getSizeInBits() != 128) {
4022 MachineInstr *ScalarToVector = emitScalarToVector(
4023 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4024 if (!ScalarToVector)
4025 return nullptr;
4026 InsertReg = ScalarToVector->getOperand(i: 0).getReg();
4027 }
4028
4029 MachineInstr *LaneCopyMI =
4030 MIRBuilder.buildInstr(Opc: CopyOpc, DstOps: {*DstReg}, SrcOps: {InsertReg}).addImm(Val: LaneIdx);
4031 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4032
4033 // Make sure that we actually constrain the initial copy.
4034 RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI);
4035 return LaneCopyMI;
4036}
4037
4038bool AArch64InstructionSelector::selectExtractElt(
4039 MachineInstr &I, MachineRegisterInfo &MRI) {
4040 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4041 "unexpected opcode!");
4042 Register DstReg = I.getOperand(i: 0).getReg();
4043 const LLT NarrowTy = MRI.getType(Reg: DstReg);
4044 const Register SrcReg = I.getOperand(i: 1).getReg();
4045 const LLT WideTy = MRI.getType(Reg: SrcReg);
4046 (void)WideTy;
4047 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4048 "source register size too small!");
4049 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4050
4051 // Need the lane index to determine the correct copy opcode.
4052 MachineOperand &LaneIdxOp = I.getOperand(i: 2);
4053 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4054
4055 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4056 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4057 return false;
4058 }
4059
4060 // Find the index to extract from.
4061 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: LaneIdxOp.getReg(), MRI);
4062 if (!VRegAndVal)
4063 return false;
4064 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4065
4067 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4068 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg,
4069 LaneIdx, MIRBuilder&: MIB);
4070 if (!Extract)
4071 return false;
4072
4073 I.eraseFromParent();
4074 return true;
4075}
4076
4077bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4078 MachineInstr &I, MachineRegisterInfo &MRI) {
4079 unsigned NumElts = I.getNumOperands() - 1;
4080 Register SrcReg = I.getOperand(i: NumElts).getReg();
4081 const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
4082 const LLT SrcTy = MRI.getType(Reg: SrcReg);
4083
4084 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4085 if (SrcTy.getSizeInBits() > 128) {
4086 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4087 return false;
4088 }
4089
4090 // We implement a split vector operation by treating the sub-vectors as
4091 // scalars and extracting them.
4092 const RegisterBank &DstRB =
4093 *RBI.getRegBank(I.getOperand(i: 0).getReg(), MRI, TRI);
4094 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4095 Register Dst = I.getOperand(i: OpIdx).getReg();
4096 MachineInstr *Extract =
4097 emitExtractVectorElt(DstReg: Dst, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg, LaneIdx: OpIdx, MIRBuilder&: MIB);
4098 if (!Extract)
4099 return false;
4100 }
4101 I.eraseFromParent();
4102 return true;
4103}
4104
4105bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4106 MachineRegisterInfo &MRI) {
4107 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4108 "unexpected opcode");
4109
4110 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4111 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4112 AArch64::FPRRegBankID ||
4113 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4114 AArch64::FPRRegBankID) {
4115 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4116 "currently unsupported.\n");
4117 return false;
4118 }
4119
4120 // The last operand is the vector source register, and every other operand is
4121 // a register to unpack into.
4122 unsigned NumElts = I.getNumOperands() - 1;
4123 Register SrcReg = I.getOperand(i: NumElts).getReg();
4124 const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
4125 const LLT WideTy = MRI.getType(Reg: SrcReg);
4126 (void)WideTy;
4127 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4128 "can only unmerge from vector or s128 types!");
4129 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4130 "source register size too small!");
4131
4132 if (!NarrowTy.isScalar())
4133 return selectSplitVectorUnmerge(I, MRI);
4134
4135 // Choose a lane copy opcode and subregister based off of the size of the
4136 // vector's elements.
4137 unsigned CopyOpc = 0;
4138 unsigned ExtractSubReg = 0;
4139 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: NarrowTy.getSizeInBits()))
4140 return false;
4141
4142 // Set up for the lane copies.
4143 MachineBasicBlock &MBB = *I.getParent();
4144
4145 // Stores the registers we'll be copying from.
4146 SmallVector<Register, 4> InsertRegs;
4147
4148 // We'll use the first register twice, so we only need NumElts-1 registers.
4149 unsigned NumInsertRegs = NumElts - 1;
4150
4151 // If our elements fit into exactly 128 bits, then we can copy from the source
4152 // directly. Otherwise, we need to do a bit of setup with some subregister
4153 // inserts.
4154 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4155 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4156 } else {
4157 // No. We have to perform subregister inserts. For each insert, create an
4158 // implicit def and a subregister insert, and save the register we create.
4159 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4160 LLT::fixed_vector(NumElements: NumElts, ScalarSizeInBits: WideTy.getScalarSizeInBits()),
4161 *RBI.getRegBank(SrcReg, MRI, TRI));
4162 unsigned SubReg = 0;
4163 bool Found = getSubRegForClass(RC, TRI, SubReg);
4164 (void)Found;
4165    assert(Found && "expected to find last operand's subreg idx");
4166 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4167 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4168 MachineInstr &ImpDefMI =
4169 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4170 ImpDefReg);
4171
4172 // Now, create the subregister insert from SrcReg.
4173 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4174 MachineInstr &InsMI =
4175 *BuildMI(MBB, I, I.getDebugLoc(),
4176 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4177 .addUse(ImpDefReg)
4178 .addUse(SrcReg)
4179 .addImm(SubReg);
4180
4181 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4182 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4183
4184 // Save the register so that we can copy from it after.
4185 InsertRegs.push_back(Elt: InsertReg);
4186 }
4187 }
4188
4189 // Now that we've created any necessary subregister inserts, we can
4190 // create the copies.
4191 //
4192 // Perform the first copy separately as a subregister copy.
4193 Register CopyTo = I.getOperand(i: 0).getReg();
4194 auto FirstCopy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {CopyTo}, SrcOps: {})
4195 .addReg(RegNo: InsertRegs[0], flags: 0, SubReg: ExtractSubReg);
4196 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4197
4198 // Now, perform the remaining copies as vector lane copies.
4199 unsigned LaneIdx = 1;
4200 for (Register InsReg : InsertRegs) {
4201 Register CopyTo = I.getOperand(i: LaneIdx).getReg();
4202 MachineInstr &CopyInst =
4203 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4204 .addUse(InsReg)
4205 .addImm(LaneIdx);
4206 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4207 ++LaneIdx;
4208 }
4209
4210 // Separately constrain the first copy's destination. Because of the
4211 // limitation in constrainOperandRegClass, we can't guarantee that this will
4212 // actually be constrained. So, do it ourselves using the second operand.
4213 const TargetRegisterClass *RC =
4214 MRI.getRegClassOrNull(Reg: I.getOperand(i: 1).getReg());
4215 if (!RC) {
4216 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4217 return false;
4218 }
4219
4220 RBI.constrainGenericRegister(Reg: CopyTo, RC: *RC, MRI);
4221 I.eraseFromParent();
4222 return true;
4223}
4224
4225bool AArch64InstructionSelector::selectConcatVectors(
4226 MachineInstr &I, MachineRegisterInfo &MRI) {
4227 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4228 "Unexpected opcode");
4229 Register Dst = I.getOperand(i: 0).getReg();
4230 Register Op1 = I.getOperand(i: 1).getReg();
4231 Register Op2 = I.getOperand(i: 2).getReg();
4232 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder&: MIB);
4233 if (!ConcatMI)
4234 return false;
4235 I.eraseFromParent();
4236 return true;
4237}
4238
4239unsigned
4240AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4241 MachineFunction &MF) const {
4242 Type *CPTy = CPVal->getType();
4243 Align Alignment = MF.getDataLayout().getPrefTypeAlign(Ty: CPTy);
4244
4245 MachineConstantPool *MCP = MF.getConstantPool();
4246 return MCP->getConstantPoolIndex(C: CPVal, Alignment);
4247}
4248
4249MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4250 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4251 const TargetRegisterClass *RC;
4252 unsigned Opc;
4253 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4254 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(Ty: CPVal->getType());
4255 switch (Size) {
4256 case 16:
4257 RC = &AArch64::FPR128RegClass;
4258 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4259 break;
4260 case 8:
4261 RC = &AArch64::FPR64RegClass;
4262 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4263 break;
4264 case 4:
4265 RC = &AArch64::FPR32RegClass;
4266 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4267 break;
4268 case 2:
4269 RC = &AArch64::FPR16RegClass;
4270 Opc = AArch64::LDRHui;
4271 break;
4272 default:
4273 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4274 << *CPVal->getType());
4275 return nullptr;
4276 }
4277
4278 MachineInstr *LoadMI = nullptr;
4279 auto &MF = MIRBuilder.getMF();
4280 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4281 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4282 // Use load(literal) for tiny code model.
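    // LDR (literal) has a +/-1MiB range, which is sufficient under the tiny
    // code model; other cases use the ADRP + page-offset sequence below.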
4283 LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {}).addConstantPoolIndex(Idx: CPIdx);
4284 } else {
4285 auto Adrp =
4286 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4287 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4288
4289 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4290 .addConstantPoolIndex(
4291 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4292
4293 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4294 }
4295
4296 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4297 LoadMI->addMemOperand(MF, MO: MF.getMachineMemOperand(PtrInfo,
4298 F: MachineMemOperand::MOLoad,
4299 Size, BaseAlignment: Align(Size)));
4300 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4301 return LoadMI;
4302}
4303
4304/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4305/// size and RB.
4306static std::pair<unsigned, unsigned>
4307getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4308 unsigned Opc, SubregIdx;
4309 if (RB.getID() == AArch64::GPRRegBankID) {
4310 if (EltSize == 8) {
4311 Opc = AArch64::INSvi8gpr;
4312 SubregIdx = AArch64::bsub;
4313 } else if (EltSize == 16) {
4314 Opc = AArch64::INSvi16gpr;
4315 SubregIdx = AArch64::ssub;
4316 } else if (EltSize == 32) {
4317 Opc = AArch64::INSvi32gpr;
4318 SubregIdx = AArch64::ssub;
4319 } else if (EltSize == 64) {
4320 Opc = AArch64::INSvi64gpr;
4321 SubregIdx = AArch64::dsub;
4322 } else {
4323 llvm_unreachable("invalid elt size!");
4324 }
4325 } else {
4326 if (EltSize == 8) {
4327 Opc = AArch64::INSvi8lane;
4328 SubregIdx = AArch64::bsub;
4329 } else if (EltSize == 16) {
4330 Opc = AArch64::INSvi16lane;
4331 SubregIdx = AArch64::hsub;
4332 } else if (EltSize == 32) {
4333 Opc = AArch64::INSvi32lane;
4334 SubregIdx = AArch64::ssub;
4335 } else if (EltSize == 64) {
4336 Opc = AArch64::INSvi64lane;
4337 SubregIdx = AArch64::dsub;
4338 } else {
4339 llvm_unreachable("invalid elt size!");
4340 }
4341 }
4342 return std::make_pair(x&: Opc, y&: SubregIdx);
4343}
4344
4345MachineInstr *AArch64InstructionSelector::emitInstr(
4346 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4347 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4348 const ComplexRendererFns &RenderFns) const {
4349 assert(Opcode && "Expected an opcode?");
4350 assert(!isPreISelGenericOpcode(Opcode) &&
4351 "Function should only be used to produce selected instructions!");
4352 auto MI = MIRBuilder.buildInstr(Opc: Opcode, DstOps, SrcOps);
4353 if (RenderFns)
4354 for (auto &Fn : *RenderFns)
4355 Fn(MI);
4356 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4357 return &*MI;
4358}
4359
4360MachineInstr *AArch64InstructionSelector::emitAddSub(
4361 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4362 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4363 MachineIRBuilder &MIRBuilder) const {
4364 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4365 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4366 auto Ty = MRI.getType(Reg: LHS.getReg());
4367 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4368 unsigned Size = Ty.getSizeInBits();
4369 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4370 bool Is32Bit = Size == 32;
4371
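  // AddrModeAndSizeToOpcode rows: [0] = ri, [1] = rs, [2] = rr, [3] = ri with
  // a negated immediate, [4] = rx; column 0 is the 64-bit opcode and column 1
  // the 32-bit one.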
4372 // INSTRri form with positive arithmetic immediate.
4373 if (auto Fns = selectArithImmed(Root&: RHS))
4374 return emitInstr(Opcode: AddrModeAndSizeToOpcode[0][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4375 MIRBuilder, RenderFns: Fns);
4376
4377 // INSTRri form with negative arithmetic immediate.
4378 if (auto Fns = selectNegArithImmed(Root&: RHS))
4379 return emitInstr(Opcode: AddrModeAndSizeToOpcode[3][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4380 MIRBuilder, RenderFns: Fns);
4381
4382 // INSTRrx form.
4383 if (auto Fns = selectArithExtendedRegister(Root&: RHS))
4384 return emitInstr(Opcode: AddrModeAndSizeToOpcode[4][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4385 MIRBuilder, RenderFns: Fns);
4386
4387 // INSTRrs form.
4388 if (auto Fns = selectShiftedRegister(Root&: RHS))
4389 return emitInstr(Opcode: AddrModeAndSizeToOpcode[1][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4390 MIRBuilder, RenderFns: Fns);
4391 return emitInstr(Opcode: AddrModeAndSizeToOpcode[2][Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS},
4392 MIRBuilder);
4393}
4394
4395MachineInstr *
4396AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4397 MachineOperand &RHS,
4398 MachineIRBuilder &MIRBuilder) const {
4399 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4400 {{AArch64::ADDXri, AArch64::ADDWri},
4401 {AArch64::ADDXrs, AArch64::ADDWrs},
4402 {AArch64::ADDXrr, AArch64::ADDWrr},
4403 {AArch64::SUBXri, AArch64::SUBWri},
4404 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4405 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst: DefReg, LHS, RHS, MIRBuilder);
4406}
4407
4408MachineInstr *
4409AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4410 MachineOperand &RHS,
4411 MachineIRBuilder &MIRBuilder) const {
4412 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4413 {{AArch64::ADDSXri, AArch64::ADDSWri},
4414 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4415 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4416 {AArch64::SUBSXri, AArch64::SUBSWri},
4417 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4418 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder);
4419}
4420
4421MachineInstr *
4422AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4423 MachineOperand &RHS,
4424 MachineIRBuilder &MIRBuilder) const {
4425 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4426 {{AArch64::SUBSXri, AArch64::SUBSWri},
4427 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4428 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4429 {AArch64::ADDSXri, AArch64::ADDSWri},
4430 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4431 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder);
4432}
4433
4434MachineInstr *
4435AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4436 MachineOperand &RHS,
4437 MachineIRBuilder &MIRBuilder) const {
4438 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4439 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4440 bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4441 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4442 return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder);
4443}
4444
4445MachineInstr *
4446AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4447 MachineOperand &RHS,
4448 MachineIRBuilder &MIRBuilder) const {
4449 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4450 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4451 bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4452 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4453 return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder);
4454}
4455
4456MachineInstr *
4457AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4458 MachineIRBuilder &MIRBuilder) const {
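// CMN is just ADDS with the result discarded, so emit an ADDS into a fresh
// scratch register of the right width; only the NZCV result is used.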
4459 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4460 bool Is32Bit = (MRI.getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4461 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4462 return emitADDS(Dst: MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4463}
4464
4465MachineInstr *
4466AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4467 MachineIRBuilder &MIRBuilder) const {
4468 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4469 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4470 LLT Ty = MRI.getType(Reg: LHS.getReg());
4471 unsigned RegSize = Ty.getSizeInBits();
4472 bool Is32Bit = (RegSize == 32);
4473 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4474 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4475 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4476 // ANDS needs a logical immediate for its immediate form. Check if we can
4477 // fold one in.
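// For example (illustrative), an RHS defined by G_CONSTANT 0xFF on a 32-bit
// value encodes as a logical immediate and selects ANDSWri.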
4478 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI)) {
4479 int64_t Imm = ValAndVReg->Value.getSExtValue();
4480
4481 if (AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize)) {
4482 auto TstMI = MIRBuilder.buildInstr(Opc: OpcTable[0][Is32Bit], DstOps: {Ty}, SrcOps: {LHS});
4483 TstMI.addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize));
4484 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4485 return &*TstMI;
4486 }
4487 }
4488
4489 if (auto Fns = selectLogicalShiftedRegister(Root&: RHS))
4490 return emitInstr(Opcode: OpcTable[1][Is32Bit], DstOps: {Ty}, SrcOps: {LHS}, MIRBuilder, RenderFns: Fns);
4491 return emitInstr(Opcode: OpcTable[2][Is32Bit], DstOps: {Ty}, SrcOps: {LHS, RHS}, MIRBuilder);
4492}
4493
4494MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4495 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4496 MachineIRBuilder &MIRBuilder) const {
4497 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4498 assert(Predicate.isPredicate() && "Expected predicate?");
4499 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4500 LLT CmpTy = MRI.getType(Reg: LHS.getReg());
4501 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4502 unsigned Size = CmpTy.getSizeInBits();
4503 (void)Size;
4504 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4505 // Fold the compare into a cmn or tst if possible.
4506 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4507 return FoldCmp;
4508 auto Dst = MRI.cloneVirtualRegister(VReg: LHS.getReg());
4509 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4510}
4511
4512MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4513 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4514 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4515#ifndef NDEBUG
4516 LLT Ty = MRI.getType(Reg: Dst);
4517 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4518 "Expected a 32-bit scalar register?");
4519#endif
4520 const Register ZReg = AArch64::WZR;
4521 AArch64CC::CondCode CC1, CC2;
4522 changeFCMPPredToAArch64CC(P: Pred, CondCode&: CC1, CondCode2&: CC2);
4523 auto InvCC1 = AArch64CC::getInvertedCondCode(Code: CC1);
4524 if (CC2 == AArch64CC::AL)
4525 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1,
4526 MIRBuilder);
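// Otherwise the predicate needs two condition codes (e.g. FCMP_UEQ is
// satisfied by EQ || VS), so materialize each with a CSINC and OR the
// results together.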
4527 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4528 Register Def1Reg = MRI.createVirtualRegister(RegClass: RC);
4529 Register Def2Reg = MRI.createVirtualRegister(RegClass: RC);
4530 auto InvCC2 = AArch64CC::getInvertedCondCode(Code: CC2);
4531 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1, MIRBuilder);
4532 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC2, MIRBuilder);
4533 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4534 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4535 return &*OrMI;
4536}
4537
4538MachineInstr *AArch64InstructionSelector::emitFPCompare(
4539 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4540 std::optional<CmpInst::Predicate> Pred) const {
4541 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4542 LLT Ty = MRI.getType(Reg: LHS);
4543 if (Ty.isVector())
4544 return nullptr;
4545 unsigned OpSize = Ty.getSizeInBits();
4546 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4547
4548 // If this is a compare against +0.0, then we don't have
4549 // to explicitly materialize a constant.
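// e.g. a 32-bit compare against +0.0 can select FCMPSri, which compares
// directly against #0.0 instead of a materialized register.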
4550 const ConstantFP *FPImm = getConstantFPVRegVal(VReg: RHS, MRI);
4551 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4552
4553 auto IsEqualityPred = [](CmpInst::Predicate P) {
4554 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4555 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4556 };
4557 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4558 // Try commuting the operands.
4559 const ConstantFP *LHSImm = getConstantFPVRegVal(VReg: LHS, MRI);
4560 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4561 ShouldUseImm = true;
4562 std::swap(a&: LHS, b&: RHS);
4563 }
4564 }
4565 unsigned CmpOpcTbl[2][3] = {
4566 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4567 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4568 unsigned CmpOpc =
4569 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4570
4571 // Partially build the compare. Decide if we need to add a use for the
4572 // third operand based on whether we're comparing against 0.0.
4573 auto CmpMI = MIRBuilder.buildInstr(Opcode: CmpOpc).addUse(RegNo: LHS);
4574 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4575 if (!ShouldUseImm)
4576 CmpMI.addUse(RegNo: RHS);
4577 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4578 return &*CmpMI;
4579}
4580
4581MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4582 std::optional<Register> Dst, Register Op1, Register Op2,
4583 MachineIRBuilder &MIRBuilder) const {
4584 // We implement a vector concat by:
4585 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4586 // 2. Insert the upper vector into the destination's upper element
4587 // TODO: some of this code is common with G_BUILD_VECTOR handling.
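// e.g. concatenating two <2 x s32> operands widens each into a 128-bit
// register and inserts element 0 of the widened second operand into lane 1
// of the widened first, producing the <4 x s32> result.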
4588 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4589
4590 const LLT Op1Ty = MRI.getType(Reg: Op1);
4591 const LLT Op2Ty = MRI.getType(Reg: Op2);
4592
4593 if (Op1Ty != Op2Ty) {
4594 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4595 return nullptr;
4596 }
4597 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4598
4599 if (Op1Ty.getSizeInBits() >= 128) {
4600 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4601 return nullptr;
4602 }
4603
4604 // At the moment we only support 64-bit vector concats.
4605 if (Op1Ty.getSizeInBits() != 64) {
4606 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4607 return nullptr;
4608 }
4609
4610 const LLT ScalarTy = LLT::scalar(SizeInBits: Op1Ty.getSizeInBits());
4611 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4612 const TargetRegisterClass *DstRC =
4613 getRegClassForTypeOnBank(Ty: Op1Ty.multiplyElements(Factor: 2), RB: FPRBank);
4614
4615 MachineInstr *WidenedOp1 =
4616 emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op1, MIRBuilder);
4617 MachineInstr *WidenedOp2 =
4618 emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op2, MIRBuilder);
4619 if (!WidenedOp1 || !WidenedOp2) {
4620 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4621 return nullptr;
4622 }
4623
4624 // Now do the insert of the upper element.
4625 unsigned InsertOpc, InsSubRegIdx;
4626 std::tie(args&: InsertOpc, args&: InsSubRegIdx) =
4627 getInsertVecEltOpInfo(RB: FPRBank, EltSize: ScalarTy.getSizeInBits());
4628
4629 if (!Dst)
4630 Dst = MRI.createVirtualRegister(RegClass: DstRC);
4631 auto InsElt =
4632 MIRBuilder
4633 .buildInstr(Opc: InsertOpc, DstOps: {*Dst}, SrcOps: {WidenedOp1->getOperand(i: 0).getReg()})
4634 .addImm(Val: 1) /* Lane index */
4635 .addUse(RegNo: WidenedOp2->getOperand(i: 0).getReg())
4636 .addImm(Val: 0);
4637 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4638 return &*InsElt;
4639}
4640
4641MachineInstr *
4642AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4643 Register Src2, AArch64CC::CondCode Pred,
4644 MachineIRBuilder &MIRBuilder) const {
4645 auto &MRI = *MIRBuilder.getMRI();
4646 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg: Dst);
4647 // If we used a register class, then this won't necessarily have an LLT.
4648 // Compute the size based on whether we have a class or a bank.
4649 unsigned Size;
4650 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4651 Size = TRI.getRegSizeInBits(*RC);
4652 else
4653 Size = MRI.getType(Reg: Dst).getSizeInBits();
4654 // Some opcodes use s1.
4655 assert(Size <= 64 && "Expected 64 bits or less only!");
4656 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4657 unsigned Opc = OpcTable[Size == 64];
4658 auto CSINC = MIRBuilder.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Src1, Src2}).addImm(Val: Pred);
4659 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4660 return &*CSINC;
4661}
4662
4663MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4664 Register CarryReg) {
4665 MachineRegisterInfo *MRI = MIB.getMRI();
4666 unsigned Opcode = I.getOpcode();
4667
4668 // If the instruction is a SUB, we need to negate the carry,
4669 // because borrowing is indicated by carry-flag == 0.
4670 bool NeedsNegatedCarry =
4671 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4672
4673 // If the previous instruction will already produce the correct carry, do not
4674 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4675 // generated during legalization of wide add/sub. This optimization depends on
4676 // these sequences not being interrupted by other instructions.
4677 // We have to select the previous instruction before the carry-using
4678 // instruction is deleted by the calling function, otherwise the previous
4679 // instruction might become dead and would get deleted.
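// e.g. for a 128-bit add legalized to G_UADDO + G_UADDE, the ADDS selected
// for the G_UADDO already leaves the expected carry in NZCV, so no extra
// carry-setting instruction is needed here.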
4680 MachineInstr *SrcMI = MRI->getVRegDef(Reg: CarryReg);
4681 if (SrcMI == I.getPrevNode()) {
4682 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(Val: SrcMI)) {
4683 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4684 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4685 CarrySrcMI->isUnsigned() &&
4686 CarrySrcMI->getCarryOutReg() == CarryReg &&
4687 selectAndRestoreState(I&: *SrcMI))
4688 return nullptr;
4689 }
4690 }
4691
4692 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4693
4694 if (NeedsNegatedCarry) {
4695 // (0 - Carry) sets !C in NZCV when Carry == 1
4696 Register ZReg = AArch64::WZR;
4697 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4698 }
4699
4700 // (Carry - 1) sets !C in NZCV when Carry == 0
4701 auto Fns = select12BitValueWithLeftShift(Immed: 1);
4702 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4703}
4704
4705bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4706 MachineRegisterInfo &MRI) {
4707 auto &CarryMI = cast<GAddSubCarryOut>(Val&: I);
4708
4709 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(Val: &I)) {
4710 // Set NZCV carry according to carry-in VReg
4711 emitCarryIn(I, CarryReg: CarryInMI->getCarryInReg());
4712 }
4713
4714 // Emit the operation and get the correct condition code.
4715 auto OpAndCC = emitOverflowOp(Opcode: I.getOpcode(), Dst: CarryMI.getDstReg(),
4716 LHS&: CarryMI.getLHS(), RHS&: CarryMI.getRHS(), MIRBuilder&: MIB);
4717
4718 Register CarryOutReg = CarryMI.getCarryOutReg();
4719
4720 // Don't convert carry-out to VReg if it is never used
4721 if (!MRI.use_nodbg_empty(RegNo: CarryOutReg)) {
4722 // Now, put the overflow result in the register given by the first operand
4723 // to the overflow op. CSINC increments the result when the predicate is
4724 // false, so to get the increment when it's true, we need to use the
4725 // inverse. In this case, we want to increment when carry is set.
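// e.g. for G_UADDO (condition HS) this emits CSINC %carry, wzr, wzr, lo,
// which is equivalent to CSET %carry, hs.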
4726 Register ZReg = AArch64::WZR;
4727 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4728 Pred: getInvertedCondCode(Code: OpAndCC.second), MIRBuilder&: MIB);
4729 }
4730
4731 I.eraseFromParent();
4732 return true;
4733}
4734
4735std::pair<MachineInstr *, AArch64CC::CondCode>
4736AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4737 MachineOperand &LHS,
4738 MachineOperand &RHS,
4739 MachineIRBuilder &MIRBuilder) const {
4740 switch (Opcode) {
4741 default:
4742 llvm_unreachable("Unexpected opcode!");
4743 case TargetOpcode::G_SADDO:
4744 return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4745 case TargetOpcode::G_UADDO:
4746 return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS);
4747 case TargetOpcode::G_SSUBO:
4748 return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4749 case TargetOpcode::G_USUBO:
4750 return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO);
4751 case TargetOpcode::G_SADDE:
4752 return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4753 case TargetOpcode::G_UADDE:
4754 return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS);
4755 case TargetOpcode::G_SSUBE:
4756 return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4757 case TargetOpcode::G_USUBE:
4758 return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO);
4759 }
4760}
4761
4762/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4763/// expressed as a conjunction.
4764/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4765/// changing the conditions on the CMP tests.
4766/// (this means we can call emitConjunctionRec() with
4767/// Negate==true on this sub-tree)
4768/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4769/// cannot do the negation naturally. We are required to
4770/// emit the subtree first in this case.
4771 /// \param WillNegate Is true if we are called when the result of this
4772/// subexpression must be negated. This happens when the
4773/// outer expression is an OR. We can use this fact to know
4774/// that we have a double negation (or (or ...) ...) that
4775/// can be implemented for free.
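/// For example, a chain like (icmp eq a, b) && (icmp slt c, d) can be lowered
/// as a SUBS followed by a CCMP, with the final answer read out of NZCV, so
/// neither i1 value needs to be materialized in a register.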
4776static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4777 bool WillNegate, MachineRegisterInfo &MRI,
4778 unsigned Depth = 0) {
4779 if (!MRI.hasOneNonDBGUse(RegNo: Val))
4780 return false;
4781 MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
4782 unsigned Opcode = ValDef->getOpcode();
4783 if (isa<GAnyCmp>(Val: ValDef)) {
4784 CanNegate = true;
4785 MustBeFirst = false;
4786 return true;
4787 }
4788 // Protect against exponential runtime and stack overflow.
4789 if (Depth > 6)
4790 return false;
4791 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4792 bool IsOR = Opcode == TargetOpcode::G_OR;
4793 Register O0 = ValDef->getOperand(i: 1).getReg();
4794 Register O1 = ValDef->getOperand(i: 2).getReg();
4795 bool CanNegateL;
4796 bool MustBeFirstL;
4797 if (!canEmitConjunction(Val: O0, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI, Depth: Depth + 1))
4798 return false;
4799 bool CanNegateR;
4800 bool MustBeFirstR;
4801 if (!canEmitConjunction(Val: O1, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI, Depth: Depth + 1))
4802 return false;
4803
4804 if (MustBeFirstL && MustBeFirstR)
4805 return false;
4806
4807 if (IsOR) {
4808 // For an OR expression we need to be able to naturally negate at least
4809 // one side or we cannot do the transformation at all.
4810 if (!CanNegateL && !CanNegateR)
4811 return false;
4812 // If the result of the OR will be negated and we can naturally negate
4813 // the leaves, then this sub-tree as a whole negates naturally.
4814 CanNegate = WillNegate && CanNegateL && CanNegateR;
4815 // If we cannot naturally negate the whole sub-tree, then this must be
4816 // emitted first.
4817 MustBeFirst = !CanNegate;
4818 } else {
4819 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4820 // We cannot naturally negate an AND operation.
4821 CanNegate = false;
4822 MustBeFirst = MustBeFirstL || MustBeFirstR;
4823 }
4824 return true;
4825 }
4826 return false;
4827}
4828
4829MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4830 Register LHS, Register RHS, CmpInst::Predicate CC,
4831 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4832 MachineIRBuilder &MIB) const {
4833 // TODO: emit CMN as an optimization.
4834 auto &MRI = *MIB.getMRI();
4835 LLT OpTy = MRI.getType(Reg: LHS);
4836 unsigned CCmpOpc;
4837 std::optional<ValueAndVReg> C;
4838 if (CmpInst::isIntPredicate(P: CC)) {
4839 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4840 C = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
4841 if (C && C->Value.ult(32))
4842 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4843 else
4844 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4845 } else {
4846 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4847 OpTy.getSizeInBits() == 64);
4848 switch (OpTy.getSizeInBits()) {
4849 case 16:
4850 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4851 CCmpOpc = AArch64::FCCMPHrr;
4852 break;
4853 case 32:
4854 CCmpOpc = AArch64::FCCMPSrr;
4855 break;
4856 case 64:
4857 CCmpOpc = AArch64::FCCMPDrr;
4858 break;
4859 default:
4860 return nullptr;
4861 }
4862 }
4863 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
4864 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Code: InvOutCC);
4865 auto CCmp =
4866 MIB.buildInstr(Opc: CCmpOpc, DstOps: {}, SrcOps: {LHS});
4867 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4868 CCmp.addImm(Val: C->Value.getZExtValue());
4869 else
4870 CCmp.addReg(RegNo: RHS);
4871 CCmp.addImm(Val: NZCV).addImm(Val: Predicate);
4872 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4873 return &*CCmp;
4874}
4875
4876MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4877 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4878 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4879 // We're at a tree leaf, produce a conditional comparison operation.
4880 auto &MRI = *MIB.getMRI();
4881 MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
4882 unsigned Opcode = ValDef->getOpcode();
4883 if (auto *Cmp = dyn_cast<GAnyCmp>(Val: ValDef)) {
4884 Register LHS = Cmp->getLHSReg();
4885 Register RHS = Cmp->getRHSReg();
4886 CmpInst::Predicate CC = Cmp->getCond();
4887 if (Negate)
4888 CC = CmpInst::getInversePredicate(pred: CC);
4889 if (isa<GICmp>(Val: Cmp)) {
4890 OutCC = changeICMPPredToAArch64CC(P: CC);
4891 } else {
4892 // Handle special FP cases.
4893 AArch64CC::CondCode ExtraCC;
4894 changeFPCCToANDAArch64CC(CC, CondCode&: OutCC, CondCode2&: ExtraCC);
4895 // Some floating point conditions can't be tested with a single condition
4896 // code. Construct an additional comparison in this case.
4897 if (ExtraCC != AArch64CC::AL) {
4898 MachineInstr *ExtraCmp;
4899 if (!CCOp)
4900 ExtraCmp = emitFPCompare(LHS, RHS, MIRBuilder&: MIB, Pred: CC);
4901 else
4902 ExtraCmp =
4903 emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC: ExtraCC, MIB);
4904 CCOp = ExtraCmp->getOperand(i: 0).getReg();
4905 Predicate = ExtraCC;
4906 }
4907 }
4908
4909 // Produce a normal comparison if we are first in the chain
4910 if (!CCOp) {
4911 auto Dst = MRI.cloneVirtualRegister(VReg: LHS);
4912 if (isa<GICmp>(Val: Cmp))
4913 return emitSUBS(Dst, LHS&: Cmp->getOperand(i: 2), RHS&: Cmp->getOperand(i: 3), MIRBuilder&: MIB);
4914 return emitFPCompare(LHS: Cmp->getOperand(i: 2).getReg(),
4915 RHS: Cmp->getOperand(i: 3).getReg(), MIRBuilder&: MIB);
4916 }
4917 // Otherwise produce a ccmp.
4918 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4919 }
4920 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4921
4922 bool IsOR = Opcode == TargetOpcode::G_OR;
4923
4924 Register LHS = ValDef->getOperand(i: 1).getReg();
4925 bool CanNegateL;
4926 bool MustBeFirstL;
4927 bool ValidL = canEmitConjunction(Val: LHS, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI);
4928 assert(ValidL && "Valid conjunction/disjunction tree");
4929 (void)ValidL;
4930
4931 Register RHS = ValDef->getOperand(i: 2).getReg();
4932 bool CanNegateR;
4933 bool MustBeFirstR;
4934 bool ValidR = canEmitConjunction(Val: RHS, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI);
4935 assert(ValidR && "Valid conjunction/disjunction tree");
4936 (void)ValidR;
4937
4938 // Swap sub-tree that must come first to the right side.
4939 if (MustBeFirstL) {
4940 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4941 std::swap(a&: LHS, b&: RHS);
4942 std::swap(a&: CanNegateL, b&: CanNegateR);
4943 std::swap(a&: MustBeFirstL, b&: MustBeFirstR);
4944 }
4945
4946 bool NegateR;
4947 bool NegateAfterR;
4948 bool NegateL;
4949 bool NegateAfterAll;
4950 if (Opcode == TargetOpcode::G_OR) {
4951 // Swap the sub-tree that we can negate naturally to the left.
4952 if (!CanNegateL) {
4953 assert(CanNegateR && "at least one side must be negatable");
4954 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4955 assert(!Negate);
4956 std::swap(a&: LHS, b&: RHS);
4957 NegateR = false;
4958 NegateAfterR = true;
4959 } else {
4960 // Negate the left sub-tree if possible, otherwise negate the result.
4961 NegateR = CanNegateR;
4962 NegateAfterR = !CanNegateR;
4963 }
4964 NegateL = true;
4965 NegateAfterAll = !Negate;
4966 } else {
4967 assert(Opcode == TargetOpcode::G_AND &&
4968 "Valid conjunction/disjunction tree");
4969 assert(!Negate && "Valid conjunction/disjunction tree");
4970
4971 NegateL = false;
4972 NegateR = false;
4973 NegateAfterR = false;
4974 NegateAfterAll = false;
4975 }
4976
4977 // Emit sub-trees.
4978 AArch64CC::CondCode RHSCC;
4979 MachineInstr *CmpR =
4980 emitConjunctionRec(Val: RHS, OutCC&: RHSCC, Negate: NegateR, CCOp, Predicate, MIB);
4981 if (NegateAfterR)
4982 RHSCC = AArch64CC::getInvertedCondCode(Code: RHSCC);
4983 MachineInstr *CmpL = emitConjunctionRec(
4984 Val: LHS, OutCC, Negate: NegateL, CCOp: CmpR->getOperand(i: 0).getReg(), Predicate: RHSCC, MIB);
4985 if (NegateAfterAll)
4986 OutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
4987 return CmpL;
4988}
4989
4990MachineInstr *AArch64InstructionSelector::emitConjunction(
4991 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4992 bool DummyCanNegate;
4993 bool DummyMustBeFirst;
4994 if (!canEmitConjunction(Val, CanNegate&: DummyCanNegate, MustBeFirst&: DummyMustBeFirst, WillNegate: false,
4995 MRI&: *MIB.getMRI()))
4996 return nullptr;
4997 return emitConjunctionRec(Val, OutCC, Negate: false, CCOp: Register(), Predicate: AArch64CC::AL, MIB);
4998}
4999
5000bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5001 MachineInstr &CondMI) {
5002 AArch64CC::CondCode AArch64CC;
5003 MachineInstr *ConjMI = emitConjunction(Val: SelI.getCondReg(), OutCC&: AArch64CC, MIB);
5004 if (!ConjMI)
5005 return false;
5006
5007 emitSelect(Dst: SelI.getReg(Idx: 0), True: SelI.getTrueReg(), False: SelI.getFalseReg(), CC: AArch64CC, MIB);
5008 SelI.eraseFromParent();
5009 return true;
5010}
5011
5012bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5013 MachineRegisterInfo &MRI = *MIB.getMRI();
5014 // We want to recognize this pattern:
5015 //
5016 // $z = G_FCMP pred, $x, $y
5017 // ...
5018 // $w = G_SELECT $z, $a, $b
5019 //
5020 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5021 // some copies/truncs in between).
5022 //
5023 // If we see this, then we can emit something like this:
5024 //
5025 // fcmp $x, $y
5026 // fcsel $w, $a, $b, pred
5027 //
5028 // Rather than emitting both of the rather long sequences in the standard
5029 // G_FCMP/G_SELECT select methods.
5030
5031 // First, check if the condition is defined by a compare.
5032 MachineInstr *CondDef = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg());
5033
5034 // We can only fold if all of the defs have one use.
5035 Register CondDefReg = CondDef->getOperand(i: 0).getReg();
5036 if (!MRI.hasOneNonDBGUse(RegNo: CondDefReg)) {
5037 // Unless it's another select.
5038 for (const MachineInstr &UI : MRI.use_nodbg_instructions(Reg: CondDefReg)) {
5039 if (CondDef == &UI)
5040 continue;
5041 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5042 return false;
5043 }
5044 }
5045
5046 // Is the condition defined by a compare?
5047 unsigned CondOpc = CondDef->getOpcode();
5048 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5049 if (tryOptSelectConjunction(SelI&: I, CondMI&: *CondDef))
5050 return true;
5051 return false;
5052 }
5053
5054 AArch64CC::CondCode CondCode;
5055 if (CondOpc == TargetOpcode::G_ICMP) {
5056 auto Pred =
5057 static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate());
5058 CondCode = changeICMPPredToAArch64CC(P: Pred);
5059 emitIntegerCompare(LHS&: CondDef->getOperand(i: 2), RHS&: CondDef->getOperand(i: 3),
5060 Predicate&: CondDef->getOperand(i: 1), MIRBuilder&: MIB);
5061 } else {
5062 // Get the condition code for the select.
5063 auto Pred =
5064 static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate());
5065 AArch64CC::CondCode CondCode2;
5066 changeFCMPPredToAArch64CC(P: Pred, CondCode, CondCode2);
5067
5068 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5069 // instructions to emit the comparison.
5070 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5071 // unnecessary.
5072 if (CondCode2 != AArch64CC::AL)
5073 return false;
5074
5075 if (!emitFPCompare(LHS: CondDef->getOperand(i: 2).getReg(),
5076 RHS: CondDef->getOperand(i: 3).getReg(), MIRBuilder&: MIB)) {
5077 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5078 return false;
5079 }
5080 }
5081
5082 // Emit the select.
5083 emitSelect(Dst: I.getOperand(i: 0).getReg(), True: I.getOperand(i: 2).getReg(),
5084 False: I.getOperand(i: 3).getReg(), CC: CondCode, MIB);
5085 I.eraseFromParent();
5086 return true;
5087}
5088
5089MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5090 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5091 MachineIRBuilder &MIRBuilder) const {
5092 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5093 "Unexpected MachineOperand");
5094 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5095 // We want to find this sort of thing:
5096 // x = G_SUB 0, y
5097 // G_ICMP z, x
5098 //
5099 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5100 // e.g:
5101 //
5102 // cmn z, y
5103
5104 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5105 MachineInstr *LHSDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
5106 MachineInstr *RHSDef = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);
5107 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5108 // Given this:
5109 //
5110 // x = G_SUB 0, y
5111 // G_ICMP x, z
5112 //
5113 // Produce this:
5114 //
5115 // cmn y, z
5116 if (isCMN(MaybeSub: LHSDef, Pred: P, MRI))
5117 return emitCMN(LHS&: LHSDef->getOperand(i: 2), RHS, MIRBuilder);
5118
5119 // Same idea here, but with the RHS of the compare instead:
5120 //
5121 // Given this:
5122 //
5123 // x = G_SUB 0, y
5124 // G_ICMP z, x
5125 //
5126 // Produce this:
5127 //
5128 // cmn z, y
5129 if (isCMN(MaybeSub: RHSDef, Pred: P, MRI))
5130 return emitCMN(LHS, RHS&: RHSDef->getOperand(i: 2), MIRBuilder);
5131
5132 // Given this:
5133 //
5134 // z = G_AND x, y
5135 // G_ICMP z, 0
5136 //
5137 // Produce this if the compare is signed:
5138 //
5139 // tst x, y
5140 if (!CmpInst::isUnsigned(predicate: P) && LHSDef &&
5141 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5142 // Make sure that the RHS is 0.
5143 auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI);
5144 if (!ValAndVReg || ValAndVReg->Value != 0)
5145 return nullptr;
5146
5147 return emitTST(LHS&: LHSDef->getOperand(i: 1),
5148 RHS&: LHSDef->getOperand(i: 2), MIRBuilder);
5149 }
5150
5151 return nullptr;
5152}
5153
5154bool AArch64InstructionSelector::selectShuffleVector(
5155 MachineInstr &I, MachineRegisterInfo &MRI) {
5156 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
5157 Register Src1Reg = I.getOperand(i: 1).getReg();
5158 const LLT Src1Ty = MRI.getType(Reg: Src1Reg);
5159 Register Src2Reg = I.getOperand(i: 2).getReg();
5160 const LLT Src2Ty = MRI.getType(Reg: Src2Reg);
5161 ArrayRef<int> Mask = I.getOperand(i: 3).getShuffleMask();
5162
5163 MachineBasicBlock &MBB = *I.getParent();
5164 MachineFunction &MF = *MBB.getParent();
5165 LLVMContext &Ctx = MF.getFunction().getContext();
5166
5167 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5168 // it's originated from a <1 x T> type. Those should have been lowered into
5169 // G_BUILD_VECTOR earlier.
5170 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5171 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5172 return false;
5173 }
5174
5175 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5176
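// Build one index byte per destination byte; e.g. for an <8 x s8> shuffle,
// mask element 8 becomes byte offset 8 into the concatenated source that
// TBL reads from.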
5177 SmallVector<Constant *, 64> CstIdxs;
5178 for (int Val : Mask) {
5179 // For now, we'll just assume any undef indexes are 0. This should be
5180 // optimized in the future, e.g. to select DUP etc.
5181 Val = Val < 0 ? 0 : Val;
5182 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5183 unsigned Offset = Byte + Val * BytesPerElt;
5184 CstIdxs.emplace_back(Args: ConstantInt::get(Ty: Type::getInt8Ty(C&: Ctx), V: Offset));
5185 }
5186 }
5187
5188 // Use a constant pool to load the index vector for TBL.
5189 Constant *CPVal = ConstantVector::get(V: CstIdxs);
5190 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder&: MIB);
5191 if (!IndexLoad) {
5192 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5193 return false;
5194 }
5195
5196 if (DstTy.getSizeInBits() != 128) {
5197 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5198 // This case can be done with TBL1.
5199 MachineInstr *Concat =
5200 emitVectorConcat(Dst: std::nullopt, Op1: Src1Reg, Op2: Src2Reg, MIRBuilder&: MIB);
5201 if (!Concat) {
5202 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5203 return false;
5204 }
5205
5206 // The constant pool load will be 64 bits, so we need to convert to an FPR128 reg.
5207 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5208 IndexLoad->getOperand(0).getReg(), MIB);
5209
5210 auto TBL1 = MIB.buildInstr(
5211 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5212 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5213 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5214
5215 auto Copy =
5216 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5217 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5218 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5219 I.eraseFromParent();
5220 return true;
5221 }
5222
5223 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5224 // Q registers for regalloc.
5225 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5226 auto RegSeq = createQTuple(Regs, MIB);
5227 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5228 {RegSeq, IndexLoad->getOperand(0)});
5229 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5230 I.eraseFromParent();
5231 return true;
5232}
5233
5234MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5235 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5236 unsigned LaneIdx, const RegisterBank &RB,
5237 MachineIRBuilder &MIRBuilder) const {
5238 MachineInstr *InsElt = nullptr;
5239 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5240 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5241
5242 // Create a register to define with the insert if one wasn't passed in.
5243 if (!DstReg)
5244 DstReg = MRI.createVirtualRegister(RegClass: DstRC);
5245
5246 unsigned EltSize = MRI.getType(Reg: EltReg).getSizeInBits();
5247 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5248
5249 if (RB.getID() == AArch64::FPRRegBankID) {
5250 auto InsSub = emitScalarToVector(EltSize, DstRC, Scalar: EltReg, MIRBuilder);
5251 InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
5252 .addImm(Val: LaneIdx)
5253 .addUse(RegNo: InsSub->getOperand(i: 0).getReg())
5254 .addImm(Val: 0);
5255 } else {
5256 InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
5257 .addImm(Val: LaneIdx)
5258 .addUse(RegNo: EltReg);
5259 }
5260
5261 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5262 return InsElt;
5263}
5264
5265bool AArch64InstructionSelector::selectUSMovFromExtend(
5266 MachineInstr &MI, MachineRegisterInfo &MRI) {
5267 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5268 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5269 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5270 return false;
5271 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5272 const Register DefReg = MI.getOperand(i: 0).getReg();
5273 const LLT DstTy = MRI.getType(Reg: DefReg);
5274 unsigned DstSize = DstTy.getSizeInBits();
5275
5276 if (DstSize != 32 && DstSize != 64)
5277 return false;
5278
5279 MachineInstr *Extract = getOpcodeDef(Opcode: TargetOpcode::G_EXTRACT_VECTOR_ELT,
5280 Reg: MI.getOperand(i: 1).getReg(), MRI);
5281 int64_t Lane;
5282 if (!Extract || !mi_match(R: Extract->getOperand(i: 2).getReg(), MRI, P: m_ICst(Cst&: Lane)))
5283 return false;
5284 Register Src0 = Extract->getOperand(i: 1).getReg();
5285
5286 const LLT &VecTy = MRI.getType(Reg: Src0);
5287
5288 if (VecTy.getSizeInBits() != 128) {
5289 const MachineInstr *ScalarToVector = emitScalarToVector(
5290 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5291 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5292 Src0 = ScalarToVector->getOperand(i: 0).getReg();
5293 }
5294
5295 unsigned Opcode;
5296 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5297 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5298 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5299 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5300 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5301 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5302 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5303 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5304 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5305 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5306 else
5307 llvm_unreachable("Unexpected type combo for S/UMov!");
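// e.g. sign-extending lane 3 of an <8 x s16> vector to s32 picks
// SMOVvi16to32 with a lane index of 3.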
5308
5309 // We may need to generate one of these, depending on the type and sign of the
5310 // input:
5311 // DstReg = SMOV Src0, Lane;
5312 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5313 MachineInstr *ExtI = nullptr;
5314 if (DstSize == 64 && !IsSigned) {
5315 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5316 MIB.buildInstr(Opc: Opcode, DstOps: {NewReg}, SrcOps: {Src0}).addImm(Val: Lane);
5317 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5318 .addImm(0)
5319 .addUse(NewReg)
5320 .addImm(AArch64::sub_32);
5321 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5322 } else
5323 ExtI = MIB.buildInstr(Opc: Opcode, DstOps: {DefReg}, SrcOps: {Src0}).addImm(Val: Lane);
5324
5325 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5326 MI.eraseFromParent();
5327 return true;
5328}
5329
5330MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5331 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5332 unsigned int Op;
5333 if (DstSize == 128) {
5334 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5335 return nullptr;
5336 Op = AArch64::MOVIv16b_ns;
5337 } else {
5338 Op = AArch64::MOVIv8b_ns;
5339 }
5340
5341 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5342
5343 if (AArch64_AM::isAdvSIMDModImmType9(Imm: Val)) {
5344 Val = AArch64_AM::encodeAdvSIMDModImmType9(Imm: Val);
5345 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5346 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5347 return &*Mov;
5348 }
5349 return nullptr;
5350}
5351
5352MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5353 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5354 bool Inv) {
5355
5356 unsigned int Op;
5357 if (DstSize == 128) {
5358 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5359 return nullptr;
5360 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5361 } else {
5362 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5363 }
5364
5365 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5366 uint64_t Shift;
5367
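// e.g. a per-lane splat of 0x5600 matches the shifted form (type 6) and is
// encoded as immediate 0x56 with an LSL of 8.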
5368 if (AArch64_AM::isAdvSIMDModImmType5(Imm: Val)) {
5369 Val = AArch64_AM::encodeAdvSIMDModImmType5(Imm: Val);
5370 Shift = 0;
5371 } else if (AArch64_AM::isAdvSIMDModImmType6(Imm: Val)) {
5372 Val = AArch64_AM::encodeAdvSIMDModImmType6(Imm: Val);
5373 Shift = 8;
5374 } else
5375 return nullptr;
5376
5377 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5378 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5379 return &*Mov;
5380}
5381
5382MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5383 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5384 bool Inv) {
5385
5386 unsigned int Op;
5387 if (DstSize == 128) {
5388 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5389 return nullptr;
5390 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5391 } else {
5392 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5393 }
5394
5395 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5396 uint64_t Shift;
5397
5398 if ((AArch64_AM::isAdvSIMDModImmType1(Imm: Val))) {
5399 Val = AArch64_AM::encodeAdvSIMDModImmType1(Imm: Val);
5400 Shift = 0;
5401 } else if ((AArch64_AM::isAdvSIMDModImmType2(Imm: Val))) {
5402 Val = AArch64_AM::encodeAdvSIMDModImmType2(Imm: Val);
5403 Shift = 8;
5404 } else if ((AArch64_AM::isAdvSIMDModImmType3(Imm: Val))) {
5405 Val = AArch64_AM::encodeAdvSIMDModImmType3(Imm: Val);
5406 Shift = 16;
5407 } else if ((AArch64_AM::isAdvSIMDModImmType4(Imm: Val))) {
5408 Val = AArch64_AM::encodeAdvSIMDModImmType4(Imm: Val);
5409 Shift = 24;
5410 } else
5411 return nullptr;
5412
5413 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5414 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5415 return &*Mov;
5416}
5417
5418MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5419 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5420
5421 unsigned int Op;
5422 if (DstSize == 128) {
5423 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5424 return nullptr;
5425 Op = AArch64::MOVIv2d_ns;
5426 } else {
5427 Op = AArch64::MOVID;
5428 }
5429
5430 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5431 if (AArch64_AM::isAdvSIMDModImmType10(Imm: Val)) {
5432 Val = AArch64_AM::encodeAdvSIMDModImmType10(Imm: Val);
5433 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5434 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5435 return &*Mov;
5436 }
5437 return nullptr;
5438}
5439
5440MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5441 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5442 bool Inv) {
5443
5444 unsigned int Op;
5445 if (DstSize == 128) {
5446 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5447 return nullptr;
5448 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5449 } else {
5450 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5451 }
5452
5453 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5454 uint64_t Shift;
5455
5456 if (AArch64_AM::isAdvSIMDModImmType7(Imm: Val)) {
5457 Val = AArch64_AM::encodeAdvSIMDModImmType7(Imm: Val);
5458 Shift = 264;
5459 } else if (AArch64_AM::isAdvSIMDModImmType8(Imm: Val)) {
5460 Val = AArch64_AM::encodeAdvSIMDModImmType8(Imm: Val);
5461 Shift = 272;
5462 } else
5463 return nullptr;
5464
5465 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5466 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5467 return &*Mov;
5468}
5469
5470MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5471 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5472
5473 unsigned int Op;
5474 bool IsWide = false;
5475 if (DstSize == 128) {
5476 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5477 return nullptr;
5478 Op = AArch64::FMOVv4f32_ns;
5479 IsWide = true;
5480 } else {
5481 Op = AArch64::FMOVv2f32_ns;
5482 }
5483
5484 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5485
5486 if (AArch64_AM::isAdvSIMDModImmType11(Imm: Val)) {
5487 Val = AArch64_AM::encodeAdvSIMDModImmType11(Imm: Val);
5488 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Imm: Val)) {
5489 Val = AArch64_AM::encodeAdvSIMDModImmType12(Imm: Val);
5490 Op = AArch64::FMOVv2f64_ns;
5491 } else
5492 return nullptr;
5493
5494 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5495 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5496 return &*Mov;
5497}
5498
5499bool AArch64InstructionSelector::selectIndexedExtLoad(
5500 MachineInstr &MI, MachineRegisterInfo &MRI) {
5501 auto &ExtLd = cast<GIndexedAnyExtLoad>(Val&: MI);
5502 Register Dst = ExtLd.getDstReg();
5503 Register WriteBack = ExtLd.getWritebackReg();
5504 Register Base = ExtLd.getBaseReg();
5505 Register Offset = ExtLd.getOffsetReg();
5506 LLT Ty = MRI.getType(Reg: Dst);
5507 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5508 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5509 bool IsPre = ExtLd.isPre();
5510 bool IsSExt = isa<GIndexedSExtLoad>(Val: ExtLd);
5511 bool InsertIntoXReg = false;
5512 bool IsDst64 = Ty.getSizeInBits() == 64;
5513
5514 unsigned Opc = 0;
5515 LLT NewLdDstTy;
5516 LLT s32 = LLT::scalar(SizeInBits: 32);
5517 LLT s64 = LLT::scalar(SizeInBits: 64);
5518
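// e.g. a pre-indexed sign-extending 8-bit load into a 32-bit destination
// selects LDRSBWpre; the zero/any-extending form selects LDRBBpre and, for
// a 64-bit destination, is followed by a SUBREG_TO_REG into an X register.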
5519 if (MemSizeBits == 8) {
5520 if (IsSExt) {
5521 if (IsDst64)
5522 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5523 else
5524 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5525 NewLdDstTy = IsDst64 ? s64 : s32;
5526 } else {
5527 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5528 InsertIntoXReg = IsDst64;
5529 NewLdDstTy = s32;
5530 }
5531 } else if (MemSizeBits == 16) {
5532 if (IsSExt) {
5533 if (IsDst64)
5534 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5535 else
5536 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5537 NewLdDstTy = IsDst64 ? s64 : s32;
5538 } else {
5539 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5540 InsertIntoXReg = IsDst64;
5541 NewLdDstTy = s32;
5542 }
5543 } else if (MemSizeBits == 32) {
5544 if (IsSExt) {
5545 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5546 NewLdDstTy = s64;
5547 } else {
5548 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5549 InsertIntoXReg = IsDst64;
5550 NewLdDstTy = s32;
5551 }
5552 } else {
5553 llvm_unreachable("Unexpected size for indexed load");
5554 }
5555
5556 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5557 return false; // We should be on gpr.
5558
5559 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5560 if (!Cst)
5561 return false; // Shouldn't happen, but just in case.
5562
5563 auto LdMI = MIB.buildInstr(Opc, DstOps: {WriteBack, NewLdDstTy}, SrcOps: {Base})
5564 .addImm(Val: Cst->getSExtValue());
5565 LdMI.cloneMemRefs(OtherMI: ExtLd);
5566 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5567 // Make sure to select the load with the MemTy as the dest type, and then
5568 // insert into X reg if needed.
5569 if (InsertIntoXReg) {
5570 // Generate a SUBREG_TO_REG.
5571 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5572 .addImm(0)
5573 .addUse(LdMI.getReg(1))
5574 .addImm(AArch64::sub_32);
5575 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5576 MRI);
5577 } else {
5578 auto Copy = MIB.buildCopy(Res: Dst, Op: LdMI.getReg(Idx: 1));
5579 selectCopy(*Copy, TII, MRI, TRI, RBI);
5580 }
5581 MI.eraseFromParent();
5582
5583 return true;
5584}
5585
5586bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5587 MachineRegisterInfo &MRI) {
5588 auto &Ld = cast<GIndexedLoad>(Val&: MI);
5589 Register Dst = Ld.getDstReg();
5590 Register WriteBack = Ld.getWritebackReg();
5591 Register Base = Ld.getBaseReg();
5592 Register Offset = Ld.getOffsetReg();
5593 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5594 "Unexpected type for indexed load");
5595 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5596
5597 if (MemSize < MRI.getType(Reg: Dst).getSizeInBytes())
5598 return selectIndexedExtLoad(MI, MRI);
5599
5600 unsigned Opc = 0;
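// The opcode tables below are indexed by Log2 of the memory size in bytes;
// e.g. a 4-byte pre-indexed GPR load selects LDRWpre.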
5601 if (Ld.isPre()) {
5602 static constexpr unsigned GPROpcodes[] = {
5603 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5604 AArch64::LDRXpre};
5605 static constexpr unsigned FPROpcodes[] = {
5606 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5607 AArch64::LDRQpre};
5608 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5609 Opc = FPROpcodes[Log2_32(Value: MemSize)];
5610 else
5611 Opc = GPROpcodes[Log2_32(Value: MemSize)];
5612 } else {
5613 static constexpr unsigned GPROpcodes[] = {
5614 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5615 AArch64::LDRXpost};
5616 static constexpr unsigned FPROpcodes[] = {
5617 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5618 AArch64::LDRDpost, AArch64::LDRQpost};
5619 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5620 Opc = FPROpcodes[Log2_32(Value: MemSize)];
5621 else
5622 Opc = GPROpcodes[Log2_32(Value: MemSize)];
5623 }
5624 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5625 if (!Cst)
5626 return false; // Shouldn't happen, but just in case.
5627 auto LdMI =
5628 MIB.buildInstr(Opc, DstOps: {WriteBack, Dst}, SrcOps: {Base}).addImm(Val: Cst->getSExtValue());
5629 LdMI.cloneMemRefs(OtherMI: Ld);
5630 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5631 MI.eraseFromParent();
5632 return true;
5633}
5634
5635bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5636 MachineRegisterInfo &MRI) {
5637 Register Dst = I.getWritebackReg();
5638 Register Val = I.getValueReg();
5639 Register Base = I.getBaseReg();
5640 Register Offset = I.getOffsetReg();
5641 LLT ValTy = MRI.getType(Reg: Val);
5642 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5643
5644 unsigned Opc = 0;
5645 if (I.isPre()) {
5646 static constexpr unsigned GPROpcodes[] = {
5647 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5648 AArch64::STRXpre};
5649 static constexpr unsigned FPROpcodes[] = {
5650 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5651 AArch64::STRQpre};
5652
5653 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5654 Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5655 else
5656 Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5657 } else {
5658 static constexpr unsigned GPROpcodes[] = {
5659 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5660 AArch64::STRXpost};
5661 static constexpr unsigned FPROpcodes[] = {
5662 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5663 AArch64::STRDpost, AArch64::STRQpost};
5664
5665 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5666 Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5667 else
5668 Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5669 }
5670
5671 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5672 if (!Cst)
5673 return false; // Shouldn't happen, but just in case.
5674 auto Str =
5675 MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Val, Base}).addImm(Val: Cst->getSExtValue());
5676 Str.cloneMemRefs(OtherMI: I);
5677 constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
5678 I.eraseFromParent();
5679 return true;
5680}
5681
5682MachineInstr *
5683AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5684 MachineIRBuilder &MIRBuilder,
5685 MachineRegisterInfo &MRI) {
5686 LLT DstTy = MRI.getType(Reg: Dst);
5687 unsigned DstSize = DstTy.getSizeInBits();
5688 if (CV->isNullValue()) {
5689 if (DstSize == 128) {
5690 auto Mov =
5691 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5692 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5693 return &*Mov;
5694 }
5695
5696 if (DstSize == 64) {
5697 auto Mov =
5698 MIRBuilder
5699 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5700 .addImm(0);
5701 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5702 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5703 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5704 return &*Copy;
5705 }
5706 }
5707
5708 if (CV->getSplatValue()) {
5709 APInt DefBits = APInt::getSplat(NewLen: DstSize, V: CV->getUniqueInteger());
5710 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5711 MachineInstr *NewOp;
5712 bool Inv = false;
5713 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
5714 (NewOp =
5715 tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5716 (NewOp =
5717 tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5718 (NewOp =
5719 tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5720 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
5721 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)))
5722 return NewOp;
5723
5724 DefBits = ~DefBits;
5725 Inv = true;
5726 if ((NewOp =
5727 tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5728 (NewOp =
5729 tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5730 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)))
5731 return NewOp;
5732 return nullptr;
5733 };
5734
5735 if (auto *NewOp = TryMOVIWithBits(DefBits))
5736 return NewOp;
5737
5738 // See if an fneg of the constant can be materialized with a MOVI, etc.
5739 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5740 unsigned NegOpc) -> MachineInstr * {
5741 // FNegate each sub-element of the constant
5742 APInt Neg = APInt::getHighBitsSet(numBits: NumBits, hiBitsSet: 1).zext(width: DstSize);
5743 APInt NegBits(DstSize, 0);
5744 unsigned NumElts = DstSize / NumBits;
5745 for (unsigned i = 0; i < NumElts; i++)
5746 NegBits |= Neg << (NumBits * i);
5747 NegBits = DefBits ^ NegBits;
5748
5749 // Try to create the new constants with MOVI, and if so generate a fneg
5750 // for it.
5751 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5752 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5753 NewOp->getOperand(i: 0).setReg(NewDst);
5754 return MIRBuilder.buildInstr(Opc: NegOpc, DstOps: {Dst}, SrcOps: {NewDst});
5755 }
5756 return nullptr;
5757 };
5758 MachineInstr *R;
5759 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5760 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5761 (STI.hasFullFP16() &&
5762 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5763 return R;
5764 }
5765
5766 auto *CPLoad = emitLoadFromConstantPool(CPVal: CV, MIRBuilder);
5767 if (!CPLoad) {
5768 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5769 return nullptr;
5770 }
5771
5772 auto Copy = MIRBuilder.buildCopy(Res: Dst, Op: CPLoad->getOperand(i: 0));
5773 RBI.constrainGenericRegister(
5774 Reg: Dst, RC: *MRI.getRegClass(Reg: CPLoad->getOperand(i: 0).getReg()), MRI);
5775 return &*Copy;
5776}
5777
5778bool AArch64InstructionSelector::tryOptConstantBuildVec(
5779 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5780 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5781 unsigned DstSize = DstTy.getSizeInBits();
5782 assert(DstSize <= 128 && "Unexpected build_vec type!");
5783 if (DstSize < 32)
5784 return false;
5785 // Check if we're building a constant vector, in which case we want to
5786 // generate a constant pool load instead of a vector insert sequence.
5787 SmallVector<Constant *, 16> Csts;
5788 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5789 // Try to find G_CONSTANT or G_FCONSTANT
5790 auto *OpMI =
5791 getOpcodeDef(Opcode: TargetOpcode::G_CONSTANT, Reg: I.getOperand(i: Idx).getReg(), MRI);
5792 if (OpMI)
5793 Csts.emplace_back(
5794 Args: const_cast<ConstantInt *>(OpMI->getOperand(i: 1).getCImm()));
5795 else if ((OpMI = getOpcodeDef(Opcode: TargetOpcode::G_FCONSTANT,
5796 Reg: I.getOperand(i: Idx).getReg(), MRI)))
5797 Csts.emplace_back(
5798 Args: const_cast<ConstantFP *>(OpMI->getOperand(i: 1).getFPImm()));
5799 else
5800 return false;
5801 }
5802 Constant *CV = ConstantVector::get(V: Csts);
5803 if (!emitConstantVector(Dst: I.getOperand(i: 0).getReg(), CV, MIRBuilder&: MIB, MRI))
5804 return false;
5805 I.eraseFromParent();
5806 return true;
5807}
5808
5809bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5810 MachineInstr &I, MachineRegisterInfo &MRI) {
5811 // Given:
5812 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5813 //
5814 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
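// e.g. a <2 x s64> build vector on the FPR bank whose only defined element is
// %elt:fpr(s64) becomes %vec = SUBREG_TO_REG 0, %elt, %subreg.dsub.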
5815 Register Dst = I.getOperand(i: 0).getReg();
5816 Register EltReg = I.getOperand(i: 1).getReg();
5817 LLT EltTy = MRI.getType(Reg: EltReg);
5818 // If the destination isn't on the same bank as its elements, then this
5819 // can't be a SUBREG_TO_REG.
5820 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5821 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5822 if (EltRB != DstRB)
5823 return false;
5824 if (any_of(Range: drop_begin(RangeOrContainer: I.operands(), N: 2), P: [&MRI](const MachineOperand &Op) {
5825 return !getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, Reg: Op.getReg(), MRI);
5826 }))
5827 return false;
5828 unsigned SubReg;
5829 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(Ty: EltTy, RB: EltRB);
5830 if (!EltRC)
5831 return false;
5832 const TargetRegisterClass *DstRC =
5833 getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst), RB: DstRB);
5834 if (!DstRC)
5835 return false;
5836 if (!getSubRegForClass(EltRC, TRI, SubReg))
5837 return false;
5838 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5839 .addImm(0)
5840 .addUse(EltReg)
5841 .addImm(SubReg);
5842 I.eraseFromParent();
5843 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5844 return RBI.constrainGenericRegister(Reg: Dst, RC: *DstRC, MRI);
5845}
5846
5847bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5848 MachineRegisterInfo &MRI) {
5849 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5850 // Until we port more of the optimized selections, for now just use a vector
5851 // insert sequence.
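 //
 // Illustrative shape of the fallback sequence for a <4 x s32> build_vector:
 // element 0 is moved into a 128-bit vector register, then each remaining
 // non-undef element is inserted with a lane insert (INSvi32gpr/INSvi32lane,
 // depending on the elements' register bank).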
5852 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
5853 const LLT EltTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
5854 unsigned EltSize = EltTy.getSizeInBits();
5855
5856 if (tryOptConstantBuildVec(I, DstTy, MRI))
5857 return true;
5858 if (tryOptBuildVecToSubregToReg(I, MRI))
5859 return true;
5860
5861 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5862 return false; // Don't support all element types yet.
5863 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(i: 1).getReg(), MRI, TRI);
5864
5865 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5866 MachineInstr *ScalarToVec =
5867 emitScalarToVector(EltSize: DstTy.getElementType().getSizeInBits(), DstRC,
5868 Scalar: I.getOperand(i: 1).getReg(), MIRBuilder&: MIB);
5869 if (!ScalarToVec)
5870 return false;
5871
5872 Register DstVec = ScalarToVec->getOperand(i: 0).getReg();
5873 unsigned DstSize = DstTy.getSizeInBits();
5874
5875 // Keep track of the last MI we inserted. Later on, we might be able to save
5876 // a copy using it.
5877 MachineInstr *PrevMI = ScalarToVec;
5878 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5879 // Note that if we don't do a subregister copy, we can end up making an
5880 // extra register.
5881 Register OpReg = I.getOperand(i).getReg();
5882 // Do not emit inserts for undefs
5883 if (!getOpcodeDef<GImplicitDef>(Reg: OpReg, MRI)) {
5884 PrevMI = &*emitLaneInsert(DstReg: std::nullopt, SrcReg: DstVec, EltReg: OpReg, LaneIdx: i - 1, RB, MIRBuilder&: MIB);
5885 DstVec = PrevMI->getOperand(i: 0).getReg();
5886 }
5887 }
5888
5889 // If DstTy's size in bits is less than 128, then emit a subregister copy
5890 // from DstVec to the last register we've defined.
5891 if (DstSize < 128) {
5892 // Force this to be FPR using the destination vector.
5893 const TargetRegisterClass *RC =
5894 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5895 if (!RC)
5896 return false;
5897 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5898 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5899 return false;
5900 }
5901
5902 unsigned SubReg = 0;
5903 if (!getSubRegForClass(RC, TRI, SubReg))
5904 return false;
5905 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5906 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5907 << ")\n");
5908 return false;
5909 }
5910
5911 Register Reg = MRI.createVirtualRegister(RegClass: RC);
5912 Register DstReg = I.getOperand(i: 0).getReg();
5913
5914 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}).addReg(RegNo: DstVec, flags: 0, SubReg);
5915 MachineOperand &RegOp = I.getOperand(i: 1);
5916 RegOp.setReg(Reg);
5917 RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
5918 } else {
5919 // We either have a vector with all elements (except the first one) undef or
5920 // at least one non-undef non-first element. In the first case, we need to
5921 // constrain the output register ourselves as we may have generated an
5922 // INSERT_SUBREG operation which is a generic operation for which the
5923 // output regclass cannot be automatically chosen.
5924 //
5925 // In the second case, there is no need to do this as it may generate an
5926 // instruction like INSvi32gpr where the regclass can be automatically
5927 // chosen.
5928 //
5929 // Also, we save a copy by re-using the destination register on the final
5930 // insert.
5931 PrevMI->getOperand(i: 0).setReg(I.getOperand(i: 0).getReg());
5932 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5933
5934 Register DstReg = PrevMI->getOperand(i: 0).getReg();
5935 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5936 const TargetRegisterClass *RC =
5937 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5938 RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
5939 }
5940 }
5941
5942 I.eraseFromParent();
5943 return true;
5944}
5945
5946bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5947 unsigned NumVecs,
5948 MachineInstr &I) {
5949 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5950 assert(Opc && "Expected an opcode?");
5951 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
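 // The selected LD1/LD2/LD3/LD4 instruction defines a single register tuple;
 // each destination vector is then copied out of that tuple through the
 // consecutive dsub0+Idx / qsub0+Idx subregister indices below.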
5952 auto &MRI = *MIB.getMRI();
5953 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
5954 unsigned Size = Ty.getSizeInBits();
5955 assert((Size == 64 || Size == 128) &&
5956 "Destination must be 64 bits or 128 bits?");
5957 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5958 auto Ptr = I.getOperand(i: I.getNumOperands() - 1).getReg();
5959 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5960 auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {Ptr});
5961 Load.cloneMemRefs(OtherMI: I);
5962 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5963 Register SelectedLoadDst = Load->getOperand(i: 0).getReg();
5964 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5965 auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: Idx)}, SrcOps: {})
5966 .addReg(RegNo: SelectedLoadDst, flags: 0, SubReg: SubReg + Idx);
5967 // Emit the subreg copies and immediately select them.
5968 // FIXME: We should refactor our copy code into an emitCopy helper and
5969 // clean up uses of this pattern elsewhere in the selector.
5970 selectCopy(*Vec, TII, MRI, TRI, RBI);
5971 }
5972 return true;
5973}
5974
5975bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5976 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5977 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5978 assert(Opc && "Expected an opcode?");
5979 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5980 auto &MRI = *MIB.getMRI();
5981 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
5982 bool Narrow = Ty.getSizeInBits() == 64;
5983
5984 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5985 SmallVector<Register, 4> Regs(NumVecs);
5986 std::transform(first: FirstSrcRegIt, last: FirstSrcRegIt + NumVecs, result: Regs.begin(),
5987 unary_op: [](auto MO) { return MO.getReg(); });
5988
5989 if (Narrow) {
5990 transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
5991 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5992 ->getOperand(0)
5993 .getReg();
5994 });
5995 Ty = Ty.multiplyElements(Factor: 2);
5996 }
5997
5998 Register Tuple = createQTuple(Regs, MIB);
5999 auto LaneNo = getIConstantVRegVal(VReg: (FirstSrcRegIt + NumVecs)->getReg(), MRI);
6000 if (!LaneNo)
6001 return false;
6002
6003 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6004 auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {})
6005 .addReg(RegNo: Tuple)
6006 .addImm(Val: LaneNo->getZExtValue())
6007 .addReg(RegNo: Ptr);
6008 Load.cloneMemRefs(OtherMI: I);
6009 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
6010 Register SelectedLoadDst = Load->getOperand(i: 0).getReg();
6011 unsigned SubReg = AArch64::qsub0;
6012 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6013 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6014 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6015 : DstOp(I.getOperand(Idx).getReg())},
6016 {})
6017 .addReg(SelectedLoadDst, 0, SubReg + Idx);
6018 Register WideReg = Vec.getReg(0);
6019 // Emit the subreg copies and immediately select them.
6020 selectCopy(*Vec, TII, MRI, TRI, RBI);
6021 if (Narrow &&
6022 !emitNarrowVector(DstReg: I.getOperand(i: Idx).getReg(), SrcReg: WideReg, MIB, MRI))
6023 return false;
6024 }
6025 return true;
6026}
6027
6028void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6029 unsigned NumVecs,
6030 unsigned Opc) {
6031 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6032 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6033 Register Ptr = I.getOperand(i: 1 + NumVecs).getReg();
6034
6035 SmallVector<Register, 2> Regs(NumVecs);
6036 std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs,
6037 result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); });
6038
6039 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6040 : createDTuple(Regs, MIB);
6041 auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {Tuple, Ptr});
6042 Store.cloneMemRefs(OtherMI: I);
6043 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6044}
6045
6046bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6047 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6048 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6049 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6050 bool Narrow = Ty.getSizeInBits() == 64;
6051
6052 SmallVector<Register, 2> Regs(NumVecs);
6053 std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs,
6054 result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); });
6055
6056 if (Narrow)
6057 transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
6058 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6059 ->getOperand(0)
6060 .getReg();
6061 });
6062
6063 Register Tuple = createQTuple(Regs, MIB);
6064
6065 auto LaneNo = getIConstantVRegVal(VReg: I.getOperand(i: 1 + NumVecs).getReg(), MRI);
6066 if (!LaneNo)
6067 return false;
6068 Register Ptr = I.getOperand(i: 1 + NumVecs + 1).getReg();
6069 auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {})
6070 .addReg(RegNo: Tuple)
6071 .addImm(Val: LaneNo->getZExtValue())
6072 .addReg(RegNo: Ptr);
6073 Store.cloneMemRefs(OtherMI: I);
6074 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6075 return true;
6076}
6077
6078bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6079 MachineInstr &I, MachineRegisterInfo &MRI) {
6080 // Find the intrinsic ID.
6081 unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();
6082
6083 const LLT S8 = LLT::scalar(SizeInBits: 8);
6084 const LLT S16 = LLT::scalar(SizeInBits: 16);
6085 const LLT S32 = LLT::scalar(SizeInBits: 32);
6086 const LLT S64 = LLT::scalar(SizeInBits: 64);
6087 const LLT P0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
6088 // Select the instruction.
6089 switch (IntrinID) {
6090 default:
6091 return false;
6092 case Intrinsic::aarch64_ldxp:
6093 case Intrinsic::aarch64_ldaxp: {
6094 auto NewI = MIB.buildInstr(
6095 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6096 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6097 {I.getOperand(3)});
6098 NewI.cloneMemRefs(I);
6099 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
6100 break;
6101 }
6102 case Intrinsic::aarch64_neon_ld1x2: {
6103 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6104 unsigned Opc = 0;
6105 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6106 Opc = AArch64::LD1Twov8b;
6107 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6108 Opc = AArch64::LD1Twov16b;
6109 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6110 Opc = AArch64::LD1Twov4h;
6111 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6112 Opc = AArch64::LD1Twov8h;
6113 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6114 Opc = AArch64::LD1Twov2s;
6115 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6116 Opc = AArch64::LD1Twov4s;
6117 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6118 Opc = AArch64::LD1Twov2d;
6119 else if (Ty == S64 || Ty == P0)
6120 Opc = AArch64::LD1Twov1d;
6121 else
6122 llvm_unreachable("Unexpected type for ld1x2!");
6123 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6124 break;
6125 }
6126 case Intrinsic::aarch64_neon_ld1x3: {
6127 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6128 unsigned Opc = 0;
6129 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6130 Opc = AArch64::LD1Threev8b;
6131 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6132 Opc = AArch64::LD1Threev16b;
6133 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6134 Opc = AArch64::LD1Threev4h;
6135 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6136 Opc = AArch64::LD1Threev8h;
6137 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6138 Opc = AArch64::LD1Threev2s;
6139 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6140 Opc = AArch64::LD1Threev4s;
6141 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6142 Opc = AArch64::LD1Threev2d;
6143 else if (Ty == S64 || Ty == P0)
6144 Opc = AArch64::LD1Threev1d;
6145 else
6146 llvm_unreachable("Unexpected type for ld1x3!");
6147 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6148 break;
6149 }
6150 case Intrinsic::aarch64_neon_ld1x4: {
6151 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6152 unsigned Opc = 0;
6153 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6154 Opc = AArch64::LD1Fourv8b;
6155 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6156 Opc = AArch64::LD1Fourv16b;
6157 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6158 Opc = AArch64::LD1Fourv4h;
6159 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6160 Opc = AArch64::LD1Fourv8h;
6161 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6162 Opc = AArch64::LD1Fourv2s;
6163 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6164 Opc = AArch64::LD1Fourv4s;
6165 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6166 Opc = AArch64::LD1Fourv2d;
6167 else if (Ty == S64 || Ty == P0)
6168 Opc = AArch64::LD1Fourv1d;
6169 else
6170 llvm_unreachable("Unexpected type for ld1x4!");
6171 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6172 break;
6173 }
6174 case Intrinsic::aarch64_neon_ld2: {
6175 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6176 unsigned Opc = 0;
6177 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6178 Opc = AArch64::LD2Twov8b;
6179 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6180 Opc = AArch64::LD2Twov16b;
6181 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6182 Opc = AArch64::LD2Twov4h;
6183 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6184 Opc = AArch64::LD2Twov8h;
6185 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6186 Opc = AArch64::LD2Twov2s;
6187 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6188 Opc = AArch64::LD2Twov4s;
6189 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6190 Opc = AArch64::LD2Twov2d;
6191 else if (Ty == S64 || Ty == P0)
6192 Opc = AArch64::LD1Twov1d;
6193 else
6194 llvm_unreachable("Unexpected type for ld2!");
6195 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6196 break;
6197 }
6198 case Intrinsic::aarch64_neon_ld2lane: {
6199 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6200 unsigned Opc;
6201 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6202 Opc = AArch64::LD2i8;
6203 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6204 Opc = AArch64::LD2i16;
6205 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6206 Opc = AArch64::LD2i32;
6207 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6208 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6209 Opc = AArch64::LD2i64;
6210 else
6211 llvm_unreachable("Unexpected type for ld2lane!");
6212 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 2, I))
6213 return false;
6214 break;
6215 }
6216 case Intrinsic::aarch64_neon_ld2r: {
6217 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6218 unsigned Opc = 0;
6219 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6220 Opc = AArch64::LD2Rv8b;
6221 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6222 Opc = AArch64::LD2Rv16b;
6223 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6224 Opc = AArch64::LD2Rv4h;
6225 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6226 Opc = AArch64::LD2Rv8h;
6227 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6228 Opc = AArch64::LD2Rv2s;
6229 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6230 Opc = AArch64::LD2Rv4s;
6231 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6232 Opc = AArch64::LD2Rv2d;
6233 else if (Ty == S64 || Ty == P0)
6234 Opc = AArch64::LD2Rv1d;
6235 else
6236 llvm_unreachable("Unexpected type for ld2r!");
6237 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6238 break;
6239 }
6240 case Intrinsic::aarch64_neon_ld3: {
6241 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6242 unsigned Opc = 0;
6243 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6244 Opc = AArch64::LD3Threev8b;
6245 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6246 Opc = AArch64::LD3Threev16b;
6247 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6248 Opc = AArch64::LD3Threev4h;
6249 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6250 Opc = AArch64::LD3Threev8h;
6251 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6252 Opc = AArch64::LD3Threev2s;
6253 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6254 Opc = AArch64::LD3Threev4s;
6255 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6256 Opc = AArch64::LD3Threev2d;
6257 else if (Ty == S64 || Ty == P0)
6258 Opc = AArch64::LD1Threev1d;
6259 else
6260 llvm_unreachable("Unexpected type for ld3!");
6261 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6262 break;
6263 }
6264 case Intrinsic::aarch64_neon_ld3lane: {
6265 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6266 unsigned Opc;
6267 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6268 Opc = AArch64::LD3i8;
6269 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6270 Opc = AArch64::LD3i16;
6271 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6272 Opc = AArch64::LD3i32;
6273 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6274 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6275 Opc = AArch64::LD3i64;
6276 else
6277 llvm_unreachable("Unexpected type for ld3lane!");
6278 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 3, I))
6279 return false;
6280 break;
6281 }
6282 case Intrinsic::aarch64_neon_ld3r: {
6283 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6284 unsigned Opc = 0;
6285 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6286 Opc = AArch64::LD3Rv8b;
6287 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6288 Opc = AArch64::LD3Rv16b;
6289 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6290 Opc = AArch64::LD3Rv4h;
6291 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6292 Opc = AArch64::LD3Rv8h;
6293 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6294 Opc = AArch64::LD3Rv2s;
6295 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6296 Opc = AArch64::LD3Rv4s;
6297 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6298 Opc = AArch64::LD3Rv2d;
6299 else if (Ty == S64 || Ty == P0)
6300 Opc = AArch64::LD3Rv1d;
6301 else
6302 llvm_unreachable("Unexpected type for ld3r!");
6303 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6304 break;
6305 }
6306 case Intrinsic::aarch64_neon_ld4: {
6307 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6308 unsigned Opc = 0;
6309 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6310 Opc = AArch64::LD4Fourv8b;
6311 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6312 Opc = AArch64::LD4Fourv16b;
6313 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6314 Opc = AArch64::LD4Fourv4h;
6315 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6316 Opc = AArch64::LD4Fourv8h;
6317 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6318 Opc = AArch64::LD4Fourv2s;
6319 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6320 Opc = AArch64::LD4Fourv4s;
6321 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6322 Opc = AArch64::LD4Fourv2d;
6323 else if (Ty == S64 || Ty == P0)
6324 Opc = AArch64::LD1Fourv1d;
6325 else
6326 llvm_unreachable("Unexpected type for ld4!");
6327 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6328 break;
6329 }
6330 case Intrinsic::aarch64_neon_ld4lane: {
6331 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6332 unsigned Opc;
6333 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6334 Opc = AArch64::LD4i8;
6335 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6336 Opc = AArch64::LD4i16;
6337 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6338 Opc = AArch64::LD4i32;
6339 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6340 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6341 Opc = AArch64::LD4i64;
6342 else
6343 llvm_unreachable("Unexpected type for ld4lane!");
6344 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 4, I))
6345 return false;
6346 break;
6347 }
6348 case Intrinsic::aarch64_neon_ld4r: {
6349 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6350 unsigned Opc = 0;
6351 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6352 Opc = AArch64::LD4Rv8b;
6353 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6354 Opc = AArch64::LD4Rv16b;
6355 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6356 Opc = AArch64::LD4Rv4h;
6357 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6358 Opc = AArch64::LD4Rv8h;
6359 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6360 Opc = AArch64::LD4Rv2s;
6361 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6362 Opc = AArch64::LD4Rv4s;
6363 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6364 Opc = AArch64::LD4Rv2d;
6365 else if (Ty == S64 || Ty == P0)
6366 Opc = AArch64::LD4Rv1d;
6367 else
6368 llvm_unreachable("Unexpected type for ld4r!");
6369 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6370 break;
6371 }
6372 case Intrinsic::aarch64_neon_st1x2: {
6373 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6374 unsigned Opc;
6375 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6376 Opc = AArch64::ST1Twov8b;
6377 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6378 Opc = AArch64::ST1Twov16b;
6379 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6380 Opc = AArch64::ST1Twov4h;
6381 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6382 Opc = AArch64::ST1Twov8h;
6383 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6384 Opc = AArch64::ST1Twov2s;
6385 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6386 Opc = AArch64::ST1Twov4s;
6387 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6388 Opc = AArch64::ST1Twov2d;
6389 else if (Ty == S64 || Ty == P0)
6390 Opc = AArch64::ST1Twov1d;
6391 else
6392 llvm_unreachable("Unexpected type for st1x2!");
6393 selectVectorStoreIntrinsic(I, NumVecs: 2, Opc);
6394 break;
6395 }
6396 case Intrinsic::aarch64_neon_st1x3: {
6397 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6398 unsigned Opc;
6399 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6400 Opc = AArch64::ST1Threev8b;
6401 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6402 Opc = AArch64::ST1Threev16b;
6403 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6404 Opc = AArch64::ST1Threev4h;
6405 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6406 Opc = AArch64::ST1Threev8h;
6407 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6408 Opc = AArch64::ST1Threev2s;
6409 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6410 Opc = AArch64::ST1Threev4s;
6411 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6412 Opc = AArch64::ST1Threev2d;
6413 else if (Ty == S64 || Ty == P0)
6414 Opc = AArch64::ST1Threev1d;
6415 else
6416 llvm_unreachable("Unexpected type for st1x3!");
6417 selectVectorStoreIntrinsic(I, NumVecs: 3, Opc);
6418 break;
6419 }
6420 case Intrinsic::aarch64_neon_st1x4: {
6421 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6422 unsigned Opc;
6423 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6424 Opc = AArch64::ST1Fourv8b;
6425 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6426 Opc = AArch64::ST1Fourv16b;
6427 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6428 Opc = AArch64::ST1Fourv4h;
6429 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6430 Opc = AArch64::ST1Fourv8h;
6431 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6432 Opc = AArch64::ST1Fourv2s;
6433 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6434 Opc = AArch64::ST1Fourv4s;
6435 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6436 Opc = AArch64::ST1Fourv2d;
6437 else if (Ty == S64 || Ty == P0)
6438 Opc = AArch64::ST1Fourv1d;
6439 else
6440 llvm_unreachable("Unexpected type for st1x4!");
6441 selectVectorStoreIntrinsic(I, NumVecs: 4, Opc);
6442 break;
6443 }
6444 case Intrinsic::aarch64_neon_st2: {
6445 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6446 unsigned Opc;
6447 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6448 Opc = AArch64::ST2Twov8b;
6449 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6450 Opc = AArch64::ST2Twov16b;
6451 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6452 Opc = AArch64::ST2Twov4h;
6453 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6454 Opc = AArch64::ST2Twov8h;
6455 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6456 Opc = AArch64::ST2Twov2s;
6457 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6458 Opc = AArch64::ST2Twov4s;
6459 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6460 Opc = AArch64::ST2Twov2d;
6461 else if (Ty == S64 || Ty == P0)
6462 Opc = AArch64::ST1Twov1d;
6463 else
6464 llvm_unreachable("Unexpected type for st2!");
6465 selectVectorStoreIntrinsic(I, NumVecs: 2, Opc);
6466 break;
6467 }
6468 case Intrinsic::aarch64_neon_st3: {
6469 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6470 unsigned Opc;
6471 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6472 Opc = AArch64::ST3Threev8b;
6473 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6474 Opc = AArch64::ST3Threev16b;
6475 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6476 Opc = AArch64::ST3Threev4h;
6477 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6478 Opc = AArch64::ST3Threev8h;
6479 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6480 Opc = AArch64::ST3Threev2s;
6481 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6482 Opc = AArch64::ST3Threev4s;
6483 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6484 Opc = AArch64::ST3Threev2d;
6485 else if (Ty == S64 || Ty == P0)
6486 Opc = AArch64::ST1Threev1d;
6487 else
6488 llvm_unreachable("Unexpected type for st3!");
6489 selectVectorStoreIntrinsic(I, NumVecs: 3, Opc);
6490 break;
6491 }
6492 case Intrinsic::aarch64_neon_st4: {
6493 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6494 unsigned Opc;
6495 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6496 Opc = AArch64::ST4Fourv8b;
6497 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6498 Opc = AArch64::ST4Fourv16b;
6499 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6500 Opc = AArch64::ST4Fourv4h;
6501 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6502 Opc = AArch64::ST4Fourv8h;
6503 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6504 Opc = AArch64::ST4Fourv2s;
6505 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6506 Opc = AArch64::ST4Fourv4s;
6507 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6508 Opc = AArch64::ST4Fourv2d;
6509 else if (Ty == S64 || Ty == P0)
6510 Opc = AArch64::ST1Fourv1d;
6511 else
6512 llvm_unreachable("Unexpected type for st4!");
6513 selectVectorStoreIntrinsic(I, NumVecs: 4, Opc);
6514 break;
6515 }
6516 case Intrinsic::aarch64_neon_st2lane: {
6517 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6518 unsigned Opc;
6519 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6520 Opc = AArch64::ST2i8;
6521 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6522 Opc = AArch64::ST2i16;
6523 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6524 Opc = AArch64::ST2i32;
6525 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6526 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6527 Opc = AArch64::ST2i64;
6528 else
6529 llvm_unreachable("Unexpected type for st2lane!");
6530 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 2, Opc))
6531 return false;
6532 break;
6533 }
6534 case Intrinsic::aarch64_neon_st3lane: {
6535 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6536 unsigned Opc;
6537 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6538 Opc = AArch64::ST3i8;
6539 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6540 Opc = AArch64::ST3i16;
6541 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6542 Opc = AArch64::ST3i32;
6543 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6544 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6545 Opc = AArch64::ST3i64;
6546 else
6547 llvm_unreachable("Unexpected type for st3lane!");
6548 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 3, Opc))
6549 return false;
6550 break;
6551 }
6552 case Intrinsic::aarch64_neon_st4lane: {
6553 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6554 unsigned Opc;
6555 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6556 Opc = AArch64::ST4i8;
6557 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6558 Opc = AArch64::ST4i16;
6559 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6560 Opc = AArch64::ST4i32;
6561 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6562 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6563 Opc = AArch64::ST4i64;
6564 else
6565 llvm_unreachable("Unexpected type for st4lane!");
6566 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 4, Opc))
6567 return false;
6568 break;
6569 }
6570 case Intrinsic::aarch64_mops_memset_tag: {
6571 // Transform
6572 // %dst:gpr(p0) = \
6573 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6574 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6575 // where %dst is updated, into
6576 // (%Rd:GPR64common, %Rn:GPR64) = \
6577 // MOPSMemorySetTaggingPseudo \
6578 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6579 // where Rd and Rn are tied.
6580 // It is expected that %val has been extended to s64 in legalization.
6581 // Note that the order of the size/value operands is swapped.
6582
6583 Register DstDef = I.getOperand(i: 0).getReg();
6584 // I.getOperand(1) is the intrinsic function
6585 Register DstUse = I.getOperand(i: 2).getReg();
6586 Register ValUse = I.getOperand(i: 3).getReg();
6587 Register SizeUse = I.getOperand(i: 4).getReg();
6588
6589 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6590 // Therefore an additional virtual register is required for the updated size
6591 // operand. This value is not accessible via the semantics of the intrinsic.
6592 Register SizeDef = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
6593
6594 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6595 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6596 Memset.cloneMemRefs(I);
6597 constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
6598 break;
6599 }
6600 }
6601
6602 I.eraseFromParent();
6603 return true;
6604}
6605
6606bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6607 MachineRegisterInfo &MRI) {
6608 unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();
6609
6610 switch (IntrinID) {
6611 default:
6612 break;
6613 case Intrinsic::aarch64_crypto_sha1h: {
6614 Register DstReg = I.getOperand(i: 0).getReg();
6615 Register SrcReg = I.getOperand(i: 2).getReg();
6616
6617 // FIXME: Should this be an assert?
6618 if (MRI.getType(Reg: DstReg).getSizeInBits() != 32 ||
6619 MRI.getType(Reg: SrcReg).getSizeInBits() != 32)
6620 return false;
6621
6622 // The operation has to happen on FPRs. Set up some new FPR registers for
6623 // the source and destination if they are on GPRs.
6624 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6625 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6626 MIB.buildCopy(Res: {SrcReg}, Op: {I.getOperand(i: 2)});
6627
6628 // Make sure the copy ends up getting constrained properly.
6629 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
6630 AArch64::GPR32RegClass, MRI);
6631 }
6632
6633 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6634 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6635
6636 // Actually insert the instruction.
6637 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6638 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
6639
6640 // Did we create a new register for the destination?
6641 if (DstReg != I.getOperand(i: 0).getReg()) {
6642 // Yep. Copy the result of the instruction back into the original
6643 // destination.
6644 MIB.buildCopy(Res: {I.getOperand(i: 0)}, Op: {DstReg});
6645 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
6646 AArch64::GPR32RegClass, MRI);
6647 }
6648
6649 I.eraseFromParent();
6650 return true;
6651 }
6652 case Intrinsic::frameaddress:
6653 case Intrinsic::returnaddress: {
6654 MachineFunction &MF = *I.getParent()->getParent();
6655 MachineFrameInfo &MFI = MF.getFrameInfo();
6656
6657 unsigned Depth = I.getOperand(i: 2).getImm();
6658 Register DstReg = I.getOperand(i: 0).getReg();
6659 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6660
6661 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6662 if (!MFReturnAddr) {
6663 // Insert the copy from LR/X30 into the entry block, before it can be
6664 // clobbered by anything.
6665 MFI.setReturnAddressIsTaken(true);
6666 MFReturnAddr = getFunctionLiveInPhysReg(
6667 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6668 }
6669
6670 if (STI.hasPAuth()) {
6671 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6672 } else {
6673 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6674 MIB.buildInstr(AArch64::XPACLRI);
6675 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6676 }
6677
6678 I.eraseFromParent();
6679 return true;
6680 }
6681
6682 MFI.setFrameAddressIsTaken(true);
6683 Register FrameAddr(AArch64::FP);
6684 while (Depth--) {
6685 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6686 auto Ldr =
6687 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6688 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
6689 FrameAddr = NextFrame;
6690 }
6691
6692 if (IntrinID == Intrinsic::frameaddress)
6693 MIB.buildCopy(Res: {DstReg}, Op: {FrameAddr});
6694 else {
6695 MFI.setReturnAddressIsTaken(true);
6696
6697 if (STI.hasPAuth()) {
6698 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6699 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6700 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6701 } else {
6702 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6703 .addImm(1);
6704 MIB.buildInstr(AArch64::XPACLRI);
6705 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6706 }
6707 }
6708
6709 I.eraseFromParent();
6710 return true;
6711 }
6712 case Intrinsic::swift_async_context_addr:
6713 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6714 {Register(AArch64::FP)})
6715 .addImm(8)
6716 .addImm(0);
6717 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
6718
6719 MF->getFrameInfo().setFrameAddressIsTaken(true);
6720 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6721 I.eraseFromParent();
6722 return true;
6723 }
6724 return false;
6725}
6726
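/// The following shift renderers encode immediate shift amounts for the
/// imported tablegen patterns (typically when an immediate shift is selected
/// as a UBFM/SBFM-style bitfield move). As a worked example of the arithmetic
/// only: for a 32-bit shift amount of 3, selectShiftA_32 renders
/// (32 - 3) & 0x1f = 29 and selectShiftB_32 renders 31 - 3 = 28.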
6727InstructionSelector::ComplexRendererFns
6728AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6729 auto MaybeImmed = getImmedFromMO(Root);
6730 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6731 return std::nullopt;
6732 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6733 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
6734}
6735
6736InstructionSelector::ComplexRendererFns
6737AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6738 auto MaybeImmed = getImmedFromMO(Root);
6739 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6740 return std::nullopt;
6741 uint64_t Enc = 31 - *MaybeImmed;
6742 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
6743}
6744
6745InstructionSelector::ComplexRendererFns
6746AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6747 auto MaybeImmed = getImmedFromMO(Root);
6748 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6749 return std::nullopt;
6750 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6751 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
6752}
6753
6754InstructionSelector::ComplexRendererFns
6755AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6756 auto MaybeImmed = getImmedFromMO(Root);
6757 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6758 return std::nullopt;
6759 uint64_t Enc = 63 - *MaybeImmed;
6760 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
6761}
6762
6763/// Helper to select an immediate value that can be represented as a 12-bit
6764/// value shifted left by either 0 or 12. If it is possible to do so, return
6765/// the immediate and shift value. If not, return std::nullopt.
6766///
6767/// Used by selectArithImmed and selectNegArithImmed.
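///
/// Worked examples (arithmetic only):
///   0x123    -> Immed = 0x123, ShiftAmt = 0
///   0x123000 -> Immed = 0x123, ShiftAmt = 12
///   0x123456 -> not representable, returns std::nullopt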
6768InstructionSelector::ComplexRendererFns
6769AArch64InstructionSelector::select12BitValueWithLeftShift(
6770 uint64_t Immed) const {
6771 unsigned ShiftAmt;
6772 if (Immed >> 12 == 0) {
6773 ShiftAmt = 0;
6774 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6775 ShiftAmt = 12;
6776 Immed = Immed >> 12;
6777 } else
6778 return std::nullopt;
6779
6780 unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt);
6781 return {{
6782 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Immed); },
6783 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShVal); },
6784 }};
6785}
6786
6787/// SelectArithImmed - Select an immediate value that can be represented as
6788/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6789/// Val set to the 12-bit value and Shift set to the shifter operand.
6790InstructionSelector::ComplexRendererFns
6791AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6792 // This function is called from the addsub_shifted_imm ComplexPattern,
6793 // which lists [imm] as the list of opcodes it's interested in; however,
6794 // we still need to check whether the operand is actually an immediate
6795 // here because the ComplexPattern opcode list is only used in
6796 // root-level opcode matching.
6797 auto MaybeImmed = getImmedFromMO(Root);
6798 if (MaybeImmed == std::nullopt)
6799 return std::nullopt;
6800 return select12BitValueWithLeftShift(Immed: *MaybeImmed);
6801}
6802
6803/// SelectNegArithImmed - As above, but negates the value before trying to
6804/// select it.
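///
/// For instance (illustrative), this lets a comparison against -5 be selected
/// as "cmn wN, #5" by the imported patterns instead of materializing -5.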
6805InstructionSelector::ComplexRendererFns
6806AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6807 // We need a register here, because we need to know if we have a 64 or 32
6808 // bit immediate.
6809 if (!Root.isReg())
6810 return std::nullopt;
6811 auto MaybeImmed = getImmedFromMO(Root);
6812 if (MaybeImmed == std::nullopt)
6813 return std::nullopt;
6814 uint64_t Immed = *MaybeImmed;
6815
6816 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6817 // have the opposite effect on the C flag, so this pattern mustn't match under
6818 // those circumstances.
6819 if (Immed == 0)
6820 return std::nullopt;
6821
6822 // Check whether we're dealing with a 32-bit or a 64-bit type on the root.
6824 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6825 if (MRI.getType(Reg: Root.getReg()).getSizeInBits() == 32)
6826 Immed = ~((uint32_t)Immed) + 1;
6827 else
6828 Immed = ~Immed + 1ULL;
6829
6830 if (Immed & 0xFFFFFFFFFF000000ULL)
6831 return std::nullopt;
6832
6833 Immed &= 0xFFFFFFULL;
6834 return select12BitValueWithLeftShift(Immed);
6835}
6836
6837/// Return true if it is worth folding MI into an extended register. That is,
6838/// if it's safe to pull it into the addressing mode of a load or store as a
6839/// shift.
6840bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6841 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
6842 // Always fold if there is one use, or if we're optimizing for size.
6843 Register DefReg = MI.getOperand(i: 0).getReg();
6844 if (MRI.hasOneNonDBGUse(RegNo: DefReg) ||
6845 MI.getParent()->getParent()->getFunction().hasOptSize())
6846 return true;
6847
6848 // FIXME: Consider checking HasAddrLSLSlow14 and HasALULSLFast as
6849 // appropriate.
6850
6851 // We have a fastpath, so folding a shift in and potentially computing it
6852 // many times may be beneficial. Check if this is only used in memory ops.
6853 // If it is, then we should fold.
6854 return all_of(Range: MRI.use_nodbg_instructions(Reg: DefReg),
6855 P: [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
6856}
6857
6858static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
6859 switch (Type) {
6860 case AArch64_AM::SXTB:
6861 case AArch64_AM::SXTH:
6862 case AArch64_AM::SXTW:
6863 return true;
6864 default:
6865 return false;
6866 }
6867}
6868
6869InstructionSelector::ComplexRendererFns
6870AArch64InstructionSelector::selectExtendedSHL(
6871 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
6872 unsigned SizeInBytes, bool WantsExt) const {
6873 assert(Base.isReg() && "Expected base to be a register operand");
6874 assert(Offset.isReg() && "Expected offset to be a register operand");
6875
6876 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6877 MachineInstr *OffsetInst = MRI.getVRegDef(Reg: Offset.getReg());
6878
6879 unsigned OffsetOpc = OffsetInst->getOpcode();
6880 bool LookedThroughZExt = false;
6881 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
6882 // Try to look through a ZEXT.
6883 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
6884 return std::nullopt;
6885
6886 OffsetInst = MRI.getVRegDef(Reg: OffsetInst->getOperand(i: 1).getReg());
6887 OffsetOpc = OffsetInst->getOpcode();
6888 LookedThroughZExt = true;
6889
6890 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
6891 return std::nullopt;
6892 }
6893 // Make sure that the memory op is a valid size.
6894 int64_t LegalShiftVal = Log2_32(Value: SizeInBytes);
6895 if (LegalShiftVal == 0)
6896 return std::nullopt;
6897 if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI))
6898 return std::nullopt;
6899
6900 // Now, try to find the specific G_CONSTANT. Start by assuming that the
6901 // register we will offset is the LHS, and the register containing the
6902 // constant is the RHS.
6903 Register OffsetReg = OffsetInst->getOperand(i: 1).getReg();
6904 Register ConstantReg = OffsetInst->getOperand(i: 2).getReg();
6905 auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
6906 if (!ValAndVReg) {
6907 // We didn't get a constant on the RHS. If the opcode is a shift, then
6908 // we're done.
6909 if (OffsetOpc == TargetOpcode::G_SHL)
6910 return std::nullopt;
6911
6912 // If we have a G_MUL, we can use either register. Try looking at the RHS.
6913 std::swap(a&: OffsetReg, b&: ConstantReg);
6914 ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
6915 if (!ValAndVReg)
6916 return std::nullopt;
6917 }
6918
6919 // The value must fit into 3 bits, and must be positive. Make sure that is
6920 // true.
6921 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
6922
6923 // Since we're going to pull this into a shift, the constant value must be
6924 // a power of 2. If we got a multiply, then we need to check this.
6925 if (OffsetOpc == TargetOpcode::G_MUL) {
6926 if (!llvm::has_single_bit<uint32_t>(Value: ImmVal))
6927 return std::nullopt;
6928
6929 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
6930 ImmVal = Log2_32(Value: ImmVal);
6931 }
6932
6933 if ((ImmVal & 0x7) != ImmVal)
6934 return std::nullopt;
6935
6936 // We are only allowed to shift by LegalShiftVal. This shift value is built
6937 // into the instruction, so we can't just use whatever we want.
6938 if (ImmVal != LegalShiftVal)
6939 return std::nullopt;
6940
6941 unsigned SignExtend = 0;
6942 if (WantsExt) {
6943 // Check if the offset is defined by an extend, unless we looked through a
6944 // G_ZEXT earlier.
6945 if (!LookedThroughZExt) {
6946 MachineInstr *ExtInst = getDefIgnoringCopies(Reg: OffsetReg, MRI);
6947 auto Ext = getExtendTypeForInst(MI&: *ExtInst, MRI, IsLoadStore: true);
6948 if (Ext == AArch64_AM::InvalidShiftExtend)
6949 return std::nullopt;
6950
6951 SignExtend = isSignExtendShiftType(Type: Ext) ? 1 : 0;
6952 // We only support SXTW for signed extension here.
6953 if (SignExtend && Ext != AArch64_AM::SXTW)
6954 return std::nullopt;
6955 OffsetReg = ExtInst->getOperand(i: 1).getReg();
6956 }
6957
6958 // Need a 32-bit wide register here.
6959 MachineIRBuilder MIB(*MRI.getVRegDef(Reg: Root.getReg()));
6960 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
6961 }
6962
6963 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
6964 // offset. Signify that we are shifting by setting the shift flag to 1.
6965 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: Base.getReg()); },
6966 [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: OffsetReg); },
6967 [=](MachineInstrBuilder &MIB) {
6968 // Need to add both immediates here to make sure that they are both
6969 // added to the instruction.
6970 MIB.addImm(Val: SignExtend);
6971 MIB.addImm(Val: 1);
6972 }}};
6973}
6974
6975/// This is used for computing addresses like this:
6976///
6977/// ldr x1, [x2, x3, lsl #3]
6978///
6979/// Where x2 is the base register, and x3 is an offset register. The shift-left
6980/// is a constant value specific to this load instruction. That is, we'll never
6981/// see anything other than a 3 here, which corresponds to the log2 of the
6982/// size of the element being loaded (8 bytes in this example).
6983InstructionSelector::ComplexRendererFns
6984AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
6985 MachineOperand &Root, unsigned SizeInBytes) const {
6986 if (!Root.isReg())
6987 return std::nullopt;
6988 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6989
6990 // We want to find something like this:
6991 //
6992 // val = G_CONSTANT LegalShiftVal
6993 // shift = G_SHL off_reg val
6994 // ptr = G_PTR_ADD base_reg shift
6995 // x = G_LOAD ptr
6996 //
6997 // And fold it into this addressing mode:
6998 //
6999 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7000
7001 // Check if we can find the G_PTR_ADD.
7002 MachineInstr *PtrAdd =
7003 getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7004 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI))
7005 return std::nullopt;
7006
7007 // Now, try to match an opcode which will match our specific offset.
7008 // We want a G_SHL or a G_MUL.
7009 MachineInstr *OffsetInst =
7010 getDefIgnoringCopies(Reg: PtrAdd->getOperand(i: 2).getReg(), MRI);
7011 return selectExtendedSHL(Root, Base&: PtrAdd->getOperand(i: 1),
7012 Offset&: OffsetInst->getOperand(i: 0), SizeInBytes,
7013 /*WantsExt=*/false);
7014}
7015
7016/// This is used for computing addresses like this:
7017///
7018/// ldr x1, [x2, x3]
7019///
7020/// Where x2 is the base register, and x3 is an offset register.
7021///
7022/// When possible (or profitable) to fold a G_PTR_ADD into the address
7023/// calculation, this will do so. Otherwise, it will return std::nullopt.
7024InstructionSelector::ComplexRendererFns
7025AArch64InstructionSelector::selectAddrModeRegisterOffset(
7026 MachineOperand &Root) const {
7027 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7028
7029 // We need a GEP.
7030 MachineInstr *Gep = MRI.getVRegDef(Reg: Root.getReg());
7031 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7032 return std::nullopt;
7033
7034 // If this is used more than once, let's not bother folding.
7035 // TODO: Check if they are memory ops. If they are, then we can still fold
7036 // without having to recompute anything.
7037 if (!MRI.hasOneNonDBGUse(RegNo: Gep->getOperand(i: 0).getReg()))
7038 return std::nullopt;
7039
7040 // Base is the GEP's LHS, offset is its RHS.
7041 return {{[=](MachineInstrBuilder &MIB) {
7042 MIB.addUse(RegNo: Gep->getOperand(i: 1).getReg());
7043 },
7044 [=](MachineInstrBuilder &MIB) {
7045 MIB.addUse(RegNo: Gep->getOperand(i: 2).getReg());
7046 },
7047 [=](MachineInstrBuilder &MIB) {
7048 // Need to add both immediates here to make sure that they are both
7049 // added to the instruction.
7050 MIB.addImm(Val: 0);
7051 MIB.addImm(Val: 0);
7052 }}};
7053}
7054
7055/// This is intended to be equivalent to selectAddrModeXRO in
7056/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7057InstructionSelector::ComplexRendererFns
7058AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7059 unsigned SizeInBytes) const {
7060 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7061 if (!Root.isReg())
7062 return std::nullopt;
7063 MachineInstr *PtrAdd =
7064 getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7065 if (!PtrAdd)
7066 return std::nullopt;
7067
7068 // Check for an immediate which cannot be encoded in the [base + imm]
7069 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7070 // end up with code like:
7071 //
7072 // mov x0, wide
7073 // add x1, base, x0
7074 // ldr x2, [x1, x0]
7075 //
7076 // In this situation, we can use the [base, xreg] addressing mode to save an
7077 // add/sub:
7078 //
7079 // mov x0, wide
7080 // ldr x2, [base, x0]
7081 auto ValAndVReg =
7082 getIConstantVRegValWithLookThrough(VReg: PtrAdd->getOperand(i: 2).getReg(), MRI);
7083 if (ValAndVReg) {
7084 unsigned Scale = Log2_32(Value: SizeInBytes);
7085 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7086
7087 // Skip immediates that can be selected in the load/store addressing
7088 // mode.
7089 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7090 ImmOff < (0x1000 << Scale))
7091 return std::nullopt;
7092
7093 // Helper lambda to decide whether or not it is preferable to emit an add.
7094 auto isPreferredADD = [](int64_t ImmOff) {
7095 // Constants in [0x0, 0xfff] can be encoded in an add.
7096 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7097 return true;
7098
7099 // Can it be encoded in an add lsl #12?
7100 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7101 return false;
7102
7103 // It can be encoded in an add lsl #12, but we may not want to. If it is
7104 // possible to select this as a single movz, then prefer that. A single
7105 // movz is faster than an add with a shift.
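 // For example (illustrative): 0x120000 is a single
 // "movz ..., #0x12, lsl #16", so this returns false and the [base, xreg]
 // form is kept; 0x123000 cannot be materialized with a single movz (it has
 // nonzero bits in both 16-bit halves), so this returns true and the shifted
 // add is preferred.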
7106 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7107 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7108 };
7109
7110 // If the immediate can be encoded in a single add/sub, then bail out.
7111 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7112 return std::nullopt;
7113 }
7114
7115 // Try to fold shifts into the addressing mode.
7116 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7117 if (AddrModeFns)
7118 return AddrModeFns;
7119
7120 // If that doesn't work, see if it's possible to fold in registers from
7121 // a GEP.
7122 return selectAddrModeRegisterOffset(Root);
7123}
7124
7125/// This is used for computing addresses like this:
7126///
7127/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7128///
7129/// Where we have a 64-bit base register, a 32-bit offset register, and an
7130/// extend (which may or may not be signed).
7131InstructionSelector::ComplexRendererFns
7132AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7133 unsigned SizeInBytes) const {
7134 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7135
7136 MachineInstr *PtrAdd =
7137 getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7138 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI))
7139 return std::nullopt;
7140
7141 MachineOperand &LHS = PtrAdd->getOperand(i: 1);
7142 MachineOperand &RHS = PtrAdd->getOperand(i: 2);
7143 MachineInstr *OffsetInst = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);
7144
7145 // The first case is the same as selectAddrModeXRO, except we need an extend.
7146 // In this case, we try to find a shift and extend, and fold them into the
7147 // addressing mode.
7148 //
7149 // E.g.
7150 //
7151 // off_reg = G_Z/S/ANYEXT ext_reg
7152 // val = G_CONSTANT LegalShiftVal
7153 // shift = G_SHL off_reg val
7154 // ptr = G_PTR_ADD base_reg shift
7155 // x = G_LOAD ptr
7156 //
7157 // In this case we can get a load like this:
7158 //
7159 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7160 auto ExtendedShl = selectExtendedSHL(Root, Base&: LHS, Offset&: OffsetInst->getOperand(i: 0),
7161 SizeInBytes, /*WantsExt=*/true);
7162 if (ExtendedShl)
7163 return ExtendedShl;
7164
7165 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7166 //
7167 // e.g.
7168 // ldr something, [base_reg, ext_reg, sxtw]
7169 if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI))
7170 return std::nullopt;
7171
7172 // Check if this is an extend. We'll get an extend type if it is.
7173 AArch64_AM::ShiftExtendType Ext =
7174 getExtendTypeForInst(MI&: *OffsetInst, MRI, /*IsLoadStore=*/true);
7175 if (Ext == AArch64_AM::InvalidShiftExtend)
7176 return std::nullopt;
7177
7178 // Need a 32-bit wide register.
7179 MachineIRBuilder MIB(*PtrAdd);
7180 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7181 AArch64::GPR32RegClass, MIB);
7182 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7183
7184 // Base is LHS, offset is ExtReg.
7185 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: LHS.getReg()); },
7186 [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
7187 [=](MachineInstrBuilder &MIB) {
7188 MIB.addImm(Val: SignExtend);
7189 MIB.addImm(Val: 0);
7190 }}};
7191}
7192
7193/// Select a "register plus unscaled signed 9-bit immediate" address. This
7194/// should only match when there is an offset that is not valid for a scaled
7195/// immediate addressing mode. The "Size" argument is the size in bytes of the
7196/// memory reference, which is needed here to know what is valid for a scaled
7197/// immediate.
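/// For example (illustrative), with Size == 4 an offset of +3 cannot use the
/// scaled form, which requires a multiple of 4, but it does fall within the
/// signed 9-bit range [-256, 255] accepted below.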
7198InstructionSelector::ComplexRendererFns
7199AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7200 unsigned Size) const {
7201 MachineRegisterInfo &MRI =
7202 Root.getParent()->getParent()->getParent()->getRegInfo();
7203
7204 if (!Root.isReg())
7205 return std::nullopt;
7206
7207 if (!isBaseWithConstantOffset(Root, MRI))
7208 return std::nullopt;
7209
7210 MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
7211
7212 MachineOperand &OffImm = RootDef->getOperand(i: 2);
7213 if (!OffImm.isReg())
7214 return std::nullopt;
7215 MachineInstr *RHS = MRI.getVRegDef(Reg: OffImm.getReg());
7216 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7217 return std::nullopt;
7218 int64_t RHSC;
7219 MachineOperand &RHSOp1 = RHS->getOperand(i: 1);
7220 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7221 return std::nullopt;
7222 RHSC = RHSOp1.getCImm()->getSExtValue();
7223
7224 if (RHSC >= -256 && RHSC < 256) {
7225 MachineOperand &Base = RootDef->getOperand(i: 1);
7226 return {{
7227 [=](MachineInstrBuilder &MIB) { MIB.add(MO: Base); },
7228 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC); },
7229 }};
7230 }
7231 return std::nullopt;
7232}
7233
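/// Try to fold the low 12 bits of a small-code-model global address
/// (materialized as ADRP + G_ADD_LOW) directly into a load/store immediate
/// operand. Illustratively (x8, w0 and 'var' are placeholder names):
///
///   adrp x8, var
///   ldr  w0, [x8, :lo12:var]
///
/// This is only attempted when the constant addend is a multiple of the
/// access size and the global is sufficiently aligned; see the checks below.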
7234InstructionSelector::ComplexRendererFns
7235AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7236 unsigned Size,
7237 MachineRegisterInfo &MRI) const {
7238 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7239 return std::nullopt;
7240 MachineInstr &Adrp = *MRI.getVRegDef(Reg: RootDef.getOperand(i: 1).getReg());
7241 if (Adrp.getOpcode() != AArch64::ADRP)
7242 return std::nullopt;
7243
7244 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7245 auto Offset = Adrp.getOperand(i: 1).getOffset();
7246 if (Offset % Size != 0)
7247 return std::nullopt;
7248
7249 auto GV = Adrp.getOperand(i: 1).getGlobal();
7250 if (GV->isThreadLocal())
7251 return std::nullopt;
7252
7253 auto &MF = *RootDef.getParent()->getParent();
7254 if (GV->getPointerAlignment(DL: MF.getDataLayout()) < Size)
7255 return std::nullopt;
7256
7257 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM: MF.getTarget());
7258 MachineIRBuilder MIRBuilder(RootDef);
7259 Register AdrpReg = Adrp.getOperand(i: 0).getReg();
7260 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: AdrpReg); },
7261 [=](MachineInstrBuilder &MIB) {
7262 MIB.addGlobalAddress(GV, Offset,
7263 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF |
7264 AArch64II::MO_NC);
7265 }}};
7266}
7267
7268/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7269/// "Size" argument is the size in bytes of the memory reference, which
7270/// determines the scale.
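/// For example (illustrative), with Size == 8 a constant offset of 16 is
/// rendered as the scaled immediate 2 (16 >> Log2_32(8)), provided the offset
/// is a non-negative multiple of 8 within the unsigned 12-bit scaled range.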
7271InstructionSelector::ComplexRendererFns
7272AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7273 unsigned Size) const {
7274 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7275 MachineRegisterInfo &MRI = MF.getRegInfo();
7276
7277 if (!Root.isReg())
7278 return std::nullopt;
7279
7280 MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
7281 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7282 return {{
7283 [=](MachineInstrBuilder &MIB) { MIB.add(MO: RootDef->getOperand(i: 1)); },
7284 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
7285 }};
7286 }
7287
7288 CodeModel::Model CM = MF.getTarget().getCodeModel();
7289 // Check if we can fold in the ADD of a small code model ADRP + ADD address.
7290 if (CM == CodeModel::Small) {
7291 auto OpFns = tryFoldAddLowIntoImm(RootDef&: *RootDef, Size, MRI);
7292 if (OpFns)
7293 return OpFns;
7294 }
7295
7296 if (isBaseWithConstantOffset(Root, MRI)) {
7297 MachineOperand &LHS = RootDef->getOperand(i: 1);
7298 MachineOperand &RHS = RootDef->getOperand(i: 2);
7299 MachineInstr *LHSDef = MRI.getVRegDef(Reg: LHS.getReg());
7300 MachineInstr *RHSDef = MRI.getVRegDef(Reg: RHS.getReg());
7301
7302 int64_t RHSC = (int64_t)RHSDef->getOperand(i: 1).getCImm()->getZExtValue();
7303 unsigned Scale = Log2_32(Value: Size);
7304 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7305 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7306 return {{
7307 [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHSDef->getOperand(i: 1)); },
7308 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
7309 }};
7310
7311 return {{
7312 [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHS); },
7313 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
7314 }};
7315 }
7316 }
7317
7318 // Before falling back to our general case, check if the unscaled
7319 // instructions can handle this. If so, that's preferable.
7320 if (selectAddrModeUnscaled(Root, Size))
7321 return std::nullopt;
7322
7323 return {{
7324 [=](MachineInstrBuilder &MIB) { MIB.add(MO: Root); },
7325 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
7326 }};
7327}
7328
7329/// Given a shift instruction, return the correct shift type for that
7330/// instruction.
7331static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7332 switch (MI.getOpcode()) {
7333 default:
7334 return AArch64_AM::InvalidShiftExtend;
7335 case TargetOpcode::G_SHL:
7336 return AArch64_AM::LSL;
7337 case TargetOpcode::G_LSHR:
7338 return AArch64_AM::LSR;
7339 case TargetOpcode::G_ASHR:
7340 return AArch64_AM::ASR;
7341 case TargetOpcode::G_ROTR:
7342 return AArch64_AM::ROR;
7343 }
7344}
7345
7346/// Select a "shifted register" operand. If the value is not shifted, set the
7347/// shift operand to a default value of "lsl 0".
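/// For example (illustrative), a root defined by "%v = G_SHL %x, 3" can be
/// rendered as the operand pair (%x, lsl #3); the shift amount is masked to
/// the bit width of %x below.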
7348InstructionSelector::ComplexRendererFns
7349AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7350 bool AllowROR) const {
7351 if (!Root.isReg())
7352 return std::nullopt;
7353 MachineRegisterInfo &MRI =
7354 Root.getParent()->getParent()->getParent()->getRegInfo();
7355
7356 // Check if the operand is defined by an instruction which corresponds to
7357 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7358 MachineInstr *ShiftInst = MRI.getVRegDef(Reg: Root.getReg());
7359 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(MI&: *ShiftInst);
7360 if (ShType == AArch64_AM::InvalidShiftExtend)
7361 return std::nullopt;
7362 if (ShType == AArch64_AM::ROR && !AllowROR)
7363 return std::nullopt;
7364 if (!isWorthFoldingIntoExtendedReg(MI&: *ShiftInst, MRI))
7365 return std::nullopt;
7366
7367 // Need an immediate on the RHS.
7368 MachineOperand &ShiftRHS = ShiftInst->getOperand(i: 2);
7369 auto Immed = getImmedFromMO(Root: ShiftRHS);
7370 if (!Immed)
7371 return std::nullopt;
7372
7373 // We have something that we can fold. Fold in the shift's LHS and RHS into
7374 // the instruction.
7375 MachineOperand &ShiftLHS = ShiftInst->getOperand(i: 1);
7376 Register ShiftReg = ShiftLHS.getReg();
7377
7378 unsigned NumBits = MRI.getType(Reg: ShiftReg).getSizeInBits();
7379 unsigned Val = *Immed & (NumBits - 1);
7380 unsigned ShiftVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val);
7381
7382 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ShiftReg); },
7383 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShiftVal); }}};
7384}
7385
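// Map an instruction to the AArch64 extend kind it implements, if any. For
// example (illustrative), a G_SEXT from s32 maps to SXTW, and a G_AND with a
// 0xFF mask acts like UXTB outside of load/store addressing. Returns
// InvalidShiftExtend when no extend can be inferred.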
7386AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7387 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7388 unsigned Opc = MI.getOpcode();
7389
7390 // Handle explicit extend instructions first.
7391 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7392 unsigned Size;
7393 if (Opc == TargetOpcode::G_SEXT)
7394 Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
7395 else
7396 Size = MI.getOperand(i: 2).getImm();
7397 assert(Size != 64 && "Extend from 64 bits?");
7398 switch (Size) {
7399 case 8:
7400 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7401 case 16:
7402 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7403 case 32:
7404 return AArch64_AM::SXTW;
7405 default:
7406 return AArch64_AM::InvalidShiftExtend;
7407 }
7408 }
7409
7410 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7411 unsigned Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
7412 assert(Size != 64 && "Extend from 64 bits?");
7413 switch (Size) {
7414 case 8:
7415 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7416 case 16:
7417 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7418 case 32:
7419 return AArch64_AM::UXTW;
7420 default:
7421 return AArch64_AM::InvalidShiftExtend;
7422 }
7423 }
7424
7425 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7426 // on the RHS.
7427 if (Opc != TargetOpcode::G_AND)
7428 return AArch64_AM::InvalidShiftExtend;
7429
7430 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(Root: MI.getOperand(i: 2));
7431 if (!MaybeAndMask)
7432 return AArch64_AM::InvalidShiftExtend;
7433 uint64_t AndMask = *MaybeAndMask;
7434 switch (AndMask) {
7435 default:
7436 return AArch64_AM::InvalidShiftExtend;
7437 case 0xFF:
7438 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7439 case 0xFFFF:
7440 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7441 case 0xFFFFFFFF:
7442 return AArch64_AM::UXTW;
7443 }
7444}
7445
7446Register AArch64InstructionSelector::moveScalarRegClass(
7447 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7448 MachineRegisterInfo &MRI = *MIB.getMRI();
7449 auto Ty = MRI.getType(Reg);
7450 assert(!Ty.isVector() && "Expected scalars only!");
7451 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7452 return Reg;
7453
7454 // Create a copy and immediately select it.
7455 // FIXME: We should have an emitCopy function?
7456 auto Copy = MIB.buildCopy(Res: {&RC}, Op: {Reg});
7457 selectCopy(*Copy, TII, MRI, TRI, RBI);
7458 return Copy.getReg(Idx: 0);
7459}
7460
7461/// Select an "extended register" operand. This operand folds in an extend
7462/// followed by an optional left shift.
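/// For example (illustrative), a root defined by
/// "%v = G_SHL (G_SEXT %w:s32), 2" can be rendered as the operand pair
/// (%w, sxtw #2); left-shift amounts greater than 4 are rejected below.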
7463InstructionSelector::ComplexRendererFns
7464AArch64InstructionSelector::selectArithExtendedRegister(
7465 MachineOperand &Root) const {
7466 if (!Root.isReg())
7467 return std::nullopt;
7468 MachineRegisterInfo &MRI =
7469 Root.getParent()->getParent()->getParent()->getRegInfo();
7470
7471 uint64_t ShiftVal = 0;
7472 Register ExtReg;
7473 AArch64_AM::ShiftExtendType Ext;
7474 MachineInstr *RootDef = getDefIgnoringCopies(Reg: Root.getReg(), MRI);
7475 if (!RootDef)
7476 return std::nullopt;
7477
7478 if (!isWorthFoldingIntoExtendedReg(MI&: *RootDef, MRI))
7479 return std::nullopt;
7480
7481 // Check if we can fold a shift and an extend.
7482 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7483 // Look for a constant on the RHS of the shift.
7484 MachineOperand &RHS = RootDef->getOperand(i: 2);
7485 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(Root: RHS);
7486 if (!MaybeShiftVal)
7487 return std::nullopt;
7488 ShiftVal = *MaybeShiftVal;
7489 if (ShiftVal > 4)
7490 return std::nullopt;
7491 // Look for a valid extend instruction on the LHS of the shift.
7492 MachineOperand &LHS = RootDef->getOperand(i: 1);
7493 MachineInstr *ExtDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
7494 if (!ExtDef)
7495 return std::nullopt;
7496 Ext = getExtendTypeForInst(MI&: *ExtDef, MRI);
7497 if (Ext == AArch64_AM::InvalidShiftExtend)
7498 return std::nullopt;
7499 ExtReg = ExtDef->getOperand(i: 1).getReg();
7500 } else {
7501 // Didn't get a shift. Try just folding an extend.
7502 Ext = getExtendTypeForInst(MI&: *RootDef, MRI);
7503 if (Ext == AArch64_AM::InvalidShiftExtend)
7504 return std::nullopt;
7505 ExtReg = RootDef->getOperand(i: 1).getReg();
7506
7507 // If we have a 32 bit instruction which zeroes out the high half of a
7508 // register, we get an implicit zero extend for free. Check if we have one.
7509 // FIXME: We actually emit the extend right now even though we don't have
7510 // to.
7511 if (Ext == AArch64_AM::UXTW && MRI.getType(Reg: ExtReg).getSizeInBits() == 32) {
7512 MachineInstr *ExtInst = MRI.getVRegDef(Reg: ExtReg);
7513 if (isDef32(MI: *ExtInst))
7514 return std::nullopt;
7515 }
7516 }
7517
7518 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7519 // copy.
7520 MachineIRBuilder MIB(*RootDef);
7521 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7522
7523 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
7524 [=](MachineInstrBuilder &MIB) {
7525 MIB.addImm(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal));
7526 }}};
7527}
7528
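/// Match the "high half" of a vector value for patterns that consume it
/// directly: either the second result of a G_UNMERGE_VALUES, or a
/// G_EXTRACT_VECTOR_ELT of lane 1 from a <2 x s64> source, looking through
/// little-endian bitcasts. In both cases the full-width source vector
/// register is rendered.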
7529InstructionSelector::ComplexRendererFns
7530AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7531 if (!Root.isReg())
7532 return std::nullopt;
7533 MachineRegisterInfo &MRI =
7534 Root.getParent()->getParent()->getParent()->getRegInfo();
7535
7536 auto Extract = getDefSrcRegIgnoringCopies(Reg: Root.getReg(), MRI);
7537 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7538 STI.isLittleEndian())
7539 Extract =
7540 getDefSrcRegIgnoringCopies(Reg: Extract->MI->getOperand(i: 1).getReg(), MRI);
7541 if (!Extract)
7542 return std::nullopt;
7543
7544 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7545 if (Extract->Reg == Extract->MI->getOperand(i: 1).getReg()) {
7546 Register ExtReg = Extract->MI->getOperand(i: 2).getReg();
7547 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}};
7548 }
7549 }
7550 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7551 LLT SrcTy = MRI.getType(Reg: Extract->MI->getOperand(i: 1).getReg());
7552 auto LaneIdx = getIConstantVRegValWithLookThrough(
7553 VReg: Extract->MI->getOperand(i: 2).getReg(), MRI);
7554 if (LaneIdx && SrcTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) &&
7555 LaneIdx->Value.getSExtValue() == 1) {
7556 Register ExtReg = Extract->MI->getOperand(i: 1).getReg();
7557 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}};
7558 }
7559 }
7560
7561 return std::nullopt;
7562}
7563
7564void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7565 const MachineInstr &MI,
7566 int OpIdx) const {
7567 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7568 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7569 "Expected G_CONSTANT");
7570 std::optional<int64_t> CstVal =
7571 getIConstantVRegSExtVal(VReg: MI.getOperand(i: 0).getReg(), MRI);
7572 assert(CstVal && "Expected constant value");
7573 MIB.addImm(Val: *CstVal);
7574}
7575
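// The next two renderers emit a G_CONSTANT as an AArch64 logical ("bitmask")
// immediate, as used by AND/ORR/EOR. encodeLogicalImmediate packs the value
// into the N:immr:imms encoding; the constant is expected to have already
// been checked for encodability by the matcher.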
7576void AArch64InstructionSelector::renderLogicalImm32(
7577 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7578 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7579 "Expected G_CONSTANT");
7580 uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue();
7581 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 32);
7582 MIB.addImm(Val: Enc);
7583}
7584
7585void AArch64InstructionSelector::renderLogicalImm64(
7586 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7587 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7588 "Expected G_CONSTANT");
7589 uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue();
7590 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 64);
7591 MIB.addImm(Val: Enc);
7592}
7593
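// Render the immediate of the BRK emitted for G_UBSANTRAP: the low byte holds
// the UBSan check kind taken from the instruction, and 'U' (0x55) is placed in
// the high byte, giving the 0x55xx immediates conventionally used for UBSan
// traps.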
7594void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7595 const MachineInstr &MI,
7596 int OpIdx) const {
7597 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7598 "Expected G_UBSANTRAP");
7599 MIB.addImm(Val: MI.getOperand(i: 0).getImm() | ('U' << 8));
7600}
7601
7602void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7603 const MachineInstr &MI,
7604 int OpIdx) const {
7605 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7606 "Expected G_FCONSTANT");
7607 MIB.addImm(
7608 Val: AArch64_AM::getFP16Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
7609}
7610
7611void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7612 const MachineInstr &MI,
7613 int OpIdx) const {
7614 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7615 "Expected G_FCONSTANT");
7616 MIB.addImm(
7617 Val: AArch64_AM::getFP32Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
7618}
7619
7620void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7621 const MachineInstr &MI,
7622 int OpIdx) const {
7623 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7624 "Expected G_FCONSTANT");
7625 MIB.addImm(
7626 Val: AArch64_AM::getFP64Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
7627}
7628
7629void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7630 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7631 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7632 "Expected G_FCONSTANT");
7633 MIB.addImm(Val: AArch64_AM::encodeAdvSIMDModImmType4(Imm: MI.getOperand(i: 1)
7634 .getFPImm()
7635 ->getValueAPF()
7636 .bitcastToAPInt()
7637 .getZExtValue()));
7638}
7639
7640bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7641 const MachineInstr &MI, unsigned NumBytes) const {
7642 if (!MI.mayLoadOrStore())
7643 return false;
7644 assert(MI.hasOneMemOperand() &&
7645 "Expected load/store to have only one mem op!");
7646 return (*MI.memoperands_begin())->getSize() == NumBytes;
7647}
7648
7649bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7650 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7651 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits() != 32)
7652 return false;
7653
7654 // Only return true if we know the operation will zero-out the high half of
7655 // the 64-bit register. Truncates can be subregister copies, which don't
7656 // zero out the high bits. Copies and other copy-like instructions can be
7657 // fed by truncates, or could be lowered as subregister copies.
7658 switch (MI.getOpcode()) {
7659 default:
7660 return true;
7661 case TargetOpcode::COPY:
7662 case TargetOpcode::G_BITCAST:
7663 case TargetOpcode::G_TRUNC:
7664 case TargetOpcode::G_PHI:
7665 return false;
7666 }
7667}
7668
7669
7670// Perform fixups on the given PHI instruction's operands to force them all
7671// to be the same as the destination regbank.
7672static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
7673 const AArch64RegisterBankInfo &RBI) {
7674 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7675 Register DstReg = MI.getOperand(i: 0).getReg();
7676 const RegisterBank *DstRB = MRI.getRegBankOrNull(Reg: DstReg);
7677 assert(DstRB && "Expected PHI dst to have regbank assigned");
7678 MachineIRBuilder MIB(MI);
7679
7680 // Go through each operand and ensure it has the same regbank.
7681 for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands())) {
7682 if (!MO.isReg())
7683 continue;
7684 Register OpReg = MO.getReg();
7685 const RegisterBank *RB = MRI.getRegBankOrNull(Reg: OpReg);
7686 if (RB != DstRB) {
7687 // Insert a cross-bank copy.
7688 auto *OpDef = MRI.getVRegDef(Reg: OpReg);
7689 const LLT &Ty = MRI.getType(Reg: OpReg);
7690 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7691
7692 // Any instruction we insert must appear after all PHIs in the block
7693 // for the block to be valid MIR.
7694 MachineBasicBlock::iterator InsertPt = std::next(x: OpDef->getIterator());
7695 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
7696 InsertPt = OpDefBB.getFirstNonPHI();
7697 MIB.setInsertPt(MBB&: *OpDef->getParent(), II: InsertPt);
7698 auto Copy = MIB.buildCopy(Res: Ty, Op: OpReg);
7699 MRI.setRegBank(Reg: Copy.getReg(Idx: 0), RegBank: *DstRB);
7700 MO.setReg(Copy.getReg(Idx: 0));
7701 }
7702 }
7703}
7704
7705void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
7706 // We're looking for PHIs; build a list first so we don't invalidate iterators.
7707 MachineRegisterInfo &MRI = MF.getRegInfo();
7708 SmallVector<MachineInstr *, 32> Phis;
7709 for (auto &BB : MF) {
7710 for (auto &MI : BB) {
7711 if (MI.getOpcode() == TargetOpcode::G_PHI)
7712 Phis.emplace_back(Args: &MI);
7713 }
7714 }
7715
7716 for (auto *MI : Phis) {
7717 // We need to do some work here if the operand types are < 16 bit and they
7718 // are split across fpr/gpr banks. Since all types <32b on gpr
7719 // end up being assigned gpr32 regclasses, we can end up with PHIs here
7720 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
7721 // be selecting heterogeneous regbanks for operands if possible, but we
7722 // still need to be able to deal with it here.
7723 //
7724 // To fix this, if we have a gpr-bank operand < 32b in size and at least
7725 // one other operand is on the fpr bank, then we add cross-bank copies
7726 // to homogenize the operand banks. For simplicity the bank that we choose
7727 // to settle on is whatever bank the def operand has. For example:
7728 //
7729 // %endbb:
7730 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
7731 // =>
7732 // %bb2:
7733 // ...
7734 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
7735 // ...
7736 // %endbb:
7737 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
7738 bool HasGPROp = false, HasFPROp = false;
7739 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) {
7740 if (!MO.isReg())
7741 continue;
7742 const LLT &Ty = MRI.getType(Reg: MO.getReg());
7743 if (!Ty.isValid() || !Ty.isScalar())
7744 break;
7745 if (Ty.getSizeInBits() >= 32)
7746 break;
7747 const RegisterBank *RB = MRI.getRegBankOrNull(Reg: MO.getReg());
7748 // If for some reason we don't have a regbank yet, don't try anything.
7749 if (!RB)
7750 break;
7751
7752 if (RB->getID() == AArch64::GPRRegBankID)
7753 HasGPROp = true;
7754 else
7755 HasFPROp = true;
7756 }
7757 // We have heterogeneous regbanks, so we need to fix them up.
7758 if (HasGPROp && HasFPROp)
7759 fixupPHIOpBanks(MI&: *MI, MRI, RBI);
7760 }
7761}
7762
7763namespace llvm {
7764InstructionSelector *
7765createAArch64InstructionSelector(const AArch64TargetMachine &TM,
7766 AArch64Subtarget &Subtarget,
7767 AArch64RegisterBankInfo &RBI) {
7768 return new AArch64InstructionSelector(TM, Subtarget, RBI);
7769}
7770}
7771