1 | //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This file implements the targeting of the InstructionSelector class for |
10 | /// AArch64. |
11 | /// \todo This should be generated by TableGen. |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "AArch64GlobalISelUtils.h" |
15 | #include "AArch64InstrInfo.h" |
16 | #include "AArch64MachineFunctionInfo.h" |
17 | #include "AArch64RegisterBankInfo.h" |
18 | #include "AArch64RegisterInfo.h" |
19 | #include "AArch64Subtarget.h" |
20 | #include "AArch64TargetMachine.h" |
21 | #include "MCTargetDesc/AArch64AddressingModes.h" |
22 | #include "MCTargetDesc/AArch64MCTargetDesc.h" |
23 | #include "llvm/BinaryFormat/Dwarf.h" |
24 | #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" |
25 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
26 | #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" |
27 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
28 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
29 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
30 | #include "llvm/CodeGen/MachineBasicBlock.h" |
31 | #include "llvm/CodeGen/MachineConstantPool.h" |
32 | #include "llvm/CodeGen/MachineFrameInfo.h" |
33 | #include "llvm/CodeGen/MachineFunction.h" |
34 | #include "llvm/CodeGen/MachineInstr.h" |
35 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
36 | #include "llvm/CodeGen/MachineMemOperand.h" |
37 | #include "llvm/CodeGen/MachineOperand.h" |
38 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
39 | #include "llvm/CodeGen/TargetOpcodes.h" |
40 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
41 | #include "llvm/IR/Constants.h" |
42 | #include "llvm/IR/DerivedTypes.h" |
43 | #include "llvm/IR/Instructions.h" |
44 | #include "llvm/IR/IntrinsicsAArch64.h" |
45 | #include "llvm/IR/PatternMatch.h" |
46 | #include "llvm/IR/Type.h" |
47 | #include "llvm/Pass.h" |
48 | #include "llvm/Support/Debug.h" |
49 | #include "llvm/Support/raw_ostream.h" |
50 | #include <optional> |
51 | |
52 | #define DEBUG_TYPE "aarch64-isel" |
53 | |
54 | using namespace llvm; |
55 | using namespace MIPatternMatch; |
56 | using namespace AArch64GISelUtils; |
57 | |
// Forward declarations for analyses whose pointers are merely threaded through
// setupMF(); their full definitions are not required in this file.
namespace llvm {
class BlockFrequencyInfo;
class ProfileSummaryInfo;
} // namespace llvm
62 | |
63 | namespace { |
64 | |
65 | #define GET_GLOBALISEL_PREDICATE_BITSET |
66 | #include "AArch64GenGlobalISel.inc" |
67 | #undef GET_GLOBALISEL_PREDICATE_BITSET |
68 | |
69 | |
70 | class AArch64InstructionSelector : public InstructionSelector { |
71 | public: |
72 | AArch64InstructionSelector(const AArch64TargetMachine &TM, |
73 | const AArch64Subtarget &STI, |
74 | const AArch64RegisterBankInfo &RBI); |
75 | |
76 | bool select(MachineInstr &I) override; |
77 | static const char *getName() { return DEBUG_TYPE; } |
78 | |
79 | void setupMF(MachineFunction &MF, GISelKnownBits *KB, |
80 | CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, |
81 | BlockFrequencyInfo *BFI) override { |
82 | InstructionSelector::setupMF(mf&: MF, kb: KB, covinfo: CoverageInfo, psi: PSI, bfi: BFI); |
83 | MIB.setMF(MF); |
84 | |
85 | // hasFnAttribute() is expensive to call on every BRCOND selection, so |
86 | // cache it here for each run of the selector. |
87 | ProduceNonFlagSettingCondBr = |
88 | !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); |
89 | MFReturnAddr = Register(); |
90 | |
91 | processPHIs(MF); |
92 | } |
93 | |
94 | private: |
95 | /// tblgen-erated 'select' implementation, used as the initial selector for |
96 | /// the patterns that don't require complex C++. |
97 | bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; |
98 | |
99 | // A lowering phase that runs before any selection attempts. |
100 | // Returns true if the instruction was modified. |
101 | bool preISelLower(MachineInstr &I); |
102 | |
103 | // An early selection function that runs before the selectImpl() call. |
104 | bool earlySelect(MachineInstr &I); |
105 | |
106 | /// Save state that is shared between select calls, call select on \p I and |
107 | /// then restore the saved state. This can be used to recursively call select |
108 | /// within a select call. |
109 | bool selectAndRestoreState(MachineInstr &I); |
110 | |
111 | // Do some preprocessing of G_PHIs before we begin selection. |
112 | void processPHIs(MachineFunction &MF); |
113 | |
114 | bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI); |
115 | |
116 | /// Eliminate same-sized cross-bank copies into stores before selectImpl(). |
117 | bool contractCrossBankCopyIntoStore(MachineInstr &I, |
118 | MachineRegisterInfo &MRI); |
119 | |
120 | bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI); |
121 | |
122 | bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, |
123 | MachineRegisterInfo &MRI) const; |
124 | bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, |
125 | MachineRegisterInfo &MRI) const; |
126 | |
127 | ///@{ |
128 | /// Helper functions for selectCompareBranch. |
129 | bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, |
130 | MachineIRBuilder &MIB) const; |
131 | bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, |
132 | MachineIRBuilder &MIB) const; |
133 | bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, |
134 | MachineIRBuilder &MIB) const; |
135 | bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, |
136 | MachineBasicBlock *DstMBB, |
137 | MachineIRBuilder &MIB) const; |
138 | ///@} |
139 | |
140 | bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, |
141 | MachineRegisterInfo &MRI); |
142 | |
143 | bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI); |
144 | bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI); |
145 | |
146 | // Helper to generate an equivalent of scalar_to_vector into a new register, |
147 | // returned via 'Dst'. |
148 | MachineInstr *emitScalarToVector(unsigned EltSize, |
149 | const TargetRegisterClass *DstRC, |
150 | Register Scalar, |
151 | MachineIRBuilder &MIRBuilder) const; |
152 | /// Helper to narrow vector that was widened by emitScalarToVector. |
153 | /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit |
154 | /// vector, correspondingly. |
155 | MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg, |
156 | MachineIRBuilder &MIRBuilder, |
157 | MachineRegisterInfo &MRI) const; |
158 | |
159 | /// Emit a lane insert into \p DstReg, or a new vector register if |
160 | /// std::nullopt is provided. |
161 | /// |
162 | /// The lane inserted into is defined by \p LaneIdx. The vector source |
163 | /// register is given by \p SrcReg. The register containing the element is |
164 | /// given by \p EltReg. |
165 | MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg, |
166 | Register EltReg, unsigned LaneIdx, |
167 | const RegisterBank &RB, |
168 | MachineIRBuilder &MIRBuilder) const; |
169 | |
170 | /// Emit a sequence of instructions representing a constant \p CV for a |
171 | /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.) |
172 | /// |
173 | /// \returns the last instruction in the sequence on success, and nullptr |
174 | /// otherwise. |
175 | MachineInstr *emitConstantVector(Register Dst, Constant *CV, |
176 | MachineIRBuilder &MIRBuilder, |
177 | MachineRegisterInfo &MRI); |
178 | |
179 | MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits, |
180 | MachineIRBuilder &MIRBuilder); |
181 | |
182 | MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits, |
183 | MachineIRBuilder &MIRBuilder, bool Inv); |
184 | |
185 | MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits, |
186 | MachineIRBuilder &MIRBuilder, bool Inv); |
187 | MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits, |
188 | MachineIRBuilder &MIRBuilder); |
189 | MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits, |
190 | MachineIRBuilder &MIRBuilder, bool Inv); |
191 | MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits, |
192 | MachineIRBuilder &MIRBuilder); |
193 | |
194 | bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy, |
195 | MachineRegisterInfo &MRI); |
196 | /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a |
197 | /// SUBREG_TO_REG. |
198 | bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI); |
199 | bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI); |
200 | bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI); |
201 | bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI); |
202 | |
203 | bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI); |
204 | bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI); |
205 | bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI); |
206 | bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI); |
207 | |
208 | /// Helper function to select vector load intrinsics like |
209 | /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc. |
210 | /// \p Opc is the opcode that the selected instruction should use. |
211 | /// \p NumVecs is the number of vector destinations for the instruction. |
212 | /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction. |
213 | bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs, |
214 | MachineInstr &I); |
215 | bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs, |
216 | MachineInstr &I); |
217 | void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs, |
218 | unsigned Opc); |
219 | bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs, |
220 | unsigned Opc); |
221 | bool selectIntrinsicWithSideEffects(MachineInstr &I, |
222 | MachineRegisterInfo &MRI); |
223 | bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI); |
224 | bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI); |
225 | bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI); |
226 | bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI); |
227 | bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI); |
228 | bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI); |
229 | bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI); |
230 | bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI); |
231 | |
232 | bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI); |
233 | bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI); |
234 | bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI); |
235 | |
236 | unsigned emitConstantPoolEntry(const Constant *CPVal, |
237 | MachineFunction &MF) const; |
238 | MachineInstr *emitLoadFromConstantPool(const Constant *CPVal, |
239 | MachineIRBuilder &MIRBuilder) const; |
240 | |
241 | // Emit a vector concat operation. |
242 | MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1, |
243 | Register Op2, |
244 | MachineIRBuilder &MIRBuilder) const; |
245 | |
246 | // Emit an integer compare between LHS and RHS, which checks for Predicate. |
247 | MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, |
248 | MachineOperand &Predicate, |
249 | MachineIRBuilder &MIRBuilder) const; |
250 | |
251 | /// Emit a floating point comparison between \p LHS and \p RHS. |
252 | /// \p Pred if given is the intended predicate to use. |
253 | MachineInstr * |
254 | emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder, |
255 | std::optional<CmpInst::Predicate> = std::nullopt) const; |
256 | |
257 | MachineInstr * |
258 | emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, |
259 | std::initializer_list<llvm::SrcOp> SrcOps, |
260 | MachineIRBuilder &MIRBuilder, |
261 | const ComplexRendererFns &RenderFns = std::nullopt) const; |
262 | /// Helper function to emit an add or sub instruction. |
263 | /// |
264 | /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above |
265 | /// in a specific order. |
266 | /// |
267 | /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. |
268 | /// |
269 | /// \code |
270 | /// const std::array<std::array<unsigned, 2>, 4> Table { |
271 | /// {{AArch64::ADDXri, AArch64::ADDWri}, |
272 | /// {AArch64::ADDXrs, AArch64::ADDWrs}, |
273 | /// {AArch64::ADDXrr, AArch64::ADDWrr}, |
274 | /// {AArch64::SUBXri, AArch64::SUBWri}, |
275 | /// {AArch64::ADDXrx, AArch64::ADDWrx}}}; |
276 | /// \endcode |
277 | /// |
278 | /// Each row in the table corresponds to a different addressing mode. Each |
279 | /// column corresponds to a different register size. |
280 | /// |
281 | /// \attention Rows must be structured as follows: |
282 | /// - Row 0: The ri opcode variants |
283 | /// - Row 1: The rs opcode variants |
284 | /// - Row 2: The rr opcode variants |
285 | /// - Row 3: The ri opcode variants for negative immediates |
286 | /// - Row 4: The rx opcode variants |
287 | /// |
288 | /// \attention Columns must be structured as follows: |
289 | /// - Column 0: The 64-bit opcode variants |
290 | /// - Column 1: The 32-bit opcode variants |
291 | /// |
292 | /// \p Dst is the destination register of the binop to emit. |
293 | /// \p LHS is the left-hand operand of the binop to emit. |
294 | /// \p RHS is the right-hand operand of the binop to emit. |
295 | MachineInstr *emitAddSub( |
296 | const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, |
297 | Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
298 | MachineIRBuilder &MIRBuilder) const; |
299 | MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, |
300 | MachineOperand &RHS, |
301 | MachineIRBuilder &MIRBuilder) const; |
302 | MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
303 | MachineIRBuilder &MIRBuilder) const; |
304 | MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
305 | MachineIRBuilder &MIRBuilder) const; |
306 | MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
307 | MachineIRBuilder &MIRBuilder) const; |
308 | MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
309 | MachineIRBuilder &MIRBuilder) const; |
310 | MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, |
311 | MachineIRBuilder &MIRBuilder) const; |
312 | MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, |
313 | MachineIRBuilder &MIRBuilder) const; |
314 | MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS, |
315 | AArch64CC::CondCode CC, |
316 | MachineIRBuilder &MIRBuilder) const; |
317 | MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg, |
318 | const RegisterBank &DstRB, LLT ScalarTy, |
319 | Register VecReg, unsigned LaneIdx, |
320 | MachineIRBuilder &MIRBuilder) const; |
321 | MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2, |
322 | AArch64CC::CondCode Pred, |
323 | MachineIRBuilder &MIRBuilder) const; |
324 | /// Emit a CSet for a FP compare. |
325 | /// |
326 | /// \p Dst is expected to be a 32-bit scalar register. |
327 | MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, |
328 | MachineIRBuilder &MIRBuilder) const; |
329 | |
330 | /// Emit an instruction that sets NZCV to the carry-in expected by \p I. |
331 | /// Might elide the instruction if the previous instruction already sets NZCV |
332 | /// correctly. |
333 | MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg); |
334 | |
335 | /// Emit the overflow op for \p Opcode. |
336 | /// |
337 | /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, |
338 | /// G_USUBO, etc. |
339 | std::pair<MachineInstr *, AArch64CC::CondCode> |
340 | emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, |
341 | MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; |
342 | |
343 | bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI); |
344 | |
345 | /// Emit expression as a conjunction (a series of CCMP/CFCMP ops). |
346 | /// In some cases this is even possible with OR operations in the expression. |
347 | MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC, |
348 | MachineIRBuilder &MIB) const; |
349 | MachineInstr *emitConditionalComparison(Register LHS, Register RHS, |
350 | CmpInst::Predicate CC, |
351 | AArch64CC::CondCode Predicate, |
352 | AArch64CC::CondCode OutCC, |
353 | MachineIRBuilder &MIB) const; |
354 | MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC, |
355 | bool Negate, Register CCOp, |
356 | AArch64CC::CondCode Predicate, |
357 | MachineIRBuilder &MIB) const; |
358 | |
359 | /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. |
360 | /// \p IsNegative is true if the test should be "not zero". |
361 | /// This will also optimize the test bit instruction when possible. |
362 | MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative, |
363 | MachineBasicBlock *DstMBB, |
364 | MachineIRBuilder &MIB) const; |
365 | |
366 | /// Emit a CB(N)Z instruction which branches to \p DestMBB. |
367 | MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, |
368 | MachineBasicBlock *DestMBB, |
369 | MachineIRBuilder &MIB) const; |
370 | |
371 | // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td. |
372 | // We use these manually instead of using the importer since it doesn't |
373 | // support SDNodeXForm. |
374 | ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const; |
375 | ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const; |
376 | ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const; |
377 | ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const; |
378 | |
379 | ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const; |
380 | ComplexRendererFns selectArithImmed(MachineOperand &Root) const; |
381 | ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const; |
382 | |
383 | ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, |
384 | unsigned Size) const; |
385 | |
386 | ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const { |
387 | return selectAddrModeUnscaled(Root, Size: 1); |
388 | } |
389 | ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const { |
390 | return selectAddrModeUnscaled(Root, Size: 2); |
391 | } |
392 | ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const { |
393 | return selectAddrModeUnscaled(Root, Size: 4); |
394 | } |
395 | ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const { |
396 | return selectAddrModeUnscaled(Root, Size: 8); |
397 | } |
398 | ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const { |
399 | return selectAddrModeUnscaled(Root, Size: 16); |
400 | } |
401 | |
402 | /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used |
403 | /// from complex pattern matchers like selectAddrModeIndexed(). |
404 | ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size, |
405 | MachineRegisterInfo &MRI) const; |
406 | |
407 | ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root, |
408 | unsigned Size) const; |
409 | template <int Width> |
410 | ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { |
411 | return selectAddrModeIndexed(Root, Size: Width / 8); |
412 | } |
413 | |
414 | bool isWorthFoldingIntoExtendedReg(MachineInstr &MI, |
415 | const MachineRegisterInfo &MRI) const; |
416 | ComplexRendererFns |
417 | selectAddrModeShiftedExtendXReg(MachineOperand &Root, |
418 | unsigned SizeInBytes) const; |
419 | |
420 | /// Returns a \p ComplexRendererFns which contains a base, offset, and whether |
421 | /// or not a shift + extend should be folded into an addressing mode. Returns |
422 | /// None when this is not profitable or possible. |
423 | ComplexRendererFns |
424 | selectExtendedSHL(MachineOperand &Root, MachineOperand &Base, |
425 | MachineOperand &Offset, unsigned SizeInBytes, |
426 | bool WantsExt) const; |
427 | ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; |
428 | ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, |
429 | unsigned SizeInBytes) const; |
430 | template <int Width> |
431 | ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { |
432 | return selectAddrModeXRO(Root, SizeInBytes: Width / 8); |
433 | } |
434 | |
435 | ComplexRendererFns selectAddrModeWRO(MachineOperand &Root, |
436 | unsigned SizeInBytes) const; |
437 | template <int Width> |
438 | ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const { |
439 | return selectAddrModeWRO(Root, SizeInBytes: Width / 8); |
440 | } |
441 | |
442 | ComplexRendererFns selectShiftedRegister(MachineOperand &Root, |
443 | bool AllowROR = false) const; |
444 | |
445 | ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const { |
446 | return selectShiftedRegister(Root); |
447 | } |
448 | |
449 | ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const { |
450 | return selectShiftedRegister(Root, AllowROR: true); |
451 | } |
452 | |
453 | /// Given an extend instruction, determine the correct shift-extend type for |
454 | /// that instruction. |
455 | /// |
456 | /// If the instruction is going to be used in a load or store, pass |
457 | /// \p IsLoadStore = true. |
458 | AArch64_AM::ShiftExtendType |
459 | getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI, |
460 | bool IsLoadStore = false) const; |
461 | |
462 | /// Move \p Reg to \p RC if \p Reg is not already on \p RC. |
463 | /// |
464 | /// \returns Either \p Reg if no change was necessary, or the new register |
465 | /// created by moving \p Reg. |
466 | /// |
467 | /// Note: This uses emitCopy right now. |
468 | Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC, |
469 | MachineIRBuilder &MIB) const; |
470 | |
471 | ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; |
472 | |
473 | ComplexRendererFns selectExtractHigh(MachineOperand &Root) const; |
474 | |
475 | void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI, |
476 | int OpIdx = -1) const; |
477 | void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I, |
478 | int OpIdx = -1) const; |
479 | void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I, |
480 | int OpIdx = -1) const; |
481 | void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI, |
482 | int OpIdx) const; |
483 | void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI, |
484 | int OpIdx = -1) const; |
485 | void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, |
486 | int OpIdx = -1) const; |
487 | void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI, |
488 | int OpIdx = -1) const; |
489 | void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB, |
490 | const MachineInstr &MI, |
491 | int OpIdx = -1) const; |
492 | |
493 | // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. |
494 | void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags); |
495 | |
496 | // Optimization methods. |
497 | bool tryOptSelect(GSelect &Sel); |
498 | bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI); |
499 | MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, |
500 | MachineOperand &Predicate, |
501 | MachineIRBuilder &MIRBuilder) const; |
502 | |
503 | /// Return true if \p MI is a load or store of \p NumBytes bytes. |
504 | bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; |
505 | |
506 | /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit |
507 | /// register zeroed out. In other words, the result of MI has been explicitly |
508 | /// zero extended. |
509 | bool isDef32(const MachineInstr &MI) const; |
510 | |
511 | const AArch64TargetMachine &TM; |
512 | const AArch64Subtarget &STI; |
513 | const AArch64InstrInfo &TII; |
514 | const AArch64RegisterInfo &TRI; |
515 | const AArch64RegisterBankInfo &RBI; |
516 | |
517 | bool ProduceNonFlagSettingCondBr = false; |
518 | |
519 | // Some cached values used during selection. |
520 | // We use LR as a live-in register, and we keep track of it here as it can be |
521 | // clobbered by calls. |
522 | Register MFReturnAddr; |
523 | |
524 | MachineIRBuilder MIB; |
525 | |
526 | #define GET_GLOBALISEL_PREDICATES_DECL |
527 | #include "AArch64GenGlobalISel.inc" |
528 | #undef GET_GLOBALISEL_PREDICATES_DECL |
529 | |
530 | // We declare the temporaries used by selectImpl() in the class to minimize the |
531 | // cost of constructing placeholder values. |
532 | #define GET_GLOBALISEL_TEMPORARIES_DECL |
533 | #include "AArch64GenGlobalISel.inc" |
534 | #undef GET_GLOBALISEL_TEMPORARIES_DECL |
535 | }; |
536 | |
537 | } // end anonymous namespace |
538 | |
539 | #define GET_GLOBALISEL_IMPL |
540 | #include "AArch64GenGlobalISel.inc" |
541 | #undef GET_GLOBALISEL_IMPL |
542 | |
// Constructor. The tblgen-erated includes in the member-initializer list
// initialize the predicate bitset and the selectImpl() temporaries declared
// by the corresponding GET_GLOBALISEL_*_DECL includes in the class body.
AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
      RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
556 | |
557 | // FIXME: This should be target-independent, inferred from the types declared |
558 | // for each class in the bank. |
559 | // |
560 | /// Given a register bank, and a type, return the smallest register class that |
561 | /// can represent that combination. |
562 | static const TargetRegisterClass * |
563 | getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, |
564 | bool GetAllRegSet = false) { |
565 | if (RB.getID() == AArch64::GPRRegBankID) { |
566 | if (Ty.getSizeInBits() <= 32) |
567 | return GetAllRegSet ? &AArch64::GPR32allRegClass |
568 | : &AArch64::GPR32RegClass; |
569 | if (Ty.getSizeInBits() == 64) |
570 | return GetAllRegSet ? &AArch64::GPR64allRegClass |
571 | : &AArch64::GPR64RegClass; |
572 | if (Ty.getSizeInBits() == 128) |
573 | return &AArch64::XSeqPairsClassRegClass; |
574 | return nullptr; |
575 | } |
576 | |
577 | if (RB.getID() == AArch64::FPRRegBankID) { |
578 | switch (Ty.getSizeInBits()) { |
579 | case 8: |
580 | return &AArch64::FPR8RegClass; |
581 | case 16: |
582 | return &AArch64::FPR16RegClass; |
583 | case 32: |
584 | return &AArch64::FPR32RegClass; |
585 | case 64: |
586 | return &AArch64::FPR64RegClass; |
587 | case 128: |
588 | return &AArch64::FPR128RegClass; |
589 | } |
590 | return nullptr; |
591 | } |
592 | |
593 | return nullptr; |
594 | } |
595 | |
596 | /// Given a register bank, and size in bits, return the smallest register class |
597 | /// that can represent that combination. |
598 | static const TargetRegisterClass * |
599 | getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, |
600 | bool GetAllRegSet = false) { |
601 | unsigned RegBankID = RB.getID(); |
602 | |
603 | if (RegBankID == AArch64::GPRRegBankID) { |
604 | if (SizeInBits <= 32) |
605 | return GetAllRegSet ? &AArch64::GPR32allRegClass |
606 | : &AArch64::GPR32RegClass; |
607 | if (SizeInBits == 64) |
608 | return GetAllRegSet ? &AArch64::GPR64allRegClass |
609 | : &AArch64::GPR64RegClass; |
610 | if (SizeInBits == 128) |
611 | return &AArch64::XSeqPairsClassRegClass; |
612 | } |
613 | |
614 | if (RegBankID == AArch64::FPRRegBankID) { |
615 | switch (SizeInBits) { |
616 | default: |
617 | return nullptr; |
618 | case 8: |
619 | return &AArch64::FPR8RegClass; |
620 | case 16: |
621 | return &AArch64::FPR16RegClass; |
622 | case 32: |
623 | return &AArch64::FPR32RegClass; |
624 | case 64: |
625 | return &AArch64::FPR64RegClass; |
626 | case 128: |
627 | return &AArch64::FPR128RegClass; |
628 | } |
629 | } |
630 | |
631 | return nullptr; |
632 | } |
633 | |
634 | /// Returns the correct subregister to use for a given register class. |
635 | static bool getSubRegForClass(const TargetRegisterClass *RC, |
636 | const TargetRegisterInfo &TRI, unsigned &SubReg) { |
637 | switch (TRI.getRegSizeInBits(RC: *RC)) { |
638 | case 8: |
639 | SubReg = AArch64::bsub; |
640 | break; |
641 | case 16: |
642 | SubReg = AArch64::hsub; |
643 | break; |
644 | case 32: |
645 | if (RC != &AArch64::FPR32RegClass) |
646 | SubReg = AArch64::sub_32; |
647 | else |
648 | SubReg = AArch64::ssub; |
649 | break; |
650 | case 64: |
651 | SubReg = AArch64::dsub; |
652 | break; |
653 | default: |
654 | LLVM_DEBUG( |
655 | dbgs() << "Couldn't find appropriate subregister for register class." ); |
656 | return false; |
657 | } |
658 | |
659 | return true; |
660 | } |
661 | |
662 | /// Returns the minimum size the given register bank can hold. |
663 | static unsigned getMinSizeForRegBank(const RegisterBank &RB) { |
664 | switch (RB.getID()) { |
665 | case AArch64::GPRRegBankID: |
666 | return 32; |
667 | case AArch64::FPRRegBankID: |
668 | return 8; |
669 | default: |
670 | llvm_unreachable("Tried to get minimum size for unknown register bank." ); |
671 | } |
672 | } |
673 | |
674 | /// Create a REG_SEQUENCE instruction using the registers in \p Regs. |
675 | /// Helper function for functions like createDTuple and createQTuple. |
676 | /// |
677 | /// \p RegClassIDs - The list of register class IDs available for some tuple of |
678 | /// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is |
679 | /// expected to contain between 2 and 4 tuple classes. |
680 | /// |
681 | /// \p SubRegs - The list of subregister classes associated with each register |
682 | /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0 |
683 | /// subregister class. The index of each subregister class is expected to |
684 | /// correspond with the index of each register class. |
685 | /// |
686 | /// \returns Either the destination register of REG_SEQUENCE instruction that |
687 | /// was created, or the 0th element of \p Regs if \p Regs contains a single |
688 | /// element. |
689 | static Register createTuple(ArrayRef<Register> Regs, |
690 | const unsigned RegClassIDs[], |
691 | const unsigned SubRegs[], MachineIRBuilder &MIB) { |
692 | unsigned NumRegs = Regs.size(); |
693 | if (NumRegs == 1) |
694 | return Regs[0]; |
695 | assert(NumRegs >= 2 && NumRegs <= 4 && |
696 | "Only support between two and 4 registers in a tuple!" ); |
697 | const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo(); |
698 | auto *DesiredClass = TRI->getRegClass(i: RegClassIDs[NumRegs - 2]); |
699 | auto RegSequence = |
700 | MIB.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {DesiredClass}, SrcOps: {}); |
701 | for (unsigned I = 0, E = Regs.size(); I < E; ++I) { |
702 | RegSequence.addUse(RegNo: Regs[I]); |
703 | RegSequence.addImm(Val: SubRegs[I]); |
704 | } |
705 | return RegSequence.getReg(Idx: 0); |
706 | } |
707 | |
708 | /// Create a tuple of D-registers using the registers in \p Regs. |
709 | static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { |
710 | static const unsigned RegClassIDs[] = { |
711 | AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; |
712 | static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, |
713 | AArch64::dsub2, AArch64::dsub3}; |
714 | return createTuple(Regs, RegClassIDs, SubRegs, MIB); |
715 | } |
716 | |
717 | /// Create a tuple of Q-registers using the registers in \p Regs. |
718 | static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { |
719 | static const unsigned RegClassIDs[] = { |
720 | AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; |
721 | static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, |
722 | AArch64::qsub2, AArch64::qsub3}; |
723 | return createTuple(Regs, RegClassIDs, SubRegs, MIB); |
724 | } |
725 | |
726 | static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { |
727 | auto &MI = *Root.getParent(); |
728 | auto &MBB = *MI.getParent(); |
729 | auto &MF = *MBB.getParent(); |
730 | auto &MRI = MF.getRegInfo(); |
731 | uint64_t Immed; |
732 | if (Root.isImm()) |
733 | Immed = Root.getImm(); |
734 | else if (Root.isCImm()) |
735 | Immed = Root.getCImm()->getZExtValue(); |
736 | else if (Root.isReg()) { |
737 | auto ValAndVReg = |
738 | getIConstantVRegValWithLookThrough(VReg: Root.getReg(), MRI, LookThroughInstrs: true); |
739 | if (!ValAndVReg) |
740 | return std::nullopt; |
741 | Immed = ValAndVReg->Value.getSExtValue(); |
742 | } else |
743 | return std::nullopt; |
744 | return Immed; |
745 | } |
746 | |
747 | /// Check whether \p I is a currently unsupported binary operation: |
748 | /// - it has an unsized type |
749 | /// - an operand is not a vreg |
750 | /// - all operands are not in the same bank |
751 | /// These are checks that should someday live in the verifier, but right now, |
752 | /// these are mostly limitations of the aarch64 selector. |
753 | static bool unsupportedBinOp(const MachineInstr &I, |
754 | const AArch64RegisterBankInfo &RBI, |
755 | const MachineRegisterInfo &MRI, |
756 | const AArch64RegisterInfo &TRI) { |
757 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
758 | if (!Ty.isValid()) { |
759 | LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n" ); |
760 | return true; |
761 | } |
762 | |
763 | const RegisterBank *PrevOpBank = nullptr; |
764 | for (auto &MO : I.operands()) { |
765 | // FIXME: Support non-register operands. |
766 | if (!MO.isReg()) { |
767 | LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n" ); |
768 | return true; |
769 | } |
770 | |
771 | // FIXME: Can generic operations have physical registers operands? If |
772 | // so, this will need to be taught about that, and we'll need to get the |
773 | // bank out of the minimal class for the register. |
774 | // Either way, this needs to be documented (and possibly verified). |
775 | if (!MO.getReg().isVirtual()) { |
776 | LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n" ); |
777 | return true; |
778 | } |
779 | |
780 | const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI); |
781 | if (!OpBank) { |
782 | LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n" ); |
783 | return true; |
784 | } |
785 | |
786 | if (PrevOpBank && OpBank != PrevOpBank) { |
787 | LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n" ); |
788 | return true; |
789 | } |
790 | PrevOpBank = OpBank; |
791 | } |
792 | return false; |
793 | } |
794 | |
795 | /// Select the AArch64 opcode for the basic binary operation \p GenericOpc |
796 | /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID |
797 | /// and of size \p OpSize. |
798 | /// \returns \p GenericOpc if the combination is unsupported. |
799 | static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, |
800 | unsigned OpSize) { |
801 | switch (RegBankID) { |
802 | case AArch64::GPRRegBankID: |
803 | if (OpSize == 32) { |
804 | switch (GenericOpc) { |
805 | case TargetOpcode::G_SHL: |
806 | return AArch64::LSLVWr; |
807 | case TargetOpcode::G_LSHR: |
808 | return AArch64::LSRVWr; |
809 | case TargetOpcode::G_ASHR: |
810 | return AArch64::ASRVWr; |
811 | default: |
812 | return GenericOpc; |
813 | } |
814 | } else if (OpSize == 64) { |
815 | switch (GenericOpc) { |
816 | case TargetOpcode::G_PTR_ADD: |
817 | return AArch64::ADDXrr; |
818 | case TargetOpcode::G_SHL: |
819 | return AArch64::LSLVXr; |
820 | case TargetOpcode::G_LSHR: |
821 | return AArch64::LSRVXr; |
822 | case TargetOpcode::G_ASHR: |
823 | return AArch64::ASRVXr; |
824 | default: |
825 | return GenericOpc; |
826 | } |
827 | } |
828 | break; |
829 | case AArch64::FPRRegBankID: |
830 | switch (OpSize) { |
831 | case 32: |
832 | switch (GenericOpc) { |
833 | case TargetOpcode::G_FADD: |
834 | return AArch64::FADDSrr; |
835 | case TargetOpcode::G_FSUB: |
836 | return AArch64::FSUBSrr; |
837 | case TargetOpcode::G_FMUL: |
838 | return AArch64::FMULSrr; |
839 | case TargetOpcode::G_FDIV: |
840 | return AArch64::FDIVSrr; |
841 | default: |
842 | return GenericOpc; |
843 | } |
844 | case 64: |
845 | switch (GenericOpc) { |
846 | case TargetOpcode::G_FADD: |
847 | return AArch64::FADDDrr; |
848 | case TargetOpcode::G_FSUB: |
849 | return AArch64::FSUBDrr; |
850 | case TargetOpcode::G_FMUL: |
851 | return AArch64::FMULDrr; |
852 | case TargetOpcode::G_FDIV: |
853 | return AArch64::FDIVDrr; |
854 | case TargetOpcode::G_OR: |
855 | return AArch64::ORRv8i8; |
856 | default: |
857 | return GenericOpc; |
858 | } |
859 | } |
860 | break; |
861 | } |
862 | return GenericOpc; |
863 | } |
864 | |
865 | /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc, |
866 | /// appropriate for the (value) register bank \p RegBankID and of memory access |
867 | /// size \p OpSize. This returns the variant with the base+unsigned-immediate |
868 | /// addressing mode (e.g., LDRXui). |
869 | /// \returns \p GenericOpc if the combination is unsupported. |
870 | static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, |
871 | unsigned OpSize) { |
872 | const bool isStore = GenericOpc == TargetOpcode::G_STORE; |
873 | switch (RegBankID) { |
874 | case AArch64::GPRRegBankID: |
875 | switch (OpSize) { |
876 | case 8: |
877 | return isStore ? AArch64::STRBBui : AArch64::LDRBBui; |
878 | case 16: |
879 | return isStore ? AArch64::STRHHui : AArch64::LDRHHui; |
880 | case 32: |
881 | return isStore ? AArch64::STRWui : AArch64::LDRWui; |
882 | case 64: |
883 | return isStore ? AArch64::STRXui : AArch64::LDRXui; |
884 | } |
885 | break; |
886 | case AArch64::FPRRegBankID: |
887 | switch (OpSize) { |
888 | case 8: |
889 | return isStore ? AArch64::STRBui : AArch64::LDRBui; |
890 | case 16: |
891 | return isStore ? AArch64::STRHui : AArch64::LDRHui; |
892 | case 32: |
893 | return isStore ? AArch64::STRSui : AArch64::LDRSui; |
894 | case 64: |
895 | return isStore ? AArch64::STRDui : AArch64::LDRDui; |
896 | case 128: |
897 | return isStore ? AArch64::STRQui : AArch64::LDRQui; |
898 | } |
899 | break; |
900 | } |
901 | return GenericOpc; |
902 | } |
903 | |
904 | /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg |
905 | /// to \p *To. |
906 | /// |
907 | /// E.g "To = COPY SrcReg:SubReg" |
908 | static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, |
909 | const RegisterBankInfo &RBI, Register SrcReg, |
910 | const TargetRegisterClass *To, unsigned SubReg) { |
911 | assert(SrcReg.isValid() && "Expected a valid source register?" ); |
912 | assert(To && "Destination register class cannot be null" ); |
913 | assert(SubReg && "Expected a valid subregister" ); |
914 | |
915 | MachineIRBuilder MIB(I); |
916 | auto SubRegCopy = |
917 | MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {To}, SrcOps: {}).addReg(RegNo: SrcReg, flags: 0, SubReg); |
918 | MachineOperand &RegOp = I.getOperand(i: 1); |
919 | RegOp.setReg(SubRegCopy.getReg(Idx: 0)); |
920 | |
921 | // It's possible that the destination register won't be constrained. Make |
922 | // sure that happens. |
923 | if (!I.getOperand(i: 0).getReg().isPhysical()) |
924 | RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), RC: *To, MRI); |
925 | |
926 | return true; |
927 | } |
928 | |
929 | /// Helper function to get the source and destination register classes for a |
930 | /// copy. Returns a std::pair containing the source register class for the |
931 | /// copy, and the destination register class for the copy. If a register class |
932 | /// cannot be determined, then it will be nullptr. |
933 | static std::pair<const TargetRegisterClass *, const TargetRegisterClass *> |
934 | getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, |
935 | MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, |
936 | const RegisterBankInfo &RBI) { |
937 | Register DstReg = I.getOperand(i: 0).getReg(); |
938 | Register SrcReg = I.getOperand(i: 1).getReg(); |
939 | const RegisterBank &DstRegBank = *RBI.getRegBank(Reg: DstReg, MRI, TRI); |
940 | const RegisterBank &SrcRegBank = *RBI.getRegBank(Reg: SrcReg, MRI, TRI); |
941 | unsigned DstSize = RBI.getSizeInBits(Reg: DstReg, MRI, TRI); |
942 | unsigned SrcSize = RBI.getSizeInBits(Reg: SrcReg, MRI, TRI); |
943 | |
944 | // Special casing for cross-bank copies of s1s. We can technically represent |
945 | // a 1-bit value with any size of register. The minimum size for a GPR is 32 |
946 | // bits. So, we need to put the FPR on 32 bits as well. |
947 | // |
948 | // FIXME: I'm not sure if this case holds true outside of copies. If it does, |
949 | // then we can pull it into the helpers that get the appropriate class for a |
950 | // register bank. Or make a new helper that carries along some constraint |
951 | // information. |
952 | if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1)) |
953 | SrcSize = DstSize = 32; |
954 | |
955 | return {getMinClassForRegBank(RB: SrcRegBank, SizeInBits: SrcSize, GetAllRegSet: true), |
956 | getMinClassForRegBank(RB: DstRegBank, SizeInBits: DstSize, GetAllRegSet: true)}; |
957 | } |
958 | |
959 | // FIXME: We need some sort of API in RBI/TRI to allow generic code to |
960 | // constrain operands of simple instructions given a TargetRegisterClass |
961 | // and LLT |
962 | static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, |
963 | const RegisterBankInfo &RBI) { |
964 | for (MachineOperand &MO : I.operands()) { |
965 | if (!MO.isReg()) |
966 | continue; |
967 | Register Reg = MO.getReg(); |
968 | if (!Reg) |
969 | continue; |
970 | if (Reg.isPhysical()) |
971 | continue; |
972 | LLT Ty = MRI.getType(Reg); |
973 | const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); |
974 | const TargetRegisterClass *RC = |
975 | RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); |
976 | if (!RC) { |
977 | const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); |
978 | RC = getRegClassForTypeOnBank(Ty, RB); |
979 | if (!RC) { |
980 | LLVM_DEBUG( |
981 | dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n" ); |
982 | break; |
983 | } |
984 | } |
985 | RBI.constrainGenericRegister(Reg, RC: *RC, MRI); |
986 | } |
987 | |
988 | return true; |
989 | } |
990 | |
/// Select a COPY (or a COPY-like G_ZEXT) by determining register classes for
/// the operands and fixing up any size mismatch between source and
/// destination with a subregister copy or a SUBREG_TO_REG promotion. Mutates
/// \p I in place and may insert instructions before it.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(i: 0).getReg();
  Register SrcReg = I.getOperand(i: 1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(args&: SrcRC, args&: DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  // Without a destination class we cannot constrain anything; bail out.
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // Is this a copy? If so, then we may need to insert a subregister copy.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n" );
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(RC: *SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(RC: *DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(RB: SrcRegBank) > DstSize) {
      // Copy the full-width value across banks first, then extract the
      // destination-sized subregister from that cross-bank copy.
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(RB: DstRegBank, SizeInBits: SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(RC: DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy(Res: {DstTempRC}, Op: {SrcReg});
      copySubReg(I, MRI, RBI, SrcReg: Copy.getReg(Idx: 0), To: DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(RB: SrcRegBank, SizeInBits: DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(RC: SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, To: DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(RB: SrcRegBank, SizeInBits: DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(RC: SrcRC, TRI, SubReg);

      // PromoteReg = SUBREG_TO_REG 0, SrcReg, SubReg; then I reads PromoteReg.
      Register PromoteReg = MRI.createVirtualRegister(RegClass: PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(i: 1);
      RegOp.setReg(PromoteReg);
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (DstReg.isPhysical())
      return true;
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n" );
    return false;
  }

  // If this is a GPR ZEXT that we want to just reduce down into a copy.
  // The sizes will be mismatched with the source < 32b but that's ok.
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    // Re-enter as a plain COPY so the subregister fixups above apply.
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  I.setDesc(TII.get(AArch64::COPY));
  return true;
}
1081 | |
1082 | static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { |
1083 | if (!DstTy.isScalar() || !SrcTy.isScalar()) |
1084 | return GenericOpc; |
1085 | |
1086 | const unsigned DstSize = DstTy.getSizeInBits(); |
1087 | const unsigned SrcSize = SrcTy.getSizeInBits(); |
1088 | |
1089 | switch (DstSize) { |
1090 | case 32: |
1091 | switch (SrcSize) { |
1092 | case 32: |
1093 | switch (GenericOpc) { |
1094 | case TargetOpcode::G_SITOFP: |
1095 | return AArch64::SCVTFUWSri; |
1096 | case TargetOpcode::G_UITOFP: |
1097 | return AArch64::UCVTFUWSri; |
1098 | case TargetOpcode::G_FPTOSI: |
1099 | return AArch64::FCVTZSUWSr; |
1100 | case TargetOpcode::G_FPTOUI: |
1101 | return AArch64::FCVTZUUWSr; |
1102 | default: |
1103 | return GenericOpc; |
1104 | } |
1105 | case 64: |
1106 | switch (GenericOpc) { |
1107 | case TargetOpcode::G_SITOFP: |
1108 | return AArch64::SCVTFUXSri; |
1109 | case TargetOpcode::G_UITOFP: |
1110 | return AArch64::UCVTFUXSri; |
1111 | case TargetOpcode::G_FPTOSI: |
1112 | return AArch64::FCVTZSUWDr; |
1113 | case TargetOpcode::G_FPTOUI: |
1114 | return AArch64::FCVTZUUWDr; |
1115 | default: |
1116 | return GenericOpc; |
1117 | } |
1118 | default: |
1119 | return GenericOpc; |
1120 | } |
1121 | case 64: |
1122 | switch (SrcSize) { |
1123 | case 32: |
1124 | switch (GenericOpc) { |
1125 | case TargetOpcode::G_SITOFP: |
1126 | return AArch64::SCVTFUWDri; |
1127 | case TargetOpcode::G_UITOFP: |
1128 | return AArch64::UCVTFUWDri; |
1129 | case TargetOpcode::G_FPTOSI: |
1130 | return AArch64::FCVTZSUXSr; |
1131 | case TargetOpcode::G_FPTOUI: |
1132 | return AArch64::FCVTZUUXSr; |
1133 | default: |
1134 | return GenericOpc; |
1135 | } |
1136 | case 64: |
1137 | switch (GenericOpc) { |
1138 | case TargetOpcode::G_SITOFP: |
1139 | return AArch64::SCVTFUXDri; |
1140 | case TargetOpcode::G_UITOFP: |
1141 | return AArch64::UCVTFUXDri; |
1142 | case TargetOpcode::G_FPTOSI: |
1143 | return AArch64::FCVTZSUXDr; |
1144 | case TargetOpcode::G_FPTOUI: |
1145 | return AArch64::FCVTZUUXDr; |
1146 | default: |
1147 | return GenericOpc; |
1148 | } |
1149 | default: |
1150 | return GenericOpc; |
1151 | } |
1152 | default: |
1153 | return GenericOpc; |
1154 | }; |
1155 | return GenericOpc; |
1156 | } |
1157 | |
/// Emit a conditional-select for scalar \p Dst = \p CC ? \p True : \p False.
/// Emits FCSEL for FPR operands; for GPR operands tries to fold a
/// negate/not/increment or constant operands into CSNEG/CSINV/CSINC before
/// falling back to a plain CSEL.
/// \returns the emitted instruction, or nullptr for vector types.
MachineInstr *
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
                                       Register False, AArch64CC::CondCode CC,
                                       MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?" );
  LLT Ty = MRI.getType(Reg: True);
  if (Ty.isVector())
    return nullptr;
  const unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "Expected 32 bit or 64 bit select only?" );
  const bool Is32Bit = Size == 32;
  // FPR operands: select directly with FCSEL; no further folds apply.
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {True, False}).addImm(Val: CC);
    constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
    return &*FCSel;
  }

  // By default, we'll try and emit a CSEL.
  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  bool Optimized = false;
  // Tries to fold a negate/not/increment feeding \p Reg into the select by
  // switching Opc to CSNEG/CSINV/CSINC. With \p Invert set, the condition is
  // inverted and the operands swapped so the folded value lands in the
  // "false" slot. Mutates the captured Opc/CC and the Reg/OtherReg arguments.
  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
                                 &Optimized](Register &Reg, Register &OtherReg,
                                             bool Invert) {
    // Only one fold may apply; bail if a previous attempt already succeeded.
    if (Optimized)
      return false;

    // Attempt to fold:
    //
    // %sub = G_SUB 0, %x
    // %select = G_SELECT cc, %reg, %sub
    //
    // Into:
    // %select = CSNEG %reg, %x, cc
    Register MatchReg;
    if (mi_match(R: Reg, MRI, P: m_Neg(Src: m_Reg(R&: MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        std::swap(a&: Reg, b&: OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %xor = G_XOR %x, -1
    // %select = G_SELECT cc, %reg, %xor
    //
    // Into:
    // %select = CSINV %reg, %x, cc
    if (mi_match(R: Reg, MRI, P: m_Not(Src: m_Reg(R&: MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        std::swap(a&: Reg, b&: OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %add = G_ADD %x, 1
    // %select = G_SELECT cc, %reg, %add
    //
    // Into:
    // %select = CSINC %reg, %x, cc
    if (mi_match(R: Reg, MRI,
                 P: m_any_of(preds: m_GAdd(L: m_Reg(R&: MatchReg), R: m_SpecificICst(RequestedValue: 1)),
                           preds: m_GPtrAdd(L: m_Reg(R&: MatchReg), R: m_SpecificICst(RequestedValue: 1))))) {
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        std::swap(a&: Reg, b&: OtherReg);
      }
      return true;
    }

    return false;
  };

  // Helper lambda which tries to use CSINC/CSINV for the instruction when its
  // true/false values are constants.
  // FIXME: All of these patterns already exist in tablegen. We should be
  // able to import these.
  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
                          &Optimized]() {
    if (Optimized)
      return false;
    auto TrueCst = getIConstantVRegValWithLookThrough(VReg: True, MRI);
    auto FalseCst = getIConstantVRegValWithLookThrough(VReg: False, MRI);
    if (!TrueCst && !FalseCst)
      return false;

    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();

      if (T == 0 && F == 1) {
        // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = ZReg;
        False = ZReg;
        return true;
      }

      if (T == 0 && F == -1) {
        // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = ZReg;
        False = ZReg;
        return true;
      }
    }

    if (TrueCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      if (T == 1) {
        // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        return true;
      }

      if (T == -1) {
        // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        return true;
      }
    }

    if (FalseCst) {
      int64_t F = FalseCst->Value.getSExtValue();
      if (F == 1) {
        // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        False = ZReg;
        return true;
      }

      if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        False = ZReg;
        return true;
      }
    }
    return false;
  };

  // Try each fold in turn; the Optimized guard inside each lambda ensures at
  // most one of them rewrites Opc/operands.
  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
  Optimized |= TryOptSelectCst();
  auto SelectInst = MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {True, False}).addImm(Val: CC);
  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
  return &*SelectInst;
}
1328 | |
1329 | static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { |
1330 | switch (P) { |
1331 | default: |
1332 | llvm_unreachable("Unknown condition code!" ); |
1333 | case CmpInst::ICMP_NE: |
1334 | return AArch64CC::NE; |
1335 | case CmpInst::ICMP_EQ: |
1336 | return AArch64CC::EQ; |
1337 | case CmpInst::ICMP_SGT: |
1338 | return AArch64CC::GT; |
1339 | case CmpInst::ICMP_SGE: |
1340 | return AArch64CC::GE; |
1341 | case CmpInst::ICMP_SLT: |
1342 | return AArch64CC::LT; |
1343 | case CmpInst::ICMP_SLE: |
1344 | return AArch64CC::LE; |
1345 | case CmpInst::ICMP_UGT: |
1346 | return AArch64CC::HI; |
1347 | case CmpInst::ICMP_UGE: |
1348 | return AArch64CC::HS; |
1349 | case CmpInst::ICMP_ULT: |
1350 | return AArch64CC::LO; |
1351 | case CmpInst::ICMP_ULE: |
1352 | return AArch64CC::LS; |
1353 | } |
1354 | } |
1355 | |
1356 | /// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC. |
1357 | static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, |
1358 | AArch64CC::CondCode &CondCode, |
1359 | AArch64CC::CondCode &CondCode2) { |
1360 | CondCode2 = AArch64CC::AL; |
1361 | switch (CC) { |
1362 | default: |
1363 | llvm_unreachable("Unknown FP condition!" ); |
1364 | case CmpInst::FCMP_OEQ: |
1365 | CondCode = AArch64CC::EQ; |
1366 | break; |
1367 | case CmpInst::FCMP_OGT: |
1368 | CondCode = AArch64CC::GT; |
1369 | break; |
1370 | case CmpInst::FCMP_OGE: |
1371 | CondCode = AArch64CC::GE; |
1372 | break; |
1373 | case CmpInst::FCMP_OLT: |
1374 | CondCode = AArch64CC::MI; |
1375 | break; |
1376 | case CmpInst::FCMP_OLE: |
1377 | CondCode = AArch64CC::LS; |
1378 | break; |
1379 | case CmpInst::FCMP_ONE: |
1380 | CondCode = AArch64CC::MI; |
1381 | CondCode2 = AArch64CC::GT; |
1382 | break; |
1383 | case CmpInst::FCMP_ORD: |
1384 | CondCode = AArch64CC::VC; |
1385 | break; |
1386 | case CmpInst::FCMP_UNO: |
1387 | CondCode = AArch64CC::VS; |
1388 | break; |
1389 | case CmpInst::FCMP_UEQ: |
1390 | CondCode = AArch64CC::EQ; |
1391 | CondCode2 = AArch64CC::VS; |
1392 | break; |
1393 | case CmpInst::FCMP_UGT: |
1394 | CondCode = AArch64CC::HI; |
1395 | break; |
1396 | case CmpInst::FCMP_UGE: |
1397 | CondCode = AArch64CC::PL; |
1398 | break; |
1399 | case CmpInst::FCMP_ULT: |
1400 | CondCode = AArch64CC::LT; |
1401 | break; |
1402 | case CmpInst::FCMP_ULE: |
1403 | CondCode = AArch64CC::LE; |
1404 | break; |
1405 | case CmpInst::FCMP_UNE: |
1406 | CondCode = AArch64CC::NE; |
1407 | break; |
1408 | } |
1409 | } |
1410 | |
1411 | /// Convert an IR fp condition code to an AArch64 CC. |
1412 | /// This differs from changeFPCCToAArch64CC in that it returns cond codes that |
1413 | /// should be AND'ed instead of OR'ed. |
1414 | static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, |
1415 | AArch64CC::CondCode &CondCode, |
1416 | AArch64CC::CondCode &CondCode2) { |
1417 | CondCode2 = AArch64CC::AL; |
1418 | switch (CC) { |
1419 | default: |
1420 | changeFPCCToORAArch64CC(CC, CondCode, CondCode2); |
1421 | assert(CondCode2 == AArch64CC::AL); |
1422 | break; |
1423 | case CmpInst::FCMP_ONE: |
1424 | // (a one b) |
1425 | // == ((a olt b) || (a ogt b)) |
1426 | // == ((a ord b) && (a une b)) |
1427 | CondCode = AArch64CC::VC; |
1428 | CondCode2 = AArch64CC::NE; |
1429 | break; |
1430 | case CmpInst::FCMP_UEQ: |
1431 | // (a ueq b) |
1432 | // == ((a uno b) || (a oeq b)) |
1433 | // == ((a ule b) && (a uge b)) |
1434 | CondCode = AArch64CC::PL; |
1435 | CondCode2 = AArch64CC::LE; |
1436 | break; |
1437 | } |
1438 | } |
1439 | |
/// Return a register which can be used as a bit to test in a TB(N)Z.
///
/// Walks backwards through the def chain of \p Reg, folding extensions,
/// truncations, shifts, masks, and xors into the test where legal. \p Bit is
/// updated in place as shifts are folded, and \p Invert is flipped when a
/// G_XOR flips the tested bit's meaning (TBZ <-> TBNZ).
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
                              MachineRegisterInfo &MRI) {
  assert(Reg.isValid() && "Expected valid register!" );
  bool HasZext = false;
  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
    unsigned Opc = MI->getOpcode();

    // Only fold defs with a single (non-debug) user, so the folded
    // instruction can be removed after selection.
    if (!MI->getOperand(i: 0).isReg() ||
        !MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
      break;

    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
    //
    // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
    // on the truncated x is the same as the bit number on x.
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      // Remember zext so later constants are read zero-extended.
      if (Opc == TargetOpcode::G_ZEXT)
        HasZext = true;

      Register NextReg = MI->getOperand(i: 1).getReg();
      // Did we find something worth folding?
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(RegNo: NextReg))
        break;

      // NextReg is worth folding. Keep looking.
      Reg = NextReg;
      continue;
    }

    // Attempt to find a suitable operation with a constant on one side.
    std::optional<uint64_t> C;
    Register TestReg;
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(i: 1).getReg();
      Register ConstantReg = MI->getOperand(i: 2).getReg();
      auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
      if (!VRegAndVal) {
        // AND commutes, check the other side for a constant.
        // FIXME: Can we canonicalize the constant so that it's always on the
        // same side at some point earlier?
        std::swap(a&: ConstantReg, b&: TestReg);
        VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
      }
      if (VRegAndVal) {
        // Read the constant with the extension kind seen on the walk so the
        // bit test below looks at the right bit pattern.
        if (HasZext)
          C = VRegAndVal->Value.getZExtValue();
        else
          C = VRegAndVal->Value.getSExtValue();
      }
      break;
    }
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      // Shifts only fold with a constant shift amount (operand 2).
      TestReg = MI->getOperand(i: 1).getReg();
      auto VRegAndVal =
          getIConstantVRegValWithLookThrough(VReg: MI->getOperand(i: 2).getReg(), MRI);
      if (VRegAndVal)
        C = VRegAndVal->Value.getSExtValue();
      break;
    }
    }

    // Didn't find a constant or viable register. Bail out of the loop.
    if (!C || !TestReg.isValid())
      break;

    // We found a suitable instruction with a constant. Check to see if we can
    // walk through the instruction.
    Register NextReg;
    unsigned TestRegSize = MRI.getType(Reg: TestReg).getSizeInBits();
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
      // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
      if ((*C >> Bit) & 1)
        NextReg = TestReg;
      break;
    case TargetOpcode::G_SHL:
      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
      // the type of the register.
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit - *C;
      }
      break;
    case TargetOpcode::G_ASHR:
      // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
      // in x
      NextReg = TestReg;
      Bit = Bit + *C;
      // Past the top bit, an ashr result replicates the sign bit, so test it.
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
      break;
    case TargetOpcode::G_LSHR:
      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
      if ((Bit + *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit + *C;
      }
      break;
    case TargetOpcode::G_XOR:
      // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
      // appropriate.
      //
      // e.g. If x' = xor x, c, and the b-th bit is set in c then
      //
      // tbz x', b -> tbnz x, b
      //
      // Because x' only has the b-th bit set if x does not.
      if ((*C >> Bit) & 1)
        Invert = !Invert;
      NextReg = TestReg;
      break;
    }

    // Check if we found anything worth folding.
    if (!NextReg.isValid())
      return Reg;
    Reg = NextReg;
  }

  return Reg;
}
1571 | |
1572 | MachineInstr *AArch64InstructionSelector::emitTestBit( |
1573 | Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB, |
1574 | MachineIRBuilder &MIB) const { |
1575 | assert(TestReg.isValid()); |
1576 | assert(ProduceNonFlagSettingCondBr && |
1577 | "Cannot emit TB(N)Z with speculation tracking!" ); |
1578 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
1579 | |
1580 | // Attempt to optimize the test bit by walking over instructions. |
1581 | TestReg = getTestBitReg(Reg: TestReg, Bit, Invert&: IsNegative, MRI); |
1582 | LLT Ty = MRI.getType(Reg: TestReg); |
1583 | unsigned Size = Ty.getSizeInBits(); |
1584 | assert(!Ty.isVector() && "Expected a scalar!" ); |
1585 | assert(Bit < 64 && "Bit is too large!" ); |
1586 | |
1587 | // When the test register is a 64-bit register, we have to narrow to make |
1588 | // TBNZW work. |
1589 | bool UseWReg = Bit < 32; |
1590 | unsigned NecessarySize = UseWReg ? 32 : 64; |
1591 | if (Size != NecessarySize) |
1592 | TestReg = moveScalarRegClass( |
1593 | TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass, |
1594 | MIB); |
1595 | |
1596 | static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX}, |
1597 | {AArch64::TBZW, AArch64::TBNZW}}; |
1598 | unsigned Opc = OpcTable[UseWReg][IsNegative]; |
1599 | auto TestBitMI = |
1600 | MIB.buildInstr(Opcode: Opc).addReg(RegNo: TestReg).addImm(Val: Bit).addMBB(MBB: DstMBB); |
1601 | constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI); |
1602 | return &*TestBitMI; |
1603 | } |
1604 | |
1605 | bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( |
1606 | MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, |
1607 | MachineIRBuilder &MIB) const { |
1608 | assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?" ); |
1609 | // Given something like this: |
1610 | // |
1611 | // %x = ...Something... |
1612 | // %one = G_CONSTANT i64 1 |
1613 | // %zero = G_CONSTANT i64 0 |
1614 | // %and = G_AND %x, %one |
1615 | // %cmp = G_ICMP intpred(ne), %and, %zero |
1616 | // %cmp_trunc = G_TRUNC %cmp |
1617 | // G_BRCOND %cmp_trunc, %bb.3 |
1618 | // |
1619 | // We want to try and fold the AND into the G_BRCOND and produce either a |
1620 | // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)). |
1621 | // |
1622 | // In this case, we'd get |
1623 | // |
1624 | // TBNZ %x %bb.3 |
1625 | // |
1626 | |
1627 | // Check if the AND has a constant on its RHS which we can use as a mask. |
1628 | // If it's a power of 2, then it's the same as checking a specific bit. |
1629 | // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) |
1630 | auto MaybeBit = getIConstantVRegValWithLookThrough( |
1631 | VReg: AndInst.getOperand(i: 2).getReg(), MRI: *MIB.getMRI()); |
1632 | if (!MaybeBit) |
1633 | return false; |
1634 | |
1635 | int32_t Bit = MaybeBit->Value.exactLogBase2(); |
1636 | if (Bit < 0) |
1637 | return false; |
1638 | |
1639 | Register TestReg = AndInst.getOperand(i: 1).getReg(); |
1640 | |
1641 | // Emit a TB(N)Z. |
1642 | emitTestBit(TestReg, Bit, IsNegative: Invert, DstMBB, MIB); |
1643 | return true; |
1644 | } |
1645 | |
1646 | MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, |
1647 | bool IsNegative, |
1648 | MachineBasicBlock *DestMBB, |
1649 | MachineIRBuilder &MIB) const { |
1650 | assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!" ); |
1651 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
1652 | assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == |
1653 | AArch64::GPRRegBankID && |
1654 | "Expected GPRs only?" ); |
1655 | auto Ty = MRI.getType(Reg: CompareReg); |
1656 | unsigned Width = Ty.getSizeInBits(); |
1657 | assert(!Ty.isVector() && "Expected scalar only?" ); |
1658 | assert(Width <= 64 && "Expected width to be at most 64?" ); |
1659 | static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, |
1660 | {AArch64::CBNZW, AArch64::CBNZX}}; |
1661 | unsigned Opc = OpcTable[IsNegative][Width == 64]; |
1662 | auto BranchMI = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {CompareReg}).addMBB(MBB: DestMBB); |
1663 | constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); |
1664 | return &*BranchMI; |
1665 | } |
1666 | |
// Select a G_BRCOND whose condition is produced by a G_FCMP: emit the FP
// compare to set the flags, then one or two conditional branches.
bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
    MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
  assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
  // totally clean. Some of them require two branches to implement.
  auto Pred = (CmpInst::Predicate)FCmp.getOperand(i: 1).getPredicate();
  // Emit the floating-point compare; the branches below consume its flags.
  emitFPCompare(LHS: FCmp.getOperand(i: 2).getReg(), RHS: FCmp.getOperand(i: 3).getReg(), MIRBuilder&: MIB,
                Pred);
  // CC2 is AL when a single conditional branch is enough for this predicate.
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(P: static_cast<CmpInst::Predicate>(Pred), CondCode&: CC1, CondCode2&: CC2);
  MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
  if (CC2 != AArch64CC::AL)
    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
  // The G_BRCOND is fully replaced by the Bcc(s) above.
  I.eraseFromParent();
  return true;
}
1685 | |
// Try to fold a G_BRCOND fed by a G_ICMP into a single TB(N)Z or CB(N)Z.
// Returns true (and erases the G_BRCOND) on success; false means the caller
// must emit a full compare + Bcc.
bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
  //
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (!ProduceNonFlagSettingCondBr)
    return false;

  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
  auto Pred =
      static_cast<CmpInst::Predicate>(ICmp.getOperand(i: 1).getPredicate());
  Register LHS = ICmp.getOperand(i: 2).getReg();
  Register RHS = ICmp.getOperand(i: 3).getReg();

  // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
  auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
  MachineInstr *AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);

  // When we can emit a TB(N)Z, prefer that.
  //
  // Handle non-commutative condition codes first.
  // Note that we don't want to do this when we have a G_AND because it can
  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();

    // When we have a greater-than comparison, we can just test if the msb is
    // zero. (x > -1 <=> x >= 0 <=> sign bit clear.)
    if (C == -1 && Pred == CmpInst::ICMP_SGT) {
      uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
      emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // When we have a less than comparison, we can just test if the msb is not
    // zero. (x < 0 <=> sign bit set.)
    if (C == 0 && Pred == CmpInst::ICMP_SLT) {
      uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
      emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ true, DstMBB: DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // Inversely, if we have a signed greater-than-or-equal comparison to zero,
    // we can test if the msb is zero.
    if (C == 0 && Pred == CmpInst::ICMP_SGE) {
      uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
      emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }
  }

  // Attempt to handle commutative condition codes. Right now, that's only
  // eq/ne.
  if (ICmpInst::isEquality(P: Pred)) {
    // If the constant wasn't on the RHS, try the swapped orientation; eq/ne
    // are symmetric so this is safe.
    if (!VRegAndVal) {
      std::swap(a&: RHS, b&: LHS);
      VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
      AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);
    }

    // Only (in)equality comparisons against zero map onto TB(N)Z/CB(N)Z.
    if (VRegAndVal && VRegAndVal->Value == 0) {
      // If there's a G_AND feeding into this branch, try to fold it away by
      // emitting a TB(N)Z instead.
      //
      // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
      // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
      // would be redundant.
      if (AndInst &&
          tryOptAndIntoCompareBranch(
              AndInst&: *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DstMBB: DestMBB, MIB)) {
        I.eraseFromParent();
        return true;
      }

      // Otherwise, try to emit a CB(N)Z instead.
      auto LHSTy = MRI.getType(Reg: LHS);
      if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
        emitCBZ(CompareReg: LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
        I.eraseFromParent();
        return true;
      }
    }
  }

  return false;
}
1780 | |
1781 | bool AArch64InstructionSelector::selectCompareBranchFedByICmp( |
1782 | MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { |
1783 | assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); |
1784 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); |
1785 | if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) |
1786 | return true; |
1787 | |
1788 | // Couldn't optimize. Emit a compare + a Bcc. |
1789 | MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB(); |
1790 | auto PredOp = ICmp.getOperand(i: 1); |
1791 | emitIntegerCompare(LHS&: ICmp.getOperand(i: 2), RHS&: ICmp.getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB); |
1792 | const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( |
1793 | P: static_cast<CmpInst::Predicate>(PredOp.getPredicate())); |
1794 | MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); |
1795 | I.eraseFromParent(); |
1796 | return true; |
1797 | } |
1798 | |
// Select a G_BRCOND. Dispatches to the FCMP/ICMP-fed paths when the condition
// comes from a compare; otherwise tests bit 0 of the condition register.
bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
  Register CondReg = I.getOperand(i: 0).getReg();
  MachineInstr *CCMI = MRI.getVRegDef(Reg: CondReg);
  // Try to select the G_BRCOND using whatever is feeding the condition if
  // possible.
  unsigned CCMIOpc = CCMI->getOpcode();
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, FCmp&: *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, ICmp&: *CCMI, MIB);

  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (ProduceNonFlagSettingCondBr) {
    // Branch when bit 0 of the boolean condition is set (TBNZ CondReg, 0).
    emitTestBit(TestReg: CondReg, /*Bit = */ 0, /*IsNegative = */ true,
                DstMBB: I.getOperand(i: 1).getMBB(), MIB);
    I.eraseFromParent();
    return true;
  }

  // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
  // NOTE(review): the ANDS immediate operand here is the *encoded* logical
  // immediate — presumably selecting the mask that tests the low bit; confirm
  // against the AArch64 logical-immediate encoding.
  auto TstMI =
      MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
  // ANDS sets the flags, so branch on NE (condition bit was nonzero).
  auto Bcc = MIB.buildInstr(AArch64::Bcc)
                 .addImm(AArch64CC::NE)
                 .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
}
1831 | |
/// Returns the element immediate value of a vector shift operand if found.
/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
///
/// \param Reg virtual register holding the vector shift amount; must have a
///        vector type.
/// \returns the splatted scalar value, or std::nullopt if \p Reg's definition
///          is not recognized as a constant splat.
static std::optional<int64_t> getVectorShiftImm(Register Reg,
                                                MachineRegisterInfo &MRI) {
  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand" );
  MachineInstr *OpMI = MRI.getVRegDef(Reg);
  return getAArch64VectorSplatScalar(MI: *OpMI, MRI);
}
1840 | |
1841 | /// Matches and returns the shift immediate value for a SHL instruction given |
1842 | /// a shift operand. |
1843 | static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, |
1844 | MachineRegisterInfo &MRI) { |
1845 | std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI); |
1846 | if (!ShiftImm) |
1847 | return std::nullopt; |
1848 | // Check the immediate is in range for a SHL. |
1849 | int64_t Imm = *ShiftImm; |
1850 | if (Imm < 0) |
1851 | return std::nullopt; |
1852 | switch (SrcTy.getElementType().getSizeInBits()) { |
1853 | default: |
1854 | LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift" ); |
1855 | return std::nullopt; |
1856 | case 8: |
1857 | if (Imm > 7) |
1858 | return std::nullopt; |
1859 | break; |
1860 | case 16: |
1861 | if (Imm > 15) |
1862 | return std::nullopt; |
1863 | break; |
1864 | case 32: |
1865 | if (Imm > 31) |
1866 | return std::nullopt; |
1867 | break; |
1868 | case 64: |
1869 | if (Imm > 63) |
1870 | return std::nullopt; |
1871 | break; |
1872 | } |
1873 | return Imm; |
1874 | } |
1875 | |
1876 | bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I, |
1877 | MachineRegisterInfo &MRI) { |
1878 | assert(I.getOpcode() == TargetOpcode::G_SHL); |
1879 | Register DstReg = I.getOperand(i: 0).getReg(); |
1880 | const LLT Ty = MRI.getType(Reg: DstReg); |
1881 | Register Src1Reg = I.getOperand(i: 1).getReg(); |
1882 | Register Src2Reg = I.getOperand(i: 2).getReg(); |
1883 | |
1884 | if (!Ty.isVector()) |
1885 | return false; |
1886 | |
1887 | // Check if we have a vector of constants on RHS that we can select as the |
1888 | // immediate form. |
1889 | std::optional<int64_t> ImmVal = getVectorSHLImm(SrcTy: Ty, Reg: Src2Reg, MRI); |
1890 | |
1891 | unsigned Opc = 0; |
1892 | if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) { |
1893 | Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64; |
1894 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) { |
1895 | Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; |
1896 | } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) { |
1897 | Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; |
1898 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) { |
1899 | Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16; |
1900 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) { |
1901 | Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; |
1902 | } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) { |
1903 | Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; |
1904 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) { |
1905 | Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; |
1906 | } else { |
1907 | LLVM_DEBUG(dbgs() << "Unhandled G_SHL type" ); |
1908 | return false; |
1909 | } |
1910 | |
1911 | auto Shl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg}); |
1912 | if (ImmVal) |
1913 | Shl.addImm(Val: *ImmVal); |
1914 | else |
1915 | Shl.addUse(RegNo: Src2Reg); |
1916 | constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI); |
1917 | I.eraseFromParent(); |
1918 | return true; |
1919 | } |
1920 | |
1921 | bool AArch64InstructionSelector::selectVectorAshrLshr( |
1922 | MachineInstr &I, MachineRegisterInfo &MRI) { |
1923 | assert(I.getOpcode() == TargetOpcode::G_ASHR || |
1924 | I.getOpcode() == TargetOpcode::G_LSHR); |
1925 | Register DstReg = I.getOperand(i: 0).getReg(); |
1926 | const LLT Ty = MRI.getType(Reg: DstReg); |
1927 | Register Src1Reg = I.getOperand(i: 1).getReg(); |
1928 | Register Src2Reg = I.getOperand(i: 2).getReg(); |
1929 | |
1930 | if (!Ty.isVector()) |
1931 | return false; |
1932 | |
1933 | bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; |
1934 | |
1935 | // We expect the immediate case to be lowered in the PostLegalCombiner to |
1936 | // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. |
1937 | |
1938 | // There is not a shift right register instruction, but the shift left |
1939 | // register instruction takes a signed value, where negative numbers specify a |
1940 | // right shift. |
1941 | |
1942 | unsigned Opc = 0; |
1943 | unsigned NegOpc = 0; |
1944 | const TargetRegisterClass *RC = |
1945 | getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID)); |
1946 | if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) { |
1947 | Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; |
1948 | NegOpc = AArch64::NEGv2i64; |
1949 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) { |
1950 | Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; |
1951 | NegOpc = AArch64::NEGv4i32; |
1952 | } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) { |
1953 | Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; |
1954 | NegOpc = AArch64::NEGv2i32; |
1955 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) { |
1956 | Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; |
1957 | NegOpc = AArch64::NEGv4i16; |
1958 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) { |
1959 | Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; |
1960 | NegOpc = AArch64::NEGv8i16; |
1961 | } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) { |
1962 | Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; |
1963 | NegOpc = AArch64::NEGv16i8; |
1964 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) { |
1965 | Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; |
1966 | NegOpc = AArch64::NEGv8i8; |
1967 | } else { |
1968 | LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type" ); |
1969 | return false; |
1970 | } |
1971 | |
1972 | auto Neg = MIB.buildInstr(Opc: NegOpc, DstOps: {RC}, SrcOps: {Src2Reg}); |
1973 | constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI); |
1974 | auto SShl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg, Neg}); |
1975 | constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI); |
1976 | I.eraseFromParent(); |
1977 | return true; |
1978 | } |
1979 | |
bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  // Unimplemented: G_VASTART selection for the AAPCS calling convention is
  // not handled here; report failure to the caller.
  return false;
}
1984 | |
// Select G_VASTART for Darwin: compute the address of the vararg save area
// off a frame index and store it through the va_list pointer.
bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  // Operand 0 is the pointer to the va_list being initialized.
  Register ListReg = I.getOperand(i: 0).getReg();

  // Holds the computed address of the vararg area.
  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  int FrameIdx = FuncInfo->getVarArgsStackIndex();
  // For Win64 calling conventions, prefer the GPR save area when any GPRs
  // were actually saved; otherwise fall back to the stack index.
  if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
          CC: MF.getFunction().getCallingConv())) {
    FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
                   ? FuncInfo->getVarArgsGPRIndex()
                   : FuncInfo->getVarArgsStackIndex();
  }

  // ArgsAddrReg = frame-index address (ADDXri fi, 0, 0).
  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FrameIdx)
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  // Store the computed address through the va_list pointer, reusing the
  // G_VASTART's memory operand.
  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
2019 | |
// Materialize a symbolic value (global or block address) for the large code
// model: MOVZ of the low 16 bits (G0) followed by three MOVKs inserting the
// G1/G2/G3 chunks at bit offsets 16/32/48.
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned OpFlags) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // MOVZ seeds the low 16 bits; MO_NC suppresses overflow checking since the
  // remaining chunks are filled in by the MOVKs below.
  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(i: 1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(Val: 0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  // Emit one MOVK inserting the 16-bit chunk selected by Flags at bit offset
  // Offset. ForceDstReg, when nonzero, routes the result into that register
  // (used for the final MOVK so it defines the original destination).
  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    // The symbol operand mirrors the MOVZ's: same offset, chunk-specific
    // target flags. V is either a GlobalValue or a BlockAddress.
    if (auto *GV = dyn_cast<GlobalValue>(Val: V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, Offset: MovZ->getOperand(1).getOffset(), TargetFlags: Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(BA: cast<BlockAddress>(Val: V),
                                       Offset: MovZ->getOperand(1).getOffset(), TargetFlags: Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Val: Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  // Chain the three MOVKs: G1 at 16, G2 at 32, and finally G3 at 48 into the
  // instruction's original destination register.
  Register DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(i: 0).getReg());
}
2056 | |
// Lower certain generic instructions before selection proper so the imported
// tablegen patterns can match them. Returns true if the instruction was
// changed.
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_STORE: {
    // First try to fold a cross-bank copy feeding the store.
    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
    MachineOperand &SrcOp = I.getOperand(i: 0);
    if (MRI.getType(Reg: SrcOp.getReg()).isPointer()) {
      // Allow matching with imported patterns for stores of pointers. Unlike
      // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
      // and constrain.
      auto Copy = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: SrcOp);
      Register NewSrc = Copy.getReg(Idx: 0);
      SrcOp.setReg(NewSrc);
      RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
      Changed = true;
    }
    return Changed;
  }
  case TargetOpcode::G_PTR_ADD:
    // Expose integer add semantics to the imported patterns.
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    // For scalar loads of pointers, we try to convert the dest type from p0
    // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
    // conversion, this should be ok because all users should have been
    // selected already, so the type doesn't matter for them.
    Register DstReg = I.getOperand(i: 0).getReg();
    const LLT DstTy = MRI.getType(Reg: DstReg);
    if (!DstTy.isPointer())
      return false;
    MRI.setType(VReg: DstReg, Ty: LLT::scalar(SizeInBits: 64));
    return true;
  }
  case AArch64::G_DUP: {
    // Convert the type from p0 to s64 to help selection.
    LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
    if (!DstTy.isPointerVector())
      return false;
    // Copy the pointer source into an s64 GPR, then retype the DUP result as
    // a vector of s64.
    auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: I.getOperand(i: 1).getReg());
    MRI.setType(VReg: I.getOperand(i: 0).getReg(),
                Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64)));
    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
    I.getOperand(i: 1).setReg(NewSrc.getReg(Idx: 0));
    return true;
  }
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_SITOFP: {
    // If both source and destination regbanks are FPR, then convert the opcode
    // to G_SITOF so that the importer can select it to an fpr variant.
    // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
    // copy.
    Register SrcReg = I.getOperand(i: 1).getReg();
    LLT SrcTy = MRI.getType(Reg: SrcReg);
    LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
    // Only same-size scalar conversions are rewritten here.
    if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
      return false;

    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
      if (I.getOpcode() == TargetOpcode::G_SITOFP)
        I.setDesc(TII.get(AArch64::G_SITOF));
      else
        I.setDesc(TII.get(AArch64::G_UITOF));
      return true;
    }
    return false;
  }
  default:
    return false;
  }
}
2129 | |
/// This lowering tries to look for G_PTR_ADD instructions and then converts
/// them to a standard G_ADD with a COPY on the source.
///
/// The motivation behind this is to expose the add semantics to the imported
/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
/// because the selector works bottom up, uses before defs. By the time we
/// end up trying to select a G_PTR_ADD, we should have already attempted to
/// fold this into addressing modes and were therefore unsuccessful.
bool AArch64InstructionSelector::convertPtrAddToAdd(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD" );
  Register DstReg = I.getOperand(i: 0).getReg();
  Register AddOp1Reg = I.getOperand(i: 1).getReg();
  const LLT PtrTy = MRI.getType(Reg: DstReg);
  // Only the default address space is handled here.
  if (PtrTy.getAddressSpace() != 0)
    return false;

  // Pointers are rewritten as s64; vectors of pointers as <2 x s64>.
  const LLT CastPtrTy =
      PtrTy.isVector() ? LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) : LLT::scalar(SizeInBits: 64);
  auto PtrToInt = MIB.buildPtrToInt(Dst: CastPtrTy, Src: AddOp1Reg);
  // Set regbanks on the registers.
  if (PtrTy.isVector())
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
  else
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
  // %dst(intty) = G_ADD %intbase, off
  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(VReg: DstReg, Ty: CastPtrTy);
  I.getOperand(i: 1).setReg(PtrToInt.getReg(Idx: 0));
  // Select the freshly-built G_PTRTOINT immediately; if that fails, report
  // failure for the whole lowering.
  if (!select(I&: *PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd" );
    return false;
  }

  // Also take the opportunity here to try to do some optimization.
  // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
  Register NegatedReg;
  if (!mi_match(R: I.getOperand(i: 2).getReg(), MRI, P: m_Neg(Src: m_Reg(R&: NegatedReg))))
    return true;
  I.getOperand(i: 2).setReg(NegatedReg);
  I.setDesc(TII.get(TargetOpcode::G_SUB));
  return true;
}
2175 | |
2176 | bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I, |
2177 | MachineRegisterInfo &MRI) { |
2178 | // We try to match the immediate variant of LSL, which is actually an alias |
2179 | // for a special case of UBFM. Otherwise, we fall back to the imported |
2180 | // selector which will match the register variant. |
2181 | assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op" ); |
2182 | const auto &MO = I.getOperand(i: 2); |
2183 | auto VRegAndVal = getIConstantVRegVal(VReg: MO.getReg(), MRI); |
2184 | if (!VRegAndVal) |
2185 | return false; |
2186 | |
2187 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
2188 | if (DstTy.isVector()) |
2189 | return false; |
2190 | bool Is64Bit = DstTy.getSizeInBits() == 64; |
2191 | auto Imm1Fn = Is64Bit ? selectShiftA_64(Root: MO) : selectShiftA_32(Root: MO); |
2192 | auto Imm2Fn = Is64Bit ? selectShiftB_64(Root: MO) : selectShiftB_32(Root: MO); |
2193 | |
2194 | if (!Imm1Fn || !Imm2Fn) |
2195 | return false; |
2196 | |
2197 | auto NewI = |
2198 | MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri, |
2199 | {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()}); |
2200 | |
2201 | for (auto &RenderFn : *Imm1Fn) |
2202 | RenderFn(NewI); |
2203 | for (auto &RenderFn : *Imm2Fn) |
2204 | RenderFn(NewI); |
2205 | |
2206 | I.eraseFromParent(); |
2207 | return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); |
2208 | } |
2209 | |
2210 | bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( |
2211 | MachineInstr &I, MachineRegisterInfo &MRI) { |
2212 | assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE" ); |
2213 | // If we're storing a scalar, it doesn't matter what register bank that |
2214 | // scalar is on. All that matters is the size. |
2215 | // |
2216 | // So, if we see something like this (with a 32-bit scalar as an example): |
2217 | // |
2218 | // %x:gpr(s32) = ... something ... |
2219 | // %y:fpr(s32) = COPY %x:gpr(s32) |
2220 | // G_STORE %y:fpr(s32) |
2221 | // |
2222 | // We can fix this up into something like this: |
2223 | // |
2224 | // G_STORE %x:gpr(s32) |
2225 | // |
2226 | // And then continue the selection process normally. |
2227 | Register DefDstReg = getSrcRegIgnoringCopies(Reg: I.getOperand(i: 0).getReg(), MRI); |
2228 | if (!DefDstReg.isValid()) |
2229 | return false; |
2230 | LLT DefDstTy = MRI.getType(Reg: DefDstReg); |
2231 | Register StoreSrcReg = I.getOperand(i: 0).getReg(); |
2232 | LLT StoreSrcTy = MRI.getType(Reg: StoreSrcReg); |
2233 | |
2234 | // If we get something strange like a physical register, then we shouldn't |
2235 | // go any further. |
2236 | if (!DefDstTy.isValid()) |
2237 | return false; |
2238 | |
2239 | // Are the source and dst types the same size? |
2240 | if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) |
2241 | return false; |
2242 | |
2243 | if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == |
2244 | RBI.getRegBank(DefDstReg, MRI, TRI)) |
2245 | return false; |
2246 | |
2247 | // We have a cross-bank copy, which is entering a store. Let's fold it. |
2248 | I.getOperand(i: 0).setReg(DefDstReg); |
2249 | return true; |
2250 | } |
2251 | |
// Hand-written selection routines that run BEFORE the TableGen-imported
// patterns (see select()). Each case either fully selects/rewrites \p I and
// returns true, or leaves it untouched and returns false so the normal
// (imported) selection gets a chance at it.
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!" );
  assert(I.getParent()->getParent() && "Instruction should be in a function!" );

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case AArch64::G_DUP: {
    // Before selecting a DUP instruction, check if it is better selected as a
    // MOV or load from a constant pool.
    Register Src = I.getOperand(i: 1).getReg();
    auto ValAndVReg = getAnyConstantVRegValWithLookThrough(VReg: Src, MRI);
    if (!ValAndVReg)
      return false;
    LLVMContext &Ctx = MF.getFunction().getContext();
    Register Dst = I.getOperand(i: 0).getReg();
    // Rebuild the splat as an IR constant vector; emitConstantVector then
    // picks how to materialize it (which may be a constant-pool load).
    auto *CV = ConstantDataVector::getSplat(
        NumElts: MRI.getType(Reg: Dst).getNumElements(),
        Elt: ConstantInt::get(Ty: Type::getIntNTy(C&: Ctx, N: MRI.getType(Reg: Src).getSizeInBits()),
                          V: ValAndVReg->Value));
    if (!emitConstantVector(Dst, CV, MIRBuilder&: MIB, MRI))
      return false;
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_SEXT:
    // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
    // over a normal extend.
    if (selectUSMovFromExtend(I, MRI))
      return true;
    return false;
  case TargetOpcode::G_BR:
    return false;
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_CONSTANT: {
    // Select a 32/64-bit integer constant zero as a plain COPY from the
    // zero register (WZR/XZR) rather than a materializing move.
    bool IsZero = false;
    if (I.getOperand(i: 1).isCImm())
      IsZero = I.getOperand(i: 1).getCImm()->isZero();
    else if (I.getOperand(i: 1).isImm())
      IsZero = I.getOperand(i: 1).getImm() == 0;

    if (!IsZero)
      return false;

    Register DefReg = I.getOperand(i: 0).getReg();
    LLT Ty = MRI.getType(Reg: DefReg);
    if (Ty.getSizeInBits() == 64) {
      // Rewrite the immediate operand into a use of XZR in place.
      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
    } else if (Ty.getSizeInBits() == 32) {
      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
    } else
      return false;

    // Mutate the G_CONSTANT itself into the COPY; no new instruction needed.
    I.setDesc(TII.get(TargetOpcode::COPY));
    return true;
  }

  case TargetOpcode::G_ADD: {
    // Check if this is being fed by a G_ICMP on either side.
    //
    // (cmp pred, x, y) + z
    //
    // In the above case, when the cmp is true, we increment z by 1. So, we can
    // fold the add into the cset for the cmp by using cinc.
    //
    // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
    Register AddDst = I.getOperand(i: 0).getReg();
    Register AddLHS = I.getOperand(i: 1).getReg();
    Register AddRHS = I.getOperand(i: 2).getReg();
    // Only handle scalars.
    LLT Ty = MRI.getType(Reg: AddLHS);
    if (Ty.isVector())
      return false;
    // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
    // bits.
    unsigned Size = Ty.getSizeInBits();
    if (Size != 32 && Size != 64)
      return false;
    // Returns the G_ICMP feeding \p Reg when the fold is possible, else
    // nullptr. The compare (and any zext between it and the add) must have
    // exactly one non-debug use, or folding it away would not pay off.
    auto MatchCmp = [&](Register Reg) -> MachineInstr * {
      if (!MRI.hasOneNonDBGUse(RegNo: Reg))
        return nullptr;
      // If the LHS of the add is 32 bits, then we want to fold a 32-bit
      // compare.
      if (Size == 32)
        return getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg, MRI);
      // We model scalar compares using 32-bit destinations right now.
      // If it's a 64-bit compare, it'll have 64-bit sources.
      Register ZExt;
      if (!mi_match(R: Reg, MRI,
                    P: m_OneNonDBGUse(SP: m_GZExt(Src: m_OneNonDBGUse(SP: m_Reg(R&: ZExt))))))
        return nullptr;
      auto *Cmp = getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg: ZExt, MRI);
      if (!Cmp ||
          MRI.getType(Reg: Cmp->getOperand(i: 2).getReg()).getSizeInBits() != 64)
        return nullptr;
      return Cmp;
    };
    // Try to match
    // z + (cmp pred, x, y)
    MachineInstr *Cmp = MatchCmp(AddRHS);
    if (!Cmp) {
      // (cmp pred, x, y) + z
      std::swap(a&: AddLHS, b&: AddRHS);
      Cmp = MatchCmp(AddRHS);
      if (!Cmp)
        return false;
    }
    auto &PredOp = Cmp->getOperand(i: 1);
    auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
    // CSINC Dst, Src1, Src2, CC selects Src1 when CC holds, else Src2 + 1.
    // With Src1 == Src2 == AddLHS and the *inverted* condition, the result is
    // AddLHS + 1 exactly when the original predicate is true — i.e. a CINC.
    const AArch64CC::CondCode InvCC =
        changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred));
    MIB.setInstrAndDebugLoc(I);
    emitIntegerCompare(/*LHS=*/Cmp->getOperand(i: 2),
                       /*RHS=*/Cmp->getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB);
    emitCSINC(/*Dst=*/AddDst, /*Src =*/Src1: AddLHS, /*Src2=*/AddLHS, Pred: InvCC, MIRBuilder&: MIB);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_OR: {
    // Look for operations that take the lower `Width=Size-ShiftImm` bits of
    // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
    // shifting and masking that we can replace with a BFI (encoded as a BFM).
    Register Dst = I.getOperand(i: 0).getReg();
    LLT Ty = MRI.getType(Reg: Dst);

    if (!Ty.isScalar())
      return false;

    unsigned Size = Ty.getSizeInBits();
    if (Size != 32 && Size != 64)
      return false;

    // Match: or (shl ShiftSrc, ShiftImm), (and MaskSrc, MaskImm), where both
    // inner operations have a single non-debug use (otherwise they must be
    // emitted anyway and the fold gains nothing).
    Register ShiftSrc;
    int64_t ShiftImm;
    Register MaskSrc;
    int64_t MaskImm;
    if (!mi_match(
            R: Dst, MRI,
            P: m_GOr(L: m_OneNonDBGUse(SP: m_GShl(L: m_Reg(R&: ShiftSrc), R: m_ICst(Cst&: ShiftImm))),
                  R: m_OneNonDBGUse(SP: m_GAnd(L: m_Reg(R&: MaskSrc), R: m_ICst(Cst&: MaskImm))))))
      return false;

    // The AND mask must cover exactly the bits below the shift amount, so the
    // OR merges two disjoint bit ranges.
    if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
      return false;

    // BFM immediates (see Arm ARM): rotate the inserted field right by Immr
    // and insert the low Imms+1 bits.
    int64_t Immr = Size - ShiftImm;
    int64_t Imms = Size - ShiftImm - 1;
    unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
    emitInstr(Opcode: Opc, DstOps: {Dst}, SrcOps: {MaskSrc, ShiftSrc, Immr, Imms}, MIRBuilder&: MIB);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_FENCE: {
    // Operand 1 is the fence's synchronization scope: scope 0 (singlethread)
    // needs no hardware barrier, only a MEMBARRIER to block compiler
    // reordering. Otherwise emit a DMB; operand 0 is the atomic ordering,
    // where 4 (acquire) permits the weaker ISHLD (0x9) instead of full
    // ISH (0xb).
    if (I.getOperand(i: 1).getImm() == 0)
      BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
    else
      BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
          .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
    I.eraseFromParent();
    return true;
  }
  default:
    return false;
  }
}
2422 | |
2423 | bool AArch64InstructionSelector::select(MachineInstr &I) { |
2424 | assert(I.getParent() && "Instruction should be in a basic block!" ); |
2425 | assert(I.getParent()->getParent() && "Instruction should be in a function!" ); |
2426 | |
2427 | MachineBasicBlock &MBB = *I.getParent(); |
2428 | MachineFunction &MF = *MBB.getParent(); |
2429 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2430 | |
2431 | const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>(); |
2432 | if (Subtarget->requiresStrictAlign()) { |
2433 | // We don't support this feature yet. |
2434 | LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n" ); |
2435 | return false; |
2436 | } |
2437 | |
2438 | MIB.setInstrAndDebugLoc(I); |
2439 | |
2440 | unsigned Opcode = I.getOpcode(); |
2441 | // G_PHI requires same handling as PHI |
2442 | if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { |
2443 | // Certain non-generic instructions also need some special handling. |
2444 | |
2445 | if (Opcode == TargetOpcode::LOAD_STACK_GUARD) |
2446 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2447 | |
2448 | if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { |
2449 | const Register DefReg = I.getOperand(i: 0).getReg(); |
2450 | const LLT DefTy = MRI.getType(Reg: DefReg); |
2451 | |
2452 | const RegClassOrRegBank &RegClassOrBank = |
2453 | MRI.getRegClassOrRegBank(Reg: DefReg); |
2454 | |
2455 | const TargetRegisterClass *DefRC |
2456 | = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); |
2457 | if (!DefRC) { |
2458 | if (!DefTy.isValid()) { |
2459 | LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n" ); |
2460 | return false; |
2461 | } |
2462 | const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); |
2463 | DefRC = getRegClassForTypeOnBank(Ty: DefTy, RB); |
2464 | if (!DefRC) { |
2465 | LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n" ); |
2466 | return false; |
2467 | } |
2468 | } |
2469 | |
2470 | I.setDesc(TII.get(TargetOpcode::PHI)); |
2471 | |
2472 | return RBI.constrainGenericRegister(Reg: DefReg, RC: *DefRC, MRI); |
2473 | } |
2474 | |
2475 | if (I.isCopy()) |
2476 | return selectCopy(I, TII, MRI, TRI, RBI); |
2477 | |
2478 | if (I.isDebugInstr()) |
2479 | return selectDebugInstr(I, MRI, RBI); |
2480 | |
2481 | return true; |
2482 | } |
2483 | |
2484 | |
2485 | if (I.getNumOperands() != I.getNumExplicitOperands()) { |
2486 | LLVM_DEBUG( |
2487 | dbgs() << "Generic instruction has unexpected implicit operands\n" ); |
2488 | return false; |
2489 | } |
2490 | |
2491 | // Try to do some lowering before we start instruction selecting. These |
2492 | // lowerings are purely transformations on the input G_MIR and so selection |
2493 | // must continue after any modification of the instruction. |
2494 | if (preISelLower(I)) { |
2495 | Opcode = I.getOpcode(); // The opcode may have been modified, refresh it. |
2496 | } |
2497 | |
2498 | // There may be patterns where the importer can't deal with them optimally, |
2499 | // but does select it to a suboptimal sequence so our custom C++ selection |
2500 | // code later never has a chance to work on it. Therefore, we have an early |
2501 | // selection attempt here to give priority to certain selection routines |
2502 | // over the imported ones. |
2503 | if (earlySelect(I)) |
2504 | return true; |
2505 | |
2506 | if (selectImpl(I, CoverageInfo&: *CoverageInfo)) |
2507 | return true; |
2508 | |
2509 | LLT Ty = |
2510 | I.getOperand(i: 0).isReg() ? MRI.getType(Reg: I.getOperand(i: 0).getReg()) : LLT{}; |
2511 | |
2512 | switch (Opcode) { |
2513 | case TargetOpcode::G_SBFX: |
2514 | case TargetOpcode::G_UBFX: { |
2515 | static const unsigned OpcTable[2][2] = { |
2516 | {AArch64::UBFMWri, AArch64::UBFMXri}, |
2517 | {AArch64::SBFMWri, AArch64::SBFMXri}}; |
2518 | bool IsSigned = Opcode == TargetOpcode::G_SBFX; |
2519 | unsigned Size = Ty.getSizeInBits(); |
2520 | unsigned Opc = OpcTable[IsSigned][Size == 64]; |
2521 | auto Cst1 = |
2522 | getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 2).getReg(), MRI); |
2523 | assert(Cst1 && "Should have gotten a constant for src 1?" ); |
2524 | auto Cst2 = |
2525 | getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 3).getReg(), MRI); |
2526 | assert(Cst2 && "Should have gotten a constant for src 2?" ); |
2527 | auto LSB = Cst1->Value.getZExtValue(); |
2528 | auto Width = Cst2->Value.getZExtValue(); |
2529 | auto BitfieldInst = |
2530 | MIB.buildInstr(Opc, DstOps: {I.getOperand(i: 0)}, SrcOps: {I.getOperand(i: 1)}) |
2531 | .addImm(Val: LSB) |
2532 | .addImm(Val: LSB + Width - 1); |
2533 | I.eraseFromParent(); |
2534 | return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI); |
2535 | } |
2536 | case TargetOpcode::G_BRCOND: |
2537 | return selectCompareBranch(I, MF, MRI); |
2538 | |
2539 | case TargetOpcode::G_BRINDIRECT: { |
2540 | I.setDesc(TII.get(AArch64::BR)); |
2541 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2542 | } |
2543 | |
2544 | case TargetOpcode::G_BRJT: |
2545 | return selectBrJT(I, MRI); |
2546 | |
2547 | case AArch64::G_ADD_LOW: { |
2548 | // This op may have been separated from it's ADRP companion by the localizer |
2549 | // or some other code motion pass. Given that many CPUs will try to |
2550 | // macro fuse these operations anyway, select this into a MOVaddr pseudo |
2551 | // which will later be expanded into an ADRP+ADD pair after scheduling. |
2552 | MachineInstr *BaseMI = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg()); |
2553 | if (BaseMI->getOpcode() != AArch64::ADRP) { |
2554 | I.setDesc(TII.get(AArch64::ADDXri)); |
2555 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2556 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2557 | } |
2558 | assert(TM.getCodeModel() == CodeModel::Small && |
2559 | "Expected small code model" ); |
2560 | auto Op1 = BaseMI->getOperand(i: 1); |
2561 | auto Op2 = I.getOperand(i: 2); |
2562 | auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {}) |
2563 | .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(), |
2564 | Op1.getTargetFlags()) |
2565 | .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(), |
2566 | Op2.getTargetFlags()); |
2567 | I.eraseFromParent(); |
2568 | return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI); |
2569 | } |
2570 | |
2571 | case TargetOpcode::G_FCONSTANT: |
2572 | case TargetOpcode::G_CONSTANT: { |
2573 | const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; |
2574 | |
2575 | const LLT s8 = LLT::scalar(SizeInBits: 8); |
2576 | const LLT s16 = LLT::scalar(SizeInBits: 16); |
2577 | const LLT s32 = LLT::scalar(SizeInBits: 32); |
2578 | const LLT s64 = LLT::scalar(SizeInBits: 64); |
2579 | const LLT s128 = LLT::scalar(SizeInBits: 128); |
2580 | const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
2581 | |
2582 | const Register DefReg = I.getOperand(i: 0).getReg(); |
2583 | const LLT DefTy = MRI.getType(Reg: DefReg); |
2584 | const unsigned DefSize = DefTy.getSizeInBits(); |
2585 | const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); |
2586 | |
2587 | // FIXME: Redundant check, but even less readable when factored out. |
2588 | if (isFP) { |
2589 | if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) { |
2590 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty |
2591 | << " constant, expected: " << s16 << " or " << s32 |
2592 | << " or " << s64 << " or " << s128 << '\n'); |
2593 | return false; |
2594 | } |
2595 | |
2596 | if (RB.getID() != AArch64::FPRRegBankID) { |
2597 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty |
2598 | << " constant on bank: " << RB |
2599 | << ", expected: FPR\n" ); |
2600 | return false; |
2601 | } |
2602 | |
2603 | // The case when we have 0.0 is covered by tablegen. Reject it here so we |
2604 | // can be sure tablegen works correctly and isn't rescued by this code. |
2605 | // 0.0 is not covered by tablegen for FP128. So we will handle this |
2606 | // scenario in the code here. |
2607 | if (DefSize != 128 && I.getOperand(i: 1).getFPImm()->isExactlyValue(V: 0.0)) |
2608 | return false; |
2609 | } else { |
2610 | // s32 and s64 are covered by tablegen. |
2611 | if (Ty != p0 && Ty != s8 && Ty != s16) { |
2612 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty |
2613 | << " constant, expected: " << s32 << ", " << s64 |
2614 | << ", or " << p0 << '\n'); |
2615 | return false; |
2616 | } |
2617 | |
2618 | if (RB.getID() != AArch64::GPRRegBankID) { |
2619 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty |
2620 | << " constant on bank: " << RB |
2621 | << ", expected: GPR\n" ); |
2622 | return false; |
2623 | } |
2624 | } |
2625 | |
2626 | if (isFP) { |
2627 | const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(Ty: DefTy, RB); |
2628 | // For 16, 64, and 128b values, emit a constant pool load. |
2629 | switch (DefSize) { |
2630 | default: |
2631 | llvm_unreachable("Unexpected destination size for G_FCONSTANT?" ); |
2632 | case 32: |
2633 | case 64: { |
2634 | bool OptForSize = shouldOptForSize(MF: &MF); |
2635 | const auto &TLI = MF.getSubtarget().getTargetLowering(); |
2636 | // If TLI says that this fpimm is illegal, then we'll expand to a |
2637 | // constant pool load. |
2638 | if (TLI->isFPImmLegal(I.getOperand(i: 1).getFPImm()->getValueAPF(), |
2639 | EVT::getFloatingPointVT(BitWidth: DefSize), ForCodeSize: OptForSize)) |
2640 | break; |
2641 | [[fallthrough]]; |
2642 | } |
2643 | case 16: |
2644 | case 128: { |
2645 | auto *FPImm = I.getOperand(i: 1).getFPImm(); |
2646 | auto *LoadMI = emitLoadFromConstantPool(CPVal: FPImm, MIRBuilder&: MIB); |
2647 | if (!LoadMI) { |
2648 | LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n" ); |
2649 | return false; |
2650 | } |
2651 | MIB.buildCopy(Res: {DefReg}, Op: {LoadMI->getOperand(i: 0).getReg()}); |
2652 | I.eraseFromParent(); |
2653 | return RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI); |
2654 | } |
2655 | } |
2656 | |
2657 | assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size" ); |
2658 | // Either emit a FMOV, or emit a copy to emit a normal mov. |
2659 | const Register DefGPRReg = MRI.createVirtualRegister( |
2660 | DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); |
2661 | MachineOperand &RegOp = I.getOperand(i: 0); |
2662 | RegOp.setReg(DefGPRReg); |
2663 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator())); |
2664 | MIB.buildCopy(Res: {DefReg}, Op: {DefGPRReg}); |
2665 | |
2666 | if (!RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI)) { |
2667 | LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n" ); |
2668 | return false; |
2669 | } |
2670 | |
2671 | MachineOperand &ImmOp = I.getOperand(i: 1); |
2672 | // FIXME: Is going through int64_t always correct? |
2673 | ImmOp.ChangeToImmediate( |
2674 | ImmVal: ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); |
2675 | } else if (I.getOperand(i: 1).isCImm()) { |
2676 | uint64_t Val = I.getOperand(i: 1).getCImm()->getZExtValue(); |
2677 | I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val); |
2678 | } else if (I.getOperand(i: 1).isImm()) { |
2679 | uint64_t Val = I.getOperand(i: 1).getImm(); |
2680 | I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val); |
2681 | } |
2682 | |
2683 | const unsigned MovOpc = |
2684 | DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; |
2685 | I.setDesc(TII.get(MovOpc)); |
2686 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2687 | return true; |
2688 | } |
2689 | case TargetOpcode::G_EXTRACT: { |
2690 | Register DstReg = I.getOperand(i: 0).getReg(); |
2691 | Register SrcReg = I.getOperand(i: 1).getReg(); |
2692 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
2693 | LLT DstTy = MRI.getType(Reg: DstReg); |
2694 | (void)DstTy; |
2695 | unsigned SrcSize = SrcTy.getSizeInBits(); |
2696 | |
2697 | if (SrcTy.getSizeInBits() > 64) { |
2698 | // This should be an extract of an s128, which is like a vector extract. |
2699 | if (SrcTy.getSizeInBits() != 128) |
2700 | return false; |
2701 | // Only support extracting 64 bits from an s128 at the moment. |
2702 | if (DstTy.getSizeInBits() != 64) |
2703 | return false; |
2704 | |
2705 | unsigned Offset = I.getOperand(i: 2).getImm(); |
2706 | if (Offset % 64 != 0) |
2707 | return false; |
2708 | |
2709 | // Check we have the right regbank always. |
2710 | const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); |
2711 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); |
2712 | assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!" ); |
2713 | |
2714 | if (SrcRB.getID() == AArch64::GPRRegBankID) { |
2715 | auto NewI = |
2716 | MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) |
2717 | .addUse(SrcReg, 0, |
2718 | Offset == 0 ? AArch64::sube64 : AArch64::subo64); |
2719 | constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI, |
2720 | AArch64::GPR64RegClass, NewI->getOperand(0)); |
2721 | I.eraseFromParent(); |
2722 | return true; |
2723 | } |
2724 | |
2725 | // Emit the same code as a vector extract. |
2726 | // Offset must be a multiple of 64. |
2727 | unsigned LaneIdx = Offset / 64; |
2728 | MachineInstr * = emitExtractVectorElt( |
2729 | DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: 64), VecReg: SrcReg, LaneIdx, MIRBuilder&: MIB); |
2730 | if (!Extract) |
2731 | return false; |
2732 | I.eraseFromParent(); |
2733 | return true; |
2734 | } |
2735 | |
2736 | I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri)); |
2737 | MachineInstrBuilder(MF, I).addImm(Val: I.getOperand(i: 2).getImm() + |
2738 | Ty.getSizeInBits() - 1); |
2739 | |
2740 | if (SrcSize < 64) { |
2741 | assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && |
2742 | "unexpected G_EXTRACT types" ); |
2743 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2744 | } |
2745 | |
2746 | DstReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64)); |
2747 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator())); |
2748 | MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) |
2749 | .addReg(DstReg, 0, AArch64::sub_32); |
2750 | RBI.constrainGenericRegister(I.getOperand(0).getReg(), |
2751 | AArch64::GPR32RegClass, MRI); |
2752 | I.getOperand(i: 0).setReg(DstReg); |
2753 | |
2754 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2755 | } |
2756 | |
2757 | case TargetOpcode::G_INSERT: { |
2758 | LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 2).getReg()); |
2759 | LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
2760 | unsigned DstSize = DstTy.getSizeInBits(); |
2761 | // Larger inserts are vectors, same-size ones should be something else by |
2762 | // now (split up or turned into COPYs). |
2763 | if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) |
2764 | return false; |
2765 | |
2766 | I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri)); |
2767 | unsigned LSB = I.getOperand(i: 3).getImm(); |
2768 | unsigned Width = MRI.getType(Reg: I.getOperand(i: 2).getReg()).getSizeInBits(); |
2769 | I.getOperand(i: 3).setImm((DstSize - LSB) % DstSize); |
2770 | MachineInstrBuilder(MF, I).addImm(Val: Width - 1); |
2771 | |
2772 | if (DstSize < 64) { |
2773 | assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && |
2774 | "unexpected G_INSERT types" ); |
2775 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2776 | } |
2777 | |
2778 | Register SrcReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64)); |
2779 | BuildMI(MBB, I.getIterator(), I.getDebugLoc(), |
2780 | TII.get(AArch64::SUBREG_TO_REG)) |
2781 | .addDef(SrcReg) |
2782 | .addImm(0) |
2783 | .addUse(I.getOperand(2).getReg()) |
2784 | .addImm(AArch64::sub_32); |
2785 | RBI.constrainGenericRegister(I.getOperand(2).getReg(), |
2786 | AArch64::GPR32RegClass, MRI); |
2787 | I.getOperand(i: 2).setReg(SrcReg); |
2788 | |
2789 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2790 | } |
2791 | case TargetOpcode::G_FRAME_INDEX: { |
2792 | // allocas and G_FRAME_INDEX are only supported in addrspace(0). |
2793 | if (Ty != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) { |
2794 | LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty |
2795 | << ", expected: " << LLT::pointer(0, 64) << '\n'); |
2796 | return false; |
2797 | } |
2798 | I.setDesc(TII.get(AArch64::ADDXri)); |
2799 | |
2800 | // MOs for a #0 shifted immediate. |
2801 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2802 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2803 | |
2804 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2805 | } |
2806 | |
2807 | case TargetOpcode::G_GLOBAL_VALUE: { |
2808 | const GlobalValue *GV = nullptr; |
2809 | unsigned OpFlags; |
2810 | if (I.getOperand(i: 1).isSymbol()) { |
2811 | OpFlags = I.getOperand(i: 1).getTargetFlags(); |
2812 | // Currently only used by "RtLibUseGOT". |
2813 | assert(OpFlags == AArch64II::MO_GOT); |
2814 | } else { |
2815 | GV = I.getOperand(i: 1).getGlobal(); |
2816 | if (GV->isThreadLocal()) |
2817 | return selectTLSGlobalValue(I, MRI); |
2818 | OpFlags = STI.ClassifyGlobalReference(GV, TM); |
2819 | } |
2820 | |
2821 | if (OpFlags & AArch64II::MO_GOT) { |
2822 | I.setDesc(TII.get(AArch64::LOADgot)); |
2823 | I.getOperand(i: 1).setTargetFlags(OpFlags); |
2824 | } else if (TM.getCodeModel() == CodeModel::Large && |
2825 | !TM.isPositionIndependent()) { |
2826 | // Materialize the global using movz/movk instructions. |
2827 | materializeLargeCMVal(I, V: GV, OpFlags); |
2828 | I.eraseFromParent(); |
2829 | return true; |
2830 | } else if (TM.getCodeModel() == CodeModel::Tiny) { |
2831 | I.setDesc(TII.get(AArch64::ADR)); |
2832 | I.getOperand(i: 1).setTargetFlags(OpFlags); |
2833 | } else { |
2834 | I.setDesc(TII.get(AArch64::MOVaddr)); |
2835 | I.getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); |
2836 | MachineInstrBuilder MIB(MF, I); |
2837 | MIB.addGlobalAddress(GV, Offset: I.getOperand(i: 1).getOffset(), |
2838 | TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
2839 | } |
2840 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2841 | } |
2842 | |
2843 | case TargetOpcode::G_ZEXTLOAD: |
2844 | case TargetOpcode::G_LOAD: |
2845 | case TargetOpcode::G_STORE: { |
2846 | GLoadStore &LdSt = cast<GLoadStore>(Val&: I); |
2847 | bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; |
2848 | LLT PtrTy = MRI.getType(Reg: LdSt.getPointerReg()); |
2849 | |
2850 | if (PtrTy != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) { |
2851 | LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy |
2852 | << ", expected: " << LLT::pointer(0, 64) << '\n'); |
2853 | return false; |
2854 | } |
2855 | |
2856 | uint64_t MemSizeInBytes = LdSt.getMemSize().getValue(); |
2857 | unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue(); |
2858 | AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); |
2859 | |
2860 | // Need special instructions for atomics that affect ordering. |
2861 | if (Order != AtomicOrdering::NotAtomic && |
2862 | Order != AtomicOrdering::Unordered && |
2863 | Order != AtomicOrdering::Monotonic) { |
2864 | assert(!isa<GZExtLoad>(LdSt)); |
2865 | if (MemSizeInBytes > 64) |
2866 | return false; |
2867 | |
2868 | if (isa<GLoad>(Val: LdSt)) { |
2869 | static constexpr unsigned LDAPROpcodes[] = { |
2870 | AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX}; |
2871 | static constexpr unsigned LDAROpcodes[] = { |
2872 | AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX}; |
2873 | ArrayRef<unsigned> Opcodes = |
2874 | STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent |
2875 | ? LDAPROpcodes |
2876 | : LDAROpcodes; |
2877 | I.setDesc(TII.get(Opcodes[Log2_32(Value: MemSizeInBytes)])); |
2878 | } else { |
2879 | static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH, |
2880 | AArch64::STLRW, AArch64::STLRX}; |
2881 | Register ValReg = LdSt.getReg(Idx: 0); |
2882 | if (MRI.getType(Reg: ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) { |
2883 | // Emit a subreg copy of 32 bits. |
2884 | Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass); |
2885 | MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {}) |
2886 | .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32); |
2887 | I.getOperand(i: 0).setReg(NewVal); |
2888 | } |
2889 | I.setDesc(TII.get(Opcodes[Log2_32(Value: MemSizeInBytes)])); |
2890 | } |
2891 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2892 | return true; |
2893 | } |
2894 | |
2895 | #ifndef NDEBUG |
2896 | const Register PtrReg = LdSt.getPointerReg(); |
2897 | const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); |
2898 | // Check that the pointer register is valid. |
2899 | assert(PtrRB.getID() == AArch64::GPRRegBankID && |
2900 | "Load/Store pointer operand isn't a GPR" ); |
2901 | assert(MRI.getType(PtrReg).isPointer() && |
2902 | "Load/Store pointer operand isn't a pointer" ); |
2903 | #endif |
2904 | |
2905 | const Register ValReg = LdSt.getReg(Idx: 0); |
2906 | const LLT ValTy = MRI.getType(Reg: ValReg); |
2907 | const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); |
2908 | |
2909 | // The code below doesn't support truncating stores, so we need to split it |
2910 | // again. |
2911 | if (isa<GStore>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { |
2912 | unsigned SubReg; |
2913 | LLT MemTy = LdSt.getMMO().getMemoryType(); |
2914 | auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB); |
2915 | if (!getSubRegForClass(RC, TRI, SubReg)) |
2916 | return false; |
2917 | |
2918 | // Generate a subreg copy. |
2919 | auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {MemTy}, SrcOps: {}) |
2920 | .addReg(RegNo: ValReg, flags: 0, SubReg) |
2921 | .getReg(Idx: 0); |
2922 | RBI.constrainGenericRegister(Reg: Copy, RC: *RC, MRI); |
2923 | LdSt.getOperand(i: 0).setReg(Copy); |
2924 | } else if (isa<GLoad>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { |
2925 | // If this is an any-extending load from the FPR bank, split it into a regular |
2926 | // load + extend. |
2927 | if (RB.getID() == AArch64::FPRRegBankID) { |
2928 | unsigned SubReg; |
2929 | LLT MemTy = LdSt.getMMO().getMemoryType(); |
2930 | auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB); |
2931 | if (!getSubRegForClass(RC, TRI, SubReg)) |
2932 | return false; |
2933 | Register OldDst = LdSt.getReg(Idx: 0); |
2934 | Register NewDst = |
2935 | MRI.createGenericVirtualRegister(Ty: LdSt.getMMO().getMemoryType()); |
2936 | LdSt.getOperand(i: 0).setReg(NewDst); |
2937 | MRI.setRegBank(Reg: NewDst, RegBank: RB); |
2938 | // Generate a SUBREG_TO_REG to extend it. |
2939 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LdSt.getIterator())); |
2940 | MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {}) |
2941 | .addImm(0) |
2942 | .addUse(NewDst) |
2943 | .addImm(SubReg); |
2944 | auto SubRegRC = getRegClassForTypeOnBank(Ty: MRI.getType(Reg: OldDst), RB); |
2945 | RBI.constrainGenericRegister(Reg: OldDst, RC: *SubRegRC, MRI); |
2946 | MIB.setInstr(LdSt); |
2947 | } |
2948 | } |
2949 | |
2950 | // Helper lambda for partially selecting I. Either returns the original |
2951 | // instruction with an updated opcode, or a new instruction. |
2952 | auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { |
2953 | bool IsStore = isa<GStore>(Val: I); |
2954 | const unsigned NewOpc = |
2955 | selectLoadStoreUIOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize: MemSizeInBits); |
2956 | if (NewOpc == I.getOpcode()) |
2957 | return nullptr; |
2958 | // Check if we can fold anything into the addressing mode. |
2959 | auto AddrModeFns = |
2960 | selectAddrModeIndexed(Root&: I.getOperand(i: 1), Size: MemSizeInBytes); |
2961 | if (!AddrModeFns) { |
2962 | // Can't fold anything. Use the original instruction. |
2963 | I.setDesc(TII.get(NewOpc)); |
2964 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2965 | return &I; |
2966 | } |
2967 | |
2968 | // Folded something. Create a new instruction and return it. |
2969 | auto NewInst = MIB.buildInstr(Opc: NewOpc, DstOps: {}, SrcOps: {}, Flags: I.getFlags()); |
2970 | Register CurValReg = I.getOperand(i: 0).getReg(); |
2971 | IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg); |
2972 | NewInst.cloneMemRefs(I); |
2973 | for (auto &Fn : *AddrModeFns) |
2974 | Fn(NewInst); |
2975 | I.eraseFromParent(); |
2976 | return &*NewInst; |
2977 | }; |
2978 | |
2979 | MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); |
2980 | if (!LoadStore) |
2981 | return false; |
2982 | |
2983 | // If we're storing a 0, use WZR/XZR. |
2984 | if (Opcode == TargetOpcode::G_STORE) { |
2985 | auto CVal = getIConstantVRegValWithLookThrough( |
2986 | VReg: LoadStore->getOperand(i: 0).getReg(), MRI); |
2987 | if (CVal && CVal->Value == 0) { |
2988 | switch (LoadStore->getOpcode()) { |
2989 | case AArch64::STRWui: |
2990 | case AArch64::STRHHui: |
2991 | case AArch64::STRBBui: |
2992 | LoadStore->getOperand(0).setReg(AArch64::WZR); |
2993 | break; |
2994 | case AArch64::STRXui: |
2995 | LoadStore->getOperand(0).setReg(AArch64::XZR); |
2996 | break; |
2997 | } |
2998 | } |
2999 | } |
3000 | |
3001 | if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD && |
3002 | ValTy == LLT::scalar(SizeInBits: 64) && MemSizeInBits == 32)) { |
3003 | // The any/zextload from a smaller type to i32 should be handled by the |
3004 | // importer. |
3005 | if (MRI.getType(Reg: LoadStore->getOperand(i: 0).getReg()).getSizeInBits() != 64) |
3006 | return false; |
3007 | // If we have an extending load then change the load's type to be a |
3008 | // narrower reg and zero_extend with SUBREG_TO_REG. |
3009 | Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); |
3010 | Register DstReg = LoadStore->getOperand(i: 0).getReg(); |
3011 | LoadStore->getOperand(i: 0).setReg(LdReg); |
3012 | |
3013 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LoadStore->getIterator())); |
3014 | MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) |
3015 | .addImm(0) |
3016 | .addUse(LdReg) |
3017 | .addImm(AArch64::sub_32); |
3018 | constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); |
3019 | return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, |
3020 | MRI); |
3021 | } |
3022 | return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); |
3023 | } |
3024 | |
3025 | case TargetOpcode::G_INDEXED_ZEXTLOAD: |
3026 | case TargetOpcode::G_INDEXED_SEXTLOAD: |
3027 | return selectIndexedExtLoad(I, MRI); |
3028 | case TargetOpcode::G_INDEXED_LOAD: |
3029 | return selectIndexedLoad(I, MRI); |
3030 | case TargetOpcode::G_INDEXED_STORE: |
3031 | return selectIndexedStore(I&: cast<GIndexedStore>(Val&: I), MRI); |
3032 | |
3033 | case TargetOpcode::G_LSHR: |
3034 | case TargetOpcode::G_ASHR: |
3035 | if (MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector()) |
3036 | return selectVectorAshrLshr(I, MRI); |
3037 | [[fallthrough]]; |
3038 | case TargetOpcode::G_SHL: |
3039 | if (Opcode == TargetOpcode::G_SHL && |
3040 | MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector()) |
3041 | return selectVectorSHL(I, MRI); |
3042 | |
3043 | // These shifts were legalized to have 64 bit shift amounts because we |
3044 | // want to take advantage of the selection patterns that assume the |
3045 | // immediates are s64s, however, selectBinaryOp will assume both operands |
3046 | // will have the same bit size. |
3047 | { |
3048 | Register SrcReg = I.getOperand(i: 1).getReg(); |
3049 | Register ShiftReg = I.getOperand(i: 2).getReg(); |
3050 | const LLT ShiftTy = MRI.getType(Reg: ShiftReg); |
3051 | const LLT SrcTy = MRI.getType(Reg: SrcReg); |
3052 | if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && |
3053 | ShiftTy.getSizeInBits() == 64) { |
3054 | assert(!ShiftTy.isVector() && "unexpected vector shift ty" ); |
3055 | // Insert a subregister copy to implement a 64->32 trunc |
3056 | auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {}) |
3057 | .addReg(ShiftReg, 0, AArch64::sub_32); |
3058 | MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); |
3059 | I.getOperand(i: 2).setReg(Trunc.getReg(0)); |
3060 | } |
3061 | } |
3062 | [[fallthrough]]; |
3063 | case TargetOpcode::G_OR: { |
3064 | // Reject the various things we don't support yet. |
3065 | if (unsupportedBinOp(I, RBI, MRI, TRI)) |
3066 | return false; |
3067 | |
3068 | const unsigned OpSize = Ty.getSizeInBits(); |
3069 | |
3070 | const Register DefReg = I.getOperand(i: 0).getReg(); |
3071 | const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); |
3072 | |
3073 | const unsigned NewOpc = selectBinaryOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize); |
3074 | if (NewOpc == I.getOpcode()) |
3075 | return false; |
3076 | |
3077 | I.setDesc(TII.get(NewOpc)); |
3078 | // FIXME: Should the type be always reset in setDesc? |
3079 | |
3080 | // Now that we selected an opcode, we need to constrain the register |
3081 | // operands to use appropriate classes. |
3082 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3083 | } |
3084 | |
3085 | case TargetOpcode::G_PTR_ADD: { |
3086 | emitADD(DefReg: I.getOperand(i: 0).getReg(), LHS&: I.getOperand(i: 1), RHS&: I.getOperand(i: 2), MIRBuilder&: MIB); |
3087 | I.eraseFromParent(); |
3088 | return true; |
3089 | } |
3090 | |
3091 | case TargetOpcode::G_SADDE: |
3092 | case TargetOpcode::G_UADDE: |
3093 | case TargetOpcode::G_SSUBE: |
3094 | case TargetOpcode::G_USUBE: |
3095 | case TargetOpcode::G_SADDO: |
3096 | case TargetOpcode::G_UADDO: |
3097 | case TargetOpcode::G_SSUBO: |
3098 | case TargetOpcode::G_USUBO: |
3099 | return selectOverflowOp(I, MRI); |
3100 | |
3101 | case TargetOpcode::G_PTRMASK: { |
3102 | Register MaskReg = I.getOperand(i: 2).getReg(); |
3103 | std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(VReg: MaskReg, MRI); |
3104 | // TODO: Implement arbitrary cases |
3105 | if (!MaskVal || !isShiftedMask_64(Value: *MaskVal)) |
3106 | return false; |
3107 | |
3108 | uint64_t Mask = *MaskVal; |
3109 | I.setDesc(TII.get(AArch64::ANDXri)); |
3110 | I.getOperand(i: 2).ChangeToImmediate( |
3111 | ImmVal: AArch64_AM::encodeLogicalImmediate(imm: Mask, regSize: 64)); |
3112 | |
3113 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3114 | } |
3115 | case TargetOpcode::G_PTRTOINT: |
3116 | case TargetOpcode::G_TRUNC: { |
3117 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3118 | const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
3119 | |
3120 | const Register DstReg = I.getOperand(i: 0).getReg(); |
3121 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
3122 | |
3123 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); |
3124 | const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); |
3125 | |
3126 | if (DstRB.getID() != SrcRB.getID()) { |
3127 | LLVM_DEBUG( |
3128 | dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n" ); |
3129 | return false; |
3130 | } |
3131 | |
3132 | if (DstRB.getID() == AArch64::GPRRegBankID) { |
3133 | const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB); |
3134 | if (!DstRC) |
3135 | return false; |
3136 | |
3137 | const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(Ty: SrcTy, RB: SrcRB); |
3138 | if (!SrcRC) |
3139 | return false; |
3140 | |
3141 | if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: *SrcRC, MRI) || |
3142 | !RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) { |
3143 | LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n" ); |
3144 | return false; |
3145 | } |
3146 | |
3147 | if (DstRC == SrcRC) { |
3148 | // Nothing to be done |
3149 | } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(SizeInBits: 32) && |
3150 | SrcTy == LLT::scalar(SizeInBits: 64)) { |
3151 | llvm_unreachable("TableGen can import this case" ); |
3152 | return false; |
3153 | } else if (DstRC == &AArch64::GPR32RegClass && |
3154 | SrcRC == &AArch64::GPR64RegClass) { |
3155 | I.getOperand(1).setSubReg(AArch64::sub_32); |
3156 | } else { |
3157 | LLVM_DEBUG( |
3158 | dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n" ); |
3159 | return false; |
3160 | } |
3161 | |
3162 | I.setDesc(TII.get(TargetOpcode::COPY)); |
3163 | return true; |
3164 | } else if (DstRB.getID() == AArch64::FPRRegBankID) { |
3165 | if (DstTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) && |
3166 | SrcTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) { |
3167 | I.setDesc(TII.get(AArch64::XTNv4i16)); |
3168 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3169 | return true; |
3170 | } |
3171 | |
3172 | if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { |
3173 | MachineInstr * = emitExtractVectorElt( |
3174 | DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: DstTy.getSizeInBits()), VecReg: SrcReg, LaneIdx: 0, MIRBuilder&: MIB); |
3175 | if (!Extract) |
3176 | return false; |
3177 | I.eraseFromParent(); |
3178 | return true; |
3179 | } |
3180 | |
3181 | // We might have a vector G_PTRTOINT, in which case just emit a COPY. |
3182 | if (Opcode == TargetOpcode::G_PTRTOINT) { |
3183 | assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector" ); |
3184 | I.setDesc(TII.get(TargetOpcode::COPY)); |
3185 | return selectCopy(I, TII, MRI, TRI, RBI); |
3186 | } |
3187 | } |
3188 | |
3189 | return false; |
3190 | } |
3191 | |
3192 | case TargetOpcode::G_ANYEXT: { |
3193 | if (selectUSMovFromExtend(I, MRI)) |
3194 | return true; |
3195 | |
3196 | const Register DstReg = I.getOperand(i: 0).getReg(); |
3197 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
3198 | |
3199 | const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); |
3200 | if (RBDst.getID() != AArch64::GPRRegBankID) { |
3201 | LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst |
3202 | << ", expected: GPR\n" ); |
3203 | return false; |
3204 | } |
3205 | |
3206 | const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); |
3207 | if (RBSrc.getID() != AArch64::GPRRegBankID) { |
3208 | LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc |
3209 | << ", expected: GPR\n" ); |
3210 | return false; |
3211 | } |
3212 | |
3213 | const unsigned DstSize = MRI.getType(Reg: DstReg).getSizeInBits(); |
3214 | |
3215 | if (DstSize == 0) { |
3216 | LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n" ); |
3217 | return false; |
3218 | } |
3219 | |
3220 | if (DstSize != 64 && DstSize > 32) { |
3221 | LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize |
3222 | << ", expected: 32 or 64\n" ); |
3223 | return false; |
3224 | } |
3225 | // At this point G_ANYEXT is just like a plain COPY, but we need |
3226 | // to explicitly form the 64-bit value if any. |
3227 | if (DstSize > 32) { |
3228 | Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass); |
3229 | BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) |
3230 | .addDef(ExtSrc) |
3231 | .addImm(0) |
3232 | .addUse(SrcReg) |
3233 | .addImm(AArch64::sub_32); |
3234 | I.getOperand(i: 1).setReg(ExtSrc); |
3235 | } |
3236 | return selectCopy(I, TII, MRI, TRI, RBI); |
3237 | } |
3238 | |
3239 | case TargetOpcode::G_ZEXT: |
3240 | case TargetOpcode::G_SEXT_INREG: |
3241 | case TargetOpcode::G_SEXT: { |
3242 | if (selectUSMovFromExtend(I, MRI)) |
3243 | return true; |
3244 | |
3245 | unsigned Opcode = I.getOpcode(); |
3246 | const bool IsSigned = Opcode != TargetOpcode::G_ZEXT; |
3247 | const Register DefReg = I.getOperand(i: 0).getReg(); |
3248 | Register SrcReg = I.getOperand(i: 1).getReg(); |
3249 | const LLT DstTy = MRI.getType(Reg: DefReg); |
3250 | const LLT SrcTy = MRI.getType(Reg: SrcReg); |
3251 | unsigned DstSize = DstTy.getSizeInBits(); |
3252 | unsigned SrcSize = SrcTy.getSizeInBits(); |
3253 | |
3254 | // SEXT_INREG has the same src reg size as dst, the size of the value to be |
3255 | // extended is encoded in the imm. |
3256 | if (Opcode == TargetOpcode::G_SEXT_INREG) |
3257 | SrcSize = I.getOperand(i: 2).getImm(); |
3258 | |
3259 | if (DstTy.isVector()) |
3260 | return false; // Should be handled by imported patterns. |
3261 | |
3262 | assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == |
3263 | AArch64::GPRRegBankID && |
3264 | "Unexpected ext regbank" ); |
3265 | |
3266 | MachineInstr *ExtI; |
3267 | |
3268 | // First check if we're extending the result of a load which has a dest type |
3269 | // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest |
3270 | // GPR register on AArch64 and all loads which are smaller automatically |
3271 | // zero-extend the upper bits. E.g. |
3272 | // %v(s8) = G_LOAD %p, :: (load 1) |
3273 | // %v2(s32) = G_ZEXT %v(s8) |
3274 | if (!IsSigned) { |
3275 | auto *LoadMI = getOpcodeDef(Opcode: TargetOpcode::G_LOAD, Reg: SrcReg, MRI); |
3276 | bool IsGPR = |
3277 | RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID; |
3278 | if (LoadMI && IsGPR) { |
3279 | const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); |
3280 | unsigned BytesLoaded = MemOp->getSize().getValue(); |
3281 | if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) |
3282 | return selectCopy(I, TII, MRI, TRI, RBI); |
3283 | } |
3284 | |
3285 | // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs) |
3286 | // + SUBREG_TO_REG. |
3287 | if (IsGPR && SrcSize == 32 && DstSize == 64) { |
3288 | Register SubregToRegSrc = |
3289 | MRI.createVirtualRegister(&AArch64::GPR32RegClass); |
3290 | const Register ZReg = AArch64::WZR; |
3291 | MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg}) |
3292 | .addImm(0); |
3293 | |
3294 | MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) |
3295 | .addImm(0) |
3296 | .addUse(SubregToRegSrc) |
3297 | .addImm(AArch64::sub_32); |
3298 | |
3299 | if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, |
3300 | MRI)) { |
3301 | LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n" ); |
3302 | return false; |
3303 | } |
3304 | |
3305 | if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, |
3306 | MRI)) { |
3307 | LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n" ); |
3308 | return false; |
3309 | } |
3310 | |
3311 | I.eraseFromParent(); |
3312 | return true; |
3313 | } |
3314 | } |
3315 | |
3316 | if (DstSize == 64) { |
3317 | if (Opcode != TargetOpcode::G_SEXT_INREG) { |
3318 | // FIXME: Can we avoid manually doing this? |
3319 | if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, |
3320 | MRI)) { |
3321 | LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) |
3322 | << " operand\n" ); |
3323 | return false; |
3324 | } |
3325 | SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, |
3326 | {&AArch64::GPR64RegClass}, {}) |
3327 | .addImm(0) |
3328 | .addUse(SrcReg) |
3329 | .addImm(AArch64::sub_32) |
3330 | .getReg(0); |
3331 | } |
3332 | |
3333 | ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, |
3334 | {DefReg}, {SrcReg}) |
3335 | .addImm(0) |
3336 | .addImm(SrcSize - 1); |
3337 | } else if (DstSize <= 32) { |
3338 | ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri, |
3339 | {DefReg}, {SrcReg}) |
3340 | .addImm(0) |
3341 | .addImm(SrcSize - 1); |
3342 | } else { |
3343 | return false; |
3344 | } |
3345 | |
3346 | constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); |
3347 | I.eraseFromParent(); |
3348 | return true; |
3349 | } |
3350 | |
3351 | case TargetOpcode::G_SITOFP: |
3352 | case TargetOpcode::G_UITOFP: |
3353 | case TargetOpcode::G_FPTOSI: |
3354 | case TargetOpcode::G_FPTOUI: { |
3355 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()), |
3356 | SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
3357 | const unsigned NewOpc = selectFPConvOpc(GenericOpc: Opcode, DstTy, SrcTy); |
3358 | if (NewOpc == Opcode) |
3359 | return false; |
3360 | |
3361 | I.setDesc(TII.get(NewOpc)); |
3362 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3363 | I.setFlags(MachineInstr::NoFPExcept); |
3364 | |
3365 | return true; |
3366 | } |
3367 | |
3368 | case TargetOpcode::G_FREEZE: |
3369 | return selectCopy(I, TII, MRI, TRI, RBI); |
3370 | |
3371 | case TargetOpcode::G_INTTOPTR: |
3372 | // The importer is currently unable to import pointer types since they |
3373 | // didn't exist in SelectionDAG. |
3374 | return selectCopy(I, TII, MRI, TRI, RBI); |
3375 | |
3376 | case TargetOpcode::G_BITCAST: |
3377 | // Imported SelectionDAG rules can handle every bitcast except those that |
3378 | // bitcast from a type to the same type. Ideally, these shouldn't occur |
3379 | // but we might not run an optimizer that deletes them. The other exception |
3380 | // is bitcasts involving pointer types, as SelectionDAG has no knowledge |
3381 | // of them. |
3382 | return selectCopy(I, TII, MRI, TRI, RBI); |
3383 | |
3384 | case TargetOpcode::G_SELECT: { |
3385 | auto &Sel = cast<GSelect>(Val&: I); |
3386 | const Register CondReg = Sel.getCondReg(); |
3387 | const Register TReg = Sel.getTrueReg(); |
3388 | const Register FReg = Sel.getFalseReg(); |
3389 | |
3390 | if (tryOptSelect(Sel)) |
3391 | return true; |
3392 | |
3393 | // Make sure to use an unused vreg instead of wzr, so that the peephole |
3394 | // optimizations will be able to optimize these. |
3395 | Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); |
3396 | auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) |
3397 | .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); |
3398 | constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); |
3399 | if (!emitSelect(Dst: Sel.getReg(Idx: 0), True: TReg, False: FReg, CC: AArch64CC::NE, MIB)) |
3400 | return false; |
3401 | Sel.eraseFromParent(); |
3402 | return true; |
3403 | } |
3404 | case TargetOpcode::G_ICMP: { |
3405 | if (Ty.isVector()) |
3406 | return selectVectorICmp(I, MRI); |
3407 | |
3408 | if (Ty != LLT::scalar(SizeInBits: 32)) { |
3409 | LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty |
3410 | << ", expected: " << LLT::scalar(32) << '\n'); |
3411 | return false; |
3412 | } |
3413 | |
3414 | auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate()); |
3415 | const AArch64CC::CondCode InvCC = |
3416 | changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred)); |
3417 | emitIntegerCompare(LHS&: I.getOperand(i: 2), RHS&: I.getOperand(i: 3), Predicate&: I.getOperand(i: 1), MIRBuilder&: MIB); |
3418 | emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR, |
3419 | /*Src2=*/AArch64::WZR, InvCC, MIB); |
3420 | I.eraseFromParent(); |
3421 | return true; |
3422 | } |
3423 | |
3424 | case TargetOpcode::G_FCMP: { |
3425 | CmpInst::Predicate Pred = |
3426 | static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate()); |
3427 | if (!emitFPCompare(LHS: I.getOperand(i: 2).getReg(), RHS: I.getOperand(i: 3).getReg(), MIRBuilder&: MIB, |
3428 | Pred) || |
3429 | !emitCSetForFCmp(Dst: I.getOperand(i: 0).getReg(), Pred, MIRBuilder&: MIB)) |
3430 | return false; |
3431 | I.eraseFromParent(); |
3432 | return true; |
3433 | } |
3434 | case TargetOpcode::G_VASTART: |
3435 | return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI) |
3436 | : selectVaStartAAPCS(I, MF, MRI); |
3437 | case TargetOpcode::G_INTRINSIC: |
3438 | return selectIntrinsic(I, MRI); |
3439 | case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: |
3440 | return selectIntrinsicWithSideEffects(I, MRI); |
3441 | case TargetOpcode::G_IMPLICIT_DEF: { |
3442 | I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); |
3443 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3444 | const Register DstReg = I.getOperand(i: 0).getReg(); |
3445 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); |
3446 | const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB); |
3447 | RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI); |
3448 | return true; |
3449 | } |
3450 | case TargetOpcode::G_BLOCK_ADDR: { |
3451 | if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) { |
3452 | materializeLargeCMVal(I, V: I.getOperand(i: 1).getBlockAddress(), OpFlags: 0); |
3453 | I.eraseFromParent(); |
3454 | return true; |
3455 | } else { |
3456 | I.setDesc(TII.get(AArch64::MOVaddrBA)); |
3457 | auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA), |
3458 | I.getOperand(0).getReg()) |
3459 | .addBlockAddress(I.getOperand(1).getBlockAddress(), |
3460 | /* Offset */ 0, AArch64II::MO_PAGE) |
3461 | .addBlockAddress( |
3462 | I.getOperand(1).getBlockAddress(), /* Offset */ 0, |
3463 | AArch64II::MO_NC | AArch64II::MO_PAGEOFF); |
3464 | I.eraseFromParent(); |
3465 | return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); |
3466 | } |
3467 | } |
3468 | case AArch64::G_DUP: { |
3469 | // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by |
3470 | // imported patterns. Do it manually here. Avoiding generating s16 gpr is |
3471 | // difficult because at RBS we may end up pessimizing the fpr case if we |
3472 | // decided to add an anyextend to fix this. Manual selection is the most |
3473 | // robust solution for now. |
3474 | if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != |
3475 | AArch64::GPRRegBankID) |
3476 | return false; // We expect the fpr regbank case to be imported. |
3477 | LLT VecTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3478 | if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) |
3479 | I.setDesc(TII.get(AArch64::DUPv8i8gpr)); |
3480 | else if (VecTy == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) |
3481 | I.setDesc(TII.get(AArch64::DUPv16i8gpr)); |
3482 | else if (VecTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) |
3483 | I.setDesc(TII.get(AArch64::DUPv4i16gpr)); |
3484 | else if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) |
3485 | I.setDesc(TII.get(AArch64::DUPv8i16gpr)); |
3486 | else |
3487 | return false; |
3488 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3489 | } |
3490 | case TargetOpcode::G_BUILD_VECTOR: |
3491 | return selectBuildVector(I, MRI); |
3492 | case TargetOpcode::G_MERGE_VALUES: |
3493 | return selectMergeValues(I, MRI); |
3494 | case TargetOpcode::G_UNMERGE_VALUES: |
3495 | return selectUnmergeValues(I, MRI); |
3496 | case TargetOpcode::G_SHUFFLE_VECTOR: |
3497 | return selectShuffleVector(I, MRI); |
3498 | case TargetOpcode::G_EXTRACT_VECTOR_ELT: |
3499 | return selectExtractElt(I, MRI); |
3500 | case TargetOpcode::G_CONCAT_VECTORS: |
3501 | return selectConcatVectors(I, MRI); |
3502 | case TargetOpcode::G_JUMP_TABLE: |
3503 | return selectJumpTable(I, MRI); |
3504 | case TargetOpcode::G_MEMCPY: |
3505 | case TargetOpcode::G_MEMCPY_INLINE: |
3506 | case TargetOpcode::G_MEMMOVE: |
3507 | case TargetOpcode::G_MEMSET: |
3508 | assert(STI.hasMOPS() && "Shouldn't get here without +mops feature" ); |
3509 | return selectMOPS(I, MRI); |
3510 | } |
3511 | |
3512 | return false; |
3513 | } |
3514 | |
3515 | bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) { |
3516 | MachineIRBuilderState OldMIBState = MIB.getState(); |
3517 | bool Success = select(I); |
3518 | MIB.setState(OldMIBState); |
3519 | return Success; |
3520 | } |
3521 | |
3522 | bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI, |
3523 | MachineRegisterInfo &MRI) { |
3524 | unsigned Mopcode; |
3525 | switch (GI.getOpcode()) { |
3526 | case TargetOpcode::G_MEMCPY: |
3527 | case TargetOpcode::G_MEMCPY_INLINE: |
3528 | Mopcode = AArch64::MOPSMemoryCopyPseudo; |
3529 | break; |
3530 | case TargetOpcode::G_MEMMOVE: |
3531 | Mopcode = AArch64::MOPSMemoryMovePseudo; |
3532 | break; |
3533 | case TargetOpcode::G_MEMSET: |
3534 | // For tagged memset see llvm.aarch64.mops.memset.tag |
3535 | Mopcode = AArch64::MOPSMemorySetPseudo; |
3536 | break; |
3537 | } |
3538 | |
3539 | auto &DstPtr = GI.getOperand(i: 0); |
3540 | auto &SrcOrVal = GI.getOperand(i: 1); |
3541 | auto &Size = GI.getOperand(i: 2); |
3542 | |
3543 | // Create copies of the registers that can be clobbered. |
3544 | const Register DstPtrCopy = MRI.cloneVirtualRegister(VReg: DstPtr.getReg()); |
3545 | const Register SrcValCopy = MRI.cloneVirtualRegister(VReg: SrcOrVal.getReg()); |
3546 | const Register SizeCopy = MRI.cloneVirtualRegister(VReg: Size.getReg()); |
3547 | |
3548 | const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo; |
3549 | const auto &SrcValRegClass = |
3550 | IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass; |
3551 | |
3552 | // Constrain to specific registers |
3553 | RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI); |
3554 | RBI.constrainGenericRegister(Reg: SrcValCopy, RC: SrcValRegClass, MRI); |
3555 | RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI); |
3556 | |
3557 | MIB.buildCopy(Res: DstPtrCopy, Op: DstPtr); |
3558 | MIB.buildCopy(Res: SrcValCopy, Op: SrcOrVal); |
3559 | MIB.buildCopy(Res: SizeCopy, Op: Size); |
3560 | |
3561 | // New instruction uses the copied registers because it must update them. |
3562 | // The defs are not used since they don't exist in G_MEM*. They are still |
3563 | // tied. |
3564 | // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE |
3565 | Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass); |
3566 | Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
3567 | if (IsSet) { |
3568 | MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSize}, |
3569 | SrcOps: {DstPtrCopy, SizeCopy, SrcValCopy}); |
3570 | } else { |
3571 | Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass); |
3572 | MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSrcPtr, DefSize}, |
3573 | SrcOps: {DstPtrCopy, SrcValCopy, SizeCopy}); |
3574 | } |
3575 | |
3576 | GI.eraseFromParent(); |
3577 | return true; |
3578 | } |
3579 | |
3580 | bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, |
3581 | MachineRegisterInfo &MRI) { |
3582 | assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT" ); |
3583 | Register JTAddr = I.getOperand(i: 0).getReg(); |
3584 | unsigned JTI = I.getOperand(i: 1).getIndex(); |
3585 | Register Index = I.getOperand(i: 2).getReg(); |
3586 | |
3587 | Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
3588 | Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); |
3589 | |
3590 | MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(Idx: JTI, Size: 4, PCRelSym: nullptr); |
3591 | auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, |
3592 | {TargetReg, ScratchReg}, {JTAddr, Index}) |
3593 | .addJumpTableIndex(JTI); |
3594 | // Save the jump table info. |
3595 | MIB.buildInstr(Opc: TargetOpcode::JUMP_TABLE_DEBUG_INFO, DstOps: {}, |
3596 | SrcOps: {static_cast<int64_t>(JTI)}); |
3597 | // Build the indirect branch. |
3598 | MIB.buildInstr(AArch64::BR, {}, {TargetReg}); |
3599 | I.eraseFromParent(); |
3600 | return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI); |
3601 | } |
3602 | |
3603 | bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I, |
3604 | MachineRegisterInfo &MRI) { |
3605 | assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table" ); |
3606 | assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!" ); |
3607 | |
3608 | Register DstReg = I.getOperand(i: 0).getReg(); |
3609 | unsigned JTI = I.getOperand(i: 1).getIndex(); |
3610 | // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. |
3611 | auto MovMI = |
3612 | MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) |
3613 | .addJumpTableIndex(JTI, AArch64II::MO_PAGE) |
3614 | .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF); |
3615 | I.eraseFromParent(); |
3616 | return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); |
3617 | } |
3618 | |
3619 | bool AArch64InstructionSelector::selectTLSGlobalValue( |
3620 | MachineInstr &I, MachineRegisterInfo &MRI) { |
3621 | if (!STI.isTargetMachO()) |
3622 | return false; |
3623 | MachineFunction &MF = *I.getParent()->getParent(); |
3624 | MF.getFrameInfo().setAdjustsStack(true); |
3625 | |
3626 | const auto &GlobalOp = I.getOperand(i: 1); |
3627 | assert(GlobalOp.getOffset() == 0 && |
3628 | "Shouldn't have an offset on TLS globals!" ); |
3629 | const GlobalValue &GV = *GlobalOp.getGlobal(); |
3630 | |
3631 | auto LoadGOT = |
3632 | MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {}) |
3633 | .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); |
3634 | |
3635 | auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, |
3636 | {LoadGOT.getReg(0)}) |
3637 | .addImm(0); |
3638 | |
3639 | MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0)); |
3640 | // TLS calls preserve all registers except those that absolutely must be |
3641 | // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be |
3642 | // silly). |
3643 | MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load}) |
3644 | .addUse(AArch64::X0, RegState::Implicit) |
3645 | .addDef(AArch64::X0, RegState::Implicit) |
3646 | .addRegMask(TRI.getTLSCallPreservedMask()); |
3647 | |
3648 | MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0)); |
3649 | RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass, |
3650 | MRI); |
3651 | I.eraseFromParent(); |
3652 | return true; |
3653 | } |
3654 | |
3655 | bool AArch64InstructionSelector::selectVectorICmp( |
3656 | MachineInstr &I, MachineRegisterInfo &MRI) { |
3657 | Register DstReg = I.getOperand(i: 0).getReg(); |
3658 | LLT DstTy = MRI.getType(Reg: DstReg); |
3659 | Register SrcReg = I.getOperand(i: 2).getReg(); |
3660 | Register Src2Reg = I.getOperand(i: 3).getReg(); |
3661 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
3662 | |
3663 | unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); |
3664 | unsigned NumElts = DstTy.getNumElements(); |
3665 | |
3666 | // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b |
3667 | // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16 |
3668 | // Third index is cc opcode: |
3669 | // 0 == eq |
3670 | // 1 == ugt |
3671 | // 2 == uge |
3672 | // 3 == ult |
3673 | // 4 == ule |
3674 | // 5 == sgt |
3675 | // 6 == sge |
3676 | // 7 == slt |
3677 | // 8 == sle |
3678 | // ne is done by negating 'eq' result. |
3679 | |
3680 | // This table below assumes that for some comparisons the operands will be |
3681 | // commuted. |
3682 | // ult op == commute + ugt op |
3683 | // ule op == commute + uge op |
3684 | // slt op == commute + sgt op |
3685 | // sle op == commute + sge op |
3686 | unsigned PredIdx = 0; |
3687 | bool SwapOperands = false; |
3688 | CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(i: 1).getPredicate(); |
3689 | switch (Pred) { |
3690 | case CmpInst::ICMP_NE: |
3691 | case CmpInst::ICMP_EQ: |
3692 | PredIdx = 0; |
3693 | break; |
3694 | case CmpInst::ICMP_UGT: |
3695 | PredIdx = 1; |
3696 | break; |
3697 | case CmpInst::ICMP_UGE: |
3698 | PredIdx = 2; |
3699 | break; |
3700 | case CmpInst::ICMP_ULT: |
3701 | PredIdx = 3; |
3702 | SwapOperands = true; |
3703 | break; |
3704 | case CmpInst::ICMP_ULE: |
3705 | PredIdx = 4; |
3706 | SwapOperands = true; |
3707 | break; |
3708 | case CmpInst::ICMP_SGT: |
3709 | PredIdx = 5; |
3710 | break; |
3711 | case CmpInst::ICMP_SGE: |
3712 | PredIdx = 6; |
3713 | break; |
3714 | case CmpInst::ICMP_SLT: |
3715 | PredIdx = 7; |
3716 | SwapOperands = true; |
3717 | break; |
3718 | case CmpInst::ICMP_SLE: |
3719 | PredIdx = 8; |
3720 | SwapOperands = true; |
3721 | break; |
3722 | default: |
3723 | llvm_unreachable("Unhandled icmp predicate" ); |
3724 | return false; |
3725 | } |
3726 | |
3727 | // This table obviously should be tablegen'd when we have our GISel native |
3728 | // tablegen selector. |
3729 | |
3730 | static const unsigned OpcTable[4][4][9] = { |
3731 | { |
3732 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3733 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3734 | 0 /* invalid */}, |
3735 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3736 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3737 | 0 /* invalid */}, |
3738 | {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, |
3739 | AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, |
3740 | AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, |
3741 | {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, |
3742 | AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, |
3743 | AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} |
3744 | }, |
3745 | { |
3746 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3747 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3748 | 0 /* invalid */}, |
3749 | {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, |
3750 | AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, |
3751 | AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, |
3752 | {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, |
3753 | AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, |
3754 | AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, |
3755 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3756 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3757 | 0 /* invalid */} |
3758 | }, |
3759 | { |
3760 | {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, |
3761 | AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, |
3762 | AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, |
3763 | {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, |
3764 | AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, |
3765 | AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, |
3766 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3767 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3768 | 0 /* invalid */}, |
3769 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3770 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3771 | 0 /* invalid */} |
3772 | }, |
3773 | { |
3774 | {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, |
3775 | AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64, |
3776 | AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, |
3777 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3778 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3779 | 0 /* invalid */}, |
3780 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3781 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3782 | 0 /* invalid */}, |
3783 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3784 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, |
3785 | 0 /* invalid */} |
3786 | }, |
3787 | }; |
3788 | unsigned EltIdx = Log2_32(Value: SrcEltSize / 8); |
3789 | unsigned NumEltsIdx = Log2_32(Value: NumElts / 2); |
3790 | unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx]; |
3791 | if (!Opc) { |
3792 | LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode" ); |
3793 | return false; |
3794 | } |
3795 | |
3796 | const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI); |
3797 | const TargetRegisterClass *SrcRC = |
3798 | getRegClassForTypeOnBank(Ty: SrcTy, RB: VecRB, GetAllRegSet: true); |
3799 | if (!SrcRC) { |
3800 | LLVM_DEBUG(dbgs() << "Could not determine source register class.\n" ); |
3801 | return false; |
3802 | } |
3803 | |
3804 | unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0; |
3805 | if (SrcTy.getSizeInBits() == 128) |
3806 | NotOpc = NotOpc ? AArch64::NOTv16i8 : 0; |
3807 | |
3808 | if (SwapOperands) |
3809 | std::swap(a&: SrcReg, b&: Src2Reg); |
3810 | |
3811 | auto Cmp = MIB.buildInstr(Opc, DstOps: {SrcRC}, SrcOps: {SrcReg, Src2Reg}); |
3812 | constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); |
3813 | |
3814 | // Invert if we had a 'ne' cc. |
3815 | if (NotOpc) { |
3816 | Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp}); |
3817 | constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); |
3818 | } else { |
3819 | MIB.buildCopy(Res: DstReg, Op: Cmp.getReg(0)); |
3820 | } |
3821 | RBI.constrainGenericRegister(Reg: DstReg, RC: *SrcRC, MRI); |
3822 | I.eraseFromParent(); |
3823 | return true; |
3824 | } |
3825 | |
3826 | MachineInstr *AArch64InstructionSelector::emitScalarToVector( |
3827 | unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, |
3828 | MachineIRBuilder &MIRBuilder) const { |
3829 | auto Undef = MIRBuilder.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstRC}, SrcOps: {}); |
3830 | |
3831 | auto BuildFn = [&](unsigned SubregIndex) { |
3832 | auto Ins = |
3833 | MIRBuilder |
3834 | .buildInstr(Opc: TargetOpcode::INSERT_SUBREG, DstOps: {DstRC}, SrcOps: {Undef, Scalar}) |
3835 | .addImm(Val: SubregIndex); |
3836 | constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI); |
3837 | constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI); |
3838 | return &*Ins; |
3839 | }; |
3840 | |
3841 | switch (EltSize) { |
3842 | case 8: |
3843 | return BuildFn(AArch64::bsub); |
3844 | case 16: |
3845 | return BuildFn(AArch64::hsub); |
3846 | case 32: |
3847 | return BuildFn(AArch64::ssub); |
3848 | case 64: |
3849 | return BuildFn(AArch64::dsub); |
3850 | default: |
3851 | return nullptr; |
3852 | } |
3853 | } |
3854 | |
/// Emit a subregister COPY that reads the low 32 or 64 bits (ssub/dsub) of
/// \p SrcReg into \p DstReg.
///
/// \returns the COPY instruction, or nullptr if DstReg's type does not map to
/// FPR32/FPR64 or no usable subregister index is found for that class.
MachineInstr *
AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
                                             MachineIRBuilder &MIB,
                                             MachineRegisterInfo &MRI) const {
  LLT DstTy = MRI.getType(Reg: DstReg);
  // The destination class is derived from the destination type but the
  // *source's* register bank.
  const TargetRegisterClass *RC =
      getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
    LLVM_DEBUG(dbgs() << "Unsupported register class!\n" );
    return nullptr;
  }
  unsigned SubReg = 0;
  if (!getSubRegForClass(RC, TRI, SubReg))
    return nullptr;
  // Only 32-bit (ssub) and 64-bit (dsub) narrows are supported.
  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
    LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
                      << DstTy.getSizeInBits() << "\n" );
    return nullptr;
  }
  auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {})
                  .addReg(RegNo: SrcReg, flags: 0, SubReg);
  RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
  return Copy;
}
3879 | |
/// Select a two-source G_MERGE_VALUES. Two cases are handled:
///   * s64 + s64 -> s128: lane-insert both sources into an IMPLICIT_DEF.
///   * s32 + s32 -> s64 on the GPR bank: SUBREG_TO_REG each source, then
///     combine with BFMXri.
/// Any other shape returns false (not selected).
bool AArch64InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode" );
  const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation" );
  // Bank of the first source decides which lowering applies below.
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(i: 1).getReg(), MRI, TRI);

  // Only merges of exactly two sources (def + 2 uses) are handled here.
  if (I.getNumOperands() != 3)
    return false;

  // Merging 2 s64s into an s128.
  if (DstTy == LLT::scalar(SizeInBits: 128)) {
    if (SrcTy.getSizeInBits() != 64)
      return false;
    Register DstReg = I.getOperand(i: 0).getReg();
    Register Src1Reg = I.getOperand(i: 1).getReg();
    Register Src2Reg = I.getOperand(i: 2).getReg();
    // Start from an undef s128; insert source 1 into lane 0, source 2 into
    // lane 1.
    auto Tmp = MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstTy}, SrcOps: {});
    MachineInstr *InsMI = emitLaneInsert(DstReg: std::nullopt, SrcReg: Tmp.getReg(Idx: 0), EltReg: Src1Reg,
                                         /* LaneIdx */ 0, RB, MIRBuilder&: MIB);
    if (!InsMI)
      return false;
    MachineInstr *Ins2MI = emitLaneInsert(DstReg, SrcReg: InsMI->getOperand(i: 0).getReg(),
                                          EltReg: Src2Reg, /* LaneIdx */ 1, RB, MIRBuilder&: MIB);
    if (!Ins2MI)
      return false;
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
    return false;

  auto *DstRC = &AArch64::GPR64RegClass;
  // Widen the first s32 to a 64-bit register with SUBREG_TO_REG.
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::SUBREG_TO_REG))
                               .addDef(SubToRegDef)
                               .addImm(0)
                               .addUse(I.getOperand(1).getReg())
                               .addImm(AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
  // Need to anyext the second scalar before we can use bfm
  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::SUBREG_TO_REG))
                                .addDef(SubToRegDef2)
                                .addImm(0)
                                .addUse(I.getOperand(2).getReg())
                                .addImm(AArch64::sub_32);
  // BFMXri with immr=32, imms=31 inserts the second value's low 32 bits into
  // the upper half of the first, producing the merged s64.
  MachineInstr &BFM =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
           .addUse(SubToRegDef)
           .addUse(SubToRegDef2)
           .addImm(32)
           .addImm(31);
  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
3948 | |
3949 | static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &, |
3950 | const unsigned EltSize) { |
3951 | // Choose a lane copy opcode and subregister based off of the size of the |
3952 | // vector's elements. |
3953 | switch (EltSize) { |
3954 | case 8: |
3955 | CopyOpc = AArch64::DUPi8; |
3956 | ExtractSubReg = AArch64::bsub; |
3957 | break; |
3958 | case 16: |
3959 | CopyOpc = AArch64::DUPi16; |
3960 | ExtractSubReg = AArch64::hsub; |
3961 | break; |
3962 | case 32: |
3963 | CopyOpc = AArch64::DUPi32; |
3964 | ExtractSubReg = AArch64::ssub; |
3965 | break; |
3966 | case 64: |
3967 | CopyOpc = AArch64::DUPi64; |
3968 | ExtractSubReg = AArch64::dsub; |
3969 | break; |
3970 | default: |
3971 | // Unknown size, bail out. |
3972 | LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n" ); |
3973 | return false; |
3974 | } |
3975 | return true; |
3976 | } |
3977 | |
3978 | MachineInstr *AArch64InstructionSelector::( |
3979 | std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, |
3980 | Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { |
3981 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
3982 | unsigned CopyOpc = 0; |
3983 | unsigned = 0; |
3984 | if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: ScalarTy.getSizeInBits())) { |
3985 | LLVM_DEBUG( |
3986 | dbgs() << "Couldn't determine lane copy opcode for instruction.\n" ); |
3987 | return nullptr; |
3988 | } |
3989 | |
3990 | const TargetRegisterClass *DstRC = |
3991 | getRegClassForTypeOnBank(Ty: ScalarTy, RB: DstRB, GetAllRegSet: true); |
3992 | if (!DstRC) { |
3993 | LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n" ); |
3994 | return nullptr; |
3995 | } |
3996 | |
3997 | const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI); |
3998 | const LLT &VecTy = MRI.getType(Reg: VecReg); |
3999 | const TargetRegisterClass *VecRC = |
4000 | getRegClassForTypeOnBank(Ty: VecTy, RB: VecRB, GetAllRegSet: true); |
4001 | if (!VecRC) { |
4002 | LLVM_DEBUG(dbgs() << "Could not determine source register class.\n" ); |
4003 | return nullptr; |
4004 | } |
4005 | |
4006 | // The register that we're going to copy into. |
4007 | Register InsertReg = VecReg; |
4008 | if (!DstReg) |
4009 | DstReg = MRI.createVirtualRegister(RegClass: DstRC); |
4010 | // If the lane index is 0, we just use a subregister COPY. |
4011 | if (LaneIdx == 0) { |
4012 | auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {*DstReg}, SrcOps: {}) |
4013 | .addReg(RegNo: VecReg, flags: 0, SubReg: ExtractSubReg); |
4014 | RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI); |
4015 | return &*Copy; |
4016 | } |
4017 | |
4018 | // Lane copies require 128-bit wide registers. If we're dealing with an |
4019 | // unpacked vector, then we need to move up to that width. Insert an implicit |
4020 | // def and a subregister insert to get us there. |
4021 | if (VecTy.getSizeInBits() != 128) { |
4022 | MachineInstr *ScalarToVector = emitScalarToVector( |
4023 | VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder); |
4024 | if (!ScalarToVector) |
4025 | return nullptr; |
4026 | InsertReg = ScalarToVector->getOperand(i: 0).getReg(); |
4027 | } |
4028 | |
4029 | MachineInstr *LaneCopyMI = |
4030 | MIRBuilder.buildInstr(Opc: CopyOpc, DstOps: {*DstReg}, SrcOps: {InsertReg}).addImm(Val: LaneIdx); |
4031 | constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); |
4032 | |
4033 | // Make sure that we actually constrain the initial copy. |
4034 | RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI); |
4035 | return LaneCopyMI; |
4036 | } |
4037 | |
4038 | bool AArch64InstructionSelector::( |
4039 | MachineInstr &I, MachineRegisterInfo &MRI) { |
4040 | assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && |
4041 | "unexpected opcode!" ); |
4042 | Register DstReg = I.getOperand(i: 0).getReg(); |
4043 | const LLT NarrowTy = MRI.getType(Reg: DstReg); |
4044 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
4045 | const LLT WideTy = MRI.getType(Reg: SrcReg); |
4046 | (void)WideTy; |
4047 | assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && |
4048 | "source register size too small!" ); |
4049 | assert(!NarrowTy.isVector() && "cannot extract vector into vector!" ); |
4050 | |
4051 | // Need the lane index to determine the correct copy opcode. |
4052 | MachineOperand &LaneIdxOp = I.getOperand(i: 2); |
4053 | assert(LaneIdxOp.isReg() && "Lane index operand was not a register?" ); |
4054 | |
4055 | if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { |
4056 | LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n" ); |
4057 | return false; |
4058 | } |
4059 | |
4060 | // Find the index to extract from. |
4061 | auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: LaneIdxOp.getReg(), MRI); |
4062 | if (!VRegAndVal) |
4063 | return false; |
4064 | unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); |
4065 | |
4066 | |
4067 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); |
4068 | MachineInstr * = emitExtractVectorElt(DstReg, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg, |
4069 | LaneIdx, MIRBuilder&: MIB); |
4070 | if (!Extract) |
4071 | return false; |
4072 | |
4073 | I.eraseFromParent(); |
4074 | return true; |
4075 | } |
4076 | |
4077 | bool AArch64InstructionSelector::selectSplitVectorUnmerge( |
4078 | MachineInstr &I, MachineRegisterInfo &MRI) { |
4079 | unsigned NumElts = I.getNumOperands() - 1; |
4080 | Register SrcReg = I.getOperand(i: NumElts).getReg(); |
4081 | const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
4082 | const LLT SrcTy = MRI.getType(Reg: SrcReg); |
4083 | |
4084 | assert(NarrowTy.isVector() && "Expected an unmerge into vectors" ); |
4085 | if (SrcTy.getSizeInBits() > 128) { |
4086 | LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge" ); |
4087 | return false; |
4088 | } |
4089 | |
4090 | // We implement a split vector operation by treating the sub-vectors as |
4091 | // scalars and extracting them. |
4092 | const RegisterBank &DstRB = |
4093 | *RBI.getRegBank(I.getOperand(i: 0).getReg(), MRI, TRI); |
4094 | for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { |
4095 | Register Dst = I.getOperand(i: OpIdx).getReg(); |
4096 | MachineInstr * = |
4097 | emitExtractVectorElt(DstReg: Dst, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg, LaneIdx: OpIdx, MIRBuilder&: MIB); |
4098 | if (!Extract) |
4099 | return false; |
4100 | } |
4101 | I.eraseFromParent(); |
4102 | return true; |
4103 | } |
4104 | |
4105 | bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I, |
4106 | MachineRegisterInfo &MRI) { |
4107 | assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && |
4108 | "unexpected opcode" ); |
4109 | |
4110 | // TODO: Handle unmerging into GPRs and from scalars to scalars. |
4111 | if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != |
4112 | AArch64::FPRRegBankID || |
4113 | RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != |
4114 | AArch64::FPRRegBankID) { |
4115 | LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " |
4116 | "currently unsupported.\n" ); |
4117 | return false; |
4118 | } |
4119 | |
4120 | // The last operand is the vector source register, and every other operand is |
4121 | // a register to unpack into. |
4122 | unsigned NumElts = I.getNumOperands() - 1; |
4123 | Register SrcReg = I.getOperand(i: NumElts).getReg(); |
4124 | const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
4125 | const LLT WideTy = MRI.getType(Reg: SrcReg); |
4126 | (void)WideTy; |
4127 | assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && |
4128 | "can only unmerge from vector or s128 types!" ); |
4129 | assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && |
4130 | "source register size too small!" ); |
4131 | |
4132 | if (!NarrowTy.isScalar()) |
4133 | return selectSplitVectorUnmerge(I, MRI); |
4134 | |
4135 | // Choose a lane copy opcode and subregister based off of the size of the |
4136 | // vector's elements. |
4137 | unsigned CopyOpc = 0; |
4138 | unsigned = 0; |
4139 | if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: NarrowTy.getSizeInBits())) |
4140 | return false; |
4141 | |
4142 | // Set up for the lane copies. |
4143 | MachineBasicBlock &MBB = *I.getParent(); |
4144 | |
4145 | // Stores the registers we'll be copying from. |
4146 | SmallVector<Register, 4> InsertRegs; |
4147 | |
4148 | // We'll use the first register twice, so we only need NumElts-1 registers. |
4149 | unsigned NumInsertRegs = NumElts - 1; |
4150 | |
4151 | // If our elements fit into exactly 128 bits, then we can copy from the source |
4152 | // directly. Otherwise, we need to do a bit of setup with some subregister |
4153 | // inserts. |
4154 | if (NarrowTy.getSizeInBits() * NumElts == 128) { |
4155 | InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg); |
4156 | } else { |
4157 | // No. We have to perform subregister inserts. For each insert, create an |
4158 | // implicit def and a subregister insert, and save the register we create. |
4159 | const TargetRegisterClass *RC = getRegClassForTypeOnBank( |
4160 | LLT::fixed_vector(NumElements: NumElts, ScalarSizeInBits: WideTy.getScalarSizeInBits()), |
4161 | *RBI.getRegBank(SrcReg, MRI, TRI)); |
4162 | unsigned SubReg = 0; |
4163 | bool Found = getSubRegForClass(RC, TRI, SubReg); |
4164 | (void)Found; |
4165 | assert(Found && "expected to find last operand's subeg idx" ); |
4166 | for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) { |
4167 | Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); |
4168 | MachineInstr &ImpDefMI = |
4169 | *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF), |
4170 | ImpDefReg); |
4171 | |
4172 | // Now, create the subregister insert from SrcReg. |
4173 | Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); |
4174 | MachineInstr &InsMI = |
4175 | *BuildMI(MBB, I, I.getDebugLoc(), |
4176 | TII.get(TargetOpcode::INSERT_SUBREG), InsertReg) |
4177 | .addUse(ImpDefReg) |
4178 | .addUse(SrcReg) |
4179 | .addImm(SubReg); |
4180 | |
4181 | constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI); |
4182 | constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI); |
4183 | |
4184 | // Save the register so that we can copy from it after. |
4185 | InsertRegs.push_back(Elt: InsertReg); |
4186 | } |
4187 | } |
4188 | |
4189 | // Now that we've created any necessary subregister inserts, we can |
4190 | // create the copies. |
4191 | // |
4192 | // Perform the first copy separately as a subregister copy. |
4193 | Register CopyTo = I.getOperand(i: 0).getReg(); |
4194 | auto FirstCopy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {CopyTo}, SrcOps: {}) |
4195 | .addReg(RegNo: InsertRegs[0], flags: 0, SubReg: ExtractSubReg); |
4196 | constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI); |
4197 | |
4198 | // Now, perform the remaining copies as vector lane copies. |
4199 | unsigned LaneIdx = 1; |
4200 | for (Register InsReg : InsertRegs) { |
4201 | Register CopyTo = I.getOperand(i: LaneIdx).getReg(); |
4202 | MachineInstr &CopyInst = |
4203 | *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo) |
4204 | .addUse(InsReg) |
4205 | .addImm(LaneIdx); |
4206 | constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI); |
4207 | ++LaneIdx; |
4208 | } |
4209 | |
4210 | // Separately constrain the first copy's destination. Because of the |
4211 | // limitation in constrainOperandRegClass, we can't guarantee that this will |
4212 | // actually be constrained. So, do it ourselves using the second operand. |
4213 | const TargetRegisterClass *RC = |
4214 | MRI.getRegClassOrNull(Reg: I.getOperand(i: 1).getReg()); |
4215 | if (!RC) { |
4216 | LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n" ); |
4217 | return false; |
4218 | } |
4219 | |
4220 | RBI.constrainGenericRegister(Reg: CopyTo, RC: *RC, MRI); |
4221 | I.eraseFromParent(); |
4222 | return true; |
4223 | } |
4224 | |
4225 | bool AArch64InstructionSelector::selectConcatVectors( |
4226 | MachineInstr &I, MachineRegisterInfo &MRI) { |
4227 | assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && |
4228 | "Unexpected opcode" ); |
4229 | Register Dst = I.getOperand(i: 0).getReg(); |
4230 | Register Op1 = I.getOperand(i: 1).getReg(); |
4231 | Register Op2 = I.getOperand(i: 2).getReg(); |
4232 | MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder&: MIB); |
4233 | if (!ConcatMI) |
4234 | return false; |
4235 | I.eraseFromParent(); |
4236 | return true; |
4237 | } |
4238 | |
4239 | unsigned |
4240 | AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal, |
4241 | MachineFunction &MF) const { |
4242 | Type *CPTy = CPVal->getType(); |
4243 | Align Alignment = MF.getDataLayout().getPrefTypeAlign(Ty: CPTy); |
4244 | |
4245 | MachineConstantPool *MCP = MF.getConstantPool(); |
4246 | return MCP->getConstantPoolIndex(C: CPVal, Alignment); |
4247 | } |
4248 | |
/// Materialize \p CPVal by creating a constant-pool entry and loading from it.
///
/// Under the tiny code model a single PC-relative load(literal) is used for
/// 4/8/16-byte entries; otherwise an ADRP + page-offset LDR pair is emitted.
/// \returns the load instruction (with a memory operand attached), or nullptr
/// for unsupported store sizes.
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  const TargetRegisterClass *RC;
  unsigned Opc;
  bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(Ty: CPVal->getType());
  // Pick the FPR class and load opcode matching the entry's store size.
  switch (Size) {
  case 16:
    RC = &AArch64::FPR128RegClass;
    Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
    break;
  case 8:
    RC = &AArch64::FPR64RegClass;
    Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
    break;
  case 4:
    RC = &AArch64::FPR32RegClass;
    Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
    break;
  case 2:
    // No load(literal) form is used for 2-byte entries, even in tiny mode.
    RC = &AArch64::FPR16RegClass;
    Opc = AArch64::LDRHui;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }

  MachineInstr *LoadMI = nullptr;
  auto &MF = MIRBuilder.getMF();
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
  if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
    // Use load(literal) for tiny code model.
    LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {}).addConstantPoolIndex(Idx: CPIdx);
  } else {
    // ADRP computes the 4KiB page of the entry; the load adds the low 12 bits.
    auto Adrp =
        MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
            .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
                   .addConstantPoolIndex(
                       CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);

    constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  }

  // Attach a memory operand so later passes know this reads the constant pool.
  MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
  LoadMI->addMemOperand(MF, MO: MF.getMachineMemOperand(PtrInfo,
                                                      F: MachineMemOperand::MOLoad,
                                                      Size, BaseAlignment: Align(Size)));
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}
4303 | |
4304 | /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given |
4305 | /// size and RB. |
4306 | static std::pair<unsigned, unsigned> |
4307 | getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { |
4308 | unsigned Opc, SubregIdx; |
4309 | if (RB.getID() == AArch64::GPRRegBankID) { |
4310 | if (EltSize == 8) { |
4311 | Opc = AArch64::INSvi8gpr; |
4312 | SubregIdx = AArch64::bsub; |
4313 | } else if (EltSize == 16) { |
4314 | Opc = AArch64::INSvi16gpr; |
4315 | SubregIdx = AArch64::ssub; |
4316 | } else if (EltSize == 32) { |
4317 | Opc = AArch64::INSvi32gpr; |
4318 | SubregIdx = AArch64::ssub; |
4319 | } else if (EltSize == 64) { |
4320 | Opc = AArch64::INSvi64gpr; |
4321 | SubregIdx = AArch64::dsub; |
4322 | } else { |
4323 | llvm_unreachable("invalid elt size!" ); |
4324 | } |
4325 | } else { |
4326 | if (EltSize == 8) { |
4327 | Opc = AArch64::INSvi8lane; |
4328 | SubregIdx = AArch64::bsub; |
4329 | } else if (EltSize == 16) { |
4330 | Opc = AArch64::INSvi16lane; |
4331 | SubregIdx = AArch64::hsub; |
4332 | } else if (EltSize == 32) { |
4333 | Opc = AArch64::INSvi32lane; |
4334 | SubregIdx = AArch64::ssub; |
4335 | } else if (EltSize == 64) { |
4336 | Opc = AArch64::INSvi64lane; |
4337 | SubregIdx = AArch64::dsub; |
4338 | } else { |
4339 | llvm_unreachable("invalid elt size!" ); |
4340 | } |
4341 | } |
4342 | return std::make_pair(x&: Opc, y&: SubregIdx); |
4343 | } |
4344 | |
4345 | MachineInstr *AArch64InstructionSelector::emitInstr( |
4346 | unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, |
4347 | std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, |
4348 | const ComplexRendererFns &RenderFns) const { |
4349 | assert(Opcode && "Expected an opcode?" ); |
4350 | assert(!isPreISelGenericOpcode(Opcode) && |
4351 | "Function should only be used to produce selected instructions!" ); |
4352 | auto MI = MIRBuilder.buildInstr(Opc: Opcode, DstOps, SrcOps); |
4353 | if (RenderFns) |
4354 | for (auto &Fn : *RenderFns) |
4355 | Fn(MI); |
4356 | constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); |
4357 | return &*MI; |
4358 | } |
4359 | |
/// Shared emitter for ADD/SUB-family instructions.
///
/// \p AddrModeAndSizeToOpcode is a 5x2 opcode table, indexed first by
/// addressing-mode row and then by size (column 1 = 32-bit):
///   row 0 = ri (immediate), row 1 = rs (shifted register),
///   row 2 = rr (register-register), row 3 = ri with the negated immediate,
///   row 4 = rx (extended register).
/// The forms are tried in that preference order (immediate, negated
/// immediate, extended register, shifted register), falling back to plain
/// register-register.
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" );
  auto Ty = MRI.getType(Reg: LHS.getReg());
  assert(!Ty.isVector() && "Expected a scalar or pointer?" );
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only" );
  bool Is32Bit = Size == 32;

  // INSTRri form with positive arithmetic immediate.
  if (auto Fns = selectArithImmed(Root&: RHS))
    return emitInstr(Opcode: AddrModeAndSizeToOpcode[0][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
                     MIRBuilder, RenderFns: Fns);

  // INSTRri form with negative arithmetic immediate.
  if (auto Fns = selectNegArithImmed(Root&: RHS))
    return emitInstr(Opcode: AddrModeAndSizeToOpcode[3][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
                     MIRBuilder, RenderFns: Fns);

  // INSTRrx form.
  if (auto Fns = selectArithExtendedRegister(Root&: RHS))
    return emitInstr(Opcode: AddrModeAndSizeToOpcode[4][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
                     MIRBuilder, RenderFns: Fns);

  // INSTRrs form.
  if (auto Fns = selectShiftedRegister(Root&: RHS))
    return emitInstr(Opcode: AddrModeAndSizeToOpcode[1][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
                     MIRBuilder, RenderFns: Fns);
  return emitInstr(Opcode: AddrModeAndSizeToOpcode[2][Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS},
                   MIRBuilder);
}
4394 | |
4395 | MachineInstr * |
4396 | AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, |
4397 | MachineOperand &RHS, |
4398 | MachineIRBuilder &MIRBuilder) const { |
4399 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ |
4400 | {{AArch64::ADDXri, AArch64::ADDWri}, |
4401 | {AArch64::ADDXrs, AArch64::ADDWrs}, |
4402 | {AArch64::ADDXrr, AArch64::ADDWrr}, |
4403 | {AArch64::SUBXri, AArch64::SUBWri}, |
4404 | {AArch64::ADDXrx, AArch64::ADDWrx}}}; |
4405 | return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst: DefReg, LHS, RHS, MIRBuilder); |
4406 | } |
4407 | |
4408 | MachineInstr * |
4409 | AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, |
4410 | MachineOperand &RHS, |
4411 | MachineIRBuilder &MIRBuilder) const { |
4412 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ |
4413 | {{AArch64::ADDSXri, AArch64::ADDSWri}, |
4414 | {AArch64::ADDSXrs, AArch64::ADDSWrs}, |
4415 | {AArch64::ADDSXrr, AArch64::ADDSWrr}, |
4416 | {AArch64::SUBSXri, AArch64::SUBSWri}, |
4417 | {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; |
4418 | return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder); |
4419 | } |
4420 | |
4421 | MachineInstr * |
4422 | AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, |
4423 | MachineOperand &RHS, |
4424 | MachineIRBuilder &MIRBuilder) const { |
4425 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ |
4426 | {{AArch64::SUBSXri, AArch64::SUBSWri}, |
4427 | {AArch64::SUBSXrs, AArch64::SUBSWrs}, |
4428 | {AArch64::SUBSXrr, AArch64::SUBSWrr}, |
4429 | {AArch64::ADDSXri, AArch64::ADDSWri}, |
4430 | {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; |
4431 | return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder); |
4432 | } |
4433 | |
4434 | MachineInstr * |
4435 | AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS, |
4436 | MachineOperand &RHS, |
4437 | MachineIRBuilder &MIRBuilder) const { |
4438 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" ); |
4439 | MachineRegisterInfo *MRI = MIRBuilder.getMRI(); |
4440 | bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32); |
4441 | static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr}; |
4442 | return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder); |
4443 | } |
4444 | |
4445 | MachineInstr * |
4446 | AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS, |
4447 | MachineOperand &RHS, |
4448 | MachineIRBuilder &MIRBuilder) const { |
4449 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" ); |
4450 | MachineRegisterInfo *MRI = MIRBuilder.getMRI(); |
4451 | bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32); |
4452 | static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr}; |
4453 | return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder); |
4454 | } |
4455 | |
4456 | MachineInstr * |
4457 | AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, |
4458 | MachineIRBuilder &MIRBuilder) const { |
4459 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); |
4460 | bool Is32Bit = (MRI.getType(Reg: LHS.getReg()).getSizeInBits() == 32); |
4461 | auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; |
4462 | return emitADDS(Dst: MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); |
4463 | } |
4464 | |
/// Emit a TST (flag-setting AND with discarded result) of \p LHS and \p RHS.
///
/// Prefers the immediate form when RHS is a constant encodable as a logical
/// immediate, then the shifted-register form, then plain register-register.
MachineInstr *
AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" );
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT Ty = MRI.getType(Reg: LHS.getReg());
  unsigned RegSize = Ty.getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  // Rows: ri, rs, rr; column 1 holds the 32-bit opcode.
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};
  // ANDS needs a logical immediate for its immediate form. Check if we can
  // fold one in.
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI)) {
    int64_t Imm = ValAndVReg->Value.getSExtValue();

    if (AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize)) {
      auto TstMI = MIRBuilder.buildInstr(Opc: OpcTable[0][Is32Bit], DstOps: {Ty}, SrcOps: {LHS});
      TstMI.addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize));
      constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
      return &*TstMI;
    }
  }

  // Shifted-register form, if a shift can be folded into the RHS.
  if (auto Fns = selectLogicalShiftedRegister(Root&: RHS))
    return emitInstr(Opcode: OpcTable[1][Is32Bit], DstOps: {Ty}, SrcOps: {LHS}, MIRBuilder, RenderFns: Fns);
  // Fallback: plain register-register ANDS.
  return emitInstr(Opcode: OpcTable[2][Is32Bit], DstOps: {Ty}, SrcOps: {LHS, RHS}, MIRBuilder);
}
4493 | |
/// Emit an integer compare of \p LHS against \p RHS under \p Predicate,
/// folding into a CMN or TST when possible and otherwise emitting a
/// flag-setting SUBS whose value result is unused.
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!" );
  assert(Predicate.isPredicate() && "Expected predicate?" );
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT CmpTy = MRI.getType(Reg: LHS.getReg());
  assert(!CmpTy.isVector() && "Expected scalar or pointer" );
  unsigned Size = CmpTy.getSizeInBits();
  (void)Size; // Only referenced by the assert below.
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?" );
  // Fold the compare into a cmn or tst if possible.
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  // Only the flags matter, so the SUBS destination is a fresh vreg cloned
  // from LHS (same class/bank/type).
  auto Dst = MRI.cloneVirtualRegister(VReg: LHS.getReg());
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
}
4511 | |
/// Materialize the boolean result of a floating-point compare with predicate
/// \p Pred into the 32-bit register \p Dst using CSINC; when the predicate
/// needs two condition codes, OR two CSINC results together.
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
#ifndef NDEBUG
  LLT Ty = MRI.getType(Reg: Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?" );
#endif
  const Register ZReg = AArch64::WZR;
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(P: Pred, CondCode&: CC1, CondCode2&: CC2);
  // CSINC WZR, WZR, cc produces 1 when cc is *false*, so invert the codes.
  auto InvCC1 = AArch64CC::getInvertedCondCode(Code: CC1);
  if (CC2 == AArch64CC::AL)
    // One condition code suffices: Dst = CC1 ? 1 : 0.
    return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1,
                     MIRBuilder);
  // Two condition codes needed: cset each, then OR the results.
  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  Register Def1Reg = MRI.createVirtualRegister(RegClass: RC);
  Register Def2Reg = MRI.createVirtualRegister(RegClass: RC);
  auto InvCC2 = AArch64CC::getInvertedCondCode(Code: CC2);
  emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1, MIRBuilder);
  emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC2, MIRBuilder);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
  return &*OrMI;
}
4537 | |
/// Emit a scalar FCMP of \p LHS against \p RHS. Compares against +0.0 use the
/// single-operand immediate form of FCMP; for equality predicates the
/// operands may be commuted to enable that. Returns nullptr for vector types.
MachineInstr *AArch64InstructionSelector::emitFPCompare(
    Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
    std::optional<CmpInst::Predicate> Pred) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(Reg: LHS);
  if (Ty.isVector())
    return nullptr;
  unsigned OpSize = Ty.getSizeInBits();
  assert(OpSize == 16 || OpSize == 32 || OpSize == 64);

  // If this is a compare against +0.0, then we don't have
  // to explicitly materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(VReg: RHS, MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());

  auto IsEqualityPred = [](CmpInst::Predicate P) {
    return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
           P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
  };
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
    // Try commutating the operands.
    // Equality predicates are symmetric, so if only the LHS is +0.0 we can
    // swap operands and still use the immediate form.
    const ConstantFP *LHSImm = getConstantFPVRegVal(VReg: LHS, MRI);
    if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
      ShouldUseImm = true;
      std::swap(a&: LHS, b&: RHS);
    }
  }
  // Opcode table indexed by [ShouldUseImm][size: f16 / f32 / f64].
  unsigned CmpOpcTbl[2][3] = {
      {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
      {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc =
      CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];

  // Partially build the compare. Decide if we need to add a use for the
  // third operand based off whether or not we're comparing against 0.0.
  auto CmpMI = MIRBuilder.buildInstr(Opcode: CmpOpc).addUse(RegNo: LHS);
  CmpMI.setMIFlags(MachineInstr::NoFPExcept);
  if (!ShouldUseImm)
    CmpMI.addUse(RegNo: RHS);
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}
4580 | |
/// Concatenate two equal-typed 64-bit vectors \p Op1 and \p Op2 into a
/// 128-bit result, creating a destination vreg if \p Dst is not provided.
/// Returns nullptr when the operand types differ or are not 64-bit vectors.
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    std::optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  // TODO: some of this code is common with G_BUILD_VECTOR handling.
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Reg: Op1);
  const LLT Op2Ty = MRI.getType(Reg: Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys" );
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat" );

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors" );
    return nullptr;
  }

  // At the moment we just support 64 bit vector concats.
  if (Op1Ty.getSizeInBits() != 64) {
    LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors" );
    return nullptr;
  }

  // Treat each 64-bit operand as one scalar lane of the doubled result type.
  const LLT ScalarTy = LLT::scalar(SizeInBits: Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(Ty: Op1Ty.multiplyElements(Factor: 2), RB: FPRBank);

  // Widen each operand into the low half of a full-width register.
  MachineInstr *WidenedOp1 =
      emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value" );
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(args&: InsertOpc, args&: InsSubRegIdx) =
      getInsertVecEltOpInfo(RB: FPRBank, EltSize: ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(RegClass: DstRC);
  auto InsElt =
      MIRBuilder
          .buildInstr(Opc: InsertOpc, DstOps: {*Dst}, SrcOps: {WidenedOp1->getOperand(i: 0).getReg()})
          .addImm(Val: 1) /* Lane index */
          .addUse(RegNo: WidenedOp2->getOperand(i: 0).getReg())
          .addImm(Val: 0);
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return &*InsElt;
}
4640 | |
/// Emit CSINC \p Dst, \p Src1, \p Src2, \p Pred, choosing the W or X form
/// from the size of \p Dst (taken from its register class when it has one,
/// otherwise from its LLT).
MachineInstr *
AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
                                      Register Src2, AArch64CC::CondCode Pred,
                                      MachineIRBuilder &MIRBuilder) const {
  auto &MRI = *MIRBuilder.getMRI();
  const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg: Dst);
  // If we used a register class, then this won't necessarily have an LLT.
  // Compute the size based off whether or not we have a class or bank.
  unsigned Size;
  if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
    Size = TRI.getRegSizeInBits(*RC);
  else
    Size = MRI.getType(Reg: Dst).getSizeInBits();
  // Some opcodes use s1.
  assert(Size <= 64 && "Expected 64 bits or less only!" );
  // Anything smaller than 64 bits uses the W form.
  static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
  unsigned Opc = OpcTable[Size == 64];
  auto CSINC = MIRBuilder.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Src1, Src2}).addImm(Val: Pred);
  constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
  return &*CSINC;
}
4662 | |
/// Set the NZCV carry flag from the carry-in vreg \p CarryReg for the
/// carry-consuming instruction \p I. Returns nullptr when no flag-setting
/// instruction is needed because the directly preceding instruction already
/// produces the correct flags.
MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
                                                      Register CarryReg) {
  MachineRegisterInfo *MRI = MIB.getMRI();
  unsigned Opcode = I.getOpcode();

  // If the instruction is a SUB, we need to negate the carry,
  // because borrowing is indicated by carry-flag == 0.
  bool NeedsNegatedCarry =
      (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);

  // If the previous instruction will already produce the correct carry, do not
  // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
  // generated during legalization of wide add/sub. This optimization depends on
  // these sequences not being interrupted by other instructions.
  // We have to select the previous instruction before the carry-using
  // instruction is deleted by the calling function, otherwise the previous
  // instruction might become dead and would get deleted.
  MachineInstr *SrcMI = MRI->getVRegDef(Reg: CarryReg);
  if (SrcMI == I.getPrevNode()) {
    if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(Val: SrcMI)) {
      bool ProducesNegatedCarry = CarrySrcMI->isSub();
      if (NeedsNegatedCarry == ProducesNegatedCarry &&
          CarrySrcMI->isUnsigned() &&
          CarrySrcMI->getCarryOutReg() == CarryReg &&
          selectAndRestoreState(I&: *SrcMI))
        return nullptr;
    }
  }

  // The SUBS value result is unused; only its effect on NZCV matters.
  Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);

  if (NeedsNegatedCarry) {
    // (0 - Carry) sets !C in NZCV when Carry == 1
    Register ZReg = AArch64::WZR;
    return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
  }

  // (Carry - 1) sets !C in NZCV when Carry == 0
  auto Fns = select12BitValueWithLeftShift(Immed: 1);
  return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
}
4704 | |
/// Select an overflow-producing add/sub (G_[US]ADDO, G_[US]SUBO and the
/// carry-consuming G_[US]ADDE / G_[US]SUBE variants).
bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
                                                  MachineRegisterInfo &MRI) {
  auto &CarryMI = cast<GAddSubCarryOut>(Val&: I);

  // Carry-consuming variants must first load the incoming carry into NZCV.
  if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(Val: &I)) {
    // Set NZCV carry according to carry-in VReg
    emitCarryIn(I, CarryReg: CarryInMI->getCarryInReg());
  }

  // Emit the operation and get the correct condition code.
  auto OpAndCC = emitOverflowOp(Opcode: I.getOpcode(), Dst: CarryMI.getDstReg(),
                                LHS&: CarryMI.getLHS(), RHS&: CarryMI.getRHS(), MIRBuilder&: MIB);

  Register CarryOutReg = CarryMI.getCarryOutReg();

  // Don't convert carry-out to VReg if it is never used
  if (!MRI.use_nodbg_empty(RegNo: CarryOutReg)) {
    // Now, put the overflow result in the register given by the first operand
    // to the overflow op. CSINC increments the result when the predicate is
    // false, so to get the increment when it's true, we need to use the
    // inverse. In this case, we want to increment when carry is set.
    Register ZReg = AArch64::WZR;
    emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
              Pred: getInvertedCondCode(Code: OpAndCC.second), MIRBuilder&: MIB);
  }

  I.eraseFromParent();
  return true;
}
4734 | |
4735 | std::pair<MachineInstr *, AArch64CC::CondCode> |
4736 | AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, |
4737 | MachineOperand &LHS, |
4738 | MachineOperand &RHS, |
4739 | MachineIRBuilder &MIRBuilder) const { |
4740 | switch (Opcode) { |
4741 | default: |
4742 | llvm_unreachable("Unexpected opcode!" ); |
4743 | case TargetOpcode::G_SADDO: |
4744 | return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4745 | case TargetOpcode::G_UADDO: |
4746 | return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS); |
4747 | case TargetOpcode::G_SSUBO: |
4748 | return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4749 | case TargetOpcode::G_USUBO: |
4750 | return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO); |
4751 | case TargetOpcode::G_SADDE: |
4752 | return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4753 | case TargetOpcode::G_UADDE: |
4754 | return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS); |
4755 | case TargetOpcode::G_SSUBE: |
4756 | return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4757 | case TargetOpcode::G_USUBE: |
4758 | return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO); |
4759 | } |
4760 | } |
4761 | |
/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
/// expressed as a conjunction.
/// \param CanNegate Set to true if we can negate the whole sub-tree just by
///                  changing the conditions on the CMP tests.
///                  (this means we can call emitConjunctionRec() with
///                   Negate==true on this sub-tree)
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
///                    cannot do the negation naturally. We are required to
///                    emit the subtree first in this case.
/// \param WillNegate Is true if are called when the result of this
///                   subexpression must be negated. This happens when the
///                   outer expression is an OR. We can use this fact to know
///                   that we have a double negation (or (or ...) ...) that
///                   can be implemented for free.
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
                               bool WillNegate, MachineRegisterInfo &MRI,
                               unsigned Depth = 0) {
  // Each node of the tree must be consumed only by the conjunction, otherwise
  // its value would still be needed elsewhere.
  if (!MRI.hasOneNonDBGUse(RegNo: Val))
    return false;
  MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
  unsigned Opcode = ValDef->getOpcode();
  if (isa<GAnyCmp>(Val: ValDef)) {
    // Leaf: a compare can always be negated by inverting its predicate.
    CanNegate = true;
    MustBeFirst = false;
    return true;
  }
  // Protect against exponential runtime and stack overflow.
  if (Depth > 6)
    return false;
  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
    bool IsOR = Opcode == TargetOpcode::G_OR;
    Register O0 = ValDef->getOperand(i: 1).getReg();
    Register O1 = ValDef->getOperand(i: 2).getReg();
    bool CanNegateL;
    bool MustBeFirstL;
    if (!canEmitConjunction(Val: O0, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI, Depth: Depth + 1))
      return false;
    bool CanNegateR;
    bool MustBeFirstR;
    if (!canEmitConjunction(Val: O1, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI, Depth: Depth + 1))
      return false;

    // At most one sub-tree may demand to be emitted first.
    if (MustBeFirstL && MustBeFirstR)
      return false;

    if (IsOR) {
      // For an OR expression we need to be able to naturally negate at least
      // one side or we cannot do the transformation at all.
      if (!CanNegateL && !CanNegateR)
        return false;
      // If we the result of the OR will be negated and we can naturally negate
      // the leaves, then this sub-tree as a whole negates naturally.
      CanNegate = WillNegate && CanNegateL && CanNegateR;
      // If we cannot naturally negate the whole sub-tree, then this must be
      // emitted first.
      MustBeFirst = !CanNegate;
    } else {
      assert(Opcode == TargetOpcode::G_AND && "Must be G_AND" );
      // We cannot naturally negate an AND operation.
      CanNegate = false;
      MustBeFirst = MustBeFirstL || MustBeFirstR;
    }
    return true;
  }
  // Any other opcode ends the tree.
  return false;
}
4828 | |
/// Emit a conditional compare (CCMP / CCMP-immediate / FCCMP) of \p LHS
/// against \p RHS. The compare only takes effect when \p Predicate holds;
/// otherwise NZCV is set so that \p OutCC reads as false (i.e. the inverse of
/// OutCC is satisfied).
MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
    Register LHS, Register RHS, CmpInst::Predicate CC,
    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
    MachineIRBuilder &MIB) const {
  // TODO: emit CMN as an optimization.
  auto &MRI = *MIB.getMRI();
  LLT OpTy = MRI.getType(Reg: LHS);
  unsigned CCmpOpc;
  std::optional<ValueAndVReg> C;
  if (CmpInst::isIntPredicate(P: CC)) {
    assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
    // CCMP's immediate form only encodes a 5-bit unsigned immediate.
    C = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
    if (C && C->Value.ult(32))
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
    else
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
  } else {
    assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
           OpTy.getSizeInBits() == 64);
    switch (OpTy.getSizeInBits()) {
    case 16:
      assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons" );
      CCmpOpc = AArch64::FCCMPHrr;
      break;
    case 32:
      CCmpOpc = AArch64::FCCMPSrr;
      break;
    case 64:
      CCmpOpc = AArch64::FCCMPDrr;
      break;
    default:
      return nullptr;
    }
  }
  // The flags-on-false operand encodes the NZCV value written when Predicate
  // does not hold; choose it so OutCC evaluates to false in that case.
  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Code: InvOutCC);
  auto CCmp =
      MIB.buildInstr(Opc: CCmpOpc, DstOps: {}, SrcOps: {LHS});
  // C is guaranteed set here: the immediate opcodes are only chosen above when
  // C && C->Value.ult(32).
  if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
    CCmp.addImm(Val: C->Value.getZExtValue());
  else
    CCmp.addReg(RegNo: RHS);
  CCmp.addImm(Val: NZCV).addImm(Val: Predicate);
  constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
  return &*CCmp;
}
4875 | |
4876 | MachineInstr *AArch64InstructionSelector::emitConjunctionRec( |
4877 | Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp, |
4878 | AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const { |
4879 | // We're at a tree leaf, produce a conditional comparison operation. |
4880 | auto &MRI = *MIB.getMRI(); |
4881 | MachineInstr *ValDef = MRI.getVRegDef(Reg: Val); |
4882 | unsigned Opcode = ValDef->getOpcode(); |
4883 | if (auto *Cmp = dyn_cast<GAnyCmp>(Val: ValDef)) { |
4884 | Register LHS = Cmp->getLHSReg(); |
4885 | Register RHS = Cmp->getRHSReg(); |
4886 | CmpInst::Predicate CC = Cmp->getCond(); |
4887 | if (Negate) |
4888 | CC = CmpInst::getInversePredicate(pred: CC); |
4889 | if (isa<GICmp>(Val: Cmp)) { |
4890 | OutCC = changeICMPPredToAArch64CC(P: CC); |
4891 | } else { |
4892 | // Handle special FP cases. |
4893 | AArch64CC::CondCode ; |
4894 | changeFPCCToANDAArch64CC(CC, CondCode&: OutCC, CondCode2&: ExtraCC); |
4895 | // Some floating point conditions can't be tested with a single condition |
4896 | // code. Construct an additional comparison in this case. |
4897 | if (ExtraCC != AArch64CC::AL) { |
4898 | MachineInstr *; |
4899 | if (!CCOp) |
4900 | ExtraCmp = emitFPCompare(LHS, RHS, MIRBuilder&: MIB, Pred: CC); |
4901 | else |
4902 | ExtraCmp = |
4903 | emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC: ExtraCC, MIB); |
4904 | CCOp = ExtraCmp->getOperand(i: 0).getReg(); |
4905 | Predicate = ExtraCC; |
4906 | } |
4907 | } |
4908 | |
4909 | // Produce a normal comparison if we are first in the chain |
4910 | if (!CCOp) { |
4911 | auto Dst = MRI.cloneVirtualRegister(VReg: LHS); |
4912 | if (isa<GICmp>(Val: Cmp)) |
4913 | return emitSUBS(Dst, LHS&: Cmp->getOperand(i: 2), RHS&: Cmp->getOperand(i: 3), MIRBuilder&: MIB); |
4914 | return emitFPCompare(LHS: Cmp->getOperand(i: 2).getReg(), |
4915 | RHS: Cmp->getOperand(i: 3).getReg(), MIRBuilder&: MIB); |
4916 | } |
4917 | // Otherwise produce a ccmp. |
4918 | return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB); |
4919 | } |
4920 | assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree" ); |
4921 | |
4922 | bool IsOR = Opcode == TargetOpcode::G_OR; |
4923 | |
4924 | Register LHS = ValDef->getOperand(i: 1).getReg(); |
4925 | bool CanNegateL; |
4926 | bool MustBeFirstL; |
4927 | bool ValidL = canEmitConjunction(Val: LHS, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI); |
4928 | assert(ValidL && "Valid conjunction/disjunction tree" ); |
4929 | (void)ValidL; |
4930 | |
4931 | Register RHS = ValDef->getOperand(i: 2).getReg(); |
4932 | bool CanNegateR; |
4933 | bool MustBeFirstR; |
4934 | bool ValidR = canEmitConjunction(Val: RHS, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI); |
4935 | assert(ValidR && "Valid conjunction/disjunction tree" ); |
4936 | (void)ValidR; |
4937 | |
4938 | // Swap sub-tree that must come first to the right side. |
4939 | if (MustBeFirstL) { |
4940 | assert(!MustBeFirstR && "Valid conjunction/disjunction tree" ); |
4941 | std::swap(a&: LHS, b&: RHS); |
4942 | std::swap(a&: CanNegateL, b&: CanNegateR); |
4943 | std::swap(a&: MustBeFirstL, b&: MustBeFirstR); |
4944 | } |
4945 | |
4946 | bool NegateR; |
4947 | bool NegateAfterR; |
4948 | bool NegateL; |
4949 | bool NegateAfterAll; |
4950 | if (Opcode == TargetOpcode::G_OR) { |
4951 | // Swap the sub-tree that we can negate naturally to the left. |
4952 | if (!CanNegateL) { |
4953 | assert(CanNegateR && "at least one side must be negatable" ); |
4954 | assert(!MustBeFirstR && "invalid conjunction/disjunction tree" ); |
4955 | assert(!Negate); |
4956 | std::swap(a&: LHS, b&: RHS); |
4957 | NegateR = false; |
4958 | NegateAfterR = true; |
4959 | } else { |
4960 | // Negate the left sub-tree if possible, otherwise negate the result. |
4961 | NegateR = CanNegateR; |
4962 | NegateAfterR = !CanNegateR; |
4963 | } |
4964 | NegateL = true; |
4965 | NegateAfterAll = !Negate; |
4966 | } else { |
4967 | assert(Opcode == TargetOpcode::G_AND && |
4968 | "Valid conjunction/disjunction tree" ); |
4969 | assert(!Negate && "Valid conjunction/disjunction tree" ); |
4970 | |
4971 | NegateL = false; |
4972 | NegateR = false; |
4973 | NegateAfterR = false; |
4974 | NegateAfterAll = false; |
4975 | } |
4976 | |
4977 | // Emit sub-trees. |
4978 | AArch64CC::CondCode RHSCC; |
4979 | MachineInstr *CmpR = |
4980 | emitConjunctionRec(Val: RHS, OutCC&: RHSCC, Negate: NegateR, CCOp, Predicate, MIB); |
4981 | if (NegateAfterR) |
4982 | RHSCC = AArch64CC::getInvertedCondCode(Code: RHSCC); |
4983 | MachineInstr *CmpL = emitConjunctionRec( |
4984 | Val: LHS, OutCC, Negate: NegateL, CCOp: CmpR->getOperand(i: 0).getReg(), Predicate: RHSCC, MIB); |
4985 | if (NegateAfterAll) |
4986 | OutCC = AArch64CC::getInvertedCondCode(Code: OutCC); |
4987 | return CmpL; |
4988 | } |
4989 | |
4990 | MachineInstr *AArch64InstructionSelector::emitConjunction( |
4991 | Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const { |
4992 | bool DummyCanNegate; |
4993 | bool DummyMustBeFirst; |
4994 | if (!canEmitConjunction(Val, CanNegate&: DummyCanNegate, MustBeFirst&: DummyMustBeFirst, WillNegate: false, |
4995 | MRI&: *MIB.getMRI())) |
4996 | return nullptr; |
4997 | return emitConjunctionRec(Val, OutCC, Negate: false, CCOp: Register(), Predicate: AArch64CC::AL, MIB); |
4998 | } |
4999 | |
5000 | bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI, |
5001 | MachineInstr &CondMI) { |
5002 | AArch64CC::CondCode AArch64CC; |
5003 | MachineInstr *ConjMI = emitConjunction(Val: SelI.getCondReg(), OutCC&: AArch64CC, MIB); |
5004 | if (!ConjMI) |
5005 | return false; |
5006 | |
5007 | emitSelect(Dst: SelI.getReg(Idx: 0), True: SelI.getTrueReg(), False: SelI.getFalseReg(), CC: AArch64CC, MIB); |
5008 | SelI.eraseFromParent(); |
5009 | return true; |
5010 | } |
5011 | |
/// Try to fold the compare (or conjunction of compares) defining the
/// condition of G_SELECT \p I directly into a csel-style select, avoiding the
/// generic cset + select lowering. Returns true and erases \p I on success.
bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between.)
  //
  // If we see this, then we can emit something like this:
  //
  // fcmp $x, $y
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  MachineInstr *CondDef = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg());

  // We can only fold if all of the defs have one use.
  Register CondDefReg = CondDef->getOperand(i: 0).getReg();
  if (!MRI.hasOneNonDBGUse(RegNo: CondDefReg)) {
    // Unless it's another select.
    // Multiple selects sharing one compare can all fold onto the same flags.
    for (const MachineInstr &UI : MRI.use_nodbg_instructions(Reg: CondDefReg)) {
      if (CondDef == &UI)
        continue;
      if (UI.getOpcode() != TargetOpcode::G_SELECT)
        return false;
    }
  }

  // Is the condition defined by a compare?
  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
    // Not a plain compare; it may still be an AND/OR tree of compares.
    if (tryOptSelectConjunction(SelI&: I, CondMI&: *CondDef))
      return true;
    return false;
  }

  AArch64CC::CondCode CondCode;
  if (CondOpc == TargetOpcode::G_ICMP) {
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate());
    CondCode = changeICMPPredToAArch64CC(P: Pred);
    emitIntegerCompare(LHS&: CondDef->getOperand(i: 2), RHS&: CondDef->getOperand(i: 3),
                       Predicate&: CondDef->getOperand(i: 1), MIRBuilder&: MIB);
  } else {
    // Get the condition code for the select.
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate());
    AArch64CC::CondCode CondCode2;
    changeFCMPPredToAArch64CC(P: Pred, CondCode, CondCode2);

    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
    // instructions to emit the comparison.
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
    // unnecessary.
    if (CondCode2 != AArch64CC::AL)
      return false;

    if (!emitFPCompare(LHS: CondDef->getOperand(i: 2).getReg(),
                       RHS: CondDef->getOperand(i: 3).getReg(), MIRBuilder&: MIB)) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n" );
      return false;
    }
  }

  // Emit the select.
  emitSelect(Dst: I.getOperand(i: 0).getReg(), True: I.getOperand(i: 2).getReg(),
             False: I.getOperand(i: 3).getReg(), CC: CondCode, MIB);
  I.eraseFromParent();
  return true;
}
5088 | |
/// Try to replace the flag-setting SUBS for an integer compare with a CMN
/// (when one operand is a negation, per isCMN) or a TST (when comparing the
/// result of a G_AND against zero with a non-unsigned predicate). Returns the
/// folded flag-setting instruction, or nullptr if no folding applies.
MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand" );
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // We want to find this sort of thing:
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
  // e.g:
  //
  // cmn z, y

  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
  MachineInstr *LHSDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);
  auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP x, z
  //
  // Produce this:
  //
  // cmn y, z
  if (isCMN(MaybeSub: LHSDef, Pred: P, MRI))
    return emitCMN(LHS&: LHSDef->getOperand(i: 2), RHS, MIRBuilder);

  // Same idea here, but with the RHS of the compare instead:
  //
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // Produce this:
  //
  // cmn z, y
  if (isCMN(MaybeSub: RHSDef, Pred: P, MRI))
    return emitCMN(LHS, RHS&: RHSDef->getOperand(i: 2), MIRBuilder);

  // Given this:
  //
  // z = G_AND x, y
  // G_ICMP z, 0
  //
  // Produce this if the compare is signed:
  //
  // tst x, y
  if (!CmpInst::isUnsigned(predicate: P) && LHSDef &&
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // Make sure that the RHS is 0.
    auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;

    // TST of the AND's two sources sets the same flags as SUBS(AND(x,y), 0).
    return emitTST(LHS&: LHSDef->getOperand(i: 1),
                   RHS&: LHSDef->getOperand(i: 2), MIRBuilder);
  }

  // No folding opportunity found.
  return nullptr;
}
5153 | |
/// Select G_SHUFFLE_VECTOR by materializing the shuffle mask as a byte-index
/// vector in the constant pool and emitting a TBL1 (64-bit result) or TBL2
/// (128-bit result) table lookup.
bool AArch64InstructionSelector::selectShuffleVector(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  Register Src1Reg = I.getOperand(i: 1).getReg();
  const LLT Src1Ty = MRI.getType(Reg: Src1Reg);
  Register Src2Reg = I.getOperand(i: 2).getReg();
  const LLT Src2Ty = MRI.getType(Reg: Src2Reg);
  ArrayRef<int> Mask = I.getOperand(i: 3).getShuffleMask();

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  LLVMContext &Ctx = MF.getFunction().getContext();

  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
  // it's originated from a <1 x T> type. Those should have been lowered into
  // G_BUILD_VECTOR earlier.
  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n" );
    return false;
  }

  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;

  // TBL indexes by byte, so expand each mask element to its byte offsets.
  SmallVector<Constant *, 64> CstIdxs;
  for (int Val : Mask) {
    // For now, any undef indexes we'll just assume to be 0. This should be
    // optimized in future, e.g. to select DUP etc.
    Val = Val < 0 ? 0 : Val;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      CstIdxs.emplace_back(Args: ConstantInt::get(Ty: Type::getInt8Ty(C&: Ctx), V: Offset));
    }
  }

  // Use a constant pool to load the index vector for TBL.
  Constant *CPVal = ConstantVector::get(V: CstIdxs);
  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder&: MIB);
  if (!IndexLoad) {
    LLVM_DEBUG(dbgs() << "Could not load from a constant pool" );
    return false;
  }

  if (DstTy.getSizeInBits() != 128) {
    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty" );
    // This case can be done with TBL1.
    // Concatenate the two 64-bit sources into one 128-bit table register.
    MachineInstr *Concat =
        emitVectorConcat(Dst: std::nullopt, Op1: Src1Reg, Op2: Src2Reg, MIRBuilder&: MIB);
    if (!Concat) {
      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1" );
      return false;
    }

    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
    IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
                                   IndexLoad->getOperand(0).getReg(), MIB);

    auto TBL1 = MIB.buildInstr(
        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
    constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);

    // Extract the low 64 bits (dsub) of the TBL1 result as the final value.
    auto Copy =
        MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
            .addReg(TBL1.getReg(0), 0, AArch64::dsub);
    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }

  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
  // Q registers for regalloc.
  SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
  auto RegSeq = createQTuple(Regs, MIB);
  auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
                             {RegSeq, IndexLoad->getOperand(0)});
  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
5233 | |
/// Emit an instruction inserting the scalar \p EltReg into lane \p LaneIdx of
/// vector \p SrcReg, producing a 128-bit result in \p DstReg (a fresh FPR128
/// vreg is created when none is supplied).
///
/// \returns the constrained insert instruction.
MachineInstr *AArch64InstructionSelector::emitLaneInsert(
    std::optional<Register> DstReg, Register SrcReg, Register EltReg,
    unsigned LaneIdx, const RegisterBank &RB,
    MachineIRBuilder &MIRBuilder) const {
  MachineInstr *InsElt = nullptr;
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // Create a register to define with the insert if one wasn't passed in.
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(RegClass: DstRC);

  unsigned EltSize = MRI.getType(Reg: EltReg).getSizeInBits();
  // Pick the insert opcode appropriate for the element's bank and size.
  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;

  if (RB.getID() == AArch64::FPRRegBankID) {
    // FPR source: first place the scalar in lane 0 of a vector, then use the
    // lane-to-lane form of the insert (hence the trailing source lane 0).
    auto InsSub = emitScalarToVector(EltSize, DstRC, Scalar: EltReg, MIRBuilder);
    InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
                 .addImm(Val: LaneIdx)
                 .addUse(RegNo: InsSub->getOperand(i: 0).getReg())
                 .addImm(Val: 0);
  } else {
    // GPR source: the insert takes the scalar register directly.
    InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
                 .addImm(Val: LaneIdx)
                 .addUse(RegNo: EltReg);
  }

  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return InsElt;
}
5264 | |
5265 | bool AArch64InstructionSelector::selectUSMovFromExtend( |
5266 | MachineInstr &MI, MachineRegisterInfo &MRI) { |
5267 | if (MI.getOpcode() != TargetOpcode::G_SEXT && |
5268 | MI.getOpcode() != TargetOpcode::G_ZEXT && |
5269 | MI.getOpcode() != TargetOpcode::G_ANYEXT) |
5270 | return false; |
5271 | bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT; |
5272 | const Register DefReg = MI.getOperand(i: 0).getReg(); |
5273 | const LLT DstTy = MRI.getType(Reg: DefReg); |
5274 | unsigned DstSize = DstTy.getSizeInBits(); |
5275 | |
5276 | if (DstSize != 32 && DstSize != 64) |
5277 | return false; |
5278 | |
5279 | MachineInstr * = getOpcodeDef(Opcode: TargetOpcode::G_EXTRACT_VECTOR_ELT, |
5280 | Reg: MI.getOperand(i: 1).getReg(), MRI); |
5281 | int64_t Lane; |
5282 | if (!Extract || !mi_match(R: Extract->getOperand(i: 2).getReg(), MRI, P: m_ICst(Cst&: Lane))) |
5283 | return false; |
5284 | Register Src0 = Extract->getOperand(i: 1).getReg(); |
5285 | |
5286 | const LLT &VecTy = MRI.getType(Reg: Src0); |
5287 | |
5288 | if (VecTy.getSizeInBits() != 128) { |
5289 | const MachineInstr *ScalarToVector = emitScalarToVector( |
5290 | VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB); |
5291 | assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!" ); |
5292 | Src0 = ScalarToVector->getOperand(i: 0).getReg(); |
5293 | } |
5294 | |
5295 | unsigned Opcode; |
5296 | if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32) |
5297 | Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32; |
5298 | else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16) |
5299 | Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16; |
5300 | else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8) |
5301 | Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8; |
5302 | else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16) |
5303 | Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16; |
5304 | else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8) |
5305 | Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8; |
5306 | else |
5307 | llvm_unreachable("Unexpected type combo for S/UMov!" ); |
5308 | |
5309 | // We may need to generate one of these, depending on the type and sign of the |
5310 | // input: |
5311 | // DstReg = SMOV Src0, Lane; |
5312 | // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32; |
5313 | MachineInstr *ExtI = nullptr; |
5314 | if (DstSize == 64 && !IsSigned) { |
5315 | Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); |
5316 | MIB.buildInstr(Opc: Opcode, DstOps: {NewReg}, SrcOps: {Src0}).addImm(Val: Lane); |
5317 | ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) |
5318 | .addImm(0) |
5319 | .addUse(NewReg) |
5320 | .addImm(AArch64::sub_32); |
5321 | RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); |
5322 | } else |
5323 | ExtI = MIB.buildInstr(Opc: Opcode, DstOps: {DefReg}, SrcOps: {Src0}).addImm(Val: Lane); |
5324 | |
5325 | constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); |
5326 | MI.eraseFromParent(); |
5327 | return true; |
5328 | } |
5329 | |
5330 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8( |
5331 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) { |
5332 | unsigned int Op; |
5333 | if (DstSize == 128) { |
5334 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5335 | return nullptr; |
5336 | Op = AArch64::MOVIv16b_ns; |
5337 | } else { |
5338 | Op = AArch64::MOVIv8b_ns; |
5339 | } |
5340 | |
5341 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5342 | |
5343 | if (AArch64_AM::isAdvSIMDModImmType9(Imm: Val)) { |
5344 | Val = AArch64_AM::encodeAdvSIMDModImmType9(Imm: Val); |
5345 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val); |
5346 | constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); |
5347 | return &*Mov; |
5348 | } |
5349 | return nullptr; |
5350 | } |
5351 | |
/// Try to materialize the splat constant \p Bits with a 16-bit-element MOVI
/// (or MVNI when \p Inv) plus an optional left shift. Returns the emitted,
/// constrained instruction, or nullptr if there is no such encoding.
MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
    bool Inv) {

  unsigned int Op;
  if (DstSize == 128) {
    // A 128-bit constant must be a splat of its two 64-bit halves.
    if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
      return nullptr;
    Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
  } else {
    Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
  }

  uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
  uint64_t Shift;

  // Type 5 encodes with no shift, type 6 with an 8-bit left shift.
  if (AArch64_AM::isAdvSIMDModImmType5(Imm: Val)) {
    Val = AArch64_AM::encodeAdvSIMDModImmType5(Imm: Val);
    Shift = 0;
  } else if (AArch64_AM::isAdvSIMDModImmType6(Imm: Val)) {
    Val = AArch64_AM::encodeAdvSIMDModImmType6(Imm: Val);
    Shift = 8;
  } else
    return nullptr;

  auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
  return &*Mov;
}
5381 | |
5382 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32( |
5383 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder, |
5384 | bool Inv) { |
5385 | |
5386 | unsigned int Op; |
5387 | if (DstSize == 128) { |
5388 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5389 | return nullptr; |
5390 | Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32; |
5391 | } else { |
5392 | Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32; |
5393 | } |
5394 | |
5395 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5396 | uint64_t Shift; |
5397 | |
5398 | if ((AArch64_AM::isAdvSIMDModImmType1(Imm: Val))) { |
5399 | Val = AArch64_AM::encodeAdvSIMDModImmType1(Imm: Val); |
5400 | Shift = 0; |
5401 | } else if ((AArch64_AM::isAdvSIMDModImmType2(Imm: Val))) { |
5402 | Val = AArch64_AM::encodeAdvSIMDModImmType2(Imm: Val); |
5403 | Shift = 8; |
5404 | } else if ((AArch64_AM::isAdvSIMDModImmType3(Imm: Val))) { |
5405 | Val = AArch64_AM::encodeAdvSIMDModImmType3(Imm: Val); |
5406 | Shift = 16; |
5407 | } else if ((AArch64_AM::isAdvSIMDModImmType4(Imm: Val))) { |
5408 | Val = AArch64_AM::encodeAdvSIMDModImmType4(Imm: Val); |
5409 | Shift = 24; |
5410 | } else |
5411 | return nullptr; |
5412 | |
5413 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift); |
5414 | constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); |
5415 | return &*Mov; |
5416 | } |
5417 | |
/// Try to materialize the splat constant \p Bits with a 64-bit MOVI
/// (byte-mask modified-immediate, "type 10"). Returns the emitted,
/// constrained instruction, or nullptr if there is no such encoding.
MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {

  unsigned int Op;
  if (DstSize == 128) {
    // A 128-bit constant must be a splat of its two 64-bit halves.
    if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
      return nullptr;
    Op = AArch64::MOVIv2d_ns;
  } else {
    Op = AArch64::MOVID;
  }

  uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
  if (AArch64_AM::isAdvSIMDModImmType10(Imm: Val)) {
    Val = AArch64_AM::encodeAdvSIMDModImmType10(Imm: Val);
    auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
    return &*Mov;
  }
  return nullptr;
}
5439 | |
/// Try to materialize the splat constant \p Bits with a 32-bit-element
/// MOVI/MVNI using the MSL (shift-ones) modifier. Returns the emitted,
/// constrained instruction, or nullptr if there is no such encoding.
MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
    bool Inv) {

  unsigned int Op;
  if (DstSize == 128) {
    // A 128-bit constant must be a splat of its two 64-bit halves.
    if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
      return nullptr;
    Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
  } else {
    Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
  }

  uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
  uint64_t Shift;

  // NOTE(review): 264/272 appear to be the shifter-operand encodings for
  // MSL #8 / MSL #16 — confirm against AArch64AddressingModes.h if changing.
  if (AArch64_AM::isAdvSIMDModImmType7(Imm: Val)) {
    Val = AArch64_AM::encodeAdvSIMDModImmType7(Imm: Val);
    Shift = 264;
  } else if (AArch64_AM::isAdvSIMDModImmType8(Imm: Val)) {
    Val = AArch64_AM::encodeAdvSIMDModImmType8(Imm: Val);
    Shift = 272;
  } else
    return nullptr;

  auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
  return &*Mov;
}
5469 | |
5470 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP( |
5471 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) { |
5472 | |
5473 | unsigned int Op; |
5474 | bool IsWide = false; |
5475 | if (DstSize == 128) { |
5476 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5477 | return nullptr; |
5478 | Op = AArch64::FMOVv4f32_ns; |
5479 | IsWide = true; |
5480 | } else { |
5481 | Op = AArch64::FMOVv2f32_ns; |
5482 | } |
5483 | |
5484 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5485 | |
5486 | if (AArch64_AM::isAdvSIMDModImmType11(Imm: Val)) { |
5487 | Val = AArch64_AM::encodeAdvSIMDModImmType11(Imm: Val); |
5488 | } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Imm: Val)) { |
5489 | Val = AArch64_AM::encodeAdvSIMDModImmType12(Imm: Val); |
5490 | Op = AArch64::FMOVv2f64_ns; |
5491 | } else |
5492 | return nullptr; |
5493 | |
5494 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val); |
5495 | constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); |
5496 | return &*Mov; |
5497 | } |
5498 | |
/// Select a pre/post-indexed extending load (an indexed load whose memory
/// size is narrower than the destination) into an LDRS*/LDR* pre/post
/// instruction, widening into an X register via SUBREG_TO_REG when needed.
bool AArch64InstructionSelector::selectIndexedExtLoad(
    MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &ExtLd = cast<GIndexedAnyExtLoad>(Val&: MI);
  Register Dst = ExtLd.getDstReg();
  Register WriteBack = ExtLd.getWritebackReg();
  Register Base = ExtLd.getBaseReg();
  Register Offset = ExtLd.getOffsetReg();
  LLT Ty = MRI.getType(Reg: Dst);
  assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
  unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
  bool IsPre = ExtLd.isPre();
  bool IsSExt = isa<GIndexedSExtLoad>(Val: ExtLd);
  // Set when a zero/any-extending load to 64 bits is done as a 32-bit load
  // followed by a SUBREG_TO_REG into an X register.
  bool InsertIntoXReg = false;
  bool IsDst64 = Ty.getSizeInBits() == 64;

  unsigned Opc = 0;
  LLT NewLdDstTy; // Type the selected load instruction actually defines.
  LLT s32 = LLT::scalar(SizeInBits: 32);
  LLT s64 = LLT::scalar(SizeInBits: 64);

  // Pick the opcode from memory size, signedness, and destination width.
  if (MemSizeBits == 8) {
    if (IsSExt) {
      if (IsDst64)
        Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
      NewLdDstTy = IsDst64 ? s64 : s32;
    } else {
      Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertIntoXReg = IsDst64;
      NewLdDstTy = s32;
    }
  } else if (MemSizeBits == 16) {
    if (IsSExt) {
      if (IsDst64)
        Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
      NewLdDstTy = IsDst64 ? s64 : s32;
    } else {
      Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertIntoXReg = IsDst64;
      NewLdDstTy = s32;
    }
  } else if (MemSizeBits == 32) {
    if (IsSExt) {
      Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
      NewLdDstTy = s64;
    } else {
      Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertIntoXReg = IsDst64;
      NewLdDstTy = s32;
    }
  } else {
    llvm_unreachable("Unexpected size for indexed load" );
  }

  if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
    return false; // We should be on gpr.

  // The immediate-offset form needs a known constant offset.
  auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
  if (!Cst)
    return false; // Shouldn't happen, but just in case.

  auto LdMI = MIB.buildInstr(Opc, DstOps: {WriteBack, NewLdDstTy}, SrcOps: {Base})
                  .addImm(Val: Cst->getSExtValue());
  LdMI.cloneMemRefs(OtherMI: ExtLd);
  constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
  // Make sure to select the load with the MemTy as the dest type, and then
  // insert into X reg if needed.
  if (InsertIntoXReg) {
    // Generate a SUBREG_TO_REG.
    auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
                        .addImm(0)
                        .addUse(LdMI.getReg(1))
                        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
                                 MRI);
  } else {
    auto Copy = MIB.buildCopy(Res: Dst, Op: LdMI.getReg(Idx: 1));
    selectCopy(*Copy, TII, MRI, TRI, RBI);
  }
  MI.eraseFromParent();

  return true;
}
5585 | |
/// Select a G_INDEXED_LOAD (load with address writeback) into the matching
/// LDR*pre/LDR*post instruction, dispatching to selectIndexedExtLoad when the
/// memory type is narrower than the destination.
bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
                                                   MachineRegisterInfo &MRI) {
  auto &Ld = cast<GIndexedLoad>(Val&: MI);
  Register Dst = Ld.getDstReg();
  Register WriteBack = Ld.getWritebackReg();
  Register Base = Ld.getBaseReg();
  Register Offset = Ld.getOffsetReg();
  assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
         "Unexpected type for indexed load" );
  unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();

  // A narrower memory size means this is really an extending load.
  if (MemSize < MRI.getType(Reg: Dst).getSizeInBytes())
    return selectIndexedExtLoad(MI, MRI);

  unsigned Opc = 0;
  if (Ld.isPre()) {
    // Tables indexed by log2 of the memory size in bytes (1, 2, 4, 8[, 16]).
    static constexpr unsigned GPROpcodes[] = {
        AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
        AArch64::LDRXpre};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
        AArch64::LDRQpre};
    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(Value: MemSize)];
    else
      Opc = GPROpcodes[Log2_32(Value: MemSize)];
  } else {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
        AArch64::LDRXpost};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
        AArch64::LDRDpost, AArch64::LDRQpost};
    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(Value: MemSize)];
    else
      Opc = GPROpcodes[Log2_32(Value: MemSize)];
  }
  // The immediate-offset form needs a known constant offset.
  auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
  if (!Cst)
    return false; // Shouldn't happen, but just in case.
  auto LdMI =
      MIB.buildInstr(Opc, DstOps: {WriteBack, Dst}, SrcOps: {Base}).addImm(Val: Cst->getSExtValue());
  LdMI.cloneMemRefs(OtherMI: Ld);
  constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
  MI.eraseFromParent();
  return true;
}
5634 | |
/// Select a G_INDEXED_STORE (store with address writeback) into the matching
/// STR*pre/STR*post instruction.
bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
                                                    MachineRegisterInfo &MRI) {
  Register Dst = I.getWritebackReg();
  Register Val = I.getValueReg();
  Register Base = I.getBaseReg();
  Register Offset = I.getOffsetReg();
  LLT ValTy = MRI.getType(Reg: Val);
  assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store" );

  unsigned Opc = 0;
  if (I.isPre()) {
    // Tables indexed by log2 of the value size in bytes (1, 2, 4, 8[, 16]).
    static constexpr unsigned GPROpcodes[] = {
        AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
        AArch64::STRXpre};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
        AArch64::STRQpre};

    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
    else
      Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
  } else {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
        AArch64::STRXpost};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
        AArch64::STRDpost, AArch64::STRQpost};

    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
    else
      Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
  }

  // The immediate-offset form needs a known constant offset.
  auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
  if (!Cst)
    return false; // Shouldn't happen, but just in case.
  auto Str =
      MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Val, Base}).addImm(Val: Cst->getSExtValue());
  Str.cloneMemRefs(OtherMI: I);
  constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
5681 | |
/// Materialize the constant vector \p CV into \p Dst, preferring (in order):
/// MOVI #0 for all-zeros, the SIMD modified-immediate encodings for splats
/// (optionally on the bitwise complement, or after flipping per-element sign
/// bits and FNEG-ing back), and finally a constant-pool load.
///
/// \returns the final defining instruction, or nullptr on failure.
MachineInstr *
AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
                                               MachineIRBuilder &MIRBuilder,
                                               MachineRegisterInfo &MRI) {
  LLT DstTy = MRI.getType(Reg: Dst);
  unsigned DstSize = DstTy.getSizeInBits();
  if (CV->isNullValue()) {
    if (DstSize == 128) {
      auto Mov =
          MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
      constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
      return &*Mov;
    }

    if (DstSize == 64) {
      // Emit the 128-bit MOVI and copy out the low D subregister.
      auto Mov =
          MIRBuilder
              .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
              .addImm(0);
      auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
                      .addReg(Mov.getReg(0), 0, AArch64::dsub);
      RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
      return &*Copy;
    }
  }

  if (CV->getSplatValue()) {
    // Splat constant: try every modified-immediate encoding on the value
    // itself, then the invertible forms (MVNI) on its bitwise complement.
    APInt DefBits = APInt::getSplat(NewLen: DstSize, V: CV->getUniqueInteger());
    auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
      MachineInstr *NewOp;
      bool Inv = false;
      if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
          (NewOp =
               tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
          (NewOp =
               tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
          (NewOp =
               tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
          (NewOp = tryAdvSIMDModImm8(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
          (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)))
        return NewOp;

      DefBits = ~DefBits;
      Inv = true;
      if ((NewOp =
               tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
          (NewOp =
               tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
          (NewOp = tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)))
        return NewOp;
      return nullptr;
    };

    if (auto *NewOp = TryMOVIWithBits(DefBits))
      return NewOp;

    // See if a fneg of the constant can be materialized with a MOVI, etc
    auto TryWithFNeg = [&](APInt DefBits, int NumBits,
                           unsigned NegOpc) -> MachineInstr * {
      // FNegate each sub-element of the constant
      // Build a mask with the top (sign) bit of every NumBits-wide element
      // set, then XOR it in to flip those sign bits.
      APInt Neg = APInt::getHighBitsSet(numBits: NumBits, hiBitsSet: 1).zext(width: DstSize);
      APInt NegBits(DstSize, 0);
      unsigned NumElts = DstSize / NumBits;
      for (unsigned i = 0; i < NumElts; i++)
        NegBits |= Neg << (NumBits * i);
      NegBits = DefBits ^ NegBits;

      // Try to create the new constants with MOVI, and if so generate a fneg
      // for it.
      if (auto *NewOp = TryMOVIWithBits(NegBits)) {
        // Redirect the MOVI into a fresh FPR128 vreg and FNEG it into Dst.
        Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
        NewOp->getOperand(i: 0).setReg(NewDst);
        return MIRBuilder.buildInstr(Opc: NegOpc, DstOps: {Dst}, SrcOps: {NewDst});
      }
      return nullptr;
    };
    MachineInstr *R;
    // FNEGv8f16 requires full FP16 support.
    if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
        (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
        (STI.hasFullFP16() &&
         (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
      return R;
  }

  // No immediate form matched; fall back to a constant-pool load.
  auto *CPLoad = emitLoadFromConstantPool(CPVal: CV, MIRBuilder);
  if (!CPLoad) {
    LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!" );
    return nullptr;
  }

  auto Copy = MIRBuilder.buildCopy(Res: Dst, Op: CPLoad->getOperand(i: 0));
  RBI.constrainGenericRegister(
      Reg: Dst, RC: *MRI.getRegClass(Reg: CPLoad->getOperand(i: 0).getReg()), MRI);
  return &*Copy;
}
5777 | |
/// Try to select a G_BUILD_VECTOR whose sources are all G_CONSTANT or
/// G_FCONSTANT as a single constant-vector materialization (see
/// emitConstantVector) instead of a chain of lane inserts.
/// \returns true (and erases \p I) on success.
bool AArch64InstructionSelector::tryOptConstantBuildVec(
    MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  unsigned DstSize = DstTy.getSizeInBits();
  assert(DstSize <= 128 && "Unexpected build_vec type!" );
  // Bail on vectors smaller than 32 bits.
  if (DstSize < 32)
    return false;
  // Check if we're building a constant vector, in which case we want to
  // generate a constant pool load instead of a vector insert sequence.
  SmallVector<Constant *, 16> Csts;
  for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
    // Try to find G_CONSTANT or G_FCONSTANT
    auto *OpMI =
        getOpcodeDef(Opcode: TargetOpcode::G_CONSTANT, Reg: I.getOperand(i: Idx).getReg(), MRI);
    if (OpMI)
      Csts.emplace_back(
          Args: const_cast<ConstantInt *>(OpMI->getOperand(i: 1).getCImm()));
    else if ((OpMI = getOpcodeDef(Opcode: TargetOpcode::G_FCONSTANT,
                                  Reg: I.getOperand(i: Idx).getReg(), MRI)))
      Csts.emplace_back(
          Args: const_cast<ConstantFP *>(OpMI->getOperand(i: 1).getFPImm()));
    else
      // Any non-constant source disqualifies the whole vector.
      return false;
  }
  Constant *CV = ConstantVector::get(V: Csts);
  if (!emitConstantVector(Dst: I.getOperand(i: 0).getReg(), CV, MIRBuilder&: MIB, MRI))
    return false;
  I.eraseFromParent();
  return true;
}
5808 | |
/// Try to select a G_BUILD_VECTOR whose elements past the first are all undef
/// as a single SUBREG_TO_REG placing the first element in the low
/// subregister. \returns true (and erases \p I) on success.
bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  // Given:
  // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
  //
  // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
  Register Dst = I.getOperand(i: 0).getReg();
  Register EltReg = I.getOperand(i: 1).getReg();
  LLT EltTy = MRI.getType(Reg: EltReg);
  // If the index isn't on the same bank as its elements, then this can't be a
  // SUBREG_TO_REG.
  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
  const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
  if (EltRB != DstRB)
    return false;
  // Every element after the first must come from G_IMPLICIT_DEF.
  if (any_of(Range: drop_begin(RangeOrContainer: I.operands(), N: 2), P: [&MRI](const MachineOperand &Op) {
        return !getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, Reg: Op.getReg(), MRI);
      }))
    return false;
  unsigned SubReg;
  const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(Ty: EltTy, RB: EltRB);
  if (!EltRC)
    return false;
  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst), RB: DstRB);
  if (!DstRC)
    return false;
  if (!getSubRegForClass(EltRC, TRI, SubReg))
    return false;
  auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
                         .addImm(0)
                         .addUse(EltReg)
                         .addImm(SubReg);
  I.eraseFromParent();
  constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
  return RBI.constrainGenericRegister(Reg: Dst, RC: *DstRC, MRI);
}
5846 | |
/// Select a G_BUILD_VECTOR: first try the constant-vector and
/// SUBREG_TO_REG shortcuts, then fall back to placing element 0 in a vector
/// and emitting a lane insert per remaining non-undef element.
bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
                                                   MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // Until we port more of the optimized selections, for now just use a vector
  // insert sequence.
  const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  const LLT EltTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
  unsigned EltSize = EltTy.getSizeInBits();

  // Shortcut: all sources constant -> MOVI / constant-pool load.
  if (tryOptConstantBuildVec(I, DstTy, MRI))
    return true;
  // Shortcut: only lane 0 defined -> single SUBREG_TO_REG.
  if (tryOptBuildVecToSubregToReg(I, MRI))
    return true;

  if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
    return false; // Don't support all element types yet.
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(i: 1).getReg(), MRI, TRI);

  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  // Start by placing element 1 (operand 1) into lane 0 of a 128-bit vector.
  MachineInstr *ScalarToVec =
      emitScalarToVector(EltSize: DstTy.getElementType().getSizeInBits(), DstRC,
                         Scalar: I.getOperand(i: 1).getReg(), MIRBuilder&: MIB);
  if (!ScalarToVec)
    return false;

  Register DstVec = ScalarToVec->getOperand(i: 0).getReg();
  unsigned DstSize = DstTy.getSizeInBits();

  // Keep track of the last MI we inserted. Later on, we might be able to save
  // a copy using it.
  MachineInstr *PrevMI = ScalarToVec;
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
    // Note that if we don't do a subregister copy, we can end up making an
    // extra register.
    Register OpReg = I.getOperand(i).getReg();
    // Do not emit inserts for undefs
    if (!getOpcodeDef<GImplicitDef>(Reg: OpReg, MRI)) {
      PrevMI = &*emitLaneInsert(DstReg: std::nullopt, SrcReg: DstVec, EltReg: OpReg, LaneIdx: i - 1, RB, MIRBuilder&: MIB);
      DstVec = PrevMI->getOperand(i: 0).getReg();
    }
  }

  // If DstTy's size in bits is less than 128, then emit a subregister copy
  // from DstVec to the last register we've defined.
  if (DstSize < 128) {
    // Force this to be FPR using the destination vector.
    const TargetRegisterClass *RC =
        getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
    if (!RC)
      return false;
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n" );
      return false;
    }

    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
                        << "\n" );
      return false;
    }

    Register Reg = MRI.createVirtualRegister(RegClass: RC);
    Register DstReg = I.getOperand(i: 0).getReg();

    // Copy the low ssub/dsub lane of the built vector into the destination.
    MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}).addReg(RegNo: DstVec, flags: 0, SubReg);
    MachineOperand &RegOp = I.getOperand(i: 1);
    RegOp.setReg(Reg);
    RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
  } else {
    // We either have a vector with all elements (except the first one) undef or
    // at least one non-undef non-first element. In the first case, we need to
    // constrain the output register ourselves as we may have generated an
    // INSERT_SUBREG operation which is a generic operation for which the
    // output regclass cannot be automatically chosen.
    //
    // In the second case, there is no need to do this as it may generate an
    // instruction like INSvi32gpr where the regclass can be automatically
    // chosen.
    //
    // Also, we save a copy by re-using the destination register on the final
    // insert.
    PrevMI->getOperand(i: 0).setReg(I.getOperand(i: 0).getReg());
    constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);

    Register DstReg = PrevMI->getOperand(i: 0).getReg();
    if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
      const TargetRegisterClass *RC =
          getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
      RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
    }
  }

  I.eraseFromParent();
  return true;
}
5945 | |
/// Select a multi-vector load intrinsic with opcode \p Opc producing
/// \p NumVecs (2-4) result vectors. The selected instruction defines one
/// tuple register; each result is copied out of a consecutive dsub/qsub
/// subregister. Note: \p I is not erased here — the caller is responsible.
bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
                                                           unsigned NumVecs,
                                                           MachineInstr &I) {
  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  assert(Opc && "Expected an opcode?" );
  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors" );
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 64 || Size == 128) &&
         "Destination must be 64 bits or 128 bits?" );
  // 64-bit results live in D subregisters of the tuple, 128-bit in Q.
  unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
  // The pointer operand is the intrinsic's last operand.
  auto Ptr = I.getOperand(i: I.getNumOperands() - 1).getReg();
  assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?" );
  auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {Ptr});
  Load.cloneMemRefs(OtherMI: I);
  constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
  Register SelectedLoadDst = Load->getOperand(i: 0).getReg();
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // dsub0/qsub0 + Idx yields consecutive subregister indices.
    auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: Idx)}, SrcOps: {})
                   .addReg(RegNo: SelectedLoadDst, flags: 0, SubReg: SubReg + Idx);
    // Emit the subreg copies and immediately select them.
    // FIXME: We should refactor our copy code into an emitCopy helper and
    // clean up uses of this pattern elsewhere in the selector.
    selectCopy(*Vec, TII, MRI, TRI, RBI);
  }
  return true;
}
5974 | |
// Selects a structured load-to-lane intrinsic (the LD2i*/LD3i*/LD4i*
// instruction forms): the NumVecs input vectors are gathered into a
// Q-register tuple, the lane load is emitted, and the tuple result is split
// back out into the intrinsic's destination registers.
// Returns false if the lane index is not a compile-time constant or if a
// widened result cannot be narrowed back.
bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
    unsigned Opc, unsigned NumVecs, MachineInstr &I) {
  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  assert(Opc && "Expected an opcode?" );
  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors" );
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  // Lane loads only operate on Q-register tuples, so 64-bit vectors must be
  // widened to 128 bits first and narrowed again afterwards.
  bool Narrow = Ty.getSizeInBits() == 64;

  // The source vectors start after the NumVecs defs and the intrinsic ID
  // operand.
  auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
  SmallVector<Register, 4> Regs(NumVecs);
  std::transform(first: FirstSrcRegIt, last: FirstSrcRegIt + NumVecs, result: Regs.begin(),
                 unary_op: [](auto MO) { return MO.getReg(); });

  if (Narrow) {
    // Widen each 64-bit source into the low half of a 128-bit register.
    transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
      return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
          ->getOperand(0)
          .getReg();
    });
    Ty = Ty.multiplyElements(Factor: 2);
  }

  Register Tuple = createQTuple(Regs, MIB);
  // The lane number operand follows the source vectors and must be an
  // immediate; bail out if it is not a known constant.
  auto LaneNo = getIConstantVRegVal(VReg: (FirstSrcRegIt + NumVecs)->getReg(), MRI);
  if (!LaneNo)
    return false;

  Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
  auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {})
                  .addReg(RegNo: Tuple)
                  .addImm(Val: LaneNo->getZExtValue())
                  .addReg(RegNo: Ptr);
  Load.cloneMemRefs(OtherMI: I);
  constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
  Register SelectedLoadDst = Load->getOperand(i: 0).getReg();
  unsigned SubReg = AArch64::qsub0;
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // In the narrow case, copy into a fresh FPR128 first; the value is
    // narrowed back down to the 64-bit destination below.
    auto Vec = MIB.buildInstr(TargetOpcode::COPY,
                              {Narrow ? DstOp(&AArch64::FPR128RegClass)
                                      : DstOp(I.getOperand(Idx).getReg())},
                              {})
                   .addReg(SelectedLoadDst, 0, SubReg + Idx);
    Register WideReg = Vec.getReg(0);
    // Emit the subreg copies and immediately select them.
    selectCopy(*Vec, TII, MRI, TRI, RBI);
    if (Narrow &&
        !emitNarrowVector(DstReg: I.getOperand(i: Idx).getReg(), SrcReg: WideReg, MIB, MRI))
      return false;
  }
  return true;
}
6027 | |
6028 | void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I, |
6029 | unsigned NumVecs, |
6030 | unsigned Opc) { |
6031 | MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo(); |
6032 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6033 | Register Ptr = I.getOperand(i: 1 + NumVecs).getReg(); |
6034 | |
6035 | SmallVector<Register, 2> Regs(NumVecs); |
6036 | std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs, |
6037 | result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); }); |
6038 | |
6039 | Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB) |
6040 | : createDTuple(Regs, MIB); |
6041 | auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {Tuple, Ptr}); |
6042 | Store.cloneMemRefs(OtherMI: I); |
6043 | constrainSelectedInstRegOperands(*Store, TII, TRI, RBI); |
6044 | } |
6045 | |
// Selects a structured store-from-lane intrinsic (the ST2i*/ST3i*/ST4i*
// instruction forms): the NumVecs source vectors are gathered into a
// Q-register tuple and a single lane of the tuple is stored.
// Returns false if the lane index is not a compile-time constant.
bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
    MachineInstr &I, unsigned NumVecs, unsigned Opc) {
  MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
  LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
  // Lane stores only take Q-register tuples, so 64-bit vectors must be
  // widened to 128 bits first.
  bool Narrow = Ty.getSizeInBits() == 64;

  // The source vectors occupy operands [1, 1 + NumVecs).
  SmallVector<Register, 2> Regs(NumVecs);
  std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs,
                 result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); });

  if (Narrow)
    // Widen each 64-bit source into the low half of a 128-bit register.
    transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
      return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
          ->getOperand(0)
          .getReg();
    });

  Register Tuple = createQTuple(Regs, MIB);

  // The lane number operand follows the source vectors and must be an
  // immediate; bail out if it is not a known constant.
  auto LaneNo = getIConstantVRegVal(VReg: I.getOperand(i: 1 + NumVecs).getReg(), MRI);
  if (!LaneNo)
    return false;
  Register Ptr = I.getOperand(i: 1 + NumVecs + 1).getReg();
  auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {})
                   .addReg(RegNo: Tuple)
                   .addImm(Val: LaneNo->getZExtValue())
                   .addReg(RegNo: Ptr);
  Store.cloneMemRefs(OtherMI: I);
  constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
  return true;
}
6077 | |
6078 | bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( |
6079 | MachineInstr &I, MachineRegisterInfo &MRI) { |
6080 | // Find the intrinsic ID. |
6081 | unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID(); |
6082 | |
6083 | const LLT S8 = LLT::scalar(SizeInBits: 8); |
6084 | const LLT S16 = LLT::scalar(SizeInBits: 16); |
6085 | const LLT S32 = LLT::scalar(SizeInBits: 32); |
6086 | const LLT S64 = LLT::scalar(SizeInBits: 64); |
6087 | const LLT P0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
6088 | // Select the instruction. |
6089 | switch (IntrinID) { |
6090 | default: |
6091 | return false; |
6092 | case Intrinsic::aarch64_ldxp: |
6093 | case Intrinsic::aarch64_ldaxp: { |
6094 | auto NewI = MIB.buildInstr( |
6095 | IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX, |
6096 | {I.getOperand(0).getReg(), I.getOperand(1).getReg()}, |
6097 | {I.getOperand(3)}); |
6098 | NewI.cloneMemRefs(I); |
6099 | constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); |
6100 | break; |
6101 | } |
6102 | case Intrinsic::aarch64_neon_ld1x2: { |
6103 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6104 | unsigned Opc = 0; |
6105 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6106 | Opc = AArch64::LD1Twov8b; |
6107 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6108 | Opc = AArch64::LD1Twov16b; |
6109 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6110 | Opc = AArch64::LD1Twov4h; |
6111 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6112 | Opc = AArch64::LD1Twov8h; |
6113 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6114 | Opc = AArch64::LD1Twov2s; |
6115 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6116 | Opc = AArch64::LD1Twov4s; |
6117 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6118 | Opc = AArch64::LD1Twov2d; |
6119 | else if (Ty == S64 || Ty == P0) |
6120 | Opc = AArch64::LD1Twov1d; |
6121 | else |
6122 | llvm_unreachable("Unexpected type for ld1x2!" ); |
6123 | selectVectorLoadIntrinsic(Opc, NumVecs: 2, I); |
6124 | break; |
6125 | } |
6126 | case Intrinsic::aarch64_neon_ld1x3: { |
6127 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6128 | unsigned Opc = 0; |
6129 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6130 | Opc = AArch64::LD1Threev8b; |
6131 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6132 | Opc = AArch64::LD1Threev16b; |
6133 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6134 | Opc = AArch64::LD1Threev4h; |
6135 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6136 | Opc = AArch64::LD1Threev8h; |
6137 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6138 | Opc = AArch64::LD1Threev2s; |
6139 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6140 | Opc = AArch64::LD1Threev4s; |
6141 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6142 | Opc = AArch64::LD1Threev2d; |
6143 | else if (Ty == S64 || Ty == P0) |
6144 | Opc = AArch64::LD1Threev1d; |
6145 | else |
6146 | llvm_unreachable("Unexpected type for ld1x3!" ); |
6147 | selectVectorLoadIntrinsic(Opc, NumVecs: 3, I); |
6148 | break; |
6149 | } |
6150 | case Intrinsic::aarch64_neon_ld1x4: { |
6151 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6152 | unsigned Opc = 0; |
6153 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6154 | Opc = AArch64::LD1Fourv8b; |
6155 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6156 | Opc = AArch64::LD1Fourv16b; |
6157 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6158 | Opc = AArch64::LD1Fourv4h; |
6159 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6160 | Opc = AArch64::LD1Fourv8h; |
6161 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6162 | Opc = AArch64::LD1Fourv2s; |
6163 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6164 | Opc = AArch64::LD1Fourv4s; |
6165 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6166 | Opc = AArch64::LD1Fourv2d; |
6167 | else if (Ty == S64 || Ty == P0) |
6168 | Opc = AArch64::LD1Fourv1d; |
6169 | else |
6170 | llvm_unreachable("Unexpected type for ld1x4!" ); |
6171 | selectVectorLoadIntrinsic(Opc, NumVecs: 4, I); |
6172 | break; |
6173 | } |
6174 | case Intrinsic::aarch64_neon_ld2: { |
6175 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6176 | unsigned Opc = 0; |
6177 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6178 | Opc = AArch64::LD2Twov8b; |
6179 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6180 | Opc = AArch64::LD2Twov16b; |
6181 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6182 | Opc = AArch64::LD2Twov4h; |
6183 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6184 | Opc = AArch64::LD2Twov8h; |
6185 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6186 | Opc = AArch64::LD2Twov2s; |
6187 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6188 | Opc = AArch64::LD2Twov4s; |
6189 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6190 | Opc = AArch64::LD2Twov2d; |
6191 | else if (Ty == S64 || Ty == P0) |
6192 | Opc = AArch64::LD1Twov1d; |
6193 | else |
6194 | llvm_unreachable("Unexpected type for ld2!" ); |
6195 | selectVectorLoadIntrinsic(Opc, NumVecs: 2, I); |
6196 | break; |
6197 | } |
6198 | case Intrinsic::aarch64_neon_ld2lane: { |
6199 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6200 | unsigned Opc; |
6201 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6202 | Opc = AArch64::LD2i8; |
6203 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6204 | Opc = AArch64::LD2i16; |
6205 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6206 | Opc = AArch64::LD2i32; |
6207 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6208 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6209 | Opc = AArch64::LD2i64; |
6210 | else |
6211 | llvm_unreachable("Unexpected type for st2lane!" ); |
6212 | if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 2, I)) |
6213 | return false; |
6214 | break; |
6215 | } |
6216 | case Intrinsic::aarch64_neon_ld2r: { |
6217 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6218 | unsigned Opc = 0; |
6219 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6220 | Opc = AArch64::LD2Rv8b; |
6221 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6222 | Opc = AArch64::LD2Rv16b; |
6223 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6224 | Opc = AArch64::LD2Rv4h; |
6225 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6226 | Opc = AArch64::LD2Rv8h; |
6227 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6228 | Opc = AArch64::LD2Rv2s; |
6229 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6230 | Opc = AArch64::LD2Rv4s; |
6231 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6232 | Opc = AArch64::LD2Rv2d; |
6233 | else if (Ty == S64 || Ty == P0) |
6234 | Opc = AArch64::LD2Rv1d; |
6235 | else |
6236 | llvm_unreachable("Unexpected type for ld2r!" ); |
6237 | selectVectorLoadIntrinsic(Opc, NumVecs: 2, I); |
6238 | break; |
6239 | } |
6240 | case Intrinsic::aarch64_neon_ld3: { |
6241 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6242 | unsigned Opc = 0; |
6243 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6244 | Opc = AArch64::LD3Threev8b; |
6245 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6246 | Opc = AArch64::LD3Threev16b; |
6247 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6248 | Opc = AArch64::LD3Threev4h; |
6249 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6250 | Opc = AArch64::LD3Threev8h; |
6251 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6252 | Opc = AArch64::LD3Threev2s; |
6253 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6254 | Opc = AArch64::LD3Threev4s; |
6255 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6256 | Opc = AArch64::LD3Threev2d; |
6257 | else if (Ty == S64 || Ty == P0) |
6258 | Opc = AArch64::LD1Threev1d; |
6259 | else |
6260 | llvm_unreachable("Unexpected type for ld3!" ); |
6261 | selectVectorLoadIntrinsic(Opc, NumVecs: 3, I); |
6262 | break; |
6263 | } |
6264 | case Intrinsic::aarch64_neon_ld3lane: { |
6265 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6266 | unsigned Opc; |
6267 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6268 | Opc = AArch64::LD3i8; |
6269 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6270 | Opc = AArch64::LD3i16; |
6271 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6272 | Opc = AArch64::LD3i32; |
6273 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6274 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6275 | Opc = AArch64::LD3i64; |
6276 | else |
6277 | llvm_unreachable("Unexpected type for st3lane!" ); |
6278 | if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 3, I)) |
6279 | return false; |
6280 | break; |
6281 | } |
6282 | case Intrinsic::aarch64_neon_ld3r: { |
6283 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6284 | unsigned Opc = 0; |
6285 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6286 | Opc = AArch64::LD3Rv8b; |
6287 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6288 | Opc = AArch64::LD3Rv16b; |
6289 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6290 | Opc = AArch64::LD3Rv4h; |
6291 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6292 | Opc = AArch64::LD3Rv8h; |
6293 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6294 | Opc = AArch64::LD3Rv2s; |
6295 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6296 | Opc = AArch64::LD3Rv4s; |
6297 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6298 | Opc = AArch64::LD3Rv2d; |
6299 | else if (Ty == S64 || Ty == P0) |
6300 | Opc = AArch64::LD3Rv1d; |
6301 | else |
6302 | llvm_unreachable("Unexpected type for ld3r!" ); |
6303 | selectVectorLoadIntrinsic(Opc, NumVecs: 3, I); |
6304 | break; |
6305 | } |
6306 | case Intrinsic::aarch64_neon_ld4: { |
6307 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6308 | unsigned Opc = 0; |
6309 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6310 | Opc = AArch64::LD4Fourv8b; |
6311 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6312 | Opc = AArch64::LD4Fourv16b; |
6313 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6314 | Opc = AArch64::LD4Fourv4h; |
6315 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6316 | Opc = AArch64::LD4Fourv8h; |
6317 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6318 | Opc = AArch64::LD4Fourv2s; |
6319 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6320 | Opc = AArch64::LD4Fourv4s; |
6321 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6322 | Opc = AArch64::LD4Fourv2d; |
6323 | else if (Ty == S64 || Ty == P0) |
6324 | Opc = AArch64::LD1Fourv1d; |
6325 | else |
6326 | llvm_unreachable("Unexpected type for ld4!" ); |
6327 | selectVectorLoadIntrinsic(Opc, NumVecs: 4, I); |
6328 | break; |
6329 | } |
6330 | case Intrinsic::aarch64_neon_ld4lane: { |
6331 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6332 | unsigned Opc; |
6333 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6334 | Opc = AArch64::LD4i8; |
6335 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6336 | Opc = AArch64::LD4i16; |
6337 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6338 | Opc = AArch64::LD4i32; |
6339 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6340 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6341 | Opc = AArch64::LD4i64; |
6342 | else |
6343 | llvm_unreachable("Unexpected type for st4lane!" ); |
6344 | if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 4, I)) |
6345 | return false; |
6346 | break; |
6347 | } |
6348 | case Intrinsic::aarch64_neon_ld4r: { |
6349 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6350 | unsigned Opc = 0; |
6351 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6352 | Opc = AArch64::LD4Rv8b; |
6353 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6354 | Opc = AArch64::LD4Rv16b; |
6355 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6356 | Opc = AArch64::LD4Rv4h; |
6357 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6358 | Opc = AArch64::LD4Rv8h; |
6359 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6360 | Opc = AArch64::LD4Rv2s; |
6361 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6362 | Opc = AArch64::LD4Rv4s; |
6363 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6364 | Opc = AArch64::LD4Rv2d; |
6365 | else if (Ty == S64 || Ty == P0) |
6366 | Opc = AArch64::LD4Rv1d; |
6367 | else |
6368 | llvm_unreachable("Unexpected type for ld4r!" ); |
6369 | selectVectorLoadIntrinsic(Opc, NumVecs: 4, I); |
6370 | break; |
6371 | } |
6372 | case Intrinsic::aarch64_neon_st1x2: { |
6373 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6374 | unsigned Opc; |
6375 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6376 | Opc = AArch64::ST1Twov8b; |
6377 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6378 | Opc = AArch64::ST1Twov16b; |
6379 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6380 | Opc = AArch64::ST1Twov4h; |
6381 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6382 | Opc = AArch64::ST1Twov8h; |
6383 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6384 | Opc = AArch64::ST1Twov2s; |
6385 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6386 | Opc = AArch64::ST1Twov4s; |
6387 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6388 | Opc = AArch64::ST1Twov2d; |
6389 | else if (Ty == S64 || Ty == P0) |
6390 | Opc = AArch64::ST1Twov1d; |
6391 | else |
6392 | llvm_unreachable("Unexpected type for st1x2!" ); |
6393 | selectVectorStoreIntrinsic(I, NumVecs: 2, Opc); |
6394 | break; |
6395 | } |
6396 | case Intrinsic::aarch64_neon_st1x3: { |
6397 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6398 | unsigned Opc; |
6399 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6400 | Opc = AArch64::ST1Threev8b; |
6401 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6402 | Opc = AArch64::ST1Threev16b; |
6403 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6404 | Opc = AArch64::ST1Threev4h; |
6405 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6406 | Opc = AArch64::ST1Threev8h; |
6407 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6408 | Opc = AArch64::ST1Threev2s; |
6409 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6410 | Opc = AArch64::ST1Threev4s; |
6411 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6412 | Opc = AArch64::ST1Threev2d; |
6413 | else if (Ty == S64 || Ty == P0) |
6414 | Opc = AArch64::ST1Threev1d; |
6415 | else |
6416 | llvm_unreachable("Unexpected type for st1x3!" ); |
6417 | selectVectorStoreIntrinsic(I, NumVecs: 3, Opc); |
6418 | break; |
6419 | } |
6420 | case Intrinsic::aarch64_neon_st1x4: { |
6421 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6422 | unsigned Opc; |
6423 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6424 | Opc = AArch64::ST1Fourv8b; |
6425 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6426 | Opc = AArch64::ST1Fourv16b; |
6427 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6428 | Opc = AArch64::ST1Fourv4h; |
6429 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6430 | Opc = AArch64::ST1Fourv8h; |
6431 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6432 | Opc = AArch64::ST1Fourv2s; |
6433 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6434 | Opc = AArch64::ST1Fourv4s; |
6435 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6436 | Opc = AArch64::ST1Fourv2d; |
6437 | else if (Ty == S64 || Ty == P0) |
6438 | Opc = AArch64::ST1Fourv1d; |
6439 | else |
6440 | llvm_unreachable("Unexpected type for st1x4!" ); |
6441 | selectVectorStoreIntrinsic(I, NumVecs: 4, Opc); |
6442 | break; |
6443 | } |
6444 | case Intrinsic::aarch64_neon_st2: { |
6445 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6446 | unsigned Opc; |
6447 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6448 | Opc = AArch64::ST2Twov8b; |
6449 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6450 | Opc = AArch64::ST2Twov16b; |
6451 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6452 | Opc = AArch64::ST2Twov4h; |
6453 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6454 | Opc = AArch64::ST2Twov8h; |
6455 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6456 | Opc = AArch64::ST2Twov2s; |
6457 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6458 | Opc = AArch64::ST2Twov4s; |
6459 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6460 | Opc = AArch64::ST2Twov2d; |
6461 | else if (Ty == S64 || Ty == P0) |
6462 | Opc = AArch64::ST1Twov1d; |
6463 | else |
6464 | llvm_unreachable("Unexpected type for st2!" ); |
6465 | selectVectorStoreIntrinsic(I, NumVecs: 2, Opc); |
6466 | break; |
6467 | } |
6468 | case Intrinsic::aarch64_neon_st3: { |
6469 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6470 | unsigned Opc; |
6471 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6472 | Opc = AArch64::ST3Threev8b; |
6473 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6474 | Opc = AArch64::ST3Threev16b; |
6475 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6476 | Opc = AArch64::ST3Threev4h; |
6477 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6478 | Opc = AArch64::ST3Threev8h; |
6479 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6480 | Opc = AArch64::ST3Threev2s; |
6481 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6482 | Opc = AArch64::ST3Threev4s; |
6483 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6484 | Opc = AArch64::ST3Threev2d; |
6485 | else if (Ty == S64 || Ty == P0) |
6486 | Opc = AArch64::ST1Threev1d; |
6487 | else |
6488 | llvm_unreachable("Unexpected type for st3!" ); |
6489 | selectVectorStoreIntrinsic(I, NumVecs: 3, Opc); |
6490 | break; |
6491 | } |
6492 | case Intrinsic::aarch64_neon_st4: { |
6493 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6494 | unsigned Opc; |
6495 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6496 | Opc = AArch64::ST4Fourv8b; |
6497 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6498 | Opc = AArch64::ST4Fourv16b; |
6499 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6500 | Opc = AArch64::ST4Fourv4h; |
6501 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6502 | Opc = AArch64::ST4Fourv8h; |
6503 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6504 | Opc = AArch64::ST4Fourv2s; |
6505 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6506 | Opc = AArch64::ST4Fourv4s; |
6507 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6508 | Opc = AArch64::ST4Fourv2d; |
6509 | else if (Ty == S64 || Ty == P0) |
6510 | Opc = AArch64::ST1Fourv1d; |
6511 | else |
6512 | llvm_unreachable("Unexpected type for st4!" ); |
6513 | selectVectorStoreIntrinsic(I, NumVecs: 4, Opc); |
6514 | break; |
6515 | } |
6516 | case Intrinsic::aarch64_neon_st2lane: { |
6517 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6518 | unsigned Opc; |
6519 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6520 | Opc = AArch64::ST2i8; |
6521 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6522 | Opc = AArch64::ST2i16; |
6523 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6524 | Opc = AArch64::ST2i32; |
6525 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6526 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6527 | Opc = AArch64::ST2i64; |
6528 | else |
6529 | llvm_unreachable("Unexpected type for st2lane!" ); |
6530 | if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 2, Opc)) |
6531 | return false; |
6532 | break; |
6533 | } |
6534 | case Intrinsic::aarch64_neon_st3lane: { |
6535 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6536 | unsigned Opc; |
6537 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6538 | Opc = AArch64::ST3i8; |
6539 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6540 | Opc = AArch64::ST3i16; |
6541 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6542 | Opc = AArch64::ST3i32; |
6543 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6544 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6545 | Opc = AArch64::ST3i64; |
6546 | else |
6547 | llvm_unreachable("Unexpected type for st3lane!" ); |
6548 | if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 3, Opc)) |
6549 | return false; |
6550 | break; |
6551 | } |
6552 | case Intrinsic::aarch64_neon_st4lane: { |
6553 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6554 | unsigned Opc; |
6555 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6556 | Opc = AArch64::ST4i8; |
6557 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6558 | Opc = AArch64::ST4i16; |
6559 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6560 | Opc = AArch64::ST4i32; |
6561 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6562 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6563 | Opc = AArch64::ST4i64; |
6564 | else |
6565 | llvm_unreachable("Unexpected type for st4lane!" ); |
6566 | if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 4, Opc)) |
6567 | return false; |
6568 | break; |
6569 | } |
6570 | case Intrinsic::aarch64_mops_memset_tag: { |
6571 | // Transform |
6572 | // %dst:gpr(p0) = \ |
6573 | // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag), |
6574 | // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64) |
6575 | // where %dst is updated, into |
6576 | // %Rd:GPR64common, %Rn:GPR64) = \ |
6577 | // MOPSMemorySetTaggingPseudo \ |
6578 | // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64 |
6579 | // where Rd and Rn are tied. |
6580 | // It is expected that %val has been extended to s64 in legalization. |
6581 | // Note that the order of the size/value operands are swapped. |
6582 | |
6583 | Register DstDef = I.getOperand(i: 0).getReg(); |
6584 | // I.getOperand(1) is the intrinsic function |
6585 | Register DstUse = I.getOperand(i: 2).getReg(); |
6586 | Register ValUse = I.getOperand(i: 3).getReg(); |
6587 | Register SizeUse = I.getOperand(i: 4).getReg(); |
6588 | |
6589 | // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one. |
    // Therefore an additional virtual register is required for the updated size
6591 | // operand. This value is not accessible via the semantics of the intrinsic. |
6592 | Register SizeDef = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64)); |
6593 | |
6594 | auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo, |
6595 | {DstDef, SizeDef}, {DstUse, SizeUse, ValUse}); |
6596 | Memset.cloneMemRefs(I); |
6597 | constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI); |
6598 | break; |
6599 | } |
6600 | } |
6601 | |
6602 | I.eraseFromParent(); |
6603 | return true; |
6604 | } |
6605 | |
/// Manually select a G_INTRINSIC (no side effects) that the imported
/// tablegen patterns do not handle. On success the intrinsic instruction is
/// erased and true is returned; returning false defers the instruction to
/// the rest of the selector.
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();

  switch (IntrinID) {
  default:
    break;
  case Intrinsic::aarch64_crypto_sha1h: {
    Register DstReg = I.getOperand(i: 0).getReg();
    Register SrcReg = I.getOperand(i: 2).getReg();

    // FIXME: Should this be an assert?
    if (MRI.getType(Reg: DstReg).getSizeInBits() != 32 ||
        MRI.getType(Reg: SrcReg).getSizeInBits() != 32)
      return false;

    // The operation has to happen on FPRs. Set up some new FPR registers for
    // the source and destination if they are on GPRs.
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
      MIB.buildCopy(Res: {SrcReg}, Op: {I.getOperand(i: 2)});

      // Make sure the copy ends up getting constrained properly.
      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);

    // Actually insert the instruction.
    auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
    constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);

    // Did we create a new register for the destination?
    if (DstReg != I.getOperand(i: 0).getReg()) {
      // Yep. Copy the result of the instruction back into the original
      // destination.
      MIB.buildCopy(Res: {I.getOperand(i: 0)}, Op: {DstReg});
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::frameaddress:
  case Intrinsic::returnaddress: {
    MachineFunction &MF = *I.getParent()->getParent();
    MachineFrameInfo &MFI = MF.getFrameInfo();

    unsigned Depth = I.getOperand(i: 2).getImm();
    Register DstReg = I.getOperand(i: 0).getReg();
    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);

    // Fast path: the return address of the current frame is the live-in LR.
    if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
      if (!MFReturnAddr) {
        // Insert the copy from LR/X30 into the entry block, before it can be
        // clobbered by anything.
        MFI.setReturnAddressIsTaken(true);
        MFReturnAddr = getFunctionLiveInPhysReg(
            MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
      }

      // Strip any pointer-authentication signature from the return address.
      if (STI.hasPAuth()) {
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
      } else {
        // Without PAuth, only XPACLRI is available; it implicitly operates
        // on LR, so round-trip the value through LR.
        MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
        MIB.buildInstr(AArch64::XPACLRI);
        MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
      }

      I.eraseFromParent();
      return true;
    }

    // Walk up Depth frames by chasing the saved frame pointer chain, one
    // LDRXui [frame, #0] per level.
    MFI.setFrameAddressIsTaken(true);
    Register FrameAddr(AArch64::FP);
    while (Depth--) {
      Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
      auto Ldr =
          MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
      constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
      FrameAddr = NextFrame;
    }

    if (IntrinID == Intrinsic::frameaddress)
      MIB.buildCopy(Res: {DstReg}, Op: {FrameAddr});
    else {
      MFI.setReturnAddressIsTaken(true);

      // The saved return address is loaded from scaled index 1 of the frame
      // record; strip the PAuth signature as in the Depth == 0 case above.
      if (STI.hasPAuth()) {
        Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
        MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
      } else {
        MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
            .addImm(1);
        MIB.buildInstr(AArch64::XPACLRI);
        MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
      }
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::swift_async_context_addr:
    // The Swift async context slot lives 8 bytes below the frame pointer.
    auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
                              {Register(AArch64::FP)})
                   .addImm(8)
                   .addImm(0);
    constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);

    MF->getFrameInfo().setFrameAddressIsTaken(true);
    MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
    I.eraseFromParent();
    return true;
  }
  return false;
}
6726 | |
6727 | InstructionSelector::ComplexRendererFns |
6728 | AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { |
6729 | auto MaybeImmed = getImmedFromMO(Root); |
6730 | if (MaybeImmed == std::nullopt || *MaybeImmed > 31) |
6731 | return std::nullopt; |
6732 | uint64_t Enc = (32 - *MaybeImmed) & 0x1f; |
6733 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
6734 | } |
6735 | |
6736 | InstructionSelector::ComplexRendererFns |
6737 | AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { |
6738 | auto MaybeImmed = getImmedFromMO(Root); |
6739 | if (MaybeImmed == std::nullopt || *MaybeImmed > 31) |
6740 | return std::nullopt; |
6741 | uint64_t Enc = 31 - *MaybeImmed; |
6742 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
6743 | } |
6744 | |
6745 | InstructionSelector::ComplexRendererFns |
6746 | AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { |
6747 | auto MaybeImmed = getImmedFromMO(Root); |
6748 | if (MaybeImmed == std::nullopt || *MaybeImmed > 63) |
6749 | return std::nullopt; |
6750 | uint64_t Enc = (64 - *MaybeImmed) & 0x3f; |
6751 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
6752 | } |
6753 | |
6754 | InstructionSelector::ComplexRendererFns |
6755 | AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { |
6756 | auto MaybeImmed = getImmedFromMO(Root); |
6757 | if (MaybeImmed == std::nullopt || *MaybeImmed > 63) |
6758 | return std::nullopt; |
6759 | uint64_t Enc = 63 - *MaybeImmed; |
6760 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
6761 | } |
6762 | |
6763 | /// Helper to select an immediate value that can be represented as a 12-bit |
6764 | /// value shifted left by either 0 or 12. If it is possible to do so, return |
6765 | /// the immediate and shift value. If not, return std::nullopt. |
6766 | /// |
6767 | /// Used by selectArithImmed and selectNegArithImmed. |
6768 | InstructionSelector::ComplexRendererFns |
6769 | AArch64InstructionSelector::select12BitValueWithLeftShift( |
6770 | uint64_t Immed) const { |
6771 | unsigned ShiftAmt; |
6772 | if (Immed >> 12 == 0) { |
6773 | ShiftAmt = 0; |
6774 | } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { |
6775 | ShiftAmt = 12; |
6776 | Immed = Immed >> 12; |
6777 | } else |
6778 | return std::nullopt; |
6779 | |
6780 | unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt); |
6781 | return {{ |
6782 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Immed); }, |
6783 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShVal); }, |
6784 | }}; |
6785 | } |
6786 | |
6787 | /// SelectArithImmed - Select an immediate value that can be represented as |
6788 | /// a 12-bit value shifted left by either 0 or 12. If so, return true with |
6789 | /// Val set to the 12-bit value and Shift set to the shifter operand. |
6790 | InstructionSelector::ComplexRendererFns |
6791 | AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { |
6792 | // This function is called from the addsub_shifted_imm ComplexPattern, |
6793 | // which lists [imm] as the list of opcode it's interested in, however |
6794 | // we still need to check whether the operand is actually an immediate |
6795 | // here because the ComplexPattern opcode list is only used in |
6796 | // root-level opcode matching. |
6797 | auto MaybeImmed = getImmedFromMO(Root); |
6798 | if (MaybeImmed == std::nullopt) |
6799 | return std::nullopt; |
6800 | return select12BitValueWithLeftShift(Immed: *MaybeImmed); |
6801 | } |
6802 | |
6803 | /// SelectNegArithImmed - As above, but negates the value before trying to |
6804 | /// select it. |
6805 | InstructionSelector::ComplexRendererFns |
6806 | AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const { |
6807 | // We need a register here, because we need to know if we have a 64 or 32 |
6808 | // bit immediate. |
6809 | if (!Root.isReg()) |
6810 | return std::nullopt; |
6811 | auto MaybeImmed = getImmedFromMO(Root); |
6812 | if (MaybeImmed == std::nullopt) |
6813 | return std::nullopt; |
6814 | uint64_t Immed = *MaybeImmed; |
6815 | |
6816 | // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" |
6817 | // have the opposite effect on the C flag, so this pattern mustn't match under |
6818 | // those circumstances. |
6819 | if (Immed == 0) |
6820 | return std::nullopt; |
6821 | |
6822 | // Check if we're dealing with a 32-bit type on the root or a 64-bit type on |
6823 | // the root. |
6824 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
6825 | if (MRI.getType(Reg: Root.getReg()).getSizeInBits() == 32) |
6826 | Immed = ~((uint32_t)Immed) + 1; |
6827 | else |
6828 | Immed = ~Immed + 1ULL; |
6829 | |
6830 | if (Immed & 0xFFFFFFFFFF000000ULL) |
6831 | return std::nullopt; |
6832 | |
6833 | Immed &= 0xFFFFFFULL; |
6834 | return select12BitValueWithLeftShift(Immed); |
6835 | } |
6836 | |
6837 | /// Return true if it is worth folding MI into an extended register. That is, |
6838 | /// if it's safe to pull it into the addressing mode of a load or store as a |
6839 | /// shift. |
6840 | bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( |
6841 | MachineInstr &MI, const MachineRegisterInfo &MRI) const { |
6842 | // Always fold if there is one use, or if we're optimizing for size. |
6843 | Register DefReg = MI.getOperand(i: 0).getReg(); |
6844 | if (MRI.hasOneNonDBGUse(RegNo: DefReg) || |
6845 | MI.getParent()->getParent()->getFunction().hasOptSize()) |
6846 | return true; |
6847 | |
6848 | // FIXME: Consider checking HasAddrLSLSlow14 and HasALULSLFast as |
6849 | // appropriate. |
6850 | |
6851 | // We have a fastpath, so folding a shift in and potentially computing it |
6852 | // many times may be beneficial. Check if this is only used in memory ops. |
6853 | // If it is, then we should fold. |
6854 | return all_of(Range: MRI.use_nodbg_instructions(Reg: DefReg), |
6855 | P: [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); |
6856 | } |
6857 | |
6858 | static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { |
6859 | switch (Type) { |
6860 | case AArch64_AM::SXTB: |
6861 | case AArch64_AM::SXTH: |
6862 | case AArch64_AM::SXTW: |
6863 | return true; |
6864 | default: |
6865 | return false; |
6866 | } |
6867 | } |
6868 | |
/// Try to fold an offset computation of the form (shl reg, imm) or
/// (mul reg, 1 << imm) — optionally wrapped in a G_ZEXT when \p WantsExt is
/// true — into a reg+reg addressing mode, where imm must equal
/// log2(\p SizeInBytes). On success renders [Base, Offset, SignExtend,
/// DoShift=1]; otherwise returns std::nullopt.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand" );
  assert(Offset.isReg() && "Expected offset to be a register operand" );

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  MachineInstr *OffsetInst = MRI.getVRegDef(Reg: Offset.getReg());

  unsigned OffsetOpc = OffsetInst->getOpcode();
  bool LookedThroughZExt = false;
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
    // Try to look through a ZEXT.
    if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
      return std::nullopt;

    OffsetInst = MRI.getVRegDef(Reg: OffsetInst->getOperand(i: 1).getReg());
    OffsetOpc = OffsetInst->getOpcode();
    LookedThroughZExt = true;

    if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
      return std::nullopt;
  }
  // Make sure that the memory op is a valid size.
  int64_t LegalShiftVal = Log2_32(Value: SizeInBytes);
  // A shift amount of 0 (byte-sized access) gains nothing from this form.
  if (LegalShiftVal == 0)
    return std::nullopt;
  if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI))
    return std::nullopt;

  // Now, try to find the specific G_CONSTANT. Start by assuming that the
  // register we will offset is the LHS, and the register containing the
  // constant is the RHS.
  Register OffsetReg = OffsetInst->getOperand(i: 1).getReg();
  Register ConstantReg = OffsetInst->getOperand(i: 2).getReg();
  auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
  if (!ValAndVReg) {
    // We didn't get a constant on the RHS. If the opcode is a shift, then
    // we're done.
    if (OffsetOpc == TargetOpcode::G_SHL)
      return std::nullopt;

    // If we have a G_MUL, we can use either register. Try looking at the RHS.
    std::swap(a&: OffsetReg, b&: ConstantReg);
    ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
    if (!ValAndVReg)
      return std::nullopt;
  }

  // The value must fit into 3 bits, and must be positive. Make sure that is
  // true.
  int64_t ImmVal = ValAndVReg->Value.getSExtValue();

  // Since we're going to pull this into a shift, the constant value must be
  // a power of 2. If we got a multiply, then we need to check this.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!llvm::has_single_bit<uint32_t>(Value: ImmVal))
      return std::nullopt;

    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
    ImmVal = Log2_32(Value: ImmVal);
  }

  if ((ImmVal & 0x7) != ImmVal)
    return std::nullopt;

  // We are only allowed to shift by LegalShiftVal. This shift value is built
  // into the instruction, so we can't just use whatever we want.
  if (ImmVal != LegalShiftVal)
    return std::nullopt;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend, unless we looked through a
    // G_ZEXT earlier.
    if (!LookedThroughZExt) {
      MachineInstr *ExtInst = getDefIgnoringCopies(Reg: OffsetReg, MRI);
      auto Ext = getExtendTypeForInst(MI&: *ExtInst, MRI, IsLoadStore: true);
      if (Ext == AArch64_AM::InvalidShiftExtend)
        return std::nullopt;

      SignExtend = isSignExtendShiftType(Type: Ext) ? 1 : 0;
      // We only support SXTW for signed extension here.
      if (SignExtend && Ext != AArch64_AM::SXTW)
        return std::nullopt;
      // Fold the extend away: address the instruction's pre-extend source.
      OffsetReg = ExtInst->getOperand(i: 1).getReg();
    }

    // Need a 32-bit wide register here.
    MachineIRBuilder MIB(*MRI.getVRegDef(Reg: Root.getReg()));
    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
  }

  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  // offset. Signify that we are shifting by setting the shift flag to 1.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are both
             // added to the instruction.
             MIB.addImm(Val: SignExtend);
             MIB.addImm(Val: 1);
           }}};
}
6974 | |
6975 | /// This is used for computing addresses like this: |
6976 | /// |
6977 | /// ldr x1, [x2, x3, lsl #3] |
6978 | /// |
6979 | /// Where x2 is the base register, and x3 is an offset register. The shift-left |
6980 | /// is a constant value specific to this load instruction. That is, we'll never |
6981 | /// see anything other than a 3 here (which corresponds to the size of the |
6982 | /// element being loaded.) |
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
    MachineOperand &Root, unsigned SizeInBytes) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We want to find something like this:
  //
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // And fold it into this addressing mode:
  //
  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]

  // Check if we can find the G_PTR_ADD.
  MachineInstr *PtrAdd =
      getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI))
    return std::nullopt;

  // Now, try to match an opcode which will match our specific offset.
  // We want a G_SHL or a G_MUL.
  MachineInstr *OffsetInst =
      getDefIgnoringCopies(Reg: PtrAdd->getOperand(i: 2).getReg(), MRI);
  // Pass the offset instruction's def (operand 0); selectExtendedSHL does the
  // actual shift/mul matching and renders the full operand list.
  return selectExtendedSHL(Root, Base&: PtrAdd->getOperand(i: 1),
                           Offset&: OffsetInst->getOperand(i: 0), SizeInBytes,
                           /*WantsExt=*/false);
}
7015 | |
7016 | /// This is used for computing addresses like this: |
7017 | /// |
7018 | /// ldr x1, [x2, x3] |
7019 | /// |
7020 | /// Where x2 is the base register, and x3 is an offset register. |
7021 | /// |
7022 | /// When possible (or profitable) to fold a G_PTR_ADD into the address |
7023 | /// calculation, this will do so. Otherwise, it will return std::nullopt. |
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We need a GEP.
  MachineInstr *Gep = MRI.getVRegDef(Reg: Root.getReg());
  if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
    return std::nullopt;

  // If this is used more than once, let's not bother folding.
  // TODO: Check if they are memory ops. If they are, then we can still fold
  // without having to recompute anything.
  if (!MRI.hasOneNonDBGUse(RegNo: Gep->getOperand(i: 0).getReg()))
    return std::nullopt;

  // Base is the GEP's LHS, offset is its RHS.
  return {{[=](MachineInstrBuilder &MIB) {
             MIB.addUse(RegNo: Gep->getOperand(i: 1).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             MIB.addUse(RegNo: Gep->getOperand(i: 2).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are both
             // added to the instruction: extend = 0 and shift = 0 (plain
             // [base, xN] with no extend or scaling).
             MIB.addImm(Val: 0);
             MIB.addImm(Val: 0);
           }}};
}
7054 | |
7055 | /// This is intended to be equivalent to selectAddrModeXRO in |
7056 | /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads. |
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (!Root.isReg())
    return std::nullopt;
  MachineInstr *PtrAdd =
      getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
  if (!PtrAdd)
    return std::nullopt;

  // Check for immediates which cannot be encoded in the [base + imm]
  // addressing mode, and can't be encoded in an add/sub. If this happens,
  // we'll end up with code like:
  //
  // mov x0, wide
  // add x1 base, x0
  // ldr x2, [x1, x0]
  //
  // In this situation, we can use the [base, xreg] addressing mode to save an
  // add/sub:
  //
  // mov x0, wide
  // ldr x2, [base, x0]
  auto ValAndVReg =
      getIConstantVRegValWithLookThrough(VReg: PtrAdd->getOperand(i: 2).getReg(), MRI);
  if (ValAndVReg) {
    unsigned Scale = Log2_32(Value: SizeInBytes);
    int64_t ImmOff = ValAndVReg->Value.getSExtValue();

    // Skip immediates that can be selected in the load/store addressing
    // mode (unsigned, Size-aligned, and within the scaled 12-bit range).
    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
        ImmOff < (0x1000 << Scale))
      return std::nullopt;

    // Helper lambda to decide whether or not it is preferable to emit an add.
    auto isPreferredADD = [](int64_t ImmOff) {
      // Constants in [0x0, 0xfff] can be encoded in an add.
      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
        return true;

      // Can it be encoded in an add lsl #12?
      if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
        return false;

      // It can be encoded in an add lsl #12, but we may not want to. If it is
      // possible to select this as a single movz, then prefer that. A single
      // movz is faster than an add with a shift.
      return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
             (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
    };

    // If the immediate can be encoded in a single add/sub, then bail out.
    if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return std::nullopt;
  }

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // a GEP.
  return selectAddrModeRegisterOffset(Root);
}
7124 | |
7125 | /// This is used for computing addresses like this: |
7126 | /// |
7127 | /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal] |
7128 | /// |
7129 | /// Where we have a 64-bit base register, a 32-bit offset register, and an |
7130 | /// extend (which may or may not be signed). |
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  MachineInstr *PtrAdd =
      getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI))
    return std::nullopt;

  MachineOperand &LHS = PtrAdd->getOperand(i: 1);
  MachineOperand &RHS = PtrAdd->getOperand(i: 2);
  MachineInstr *OffsetInst = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);

  // The first case is the same as selectAddrModeXRO, except we need an extend.
  // In this case, we try to find a shift and extend, and fold them into the
  // addressing mode.
  //
  // E.g.
  //
  // off_reg = G_Z/S/ANYEXT ext_reg
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // In this case we can get a load like this:
  //
  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
  auto ExtendedShl = selectExtendedSHL(Root, Base&: LHS, Offset&: OffsetInst->getOperand(i: 0),
                                       SizeInBytes, /*WantsExt=*/true);
  if (ExtendedShl)
    return ExtendedShl;

  // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
  //
  // e.g.
  // ldr something, [base_reg, ext_reg, sxtw]
  if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI))
    return std::nullopt;

  // Check if this is an extend. We'll get an extend type if it is.
  AArch64_AM::ShiftExtendType Ext =
      getExtendTypeForInst(MI&: *OffsetInst, MRI, /*IsLoadStore=*/true);
  if (Ext == AArch64_AM::InvalidShiftExtend)
    return std::nullopt;

  // Need a 32-bit wide register: take the extend's pre-extend source and
  // make sure it lives in GPR32.
  MachineIRBuilder MIB(*PtrAdd);
  Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
                                       AArch64::GPR32RegClass, MIB);
  unsigned SignExtend = Ext == AArch64_AM::SXTW;

  // Base is LHS, offset is ExtReg. The trailing immediates are the
  // sign-extend flag and shift amount 0 (extend only, no scaling).
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: LHS.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(Val: SignExtend);
             MIB.addImm(Val: 0);
           }}};
}
7192 | |
7193 | /// Select a "register plus unscaled signed 9-bit immediate" address. This |
7194 | /// should only match when there is an offset that is not valid for a scaled |
7195 | /// immediate addressing mode. The "Size" argument is the size in bytes of the |
7196 | /// memory reference, which is needed here to know what is valid for a scaled |
7197 | /// immediate. |
7198 | InstructionSelector::ComplexRendererFns |
7199 | AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, |
7200 | unsigned Size) const { |
7201 | MachineRegisterInfo &MRI = |
7202 | Root.getParent()->getParent()->getParent()->getRegInfo(); |
7203 | |
7204 | if (!Root.isReg()) |
7205 | return std::nullopt; |
7206 | |
7207 | if (!isBaseWithConstantOffset(Root, MRI)) |
7208 | return std::nullopt; |
7209 | |
7210 | MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg()); |
7211 | |
7212 | MachineOperand &OffImm = RootDef->getOperand(i: 2); |
7213 | if (!OffImm.isReg()) |
7214 | return std::nullopt; |
7215 | MachineInstr *RHS = MRI.getVRegDef(Reg: OffImm.getReg()); |
7216 | if (RHS->getOpcode() != TargetOpcode::G_CONSTANT) |
7217 | return std::nullopt; |
7218 | int64_t RHSC; |
7219 | MachineOperand &RHSOp1 = RHS->getOperand(i: 1); |
7220 | if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) |
7221 | return std::nullopt; |
7222 | RHSC = RHSOp1.getCImm()->getSExtValue(); |
7223 | |
7224 | if (RHSC >= -256 && RHSC < 256) { |
7225 | MachineOperand &Base = RootDef->getOperand(i: 1); |
7226 | return {{ |
7227 | [=](MachineInstrBuilder &MIB) { MIB.add(MO: Base); }, |
7228 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC); }, |
7229 | }}; |
7230 | } |
7231 | return std::nullopt; |
7232 | } |
7233 | |
/// Try to fold a G_ADD_LOW fed by an ADRP into a [page-reg, page-offset]
/// operand pair for a scaled load/store of \p Size bytes, rendering the ADRP
/// result register and the global address with MO_PAGEOFF | MO_NC flags.
/// Bails out for thread-local globals and whenever the global's alignment or
/// offset cannot guarantee a Size-aligned access.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
                                                 unsigned Size,
                                                 MachineRegisterInfo &MRI) const {
  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
    return std::nullopt;
  MachineInstr &Adrp = *MRI.getVRegDef(Reg: RootDef.getOperand(i: 1).getReg());
  if (Adrp.getOpcode() != AArch64::ADRP)
    return std::nullopt;

  // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
  // The scaled addressing mode requires the offset to be a multiple of the
  // access size.
  auto Offset = Adrp.getOperand(i: 1).getOffset();
  if (Offset % Size != 0)
    return std::nullopt;

  // TLS accesses need their own sequence; don't fold them here.
  auto GV = Adrp.getOperand(i: 1).getGlobal();
  if (GV->isThreadLocal())
    return std::nullopt;

  // The global itself must be at least Size-aligned for the scaled form.
  auto &MF = *RootDef.getParent()->getParent();
  if (GV->getPointerAlignment(DL: MF.getDataLayout()) < Size)
    return std::nullopt;

  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM: MF.getTarget());
  MachineIRBuilder MIRBuilder(RootDef);
  Register AdrpReg = Adrp.getOperand(i: 0).getReg();
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: AdrpReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addGlobalAddress(GV, Offset,
                                  TargetFlags: OpFlags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
           }}};
}
7267 | |
7268 | /// Select a "register plus scaled unsigned 12-bit immediate" address. The |
7269 | /// "Size" argument is the size in bytes of the memory reference, which |
7270 | /// determines the scale. |
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineFunction &MF = *Root.getParent()->getParent()->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (!Root.isReg())
    return std::nullopt;

  // A bare frame index is rendered as [fi, #0].
  MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(MO: RootDef->getOperand(i: 1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
    }};
  }

  CodeModel::Model CM = MF.getTarget().getCodeModel();
  // Check if we can fold in the ADD of small code model ADRP + ADD address.
  if (CM == CodeModel::Small) {
    auto OpFns = tryFoldAddLowIntoImm(RootDef&: *RootDef, Size, MRI);
    if (OpFns)
      return OpFns;
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(i: 1);
    MachineOperand &RHS = RootDef->getOperand(i: 2);
    MachineInstr *LHSDef = MRI.getVRegDef(Reg: LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(Reg: RHS.getReg());

    // NOTE(review): the CImm access assumes isBaseWithConstantOffset
    // guarantees a G_CONSTANT RHS — confirm against its definition.
    int64_t RHSC = (int64_t)RHSDef->getOperand(i: 1).getCImm()->getZExtValue();
    unsigned Scale = Log2_32(Value: Size);
    // The scaled form takes a non-negative, Size-aligned offset that fits in
    // 12 bits after scaling.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
      if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHSDef->getOperand(i: 1)); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
        }};

      return {{
          [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHS); },
          [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
      }};
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size))
    return std::nullopt;

  // General case: use the root register directly with a zero offset.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(MO: Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
  }};
}
7328 | |
7329 | /// Given a shift instruction, return the correct shift type for that |
7330 | /// instruction. |
7331 | static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) { |
7332 | switch (MI.getOpcode()) { |
7333 | default: |
7334 | return AArch64_AM::InvalidShiftExtend; |
7335 | case TargetOpcode::G_SHL: |
7336 | return AArch64_AM::LSL; |
7337 | case TargetOpcode::G_LSHR: |
7338 | return AArch64_AM::LSR; |
7339 | case TargetOpcode::G_ASHR: |
7340 | return AArch64_AM::ASR; |
7341 | case TargetOpcode::G_ROTR: |
7342 | return AArch64_AM::ROR; |
7343 | } |
7344 | } |
7345 | |
/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
///
/// \p AllowROR controls whether a G_ROTR may be folded (rotates are only
/// legal as a modifier on some instruction classes).
///
/// On success, returns renderers that add the shift's source register and
/// the packed shift-type/amount immediate; otherwise std::nullopt.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
                                                  bool AllowROR) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // Check if the operand is defined by an instruction which corresponds to
  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
  MachineInstr *ShiftInst = MRI.getVRegDef(Reg: Root.getReg());
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(MI&: *ShiftInst);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return std::nullopt;
  // Rotates are only folded when the caller explicitly allows them.
  if (ShType == AArch64_AM::ROR && !AllowROR)
    return std::nullopt;
  // Bail if folding the shift into its user isn't deemed profitable.
  if (!isWorthFoldingIntoExtendedReg(MI&: *ShiftInst, MRI))
    return std::nullopt;

  // Need an immediate on the RHS.
  MachineOperand &ShiftRHS = ShiftInst->getOperand(i: 2);
  auto Immed = getImmedFromMO(Root: ShiftRHS);
  if (!Immed)
    return std::nullopt;

  // We have something that we can fold. Fold in the shift's LHS and RHS into
  // the instruction.
  MachineOperand &ShiftLHS = ShiftInst->getOperand(i: 1);
  Register ShiftReg = ShiftLHS.getReg();

  // The encoding only has room for shift amounts in [0, NumBits - 1], so
  // mask the immediate down to that range before encoding it.
  unsigned NumBits = MRI.getType(Reg: ShiftReg).getSizeInBits();
  unsigned Val = *Immed & (NumBits - 1);
  unsigned ShiftVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ShiftReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShiftVal); }}};
}
7385 | |
7386 | AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst( |
7387 | MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const { |
7388 | unsigned Opc = MI.getOpcode(); |
7389 | |
7390 | // Handle explicit extend instructions first. |
7391 | if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) { |
7392 | unsigned Size; |
7393 | if (Opc == TargetOpcode::G_SEXT) |
7394 | Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits(); |
7395 | else |
7396 | Size = MI.getOperand(i: 2).getImm(); |
7397 | assert(Size != 64 && "Extend from 64 bits?" ); |
7398 | switch (Size) { |
7399 | case 8: |
7400 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB; |
7401 | case 16: |
7402 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH; |
7403 | case 32: |
7404 | return AArch64_AM::SXTW; |
7405 | default: |
7406 | return AArch64_AM::InvalidShiftExtend; |
7407 | } |
7408 | } |
7409 | |
7410 | if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) { |
7411 | unsigned Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits(); |
7412 | assert(Size != 64 && "Extend from 64 bits?" ); |
7413 | switch (Size) { |
7414 | case 8: |
7415 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB; |
7416 | case 16: |
7417 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH; |
7418 | case 32: |
7419 | return AArch64_AM::UXTW; |
7420 | default: |
7421 | return AArch64_AM::InvalidShiftExtend; |
7422 | } |
7423 | } |
7424 | |
7425 | // Don't have an explicit extend. Try to handle a G_AND with a constant mask |
7426 | // on the RHS. |
7427 | if (Opc != TargetOpcode::G_AND) |
7428 | return AArch64_AM::InvalidShiftExtend; |
7429 | |
7430 | std::optional<uint64_t> MaybeAndMask = getImmedFromMO(Root: MI.getOperand(i: 2)); |
7431 | if (!MaybeAndMask) |
7432 | return AArch64_AM::InvalidShiftExtend; |
7433 | uint64_t AndMask = *MaybeAndMask; |
7434 | switch (AndMask) { |
7435 | default: |
7436 | return AArch64_AM::InvalidShiftExtend; |
7437 | case 0xFF: |
7438 | return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; |
7439 | case 0xFFFF: |
7440 | return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; |
7441 | case 0xFFFFFFFF: |
7442 | return AArch64_AM::UXTW; |
7443 | } |
7444 | } |
7445 | |
7446 | Register AArch64InstructionSelector::moveScalarRegClass( |
7447 | Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const { |
7448 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
7449 | auto Ty = MRI.getType(Reg); |
7450 | assert(!Ty.isVector() && "Expected scalars only!" ); |
7451 | if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC)) |
7452 | return Reg; |
7453 | |
7454 | // Create a copy and immediately select it. |
7455 | // FIXME: We should have an emitCopy function? |
7456 | auto Copy = MIB.buildCopy(Res: {&RC}, Op: {Reg}); |
7457 | selectCopy(*Copy, TII, MRI, TRI, RBI); |
7458 | return Copy.getReg(Idx: 0); |
7459 | } |
7460 | |
/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
///
/// Matches either (G_SHL (extend x), imm) with imm in [0, 4], or a bare
/// extend-like instruction. On success, returns renderers adding the
/// (GPR32) source register and the packed extend-type/shift immediate.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  uint64_t ShiftVal = 0;
  Register ExtReg;
  AArch64_AM::ShiftExtendType Ext;
  MachineInstr *RootDef = getDefIgnoringCopies(Reg: Root.getReg(), MRI);
  if (!RootDef)
    return std::nullopt;

  // Bail if folding into the user isn't deemed profitable.
  if (!isWorthFoldingIntoExtendedReg(MI&: *RootDef, MRI))
    return std::nullopt;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    MachineOperand &RHS = RootDef->getOperand(i: 2);
    std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(Root: RHS);
    if (!MaybeShiftVal)
      return std::nullopt;
    ShiftVal = *MaybeShiftVal;
    // Extended-register operands only encode shift amounts up to 4.
    if (ShiftVal > 4)
      return std::nullopt;
    // Look for a valid extend instruction on the LHS of the shift.
    MachineOperand &LHS = RootDef->getOperand(i: 1);
    MachineInstr *ExtDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
    if (!ExtDef)
      return std::nullopt;
    Ext = getExtendTypeForInst(MI&: *ExtDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = ExtDef->getOperand(i: 1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(MI&: *RootDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = RootDef->getOperand(i: 1).getReg();

    // If we have a 32 bit instruction which zeroes out the high half of a
    // register, we get an implicit zero extend for free. Check if we have one.
    // FIXME: We actually emit the extend right now even though we don't have
    // to.
    if (Ext == AArch64_AM::UXTW && MRI.getType(Reg: ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(Reg: ExtReg);
      if (isDef32(MI: *ExtInst))
        return std::nullopt;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  // copy.
  MachineIRBuilder MIB(*RootDef);
  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal));
           }}};
}
7528 | |
7529 | InstructionSelector::ComplexRendererFns |
7530 | AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const { |
7531 | if (!Root.isReg()) |
7532 | return std::nullopt; |
7533 | MachineRegisterInfo &MRI = |
7534 | Root.getParent()->getParent()->getParent()->getRegInfo(); |
7535 | |
7536 | auto = getDefSrcRegIgnoringCopies(Reg: Root.getReg(), MRI); |
7537 | while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST && |
7538 | STI.isLittleEndian()) |
7539 | Extract = |
7540 | getDefSrcRegIgnoringCopies(Reg: Extract->MI->getOperand(i: 1).getReg(), MRI); |
7541 | if (!Extract) |
7542 | return std::nullopt; |
7543 | |
7544 | if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) { |
7545 | if (Extract->Reg == Extract->MI->getOperand(i: 1).getReg()) { |
7546 | Register ExtReg = Extract->MI->getOperand(i: 2).getReg(); |
7547 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}}; |
7548 | } |
7549 | } |
7550 | if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) { |
7551 | LLT SrcTy = MRI.getType(Reg: Extract->MI->getOperand(i: 1).getReg()); |
7552 | auto LaneIdx = getIConstantVRegValWithLookThrough( |
7553 | VReg: Extract->MI->getOperand(i: 2).getReg(), MRI); |
7554 | if (LaneIdx && SrcTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) && |
7555 | LaneIdx->Value.getSExtValue() == 1) { |
7556 | Register ExtReg = Extract->MI->getOperand(i: 1).getReg(); |
7557 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}}; |
7558 | } |
7559 | } |
7560 | |
7561 | return std::nullopt; |
7562 | } |
7563 | |
7564 | void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, |
7565 | const MachineInstr &MI, |
7566 | int OpIdx) const { |
7567 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
7568 | assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && |
7569 | "Expected G_CONSTANT" ); |
7570 | std::optional<int64_t> CstVal = |
7571 | getIConstantVRegSExtVal(VReg: MI.getOperand(i: 0).getReg(), MRI); |
7572 | assert(CstVal && "Expected constant value" ); |
7573 | MIB.addImm(Val: *CstVal); |
7574 | } |
7575 | |
7576 | void AArch64InstructionSelector::renderLogicalImm32( |
7577 | MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { |
7578 | assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && |
7579 | "Expected G_CONSTANT" ); |
7580 | uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue(); |
7581 | uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 32); |
7582 | MIB.addImm(Val: Enc); |
7583 | } |
7584 | |
7585 | void AArch64InstructionSelector::renderLogicalImm64( |
7586 | MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { |
7587 | assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && |
7588 | "Expected G_CONSTANT" ); |
7589 | uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue(); |
7590 | uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 64); |
7591 | MIB.addImm(Val: Enc); |
7592 | } |
7593 | |
7594 | void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB, |
7595 | const MachineInstr &MI, |
7596 | int OpIdx) const { |
7597 | assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 && |
7598 | "Expected G_UBSANTRAP" ); |
7599 | MIB.addImm(Val: MI.getOperand(i: 0).getImm() | ('U' << 8)); |
7600 | } |
7601 | |
7602 | void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB, |
7603 | const MachineInstr &MI, |
7604 | int OpIdx) const { |
7605 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && |
7606 | "Expected G_FCONSTANT" ); |
7607 | MIB.addImm( |
7608 | Val: AArch64_AM::getFP16Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF())); |
7609 | } |
7610 | |
/// Render a G_FCONSTANT as the 8-bit encoded immediate used by
/// single-precision FMOV.
void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT" );
  MIB.addImm(
      Val: AArch64_AM::getFP32Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
}
7619 | |
7620 | void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB, |
7621 | const MachineInstr &MI, |
7622 | int OpIdx) const { |
7623 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && |
7624 | "Expected G_FCONSTANT" ); |
7625 | MIB.addImm( |
7626 | Val: AArch64_AM::getFP64Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF())); |
7627 | } |
7628 | |
/// Render a G_FCONSTANT as an AdvSIMD modified-immediate (type 4) encoding,
/// built from the raw bit pattern of the FP constant.
void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT" );
  MIB.addImm(Val: AArch64_AM::encodeAdvSIMDModImmType4(Imm: MI.getOperand(i: 1)
                                                     .getFPImm()
                                                     ->getValueAPF()
                                                     .bitcastToAPInt()
                                                     .getZExtValue()));
}
7639 | |
7640 | bool AArch64InstructionSelector::isLoadStoreOfNumBytes( |
7641 | const MachineInstr &MI, unsigned NumBytes) const { |
7642 | if (!MI.mayLoadOrStore()) |
7643 | return false; |
7644 | assert(MI.hasOneMemOperand() && |
7645 | "Expected load/store to have only one mem op!" ); |
7646 | return (*MI.memoperands_begin())->getSize() == NumBytes; |
7647 | } |
7648 | |
7649 | bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { |
7650 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
7651 | if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits() != 32) |
7652 | return false; |
7653 | |
7654 | // Only return true if we know the operation will zero-out the high half of |
7655 | // the 64-bit register. Truncates can be subregister copies, which don't |
7656 | // zero out the high bits. Copies and other copy-like instructions can be |
7657 | // fed by truncates, or could be lowered as subregister copies. |
7658 | switch (MI.getOpcode()) { |
7659 | default: |
7660 | return true; |
7661 | case TargetOpcode::COPY: |
7662 | case TargetOpcode::G_BITCAST: |
7663 | case TargetOpcode::G_TRUNC: |
7664 | case TargetOpcode::G_PHI: |
7665 | return false; |
7666 | } |
7667 | } |
7668 | |
7669 | |
// Perform fixups on the given PHI instruction's operands to force them all
// to be the same as the destination regbank. Any operand on a different
// bank gets a cross-bank COPY inserted in its defining block, and the PHI
// is rewired to use the copy.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI" );
  Register DstReg = MI.getOperand(i: 0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(Reg: DstReg);
  assert(DstRB && "Expected PHI dst to have regbank assigned" );
  MachineIRBuilder MIB(MI);

  // Go through each operand and ensure it has the same regbank.
  for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands())) {
    if (!MO.isReg())
      continue;
    Register OpReg = MO.getReg();
    const RegisterBank *RB = MRI.getRegBankOrNull(Reg: OpReg);
    if (RB != DstRB) {
      // Insert a cross-bank copy. It is placed right after the operand's
      // defining instruction so it dominates the PHI's incoming edge.
      auto *OpDef = MRI.getVRegDef(Reg: OpReg);
      const LLT &Ty = MRI.getType(Reg: OpReg);
      MachineBasicBlock &OpDefBB = *OpDef->getParent();

      // Any instruction we insert must appear after all PHIs in the block
      // for the block to be valid MIR.
      MachineBasicBlock::iterator InsertPt = std::next(x: OpDef->getIterator());
      if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
        InsertPt = OpDefBB.getFirstNonPHI();
      MIB.setInsertPt(MBB&: *OpDef->getParent(), II: InsertPt);
      auto Copy = MIB.buildCopy(Res: Ty, Op: OpReg);
      // The copy's result lives on the destination bank; point the PHI at it.
      MRI.setRegBank(Reg: Copy.getReg(Idx: 0), RegBank: *DstRB);
      MO.setReg(Copy.getReg(Idx: 0));
    }
  }
}
7704 | |
/// Scan all G_PHIs in \p MF and homogenize the register banks of any PHI
/// whose small (< 32 bit) scalar operands are split across the GPR and FPR
/// banks, by inserting cross-bank copies (see fixupPHIOpBanks).
void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  // We're looking for PHIs, build a list so we don't invalidate iterators.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<MachineInstr *, 32> Phis;
  for (auto &BB : MF) {
    for (auto &MI : BB) {
      if (MI.getOpcode() == TargetOpcode::G_PHI)
        Phis.emplace_back(Args: &MI);
    }
  }

  for (auto *MI : Phis) {
    // We need to do some work here if the operand types are < 16 bit and they
    // are split across fpr/gpr banks. Since all types <32b on gpr
    // end up being assigned gpr32 regclasses, we can end up with PHIs here
    // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
    // be selecting heterogenous regbanks for operands if possible, but we
    // still need to be able to deal with it here.
    //
    // To fix this, if we have a gpr-bank operand < 32b in size and at least
    // one other operand is on the fpr bank, then we add cross-bank copies
    // to homogenize the operand banks. For simplicity the bank that we choose
    // to settle on is whatever bank the def operand has. For example:
    //
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
    //  =>
    // %bb2:
    //   ...
    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
    //   ...
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
    bool HasGPROp = false, HasFPROp = false;
    for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) {
      if (!MO.isReg())
        continue;
      const LLT &Ty = MRI.getType(Reg: MO.getReg());
      // Only small scalars need the fixup; stop scanning on anything else.
      if (!Ty.isValid() || !Ty.isScalar())
        break;
      if (Ty.getSizeInBits() >= 32)
        break;
      const RegisterBank *RB = MRI.getRegBankOrNull(Reg: MO.getReg());
      // If for some reason we don't have a regbank yet. Don't try anything.
      if (!RB)
        break;

      if (RB->getID() == AArch64::GPRRegBankID)
        HasGPROp = true;
      else
        HasFPROp = true;
    }
    // We have heterogenous regbanks, need to fixup.
    if (HasGPROp && HasFPROp)
      fixupPHIOpBanks(MI&: *MI, MRI, RBI);
  }
}
7762 | |
namespace llvm {
/// Factory hook used by the AArch64 target to construct its GlobalISel
/// instruction selector. The caller takes ownership of the returned object.
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 AArch64Subtarget &Subtarget,
                                 AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
} // namespace llvm
7771 | |