//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H

#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/TypeSize.h"
#include <optional>

#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"

namespace llvm {

class AArch64Subtarget;

static const MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;
static const MachineMemOperand::Flags MOStridedAccess =
    MachineMemOperand::MOTargetFlag2;

#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"

// AArch64 MachineCombiner patterns
enum AArch64MachineCombinerPattern : unsigned {
  // These are patterns used to reduce the length of the dependence chain.
  SUBADD_OP1 = MachineCombinerPattern::TARGET_PATTERN_START,
  SUBADD_OP2,

  // These are multiply-add patterns matched by the AArch64 machine combiner.
  MULADDW_OP1,
  MULADDW_OP2,
  MULSUBW_OP1,
  MULSUBW_OP2,
  MULADDWI_OP1,
  MULSUBWI_OP1,
  MULADDX_OP1,
  MULADDX_OP2,
  MULSUBX_OP1,
  MULSUBX_OP2,
  MULADDXI_OP1,
  MULSUBXI_OP1,
  // NEON integer vectors
  MULADDv8i8_OP1,
  MULADDv8i8_OP2,
  MULADDv16i8_OP1,
  MULADDv16i8_OP2,
  MULADDv4i16_OP1,
  MULADDv4i16_OP2,
  MULADDv8i16_OP1,
  MULADDv8i16_OP2,
  MULADDv2i32_OP1,
  MULADDv2i32_OP2,
  MULADDv4i32_OP1,
  MULADDv4i32_OP2,

  MULSUBv8i8_OP1,
  MULSUBv8i8_OP2,
  MULSUBv16i8_OP1,
  MULSUBv16i8_OP2,
  MULSUBv4i16_OP1,
  MULSUBv4i16_OP2,
  MULSUBv8i16_OP1,
  MULSUBv8i16_OP2,
  MULSUBv2i32_OP1,
  MULSUBv2i32_OP2,
  MULSUBv4i32_OP1,
  MULSUBv4i32_OP2,

  MULADDv4i16_indexed_OP1,
  MULADDv4i16_indexed_OP2,
  MULADDv8i16_indexed_OP1,
  MULADDv8i16_indexed_OP2,
  MULADDv2i32_indexed_OP1,
  MULADDv2i32_indexed_OP2,
  MULADDv4i32_indexed_OP1,
  MULADDv4i32_indexed_OP2,

  MULSUBv4i16_indexed_OP1,
  MULSUBv4i16_indexed_OP2,
  MULSUBv8i16_indexed_OP1,
  MULSUBv8i16_indexed_OP2,
  MULSUBv2i32_indexed_OP1,
  MULSUBv2i32_indexed_OP2,
  MULSUBv4i32_indexed_OP1,
  MULSUBv4i32_indexed_OP2,

  // Floating Point
  FMULADDH_OP1,
  FMULADDH_OP2,
  FMULSUBH_OP1,
  FMULSUBH_OP2,
  FMULADDS_OP1,
  FMULADDS_OP2,
  FMULSUBS_OP1,
  FMULSUBS_OP2,
  FMULADDD_OP1,
  FMULADDD_OP2,
  FMULSUBD_OP1,
  FMULSUBD_OP2,
  FNMULSUBH_OP1,
  FNMULSUBS_OP1,
  FNMULSUBD_OP1,
  FMLAv1i32_indexed_OP1,
  FMLAv1i32_indexed_OP2,
  FMLAv1i64_indexed_OP1,
  FMLAv1i64_indexed_OP2,
  FMLAv4f16_OP1,
  FMLAv4f16_OP2,
  FMLAv8f16_OP1,
  FMLAv8f16_OP2,
  FMLAv2f32_OP2,
  FMLAv2f32_OP1,
  FMLAv2f64_OP1,
  FMLAv2f64_OP2,
  FMLAv4i16_indexed_OP1,
  FMLAv4i16_indexed_OP2,
  FMLAv8i16_indexed_OP1,
  FMLAv8i16_indexed_OP2,
  FMLAv2i32_indexed_OP1,
  FMLAv2i32_indexed_OP2,
  FMLAv2i64_indexed_OP1,
  FMLAv2i64_indexed_OP2,
  FMLAv4f32_OP1,
  FMLAv4f32_OP2,
  FMLAv4i32_indexed_OP1,
  FMLAv4i32_indexed_OP2,
  FMLSv1i32_indexed_OP2,
  FMLSv1i64_indexed_OP2,
  FMLSv4f16_OP1,
  FMLSv4f16_OP2,
  FMLSv8f16_OP1,
  FMLSv8f16_OP2,
  FMLSv2f32_OP1,
  FMLSv2f32_OP2,
  FMLSv2f64_OP1,
  FMLSv2f64_OP2,
  FMLSv4i16_indexed_OP1,
  FMLSv4i16_indexed_OP2,
  FMLSv8i16_indexed_OP1,
  FMLSv8i16_indexed_OP2,
  FMLSv2i32_indexed_OP1,
  FMLSv2i32_indexed_OP2,
  FMLSv2i64_indexed_OP1,
  FMLSv2i64_indexed_OP2,
  FMLSv4f32_OP1,
  FMLSv4f32_OP2,
  FMLSv4i32_indexed_OP1,
  FMLSv4i32_indexed_OP2,

  FMULv2i32_indexed_OP1,
  FMULv2i32_indexed_OP2,
  FMULv2i64_indexed_OP1,
  FMULv2i64_indexed_OP2,
  FMULv4i16_indexed_OP1,
  FMULv4i16_indexed_OP2,
  FMULv4i32_indexed_OP1,
  FMULv4i32_indexed_OP2,
  FMULv8i16_indexed_OP1,
  FMULv8i16_indexed_OP2,

  FNMADD,
};

class AArch64InstrInfo final : public AArch64GenInstrInfo {
  const AArch64RegisterInfo RI;
  const AArch64Subtarget &Subtarget;

public:
  explicit AArch64InstrInfo(const AArch64Subtarget &STI);

  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
  /// such, whenever a client has an instance of instruction info, it should
  /// always be able to get register info as well (through this method).
  const AArch64RegisterInfo &getRegisterInfo() const { return RI; }

  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool isAsCheapAsAMove(const MachineInstr &MI) const override;

  bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg,
                             Register &DstReg, unsigned &SubIdx) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  Register isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  Register isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  /// Does this instruction set its full destination register to zero?
  static bool isGPRZero(const MachineInstr &MI);

  /// Does this instruction rename a GPR without modifying bits?
  static bool isGPRCopy(const MachineInstr &MI);

  /// Does this instruction rename an FPR without modifying bits?
  static bool isFPRCopy(const MachineInstr &MI);

  /// Return true if pairing the given load or store is hinted to be
  /// unprofitable.
  static bool isLdStPairSuppressed(const MachineInstr &MI);

  /// Return true if the given load or store is a strided memory access.
  static bool isStridedAccess(const MachineInstr &MI);

  /// Return true if it has an unscaled load/store offset.
  static bool hasUnscaledLdStOffset(unsigned Opc);
  static bool hasUnscaledLdStOffset(MachineInstr &MI) {
    return hasUnscaledLdStOffset(MI.getOpcode());
  }

  /// Returns the unscaled load/store for the scaled load/store opcode,
  /// if there is a corresponding unscaled variant available.
  static std::optional<unsigned> getUnscaledLdSt(unsigned Opc);

  /// Scaling factor for (scaled or unscaled) load or store.
  static int getMemScale(unsigned Opc);
  static int getMemScale(const MachineInstr &MI) {
    return getMemScale(MI.getOpcode());
  }

  /// Returns whether the instruction is a pre-indexed load.
  static bool isPreLd(const MachineInstr &MI);

  /// Returns whether the instruction is a pre-indexed store.
  static bool isPreSt(const MachineInstr &MI);

  /// Returns whether the instruction is a pre-indexed load/store.
  static bool isPreLdSt(const MachineInstr &MI);

  /// Returns whether the instruction is a paired load/store.
  static bool isPairedLdSt(const MachineInstr &MI);

  /// Returns the base register operand of a load/store.
  static const MachineOperand &getLdStBaseOp(const MachineInstr &MI);

  /// Returns the immediate offset operand of a load/store.
  static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);

  /// Returns whether the instruction is FP or NEON.
  static bool isFpOrNEON(const MachineInstr &MI);

  /// Returns whether the instruction is in H form (16 bit operands)
  static bool isHForm(const MachineInstr &MI);

  /// Returns whether the instruction is in Q form (128 bit operands)
  static bool isQForm(const MachineInstr &MI);

  /// Returns whether the instruction can be compatible with non-zero BTYPE.
  static bool hasBTISemantics(const MachineInstr &MI);

  /// Returns the index for the immediate for a given instruction.
  static unsigned getLoadStoreImmIdx(unsigned Opc);

  /// Return true if the given load or store may be paired with another.
  static bool isPairableLdStInst(const MachineInstr &MI);

  /// Returns true if MI is one of the TCRETURN* instructions.
  static bool isTailCallReturnInst(const MachineInstr &MI);

  /// Return the opcode that sets flags when possible. The caller is
  /// responsible for ensuring the opcode has a flag-setting equivalent.
  static unsigned convertToFlagSettingOpc(unsigned Opc);

  /// Return true if this is a load/store that can be potentially paired/merged.
  bool isCandidateToMergeOrPair(const MachineInstr &MI) const;

  /// Hint that pairing the given load or store is unprofitable.
  static void suppressLdStPair(MachineInstr &MI);

  std::optional<ExtAddrMode>
  getAddrModeFromMemoryOp(const MachineInstr &MemI,
                          const TargetRegisterInfo *TRI) const override;

  bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
                           const MachineInstr &AddrI,
                           ExtAddrMode &AM) const override;

  MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
                                 const ExtAddrMode &AM) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
      int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
      const TargetRegisterInfo *TRI) const override;

  /// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`.
  /// This is true for some SVE instructions like ldr/str that have a
  /// 'reg + imm' addressing mode where the immediate is an index to the
  /// scalable vector located at 'reg + imm * vscale x #bytes'.
  bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
                                    const MachineOperand *&BaseOp,
                                    int64_t &Offset, bool &OffsetIsScalable,
                                    TypeSize &Width,
                                    const TargetRegisterInfo *TRI) const;

  /// Return the immediate offset of the base register in a load/store \p LdSt.
  MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;

  /// Returns true if opcode \p Opc is a memory operation. If it is, set
  /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
  ///
  /// For unscaled instructions, \p Scale is set to 1.
  static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width,
                           int64_t &MinOffset, int64_t &MaxOffset);
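
  // A hedged illustration (not part of the upstream documentation): a caller
  // can use the returned bounds to test whether an immediate fits an opcode's
  // encoding. The opcode and `Offset` below are placeholder assumptions.
  //
  //   TypeSize Scale = TypeSize::getFixed(0), Width = TypeSize::getFixed(0);
  //   int64_t MinOffset, MaxOffset;
  //   bool Fits = AArch64InstrInfo::getMemOpInfo(AArch64::LDRXui, Scale, Width,
  //                                              MinOffset, MaxOffset) &&
  //               Offset >= MinOffset && Offset <= MaxOffset;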

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, MCRegister DestReg,
                        MCRegister SrcReg, bool KillSrc, unsigned Opcode,
                        llvm::ArrayRef<unsigned> Indices) const;
  void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                       DebugLoc DL, unsigned DestReg, unsigned SrcReg,
                       bool KillSrc, unsigned Opcode, unsigned ZeroReg,
                       llvm::ArrayRef<unsigned> Indices) const;
  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc) const override;

  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, Register SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI,
                           Register VReg) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, Register DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI,
                            Register VReg) const override;

  // This tells target independent code that it is okay to pass instructions
  // with subreg operands to foldMemoryOperandImpl.
  bool isSubregFoldable() const override { return true; }

  using TargetInstrInfo::foldMemoryOperandImpl;
  MachineInstr *
  foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                        ArrayRef<unsigned> Ops,
                        MachineBasicBlock::iterator InsertPt, int FrameIndex,
                        LiveIntervals *LIS = nullptr,
                        VirtRegMap *VRM = nullptr) const override;

  /// \returns true if a branch from an instruction with opcode \p BranchOpc
  /// is capable of jumping to a position \p BrOffset bytes away.
  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;
  bool analyzeBranchPredicate(MachineBasicBlock &MBB,
                              MachineBranchPredicate &MBP,
                              bool AllowModify) const override;
  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;
  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
  analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;

  bool
  reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
  bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
                       Register, Register, Register, int &, int &,
                       int &) const override;
  void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    const DebugLoc &DL, Register DstReg,
                    ArrayRef<MachineOperand> Cond, Register TrueReg,
                    Register FalseReg) const override;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  MCInst getNop() const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  /// analyzeCompare - For a comparison instruction, return the source registers
  /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
  /// Return true if the comparison instruction can be analyzed.
  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;
  /// optimizeCompareInstr - Convert the instruction supplying the argument to
  /// the comparison into one that sets the zero bit in the flags register.
  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;
  bool optimizeCondBranch(MachineInstr &MI) const override;

  CombinerObjective getCombinerObjective(unsigned Pattern) const override;
  /// Return true when a code sequence can improve throughput. It
  /// should be called only for instructions in loops.
  /// \param Pattern - combiner pattern
  bool isThroughputPattern(unsigned Pattern) const override;
  /// Return true when there is potentially a faster code sequence
  /// for an instruction chain ending in ``Root``. All potential patterns are
  /// listed in the ``Patterns`` array.
  bool getMachineCombinerPatterns(MachineInstr &Root,
                                  SmallVectorImpl<unsigned> &Patterns,
                                  bool DoRegPressureReduce) const override;
  /// Return true when Inst is associative and commutative so that it can be
  /// reassociated. If Invert is true, then the inverse of Inst operation must
  /// be checked.
  bool isAssociativeAndCommutative(const MachineInstr &Inst,
                                   bool Invert) const override;
  /// When getMachineCombinerPatterns() finds patterns, this function generates
  /// the instructions that could replace the original code sequence.
  void genAlternativeCodeSequence(
      MachineInstr &Root, unsigned Pattern,
      SmallVectorImpl<MachineInstr *> &InsInstrs,
      SmallVectorImpl<MachineInstr *> &DelInstrs,
      DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
  /// AArch64 supports MachineCombiner.
  bool useMachineCombiner() const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;
  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;
  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableBitmaskMachineOperandTargetFlags() const override;
  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
                                   bool OutlineFromLinkOnceODRs) const override;
  std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
      std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
  void mergeOutliningCandidateAttributes(
      Function &F, std::vector<outliner::Candidate> &Candidates) const override;
  outliner::InstrType
  getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
                       unsigned Flags) const override;
  SmallVector<
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
  getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override;
  void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
                          const outliner::OutlinedFunction &OF) const override;
  MachineBasicBlock::iterator
  insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator &It, MachineFunction &MF,
                     outliner::Candidate &C) const override;
  bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;

  void buildClearRegister(Register Reg, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator Iter, DebugLoc &DL,
                          bool AllowSideEffects = true) const override;

  /// Returns the vector element size (B, H, S or D) of an SVE opcode.
  uint64_t getElementSizeForOpcode(unsigned Opc) const;
  /// Returns true if the opcode is for an SVE instruction that sets the
  /// condition codes as if its results had been fed to a PTEST instruction
  /// along with the same general predicate.
  bool isPTestLikeOpcode(unsigned Opc) const;
  /// Returns true if the opcode is for an SVE WHILE## instruction.
  bool isWhileOpcode(unsigned Opc) const;
  /// Returns true if the instruction has a shift by immediate that can be
  /// executed in one cycle less.
  static bool isFalkorShiftExtFast(const MachineInstr &MI);
  /// Return true if the instruction is an SEH instruction used for unwinding
  /// on Windows.
  static bool isSEHInstruction(const MachineInstr &MI);

  std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
                                           Register Reg) const override;

  bool isFunctionSafeToSplit(const MachineFunction &MF) const override;

  bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override;

  std::optional<ParamLoadedValue>
  describeLoadedValue(const MachineInstr &MI, Register Reg) const override;

  unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;

  bool isExtendLikelyToBeFolded(MachineInstr &ExtMI,
                                MachineRegisterInfo &MRI) const override;

  static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
                                                  int64_t &NumBytes,
                                                  int64_t &NumPredicateVectors,
                                                  int64_t &NumDataVectors);
  static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset,
                                                  int64_t &ByteSized,
                                                  int64_t &VGSized);

  // Return true if address of the form BaseReg + Scale * ScaledReg + Offset can
  // be used for a load/store of NumBytes. BaseReg is always present and
  // implicit.
  bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
                             unsigned Scale) const;
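
  // Illustrative sketch only; the precise legality rules live in the
  // implementation. Conceptually a caller probes a candidate access before
  // folding an offset into it (the values below are made-up assumptions):
  //
  //   if (TII->isLegalAddressingMode(/*NumBytes=*/8, /*Offset=*/504,
  //                                  /*Scale=*/0))
  //     ; // the offset can be encoded directly in an 8-byte load/store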

  // Decrement the SP, issuing probes along the way. `TargetReg` is the new top
  // of the stack. `FrameSetup` is true if the allocation is part of
  // constructing the activation frame of a function.
  MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI,
                                               Register TargetReg,
                                               bool FrameSetup) const;

#define GET_INSTRINFO_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"

protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// a value from one register to another, return the destination and source
  /// registers as machine operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;
  std::optional<DestSourcePair>
  isCopyLikeInstrImpl(const MachineInstr &MI) const override;

private:
  unsigned getInstBundleLength(const MachineInstr &MI) const;

  /// Sets the offsets on outlined instructions in \p MBB which use SP
  /// so that they will be valid post-outlining.
  ///
  /// \param MBB A \p MachineBasicBlock in an outlined function.
  void fixupPostOutline(MachineBasicBlock &MBB) const;

  void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
                             MachineBasicBlock *TBB,
                             ArrayRef<MachineOperand> Cond) const;
  bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
                           const MachineRegisterInfo &MRI) const;
  bool removeCmpToZeroOrOne(MachineInstr &CmpInstr, unsigned SrcReg,
                            int CmpValue, const MachineRegisterInfo &MRI) const;

  /// Returns an unused general-purpose register which can be used for
  /// constructing an outlined call if one exists. Returns 0 otherwise.
  Register findRegisterToSaveLRTo(outliner::Candidate &C) const;

  /// Remove a ptest of a predicate-generating operation that already sets, or
  /// can be made to set, the condition codes in an identical manner.
  bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg,
                          unsigned PredReg,
                          const MachineRegisterInfo *MRI) const;
};

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
/// \returns std::nullopt otherwise.
///
/// Collect instructions using those flags in \p CCUseInstrs if provided.
std::optional<UsedNZCV>
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
                 const TargetRegisterInfo &TRI,
                 SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr);
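
// A hedged usage sketch (names are illustrative): a peephole such as compare
// elimination can inspect which flags are actually consumed and only proceed
// when the substitute instruction produces those flags correctly.
//
//   if (std::optional<UsedNZCV> Flags = examineCFlagsUse(MI, CmpInstr, TRI))
//     if (!Flags->C && !Flags->V)
//       ; // only N/Z are read, so a flag-setting rewrite of MI is a candidate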

/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
/// which either reads or clobbers NZCV.
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
                                     const MachineInstr &UseMI,
                                     const TargetRegisterInfo *TRI);

MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg,
                              unsigned Reg, const StackOffset &Offset,
                              bool LastAdjustmentWasScalable = true);
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg,
                                 const StackOffset &OffsetFromDefCFA);

/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
/// plus Offset. This is intended to be used from within the prolog/epilog
/// insertion (PEI) pass, where a virtual scratch register may be allocated
/// if necessary, to be replaced by the scavenger at the end of PEI.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                     StackOffset Offset, const TargetInstrInfo *TII,
                     MachineInstr::MIFlag = MachineInstr::NoFlags,
                     bool SetNZCV = false, bool NeedsWinCFI = false,
                     bool *HasWinCFI = nullptr, bool EmitCFAOffset = false,
                     StackOffset InitialOffset = {},
                     unsigned FrameReg = AArch64::SP);
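
// A minimal sketch of a typical call, assuming prologue-emission context where
// MBB, MBBI, DL and TII are in scope; this would materialize "SP = SP - 64":
//
//   emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
//                   StackOffset::getFixed(-64), TII, MachineInstr::FrameSetup);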

/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
/// FP. Return false if the offset could not be handled directly in MI, and
/// return the left-over portion by reference.
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                              unsigned FrameReg, StackOffset &Offset,
                              const AArch64InstrInfo *TII);

/// Used to report the frame offset status in isAArch64FrameOffsetLegal.
enum AArch64FrameOffsetStatus {
  AArch64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
  AArch64FrameOffsetIsLegal = 0x1, ///< Offset is legal.
  AArch64FrameOffsetCanUpdate = 0x2 ///< Offset can apply, at least partly.
};

/// Check if the @p Offset is a valid frame offset for @p MI.
/// The returned value reports the validity of the frame offset for @p MI.
/// It uses the values defined by AArch64FrameOffsetStatus for that.
/// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to
/// use an offset.
/// If result & AArch64FrameOffsetIsLegal, @p Offset can completely be
/// rewritten in @p MI.
/// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the
/// amount that is off the limit of the legal offset.
/// If set, @p OutUseUnscaledOp will indicate whether @p MI should be
/// turned into an unscaled operator, whose opcode is in @p OutUnscaledOp.
/// If set, @p EmittableOffset contains the amount that can be set in @p MI
/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
/// is a legal offset.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset,
                              bool *OutUseUnscaledOp = nullptr,
                              unsigned *OutUnscaledOp = nullptr,
                              int64_t *EmittableOffset = nullptr);
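
// The status is a bitmask, so callers are expected to test individual bits
// rather than compare for equality; a hedged sketch (variable names assumed):
//
//   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
//                                          &UnscaledOp, &EmittableOffset);
//   if (Status & AArch64FrameOffsetCanUpdate)
//     ; // rewrite MI with EmittableOffset; the remainder stays in Offset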

static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }

static inline bool isCondBranchOpcode(int Opc) {
  switch (Opc) {
  case AArch64::Bcc:
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    return true;
  default:
    return false;
  }
}

static inline bool isIndirectBranchOpcode(int Opc) {
  switch (Opc) {
  case AArch64::BR:
  case AArch64::BRAA:
  case AArch64::BRAB:
  case AArch64::BRAAZ:
  case AArch64::BRABZ:
    return true;
  }
  return false;
}

static inline bool isPTrueOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::PTRUE_B:
  case AArch64::PTRUE_H:
  case AArch64::PTRUE_S:
  case AArch64::PTRUE_D:
    return true;
  default:
    return false;
  }
}

/// Return opcode to be used for indirect calls.
unsigned getBLRCallOpcode(const MachineFunction &MF);

/// Return XPAC opcode to be used for a ptrauth strip using the given key.
static inline unsigned getXPACOpcodeForKey(AArch64PACKey::ID K) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: case IB: return AArch64::XPACI;
  case DA: case DB: return AArch64::XPACD;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

/// Return AUT opcode to be used for a ptrauth auth using the given key, or its
/// AUT*Z variant that doesn't take a discriminator operand, using zero instead.
static inline unsigned getAUTOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: return Zero ? AArch64::AUTIZA : AArch64::AUTIA;
  case IB: return Zero ? AArch64::AUTIZB : AArch64::AUTIB;
  case DA: return Zero ? AArch64::AUTDZA : AArch64::AUTDA;
  case DB: return Zero ? AArch64::AUTDZB : AArch64::AUTDB;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

/// Return PAC opcode to be used for a ptrauth sign using the given key, or its
/// PAC*Z variant that doesn't take a discriminator operand, using zero instead.
static inline unsigned getPACOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: return Zero ? AArch64::PACIZA : AArch64::PACIA;
  case IB: return Zero ? AArch64::PACIZB : AArch64::PACIB;
  case DA: return Zero ? AArch64::PACDZA : AArch64::PACDA;
  case DB: return Zero ? AArch64::PACDZB : AArch64::PACDB;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

// struct TSFlags {
#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bits
#define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits
#define TSFLAG_INSTR_FLAGS(X) ((X) << 9) // 2-bits
#define TSFLAG_SME_MATRIX_TYPE(X) ((X) << 11) // 3-bits
// }

namespace AArch64 {

enum ElementSizeType {
  ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7),
  ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0),
  ElementSizeB = TSFLAG_ELEMENT_SIZE_TYPE(0x1),
  ElementSizeH = TSFLAG_ELEMENT_SIZE_TYPE(0x2),
  ElementSizeS = TSFLAG_ELEMENT_SIZE_TYPE(0x3),
  ElementSizeD = TSFLAG_ELEMENT_SIZE_TYPE(0x4),
};
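
// Illustrative note: these values live in an opcode's TSFlags, so a pass can
// recover the SVE element size with the mask; a hedged sketch:
//
//   uint64_t ElemSize = MI.getDesc().TSFlags & AArch64::ElementSizeMask;
//   bool IsDoubleWord = ElemSize == AArch64::ElementSizeD;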

enum DestructiveInstType {
  DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0xf),
  NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
  DestructiveOther = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
  DestructiveUnary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x2),
  DestructiveBinaryImm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x3),
  DestructiveBinaryShImmUnpred = TSFLAG_DESTRUCTIVE_INST_TYPE(0x4),
  DestructiveBinary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x5),
  DestructiveBinaryComm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
  DestructiveBinaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
  DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
  DestructiveUnaryPassthru = TSFLAG_DESTRUCTIVE_INST_TYPE(0x9),
};

enum FalseLaneType {
  FalseLanesMask = TSFLAG_FALSE_LANE_TYPE(0x3),
  FalseLanesZero = TSFLAG_FALSE_LANE_TYPE(0x1),
  FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};

// NOTE: This is a bit field.
static const uint64_t InstrFlagIsWhile = TSFLAG_INSTR_FLAGS(0x1);
static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2);

enum SMEMatrixType {
  SMEMatrixTypeMask = TSFLAG_SME_MATRIX_TYPE(0x7),
  SMEMatrixNone = TSFLAG_SME_MATRIX_TYPE(0x0),
  SMEMatrixTileB = TSFLAG_SME_MATRIX_TYPE(0x1),
  SMEMatrixTileH = TSFLAG_SME_MATRIX_TYPE(0x2),
  SMEMatrixTileS = TSFLAG_SME_MATRIX_TYPE(0x3),
  SMEMatrixTileD = TSFLAG_SME_MATRIX_TYPE(0x4),
  SMEMatrixTileQ = TSFLAG_SME_MATRIX_TYPE(0x5),
  SMEMatrixArray = TSFLAG_SME_MATRIX_TYPE(0x6),
};

#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
#undef TSFLAG_FALSE_LANE_TYPE
#undef TSFLAG_INSTR_FLAGS
#undef TSFLAG_SME_MATRIX_TYPE

int getSVEPseudoMap(uint16_t Opcode);
int getSVERevInstr(uint16_t Opcode);
int getSVENonRevInstr(uint16_t Opcode);

int getSMEPseudoMap(uint16_t Opcode);
} // end namespace AArch64

} // end namespace llvm

#endif