//===- ARMISelLowering.h - ARM DAG Lowering Interface -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
#define LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H

#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/CodeGen.h"
#include <optional>
#include <utility>

namespace llvm {

class ARMSubtarget;
class DataLayout;
class FastISel;
class FunctionLoweringInfo;
class GlobalValue;
class InstrItineraryData;
class Instruction;
class IRBuilderBase;
class MachineBasicBlock;
class MachineInstr;
class SelectionDAG;
class TargetLibraryInfo;
class TargetMachine;
class TargetRegisterInfo;
class VectorType;

  namespace ARMISD {

  // ARM Specific DAG Nodes
  enum NodeType : unsigned {
    // Start the numbering where the builtin ops and target ops leave off.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    Wrapper,    // Wrapper - A wrapper node for TargetConstantPool,
                // TargetExternalSymbol, and TargetGlobalAddress.
    WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
                // PIC mode.
    WrapperJT,  // WrapperJT - A wrapper node for TargetJumpTable

    // Add pseudo op to model memcpy for struct byval.
    COPY_STRUCT_BYVAL,

    CALL,        // Function call.
    CALL_PRED,   // Function call that's predicable.
    CALL_NOLINK, // Function call with branch not branch-and-link.
    tSECALL,     // CMSE non-secure function call.
    t2CALL_BTI,  // Thumb function call followed by BTI instruction.
    BRCOND,      // Conditional branch.
    BR_JT,       // Jumptable branch.
    BR2_JT,      // Jumptable branch (2 level - jumptable entry is a jump).
    RET_GLUE,    // Return with a flag operand.
    SERET_GLUE,  // CMSE Entry function return with a flag operand.
    INTRET_GLUE, // Interrupt return with an LR-offset and a flag operand.

    PIC_ADD, // Add with a PC operand and a PIC label.

    ASRL, // MVE long arithmetic shift right.
    LSRL, // MVE long shift right.
    LSLL, // MVE long shift left.

    CMP,      // ARM compare instructions.
    CMN,      // ARM CMN instructions.
    CMPZ,     // ARM compare that sets only Z flag.
    CMPFP,    // ARM VFP compare instruction, sets FPSCR.
    CMPFPE,   // ARM VFP signalling compare instruction, sets FPSCR.
    CMPFPw0,  // ARM VFP compare against zero instruction, sets FPSCR.
    CMPFPEw0, // ARM VFP signalling compare against zero instruction, sets
              // FPSCR.
    FMSTAT,   // ARM fmstat instruction.

    CMOV, // ARM conditional move instructions.
    SUBS, // Flag-setting subtraction.

    SSAT, // Signed saturation
    USAT, // Unsigned saturation

    BCC_i64,

    SRL_GLUE, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
    SRA_GLUE, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
    RRX,      // V = RRX X, Flag -> srl X, 1 + shift in carry flag.

    ADDC, // Add with carry
    ADDE, // Add using carry
    SUBC, // Sub with carry
    SUBE, // Sub using carry
    LSLS, // Shift left producing carry

    VMOVRRD, // double to two gprs.
    VMOVDRR, // Two gprs to double.
    VMOVSR,  // move gpr to single, used for f32 literal constructed in a gpr

    EH_SJLJ_SETJMP,         // SjLj exception handling setjmp.
    EH_SJLJ_LONGJMP,        // SjLj exception handling longjmp.
    EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.

    TC_RETURN, // Tail call return pseudo.

    THREAD_POINTER,

    DYN_ALLOC, // Dynamic allocation on the stack.

    MEMBARRIER_MCR, // Memory barrier (MCR)

    PRELOAD, // Preload

    WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
    WIN__DBZCHK, // Windows' divide by zero check

    WLS,      // Low-overhead loops, While Loop Start branch. See t2WhileLoopStart
    WLSSETUP, // Setup for the iteration count of a WLS. See t2WhileLoopSetup.
    LOOP_DEC, // Really a part of LE, performs the sub
    LE,       // Low-overhead loops, Loop End

    PREDICATE_CAST,  // Predicate cast for MVE i1 types
    VECTOR_REG_CAST, // Reinterpret the current contents of a vector register

    MVESEXT,  // Legalization aids for extending a vector into two/four vectors.
    MVEZEXT,  // or truncating two/four vectors into one. Eventually becomes
    MVETRUNC, // stack store/load sequence, if not optimized to anything else.

    VCMP,  // Vector compare.
    VCMPZ, // Vector compare to zero.
    VTST,  // Vector test bits.

    // Vector shift by vector
    VSHLs, // ...left/right by signed
    VSHLu, // ...left/right by unsigned

    // Vector shift by immediate:
    VSHLIMM,  // ...left
    VSHRsIMM, // ...right (signed)
    VSHRuIMM, // ...right (unsigned)

    // Vector rounding shift by immediate:
    VRSHRsIMM, // ...right (signed)
    VRSHRuIMM, // ...right (unsigned)
    VRSHRNIMM, // ...right narrow

    // Vector saturating shift by immediate:
    VQSHLsIMM,   // ...left (signed)
    VQSHLuIMM,   // ...left (unsigned)
    VQSHLsuIMM,  // ...left (signed to unsigned)
    VQSHRNsIMM,  // ...right narrow (signed)
    VQSHRNuIMM,  // ...right narrow (unsigned)
    VQSHRNsuIMM, // ...right narrow (signed to unsigned)

    // Vector saturating rounding shift by immediate:
    VQRSHRNsIMM,  // ...right narrow (signed)
    VQRSHRNuIMM,  // ...right narrow (unsigned)
    VQRSHRNsuIMM, // ...right narrow (signed to unsigned)

    // Vector shift and insert:
    VSLIIMM, // ...left
    VSRIIMM, // ...right

    // Vector get lane (VMOV scalar to ARM core register)
    // (These are used for 8- and 16-bit element types only.)
    VGETLANEu, // zero-extend vector extract element
    VGETLANEs, // sign-extend vector extract element

    // Vector move immediate and move negated immediate:
    VMOVIMM,
    VMVNIMM,

    // Vector move f32 immediate:
    VMOVFPIMM,

    // Move H <-> R, clearing top 16 bits
    VMOVrh,
    VMOVhr,

    // Vector duplicate:
    VDUP,
    VDUPLANE,

    // Vector shuffles:
    VEXT,   // extract
    VREV64, // reverse elements within 64-bit doublewords
    VREV32, // reverse elements within 32-bit words
    VREV16, // reverse elements within 16-bit halfwords
    VZIP,   // zip (interleave)
    VUZP,   // unzip (deinterleave)
    VTRN,   // transpose
    VTBL1,  // 1-register shuffle with mask
    VTBL2,  // 2-register shuffle with mask
    VMOVN,  // MVE vmovn

    // MVE Saturating truncates
    VQMOVNs, // Vector (V) Saturating (Q) Move and Narrow (N), signed (s)
    VQMOVNu, // Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u)

    // MVE float <> half converts
    VCVTN, // MVE vcvt f32 -> f16, truncating into either the bottom or top
           // lanes
    VCVTL, // MVE vcvt f16 -> f32, extending from either the bottom or top lanes

    // MVE VIDUP instruction, taking a start value and increment.
    VIDUP,

    // Vector multiply long:
    VMULLs, // ...signed
    VMULLu, // ...unsigned

    VQDMULH, // MVE vqdmulh instruction

    // MVE reductions
    VADDVs,    // sign- or zero-extend the elements of a vector to i32,
    VADDVu,    // add them all together, and return an i32 of their sum
    VADDVps,   // Same as VADDV[su] but with a v4i1 predicate mask
    VADDVpu,
    VADDLVs,   // sign- or zero-extend elements to i64 and sum, returning
    VADDLVu,   // the low and high 32-bit halves of the sum
    VADDLVAs,  // Same as VADDLV[su] but also add an input accumulator
    VADDLVAu,  // provided as low and high halves
    VADDLVps,  // Same as VADDLV[su] but with a v4i1 predicate mask
    VADDLVpu,
    VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask
    VADDLVApu,
    VMLAVs,    // sign- or zero-extend the elements of two vectors to i32, multiply
    VMLAVu,    // them and add the results together, returning an i32 of their sum
    VMLAVps,   // Same as VMLAV[su] with a v4i1 predicate mask
    VMLAVpu,
    VMLALVs,   // Same as VMLAV but with i64, returning the low and
    VMLALVu,   // high 32-bit halves of the sum
    VMLALVps,  // Same as VMLALV[su] with a v4i1 predicate mask
    VMLALVpu,
    VMLALVAs,  // Same as VMLALV but also add an input accumulator
    VMLALVAu,  // provided as low and high halves
    VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask
    VMLALVApu,
    VMINVu,    // Find minimum unsigned value of a vector and register
    VMINVs,    // Find minimum signed value of a vector and register
    VMAXVu,    // Find maximum unsigned value of a vector and register
    VMAXVs,    // Find maximum signed value of a vector and register

    SMULWB,  // Signed multiply word by half word, bottom
    SMULWT,  // Signed multiply word by half word, top
    UMLAL,   // 64bit Unsigned Accumulate Multiply
    SMLAL,   // 64bit Signed Accumulate Multiply
    UMAAL,   // 64-bit Unsigned Accumulate Accumulate Multiply
    SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
    SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
    SMLALTB, // 64-bit signed accumulate multiply top, bottom 16
    SMLALTT, // 64-bit signed accumulate multiply top, top 16
    SMLALD,  // Signed multiply accumulate long dual
    SMLALDX, // Signed multiply accumulate long dual exchange
    SMLSLD,  // Signed multiply subtract long dual
    SMLSLDX, // Signed multiply subtract long dual exchange
    SMMLAR,  // Signed multiply long, round and add
    SMMLSR,  // Signed multiply long, subtract and round

    // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b
    // stands for.
    QADD8b,
    QSUB8b,
    QADD16b,
    QSUB16b,
    UQADD8b,
    UQSUB8b,
    UQADD16b,
    UQSUB16b,

    // Operands of the standard BUILD_VECTOR node are not legalized, which
    // is fine if BUILD_VECTORs are always lowered to shuffles or other
    // operations, but for ARM some BUILD_VECTORs are legal as-is and their
    // operands need to be legalized. Define an ARM-specific version of
    // BUILD_VECTOR for this purpose.
    BUILD_VECTOR,

    // Bit-field insert
    BFI,

    // Vector OR with immediate
    VORRIMM,
    // Vector AND with NOT of immediate
    VBICIMM,

    // Pseudo vector bitwise select
    VBSP,

    // Pseudo-instruction representing a memory copy using ldm/stm
    // instructions.
    MEMCPY,

    // Pseudo-instruction representing a memory copy using a tail predicated
    // loop
    MEMCPYLOOP,
    // Pseudo-instruction representing a memset using a tail predicated
    // loop
    MEMSETLOOP,

    // V8.1MMainline condition select
    CSINV, // Conditional select invert.
    CSNEG, // Conditional select negate.
    CSINC, // Conditional select increment.

    // Vector load N-element structure to all lanes:
    VLD1DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
    VLD2DUP,
    VLD3DUP,
    VLD4DUP,

    // NEON loads with post-increment base updates:
    VLD1_UPD,
    VLD2_UPD,
    VLD3_UPD,
    VLD4_UPD,
    VLD2LN_UPD,
    VLD3LN_UPD,
    VLD4LN_UPD,
    VLD1DUP_UPD,
    VLD2DUP_UPD,
    VLD3DUP_UPD,
    VLD4DUP_UPD,
    VLD1x2_UPD,
    VLD1x3_UPD,
    VLD1x4_UPD,

    // NEON stores with post-increment base updates:
    VST1_UPD,
    VST2_UPD,
    VST3_UPD,
    VST4_UPD,
    VST2LN_UPD,
    VST3LN_UPD,
    VST4LN_UPD,
    VST1x2_UPD,
    VST1x3_UPD,
    VST1x4_UPD,

    // Load/Store of dual registers
    LDRD,
    STRD
  };

  } // end namespace ARMISD
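
  // Illustrative sketch (an assumption for exposition, not part of the ARMISD
  // interface itself): the opcodes above are used as the opcode argument when
  // ARM lowering or DAG-combine code builds target-specific nodes. For
  // example, splatting a scalar operand during custom lowering might look
  // roughly like
  //
  //   SDValue Splat = DAG.getNode(ARMISD::VDUP, dl, VT, ScalarOp);
  //
  // where DAG, dl, VT and ScalarOp are whatever SelectionDAG, debug location,
  // result type and operand the caller already has in hand.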

  namespace ARM {
  /// Possible values of current rounding mode, which is specified in bits
  /// 23:22 of FPSCR.
  enum Rounding {
    RN = 0,    // Round to Nearest
    RP = 1,    // Round towards Plus infinity
    RM = 2,    // Round towards Minus infinity
    RZ = 3,    // Round towards Zero
    rmMask = 3 // Bit mask selecting rounding mode
  };

  // Bit position of rounding mode bits in FPSCR.
  const unsigned RoundingBitsPos = 22;

  // Bits of floating-point status. These are NZCV flags, QC bit and cumulative
  // FP exception bits.
  const unsigned FPStatusBits = 0xf800009f;

  // Some bits in the FPSCR are not yet defined. They must be preserved when
  // modifying the contents.
  const unsigned FPReservedBits = 0x00006060;
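
  // Illustrative sketch (an assumption for exposition, not part of this
  // interface): with the constants above, the rounding mode stored in FPSCR
  // bits 23:22 would be read and updated roughly as follows, using the
  // hypothetical helpers getRoundingMode/setRoundingMode on a raw 32-bit
  // FPSCR value:
  //
  //   unsigned getRoundingMode(unsigned FPSCR) {
  //     return (FPSCR >> RoundingBitsPos) & rmMask; // 0..3 -> RN/RP/RM/RZ
  //   }
  //   unsigned setRoundingMode(unsigned FPSCR, unsigned RM) {
  //     FPSCR &= ~(rmMask << RoundingBitsPos);      // clear bits 23:22
  //     return FPSCR | ((RM & rmMask) << RoundingBitsPos);
  //   }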
  } // namespace ARM

  /// Define some predicates that are used for node matching.
  namespace ARM {

    bool isBitFieldInvertedMask(unsigned v);

  } // end namespace ARM

  //===--------------------------------------------------------------------===//
  //  ARMTargetLowering - ARM Implementation of the TargetLowering interface

  class ARMTargetLowering : public TargetLowering {
  public:
    explicit ARMTargetLowering(const TargetMachine &TM,
                               const ARMSubtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// ReplaceNodeResults - Replace the results of node with an illegal result
    /// type with new values built out of custom code.
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    const char *getTargetNodeName(unsigned Opcode) const override;

    bool isSelectSupported(SelectSupportKind Kind) const override {
      // ARM does not support scalar condition selects on vectors.
      return (Kind != ScalarCondVectorVal);
    }

    bool isReadOnly(const GlobalValue *GV) const;

    /// getSetCCResultType - Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                       SDNode *Node) const override;

    SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
    SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const;
    SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
    SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &OriginalDemandedBits,
                                           const APInt &OriginalDemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;

    /// allowsMisalignedMemoryAccesses - Returns true if the target allows
    /// unaligned memory accesses of the specified type. Returns whether it
    /// is "fast" by reference in the second argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
                                        Align Alignment,
                                        MachineMemOperand::Flags Flags,
                                        unsigned *Fast) const override;

    EVT getOptimalMemOpType(const MemOp &Op,
                            const AttributeList &FuncAttributes) const override;

    bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
    bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;
    bool shouldSinkOperands(Instruction *I,
                            SmallVectorImpl<Use *> &Ops) const override;
    Type *shouldConvertSplatType(ShuffleVectorInst *SVI) const override;

    bool isFNegFree(EVT VT) const override;

    bool isVectorLoadExtDesirable(SDValue ExtVal) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// isLegalAddressingMode - Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;

    /// Returns true if the addressing mode represented by AM is legal
    /// for the Thumb1 target, for a load/store of the specified type.
    bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const;

    /// isLegalICmpImmediate - Return true if the specified immediate is legal
    /// icmp immediate, that is the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// isLegalAddImmediate - Return true if the specified immediate is legal
    /// add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// getPreIndexedAddressParts - returns true by value, base pointer and
    /// offset pointer and addressing mode by reference if the node's address
    /// can be legally represented as pre-indexed load / store address.
    bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                   ISD::MemIndexedMode &AM,
                                   SelectionDAG &DAG) const override;

    /// getPostIndexedAddressParts - returns true by value, base pointer and
    /// offset pointer and addressing mode by reference if this node can be
    /// combined with a load / store to form a post-indexed load / store.
    bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                    SDValue &Offset, ISD::MemIndexedMode &AM,
                                    SelectionDAG &DAG) const override;

    void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                      const APInt &DemandedElts,
                                      TargetLoweringOpt &TLO) const override;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight getSingleConstraintMatchWeight(
        AsmOperandInfo &info, const char *constraint) const override;

    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
    /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
    /// true it means one of the asm constraints of the inline asm instruction
    /// being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    InlineAsm::ConstraintCode
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "Q")
        return InlineAsm::ConstraintCode::Q;
      if (ConstraintCode.size() == 2) {
        if (ConstraintCode[0] == 'U') {
          switch (ConstraintCode[1]) {
          default:
            break;
          case 'm':
            return InlineAsm::ConstraintCode::Um;
          case 'n':
            return InlineAsm::ConstraintCode::Un;
          case 'q':
            return InlineAsm::ConstraintCode::Uq;
          case 's':
            return InlineAsm::ConstraintCode::Us;
          case 't':
            return InlineAsm::ConstraintCode::Ut;
          case 'v':
            return InlineAsm::ConstraintCode::Uv;
          case 'y':
            return InlineAsm::ConstraintCode::Uy;
          }
        }
      }
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    const ARMSubtarget *getSubtarget() const {
      return Subtarget;
    }

    /// getRegClassFor - Return the register class that should be used for the
    /// specified value type.
    const TargetRegisterClass *
    getRegClassFor(MVT VT, bool isDivergent = false) const override;

    bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
                                Align &PrefAlign) const override;

    /// createFastISel - This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    Sched::Preference getSchedulingPreference(SDNode *N) const override;

    bool preferZeroCompareBranch() const override { return true; }

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool
    isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
    bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

    /// isFPImmLegal - Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize = false) const override;

    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                            const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                              bool MathUsed) const override {
      // Using overflow ops for overflow checks only should be beneficial on
      // ARM.
      return TargetLowering::shouldFormOverflowOp(Opcode, VT,
                                                  /*MathUsed=*/true);
    }

    bool shouldReassociateReduction(unsigned Opc, EVT VT) const override {
      return Opc != ISD::VECREDUCE_ADD;
    }

    /// Returns true if an argument of type Ty needs to be passed in a
    /// contiguous block of registers in calling convention CallConv.
    bool functionArgumentNeedsConsecutiveRegisters(
        Type *Ty, CallingConv::ID CallConv, bool isVarArg,
        const DataLayout &DL) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    Register
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    Register
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    Instruction *makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const;
    Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                          AtomicOrdering Ord) const override;
    Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                                AtomicOrdering Ord) const override;

    void
    emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

    Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                  AtomicOrdering Ord) const override;
    Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                   AtomicOrdering Ord) const override;

    unsigned getMaxSupportedInterleaveFactor() const override;

    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;

    bool shouldInsertFencesForAtomic(const Instruction *I) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

    bool useLoadStackGuardNode() const override;

    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Function *getSSPStackGuardCheck(const Module &M) const override;

    bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                   unsigned &Cost) const override;

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const MachineFunction &MF) const override {
      // Do not merge to larger than i32.
      return (MemVT.getSizeInBits() <= 32);
    }

    bool isCheapToSpeculateCttz(Type *Ty) const override;
    bool isCheapToSpeculateCtlz(Type *Ty) const override;

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    bool supportSwiftError() const override {
      return true;
    }

    bool hasStandaloneRem(EVT VT) const override {
      return HasStandaloneRem;
    }

    ShiftLegalizationStrategy
    preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                       unsigned ExpansionFactor) const override;

    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
    CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;

    /// Returns true if \p VecTy is a legal interleaved access type. This
    /// function checks the vector element type and the overall width of the
    /// vector.
    bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy,
                                      Align Alignment,
                                      const DataLayout &DL) const;

    bool isMulAddWithConstProfitable(SDValue AddNode,
                                     SDValue ConstNode) const override;

    bool alignLoopsWithOptSize() const override;

    /// Returns the number of interleaved accesses that will be generated when
    /// lowering accesses of the given type.
    unsigned getNumInterleavedAccesses(VectorType *VecTy,
                                       const DataLayout &DL) const;

    void finalizeLowering(MachineFunction &MF) const override;

    /// Return the correct alignment for the current calling convention.
    Align getABIAlignmentForCallingConv(Type *ArgTy,
                                        const DataLayout &DL) const override;

    bool isDesirableToCommuteWithShift(const SDNode *N,
                                       CombineLevel Level) const override;

    bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                           CombineLevel Level) const override;

    bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                              EVT VT) const override;

    bool preferIncOfAddToSubOfNot(EVT VT) const override;

    bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

    bool isComplexDeinterleavingSupported() const override;
    bool isComplexDeinterleavingOperationSupported(
        ComplexDeinterleavingOperation Operation, Type *Ty) const override;

    Value *createComplexDeinterleavingIR(
        IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
        ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
        Value *Accumulator = nullptr) const override;

    bool softPromoteHalfType() const override { return true; }

    bool useFPRegsForHalfType() const override { return true; }

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
    /// make the right decision when generating code for different targets.
    const ARMSubtarget *Subtarget;

    const TargetRegisterInfo *RegInfo;

    const InstrItineraryData *Itins;

    // TODO: remove this, and have shouldInsertFencesForAtomic do the proper
    // check.
    bool InsertFencesForAtomic;

    bool HasStandaloneRem = true;

    void addTypeForNEON(MVT VT, MVT PromotedLdStVT);
    void addDRTypeForNEON(MVT VT);
    void addQRTypeForNEON(MVT VT);
    std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
                                              SDValue &ARMcc) const;

    using RegsToPassVector = SmallVector<std::pair<unsigned, SDValue>, 8>;

    void PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain,
                          SDValue &Arg, RegsToPassVector &RegsToPass,
                          CCValAssign &VA, CCValAssign &NextVA,
                          SDValue &StackPtr,
                          SmallVectorImpl<SDValue> &MemOpChains,
                          bool IsTailCall,
                          int SPDiff) const;
    SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                                 SDValue &Root, SelectionDAG &DAG,
                                 const SDLoc &dl) const;

    CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC,
                                            bool isVarArg) const;
    CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
                                  bool isVarArg) const;
    std::pair<SDValue, MachinePointerInfo>
    computeAddrForCallArg(const SDLoc &dl, SelectionDAG &DAG,
                          const CCValAssign &VA, SDValue StackPtr,
                          bool IsTailCall, int SPDiff) const;
    SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG,
                                const ARMSubtarget *Subtarget) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                    const ARMSubtarget *Subtarget) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                          SelectionDAG &DAG) const;
    SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                 SelectionDAG &DAG,
                                 TLSModel::Model model) const;
    SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
                            const ARMSubtarget *ST) const;
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                              const ARMSubtarget *ST) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
    void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
                           SmallVectorImpl<SDValue> &Results) const;
    SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
                          const ARMSubtarget *Subtarget) const;
    SDValue LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed,
                                   SDValue &Chain) const;
    SDValue LowerREM(SDNode *N, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
    void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
                   SelectionDAG &DAG) const;

    Register getRegisterByName(const char *RegName, LLT VT,
                               const MachineFunction &MF) const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                    EVT VT) const override;

    SDValue MoveToHPR(const SDLoc &dl, SelectionDAG &DAG, MVT LocVT, MVT ValVT,
                      SDValue Val) const;
    SDValue MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG, MVT LocVT,
                        MVT ValVT, SDValue Val) const;

    SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                            SDValue ThisVal) const;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
             MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }

    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool splitValueIntoRegisterParts(
        SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
        unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)
        const override;

    SDValue joinRegisterPartsIntoValue(
        SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
        unsigned NumParts, MVT PartVT, EVT ValueVT,
        std::optional<CallingConv::ID> CC) const override;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;

    int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &dl,
                       SDValue &Chain, const Value *OrigArg,
                       unsigned InRegsParamRecordIdx, int ArgOffset,
                       unsigned ArgSize) const;

    void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
                              const SDLoc &dl, SDValue &Chain,
                              unsigned ArgOffset, unsigned TotalArgRegsSaveSize,
                              bool ForceMutable = false) const;

    SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    /// HandleByVal - Target-specific cleanup for ByVal support.
    void HandleByVal(CCState *, unsigned &, Align) const override;

    /// IsEligibleForTailCallOptimization - Check whether the call is eligible
    /// for tail call optimization. Targets which want to do tail call
    /// optimization should implement this function.
    bool IsEligibleForTailCallOptimization(
        SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
        bool isCalleeStructRet, bool isCallerStructRet,
        const SmallVectorImpl<ISD::OutputArg> &Outs,
        const SmallVectorImpl<SDValue> &OutVals,
        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
        const bool isIndirect) const;

    bool CanLowerReturn(CallingConv::ID CallConv,
                        MachineFunction &MF, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    bool shouldConsiderGEPOffsetSplit() const override { return true; }

    bool isUnsupportedFloatingType(EVT VT) const;

    SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
                    SDValue ARMcc, SDValue CCR, SDValue Cmp,
                    SelectionDAG &DAG) const;
    SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                      SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
    SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
                      const SDLoc &dl, bool Signaling = false) const;
    SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;

    SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    void EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitStructByval(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitLowered__chkstk(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const;
    MachineBasicBlock *EmitLowered__dbzchk(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const;
    void addMVEVectorTypes(bool HasMVEFP);
    void addAllExtLoads(const MVT From, const MVT To, LegalizeAction Action);
    void setAllExpand(MVT VT);
  };

  enum VMOVModImmType {
    VMOVModImm,
    VMVNModImm,
    MVEVMVNModImm,
    OtherModImm
  };

  namespace ARM {

    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);

  } // end namespace ARM

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
