//===- ARMISelLowering.h - ARM DAG Lowering Interface -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
#define LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H

#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/CodeGen.h"
#include <optional>
#include <utility>

namespace llvm {

class ARMSubtarget;
class DataLayout;
class FastISel;
class FunctionLoweringInfo;
class GlobalValue;
class InstrItineraryData;
class Instruction;
class IRBuilderBase;
class MachineBasicBlock;
class MachineInstr;
class SelectionDAG;
class TargetLibraryInfo;
class TargetMachine;
class TargetRegisterInfo;
class VectorType;

  namespace ARMISD {

  // ARM Specific DAG Nodes
  enum NodeType : unsigned {
    // Start the numbering where the builtin ops and target ops leave off.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    Wrapper,    // Wrapper - A wrapper node for TargetConstantPool,
                // TargetExternalSymbol, and TargetGlobalAddress.
    WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
                // PIC mode.
    WrapperJT,  // WrapperJT - A wrapper node for TargetJumpTable

    // Add pseudo op to model memcpy for struct byval.
    COPY_STRUCT_BYVAL,

    CALL,        // Function call.
    CALL_PRED,   // Function call that's predicable.
    CALL_NOLINK, // Function call with branch not branch-and-link.
    tSECALL,     // CMSE non-secure function call.
    t2CALL_BTI,  // Thumb function call followed by BTI instruction.
    BRCOND,      // Conditional branch.
    BR_JT,       // Jumptable branch.
    BR2_JT,      // Jumptable branch (2 level - jumptable entry is a jump).
    RET_GLUE,    // Return with a flag operand.
    SERET_GLUE,  // CMSE Entry function return with a flag operand.
    INTRET_GLUE, // Interrupt return with an LR-offset and a flag operand.

    PIC_ADD, // Add with a PC operand and a PIC label.

    ASRL, // MVE long arithmetic shift right.
    LSRL, // MVE long shift right.
    LSLL, // MVE long shift left.

    CMP,      // ARM compare instructions.
    CMN,      // ARM CMN instructions.
    CMPZ,     // ARM compare that sets only Z flag.
    CMPFP,    // ARM VFP compare instruction, sets FPSCR.
    CMPFPE,   // ARM VFP signalling compare instruction, sets FPSCR.
    CMPFPw0,  // ARM VFP compare against zero instruction, sets FPSCR.
    CMPFPEw0, // ARM VFP signalling compare against zero instruction, sets
              // FPSCR.
    FMSTAT,   // ARM fmstat instruction.

    CMOV, // ARM conditional move instructions.
    SUBS, // Flag-setting subtraction.

    SSAT, // Signed saturation
    USAT, // Unsigned saturation

    BCC_i64,

    SRL_GLUE, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
    SRA_GLUE, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
    RRX,      // V = RRX X, Flag -> srl X, 1 + shift in carry flag.

    ADDC, // Add with carry
    ADDE, // Add using carry
    SUBC, // Sub with carry
    SUBE, // Sub using carry
    LSLS, // Shift left producing carry

    VMOVRRD, // double to two gprs.
    VMOVDRR, // Two gprs to double.
    VMOVSR,  // move gpr to single, used for f32 literal constructed in a gpr

    EH_SJLJ_SETJMP,         // SjLj exception handling setjmp.
    EH_SJLJ_LONGJMP,        // SjLj exception handling longjmp.
    EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.

    TC_RETURN, // Tail call return pseudo.

    THREAD_POINTER,

    DYN_ALLOC, // Dynamic allocation on the stack.

    MEMBARRIER_MCR, // Memory barrier (MCR)

    PRELOAD, // Preload

    WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
    WIN__DBZCHK, // Windows' divide by zero check

    WLS,      // Low-overhead loops, While Loop Start branch. See t2WhileLoopStart
    WLSSETUP, // Setup for the iteration count of a WLS. See t2WhileLoopSetup.
    LOOP_DEC, // Really a part of LE, performs the sub
    LE,       // Low-overhead loops, Loop End

    PREDICATE_CAST,  // Predicate cast for MVE i1 types
    VECTOR_REG_CAST, // Reinterpret the current contents of a vector register

    MVESEXT,  // Legalization aids for extending a vector into two/four vectors.
    MVEZEXT,  // or truncating two/four vectors into one. Eventually becomes
    MVETRUNC, // stack store/load sequence, if not optimized to anything else.

    VCMP,  // Vector compare.
    VCMPZ, // Vector compare to zero.
    VTST,  // Vector test bits.

    // Vector shift by vector
    VSHLs, // ...left/right by signed
    VSHLu, // ...left/right by unsigned

    // Vector shift by immediate:
    VSHLIMM,  // ...left
    VSHRsIMM, // ...right (signed)
    VSHRuIMM, // ...right (unsigned)

    // Vector rounding shift by immediate:
    VRSHRsIMM, // ...right (signed)
    VRSHRuIMM, // ...right (unsigned)
    VRSHRNIMM, // ...right narrow

    // Vector saturating shift by immediate:
    VQSHLsIMM,   // ...left (signed)
    VQSHLuIMM,   // ...left (unsigned)
    VQSHLsuIMM,  // ...left (signed to unsigned)
    VQSHRNsIMM,  // ...right narrow (signed)
    VQSHRNuIMM,  // ...right narrow (unsigned)
    VQSHRNsuIMM, // ...right narrow (signed to unsigned)

    // Vector saturating rounding shift by immediate:
    VQRSHRNsIMM,  // ...right narrow (signed)
    VQRSHRNuIMM,  // ...right narrow (unsigned)
    VQRSHRNsuIMM, // ...right narrow (signed to unsigned)

    // Vector shift and insert:
    VSLIIMM, // ...left
    VSRIIMM, // ...right

    // Vector get lane (VMOV scalar to ARM core register)
    // (These are used for 8- and 16-bit element types only.)
    VGETLANEu, // zero-extend vector extract element
    VGETLANEs, // sign-extend vector extract element

    // Vector move immediate and move negated immediate:
    VMOVIMM,
    VMVNIMM,

    // Vector move f32 immediate:
    VMOVFPIMM,

    // Move H <-> R, clearing top 16 bits
    VMOVrh,
    VMOVhr,

    // Vector duplicate:
    VDUP,
    VDUPLANE,

    // Vector shuffles:
    VEXT,   // extract
    VREV64, // reverse elements within 64-bit doublewords
    VREV32, // reverse elements within 32-bit words
    VREV16, // reverse elements within 16-bit halfwords
    VZIP,   // zip (interleave)
    VUZP,   // unzip (deinterleave)
    VTRN,   // transpose
    VTBL1,  // 1-register shuffle with mask
    VTBL2,  // 2-register shuffle with mask
    VMOVN,  // MVE vmovn

    // MVE Saturating truncates
    VQMOVNs, // Vector (V) Saturating (Q) Move and Narrow (N), signed (s)
    VQMOVNu, // Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u)

    // MVE float <> half converts
    VCVTN, // MVE vcvt f32 -> f16, truncating into either the bottom or top
           // lanes
    VCVTL, // MVE vcvt f16 -> f32, extending from either the bottom or top lanes

    // MVE VIDUP instruction, taking a start value and increment.
    VIDUP,

    // Vector multiply long:
    VMULLs, // ...signed
    VMULLu, // ...unsigned

    VQDMULH, // MVE vqdmulh instruction

    // MVE reductions
    VADDVs,    // sign- or zero-extend the elements of a vector to i32,
    VADDVu,    // add them all together, and return an i32 of their sum
    VADDVps,   // Same as VADDV[su] but with a v4i1 predicate mask
    VADDVpu,
    VADDLVs,   // sign- or zero-extend elements to i64 and sum, returning
    VADDLVu,   // the low and high 32-bit halves of the sum
    VADDLVAs,  // Same as VADDLV[su] but also add an input accumulator
    VADDLVAu,  // provided as low and high halves
    VADDLVps,  // Same as VADDLV[su] but with a v4i1 predicate mask
    VADDLVpu,
    VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask
    VADDLVApu,
    VMLAVs,    // sign- or zero-extend the elements of two vectors to i32, multiply
    VMLAVu,    // them and add the results together, returning an i32 of their sum
    VMLAVps,   // Same as VMLAV[su] with a v4i1 predicate mask
    VMLAVpu,
    VMLALVs,   // Same as VMLAV but with i64, returning the low and
    VMLALVu,   // high 32-bit halves of the sum
    VMLALVps,  // Same as VMLALV[su] with a v4i1 predicate mask
    VMLALVpu,
    VMLALVAs,  // Same as VMLALV but also add an input accumulator
    VMLALVAu,  // provided as low and high halves
    VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask
    VMLALVApu,
    VMINVu,    // Find minimum unsigned value of a vector and register
    VMINVs,    // Find minimum signed value of a vector and register
    VMAXVu,    // Find maximum unsigned value of a vector and register
    VMAXVs,    // Find maximum signed value of a vector and register

    SMULWB,  // Signed multiply word by half word, bottom
    SMULWT,  // Signed multiply word by half word, top
    UMLAL,   // 64bit Unsigned Accumulate Multiply
    SMLAL,   // 64bit Signed Accumulate Multiply
    UMAAL,   // 64-bit Unsigned Accumulate Accumulate Multiply
    SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
    SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
    SMLALTB, // 64-bit signed accumulate multiply top, bottom 16
    SMLALTT, // 64-bit signed accumulate multiply top, top 16
    SMLALD,  // Signed multiply accumulate long dual
    SMLALDX, // Signed multiply accumulate long dual exchange
    SMLSLD,  // Signed multiply subtract long dual
    SMLSLDX, // Signed multiply subtract long dual exchange
    SMMLAR,  // Signed multiply long, round and add
    SMMLSR,  // Signed multiply long, subtract and round

    // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b
    // stands for.
    QADD8b,
    QSUB8b,
    QADD16b,
    QSUB16b,
    UQADD8b,
    UQSUB8b,
    UQADD16b,
    UQSUB16b,

    // Operands of the standard BUILD_VECTOR node are not legalized, which
    // is fine if BUILD_VECTORs are always lowered to shuffles or other
    // operations, but for ARM some BUILD_VECTORs are legal as-is and their
    // operands need to be legalized. Define an ARM-specific version of
    // BUILD_VECTOR for this purpose.
    BUILD_VECTOR,

    // Bit-field insert
    BFI,

    // Vector OR with immediate
    VORRIMM,
    // Vector AND with NOT of immediate
    VBICIMM,

    // Pseudo vector bitwise select
    VBSP,

    // Pseudo-instruction representing a memory copy using ldm/stm
    // instructions.
    MEMCPY,

    // Pseudo-instruction representing a memory copy using a tail predicated
    // loop
    MEMCPYLOOP,
    // Pseudo-instruction representing a memset using a tail predicated
    // loop
    MEMSETLOOP,

    // V8.1MMainline condition select
    CSINV, // Conditional select invert.
    CSNEG, // Conditional select negate.
    CSINC, // Conditional select increment.

    // Vector load N-element structure to all lanes:
    VLD1DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
    VLD2DUP,
    VLD3DUP,
    VLD4DUP,

    // NEON loads with post-increment base updates:
    VLD1_UPD,
    VLD2_UPD,
    VLD3_UPD,
    VLD4_UPD,
    VLD2LN_UPD,
    VLD3LN_UPD,
    VLD4LN_UPD,
    VLD1DUP_UPD,
    VLD2DUP_UPD,
    VLD3DUP_UPD,
    VLD4DUP_UPD,
    VLD1x2_UPD,
    VLD1x3_UPD,
    VLD1x4_UPD,

    // NEON stores with post-increment base updates:
    VST1_UPD,
    VST2_UPD,
    VST3_UPD,
    VST4_UPD,
    VST2LN_UPD,
    VST3LN_UPD,
    VST4LN_UPD,
    VST1x2_UPD,
    VST1x3_UPD,
    VST1x4_UPD,

    // Load/Store of dual registers
    LDRD,
    STRD
  };

  } // end namespace ARMISD
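
  // Illustrative sketch (an assumption for exposition, not part of the ARMISD
  // interface itself): the opcodes above are used as the opcode argument when
  // ARM lowering or DAG-combine code builds target-specific nodes. For
  // example, splatting a scalar operand during custom lowering might look
  // roughly like
  //
  //   SDValue Splat = DAG.getNode(ARMISD::VDUP, dl, VT, ScalarOp);
  //
  // where DAG, dl, VT and ScalarOp are whatever SelectionDAG, debug location,
  // result type and operand the caller already has in hand.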

  namespace ARM {
  /// Possible values of current rounding mode, which is specified in bits
  /// 23:22 of FPSCR.
  enum Rounding {
    RN = 0,    // Round to Nearest
    RP = 1,    // Round towards Plus infinity
    RM = 2,    // Round towards Minus infinity
    RZ = 3,    // Round towards Zero
    rmMask = 3 // Bit mask selecting rounding mode
  };

  // Bit position of rounding mode bits in FPSCR.
  const unsigned RoundingBitsPos = 22;

  // Bits of floating-point status. These are NZCV flags, QC bit and cumulative
  // FP exception bits.
  const unsigned FPStatusBits = 0xf800009f;

  // Some bits in the FPSCR are not yet defined. They must be preserved when
  // modifying the contents.
  const unsigned FPReservedBits = 0x00006060;
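
  // Illustrative sketch (an assumption for exposition, not part of this
  // interface): with the constants above, the rounding mode stored in FPSCR
  // bits 23:22 would be read and updated roughly as follows, using the
  // hypothetical helpers getRoundingMode/setRoundingMode on a raw 32-bit
  // FPSCR value:
  //
  //   unsigned getRoundingMode(unsigned FPSCR) {
  //     return (FPSCR >> RoundingBitsPos) & rmMask; // 0..3 -> RN/RP/RM/RZ
  //   }
  //   unsigned setRoundingMode(unsigned FPSCR, unsigned RM) {
  //     FPSCR &= ~(rmMask << RoundingBitsPos);      // clear bits 23:22
  //     return FPSCR | ((RM & rmMask) << RoundingBitsPos);
  //   }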
  } // namespace ARM

  /// Define some predicates that are used for node matching.
  namespace ARM {

    bool isBitFieldInvertedMask(unsigned v);

  } // end namespace ARM

  //===--------------------------------------------------------------------===//
  //  ARMTargetLowering - ARM Implementation of the TargetLowering interface

  class ARMTargetLowering : public TargetLowering {
  public:
    explicit ARMTargetLowering(const TargetMachine &TM,
                               const ARMSubtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// ReplaceNodeResults - Replace the results of node with an illegal result
    /// type with new values built out of custom code.
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    const char *getTargetNodeName(unsigned Opcode) const override;

    bool isSelectSupported(SelectSupportKind Kind) const override {
      // ARM does not support scalar condition selects on vectors.
      return (Kind != ScalarCondVectorVal);
    }

    bool isReadOnly(const GlobalValue *GV) const;

    /// getSetCCResultType - Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                       SDNode *Node) const override;

    SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
    SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const;
    SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
    SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &OriginalDemandedBits,
                                           const APInt &OriginalDemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;

    /// allowsMisalignedMemoryAccesses - Returns true if the target allows
    /// unaligned memory accesses of the specified type. Returns whether it
    /// is "fast" by reference in the second argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
                                        Align Alignment,
                                        MachineMemOperand::Flags Flags,
                                        unsigned *Fast) const override;

    EVT getOptimalMemOpType(const MemOp &Op,
                            const AttributeList &FuncAttributes) const override;

    bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
    bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;
    bool shouldSinkOperands(Instruction *I,
                            SmallVectorImpl<Use *> &Ops) const override;
    Type *shouldConvertSplatType(ShuffleVectorInst *SVI) const override;

    bool isFNegFree(EVT VT) const override;

    bool isVectorLoadExtDesirable(SDValue ExtVal) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// isLegalAddressingMode - Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;

    /// Returns true if the addressing mode represented by AM is legal
    /// for the Thumb1 target, for a load/store of the specified type.
    bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const;

    /// isLegalICmpImmediate - Return true if the specified immediate is legal
    /// icmp immediate, that is the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// isLegalAddImmediate - Return true if the specified immediate is legal
    /// add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// getPreIndexedAddressParts - returns true by value, base pointer and
    /// offset pointer and addressing mode by reference if the node's address
    /// can be legally represented as pre-indexed load / store address.
    bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                   ISD::MemIndexedMode &AM,
                                   SelectionDAG &DAG) const override;

    /// getPostIndexedAddressParts - returns true by value, base pointer and
    /// offset pointer and addressing mode by reference if this node can be
    /// combined with a load / store to form a post-indexed load / store.
    bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                    SDValue &Offset, ISD::MemIndexedMode &AM,
                                    SelectionDAG &DAG) const override;

    void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                      const APInt &DemandedElts,
                                      TargetLoweringOpt &TLO) const override;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight getSingleConstraintMatchWeight(
        AsmOperandInfo &info, const char *constraint) const override;

    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
    /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
    /// true it means one of the asm constraints of the inline asm instruction
    /// being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    InlineAsm::ConstraintCode
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "Q")
        return InlineAsm::ConstraintCode::Q;
      if (ConstraintCode.size() == 2) {
        if (ConstraintCode[0] == 'U') {
          switch (ConstraintCode[1]) {
          default:
            break;
          case 'm':
            return InlineAsm::ConstraintCode::Um;
          case 'n':
            return InlineAsm::ConstraintCode::Un;
          case 'q':
            return InlineAsm::ConstraintCode::Uq;
          case 's':
            return InlineAsm::ConstraintCode::Us;
          case 't':
            return InlineAsm::ConstraintCode::Ut;
          case 'v':
            return InlineAsm::ConstraintCode::Uv;
          case 'y':
            return InlineAsm::ConstraintCode::Uy;
          }
        }
      }
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    const ARMSubtarget *getSubtarget() const {
      return Subtarget;
    }

    /// getRegClassFor - Return the register class that should be used for the
    /// specified value type.
    const TargetRegisterClass *
    getRegClassFor(MVT VT, bool isDivergent = false) const override;

    bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
                                Align &PrefAlign) const override;

    /// createFastISel - This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    Sched::Preference getSchedulingPreference(SDNode *N) const override;

    bool preferZeroCompareBranch() const override { return true; }

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool
    isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
    bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

    /// isFPImmLegal - Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize = false) const override;

    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                            const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                              bool MathUsed) const override {
      // Using overflow ops for overflow checks only should be beneficial on
      // ARM.
      return TargetLowering::shouldFormOverflowOp(Opcode, VT,
                                                  /*MathUsed=*/true);
    }

    bool shouldReassociateReduction(unsigned Opc, EVT VT) const override {
      return Opc != ISD::VECREDUCE_ADD;
    }

    /// Returns true if an argument of type Ty needs to be passed in a
    /// contiguous block of registers in calling convention CallConv.
    bool functionArgumentNeedsConsecutiveRegisters(
        Type *Ty, CallingConv::ID CallConv, bool isVarArg,
        const DataLayout &DL) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    Register
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    Register
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    Instruction *makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const;
    Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                          AtomicOrdering Ord) const override;
    Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                                AtomicOrdering Ord) const override;

    void
    emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

    Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                  AtomicOrdering Ord) const override;
    Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                   AtomicOrdering Ord) const override;

    unsigned getMaxSupportedInterleaveFactor() const override;

    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;

    bool shouldInsertFencesForAtomic(const Instruction *I) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

    bool useLoadStackGuardNode() const override;

    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Function *getSSPStackGuardCheck(const Module &M) const override;

    bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                   unsigned &Cost) const override;

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const MachineFunction &MF) const override {
      // Do not merge to larger than i32.
      return (MemVT.getSizeInBits() <= 32);
    }

    bool isCheapToSpeculateCttz(Type *Ty) const override;
    bool isCheapToSpeculateCtlz(Type *Ty) const override;

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    bool supportSwiftError() const override {
      return true;
    }

    bool hasStandaloneRem(EVT VT) const override {
      return HasStandaloneRem;
    }

    ShiftLegalizationStrategy
    preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                       unsigned ExpansionFactor) const override;

    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
    CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;

    /// Returns true if \p VecTy is a legal interleaved access type. This
    /// function checks the vector element type and the overall width of the
    /// vector.
    bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy,
                                      Align Alignment,
                                      const DataLayout &DL) const;

    bool isMulAddWithConstProfitable(SDValue AddNode,
                                     SDValue ConstNode) const override;

    bool alignLoopsWithOptSize() const override;

    /// Returns the number of interleaved accesses that will be generated when
    /// lowering accesses of the given type.
    unsigned getNumInterleavedAccesses(VectorType *VecTy,
                                       const DataLayout &DL) const;

    void finalizeLowering(MachineFunction &MF) const override;

    /// Return the correct alignment for the current calling convention.
    Align getABIAlignmentForCallingConv(Type *ArgTy,
                                        const DataLayout &DL) const override;

    bool isDesirableToCommuteWithShift(const SDNode *N,
                                       CombineLevel Level) const override;

    bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                           CombineLevel Level) const override;

    bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                              EVT VT) const override;

    bool preferIncOfAddToSubOfNot(EVT VT) const override;

    bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

    bool isComplexDeinterleavingSupported() const override;
    bool isComplexDeinterleavingOperationSupported(
        ComplexDeinterleavingOperation Operation, Type *Ty) const override;

    Value *createComplexDeinterleavingIR(
        IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
        ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
        Value *Accumulator = nullptr) const override;

    bool softPromoteHalfType() const override { return true; }

    bool useFPRegsForHalfType() const override { return true; }

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
    /// make the right decision when generating code for different targets.
    const ARMSubtarget *Subtarget;

    const TargetRegisterInfo *RegInfo;

    const InstrItineraryData *Itins;

    // TODO: remove this, and have shouldInsertFencesForAtomic do the proper
    // check.
    bool InsertFencesForAtomic;

    bool HasStandaloneRem = true;

    void addTypeForNEON(MVT VT, MVT PromotedLdStVT);
    void addDRTypeForNEON(MVT VT);
    void addQRTypeForNEON(MVT VT);
    std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
                                              SDValue &ARMcc) const;

    using RegsToPassVector = SmallVector<std::pair<unsigned, SDValue>, 8>;

    void PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain,
                          SDValue &Arg, RegsToPassVector &RegsToPass,
                          CCValAssign &VA, CCValAssign &NextVA,
                          SDValue &StackPtr,
                          SmallVectorImpl<SDValue> &MemOpChains,
                          bool IsTailCall,
                          int SPDiff) const;
    SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                                 SDValue &Root, SelectionDAG &DAG,
                                 const SDLoc &dl) const;

    CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC,
                                            bool isVarArg) const;
    CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
                                  bool isVarArg) const;
    std::pair<SDValue, MachinePointerInfo>
    computeAddrForCallArg(const SDLoc &dl, SelectionDAG &DAG,
                          const CCValAssign &VA, SDValue StackPtr,
                          bool IsTailCall, int SPDiff) const;
    SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG,
                                const ARMSubtarget *Subtarget) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                    const ARMSubtarget *Subtarget) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                          SelectionDAG &DAG) const;
    SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                 SelectionDAG &DAG,
                                 TLSModel::Model model) const;
    SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
                            const ARMSubtarget *ST) const;
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                              const ARMSubtarget *ST) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
    void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
                           SmallVectorImpl<SDValue> &Results) const;
    SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
                          const ARMSubtarget *Subtarget) const;
    SDValue LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed,
                                   SDValue &Chain) const;
    SDValue LowerREM(SDNode *N, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
    void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
                   SelectionDAG &DAG) const;

    Register getRegisterByName(const char *RegName, LLT VT,
                               const MachineFunction &MF) const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                    EVT VT) const override;

    SDValue MoveToHPR(const SDLoc &dl, SelectionDAG &DAG, MVT LocVT, MVT ValVT,
                      SDValue Val) const;
    SDValue MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG, MVT LocVT,
                        MVT ValVT, SDValue Val) const;

    SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                            SDValue ThisVal) const;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
             MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }

    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool splitValueIntoRegisterParts(
        SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
        unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)
        const override;

    SDValue joinRegisterPartsIntoValue(
        SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
        unsigned NumParts, MVT PartVT, EVT ValueVT,
        std::optional<CallingConv::ID> CC) const override;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;

    int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &dl,
                       SDValue &Chain, const Value *OrigArg,
                       unsigned InRegsParamRecordIdx, int ArgOffset,
                       unsigned ArgSize) const;

    void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
                              const SDLoc &dl, SDValue &Chain,
                              unsigned ArgOffset, unsigned TotalArgRegsSaveSize,
                              bool ForceMutable = false) const;

    SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    /// HandleByVal - Target-specific cleanup for ByVal support.
    void HandleByVal(CCState *, unsigned &, Align) const override;

    /// IsEligibleForTailCallOptimization - Check whether the call is eligible
    /// for tail call optimization. Targets which want to do tail call
    /// optimization should implement this function.
    bool IsEligibleForTailCallOptimization(
        SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
        bool isCalleeStructRet, bool isCallerStructRet,
        const SmallVectorImpl<ISD::OutputArg> &Outs,
        const SmallVectorImpl<SDValue> &OutVals,
        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
        const bool isIndirect) const;

    bool CanLowerReturn(CallingConv::ID CallConv,
                        MachineFunction &MF, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    bool shouldConsiderGEPOffsetSplit() const override { return true; }

    bool isUnsupportedFloatingType(EVT VT) const;

    SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
                    SDValue ARMcc, SDValue CCR, SDValue Cmp,
                    SelectionDAG &DAG) const;
    SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                      SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
    SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
                      const SDLoc &dl, bool Signaling = false) const;
    SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;

    SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    void EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitStructByval(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitLowered__chkstk(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const;
    MachineBasicBlock *EmitLowered__dbzchk(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const;
    void addMVEVectorTypes(bool HasMVEFP);
    void addAllExtLoads(const MVT From, const MVT To, LegalizeAction Action);
    void setAllExpand(MVT VT);
  };

  enum VMOVModImmType {
    VMOVModImm,
    VMVNModImm,
    MVEVMVNModImm,
    OtherModImm
  };

  namespace ARM {

    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);

  } // end namespace ARM

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
