1 | //===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file declares the AArch64 specific subclass of TargetSubtarget. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H |
14 | #define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H |
15 | |
16 | #include "AArch64FrameLowering.h" |
17 | #include "AArch64ISelLowering.h" |
18 | #include "AArch64InstrInfo.h" |
19 | #include "AArch64PointerAuth.h" |
20 | #include "AArch64RegisterInfo.h" |
21 | #include "AArch64SelectionDAGInfo.h" |
22 | #include "llvm/CodeGen/GlobalISel/CallLowering.h" |
23 | #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" |
24 | #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" |
25 | #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" |
26 | #include "llvm/CodeGen/RegisterBankInfo.h" |
27 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
28 | #include "llvm/IR/DataLayout.h" |
29 | |
30 | #define |
31 | #include "AArch64GenSubtargetInfo.inc" |
32 | |
33 | namespace llvm { |
34 | class GlobalValue; |
35 | class StringRef; |
36 | class Triple; |
37 | |
38 | class AArch64Subtarget final : public AArch64GenSubtargetInfo { |
39 | public: |
40 | enum ARMProcFamilyEnum : uint8_t { |
41 | Others, |
42 | #define ARM_PROCESSOR_FAMILY(ENUM) ENUM, |
43 | #include "llvm/TargetParser/AArch64TargetParserDef.inc" |
44 | #undef ARM_PROCESSOR_FAMILY |
45 | }; |
46 | |
47 | protected: |
48 | /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. |
49 | ARMProcFamilyEnum ARMProcFamily = Others; |
50 | |
51 | // Enable 64-bit vectorization in SLP. |
52 | unsigned MinVectorRegisterBitWidth = 64; |
53 | |
54 | // Bool members corresponding to the SubtargetFeatures defined in tablegen |
55 | #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ |
56 | bool ATTRIBUTE = DEFAULT; |
57 | #include "AArch64GenSubtargetInfo.inc" |
58 | |
59 | uint8_t MaxInterleaveFactor = 2; |
60 | uint8_t = 2; |
61 | uint16_t CacheLineSize = 0; |
62 | uint16_t PrefetchDistance = 0; |
63 | uint16_t MinPrefetchStride = 1; |
64 | unsigned MaxPrefetchIterationsAhead = UINT_MAX; |
65 | Align PrefFunctionAlignment; |
66 | Align PrefLoopAlignment; |
67 | unsigned MaxBytesForLoopAlignment = 0; |
68 | unsigned MinimumJumpTableEntries = 4; |
69 | unsigned MaxJumpTableSize = 0; |
70 | |
71 | // ReserveXRegister[i] - X#i is not available as a general purpose register. |
72 | BitVector ReserveXRegister; |
73 | |
74 | // ReserveXRegisterForRA[i] - X#i is not available for register allocator. |
75 | BitVector ReserveXRegisterForRA; |
76 | |
77 | // CustomCallUsedXRegister[i] - X#i call saved. |
78 | BitVector CustomCallSavedXRegs; |
79 | |
80 | bool IsLittle; |
81 | |
82 | bool StreamingSVEMode; |
83 | bool StreamingCompatibleSVEMode; |
84 | unsigned MinSVEVectorSizeInBits; |
85 | unsigned MaxSVEVectorSizeInBits; |
86 | unsigned VScaleForTuning = 2; |
87 | TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled; |
88 | |
89 | /// TargetTriple - What processor and OS we're targeting. |
90 | Triple TargetTriple; |
91 | |
92 | AArch64FrameLowering FrameLowering; |
93 | AArch64InstrInfo InstrInfo; |
94 | AArch64SelectionDAGInfo TSInfo; |
95 | AArch64TargetLowering TLInfo; |
96 | |
97 | /// GlobalISel related APIs. |
98 | std::unique_ptr<CallLowering> CallLoweringInfo; |
99 | std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; |
100 | std::unique_ptr<InstructionSelector> InstSelector; |
101 | std::unique_ptr<LegalizerInfo> Legalizer; |
102 | std::unique_ptr<RegisterBankInfo> RegBankInfo; |
103 | |
104 | private: |
105 | /// initializeSubtargetDependencies - Initializes using CPUString and the |
106 | /// passed in feature string so that we can use initializer lists for |
107 | /// subtarget initialization. |
108 | AArch64Subtarget &initializeSubtargetDependencies(StringRef FS, |
109 | StringRef CPUString, |
110 | StringRef TuneCPUString, |
111 | bool HasMinSize); |
112 | |
113 | /// Initialize properties based on the selected processor family. |
114 | void initializeProperties(bool HasMinSize); |
115 | |
116 | public: |
117 | /// This constructor initializes the data members to match that |
118 | /// of the specified triple. |
119 | AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, |
120 | StringRef FS, const TargetMachine &TM, bool LittleEndian, |
121 | unsigned MinSVEVectorSizeInBitsOverride = 0, |
122 | unsigned MaxSVEVectorSizeInBitsOverride = 0, |
123 | bool StreamingSVEMode = false, |
124 | bool StreamingCompatibleSVEMode = false, |
125 | bool HasMinSize = false); |
126 | |
127 | // Getters for SubtargetFeatures defined in tablegen |
128 | #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ |
129 | bool GETTER() const { return ATTRIBUTE; } |
130 | #include "AArch64GenSubtargetInfo.inc" |
131 | |
132 | const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { |
133 | return &TSInfo; |
134 | } |
135 | const AArch64FrameLowering *getFrameLowering() const override { |
136 | return &FrameLowering; |
137 | } |
138 | const AArch64TargetLowering *getTargetLowering() const override { |
139 | return &TLInfo; |
140 | } |
141 | const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; } |
142 | const AArch64RegisterInfo *getRegisterInfo() const override { |
143 | return &getInstrInfo()->getRegisterInfo(); |
144 | } |
145 | const CallLowering *getCallLowering() const override; |
146 | const InlineAsmLowering *getInlineAsmLowering() const override; |
147 | InstructionSelector *getInstructionSelector() const override; |
148 | const LegalizerInfo *getLegalizerInfo() const override; |
149 | const RegisterBankInfo *getRegBankInfo() const override; |
150 | const Triple &getTargetTriple() const { return TargetTriple; } |
151 | bool enableMachineScheduler() const override { return true; } |
152 | bool enablePostRAScheduler() const override { return usePostRAScheduler(); } |
153 | |
154 | bool enableMachinePipeliner() const override; |
155 | bool useDFAforSMS() const override { return false; } |
156 | |
157 | /// Returns ARM processor family. |
158 | /// Avoid this function! CPU specifics should be kept local to this class |
159 | /// and preferably modeled with SubtargetFeatures or properties in |
160 | /// initializeProperties(). |
161 | ARMProcFamilyEnum getProcFamily() const { |
162 | return ARMProcFamily; |
163 | } |
164 | |
165 | bool isXRaySupported() const override { return true; } |
166 | |
167 | /// Returns true if the function has a streaming body. |
168 | bool isStreaming() const { return StreamingSVEMode; } |
169 | |
170 | /// Returns true if the function has a streaming-compatible body. |
171 | bool isStreamingCompatible() const; |
172 | |
173 | /// Returns true if the target has NEON and the function at runtime is known |
174 | /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE |
175 | /// mode, which disables NEON instructions). |
176 | bool isNeonAvailable() const; |
177 | |
178 | /// Returns true if the target has SVE and can use the full range of SVE |
179 | /// instructions, for example because it knows the function is known not to be |
180 | /// in streaming-SVE mode or when the target has FEAT_FA64 enabled. |
181 | bool isSVEAvailable() const; |
182 | |
183 | unsigned getMinVectorRegisterBitWidth() const { |
184 | // Don't assume any minimum vector size when PSTATE.SM may not be 0, because |
185 | // we don't yet support streaming-compatible codegen support that we trust |
186 | // is safe for functions that may be executed in streaming-SVE mode. |
187 | // By returning '0' here, we disable vectorization. |
188 | if (!isSVEAvailable() && !isNeonAvailable()) |
189 | return 0; |
190 | return MinVectorRegisterBitWidth; |
191 | } |
192 | |
193 | bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; } |
194 | bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; } |
195 | unsigned getNumXRegisterReserved() const { |
196 | BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs()); |
197 | AllReservedX |= ReserveXRegister; |
198 | AllReservedX |= ReserveXRegisterForRA; |
199 | return AllReservedX.count(); |
200 | } |
201 | bool isXRegCustomCalleeSaved(size_t i) const { |
202 | return CustomCallSavedXRegs[i]; |
203 | } |
204 | bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); } |
205 | |
206 | /// Return true if the CPU supports any kind of instruction fusion. |
207 | bool hasFusion() const { |
208 | return hasArithmeticBccFusion() || hasArithmeticCbzFusion() || |
209 | hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() || |
210 | hasFuseAdrpAdd() || hasFuseLiterals(); |
211 | } |
212 | |
213 | unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } |
214 | unsigned () const; |
215 | unsigned getCacheLineSize() const override { return CacheLineSize; } |
216 | unsigned getPrefetchDistance() const override { return PrefetchDistance; } |
217 | unsigned getMinPrefetchStride(unsigned NumMemAccesses, |
218 | unsigned NumStridedMemAccesses, |
219 | unsigned NumPrefetches, |
220 | bool HasCall) const override { |
221 | return MinPrefetchStride; |
222 | } |
223 | unsigned getMaxPrefetchIterationsAhead() const override { |
224 | return MaxPrefetchIterationsAhead; |
225 | } |
226 | Align getPrefFunctionAlignment() const { |
227 | return PrefFunctionAlignment; |
228 | } |
229 | Align getPrefLoopAlignment() const { return PrefLoopAlignment; } |
230 | |
231 | unsigned getMaxBytesForLoopAlignment() const { |
232 | return MaxBytesForLoopAlignment; |
233 | } |
234 | |
235 | unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; } |
236 | unsigned getMinimumJumpTableEntries() const { |
237 | return MinimumJumpTableEntries; |
238 | } |
239 | |
240 | /// CPU has TBI (top byte of addresses is ignored during HW address |
241 | /// translation) and OS enables it. |
242 | bool supportsAddressTopByteIgnored() const; |
243 | |
244 | bool isLittleEndian() const { return IsLittle; } |
245 | |
246 | bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } |
247 | bool isTargetIOS() const { return TargetTriple.isiOS(); } |
248 | bool isTargetLinux() const { return TargetTriple.isOSLinux(); } |
249 | bool isTargetWindows() const { return TargetTriple.isOSWindows(); } |
250 | bool isTargetAndroid() const { return TargetTriple.isAndroid(); } |
251 | bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); } |
252 | bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); } |
253 | |
254 | bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } |
255 | bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } |
256 | bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } |
257 | |
258 | bool isTargetILP32() const { |
259 | return TargetTriple.isArch32Bit() || |
260 | TargetTriple.getEnvironment() == Triple::GNUILP32; |
261 | } |
262 | |
263 | bool useAA() const override; |
264 | |
265 | bool addrSinkUsingGEPs() const override { |
266 | // Keeping GEPs inbounds is important for exploiting AArch64 |
267 | // addressing-modes in ILP32 mode. |
268 | return useAA() || isTargetILP32(); |
269 | } |
270 | |
271 | bool useSmallAddressing() const { |
272 | switch (TLInfo.getTargetMachine().getCodeModel()) { |
273 | case CodeModel::Kernel: |
274 | // Kernel is currently allowed only for Fuchsia targets, |
275 | // where it is the same as Small for almost all purposes. |
276 | case CodeModel::Small: |
277 | return true; |
278 | default: |
279 | return false; |
280 | } |
281 | } |
282 | |
283 | /// ParseSubtargetFeatures - Parses features string setting specified |
284 | /// subtarget options. Definition of function is auto generated by tblgen. |
285 | void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); |
286 | |
287 | /// ClassifyGlobalReference - Find the target operand flags that describe |
288 | /// how a global value should be referenced for the current subtarget. |
289 | unsigned ClassifyGlobalReference(const GlobalValue *GV, |
290 | const TargetMachine &TM) const; |
291 | |
292 | unsigned classifyGlobalFunctionReference(const GlobalValue *GV, |
293 | const TargetMachine &TM) const; |
294 | |
295 | /// This function is design to compatible with the function def in other |
296 | /// targets and escape build error about the virtual function def in base |
297 | /// class TargetSubtargetInfo. Updeate me if AArch64 target need to use it. |
298 | unsigned char |
299 | classifyGlobalFunctionReference(const GlobalValue *GV) const override { |
300 | return 0; |
301 | } |
302 | |
303 | void overrideSchedPolicy(MachineSchedPolicy &Policy, |
304 | unsigned NumRegionInstrs) const override; |
305 | void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, |
306 | SDep &Dep, |
307 | const TargetSchedModel *SchedModel) const override; |
308 | |
309 | bool enableEarlyIfConversion() const override; |
310 | |
311 | std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override; |
312 | |
313 | bool isCallingConvWin64(CallingConv::ID CC) const { |
314 | switch (CC) { |
315 | case CallingConv::C: |
316 | case CallingConv::Fast: |
317 | case CallingConv::Swift: |
318 | case CallingConv::SwiftTail: |
319 | return isTargetWindows(); |
320 | case CallingConv::Win64: |
321 | return true; |
322 | default: |
323 | return false; |
324 | } |
325 | } |
326 | |
327 | /// Return whether FrameLowering should always set the "extended frame |
328 | /// present" bit in FP, or set it based on a symbol in the runtime. |
329 | bool swiftAsyncContextIsDynamicallySet() const { |
330 | // Older OS versions (particularly system unwinders) are confused by the |
331 | // Swift extended frame, so when building code that might be run on them we |
332 | // must dynamically query the concurrency library to determine whether |
333 | // extended frames should be flagged as present. |
334 | const Triple &TT = getTargetTriple(); |
335 | |
336 | unsigned Major = TT.getOSVersion().getMajor(); |
337 | switch(TT.getOS()) { |
338 | default: |
339 | return false; |
340 | case Triple::IOS: |
341 | case Triple::TvOS: |
342 | return Major < 15; |
343 | case Triple::WatchOS: |
344 | return Major < 8; |
345 | case Triple::MacOSX: |
346 | case Triple::Darwin: |
347 | return Major < 12; |
348 | } |
349 | } |
350 | |
351 | void mirFileLoaded(MachineFunction &MF) const override; |
352 | |
353 | bool hasSVEorSME() const { return hasSVE() || hasSME(); } |
354 | bool hasSVE2orSME() const { return hasSVE2() || hasSME(); } |
355 | |
356 | // Return the known range for the bit length of SVE data registers. A value |
357 | // of 0 means nothing is known about that particular limit beyong what's |
358 | // implied by the architecture. |
359 | unsigned getMaxSVEVectorSizeInBits() const { |
360 | assert(hasSVEorSME() && |
361 | "Tried to get SVE vector length without SVE support!" ); |
362 | return MaxSVEVectorSizeInBits; |
363 | } |
364 | |
365 | unsigned getMinSVEVectorSizeInBits() const { |
366 | assert(hasSVEorSME() && |
367 | "Tried to get SVE vector length without SVE support!" ); |
368 | return MinSVEVectorSizeInBits; |
369 | } |
370 | |
371 | bool useSVEForFixedLengthVectors() const { |
372 | if (!isNeonAvailable()) |
373 | return hasSVEorSME(); |
374 | |
375 | // Prefer NEON unless larger SVE registers are available. |
376 | return hasSVEorSME() && getMinSVEVectorSizeInBits() >= 256; |
377 | } |
378 | |
379 | bool useSVEForFixedLengthVectors(EVT VT) const { |
380 | if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector()) |
381 | return false; |
382 | return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock || |
383 | !isNeonAvailable(); |
384 | } |
385 | |
386 | unsigned getVScaleForTuning() const { return VScaleForTuning; } |
387 | |
388 | TailFoldingOpts getSVETailFoldingDefaultOpts() const { |
389 | return DefaultSVETFOpts; |
390 | } |
391 | |
392 | const char* getChkStkName() const { |
393 | if (isWindowsArm64EC()) |
394 | return "#__chkstk_arm64ec" ; |
395 | return "__chkstk" ; |
396 | } |
397 | |
398 | const char* getSecurityCheckCookieName() const { |
399 | if (isWindowsArm64EC()) |
400 | return "#__security_check_cookie_arm64ec" ; |
401 | return "__security_check_cookie" ; |
402 | } |
403 | |
404 | /// Choose a method of checking LR before performing a tail call. |
405 | AArch64PAuth::AuthCheckMethod getAuthenticatedLRCheckMethod() const; |
406 | |
407 | const PseudoSourceValue *getAddressCheckPSV() const { |
408 | return AddressCheckPSV.get(); |
409 | } |
410 | |
411 | private: |
412 | /// Pseudo value representing memory load performed to check an address. |
413 | /// |
414 | /// This load operation is solely used for its side-effects: if the address |
415 | /// is not mapped (or not readable), it triggers CPU exception, otherwise |
416 | /// execution proceeds and the value is not used. |
417 | class AddressCheckPseudoSourceValue : public PseudoSourceValue { |
418 | public: |
419 | AddressCheckPseudoSourceValue(const TargetMachine &TM) |
420 | : PseudoSourceValue(TargetCustom, TM) {} |
421 | |
422 | bool isConstant(const MachineFrameInfo *) const override { return false; } |
423 | bool isAliased(const MachineFrameInfo *) const override { return true; } |
424 | bool mayAlias(const MachineFrameInfo *) const override { return true; } |
425 | void printCustom(raw_ostream &OS) const override { OS << "AddressCheck" ; } |
426 | }; |
427 | |
428 | std::unique_ptr<AddressCheckPseudoSourceValue> AddressCheckPSV; |
429 | }; |
430 | } // End llvm namespace |
431 | |
432 | #endif |
433 | |