1 | //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file declares the X86 specific subclass of TargetSubtargetInfo. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H |
14 | #define LLVM_LIB_TARGET_X86_X86SUBTARGET_H |
15 | |
16 | #include "X86FrameLowering.h" |
17 | #include "X86ISelLowering.h" |
18 | #include "X86InstrInfo.h" |
19 | #include "X86SelectionDAGInfo.h" |
20 | #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" |
21 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
22 | #include "llvm/IR/CallingConv.h" |
23 | #include "llvm/TargetParser/Triple.h" |
24 | #include <climits> |
25 | #include <memory> |
26 | |
27 | #define |
28 | #include "X86GenSubtargetInfo.inc" |
29 | |
30 | namespace llvm { |
31 | |
// GlobalISel components: X86Subtarget stores these in std::unique_ptr members
// and hands them out by pointer.
class CallLowering;
class InstructionSelector;
class LegalizerInfo;
class RegisterBankInfo;

// Other types referenced by the declarations in this header.
class GlobalValue;
class StringRef;
class TargetMachine;
39 | |
/// Styles of position-independent code supported by the X86 backend.
///
namespace PICStyles {

enum class Style {
  /// i386-darwin in PIC mode.
  StubPIC,
  /// 32-bit ELF in PIC mode.
  GOT,
  /// X86-64 in PIC mode.
  RIPRel,
  /// Not in PIC mode.
  None
};

} // end namespace PICStyles
52 | |
53 | class X86Subtarget final : public X86GenSubtargetInfo { |
54 | enum X86SSEEnum { |
55 | NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512 |
56 | }; |
57 | |
58 | enum X863DNowEnum { |
59 | NoThreeDNow, MMX, ThreeDNow, ThreeDNowA |
60 | }; |
61 | |
62 | /// Which PIC style to use |
63 | PICStyles::Style PICStyle; |
64 | |
65 | const TargetMachine &TM; |
66 | |
67 | /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. |
68 | X86SSEEnum X86SSELevel = NoSSE; |
69 | |
70 | /// MMX, 3DNow, 3DNow Athlon, or none supported. |
71 | X863DNowEnum X863DNowLevel = NoThreeDNow; |
72 | |
73 | #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ |
74 | bool ATTRIBUTE = DEFAULT; |
75 | #include "X86GenSubtargetInfo.inc" |
76 | /// The minimum alignment known to hold of the stack frame on |
77 | /// entry to the function and which must be maintained by every function. |
78 | Align stackAlignment = Align(4); |
79 | |
80 | Align TileConfigAlignment = Align(4); |
81 | |
82 | /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops. |
83 | /// |
84 | // FIXME: this is a known good value for Yonah. How about others? |
85 | unsigned MaxInlineSizeThreshold = 128; |
86 | |
87 | /// What processor and OS we're targeting. |
88 | Triple TargetTriple; |
89 | |
90 | /// GlobalISel related APIs. |
91 | std::unique_ptr<CallLowering> CallLoweringInfo; |
92 | std::unique_ptr<LegalizerInfo> Legalizer; |
93 | std::unique_ptr<RegisterBankInfo> RegBankInfo; |
94 | std::unique_ptr<InstructionSelector> InstSelector; |
95 | |
96 | /// Override the stack alignment. |
97 | MaybeAlign StackAlignOverride; |
98 | |
99 | /// Preferred vector width from function attribute. |
100 | unsigned PreferVectorWidthOverride; |
101 | |
102 | /// Resolved preferred vector width from function attribute and subtarget |
103 | /// features. |
104 | unsigned PreferVectorWidth = UINT32_MAX; |
105 | |
106 | /// Required vector width from function attribute. |
107 | unsigned RequiredVectorWidth; |
108 | |
109 | X86SelectionDAGInfo TSInfo; |
110 | // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which |
111 | // X86TargetLowering needs. |
112 | X86InstrInfo InstrInfo; |
113 | X86TargetLowering TLInfo; |
114 | X86FrameLowering FrameLowering; |
115 | |
116 | public: |
117 | /// This constructor initializes the data members to match that |
118 | /// of the specified triple. |
119 | /// |
120 | X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, |
121 | const X86TargetMachine &TM, MaybeAlign StackAlignOverride, |
122 | unsigned PreferVectorWidthOverride, |
123 | unsigned RequiredVectorWidth); |
124 | |
125 | const X86TargetLowering *getTargetLowering() const override { |
126 | return &TLInfo; |
127 | } |
128 | |
129 | const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; } |
130 | |
131 | const X86FrameLowering *getFrameLowering() const override { |
132 | return &FrameLowering; |
133 | } |
134 | |
135 | const X86SelectionDAGInfo *getSelectionDAGInfo() const override { |
136 | return &TSInfo; |
137 | } |
138 | |
139 | const X86RegisterInfo *getRegisterInfo() const override { |
140 | return &getInstrInfo()->getRegisterInfo(); |
141 | } |
142 | |
143 | unsigned getTileConfigSize() const { return 64; } |
144 | Align getTileConfigAlignment() const { return TileConfigAlignment; } |
145 | |
146 | /// Returns the minimum alignment known to hold of the |
147 | /// stack frame on entry to the function and which must be maintained by every |
148 | /// function for this subtarget. |
149 | Align getStackAlignment() const { return stackAlignment; } |
150 | |
151 | /// Returns the maximum memset / memcpy size |
152 | /// that still makes it profitable to inline the call. |
153 | unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; } |
154 | |
155 | /// ParseSubtargetFeatures - Parses features string setting specified |
156 | /// subtarget options. Definition of function is auto generated by tblgen. |
157 | void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); |
158 | |
159 | /// Methods used by Global ISel |
160 | const CallLowering *getCallLowering() const override; |
161 | InstructionSelector *getInstructionSelector() const override; |
162 | const LegalizerInfo *getLegalizerInfo() const override; |
163 | const RegisterBankInfo *getRegBankInfo() const override; |
164 | |
165 | private: |
166 | /// Initialize the full set of dependencies so we can use an initializer |
167 | /// list for X86Subtarget. |
168 | X86Subtarget &initializeSubtargetDependencies(StringRef CPU, |
169 | StringRef TuneCPU, |
170 | StringRef FS); |
171 | void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); |
172 | |
173 | public: |
174 | |
175 | #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ |
176 | bool GETTER() const { return ATTRIBUTE; } |
177 | #include "X86GenSubtargetInfo.inc" |
178 | |
179 | /// Is this x86_64 with the ILP32 programming model (x32 ABI)? |
180 | bool isTarget64BitILP32() const { |
181 | return Is64Bit && (TargetTriple.isX32() || TargetTriple.isOSNaCl()); |
182 | } |
183 | |
184 | /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)? |
185 | bool isTarget64BitLP64() const { |
186 | return Is64Bit && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl()); |
187 | } |
188 | |
189 | PICStyles::Style getPICStyle() const { return PICStyle; } |
190 | void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } |
191 | |
192 | bool canUseCMPXCHG8B() const { return hasCX8(); } |
193 | bool canUseCMPXCHG16B() const { |
194 | // CX16 is just the CPUID bit, instruction requires 64-bit mode too. |
195 | return hasCX16() && is64Bit(); |
196 | } |
197 | // SSE codegen depends on cmovs, and all SSE1+ processors support them. |
198 | // All 64-bit processors support cmov. |
199 | bool canUseCMOV() const { return hasCMOV() || hasSSE1() || is64Bit(); } |
200 | bool hasSSE1() const { return X86SSELevel >= SSE1; } |
201 | bool hasSSE2() const { return X86SSELevel >= SSE2; } |
202 | bool hasSSE3() const { return X86SSELevel >= SSE3; } |
203 | bool hasSSSE3() const { return X86SSELevel >= SSSE3; } |
204 | bool hasSSE41() const { return X86SSELevel >= SSE41; } |
205 | bool hasSSE42() const { return X86SSELevel >= SSE42; } |
206 | bool hasAVX() const { return X86SSELevel >= AVX; } |
207 | bool hasAVX2() const { return X86SSELevel >= AVX2; } |
208 | bool hasAVX512() const { return X86SSELevel >= AVX512; } |
209 | bool hasInt256() const { return hasAVX2(); } |
210 | bool hasMMX() const { return X863DNowLevel >= MMX; } |
211 | bool hasThreeDNow() const { return X863DNowLevel >= ThreeDNow; } |
212 | bool hasThreeDNowA() const { return X863DNowLevel >= ThreeDNowA; } |
213 | bool hasAnyFMA() const { return hasFMA() || hasFMA4(); } |
214 | bool hasPrefetchW() const { |
215 | // The PREFETCHW instruction was added with 3DNow but later CPUs gave it |
216 | // its own CPUID bit as part of deprecating 3DNow. Intel eventually added |
217 | // it and KNL has another that prefetches to L2 cache. We assume the |
218 | // L1 version exists if the L2 version does. |
219 | return hasThreeDNow() || hasPRFCHW() || hasPREFETCHWT1(); |
220 | } |
221 | bool hasSSEPrefetch() const { |
222 | // We implicitly enable these when we have a write prefix supporting cache |
223 | // level OR if we have prfchw, but don't already have a read prefetch from |
224 | // 3dnow. |
225 | return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() || |
226 | hasPREFETCHI(); |
227 | } |
228 | bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); } |
229 | // These are generic getters that OR together all of the thunk types |
230 | // supported by the subtarget. Therefore useIndirectThunk*() will return true |
231 | // if any respective thunk feature is enabled. |
232 | bool useIndirectThunkCalls() const { |
233 | return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity(); |
234 | } |
235 | bool useIndirectThunkBranches() const { |
236 | return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity(); |
237 | } |
238 | |
239 | unsigned getPreferVectorWidth() const { return PreferVectorWidth; } |
240 | unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; } |
241 | |
242 | // Helper functions to determine when we should allow widening to 512-bit |
243 | // during codegen. |
244 | // TODO: Currently we're always allowing widening on CPUs without VLX, |
245 | // because for many cases we don't have a better option. |
246 | bool canExtendTo512DQ() const { |
247 | return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512); |
248 | } |
249 | bool canExtendTo512BW() const { |
250 | return hasBWI() && canExtendTo512DQ(); |
251 | } |
252 | |
253 | bool hasNoDomainDelay() const { return NoDomainDelay; } |
254 | bool hasNoDomainDelayMov() const { |
255 | return hasNoDomainDelay() || NoDomainDelayMov; |
256 | } |
257 | bool hasNoDomainDelayBlend() const { |
258 | return hasNoDomainDelay() || NoDomainDelayBlend; |
259 | } |
260 | bool hasNoDomainDelayShuffle() const { |
261 | return hasNoDomainDelay() || NoDomainDelayShuffle; |
262 | } |
263 | |
264 | // If there are no 512-bit vectors and we prefer not to use 512-bit registers, |
265 | // disable them in the legalizer. |
266 | bool useAVX512Regs() const { |
267 | return hasAVX512() && hasEVEX512() && |
268 | (canExtendTo512DQ() || RequiredVectorWidth > 256); |
269 | } |
270 | |
271 | bool useLight256BitInstructions() const { |
272 | return getPreferVectorWidth() >= 256 || AllowLight256Bit; |
273 | } |
274 | |
275 | bool useBWIRegs() const { |
276 | return hasBWI() && useAVX512Regs(); |
277 | } |
278 | |
279 | bool isXRaySupported() const override { return is64Bit(); } |
280 | |
281 | /// Use clflush if we have SSE2 or we're on x86-64 (even if we asked for |
282 | /// no-sse2). There isn't any reason to disable it if the target processor |
283 | /// supports it. |
284 | bool hasCLFLUSH() const { return hasSSE2() || is64Bit(); } |
285 | |
286 | /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for |
287 | /// no-sse2). There isn't any reason to disable it if the target processor |
288 | /// supports it. |
289 | bool hasMFence() const { return hasSSE2() || is64Bit(); } |
290 | |
291 | const Triple &getTargetTriple() const { return TargetTriple; } |
292 | |
293 | bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } |
294 | bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); } |
295 | bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); } |
296 | bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); } |
297 | bool isTargetPS() const { return TargetTriple.isPS(); } |
298 | |
299 | bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } |
300 | bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } |
301 | bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } |
302 | |
303 | bool isTargetLinux() const { return TargetTriple.isOSLinux(); } |
304 | bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); } |
305 | bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); } |
306 | bool isTargetAndroid() const { return TargetTriple.isAndroid(); } |
307 | bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } |
308 | bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } |
309 | bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); } |
310 | bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); } |
311 | bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); } |
312 | |
313 | bool isTargetWindowsMSVC() const { |
314 | return TargetTriple.isWindowsMSVCEnvironment(); |
315 | } |
316 | |
317 | bool isTargetWindowsCoreCLR() const { |
318 | return TargetTriple.isWindowsCoreCLREnvironment(); |
319 | } |
320 | |
321 | bool isTargetWindowsCygwin() const { |
322 | return TargetTriple.isWindowsCygwinEnvironment(); |
323 | } |
324 | |
325 | bool isTargetWindowsGNU() const { |
326 | return TargetTriple.isWindowsGNUEnvironment(); |
327 | } |
328 | |
329 | bool isTargetWindowsItanium() const { |
330 | return TargetTriple.isWindowsItaniumEnvironment(); |
331 | } |
332 | |
333 | bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); } |
334 | |
335 | bool isOSWindows() const { return TargetTriple.isOSWindows(); } |
336 | |
337 | bool isTargetWin64() const { return Is64Bit && isOSWindows(); } |
338 | |
339 | bool isTargetWin32() const { return !Is64Bit && isOSWindows(); } |
340 | |
341 | bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; } |
342 | bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; } |
343 | |
344 | bool isPICStyleStubPIC() const { |
345 | return PICStyle == PICStyles::Style::StubPIC; |
346 | } |
347 | |
348 | bool isPositionIndependent() const; |
349 | |
350 | bool isCallingConvWin64(CallingConv::ID CC) const { |
351 | switch (CC) { |
352 | // On Win64, all these conventions just use the default convention. |
353 | case CallingConv::C: |
354 | case CallingConv::Fast: |
355 | case CallingConv::Tail: |
356 | case CallingConv::Swift: |
357 | case CallingConv::SwiftTail: |
358 | case CallingConv::X86_FastCall: |
359 | case CallingConv::X86_StdCall: |
360 | case CallingConv::X86_ThisCall: |
361 | case CallingConv::X86_VectorCall: |
362 | case CallingConv::Intel_OCL_BI: |
363 | return isTargetWin64(); |
364 | // This convention allows using the Win64 convention on other targets. |
365 | case CallingConv::Win64: |
366 | return true; |
367 | // This convention allows using the SysV convention on Windows targets. |
368 | case CallingConv::X86_64_SysV: |
369 | return false; |
370 | // Otherwise, who knows what this is. |
371 | default: |
372 | return false; |
373 | } |
374 | } |
375 | |
376 | /// Classify a global variable reference for the current subtarget according |
377 | /// to how we should reference it in a non-pcrel context. |
378 | unsigned char classifyLocalReference(const GlobalValue *GV) const; |
379 | |
380 | unsigned char classifyGlobalReference(const GlobalValue *GV, |
381 | const Module &M) const; |
382 | unsigned char classifyGlobalReference(const GlobalValue *GV) const; |
383 | |
384 | /// Classify a global function reference for the current subtarget. |
385 | unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, |
386 | const Module &M) const; |
387 | unsigned char |
388 | classifyGlobalFunctionReference(const GlobalValue *GV) const override; |
389 | |
390 | /// Classify a blockaddress reference for the current subtarget according to |
391 | /// how we should reference it in a non-pcrel context. |
392 | unsigned char classifyBlockAddressReference() const; |
393 | |
394 | /// Return true if the subtarget allows calls to immediate address. |
395 | bool isLegalToCallImmediateAddr() const; |
396 | |
397 | /// Return whether FrameLowering should always set the "extended frame |
398 | /// present" bit in FP, or set it based on a symbol in the runtime. |
399 | bool swiftAsyncContextIsDynamicallySet() const { |
400 | // Older OS versions (particularly system unwinders) are confused by the |
401 | // Swift extended frame, so when building code that might be run on them we |
402 | // must dynamically query the concurrency library to determine whether |
403 | // extended frames should be flagged as present. |
404 | const Triple &TT = getTargetTriple(); |
405 | |
406 | unsigned Major = TT.getOSVersion().getMajor(); |
407 | switch(TT.getOS()) { |
408 | default: |
409 | return false; |
410 | case Triple::IOS: |
411 | case Triple::TvOS: |
412 | return Major < 15; |
413 | case Triple::WatchOS: |
414 | return Major < 8; |
415 | case Triple::MacOSX: |
416 | case Triple::Darwin: |
417 | return Major < 12; |
418 | } |
419 | } |
420 | |
421 | /// If we are using indirect thunks, we need to expand indirectbr to avoid it |
422 | /// lowering to an actual indirect jump. |
423 | bool enableIndirectBrExpand() const override { |
424 | return useIndirectThunkBranches(); |
425 | } |
426 | |
427 | /// Enable the MachineScheduler pass for all X86 subtargets. |
428 | bool enableMachineScheduler() const override { return true; } |
429 | |
430 | bool enableEarlyIfConversion() const override; |
431 | |
432 | void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>> |
433 | &Mutations) const override; |
434 | |
435 | AntiDepBreakMode getAntiDepBreakMode() const override { |
436 | return TargetSubtargetInfo::ANTIDEP_CRITICAL; |
437 | } |
438 | }; |
439 | |
440 | } // end namespace llvm |
441 | |
442 | #endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H |
443 | |