1 | //===-- X86RegisterInfo.cpp - X86 Register Information --------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the X86 implementation of the TargetRegisterInfo class. |
10 | // This file is responsible for the frame pointer elimination optimization |
11 | // on X86. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "X86RegisterInfo.h" |
16 | #include "X86FrameLowering.h" |
17 | #include "X86MachineFunctionInfo.h" |
18 | #include "X86Subtarget.h" |
19 | #include "llvm/ADT/BitVector.h" |
20 | #include "llvm/ADT/STLExtras.h" |
21 | #include "llvm/ADT/SmallSet.h" |
22 | #include "llvm/CodeGen/LiveRegMatrix.h" |
23 | #include "llvm/CodeGen/MachineFrameInfo.h" |
24 | #include "llvm/CodeGen/MachineFunction.h" |
25 | #include "llvm/CodeGen/MachineFunctionPass.h" |
26 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
27 | #include "llvm/CodeGen/TargetFrameLowering.h" |
28 | #include "llvm/CodeGen/TargetInstrInfo.h" |
29 | #include "llvm/CodeGen/TileShapeInfo.h" |
30 | #include "llvm/CodeGen/VirtRegMap.h" |
31 | #include "llvm/IR/Constants.h" |
32 | #include "llvm/IR/Function.h" |
33 | #include "llvm/IR/Type.h" |
34 | #include "llvm/Support/CommandLine.h" |
35 | #include "llvm/Support/ErrorHandling.h" |
36 | #include "llvm/Target/TargetMachine.h" |
37 | #include "llvm/Target/TargetOptions.h" |
38 | |
39 | using namespace llvm; |
40 | |
41 | #define GET_REGINFO_TARGET_DESC |
42 | #include "X86GenRegisterInfo.inc" |
43 | |
44 | static cl::opt<bool> |
    EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
                      cl::desc("Enable use of a base pointer for complex "
                               "stack frames"));
47 | |
48 | X86RegisterInfo::X86RegisterInfo(const Triple &TT) |
49 | : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP), |
50 | X86_MC::getDwarfRegFlavour(TT, false), |
51 | X86_MC::getDwarfRegFlavour(TT, true), |
52 | (TT.isArch64Bit() ? X86::RIP : X86::EIP)) { |
53 | X86_MC::initLLVMToSEHAndCVRegMapping(this); |
54 | |
55 | // Cache some information. |
56 | Is64Bit = TT.isArch64Bit(); |
57 | IsWin64 = Is64Bit && TT.isOSWindows(); |
58 | |
  // Use a callee-saved register as the base pointer. These registers must
  // not conflict with any ABI requirements. For example, in 32-bit PIC mode,
  // calls through the PLT require the GOT pointer to be in EBX.
62 | if (Is64Bit) { |
63 | SlotSize = 8; |
64 | // This matches the simplified 32-bit pointer code in the data layout |
65 | // computation. |
66 | // FIXME: Should use the data layout? |
67 | bool Use64BitReg = !TT.isX32(); |
68 | StackPtr = Use64BitReg ? X86::RSP : X86::ESP; |
69 | FramePtr = Use64BitReg ? X86::RBP : X86::EBP; |
70 | BasePtr = Use64BitReg ? X86::RBX : X86::EBX; |
71 | } else { |
72 | SlotSize = 4; |
73 | StackPtr = X86::ESP; |
74 | FramePtr = X86::EBP; |
75 | BasePtr = X86::ESI; |
76 | } |
77 | } |
78 | |
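// SEH unwind descriptors identify registers by their hardware encoding, so
// the SEH register number is simply the register's encoding value.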
79 | int |
80 | X86RegisterInfo::getSEHRegNum(unsigned i) const { |
81 | return getEncodingValue(i); |
82 | } |
83 | |
84 | const TargetRegisterClass * |
85 | X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC, |
86 | unsigned Idx) const { |
87 | // The sub_8bit sub-register index is more constrained in 32-bit mode. |
88 | // It behaves just like the sub_8bit_hi index. |
89 | if (!Is64Bit && Idx == X86::sub_8bit) |
90 | Idx = X86::sub_8bit_hi; |
91 | |
92 | // Forward to TableGen's default version. |
93 | return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx); |
94 | } |
95 | |
96 | const TargetRegisterClass * |
97 | X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, |
98 | const TargetRegisterClass *B, |
99 | unsigned SubIdx) const { |
100 | // The sub_8bit sub-register index is more constrained in 32-bit mode. |
101 | if (!Is64Bit && SubIdx == X86::sub_8bit) { |
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
103 | if (!A) |
104 | return nullptr; |
105 | } |
106 | return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx); |
107 | } |
108 | |
109 | const TargetRegisterClass * |
110 | X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, |
111 | const MachineFunction &MF) const { |
112 | // Don't allow super-classes of GR8_NOREX. This class is only used after |
113 | // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied |
  // to the full GR8 register class in 64-bit mode, so we cannot allow register
  // class inflation.
116 | // |
117 | // The GR8_NOREX class is always used in a way that won't be constrained to a |
118 | // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the |
119 | // full GR8 class. |
120 | if (RC == &X86::GR8_NOREXRegClass) |
121 | return RC; |
122 | |
123 | const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); |
124 | |
125 | const TargetRegisterClass *Super = RC; |
126 | TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); |
127 | do { |
128 | switch (Super->getID()) { |
129 | case X86::FR32RegClassID: |
130 | case X86::FR64RegClassID: |
131 | // If AVX-512 isn't supported we should only inflate to these classes. |
132 | if (!Subtarget.hasAVX512() && |
133 | getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) |
134 | return Super; |
135 | break; |
136 | case X86::VR128RegClassID: |
137 | case X86::VR256RegClassID: |
138 | // If VLX isn't supported we should only inflate to these classes. |
139 | if (!Subtarget.hasVLX() && |
140 | getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) |
141 | return Super; |
142 | break; |
143 | case X86::VR128XRegClassID: |
144 | case X86::VR256XRegClassID: |
    // If VLX isn't supported we shouldn't inflate to these classes.
146 | if (Subtarget.hasVLX() && |
147 | getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) |
148 | return Super; |
149 | break; |
150 | case X86::FR32XRegClassID: |
151 | case X86::FR64XRegClassID: |
    // If AVX-512 isn't supported we shouldn't inflate to these classes.
153 | if (Subtarget.hasAVX512() && |
154 | getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) |
155 | return Super; |
156 | break; |
157 | case X86::GR8RegClassID: |
158 | case X86::GR16RegClassID: |
159 | case X86::GR32RegClassID: |
160 | case X86::GR64RegClassID: |
161 | case X86::GR8_NOREX2RegClassID: |
162 | case X86::GR16_NOREX2RegClassID: |
163 | case X86::GR32_NOREX2RegClassID: |
164 | case X86::GR64_NOREX2RegClassID: |
165 | case X86::RFP32RegClassID: |
166 | case X86::RFP64RegClassID: |
167 | case X86::RFP80RegClassID: |
168 | case X86::VR512_0_15RegClassID: |
169 | case X86::VR512RegClassID: |
170 | // Don't return a super-class that would shrink the spill size. |
171 | // That can happen with the vector and float classes. |
172 | if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) |
173 | return Super; |
174 | } |
175 | Super = *I++; |
176 | } while (Super); |
177 | return RC; |
178 | } |
179 | |
180 | const TargetRegisterClass * |
181 | X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, |
182 | unsigned Kind) const { |
183 | const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); |
184 | switch (Kind) { |
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
186 | case 0: // Normal GPRs. |
187 | if (Subtarget.isTarget64BitLP64()) |
188 | return &X86::GR64RegClass; |
    // If the target is 64-bit but we have been told to use 32-bit addresses,
    // we can still use a 64-bit register as long as we know the high bits
    // are zeros.
    // Reflect that in the returned register class.
193 | if (Is64Bit) { |
      // When the target also allows a 64-bit frame pointer and we do have a
      // frame, it is fine to use it for the address accesses as well.
196 | const X86FrameLowering *TFI = getFrameLowering(MF); |
197 | return TFI->hasFP(MF) && TFI->Uses64BitFramePtr |
198 | ? &X86::LOW32_ADDR_ACCESS_RBPRegClass |
199 | : &X86::LOW32_ADDR_ACCESSRegClass; |
200 | } |
201 | return &X86::GR32RegClass; |
202 | case 1: // Normal GPRs except the stack pointer (for encoding reasons). |
203 | if (Subtarget.isTarget64BitLP64()) |
204 | return &X86::GR64_NOSPRegClass; |
205 | // NOSP does not contain RIP, so no special case here. |
206 | return &X86::GR32_NOSPRegClass; |
207 | case 2: // NOREX GPRs. |
208 | if (Subtarget.isTarget64BitLP64()) |
209 | return &X86::GR64_NOREXRegClass; |
210 | return &X86::GR32_NOREXRegClass; |
211 | case 3: // NOREX GPRs except the stack pointer (for encoding reasons). |
212 | if (Subtarget.isTarget64BitLP64()) |
213 | return &X86::GR64_NOREX_NOSPRegClass; |
214 | // NOSP does not contain RIP, so no special case here. |
215 | return &X86::GR32_NOREX_NOSPRegClass; |
216 | case 4: // Available for tailcall (not callee-saved GPRs). |
217 | return getGPRsForTailCall(MF); |
218 | } |
219 | } |
220 | |
221 | bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC, |
222 | unsigned DefSubReg, |
223 | const TargetRegisterClass *SrcRC, |
224 | unsigned SrcSubReg) const { |
225 | // Prevent rewriting a copy where the destination size is larger than the |
226 | // input size. See PR41619. |
  // FIXME: Should this be factored into the base implementation somehow?
228 | if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 && |
229 | SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit) |
230 | return false; |
231 | |
232 | return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg, |
233 | SrcRC, SrcSubReg); |
234 | } |
235 | |
236 | const TargetRegisterClass * |
237 | X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const { |
238 | const Function &F = MF.getFunction(); |
239 | if (IsWin64 || (F.getCallingConv() == CallingConv::Win64)) |
240 | return &X86::GR64_TCW64RegClass; |
241 | else if (Is64Bit) |
242 | return &X86::GR64_TCRegClass; |
243 | |
244 | bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE); |
245 | if (hasHipeCC) |
246 | return &X86::GR32RegClass; |
247 | return &X86::GR32_TCRegClass; |
248 | } |
249 | |
250 | const TargetRegisterClass * |
251 | X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { |
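  // EFLAGS (the condition-code register class) cannot be copied directly;
  // cross-class copies go through a general-purpose register instead.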
252 | if (RC == &X86::CCRRegClass) { |
253 | if (Is64Bit) |
254 | return &X86::GR64RegClass; |
255 | else |
256 | return &X86::GR32RegClass; |
257 | } |
258 | return RC; |
259 | } |
260 | |
261 | unsigned |
262 | X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, |
263 | MachineFunction &MF) const { |
264 | const X86FrameLowering *TFI = getFrameLowering(MF); |
265 | |
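  // These limits are rough heuristics rather than exact counts of allocatable
  // registers; using a frame pointer costs one GPR.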
266 | unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0; |
267 | switch (RC->getID()) { |
268 | default: |
269 | return 0; |
270 | case X86::GR32RegClassID: |
271 | return 4 - FPDiff; |
272 | case X86::GR64RegClassID: |
273 | return 12 - FPDiff; |
274 | case X86::VR128RegClassID: |
275 | return Is64Bit ? 10 : 4; |
276 | case X86::VR64RegClassID: |
277 | return 4; |
278 | } |
279 | } |
280 | |
281 | const MCPhysReg * |
282 | X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { |
  assert(MF && "MachineFunction required");
284 | |
285 | const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>(); |
286 | const Function &F = MF->getFunction(); |
287 | bool HasSSE = Subtarget.hasSSE1(); |
288 | bool HasAVX = Subtarget.hasAVX(); |
289 | bool HasAVX512 = Subtarget.hasAVX512(); |
290 | bool CallsEHReturn = MF->callsEHReturn(); |
291 | |
292 | CallingConv::ID CC = F.getCallingConv(); |
293 | |
  // If the function has the "no_caller_saved_registers" attribute, switch to
  // the X86_INTR calling convention, since that one carries the right CSR
  // list.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;
298 | |
  // If the attribute is specified, override the CSRs normally specified by
  // the calling convention and use the empty set instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
302 | return CSR_NoRegs_SaveList; |
303 | |
304 | switch (CC) { |
305 | case CallingConv::GHC: |
306 | case CallingConv::HiPE: |
307 | return CSR_NoRegs_SaveList; |
308 | case CallingConv::AnyReg: |
309 | if (HasAVX) |
310 | return CSR_64_AllRegs_AVX_SaveList; |
311 | return CSR_64_AllRegs_SaveList; |
312 | case CallingConv::PreserveMost: |
313 | return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList |
314 | : CSR_64_RT_MostRegs_SaveList; |
315 | case CallingConv::PreserveAll: |
316 | if (HasAVX) |
317 | return CSR_64_RT_AllRegs_AVX_SaveList; |
318 | return CSR_64_RT_AllRegs_SaveList; |
319 | case CallingConv::PreserveNone: |
320 | return CSR_64_NoneRegs_SaveList; |
321 | case CallingConv::CXX_FAST_TLS: |
322 | if (Is64Bit) |
323 | return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ? |
324 | CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList; |
325 | break; |
326 | case CallingConv::Intel_OCL_BI: { |
327 | if (HasAVX512 && IsWin64) |
328 | return CSR_Win64_Intel_OCL_BI_AVX512_SaveList; |
329 | if (HasAVX512 && Is64Bit) |
330 | return CSR_64_Intel_OCL_BI_AVX512_SaveList; |
331 | if (HasAVX && IsWin64) |
332 | return CSR_Win64_Intel_OCL_BI_AVX_SaveList; |
333 | if (HasAVX && Is64Bit) |
334 | return CSR_64_Intel_OCL_BI_AVX_SaveList; |
335 | if (!HasAVX && !IsWin64 && Is64Bit) |
336 | return CSR_64_Intel_OCL_BI_SaveList; |
337 | break; |
338 | } |
339 | case CallingConv::X86_RegCall: |
340 | if (Is64Bit) { |
341 | if (IsWin64) { |
342 | return (HasSSE ? CSR_Win64_RegCall_SaveList : |
343 | CSR_Win64_RegCall_NoSSE_SaveList); |
344 | } else { |
345 | return (HasSSE ? CSR_SysV64_RegCall_SaveList : |
346 | CSR_SysV64_RegCall_NoSSE_SaveList); |
347 | } |
348 | } else { |
349 | return (HasSSE ? CSR_32_RegCall_SaveList : |
350 | CSR_32_RegCall_NoSSE_SaveList); |
351 | } |
352 | case CallingConv::CFGuard_Check: |
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
354 | return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList |
355 | : CSR_Win32_CFGuard_Check_NoSSE_SaveList); |
356 | case CallingConv::Cold: |
357 | if (Is64Bit) |
358 | return CSR_64_MostRegs_SaveList; |
359 | break; |
360 | case CallingConv::Win64: |
361 | if (!HasSSE) |
362 | return CSR_Win64_NoSSE_SaveList; |
363 | return CSR_Win64_SaveList; |
364 | case CallingConv::SwiftTail: |
365 | if (!Is64Bit) |
366 | return CSR_32_SaveList; |
367 | return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList; |
368 | case CallingConv::X86_64_SysV: |
369 | if (CallsEHReturn) |
370 | return CSR_64EHRet_SaveList; |
371 | return CSR_64_SaveList; |
372 | case CallingConv::X86_INTR: |
373 | if (Is64Bit) { |
374 | if (HasAVX512) |
375 | return CSR_64_AllRegs_AVX512_SaveList; |
376 | if (HasAVX) |
377 | return CSR_64_AllRegs_AVX_SaveList; |
378 | if (HasSSE) |
379 | return CSR_64_AllRegs_SaveList; |
380 | return CSR_64_AllRegs_NoSSE_SaveList; |
381 | } else { |
382 | if (HasAVX512) |
383 | return CSR_32_AllRegs_AVX512_SaveList; |
384 | if (HasAVX) |
385 | return CSR_32_AllRegs_AVX_SaveList; |
386 | if (HasSSE) |
387 | return CSR_32_AllRegs_SSE_SaveList; |
388 | return CSR_32_AllRegs_SaveList; |
389 | } |
390 | default: |
391 | break; |
392 | } |
393 | |
394 | if (Is64Bit) { |
395 | bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() && |
396 | F.getAttributes().hasAttrSomewhere(Attribute::SwiftError); |
397 | if (IsSwiftCC) |
398 | return IsWin64 ? CSR_Win64_SwiftError_SaveList |
399 | : CSR_64_SwiftError_SaveList; |
400 | |
401 | if (IsWin64) |
402 | return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList; |
403 | if (CallsEHReturn) |
404 | return CSR_64EHRet_SaveList; |
405 | return CSR_64_SaveList; |
406 | } |
407 | |
408 | return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList; |
409 | } |
410 | |
411 | const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy( |
412 | const MachineFunction *MF) const { |
  assert(MF && "Invalid MachineFunction pointer.");
414 | if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && |
415 | MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR()) |
416 | return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList; |
417 | return nullptr; |
418 | } |
419 | |
420 | const uint32_t * |
421 | X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, |
422 | CallingConv::ID CC) const { |
423 | const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); |
424 | bool HasSSE = Subtarget.hasSSE1(); |
425 | bool HasAVX = Subtarget.hasAVX(); |
426 | bool HasAVX512 = Subtarget.hasAVX512(); |
427 | |
428 | switch (CC) { |
429 | case CallingConv::GHC: |
430 | case CallingConv::HiPE: |
431 | return CSR_NoRegs_RegMask; |
432 | case CallingConv::AnyReg: |
433 | if (HasAVX) |
434 | return CSR_64_AllRegs_AVX_RegMask; |
435 | return CSR_64_AllRegs_RegMask; |
436 | case CallingConv::PreserveMost: |
437 | return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask; |
438 | case CallingConv::PreserveAll: |
439 | if (HasAVX) |
440 | return CSR_64_RT_AllRegs_AVX_RegMask; |
441 | return CSR_64_RT_AllRegs_RegMask; |
442 | case CallingConv::PreserveNone: |
443 | return CSR_64_NoneRegs_RegMask; |
444 | case CallingConv::CXX_FAST_TLS: |
445 | if (Is64Bit) |
446 | return CSR_64_TLS_Darwin_RegMask; |
447 | break; |
448 | case CallingConv::Intel_OCL_BI: { |
449 | if (HasAVX512 && IsWin64) |
450 | return CSR_Win64_Intel_OCL_BI_AVX512_RegMask; |
451 | if (HasAVX512 && Is64Bit) |
452 | return CSR_64_Intel_OCL_BI_AVX512_RegMask; |
453 | if (HasAVX && IsWin64) |
454 | return CSR_Win64_Intel_OCL_BI_AVX_RegMask; |
455 | if (HasAVX && Is64Bit) |
456 | return CSR_64_Intel_OCL_BI_AVX_RegMask; |
457 | if (!HasAVX && !IsWin64 && Is64Bit) |
458 | return CSR_64_Intel_OCL_BI_RegMask; |
459 | break; |
460 | } |
461 | case CallingConv::X86_RegCall: |
462 | if (Is64Bit) { |
463 | if (IsWin64) { |
464 | return (HasSSE ? CSR_Win64_RegCall_RegMask : |
465 | CSR_Win64_RegCall_NoSSE_RegMask); |
466 | } else { |
467 | return (HasSSE ? CSR_SysV64_RegCall_RegMask : |
468 | CSR_SysV64_RegCall_NoSSE_RegMask); |
469 | } |
470 | } else { |
471 | return (HasSSE ? CSR_32_RegCall_RegMask : |
472 | CSR_32_RegCall_NoSSE_RegMask); |
473 | } |
474 | case CallingConv::CFGuard_Check: |
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
476 | return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask |
477 | : CSR_Win32_CFGuard_Check_NoSSE_RegMask); |
478 | case CallingConv::Cold: |
479 | if (Is64Bit) |
480 | return CSR_64_MostRegs_RegMask; |
481 | break; |
482 | case CallingConv::Win64: |
483 | return CSR_Win64_RegMask; |
484 | case CallingConv::SwiftTail: |
485 | if (!Is64Bit) |
486 | return CSR_32_RegMask; |
487 | return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask; |
488 | case CallingConv::X86_64_SysV: |
489 | return CSR_64_RegMask; |
490 | case CallingConv::X86_INTR: |
491 | if (Is64Bit) { |
492 | if (HasAVX512) |
493 | return CSR_64_AllRegs_AVX512_RegMask; |
494 | if (HasAVX) |
495 | return CSR_64_AllRegs_AVX_RegMask; |
496 | if (HasSSE) |
497 | return CSR_64_AllRegs_RegMask; |
498 | return CSR_64_AllRegs_NoSSE_RegMask; |
499 | } else { |
500 | if (HasAVX512) |
501 | return CSR_32_AllRegs_AVX512_RegMask; |
502 | if (HasAVX) |
503 | return CSR_32_AllRegs_AVX_RegMask; |
504 | if (HasSSE) |
505 | return CSR_32_AllRegs_SSE_RegMask; |
506 | return CSR_32_AllRegs_RegMask; |
507 | } |
508 | default: |
509 | break; |
510 | } |
511 | |
512 | // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check |
513 | // callsEHReturn(). |
514 | if (Is64Bit) { |
515 | const Function &F = MF.getFunction(); |
516 | bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() && |
517 | F.getAttributes().hasAttrSomewhere(Attribute::SwiftError); |
518 | if (IsSwiftCC) |
519 | return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask; |
520 | |
521 | return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask; |
522 | } |
523 | |
524 | return CSR_32_RegMask; |
525 | } |
526 | |
527 | const uint32_t* |
528 | X86RegisterInfo::getNoPreservedMask() const { |
529 | return CSR_NoRegs_RegMask; |
530 | } |
531 | |
532 | const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const { |
533 | return CSR_64_TLS_Darwin_RegMask; |
534 | } |
535 | |
536 | BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { |
537 | BitVector Reserved(getNumRegs()); |
538 | const X86FrameLowering *TFI = getFrameLowering(MF); |
539 | |
540 | // Set the floating point control register as reserved. |
541 | Reserved.set(X86::FPCW); |
542 | |
543 | // Set the floating point status register as reserved. |
544 | Reserved.set(X86::FPSW); |
545 | |
546 | // Set the SIMD floating point control register as reserved. |
547 | Reserved.set(X86::MXCSR); |
548 | |
549 | // Set the stack-pointer register and its aliases as reserved. |
550 | for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP)) |
551 | Reserved.set(SubReg); |
552 | |
553 | // Set the Shadow Stack Pointer as reserved. |
554 | Reserved.set(X86::SSP); |
555 | |
556 | // Set the instruction pointer register and its aliases as reserved. |
557 | for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP)) |
558 | Reserved.set(SubReg); |
559 | |
560 | // Set the frame-pointer register and its aliases as reserved if needed. |
561 | if (TFI->hasFP(MF)) { |
562 | for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP)) |
563 | Reserved.set(SubReg); |
564 | } |
565 | |
566 | // Set the base-pointer register and its aliases as reserved if needed. |
567 | if (hasBasePointer(MF)) { |
568 | CallingConv::ID CC = MF.getFunction().getCallingConv(); |
569 | const uint32_t *RegMask = getCallPreservedMask(MF, CC); |
    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
      report_fatal_error(
          "Stack realignment in presence of dynamic allocas is not supported "
          "with this calling convention.");
574 | |
    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
576 | for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr)) |
577 | Reserved.set(SubReg); |
578 | } |
579 | |
580 | // Mark the segment registers as reserved. |
581 | Reserved.set(X86::CS); |
582 | Reserved.set(X86::SS); |
583 | Reserved.set(X86::DS); |
584 | Reserved.set(X86::ES); |
585 | Reserved.set(X86::FS); |
586 | Reserved.set(X86::GS); |
587 | |
588 | // Mark the floating point stack registers as reserved. |
589 | for (unsigned n = 0; n != 8; ++n) |
590 | Reserved.set(X86::ST0 + n); |
591 | |
592 | // Reserve the registers that only exist in 64-bit mode. |
593 | if (!Is64Bit) { |
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are the old 32-bit registers.
596 | Reserved.set(X86::SIL); |
597 | Reserved.set(X86::DIL); |
598 | Reserved.set(X86::BPL); |
599 | Reserved.set(X86::SPL); |
600 | Reserved.set(X86::SIH); |
601 | Reserved.set(X86::DIH); |
602 | Reserved.set(X86::BPH); |
603 | Reserved.set(X86::SPH); |
604 | |
605 | for (unsigned n = 0; n != 8; ++n) { |
606 | // R8, R9, ... |
607 | for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI) |
608 | Reserved.set(*AI); |
609 | |
610 | // XMM8, XMM9, ... |
611 | for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI) |
612 | Reserved.set(*AI); |
613 | } |
614 | } |
615 | if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) { |
616 | for (unsigned n = 0; n != 16; ++n) { |
617 | for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid(); |
618 | ++AI) |
619 | Reserved.set(*AI); |
620 | } |
621 | } |
622 | |
623 | // Reserve the extended general purpose registers. |
624 | if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR()) |
625 | Reserved.set(X86::R16, X86::R31WH + 1); |
626 | |
627 | if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) { |
628 | for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI) |
629 | Reserved.set(*AI); |
630 | for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI) |
631 | Reserved.set(*AI); |
632 | } |
633 | |
634 | assert(checkAllSuperRegsMarked(Reserved, |
635 | {X86::SIL, X86::DIL, X86::BPL, X86::SPL, |
636 | X86::SIH, X86::DIH, X86::BPH, X86::SPH})); |
637 | return Reserved; |
638 | } |
639 | |
640 | unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const { |
641 | // All existing Intel CPUs that support AMX support AVX512 and all existing |
642 | // Intel CPUs that support APX support AMX. AVX512 implies AVX. |
643 | // |
644 | // We enumerate the registers in X86GenRegisterInfo.inc in this order: |
645 | // |
646 | // Registers before AVX512, |
647 | // AVX512 registers (X/YMM16-31, ZMM0-31, K registers) |
648 | // AMX registers (TMM) |
649 | // APX registers (R16-R31) |
650 | // |
651 | // and try to return the minimum number of registers supported by the target. |
  assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
         (X86::K6_K7 + 1 == X86::TMMCFG) && (X86::TMM7 + 1 == X86::R16) &&
         (X86::R31WH + 1 == X86::NUM_TARGET_REGS) &&
         "Register number may be incorrect");
656 | |
657 | const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); |
658 | if (ST.hasEGPR()) |
659 | return X86::NUM_TARGET_REGS; |
660 | if (ST.hasAMXTILE()) |
661 | return X86::TMM7 + 1; |
662 | if (ST.hasAVX512()) |
663 | return X86::K6_K7 + 1; |
664 | if (ST.hasAVX()) |
665 | return X86::YMM15 + 1; |
666 | return X86::R15WH + 1; |
667 | } |
668 | |
669 | bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF, |
670 | MCRegister Reg) const { |
671 | const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); |
672 | const TargetRegisterInfo &TRI = *ST.getRegisterInfo(); |
673 | auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) { |
674 | return TRI.isSuperOrSubRegisterEq(RegA, RegB); |
675 | }; |
676 | |
677 | if (!ST.is64Bit()) |
678 | return llvm::any_of( |
679 | SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX}, |
680 | [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) || |
681 | (ST.hasMMX() && X86::VR64RegClass.contains(Reg)); |
682 | |
683 | CallingConv::ID CC = MF.getFunction().getCallingConv(); |
684 | |
685 | if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg)) |
686 | return true; |
687 | |
688 | if (llvm::any_of( |
689 | SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9}, |
690 | [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) |
691 | return true; |
692 | |
693 | if (CC != CallingConv::Win64 && |
694 | llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI}, |
695 | [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) |
696 | return true; |
697 | |
698 | if (ST.hasSSE1() && |
699 | llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2, |
700 | X86::XMM3, X86::XMM4, X86::XMM5, |
701 | X86::XMM6, X86::XMM7}, |
702 | [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) |
703 | return true; |
704 | |
705 | return X86GenRegisterInfo::isArgumentRegister(MF, Reg); |
706 | } |
707 | |
708 | bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF, |
709 | MCRegister PhysReg) const { |
710 | const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); |
711 | const TargetRegisterInfo &TRI = *ST.getRegisterInfo(); |
712 | |
713 | // Stack pointer. |
714 | if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg)) |
715 | return true; |
716 | |
  // The frame pointer is fixed whenever it is actually being used as the
  // frame pointer.
718 | const X86FrameLowering &TFI = *getFrameLowering(MF); |
719 | if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg)) |
720 | return true; |
721 | |
722 | return X86GenRegisterInfo::isFixedRegister(MF, PhysReg); |
723 | } |
724 | |
725 | bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const { |
726 | return RC->getID() == X86::TILERegClassID; |
727 | } |
728 | |
729 | void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { |
730 | // Check if the EFLAGS register is marked as live-out. This shouldn't happen, |
731 | // because the calling convention defines the EFLAGS register as NOT |
732 | // preserved. |
733 | // |
  // Unfortunately, EFLAGS can show up as live-out after branch folding. Add an
  // assert to track this and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");
739 | |
740 | // Also clean other registers that don't need preserving (IP). |
741 | for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP}) |
742 | Mask[Reg / 32] &= ~(1U << (Reg % 32)); |
743 | } |
744 | |
745 | //===----------------------------------------------------------------------===// |
746 | // Stack Frame Processing methods |
747 | //===----------------------------------------------------------------------===// |
748 | |
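// The stack pointer cannot be used to address frame objects when the frame
// contains variable-sized objects or when something (e.g. inline asm) adjusts
// the stack pointer by an amount the compiler cannot see.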
749 | static bool CantUseSP(const MachineFrameInfo &MFI) { |
750 | return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment(); |
751 | } |
752 | |
753 | bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { |
754 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
  // If we have a virtual register to reference the arguments, we don't need a
  // base pointer.
757 | if (X86FI->getStackPtrSaveMI() != nullptr) |
758 | return false; |
759 | |
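  // A call using the "preallocated" convention adjusts the stack pointer ahead
  // of the call sequence, so SP-relative addressing of locals is unreliable
  // and a base pointer is needed.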
760 | if (X86FI->hasPreallocatedCall()) |
761 | return true; |
762 | |
763 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
764 | |
765 | if (!EnableBasePointer) |
766 | return false; |
767 | |
768 | // When we need stack realignment, we can't address the stack from the frame |
769 | // pointer. When we have dynamic allocas or stack-adjusting inline asm, we |
770 | // can't address variables from the stack pointer. MS inline asm can |
771 | // reference locals while also adjusting the stack pointer. When we can't |
772 | // use both the SP and the FP, we need a separate base pointer register. |
773 | bool CantUseFP = hasStackRealignment(MF); |
774 | return CantUseFP && CantUseSP(MFI); |
775 | } |
776 | |
777 | bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { |
778 | if (!TargetRegisterInfo::canRealignStack(MF)) |
779 | return false; |
780 | |
781 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
782 | const MachineRegisterInfo *MRI = &MF.getRegInfo(); |
783 | |
784 | // Stack realignment requires a frame pointer. If we already started |
785 | // register allocation with frame pointer elimination, it is too late now. |
  if (!MRI->canReserveReg(FramePtr))
787 | return false; |
788 | |
  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
793 | return true; |
794 | } |
795 | |
796 | bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const { |
797 | if (TargetRegisterInfo::shouldRealignStack(MF)) |
798 | return true; |
799 | |
800 | return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR; |
801 | } |
802 | |
803 | // tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction |
804 | // of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'. |
// TODO: In this case we should really be trying first to entirely eliminate
// this instruction, which is a plain copy.
807 | static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) { |
808 | MachineInstr &MI = *II; |
809 | unsigned Opc = II->getOpcode(); |
810 | // Check if this is a LEA of the form 'lea (%esp), %ebx' |
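  // The memory reference occupies operands 1-5 as (base, scale, index, disp,
  // segment); require scale 1, no index, zero displacement, and no segment so
  // the effective address is exactly the base register.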
811 | if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) || |
812 | MI.getOperand(2).getImm() != 1 || |
813 | MI.getOperand(3).getReg() != X86::NoRegister || |
814 | MI.getOperand(4).getImm() != 0 || |
815 | MI.getOperand(5).getReg() != X86::NoRegister) |
816 | return false; |
  Register BasePtr = MI.getOperand(1).getReg();
818 | // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will |
819 | // be replaced with a 32-bit operand MOV which will zero extend the upper |
820 | // 32-bits of the super register. |
821 | if (Opc == X86::LEA64_32r) |
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
828 | MI.eraseFromParent(); |
829 | return true; |
830 | } |
831 | |
832 | static bool isFuncletReturnInstr(MachineInstr &MI) { |
833 | switch (MI.getOpcode()) { |
834 | case X86::CATCHRET: |
835 | case X86::CLEANUPRET: |
836 | return true; |
837 | default: |
838 | return false; |
839 | } |
  llvm_unreachable("impossible");
841 | } |
842 | |
843 | void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, |
844 | unsigned FIOperandNum, |
845 | Register BaseReg, |
846 | int FIOffset) const { |
847 | MachineInstr &MI = *II; |
848 | unsigned Opc = MI.getOpcode(); |
849 | if (Opc == TargetOpcode::LOCAL_ESCAPE) { |
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
852 | return; |
853 | } |
854 | |
  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
856 | |
857 | // The frame index format for stackmaps and patchpoints is different from the |
858 | // X86 format. It only has a FI and an offset. |
859 | if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { |
    assert(BaseReg == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
863 | return; |
864 | } |
865 | |
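  // Operand FIOperandNum + 3 is the displacement field (X86::AddrDisp) of the
  // memory reference; fold the frame-index offset into it.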
  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0)
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
879 | } |
880 | } |
881 | |
882 | bool |
883 | X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, |
884 | int SPAdj, unsigned FIOperandNum, |
885 | RegScavenger *RS) const { |
886 | MachineInstr &MI = *II; |
887 | MachineBasicBlock &MBB = *MI.getParent(); |
888 | MachineFunction &MF = *MBB.getParent(); |
889 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                               : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
894 | |
895 | // Determine base register and offset. |
896 | int FIOffset; |
897 | Register BasePtr; |
898 | if (MI.isReturn()) { |
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
908 | } |
909 | |
910 | // LOCAL_ESCAPE uses a single offset, with no register. It only works in the |
911 | // simple FP case, and doesn't work with stack realignment. On 32-bit, the |
912 | // offset is from the traditional base pointer location. On 64-bit, the |
913 | // offset is from the SP at the end of the prologue, not the FP location. This |
914 | // matches the behavior of llvm.frameaddress. |
915 | unsigned Opc = MI.getOpcode(); |
916 | if (Opc == TargetOpcode::LOCAL_ESCAPE) { |
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
919 | return false; |
920 | } |
921 | |
  // For LEA64_32r, when BasePtr is 32 bits (X32), we can use the full-size
  // 64-bit register as the source operand; the semantics are the same and the
  // destination is still 32 bits. This saves one byte per LEA since the 0x67
  // address-size prefix is avoided. Don't change BasePtr since it is used
  // later for stack adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);
929 | |
930 | // This must be part of a four operand memory reference. Replace the |
931 | // FrameIndex with base register. Add an offset to the offset. |
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);
933 | |
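  // When addressing off the stack pointer, account for any call-frame
  // adjustment (SPAdj) in effect at this instruction.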
934 | if (BasePtr == StackPtr) |
935 | FIOffset += SPAdj; |
936 | |
937 | // The frame index format for stackmaps and patchpoints is different from the |
938 | // X86 format. It only has a FI and an offset. |
939 | if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { |
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
943 | return false; |
944 | } |
945 | |
  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
959 | } |
960 | return false; |
961 | } |
962 | |
963 | unsigned X86RegisterInfo::findDeadCallerSavedReg( |
964 | MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const { |
965 | const MachineFunction *MF = MBB.getParent(); |
966 | if (MF->callsEHReturn()) |
967 | return 0; |
968 | |
  const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);
970 | |
971 | if (MBBI == MBB.end()) |
972 | return 0; |
973 | |
974 | switch (MBBI->getOpcode()) { |
975 | default: |
976 | return 0; |
977 | case TargetOpcode::PATCHABLE_RET: |
978 | case X86::RET: |
979 | case X86::RET32: |
980 | case X86::RET64: |
981 | case X86::RETI32: |
982 | case X86::RETI64: |
983 | case X86::TCRETURNdi: |
984 | case X86::TCRETURNri: |
985 | case X86::TCRETURNmi: |
986 | case X86::TCRETURNdi64: |
987 | case X86::TCRETURNri64: |
988 | case X86::TCRETURNmi64: |
989 | case X86::EH_RETURN: |
990 | case X86::EH_RETURN64: { |
991 | SmallSet<uint16_t, 8> Uses; |
992 | for (MachineOperand &MO : MBBI->operands()) { |
993 | if (!MO.isReg() || MO.isDef()) |
994 | continue; |
995 | Register Reg = MO.getReg(); |
996 | if (!Reg) |
997 | continue; |
998 | for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI) |
        Uses.insert(*AI);
1000 | } |
1001 | |
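    // Return the first available register that the terminator itself does not
    // use, but never the instruction pointer or the stack pointer.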
1002 | for (auto CS : AvailableRegs) |
1003 | if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP) |
1004 | return CS; |
1005 | } |
1006 | } |
1007 | |
1008 | return 0; |
1009 | } |
1010 | |
1011 | Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { |
1012 | const X86FrameLowering *TFI = getFrameLowering(MF); |
1013 | return TFI->hasFP(MF) ? FramePtr : StackPtr; |
1014 | } |
1015 | |
1016 | unsigned |
1017 | X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const { |
1018 | const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); |
1019 | Register FrameReg = getFrameRegister(MF); |
1020 | if (Subtarget.isTarget64BitILP32()) |
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
1022 | return FrameReg; |
1023 | } |
1024 | |
1025 | unsigned |
1026 | X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const { |
1027 | const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); |
1028 | Register StackReg = getStackRegister(); |
1029 | if (Subtarget.isTarget64BitILP32()) |
    StackReg = getX86SubSuperRegister(StackReg, 32);
1031 | return StackReg; |
1032 | } |
1033 | |
1034 | static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM, |
1035 | const MachineRegisterInfo *MRI) { |
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);
1038 | |
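  // No shape has been cached yet; derive it from the virtual register's
  // defining instruction, caching results in the VirtRegMap along the way.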
  const MachineOperand &Def = *MRI->def_begin(VirtReg);
1040 | MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent()); |
1041 | unsigned OpCode = MI->getOpcode(); |
1042 | switch (OpCode) { |
1043 | default: |
    llvm_unreachable("Unexpected machine instruction on tile register!");
1045 | break; |
1046 | case X86::COPY: { |
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
1050 | return Shape; |
1051 | } |
1052 | // We only collect the tile shape that is defined. |
1053 | case X86::PTILELOADDV: |
1054 | case X86::PTILELOADDT1V: |
1055 | case X86::PTDPBSSDV: |
1056 | case X86::PTDPBSUDV: |
1057 | case X86::PTDPBUSDV: |
1058 | case X86::PTDPBUUDV: |
1059 | case X86::PTILEZEROV: |
1060 | case X86::PTDPBF16PSV: |
1061 | case X86::PTDPFP16PSV: |
1062 | case X86::PTCMMIMFP16PSV: |
1063 | case X86::PTCMMRLFP16PSV: |
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
1068 | return Shape; |
1069 | } |
1070 | } |
1071 | |
1072 | bool X86RegisterInfo::getRegAllocationHints(Register VirtReg, |
1073 | ArrayRef<MCPhysReg> Order, |
1074 | SmallVectorImpl<MCPhysReg> &Hints, |
1075 | const MachineFunction &MF, |
1076 | const VirtRegMap *VRM, |
1077 | const LiveRegMatrix *Matrix) const { |
1078 | const MachineRegisterInfo *MRI = &MF.getRegInfo(); |
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
1080 | bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( |
1081 | VirtReg, Order, Hints, MF, VRM, Matrix); |
1082 | |
1083 | unsigned ID = RC.getID(); |
1084 | if (ID != X86::TILERegClassID) |
1085 | return BaseImplRetVal; |
1086 | |
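  // For AMX tile registers, only hint physical registers that are either still
  // unassigned or whose assigned virtual register has the same tile shape.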
  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
1088 | auto AddHint = [&](MCPhysReg PhysReg) { |
1089 | Register VReg = Matrix->getOneVReg(PhysReg); |
1090 | if (VReg == MCRegister::NoRegister) { // Not allocated yet |
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (PhysShape == VirtShape)
      Hints.push_back(PhysReg);
1097 | }; |
1098 | |
1099 | SmallSet<MCPhysReg, 4> CopyHints; |
  CopyHints.insert(Hints.begin(), Hints.end());
1101 | Hints.clear(); |
1102 | for (auto Hint : CopyHints) { |
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
1104 | AddHint(Hint); |
1105 | } |
1106 | for (MCPhysReg PhysReg : Order) { |
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
1109 | AddHint(PhysReg); |
1110 | } |
1111 | |
1112 | #define DEBUG_TYPE "tile-hint" |
1113 | LLVM_DEBUG({ |
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
1119 | }); |
1120 | #undef DEBUG_TYPE |
1121 | |
1122 | return true; |
1123 | } |
1124 | |