1 | //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains code to lower X86 MachineInstrs to their corresponding |
10 | // MCInst records. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "MCTargetDesc/X86ATTInstPrinter.h" |
15 | #include "MCTargetDesc/X86BaseInfo.h" |
16 | #include "MCTargetDesc/X86EncodingOptimization.h" |
17 | #include "MCTargetDesc/X86InstComments.h" |
18 | #include "MCTargetDesc/X86ShuffleDecode.h" |
19 | #include "MCTargetDesc/X86TargetStreamer.h" |
20 | #include "X86AsmPrinter.h" |
21 | #include "X86MachineFunctionInfo.h" |
22 | #include "X86RegisterInfo.h" |
23 | #include "X86ShuffleDecodeConstantPool.h" |
24 | #include "X86Subtarget.h" |
25 | #include "llvm/ADT/SmallString.h" |
26 | #include "llvm/ADT/StringExtras.h" |
27 | #include "llvm/CodeGen/MachineConstantPool.h" |
28 | #include "llvm/CodeGen/MachineFunction.h" |
29 | #include "llvm/CodeGen/MachineModuleInfoImpls.h" |
30 | #include "llvm/CodeGen/MachineOperand.h" |
31 | #include "llvm/CodeGen/StackMaps.h" |
32 | #include "llvm/IR/DataLayout.h" |
33 | #include "llvm/IR/GlobalValue.h" |
34 | #include "llvm/IR/Mangler.h" |
35 | #include "llvm/MC/MCAsmInfo.h" |
36 | #include "llvm/MC/MCCodeEmitter.h" |
37 | #include "llvm/MC/MCContext.h" |
38 | #include "llvm/MC/MCExpr.h" |
39 | #include "llvm/MC/MCFixup.h" |
40 | #include "llvm/MC/MCInst.h" |
41 | #include "llvm/MC/MCInstBuilder.h" |
42 | #include "llvm/MC/MCSection.h" |
43 | #include "llvm/MC/MCSectionELF.h" |
44 | #include "llvm/MC/MCStreamer.h" |
45 | #include "llvm/MC/MCSymbol.h" |
46 | #include "llvm/MC/MCSymbolELF.h" |
47 | #include "llvm/MC/TargetRegistry.h" |
48 | #include "llvm/Target/TargetLoweringObjectFile.h" |
49 | #include "llvm/Target/TargetMachine.h" |
50 | #include "llvm/Transforms/Instrumentation/AddressSanitizer.h" |
51 | #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" |
52 | #include <string> |
53 | |
54 | using namespace llvm; |
55 | |
namespace {

/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
class X86MCInstLower {
  MCContext &Ctx;               // MC context of the current function.
  const MachineFunction &MF;    // Function whose instructions are lowered.
  const TargetMachine &TM;
  const MCAsmInfo &MAI;
  X86AsmPrinter &AsmPrinter;    // Printer used for symbol lookup and output.

public:
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);

  /// Lower a single operand. Returns std::nullopt for operands that have no
  /// MCInst equivalent (implicit register operands, register masks).
  std::optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
                                               const MachineOperand &MO) const;
  /// Lower MI into OutMI, applying X86-specific opcode rewrites and encoding
  /// optimizations afterwards.
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;

  /// Map a symbolic operand (global, external symbol, or MBB) to an MCSymbol,
  /// honoring target flags that alter the symbol name (stubs, dllimport).
  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
  /// Wrap Sym in the MCExpr demanded by MO's target flags (GOT/PLT/TLS
  /// variants, PIC-base subtraction) and fold in any constant offset.
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;

private:
  /// Convenience accessor for the Mach-O flavor of the MMI object-file info.
  MachineModuleInfoMachO &getMachOMMI() const;
};

} // end anonymous namespace
81 | |
82 | /// A RAII helper which defines a region of instructions which can't have |
83 | /// padding added between them for correctness. |
84 | struct NoAutoPaddingScope { |
85 | MCStreamer &OS; |
86 | const bool OldAllowAutoPadding; |
87 | NoAutoPaddingScope(MCStreamer &OS) |
88 | : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) { |
89 | changeAndComment(b: false); |
90 | } |
91 | ~NoAutoPaddingScope() { changeAndComment(b: OldAllowAutoPadding); } |
92 | void changeAndComment(bool b) { |
93 | if (b == OS.getAllowAutoPadding()) |
94 | return; |
95 | OS.setAllowAutoPadding(b); |
96 | if (b) |
97 | OS.emitRawComment(T: "autopadding" ); |
98 | else |
99 | OS.emitRawComment(T: "noautopadding" ); |
100 | } |
101 | }; |
102 | |
// Emit a minimal sequence of nops spanning NumBytes bytes.
// (Defined later in this file; forward-declared here for use by the
// stackmap shadow tracker above it.)
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);
106 | |
107 | void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, |
108 | const MCSubtargetInfo &STI, |
109 | MCCodeEmitter *CodeEmitter) { |
110 | if (InShadow) { |
111 | SmallString<256> Code; |
112 | SmallVector<MCFixup, 4> Fixups; |
113 | CodeEmitter->encodeInstruction(Inst, CB&: Code, Fixups, STI); |
114 | CurrentShadowSize += Code.size(); |
115 | if (CurrentShadowSize >= RequiredShadowSize) |
116 | InShadow = false; // The shadow is big enough. Stop counting. |
117 | } |
118 | } |
119 | |
120 | void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding( |
121 | MCStreamer &OutStreamer, const MCSubtargetInfo &STI) { |
122 | if (InShadow && CurrentShadowSize < RequiredShadowSize) { |
123 | InShadow = false; |
124 | emitX86Nops(OS&: OutStreamer, NumBytes: RequiredShadowSize - CurrentShadowSize, |
125 | Subtarget: &MF->getSubtarget<X86Subtarget>()); |
126 | } |
127 | } |
128 | |
129 | void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { |
130 | OutStreamer->emitInstruction(Inst, STI: getSubtargetInfo()); |
131 | SMShadowTracker.count(Inst, STI: getSubtargetInfo(), CodeEmitter: CodeEmitter.get()); |
132 | } |
133 | |
/// Capture the per-function context objects (MC context, target machine,
/// asm-info, printer) needed to lower this function's instructions.
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                               X86AsmPrinter &asmprinter)
    : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
      AsmPrinter(asmprinter) {}
138 | |
/// Return the Mach-O flavor of the machine-module-info, used to record
/// Darwin $non_lazy_ptr stub entries.
MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
  return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
}
142 | |
/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
  const Triple &TT = TM.getTargetTriple();
  // On ELF, prefer the local alias of a global where one exists, so the
  // reference can bypass GOT/PLT indirection.
  if (MO.isGlobal() && TT.isOSBinFormatELF())
    return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());

  const DataLayout &DL = MF.getDataLayout();
  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
         "Isn't a symbol reference");

  MCSymbol *Sym = nullptr;
  SmallString<128> Name;
  StringRef Suffix;

  // Some target flags change the symbol's name itself, by prefix or suffix.
  switch (MO.getTargetFlags()) {
  case X86II::MO_DLLIMPORT:
    // Handle dllimport linkage.
    Name += "__imp_";
    break;
  case X86II::MO_COFFSTUB:
    Name += ".refptr.";
    break;
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Suffix = "$non_lazy_ptr";
    break;
  }

  // Suffixed stub symbols are assembler-local, so carry the private global
  // prefix (e.g. "L" on Darwin).
  if (!Suffix.empty())
    Name += DL.getPrivateGlobalPrefix();

  if (MO.isGlobal()) {
    const GlobalValue *GV = MO.getGlobal();
    AsmPrinter.getNameWithPrefix(Name, GV);
  } else if (MO.isSymbol()) {
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
  } else if (MO.isMBB()) {
    // Basic blocks already have a symbol; no name mangling applies.
    assert(Suffix.empty());
    Sym = MO.getMBB()->getSymbol();
  }

  Name += Suffix;
  if (!Sym)
    Sym = Ctx.getOrCreateSymbol(Name);

  // If the target flags on the operand changes the name of the symbol, do that
  // before we return the symbol.
  switch (MO.getTargetFlags()) {
  default:
    break;
  case X86II::MO_COFFSTUB: {
    // Record a COFF ".refptr" stub pointing at the real symbol; the stub
    // itself is emitted later by the asm printer.
    MachineModuleInfoCOFF &MMICOFF =
        MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
    MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()), true);
    }
    break;
  }
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
    // Record a Darwin $non_lazy_ptr stub; the boolean marks whether the
    // referenced global is external (anything not internal linkage).
    MachineModuleInfoImpl::StubValueTy &StubSym =
        getMachOMMI().getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()),
          !MO.getGlobal()->hasInternalLinkage());
    }
    break;
  }
  }

  return Sym;
}
221 | |
MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
                                             MCSymbol *Sym) const {
  // FIXME: We would like an efficient form for this, so we don't have to do a
  // lot of extra uniquing.
  const MCExpr *Expr = nullptr;
  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;

  // Map the operand's target flag to the relocation variant (@GOT, @PLT,
  // @TPOFF, ...) the symbol reference must carry. A few flags instead build
  // a full expression (PIC-base subtraction) directly into Expr.
  switch (MO.getTargetFlags()) {
  default:
    llvm_unreachable("Unknown target flag on GV operand");
  case X86II::MO_NO_FLAG: // No flag.
  // These affect the name of the symbol, not any suffix.
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DLLIMPORT:
  case X86II::MO_COFFSTUB:
    break;

  case X86II::MO_TLVP:
    RefKind = MCSymbolRefExpr::VK_TLVP;
    break;
  case X86II::MO_TLVP_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    break;
  case X86II::MO_SECREL:
    RefKind = MCSymbolRefExpr::VK_SECREL;
    break;
  case X86II::MO_TLSGD:
    RefKind = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86II::MO_TLSLD:
    RefKind = MCSymbolRefExpr::VK_TLSLD;
    break;
  case X86II::MO_TLSLDM:
    RefKind = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86II::MO_GOTTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
    break;
  case X86II::MO_INDNTPOFF:
    RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
    break;
  case X86II::MO_TPOFF:
    RefKind = MCSymbolRefExpr::VK_TPOFF;
    break;
  case X86II::MO_DTPOFF:
    RefKind = MCSymbolRefExpr::VK_DTPOFF;
    break;
  case X86II::MO_NTPOFF:
    RefKind = MCSymbolRefExpr::VK_NTPOFF;
    break;
  case X86II::MO_GOTNTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
    break;
  case X86II::MO_GOTPCREL:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL;
    break;
  case X86II::MO_GOTPCREL_NORELAX:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
    break;
  case X86II::MO_GOT:
    RefKind = MCSymbolRefExpr::VK_GOT;
    break;
  case X86II::MO_GOTOFF:
    RefKind = MCSymbolRefExpr::VK_GOTOFF;
    break;
  case X86II::MO_PLT:
    RefKind = MCSymbolRefExpr::VK_PLT;
    break;
  case X86II::MO_ABS8:
    RefKind = MCSymbolRefExpr::VK_X86_ABS8;
    break;
  case X86II::MO_PIC_BASE_OFFSET:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    if (MO.isJTI()) {
      assert(MAI.doesSetDirectiveSuppressReloc());
      // If .set directive is supported, use it to reduce the number of
      // relocations the assembler will generate for differences between
      // local labels. This is only safe when the symbols are in the same
      // section so we are restricting it to jumptable references.
      MCSymbol *Label = Ctx.createTempSymbol();
      AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
      Expr = MCSymbolRefExpr::create(Label, Ctx);
    }
    break;
  }

  // The simple cases above only picked a variant kind; build the symbol
  // reference now.
  if (!Expr)
    Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);

  // Fold a constant operand offset into the expression. Jump-table and MBB
  // operands never carry an offset.
  if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
    Expr = MCBinaryExpr::createAdd(
        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
  return MCOperand::createExpr(Expr);
}
323 | |
324 | static unsigned getRetOpcode(const X86Subtarget &Subtarget) { |
325 | return Subtarget.is64Bit() ? X86::RET64 : X86::RET32; |
326 | } |
327 | |
/// Lower a single MachineOperand to an MCOperand. Returns std::nullopt for
/// operands (implicit registers, register masks) that have no MC equivalent.
std::optional<MCOperand>
X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
                                    const MachineOperand &MO) const {
  switch (MO.getType()) {
  default:
    // Dump the offending instruction before aborting, to aid debugging.
    MI->print(errs());
    llvm_unreachable("unknown operand type");
  case MachineOperand::MO_Register:
    // Ignore all implicit register operands.
    if (MO.isImplicit())
      return std::nullopt;
    return MCOperand::createReg(MO.getReg());
  case MachineOperand::MO_Immediate:
    return MCOperand::createImm(MO.getImm());
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_ExternalSymbol:
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
  case MachineOperand::MO_MCSymbol:
    return LowerSymbolOperand(MO, MO.getMCSymbol());
  case MachineOperand::MO_JumpTableIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
  case MachineOperand::MO_ConstantPoolIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
  case MachineOperand::MO_BlockAddress:
    return LowerSymbolOperand(
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
  case MachineOperand::MO_RegisterMask:
    // Ignore call clobbers.
    return std::nullopt;
  }
}
360 | |
361 | // Replace TAILJMP opcodes with their equivalent opcodes that have encoding |
362 | // information. |
363 | static unsigned convertTailJumpOpcode(unsigned Opcode) { |
364 | switch (Opcode) { |
365 | case X86::TAILJMPr: |
366 | Opcode = X86::JMP32r; |
367 | break; |
368 | case X86::TAILJMPm: |
369 | Opcode = X86::JMP32m; |
370 | break; |
371 | case X86::TAILJMPr64: |
372 | Opcode = X86::JMP64r; |
373 | break; |
374 | case X86::TAILJMPm64: |
375 | Opcode = X86::JMP64m; |
376 | break; |
377 | case X86::TAILJMPr64_REX: |
378 | Opcode = X86::JMP64r_REX; |
379 | break; |
380 | case X86::TAILJMPm64_REX: |
381 | Opcode = X86::JMP64m_REX; |
382 | break; |
383 | case X86::TAILJMPd: |
384 | case X86::TAILJMPd64: |
385 | Opcode = X86::JMP_1; |
386 | break; |
387 | case X86::TAILJMPd_CC: |
388 | case X86::TAILJMPd64_CC: |
389 | Opcode = X86::JCC_1; |
390 | break; |
391 | } |
392 | |
393 | return Opcode; |
394 | } |
395 | |
/// Lower MI to OutMI: translate each operand, try the generic MC-level
/// encoding optimizations, then apply per-opcode fixups (pseudo expansion,
/// tail-jump conversion, prefix flags).
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
  OutMI.setOpcode(MI->getOpcode());

  for (const MachineOperand &MO : MI->operands())
    if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
      OutMI.addOperand(*MaybeMCOp);

  // Try shared MC-layer rewrites first (shorter encodings, fixed-register
  // forms); if one fires it fully determines the output instruction.
  bool In64BitMode = AsmPrinter.getSubtarget().is64Bit();
  if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) ||
      X86::optimizeShiftRotateWithImmediateOne(OutMI) ||
      X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) ||
      X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) ||
      X86::optimizeMOV(OutMI, In64BitMode) ||
      X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI))
    return;

  // Handle a few special cases to eliminate operand modifiers.
  switch (OutMI.getOpcode()) {
  case X86::LEA64_32r:
  case X86::LEA64r:
  case X86::LEA16r:
  case X86::LEA32r:
    // LEA should have a segment register, but it must be empty.
    assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
           "Unexpected # of LEA operands");
    assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
           "LEA has segment specified!");
    break;
  case X86::MULX32Hrr:
  case X86::MULX32Hrm:
  case X86::MULX64Hrr:
  case X86::MULX64Hrm: {
    // Turn into regular MULX by duplicating the destination.
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
    case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
    case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
    case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
    }
    OutMI.setOpcode(NewOpc);
    // Duplicate the destination.
    unsigned DestReg = OutMI.getOperand(0).getReg();
    OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
    break;
  }
  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses. As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
  case X86::CALL64r:
  case X86::CALL64pcrel32:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    break;
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // These pseudos expand to a plain return at the MC level.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CLEANUPRET: {
    // Replace CLEANUPRET with the appropriate RET.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CATCHRET: {
    // Replace CATCHRET with the appropriate RET.
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
    unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX;
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(Subtarget));
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
    break;
  }
  // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
  // instruction.
  case X86::TAILJMPr:
  case X86::TAILJMPr64:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    // Conditional tail jumps carry the target plus a condition-code operand.
    assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPm:
  case X86::TAILJMPm64:
  case X86::TAILJMPm64_REX:
    assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
           "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::MASKMOVDQU:
  case X86::VMASKMOVDQU:
    // In 64-bit mode these use EDI and need an address-size override prefix.
    if (In64BitMode)
      OutMI.setFlags(X86::IP_HAS_AD_SIZE);
    break;
  case X86::BSF16rm:
  case X86::BSF16rr:
  case X86::BSF32rm:
  case X86::BSF32rr:
  case X86::BSF64rm:
  case X86::BSF64rr: {
    // Add a REP prefix to BSF instructions so that new processors can
    // recognize as TZCNT, which has better performance than BSF.
    // BSF and TZCNT have different interpretations on ZF bit. So make sure
    // it won't be used later.
    const MachineOperand *FlagDef =
        MI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
    if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead())
      OutMI.setFlags(X86::IP_HAS_REPEAT);
    break;
  }
  default:
    break;
  }
}
519 | |
/// Expand the TLS_addr/TLS_base_addr/TLS_desc pseudos into the exact
/// instruction sequences (lea + call) that linkers pattern-match for TLS
/// relaxation. The sequences are padding-sensitive, hence the
/// NoAutoPaddingScope.
void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);
  bool Is64Bits = getSubtarget().is64Bit();
  bool Is64BitsLP64 = getSubtarget().isTarget64BitLP64();
  MCContext &Ctx = OutStreamer->getContext();

  // Choose the TLS access model's relocation variant from the pseudo opcode.
  MCSymbolRefExpr::VariantKind SRVK;
  switch (MI.getOpcode()) {
  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86::TLS_base_addr32:
    SRVK = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSLD;
    break;
  case X86::TLS_desc32:
  case X86::TLS_desc64:
    SRVK = MCSymbolRefExpr::VK_TLSDESC;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);

  // Before binutils 2.41, ld has a bogus TLS relaxation error when the GD/LD
  // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
  // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
  // only using GOT when GOTPCRELX is enabled.
  // TODO Delete the workaround when rustc no longer relies on the hack
  bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                Ctx.getTargetOptions()->X86RelaxRelocations;

  if (SRVK == MCSymbolRefExpr::VK_TLSDESC) {
    // TLSDESC: lea sym@tlsdesc, then an indirect call through sym@tlscall.
    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(
        MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)),
        MCSymbolRefExpr::VK_TLSCALL, Ctx);
    EmitAndCountInstruction(
        MCInstBuilder(Is64BitsLP64 ? X86::LEA64r : X86::LEA32r)
            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
            .addReg(Is64Bits ? X86::RIP : X86::EBX)
            .addImm(1)
            .addReg(0)
            .addExpr(Sym)
            .addReg(0));
    EmitAndCountInstruction(
        MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m)
            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
            .addImm(1)
            .addReg(0)
            .addExpr(Expr)
            .addReg(0));
  } else if (Is64Bits) {
    // 64-bit GD sequences must have a fixed length for linker relaxation, so
    // the GD form is padded with data16/rex prefixes per the psABI.
    bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
    if (NeedsPadding && Is64BitsLP64)
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
                                .addReg(X86::RDI)
                                .addReg(X86::RIP)
                                .addImm(1)
                                .addReg(0)
                                .addExpr(Sym)
                                .addReg(0));
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
    if (NeedsPadding) {
      if (!UseGot)
        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
    }
    if (UseGot) {
      // Indirect call through the GOT entry of __tls_get_addr.
      const MCExpr *Expr = MCSymbolRefExpr::create(
          TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
                                  .addReg(X86::RIP)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALL64pcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  } else {
    // 32-bit: the GD form uses an index register so the instruction has the
    // exact length the linker's relaxation patterns expect.
    if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(0)
                                  .addImm(1)
                                  .addReg(X86::EBX)
                                  .addExpr(Sym)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Sym)
                                  .addReg(0));
    }

    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
    if (UseGot) {
      // Indirect call through the GOT, addressed off EBX (the GOT pointer).
      const MCExpr *Expr =
          MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALLpcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  }
}
649 | |
650 | /// Emit the largest nop instruction smaller than or equal to \p NumBytes |
651 | /// bytes. Return the size of nop emitted. |
652 | static unsigned emitNop(MCStreamer &OS, unsigned NumBytes, |
653 | const X86Subtarget *Subtarget) { |
654 | // Determine the longest nop which can be efficiently decoded for the given |
655 | // target cpu. 15-bytes is the longest single NOP instruction, but some |
656 | // platforms can't decode the longest forms efficiently. |
657 | unsigned MaxNopLength = 1; |
658 | if (Subtarget->is64Bit()) { |
659 | // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the |
660 | // IndexReg/BaseReg below need to be updated. |
661 | if (Subtarget->hasFeature(X86::TuningFast7ByteNOP)) |
662 | MaxNopLength = 7; |
663 | else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP)) |
664 | MaxNopLength = 15; |
665 | else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP)) |
666 | MaxNopLength = 11; |
667 | else |
668 | MaxNopLength = 10; |
669 | } if (Subtarget->is32Bit()) |
670 | MaxNopLength = 2; |
671 | |
672 | // Cap a single nop emission at the profitable value for the target |
673 | NumBytes = std::min(a: NumBytes, b: MaxNopLength); |
674 | |
675 | unsigned NopSize; |
676 | unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg; |
677 | IndexReg = Displacement = SegmentReg = 0; |
678 | BaseReg = X86::RAX; |
679 | ScaleVal = 1; |
680 | switch (NumBytes) { |
681 | case 0: |
682 | llvm_unreachable("Zero nops?" ); |
683 | break; |
684 | case 1: |
685 | NopSize = 1; |
686 | Opc = X86::NOOP; |
687 | break; |
688 | case 2: |
689 | NopSize = 2; |
690 | Opc = X86::XCHG16ar; |
691 | break; |
692 | case 3: |
693 | NopSize = 3; |
694 | Opc = X86::NOOPL; |
695 | break; |
696 | case 4: |
697 | NopSize = 4; |
698 | Opc = X86::NOOPL; |
699 | Displacement = 8; |
700 | break; |
701 | case 5: |
702 | NopSize = 5; |
703 | Opc = X86::NOOPL; |
704 | Displacement = 8; |
705 | IndexReg = X86::RAX; |
706 | break; |
707 | case 6: |
708 | NopSize = 6; |
709 | Opc = X86::NOOPW; |
710 | Displacement = 8; |
711 | IndexReg = X86::RAX; |
712 | break; |
713 | case 7: |
714 | NopSize = 7; |
715 | Opc = X86::NOOPL; |
716 | Displacement = 512; |
717 | break; |
718 | case 8: |
719 | NopSize = 8; |
720 | Opc = X86::NOOPL; |
721 | Displacement = 512; |
722 | IndexReg = X86::RAX; |
723 | break; |
724 | case 9: |
725 | NopSize = 9; |
726 | Opc = X86::NOOPW; |
727 | Displacement = 512; |
728 | IndexReg = X86::RAX; |
729 | break; |
730 | default: |
731 | NopSize = 10; |
732 | Opc = X86::NOOPW; |
733 | Displacement = 512; |
734 | IndexReg = X86::RAX; |
735 | SegmentReg = X86::CS; |
736 | break; |
737 | } |
738 | |
739 | unsigned NumPrefixes = std::min(a: NumBytes - NopSize, b: 5U); |
740 | NopSize += NumPrefixes; |
741 | for (unsigned i = 0; i != NumPrefixes; ++i) |
742 | OS.emitBytes(Data: "\x66" ); |
743 | |
744 | switch (Opc) { |
745 | default: llvm_unreachable("Unexpected opcode" ); |
746 | case X86::NOOP: |
747 | OS.emitInstruction(MCInstBuilder(Opc), *Subtarget); |
748 | break; |
749 | case X86::XCHG16ar: |
750 | OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), |
751 | *Subtarget); |
752 | break; |
753 | case X86::NOOPL: |
754 | case X86::NOOPW: |
755 | OS.emitInstruction(MCInstBuilder(Opc) |
756 | .addReg(Reg: BaseReg) |
757 | .addImm(Val: ScaleVal) |
758 | .addReg(Reg: IndexReg) |
759 | .addImm(Val: Displacement) |
760 | .addReg(Reg: SegmentReg), |
761 | *Subtarget); |
762 | break; |
763 | } |
764 | assert(NopSize <= NumBytes && "We overemitted?" ); |
765 | return NopSize; |
766 | } |
767 | |
768 | /// Emit the optimal amount of multi-byte nops on X86. |
769 | static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, |
770 | const X86Subtarget *Subtarget) { |
771 | unsigned NopsToEmit = NumBytes; |
772 | (void)NopsToEmit; |
773 | while (NumBytes) { |
774 | NumBytes -= emitNop(OS, NumBytes, Subtarget); |
775 | assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!" ); |
776 | } |
777 | } |
778 | |
/// Lower a STATEPOINT pseudo: either emit the requested number of patchable
/// nop bytes, or emit the actual call, then record the statepoint label for
/// the stackmap section.
void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    // A nop-sled statepoint: reserve space that can be patched later.
    emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
  } else {
    // Lower call target and choose correct opcode
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address. You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate. You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      // FIXME: Add retpoline support and remove this.
      if (Subtarget->useIndirectThunkCalls())
        report_fatal_error("Lowering register statepoints with thunks not "
                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
    default:
      llvm_unreachable("Unsupported operand type in statepoint call target");
      break;
    }

    // Emit call
    MCInst CallInst;
    CallInst.setOpcode(CallOpcode);
    CallInst.addOperand(CallTargetMCOp);
    OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
  }

  // Record our statepoint node in the same section used by STACKMAP
  // and PATCHPOINT
  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordStatepoint(*MILabel, MI);
}
839 | |
/// Lower a FAULTING_OP pseudo: emit the wrapped instruction preceded by a
/// label, and record (fault kind, faulting label, handler label) in the
/// fault-map section so the runtime can recover from the expected fault.
void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
                                     X86MCInstLower &MCIL) {
  // FAULTING_LOAD_OP <def>, <fault kind>, <MBB handler>,
  // <opcode>, <operands>

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  Register DefRegister = FaultingMI.getOperand(0).getReg();
  FaultMaps::FaultKind FK =
      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
  unsigned Opcode = FaultingMI.getOperand(3).getImm();
  unsigned OperandsBeginIdx = 4;

  // Label the instruction that may fault so the fault map can reference it.
  auto &Ctx = OutStreamer->getContext();
  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(FaultingLabel);

  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);

  // Rebuild the real instruction from the pseudo's trailing operands.
  MCInst MI;
  MI.setOpcode(Opcode);

  if (DefRegister != X86::NoRegister)
    MI.addOperand(MCOperand::createReg(DefRegister));

  for (const MachineOperand &MO :
       llvm::drop_begin(FaultingMI.operands(), OperandsBeginIdx))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, MO))
      MI.addOperand(*MaybeOperand);

  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
  OutStreamer->emitInstruction(MI, getSubtargetInfo());
}
875 | |
876 | void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, |
877 | X86MCInstLower &MCIL) { |
878 | bool Is64Bits = Subtarget->is64Bit(); |
879 | MCContext &Ctx = OutStreamer->getContext(); |
880 | MCSymbol *fentry = Ctx.getOrCreateSymbol(Name: "__fentry__" ); |
881 | const MCSymbolRefExpr *Op = |
882 | MCSymbolRefExpr::create(Symbol: fentry, Kind: MCSymbolRefExpr::VK_None, Ctx); |
883 | |
884 | EmitAndCountInstruction( |
885 | MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32) |
886 | .addExpr(Op)); |
887 | } |
888 | |
// Lower a KCFI_CHECK pseudo: verify that the indirect-call target held in
// operand 0 is preceded in memory by the expected KCFI type hash
// (operand 1), and trap on a mismatch. The check is emitted immediately
// before the call instruction it guards.
void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
  assert(std::next(MI.getIterator())->isCall() &&
         "KCFI_CHECK not followed by a call instruction" );

  // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop()
  // returns a 1-byte X86::NOOP, which means the offset is the same in
  // bytes. This assumes that patchable-function-prefix is the same for all
  // functions.
  const MachineFunction &MF = *MI.getMF();
  int64_t PrefixNops = 0;
  // Result intentionally ignored: PrefixNops stays 0 when the attribute is
  // absent or not a valid integer.
  (void)MF.getFunction()
      .getFnAttribute(Kind: "patchable-function-prefix" )
      .getValueAsString()
      .getAsInteger(Radix: 10, Result&: PrefixNops);

  // KCFI allows indirect calls to any location that's preceded by a valid
  // type identifier. To avoid encoding the full constant into an instruction,
  // and thus emitting potential call target gadgets at each indirect call
  // site, load a negated constant to a register and compare that to the
  // expected value at the call target.
  const Register AddrReg = MI.getOperand(i: 0).getReg();
  const uint32_t Type = MI.getOperand(i: 1).getImm();
  // The check is immediately before the call. If the call target is in R10,
  // we can clobber R11 for the check instead.
  unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D;
  EmitAndCountInstruction(
      MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type)));
  // add TempReg, [AddrReg - (PrefixNops + 4)]: adds the hash stored before
  // the target to the negated expected hash; the sum is zero on a match.
  EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm)
                              .addReg(X86::NoRegister)
                              .addReg(TempReg)
                              .addReg(AddrReg)
                              .addImm(1)
                              .addReg(X86::NoRegister)
                              .addImm(-(PrefixNops + 4))
                              .addReg(X86::NoRegister));

  // Jump over the trap when the hashes matched (ZF set by the add).
  MCSymbol *Pass = OutContext.createTempSymbol();
  EmitAndCountInstruction(
      MCInstBuilder(X86::JCC_1)
          .addExpr(MCSymbolRefExpr::create(Pass, OutContext))
          .addImm(X86::COND_E));

  // Mismatch path: trap, and record the trap location for the runtime.
  MCSymbol *Trap = OutContext.createTempSymbol();
  OutStreamer->emitLabel(Symbol: Trap);
  EmitAndCountInstruction(MCInstBuilder(X86::TRAP));
  emitKCFITrapEntry(MF, Symbol: Trap);
  OutStreamer->emitLabel(Symbol: Pass);
}
937 | |
938 | void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) { |
939 | // FIXME: Make this work on non-ELF. |
940 | if (!TM.getTargetTriple().isOSBinFormatELF()) { |
941 | report_fatal_error(reason: "llvm.asan.check.memaccess only supported on ELF" ); |
942 | return; |
943 | } |
944 | |
945 | const auto &Reg = MI.getOperand(i: 0).getReg(); |
946 | ASanAccessInfo AccessInfo(MI.getOperand(i: 1).getImm()); |
947 | |
948 | uint64_t ShadowBase; |
949 | int MappingScale; |
950 | bool OrShadowOffset; |
951 | getAddressSanitizerParams(TargetTriple: Triple(TM.getTargetTriple()), LongSize: 64, |
952 | IsKasan: AccessInfo.CompileKernel, ShadowBase: &ShadowBase, |
953 | MappingScale: &MappingScale, OrShadowOffset: &OrShadowOffset); |
954 | |
955 | StringRef Name = AccessInfo.IsWrite ? "store" : "load" ; |
956 | StringRef Op = OrShadowOffset ? "or" : "add" ; |
957 | std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" + |
958 | Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" + |
959 | TM.getMCRegisterInfo()->getName(RegNo: Reg.asMCReg())) |
960 | .str(); |
961 | if (OrShadowOffset) |
962 | report_fatal_error( |
963 | reason: "OrShadowOffset is not supported with optimized callbacks" ); |
964 | |
965 | EmitAndCountInstruction( |
966 | MCInstBuilder(X86::CALL64pcrel32) |
967 | .addExpr(MCSymbolRefExpr::create( |
968 | OutContext.getOrCreateSymbol(SymName), OutContext))); |
969 | } |
970 | |
// Lower a PATCHABLE_OP pseudo: ensure the following real instruction is at
// least `minsize` bytes long so it can be safely hot-patched; when it is
// shorter, emit padding (a NOP sequence, or the MSVC `mov edi, edi` idiom)
// in front of it.
void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                      X86MCInstLower &MCIL) {
  // PATCHABLE_OP minsize

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Find the next non-meta instruction; that is the one being patched over.
  auto NextMI = std::find_if(first: std::next(x: MI.getIterator()),
                             last: MI.getParent()->end().getInstrIterator(),
                             pred: [](auto &II) { return !II.isMetaInstruction(); });

  SmallString<256> Code;
  unsigned MinSize = MI.getOperand(i: 0).getImm();

  if (NextMI != MI.getParent()->end() && !NextMI->isInlineAsm()) {
    // Lower the next MachineInstr to find its byte size.
    // If the next instruction is inline assembly, we skip lowering it for now,
    // and assume we should always generate NOPs.
    MCInst MCI;
    MCIL.Lower(MI: &*NextMI, OutMI&: MCI);

    SmallVector<MCFixup, 4> Fixups;
    CodeEmitter->encodeInstruction(Inst: MCI, CB&: Code, Fixups, STI: getSubtargetInfo());
  }

  // Only pad when the encoded instruction is smaller than the requested
  // minimum (Code stays empty for inline asm, so that always pads).
  if (Code.size() < MinSize) {
    if (MinSize == 2 && Subtarget->is32Bit() &&
        Subtarget->isTargetWindowsMSVC() &&
        (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3" )) {
      // For compatibility reasons, when targetting MSVC, it is important to
      // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
      // rely specifically on this pattern to be able to patch a function.
      // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
      OutStreamer->emitInstruction(
          MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
          *Subtarget);
    } else {
      // Emit a NOP sequence of exactly MinSize bytes.
      unsigned NopSize = emitNop(OS&: *OutStreamer, NumBytes: MinSize, Subtarget);
      assert(NopSize == MinSize && "Could not implement MinSize!" );
      (void)NopSize;
    }
  }
}
1013 | |
1014 | // Lower a stackmap of the form: |
1015 | // <id>, <shadowBytes>, ... |
1016 | void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { |
1017 | SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo()); |
1018 | |
1019 | auto &Ctx = OutStreamer->getContext(); |
1020 | MCSymbol *MILabel = Ctx.createTempSymbol(); |
1021 | OutStreamer->emitLabel(Symbol: MILabel); |
1022 | |
1023 | SM.recordStackMap(L: *MILabel, MI); |
1024 | unsigned NumShadowBytes = MI.getOperand(i: 1).getImm(); |
1025 | SMShadowTracker.reset(RequiredSize: NumShadowBytes); |
1026 | } |
1027 | |
// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
// Emits a label for the stackmap record, materializes and calls a non-null
// target via a scratch register, then NOP-pads up to <numBytes> total.
void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64" );

  SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo());

  // The patch region must keep its exact layout; no automatic padding.
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(Symbol: MILabel);
  SM.recordPatchPoint(L: *MILabel, MI);

  PatchPointOpers opers(&MI);
  unsigned ScratchIdx = opers.getNextScratchIdx();
  unsigned EncodedBytes = 0;
  const MachineOperand &CalleeMO = opers.getCallTarget();

  // Check for null target. If target is non-null (i.e. is non-zero or is
  // symbolic) then emit a call.
  if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
    MCOperand CalleeMCOp;
    switch (CalleeMO.getType()) {
    default:
      /// FIXME: Add a verifier check for bad callee types.
      llvm_unreachable("Unrecognized callee operand type." );
    case MachineOperand::MO_Immediate:
      if (CalleeMO.getImm())
        CalleeMCOp = MCOperand::createImm(Val: CalleeMO.getImm());
      break;
    case MachineOperand::MO_ExternalSymbol:
    case MachineOperand::MO_GlobalAddress:
      CalleeMCOp = MCIL.LowerSymbolOperand(MO: CalleeMO,
                                           Sym: MCIL.GetSymbolFromOperand(MO: CalleeMO));
      break;
    }

    // Emit MOV to materialize the target address and the CALL to target.
    // This is encoded with 12-13 bytes, depending on which register is used
    // (extended registers R8-R15 cost one extra encoding byte, presumably
    // the REX prefix on the MOV -- TODO confirm).
    Register ScratchReg = MI.getOperand(i: ScratchIdx).getReg();
    if (X86II::isX86_64ExtendedReg(RegNo: ScratchReg))
      EncodedBytes = 13;
    else
      EncodedBytes = 12;

    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
    // FIXME: Add retpoline support and remove this.
    if (Subtarget->useIndirectThunkCalls())
      report_fatal_error(
          reason: "Lowering patchpoint with thunks not yet implemented." );
    EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
  }

  // Emit padding: NOPs filling the remainder of the requested patch region.
  unsigned NumBytes = opers.getNumPatchBytes();
  assert(NumBytes >= EncodedBytes &&
         "Patchpoint can't request size less than the length of a call." );

  emitX86Nops(OS&: *OutStreamer, NumBytes: NumBytes - EncodedBytes, Subtarget);
}
1091 | |
// Lower a PATCHABLE_EVENT_CALL pseudo into an XRay "custom event" sled: a
// fixed-size, initially-skipped region that the XRay runtime can patch into
// a live call to __xray_CustomEvent.
void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64" );

  // The sled's byte layout must be exact; suppress automatic padding.
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                        // jump across the instrumentation sled
  //   ...                           // set up arguments in register
  //   callq __xray_CustomEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayCustomEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol(Name: "xray_event_sled_" , AlwaysAddSuffix: true);
  OutStreamer->AddComment(T: "# XRay Custom Event Log" );
  OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo());
  OutStreamer->emitLabel(Symbol: CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way do force the relative jump.
  OutStreamer->emitBinaryData(Data: "\xeb\x0f" );

  // The two event arguments are placed in %rdi and %rsi -- the first two
  // SysV argument registers -- so the patched-in trampoline call receives
  // them directly. (An earlier comment here said %rcx/%rdx, contradicting
  // DestRegs below.)
  const Register DestRegs[] = {X86::RDI, X86::RSI};
  bool UsedMask[] = {false, false};
  // Filled out in loop.
  Register SrcRegs[] = {0, 0};

  // Then we put the operands in the %rdi and %rsi registers. We spill the
  // values in the register before we clobber them, and mark them as used in
  // UsedMask. In case the arguments are already in the correct register, we
  // emit nops appropriately sized to keep the sled the same size in every
  // situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I))) {
      assert(Op->isReg() && "Only support arguments in registers" );
      // Widen to the containing 64-bit register.
      SrcRegs[I] = getX86SubSuperRegister(Reg: Op->getReg(), Size: 64);
      assert(SrcRegs[I].isValid() && "Invalid operand" );
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        // Argument already in place: pad so the sled size stays constant.
        emitX86Nops(OS&: *OutStreamer, NumBytes: 4, Subtarget);
      }
    }

  // Now that the register values are stashed, mov arguments into place.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten over the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (SrcRegs[I] != DestRegs[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_CustomEvent" );
  MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers. Iterate in reverse so pops
  // mirror the pushes above; 1-byte nops keep the sled size constant for
  // slots that were never spilled.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(OS&: *OutStreamer, NumBytes: 1, Subtarget);

  OutStreamer->AddComment(T: "xray custom event end." );

  // Record the sled version. Version 0 of this sled was spelled differently, so
  // we let the runtime handle the different offsets we're using. Version 2
  // changed the absolute address to a PC-relative address.
  recordSled(Sled: CurSled, MI, Kind: SledKind::CUSTOM_EVENT, Version: 2);
}
1188 | |
// Lower a PATCHABLE_TYPED_EVENT_CALL pseudo into an XRay "typed event"
// sled: like the custom-event sled, but with three arguments and the
// __xray_TypedEvent trampoline.
void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
                                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64" );

  // The sled's byte layout must be exact; suppress automatic padding.
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                       // jump across the instrumentation sled
  //   ...                          // set up arguments in register
  //   callq __xray_TypedEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayTypedEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol(Name: "xray_typed_event_sled_" , AlwaysAddSuffix: true);
  OutStreamer->AddComment(T: "# XRay Typed Event Log" );
  OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo());
  OutStreamer->emitLabel(Symbol: CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way do force the relative jump.
  OutStreamer->emitBinaryData(Data: "\xeb\x14" );

  // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
  // so we'll work with those. Or we may be called via SystemV, in which case
  // we don't have to do any translation.
  const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
  bool UsedMask[] = {false, false, false};

  // Will fill out src regs in the loop.
  Register SrcRegs[] = {0, 0, 0};

  // Then we put the operands in the SystemV registers. We spill the values in
  // the registers before we clobber them, and mark them as used in UsedMask.
  // In case the arguments are already in the correct register, we emit nops
  // appropriately sized to keep the sled the same size in every situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I))) {
      // TODO: Is register only support adequate?
      assert(Op->isReg() && "Only supports arguments in registers" );
      // Widen to the containing 64-bit register.
      SrcRegs[I] = getX86SubSuperRegister(Reg: Op->getReg(), Size: 64);
      assert(SrcRegs[I].isValid() && "Invalid operand" );
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        // Argument already in place: pad so the sled size stays constant.
        emitX86Nops(OS&: *OutStreamer, NumBytes: 4, Subtarget);
      }
    }

  // In the above loop we only stash all of the destination registers or emit
  // nops if the arguments are already in the right place. Doing the actual
  // moving is postponed until after all the registers are stashed so nothing
  // is clobbered. We've already added nops to account for the size of mov and
  // push if the register is in the right place, so we only have to worry about
  // emitting movs.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten over the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (UsedMask[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_TypedEvent" );
  MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers. Iterate in reverse so pops
  // mirror the pushes above; 1-byte nops keep the sled size constant for
  // slots that were never spilled.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(OS&: *OutStreamer, NumBytes: 1, Subtarget);

  OutStreamer->AddComment(T: "xray typed event end." );

  // Record the sled version.
  recordSled(Sled: CurSled, MI, Kind: SledKind::TYPED_EVENT, Version: 2);
}
1290 | |
// Lower a PATCHABLE_FUNCTION_ENTER pseudo. When the function carries a
// "patchable-function-entry" attribute, emit exactly that many NOPs and no
// XRay sled; otherwise emit the 11-byte XRay entry sled.
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                  X86MCInstLower &MCIL) {

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  const Function &F = MF->getFunction();
  if (F.hasFnAttribute(Kind: "patchable-function-entry" )) {
    unsigned Num;
    // A malformed attribute value produces no patchable region at all.
    if (F.getFnAttribute(Kind: "patchable-function-entry" )
            .getValueAsString()
            .getAsInteger(Radix: 10, Result&: Num))
      return;
    emitX86Nops(OS&: *OutStreamer, NumBytes: Num, Subtarget);
    return;
  }
  // We want to emit the following pattern:
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   jmp .tmpN
  //   # 9 bytes worth of noops
  //
  // We need the 9 bytes because at runtime, we'd be patching over the full 11
  // bytes with the following pattern:
  //
  //   mov %r10, <function id, 32-bit>   // 6 bytes
  //   call <relative offset, 32-bits>   // 5 bytes
  //
  auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true);
  OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo());
  OutStreamer->emitLabel(Symbol: CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way do force the relative jump.
  OutStreamer->emitBytes(Data: "\xeb\x09" );
  emitX86Nops(OS&: *OutStreamer, NumBytes: 9, Subtarget);
  recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_ENTER, Version: 2);
}
1330 | |
1331 | void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, |
1332 | X86MCInstLower &MCIL) { |
1333 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1334 | |
1335 | // Since PATCHABLE_RET takes the opcode of the return statement as an |
1336 | // argument, we use that to emit the correct form of the RET that we want. |
1337 | // i.e. when we see this: |
1338 | // |
1339 | // PATCHABLE_RET X86::RET ... |
1340 | // |
1341 | // We should emit the RET followed by sleds. |
1342 | // |
1343 | // .p2align 1, ... |
1344 | // .Lxray_sled_N: |
1345 | // ret # or equivalent instruction |
1346 | // # 10 bytes worth of noops |
1347 | // |
1348 | // This just makes sure that the alignment for the next instruction is 2. |
1349 | auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true); |
1350 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1351 | OutStreamer->emitLabel(Symbol: CurSled); |
1352 | unsigned OpCode = MI.getOperand(i: 0).getImm(); |
1353 | MCInst Ret; |
1354 | Ret.setOpcode(OpCode); |
1355 | for (auto &MO : drop_begin(RangeOrContainer: MI.operands())) |
1356 | if (auto MaybeOperand = MCIL.LowerMachineOperand(MI: &MI, MO)) |
1357 | Ret.addOperand(Op: *MaybeOperand); |
1358 | OutStreamer->emitInstruction(Inst: Ret, STI: getSubtargetInfo()); |
1359 | emitX86Nops(OS&: *OutStreamer, NumBytes: 10, Subtarget); |
1360 | recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_EXIT, Version: 2); |
1361 | } |
1362 | |
// Lower a PATCHABLE_TAIL_CALL pseudo: emit an entry-style XRay sled first,
// then the real tail jump rebuilt from operand 0 (its opcode) and the
// remaining operands.
void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
                                             X86MCInstLower &MCIL) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Like PATCHABLE_RET, we have the actual instruction in the operands to this
  // instruction so we lower that particular instruction and its operands.
  // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
  // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
  // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
  // tail call much like how we have it in PATCHABLE_RET.
  auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true);
  OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo());
  OutStreamer->emitLabel(Symbol: CurSled);
  auto Target = OutContext.createTempSymbol();

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way do force the relative jump.
  OutStreamer->emitBytes(Data: "\xeb\x09" );
  emitX86Nops(OS&: *OutStreamer, NumBytes: 9, Subtarget);
  OutStreamer->emitLabel(Symbol: Target);
  recordSled(Sled: CurSled, MI, Kind: SledKind::TAIL_CALL, Version: 2);

  // Translate the pseudo tail-jump opcode to the plain jump it encodes.
  unsigned OpCode = MI.getOperand(i: 0).getImm();
  OpCode = convertTailJumpOpcode(Opcode: OpCode);
  MCInst TC;
  TC.setOpcode(OpCode);

  // Before emitting the instruction, add a comment to indicate that this is
  // indeed a tail call.
  OutStreamer->AddComment(T: "TAILCALL" );
  for (auto &MO : drop_begin(RangeOrContainer: MI.operands()))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(MI: &MI, MO))
      TC.addOperand(Op: *MaybeOperand);
  OutStreamer->emitInstruction(Inst: TC, STI: getSubtargetInfo());
}
1399 | |
1400 | // Returns instruction preceding MBBI in MachineFunction. |
1401 | // If MBBI is the first instruction of the first basic block, returns null. |
1402 | static MachineBasicBlock::const_iterator |
1403 | PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) { |
1404 | const MachineBasicBlock *MBB = MBBI->getParent(); |
1405 | while (MBBI == MBB->begin()) { |
1406 | if (MBB == &MBB->getParent()->front()) |
1407 | return MachineBasicBlock::const_iterator(); |
1408 | MBB = MBB->getPrevNode(); |
1409 | MBBI = MBB->end(); |
1410 | } |
1411 | --MBBI; |
1412 | return MBBI; |
1413 | } |
1414 | |
1415 | static unsigned getSrcIdx(const MachineInstr* MI, unsigned SrcIdx) { |
1416 | if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) { |
1417 | // Skip mask operand. |
1418 | ++SrcIdx; |
1419 | if (X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) { |
1420 | // Skip passthru operand. |
1421 | ++SrcIdx; |
1422 | } |
1423 | } |
1424 | return SrcIdx; |
1425 | } |
1426 | |
1427 | static void printDstRegisterName(raw_ostream &CS, const MachineInstr *MI, |
1428 | unsigned SrcOpIdx) { |
1429 | const MachineOperand &DstOp = MI->getOperand(i: 0); |
1430 | CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg()); |
1431 | |
1432 | // Handle AVX512 MASK/MASXZ write mask comments. |
1433 | // MASK: zmmX {%kY} |
1434 | // MASKZ: zmmX {%kY} {z} |
1435 | if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) { |
1436 | const MachineOperand &WriteMaskOp = MI->getOperand(i: SrcOpIdx - 1); |
1437 | StringRef Mask = X86ATTInstPrinter::getRegisterName(Reg: WriteMaskOp.getReg()); |
1438 | CS << " {%" << Mask << "}" ; |
1439 | if (!X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) { |
1440 | CS << " {z}" ; |
1441 | } |
1442 | } |
1443 | } |
1444 | |
// Prints a decoded shuffle mask, e.g. "xmm0[0,1],zero,mem[3,2]". Mask
// values < e select from Src1, values >= e select from Src2; sentinel
// values mark zeroed or undef elements. Consecutive elements drawn from
// the same source are folded into a single bracketed span.
static void printShuffleMask(raw_ostream &CS, StringRef Src1Name,
                             StringRef Src2Name, ArrayRef<int> Mask) {
  // One source operand, fix the mask to print all elements in one span.
  SmallVector<int, 8> ShuffleMask(Mask);
  if (Src1Name == Src2Name)
    for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
      if (ShuffleMask[i] >= e)
        ShuffleMask[i] -= e;

  for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
    if (i != 0)
      CS << "," ;
    if (ShuffleMask[i] == SM_SentinelZero) {
      CS << "zero" ;
      continue;
    }

    // Otherwise, it must come from src1 or src2. Print the span of elements
    // that comes from this src.
    bool isSrc1 = ShuffleMask[i] < (int)e;
    CS << (isSrc1 ? Src1Name : Src2Name) << '[';

    bool IsFirst = true;
    // Greedily consume every consecutive element drawn from the same
    // source (undef elements stay inside the current span).
    while (i != e && ShuffleMask[i] != SM_SentinelZero &&
           (ShuffleMask[i] < (int)e) == isSrc1) {
      if (!IsFirst)
        CS << ',';
      else
        IsFirst = false;
      if (ShuffleMask[i] == SM_SentinelUndef)
        CS << "u" ;
      else
        CS << ShuffleMask[i] % (int)e;
      ++i;
    }
    CS << ']';
    --i; // For loop increments element #.
  }
}
1484 | |
1485 | static std::string (const MachineInstr *MI, unsigned SrcOp1Idx, |
1486 | unsigned SrcOp2Idx, ArrayRef<int> Mask) { |
1487 | std::string ; |
1488 | |
1489 | const MachineOperand &SrcOp1 = MI->getOperand(i: SrcOp1Idx); |
1490 | const MachineOperand &SrcOp2 = MI->getOperand(i: SrcOp2Idx); |
1491 | StringRef Src1Name = SrcOp1.isReg() |
1492 | ? X86ATTInstPrinter::getRegisterName(Reg: SrcOp1.getReg()) |
1493 | : "mem" ; |
1494 | StringRef Src2Name = SrcOp2.isReg() |
1495 | ? X86ATTInstPrinter::getRegisterName(Reg: SrcOp2.getReg()) |
1496 | : "mem" ; |
1497 | |
1498 | raw_string_ostream CS(Comment); |
1499 | printDstRegisterName(CS, MI, SrcOpIdx: SrcOp1Idx); |
1500 | CS << " = " ; |
1501 | printShuffleMask(CS, Src1Name, Src2Name, Mask); |
1502 | CS.flush(); |
1503 | |
1504 | return Comment; |
1505 | } |
1506 | |
1507 | static void printConstant(const APInt &Val, raw_ostream &CS, |
1508 | bool PrintZero = false) { |
1509 | if (Val.getBitWidth() <= 64) { |
1510 | CS << (PrintZero ? 0ULL : Val.getZExtValue()); |
1511 | } else { |
1512 | // print multi-word constant as (w0,w1) |
1513 | CS << "(" ; |
1514 | for (int i = 0, N = Val.getNumWords(); i < N; ++i) { |
1515 | if (i > 0) |
1516 | CS << "," ; |
1517 | CS << (PrintZero ? 0ULL : Val.getRawData()[i]); |
1518 | } |
1519 | CS << ")" ; |
1520 | } |
1521 | } |
1522 | |
1523 | static void printConstant(const APFloat &Flt, raw_ostream &CS, |
1524 | bool PrintZero = false) { |
1525 | SmallString<32> Str; |
1526 | // Force scientific notation to distinguish from integers. |
1527 | if (PrintZero) |
1528 | APFloat::getZero(Sem: Flt.getSemantics()).toString(Str, FormatPrecision: 0, FormatMaxPadding: 0); |
1529 | else |
1530 | Flt.toString(Str, FormatPrecision: 0, FormatMaxPadding: 0); |
1531 | CS << Str; |
1532 | } |
1533 | |
1534 | static void printConstant(const Constant *COp, unsigned BitWidth, |
1535 | raw_ostream &CS, bool PrintZero = false) { |
1536 | if (isa<UndefValue>(Val: COp)) { |
1537 | CS << "u" ; |
1538 | } else if (auto *CI = dyn_cast<ConstantInt>(Val: COp)) { |
1539 | printConstant(Val: CI->getValue(), CS, PrintZero); |
1540 | } else if (auto *CF = dyn_cast<ConstantFP>(Val: COp)) { |
1541 | printConstant(Flt: CF->getValueAPF(), CS, PrintZero); |
1542 | } else if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: COp)) { |
1543 | Type *EltTy = CDS->getElementType(); |
1544 | bool IsInteger = EltTy->isIntegerTy(); |
1545 | bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy(); |
1546 | unsigned EltBits = EltTy->getPrimitiveSizeInBits(); |
1547 | unsigned E = std::min(a: BitWidth / EltBits, b: CDS->getNumElements()); |
1548 | assert((BitWidth % EltBits) == 0 && "Element size mismatch" ); |
1549 | for (unsigned I = 0; I != E; ++I) { |
1550 | if (I != 0) |
1551 | CS << "," ; |
1552 | if (IsInteger) |
1553 | printConstant(Val: CDS->getElementAsAPInt(i: I), CS, PrintZero); |
1554 | else if (IsFP) |
1555 | printConstant(Flt: CDS->getElementAsAPFloat(i: I), CS, PrintZero); |
1556 | else |
1557 | CS << "?" ; |
1558 | } |
1559 | } else if (auto *CV = dyn_cast<ConstantVector>(Val: COp)) { |
1560 | unsigned EltBits = CV->getType()->getScalarSizeInBits(); |
1561 | unsigned E = std::min(a: BitWidth / EltBits, b: CV->getNumOperands()); |
1562 | assert((BitWidth % EltBits) == 0 && "Element size mismatch" ); |
1563 | for (unsigned I = 0; I != E; ++I) { |
1564 | if (I != 0) |
1565 | CS << "," ; |
1566 | printConstant(COp: CV->getOperand(i_nocapture: I), BitWidth: EltBits, CS, PrintZero); |
1567 | } |
1568 | } else { |
1569 | CS << "?" ; |
1570 | } |
1571 | } |
1572 | |
1573 | static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer, |
1574 | int SclWidth, int VecWidth, |
1575 | const char *) { |
1576 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1577 | |
1578 | std::string ; |
1579 | raw_string_ostream CS(Comment); |
1580 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1581 | CS << " = " ; |
1582 | |
1583 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx)) { |
1584 | CS << "[" ; |
1585 | printConstant(COp: C, BitWidth: SclWidth, CS); |
1586 | for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) { |
1587 | CS << "," ; |
1588 | printConstant(COp: C, BitWidth: SclWidth, CS, PrintZero: true); |
1589 | } |
1590 | CS << "]" ; |
1591 | OutStreamer.AddComment(T: CS.str()); |
1592 | return; // early-out |
1593 | } |
1594 | |
1595 | // We didn't find a constant load, fallback to a shuffle mask decode. |
1596 | CS << ShuffleComment; |
1597 | OutStreamer.AddComment(T: CS.str()); |
1598 | } |
1599 | |
1600 | static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer, |
1601 | int Repeats, int BitWidth) { |
1602 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1603 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx)) { |
1604 | std::string ; |
1605 | raw_string_ostream CS(Comment); |
1606 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1607 | CS << " = [" ; |
1608 | for (int l = 0; l != Repeats; ++l) { |
1609 | if (l != 0) |
1610 | CS << "," ; |
1611 | printConstant(COp: C, BitWidth, CS); |
1612 | } |
1613 | CS << "]" ; |
1614 | OutStreamer.AddComment(T: CS.str()); |
1615 | } |
1616 | } |
1617 | |
1618 | static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1619 | int SrcEltBits, int DstEltBits, bool IsSext) { |
1620 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1621 | auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx); |
1622 | if (C && C->getType()->getScalarSizeInBits() == unsigned(SrcEltBits)) { |
1623 | if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: C)) { |
1624 | int NumElts = CDS->getNumElements(); |
1625 | std::string ; |
1626 | raw_string_ostream CS(Comment); |
1627 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1628 | CS << " = [" ; |
1629 | for (int i = 0; i != NumElts; ++i) { |
1630 | if (i != 0) |
1631 | CS << "," ; |
1632 | if (CDS->getElementType()->isIntegerTy()) { |
1633 | APInt Elt = CDS->getElementAsAPInt(i); |
1634 | Elt = IsSext ? Elt.sext(width: DstEltBits) : Elt.zext(width: DstEltBits); |
1635 | printConstant(Val: Elt, CS); |
1636 | } else |
1637 | CS << "?" ; |
1638 | } |
1639 | CS << "]" ; |
1640 | OutStreamer.AddComment(T: CS.str()); |
1641 | return true; |
1642 | } |
1643 | } |
1644 | |
1645 | return false; |
1646 | } |
1647 | static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1648 | int SrcEltBits, int DstEltBits) { |
1649 | printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: true); |
1650 | } |
1651 | static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1652 | int SrcEltBits, int DstEltBits) { |
1653 | if (printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: false)) |
1654 | return; |
1655 | |
1656 | // We didn't find a constant load, fallback to a shuffle mask decode. |
1657 | std::string ; |
1658 | raw_string_ostream CS(Comment); |
1659 | printDstRegisterName(CS, MI, SrcOpIdx: getSrcIdx(MI, SrcIdx: 1)); |
1660 | CS << " = " ; |
1661 | |
1662 | SmallVector<int> Mask; |
1663 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1664 | assert((Width % DstEltBits) == 0 && (DstEltBits % SrcEltBits) == 0 && |
1665 | "Illegal extension ratio" ); |
1666 | DecodeZeroExtendMask(SrcScalarBits: SrcEltBits, DstScalarBits: DstEltBits, NumDstElts: Width / DstEltBits, IsAnyExtend: false, ShuffleMask&: Mask); |
1667 | printShuffleMask(CS, Src1Name: "mem" , Src2Name: "" , Mask); |
1668 | |
1669 | OutStreamer.AddComment(T: CS.str()); |
1670 | } |
1671 | |
1672 | void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { |
1673 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?" ); |
1674 | assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) && |
1675 | "SEH_ instruction Windows and UEFI only" ); |
1676 | |
1677 | // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86. |
1678 | if (EmitFPOData) { |
1679 | X86TargetStreamer *XTS = |
1680 | static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()); |
1681 | switch (MI->getOpcode()) { |
1682 | case X86::SEH_PushReg: |
1683 | XTS->emitFPOPushReg(Reg: MI->getOperand(i: 0).getImm()); |
1684 | break; |
1685 | case X86::SEH_StackAlloc: |
1686 | XTS->emitFPOStackAlloc(StackAlloc: MI->getOperand(i: 0).getImm()); |
1687 | break; |
1688 | case X86::SEH_StackAlign: |
1689 | XTS->emitFPOStackAlign(Align: MI->getOperand(i: 0).getImm()); |
1690 | break; |
1691 | case X86::SEH_SetFrame: |
1692 | assert(MI->getOperand(1).getImm() == 0 && |
1693 | ".cv_fpo_setframe takes no offset" ); |
1694 | XTS->emitFPOSetFrame(Reg: MI->getOperand(i: 0).getImm()); |
1695 | break; |
1696 | case X86::SEH_EndPrologue: |
1697 | XTS->emitFPOEndPrologue(); |
1698 | break; |
1699 | case X86::SEH_SaveReg: |
1700 | case X86::SEH_SaveXMM: |
1701 | case X86::SEH_PushFrame: |
1702 | llvm_unreachable("SEH_ directive incompatible with FPO" ); |
1703 | break; |
1704 | default: |
1705 | llvm_unreachable("expected SEH_ instruction" ); |
1706 | } |
1707 | return; |
1708 | } |
1709 | |
1710 | // Otherwise, use the .seh_ directives for all other Windows platforms. |
1711 | switch (MI->getOpcode()) { |
1712 | case X86::SEH_PushReg: |
1713 | OutStreamer->emitWinCFIPushReg(Register: MI->getOperand(i: 0).getImm()); |
1714 | break; |
1715 | |
1716 | case X86::SEH_SaveReg: |
1717 | OutStreamer->emitWinCFISaveReg(Register: MI->getOperand(i: 0).getImm(), |
1718 | Offset: MI->getOperand(i: 1).getImm()); |
1719 | break; |
1720 | |
1721 | case X86::SEH_SaveXMM: |
1722 | OutStreamer->emitWinCFISaveXMM(Register: MI->getOperand(i: 0).getImm(), |
1723 | Offset: MI->getOperand(i: 1).getImm()); |
1724 | break; |
1725 | |
1726 | case X86::SEH_StackAlloc: |
1727 | OutStreamer->emitWinCFIAllocStack(Size: MI->getOperand(i: 0).getImm()); |
1728 | break; |
1729 | |
1730 | case X86::SEH_SetFrame: |
1731 | OutStreamer->emitWinCFISetFrame(Register: MI->getOperand(i: 0).getImm(), |
1732 | Offset: MI->getOperand(i: 1).getImm()); |
1733 | break; |
1734 | |
1735 | case X86::SEH_PushFrame: |
1736 | OutStreamer->emitWinCFIPushFrame(Code: MI->getOperand(i: 0).getImm()); |
1737 | break; |
1738 | |
1739 | case X86::SEH_EndPrologue: |
1740 | OutStreamer->emitWinCFIEndProlog(); |
1741 | break; |
1742 | |
1743 | default: |
1744 | llvm_unreachable("expected SEH_ instruction" ); |
1745 | } |
1746 | } |
1747 | |
1748 | static void (const MachineInstr *MI, |
1749 | MCStreamer &OutStreamer) { |
1750 | switch (MI->getOpcode()) { |
1751 | // Lower PSHUFB and VPERMILP normally but add a comment if we can find |
1752 | // a constant shuffle mask. We won't be able to do this at the MC layer |
1753 | // because the mask isn't an immediate. |
1754 | case X86::PSHUFBrm: |
1755 | case X86::VPSHUFBrm: |
1756 | case X86::VPSHUFBYrm: |
1757 | case X86::VPSHUFBZ128rm: |
1758 | case X86::VPSHUFBZ128rmk: |
1759 | case X86::VPSHUFBZ128rmkz: |
1760 | case X86::VPSHUFBZ256rm: |
1761 | case X86::VPSHUFBZ256rmk: |
1762 | case X86::VPSHUFBZ256rmkz: |
1763 | case X86::VPSHUFBZrm: |
1764 | case X86::VPSHUFBZrmk: |
1765 | case X86::VPSHUFBZrmkz: { |
1766 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1767 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1768 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1769 | SmallVector<int, 64> Mask; |
1770 | DecodePSHUFBMask(C, Width, ShuffleMask&: Mask); |
1771 | if (!Mask.empty()) |
1772 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1773 | } |
1774 | break; |
1775 | } |
1776 | |
1777 | case X86::VPERMILPSrm: |
1778 | case X86::VPERMILPSYrm: |
1779 | case X86::VPERMILPSZ128rm: |
1780 | case X86::VPERMILPSZ128rmk: |
1781 | case X86::VPERMILPSZ128rmkz: |
1782 | case X86::VPERMILPSZ256rm: |
1783 | case X86::VPERMILPSZ256rmk: |
1784 | case X86::VPERMILPSZ256rmkz: |
1785 | case X86::VPERMILPSZrm: |
1786 | case X86::VPERMILPSZrmk: |
1787 | case X86::VPERMILPSZrmkz: { |
1788 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1789 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1790 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1791 | SmallVector<int, 16> Mask; |
1792 | DecodeVPERMILPMask(C, ElSize: 32, Width, ShuffleMask&: Mask); |
1793 | if (!Mask.empty()) |
1794 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1795 | } |
1796 | break; |
1797 | } |
1798 | case X86::VPERMILPDrm: |
1799 | case X86::VPERMILPDYrm: |
1800 | case X86::VPERMILPDZ128rm: |
1801 | case X86::VPERMILPDZ128rmk: |
1802 | case X86::VPERMILPDZ128rmkz: |
1803 | case X86::VPERMILPDZ256rm: |
1804 | case X86::VPERMILPDZ256rmk: |
1805 | case X86::VPERMILPDZ256rmkz: |
1806 | case X86::VPERMILPDZrm: |
1807 | case X86::VPERMILPDZrmk: |
1808 | case X86::VPERMILPDZrmkz: { |
1809 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1810 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1811 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1812 | SmallVector<int, 16> Mask; |
1813 | DecodeVPERMILPMask(C, ElSize: 64, Width, ShuffleMask&: Mask); |
1814 | if (!Mask.empty()) |
1815 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1816 | } |
1817 | break; |
1818 | } |
1819 | |
1820 | case X86::VPERMIL2PDrm: |
1821 | case X86::VPERMIL2PSrm: |
1822 | case X86::VPERMIL2PDYrm: |
1823 | case X86::VPERMIL2PSYrm: { |
1824 | assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) && |
1825 | "Unexpected number of operands!" ); |
1826 | |
1827 | const MachineOperand &CtrlOp = MI->getOperand(i: MI->getNumOperands() - 1); |
1828 | if (!CtrlOp.isImm()) |
1829 | break; |
1830 | |
1831 | unsigned ElSize; |
1832 | switch (MI->getOpcode()) { |
1833 | default: llvm_unreachable("Invalid opcode" ); |
1834 | case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break; |
1835 | case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break; |
1836 | } |
1837 | |
1838 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 3)) { |
1839 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1840 | SmallVector<int, 16> Mask; |
1841 | DecodeVPERMIL2PMask(C, M2Z: (unsigned)CtrlOp.getImm(), ElSize, Width, ShuffleMask&: Mask); |
1842 | if (!Mask.empty()) |
1843 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: 1, SrcOp2Idx: 2, Mask)); |
1844 | } |
1845 | break; |
1846 | } |
1847 | |
1848 | case X86::VPPERMrrm: { |
1849 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 3)) { |
1850 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1851 | SmallVector<int, 16> Mask; |
1852 | DecodeVPPERMMask(C, Width, ShuffleMask&: Mask); |
1853 | if (!Mask.empty()) |
1854 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: 1, SrcOp2Idx: 2, Mask)); |
1855 | } |
1856 | break; |
1857 | } |
1858 | |
1859 | case X86::MMX_MOVQ64rm: { |
1860 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 1)) { |
1861 | std::string ; |
1862 | raw_string_ostream CS(Comment); |
1863 | const MachineOperand &DstOp = MI->getOperand(i: 0); |
1864 | CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg()) << " = " ; |
1865 | if (auto *CF = dyn_cast<ConstantFP>(Val: C)) { |
1866 | CS << "0x" << toString(I: CF->getValueAPF().bitcastToAPInt(), Radix: 16, Signed: false); |
1867 | OutStreamer.AddComment(T: CS.str()); |
1868 | } |
1869 | } |
1870 | break; |
1871 | } |
1872 | |
1873 | #define MASK_AVX512_CASE(Instr) \ |
1874 | case Instr: \ |
1875 | case Instr##k: \ |
1876 | case Instr##kz: |
1877 | |
1878 | case X86::MOVSDrm: |
1879 | case X86::VMOVSDrm: |
1880 | MASK_AVX512_CASE(X86::VMOVSDZrm) |
1881 | case X86::MOVSDrm_alt: |
1882 | case X86::VMOVSDrm_alt: |
1883 | case X86::VMOVSDZrm_alt: |
1884 | case X86::MOVQI2PQIrm: |
1885 | case X86::VMOVQI2PQIrm: |
1886 | case X86::VMOVQI2PQIZrm: |
1887 | printZeroUpperMove(MI, OutStreamer, SclWidth: 64, VecWidth: 128, ShuffleComment: "mem[0],zero" ); |
1888 | break; |
1889 | |
1890 | MASK_AVX512_CASE(X86::VMOVSHZrm) |
1891 | case X86::VMOVSHZrm_alt: |
1892 | printZeroUpperMove(MI, OutStreamer, SclWidth: 16, VecWidth: 128, |
1893 | ShuffleComment: "mem[0],zero,zero,zero,zero,zero,zero,zero" ); |
1894 | break; |
1895 | |
1896 | case X86::MOVSSrm: |
1897 | case X86::VMOVSSrm: |
1898 | MASK_AVX512_CASE(X86::VMOVSSZrm) |
1899 | case X86::MOVSSrm_alt: |
1900 | case X86::VMOVSSrm_alt: |
1901 | case X86::VMOVSSZrm_alt: |
1902 | case X86::MOVDI2PDIrm: |
1903 | case X86::VMOVDI2PDIrm: |
1904 | case X86::VMOVDI2PDIZrm: |
1905 | printZeroUpperMove(MI, OutStreamer, SclWidth: 32, VecWidth: 128, ShuffleComment: "mem[0],zero,zero,zero" ); |
1906 | break; |
1907 | |
1908 | #define MOV_CASE(Prefix, Suffix) \ |
1909 | case X86::Prefix##MOVAPD##Suffix##rm: \ |
1910 | case X86::Prefix##MOVAPS##Suffix##rm: \ |
1911 | case X86::Prefix##MOVUPD##Suffix##rm: \ |
1912 | case X86::Prefix##MOVUPS##Suffix##rm: \ |
1913 | case X86::Prefix##MOVDQA##Suffix##rm: \ |
1914 | case X86::Prefix##MOVDQU##Suffix##rm: |
1915 | |
1916 | #define MOV_AVX512_CASE(Suffix, Postfix) \ |
1917 | case X86::VMOVDQA64##Suffix##rm##Postfix: \ |
1918 | case X86::VMOVDQA32##Suffix##rm##Postfix: \ |
1919 | case X86::VMOVDQU64##Suffix##rm##Postfix: \ |
1920 | case X86::VMOVDQU32##Suffix##rm##Postfix: \ |
1921 | case X86::VMOVDQU16##Suffix##rm##Postfix: \ |
1922 | case X86::VMOVDQU8##Suffix##rm##Postfix: \ |
1923 | case X86::VMOVAPS##Suffix##rm##Postfix: \ |
1924 | case X86::VMOVAPD##Suffix##rm##Postfix: \ |
1925 | case X86::VMOVUPS##Suffix##rm##Postfix: \ |
1926 | case X86::VMOVUPD##Suffix##rm##Postfix: |
1927 | |
1928 | #define CASE_128_MOV_RM() \ |
1929 | MOV_CASE(, ) /* SSE */ \ |
1930 | MOV_CASE(V, ) /* AVX-128 */ \ |
1931 | MOV_AVX512_CASE(Z128, ) \ |
1932 | MOV_AVX512_CASE(Z128, k) \ |
1933 | MOV_AVX512_CASE(Z128, kz) |
1934 | |
1935 | #define CASE_256_MOV_RM() \ |
1936 | MOV_CASE(V, Y) /* AVX-256 */ \ |
1937 | MOV_AVX512_CASE(Z256, ) \ |
1938 | MOV_AVX512_CASE(Z256, k) \ |
1939 | MOV_AVX512_CASE(Z256, kz) \ |
1940 | |
1941 | #define CASE_512_MOV_RM() \ |
1942 | MOV_AVX512_CASE(Z, ) \ |
1943 | MOV_AVX512_CASE(Z, k) \ |
1944 | MOV_AVX512_CASE(Z, kz) \ |
1945 | |
1946 | // For loads from a constant pool to a vector register, print the constant |
1947 | // loaded. |
1948 | CASE_128_MOV_RM() |
1949 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 128); |
1950 | break; |
1951 | CASE_256_MOV_RM() |
1952 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 256); |
1953 | break; |
1954 | CASE_512_MOV_RM() |
1955 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 512); |
1956 | break; |
1957 | case X86::VBROADCASTF128rm: |
1958 | case X86::VBROADCASTI128rm: |
1959 | MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm) |
1960 | MASK_AVX512_CASE(X86::VBROADCASTF64X2Z128rm) |
1961 | MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm) |
1962 | MASK_AVX512_CASE(X86::VBROADCASTI64X2Z128rm) |
1963 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 128); |
1964 | break; |
1965 | MASK_AVX512_CASE(X86::VBROADCASTF32X4rm) |
1966 | MASK_AVX512_CASE(X86::VBROADCASTF64X2rm) |
1967 | MASK_AVX512_CASE(X86::VBROADCASTI32X4rm) |
1968 | MASK_AVX512_CASE(X86::VBROADCASTI64X2rm) |
1969 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 128); |
1970 | break; |
1971 | MASK_AVX512_CASE(X86::VBROADCASTF32X8rm) |
1972 | MASK_AVX512_CASE(X86::VBROADCASTF64X4rm) |
1973 | MASK_AVX512_CASE(X86::VBROADCASTI32X8rm) |
1974 | MASK_AVX512_CASE(X86::VBROADCASTI64X4rm) |
1975 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 256); |
1976 | break; |
1977 | |
1978 | // For broadcast loads from a constant pool to a vector register, repeatedly |
1979 | // print the constant loaded. |
1980 | case X86::MOVDDUPrm: |
1981 | case X86::VMOVDDUPrm: |
1982 | MASK_AVX512_CASE(X86::VMOVDDUPZ128rm) |
1983 | case X86::VPBROADCASTQrm: |
1984 | MASK_AVX512_CASE(X86::VPBROADCASTQZ128rm) |
1985 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 64); |
1986 | break; |
1987 | case X86::VBROADCASTSDYrm: |
1988 | MASK_AVX512_CASE(X86::VBROADCASTSDZ256rm) |
1989 | case X86::VPBROADCASTQYrm: |
1990 | MASK_AVX512_CASE(X86::VPBROADCASTQZ256rm) |
1991 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 64); |
1992 | break; |
1993 | MASK_AVX512_CASE(X86::VBROADCASTSDZrm) |
1994 | MASK_AVX512_CASE(X86::VPBROADCASTQZrm) |
1995 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 64); |
1996 | break; |
1997 | case X86::VBROADCASTSSrm: |
1998 | MASK_AVX512_CASE(X86::VBROADCASTSSZ128rm) |
1999 | case X86::VPBROADCASTDrm: |
2000 | MASK_AVX512_CASE(X86::VPBROADCASTDZ128rm) |
2001 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 32); |
2002 | break; |
2003 | case X86::VBROADCASTSSYrm: |
2004 | MASK_AVX512_CASE(X86::VBROADCASTSSZ256rm) |
2005 | case X86::VPBROADCASTDYrm: |
2006 | MASK_AVX512_CASE(X86::VPBROADCASTDZ256rm) |
2007 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 32); |
2008 | break; |
2009 | MASK_AVX512_CASE(X86::VBROADCASTSSZrm) |
2010 | MASK_AVX512_CASE(X86::VPBROADCASTDZrm) |
2011 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 32); |
2012 | break; |
2013 | case X86::VPBROADCASTWrm: |
2014 | MASK_AVX512_CASE(X86::VPBROADCASTWZ128rm) |
2015 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 16); |
2016 | break; |
2017 | case X86::VPBROADCASTWYrm: |
2018 | MASK_AVX512_CASE(X86::VPBROADCASTWZ256rm) |
2019 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 16); |
2020 | break; |
2021 | MASK_AVX512_CASE(X86::VPBROADCASTWZrm) |
2022 | printBroadcast(MI, OutStreamer, Repeats: 32, BitWidth: 16); |
2023 | break; |
2024 | case X86::VPBROADCASTBrm: |
2025 | MASK_AVX512_CASE(X86::VPBROADCASTBZ128rm) |
2026 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 8); |
2027 | break; |
2028 | case X86::VPBROADCASTBYrm: |
2029 | MASK_AVX512_CASE(X86::VPBROADCASTBZ256rm) |
2030 | printBroadcast(MI, OutStreamer, Repeats: 32, BitWidth: 8); |
2031 | break; |
2032 | MASK_AVX512_CASE(X86::VPBROADCASTBZrm) |
2033 | printBroadcast(MI, OutStreamer, Repeats: 64, BitWidth: 8); |
2034 | break; |
2035 | |
2036 | #define MOVX_CASE(Prefix, Ext, Type, Suffix, Postfix) \ |
2037 | case X86::Prefix##PMOV##Ext##Type##Suffix##rm##Postfix: |
2038 | |
2039 | #define CASE_MOVX_RM(Ext, Type) \ |
2040 | MOVX_CASE(, Ext, Type, , ) \ |
2041 | MOVX_CASE(V, Ext, Type, , ) \ |
2042 | MOVX_CASE(V, Ext, Type, Y, ) \ |
2043 | MOVX_CASE(V, Ext, Type, Z128, ) \ |
2044 | MOVX_CASE(V, Ext, Type, Z128, k ) \ |
2045 | MOVX_CASE(V, Ext, Type, Z128, kz ) \ |
2046 | MOVX_CASE(V, Ext, Type, Z256, ) \ |
2047 | MOVX_CASE(V, Ext, Type, Z256, k ) \ |
2048 | MOVX_CASE(V, Ext, Type, Z256, kz ) \ |
2049 | MOVX_CASE(V, Ext, Type, Z, ) \ |
2050 | MOVX_CASE(V, Ext, Type, Z, k ) \ |
2051 | MOVX_CASE(V, Ext, Type, Z, kz ) |
2052 | |
2053 | CASE_MOVX_RM(SX, BD) |
2054 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 32); |
2055 | break; |
2056 | CASE_MOVX_RM(SX, BQ) |
2057 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 64); |
2058 | break; |
2059 | CASE_MOVX_RM(SX, BW) |
2060 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 16); |
2061 | break; |
2062 | CASE_MOVX_RM(SX, DQ) |
2063 | printSignExtend(MI, OutStreamer, SrcEltBits: 32, DstEltBits: 64); |
2064 | break; |
2065 | CASE_MOVX_RM(SX, WD) |
2066 | printSignExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 32); |
2067 | break; |
2068 | CASE_MOVX_RM(SX, WQ) |
2069 | printSignExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 64); |
2070 | break; |
2071 | |
2072 | CASE_MOVX_RM(ZX, BD) |
2073 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 32); |
2074 | break; |
2075 | CASE_MOVX_RM(ZX, BQ) |
2076 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 64); |
2077 | break; |
2078 | CASE_MOVX_RM(ZX, BW) |
2079 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 16); |
2080 | break; |
2081 | CASE_MOVX_RM(ZX, DQ) |
2082 | printZeroExtend(MI, OutStreamer, SrcEltBits: 32, DstEltBits: 64); |
2083 | break; |
2084 | CASE_MOVX_RM(ZX, WD) |
2085 | printZeroExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 32); |
2086 | break; |
2087 | CASE_MOVX_RM(ZX, WQ) |
2088 | printZeroExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 64); |
2089 | break; |
2090 | } |
2091 | } |
2092 | |
2093 | void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { |
2094 | // FIXME: Enable feature predicate checks once all the test pass. |
2095 | // X86_MC::verifyInstructionPredicates(MI->getOpcode(), |
2096 | // Subtarget->getFeatureBits()); |
2097 | |
2098 | X86MCInstLower MCInstLowering(*MF, *this); |
2099 | const X86RegisterInfo *RI = |
2100 | MF->getSubtarget<X86Subtarget>().getRegisterInfo(); |
2101 | |
2102 | if (MI->getOpcode() == X86::OR64rm) { |
2103 | for (auto &Opd : MI->operands()) { |
2104 | if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) == |
2105 | "swift_async_extendedFramePointerFlags" ) { |
2106 | ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true; |
2107 | } |
2108 | } |
2109 | } |
2110 | |
2111 | // Add comments for values loaded from constant pool. |
2112 | if (OutStreamer->isVerboseAsm()) |
2113 | addConstantComments(MI, OutStreamer&: *OutStreamer); |
2114 | |
2115 | // Add a comment about EVEX compression |
2116 | if (TM.Options.MCOptions.ShowMCEncoding) { |
2117 | if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY) |
2118 | OutStreamer->AddComment(T: "EVEX TO LEGACY Compression " , EOL: false); |
2119 | else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX) |
2120 | OutStreamer->AddComment(T: "EVEX TO VEX Compression " , EOL: false); |
2121 | else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX) |
2122 | OutStreamer->AddComment(T: "EVEX TO EVEX Compression " , EOL: false); |
2123 | } |
2124 | |
2125 | switch (MI->getOpcode()) { |
2126 | case TargetOpcode::DBG_VALUE: |
2127 | llvm_unreachable("Should be handled target independently" ); |
2128 | |
2129 | case X86::EH_RETURN: |
2130 | case X86::EH_RETURN64: { |
2131 | // Lower these as normal, but add some comments. |
2132 | Register Reg = MI->getOperand(i: 0).getReg(); |
2133 | OutStreamer->AddComment(T: StringRef("eh_return, addr: %" ) + |
2134 | X86ATTInstPrinter::getRegisterName(Reg)); |
2135 | break; |
2136 | } |
2137 | case X86::CLEANUPRET: { |
2138 | // Lower these as normal, but add some comments. |
2139 | OutStreamer->AddComment(T: "CLEANUPRET" ); |
2140 | break; |
2141 | } |
2142 | |
2143 | case X86::CATCHRET: { |
2144 | // Lower these as normal, but add some comments. |
2145 | OutStreamer->AddComment(T: "CATCHRET" ); |
2146 | break; |
2147 | } |
2148 | |
2149 | case X86::ENDBR32: |
2150 | case X86::ENDBR64: { |
2151 | // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for |
2152 | // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be |
2153 | // non-empty. If MI is the initial ENDBR, place the |
2154 | // __patchable_function_entries label after ENDBR. |
2155 | if (CurrentPatchableFunctionEntrySym && |
2156 | CurrentPatchableFunctionEntrySym == CurrentFnBegin && |
2157 | MI == &MF->front().front()) { |
2158 | MCInst Inst; |
2159 | MCInstLowering.Lower(MI, OutMI&: Inst); |
2160 | EmitAndCountInstruction(Inst); |
2161 | CurrentPatchableFunctionEntrySym = createTempSymbol(Name: "patch" ); |
2162 | OutStreamer->emitLabel(Symbol: CurrentPatchableFunctionEntrySym); |
2163 | return; |
2164 | } |
2165 | break; |
2166 | } |
2167 | |
2168 | case X86::TAILJMPd64: |
2169 | if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11)) |
2170 | EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX)); |
2171 | [[fallthrough]]; |
2172 | case X86::TAILJMPr: |
2173 | case X86::TAILJMPm: |
2174 | case X86::TAILJMPd: |
2175 | case X86::TAILJMPd_CC: |
2176 | case X86::TAILJMPr64: |
2177 | case X86::TAILJMPm64: |
2178 | case X86::TAILJMPd64_CC: |
2179 | case X86::TAILJMPr64_REX: |
2180 | case X86::TAILJMPm64_REX: |
2181 | // Lower these as normal, but add some comments. |
2182 | OutStreamer->AddComment(T: "TAILCALL" ); |
2183 | break; |
2184 | |
2185 | case X86::TLS_addr32: |
2186 | case X86::TLS_addr64: |
2187 | case X86::TLS_addrX32: |
2188 | case X86::TLS_base_addr32: |
2189 | case X86::TLS_base_addr64: |
2190 | case X86::TLS_base_addrX32: |
2191 | case X86::TLS_desc32: |
2192 | case X86::TLS_desc64: |
2193 | return LowerTlsAddr(MCInstLowering, MI: *MI); |
2194 | |
2195 | case X86::MOVPC32r: { |
2196 | // This is a pseudo op for a two instruction sequence with a label, which |
2197 | // looks like: |
2198 | // call "L1$pb" |
2199 | // "L1$pb": |
2200 | // popl %esi |
2201 | |
2202 | // Emit the call. |
2203 | MCSymbol *PICBase = MF->getPICBaseSymbol(); |
2204 | // FIXME: We would like an efficient form for this, so we don't have to do a |
2205 | // lot of extra uniquing. |
2206 | EmitAndCountInstruction( |
2207 | MCInstBuilder(X86::CALLpcrel32) |
2208 | .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); |
2209 | |
2210 | const X86FrameLowering *FrameLowering = |
2211 | MF->getSubtarget<X86Subtarget>().getFrameLowering(); |
2212 | bool hasFP = FrameLowering->hasFP(MF: *MF); |
2213 | |
2214 | // TODO: This is needed only if we require precise CFA. |
2215 | bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && |
2216 | !OutStreamer->getDwarfFrameInfos().back().End; |
2217 | |
2218 | int stackGrowth = -RI->getSlotSize(); |
2219 | |
2220 | if (HasActiveDwarfFrame && !hasFP) { |
2221 | OutStreamer->emitCFIAdjustCfaOffset(Adjustment: -stackGrowth); |
2222 | MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true); |
2223 | } |
2224 | |
2225 | // Emit the label. |
2226 | OutStreamer->emitLabel(Symbol: PICBase); |
2227 | |
2228 | // popl $reg |
2229 | EmitAndCountInstruction( |
2230 | MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg())); |
2231 | |
2232 | if (HasActiveDwarfFrame && !hasFP) { |
2233 | OutStreamer->emitCFIAdjustCfaOffset(Adjustment: stackGrowth); |
2234 | } |
2235 | return; |
2236 | } |
2237 | |
2238 | case X86::ADD32ri: { |
2239 | // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. |
2240 | if (MI->getOperand(i: 2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) |
2241 | break; |
2242 | |
2243 | // Okay, we have something like: |
2244 | // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) |
2245 | |
2246 | // For this, we want to print something like: |
2247 | // MYGLOBAL + (. - PICBASE) |
2248 | // However, we can't generate a ".", so just emit a new label here and refer |
2249 | // to it. |
2250 | MCSymbol *DotSym = OutContext.createTempSymbol(); |
2251 | OutStreamer->emitLabel(Symbol: DotSym); |
2252 | |
2253 | // Now that we have emitted the label, lower the complex operand expression. |
2254 | MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MO: MI->getOperand(i: 2)); |
2255 | |
2256 | const MCExpr *DotExpr = MCSymbolRefExpr::create(Symbol: DotSym, Ctx&: OutContext); |
2257 | const MCExpr *PICBase = |
2258 | MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx&: OutContext); |
2259 | DotExpr = MCBinaryExpr::createSub(LHS: DotExpr, RHS: PICBase, Ctx&: OutContext); |
2260 | |
2261 | DotExpr = MCBinaryExpr::createAdd( |
2262 | LHS: MCSymbolRefExpr::create(Symbol: OpSym, Ctx&: OutContext), RHS: DotExpr, Ctx&: OutContext); |
2263 | |
2264 | EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri) |
2265 | .addReg(MI->getOperand(0).getReg()) |
2266 | .addReg(MI->getOperand(1).getReg()) |
2267 | .addExpr(DotExpr)); |
2268 | return; |
2269 | } |
2270 | case TargetOpcode::STATEPOINT: |
2271 | return LowerSTATEPOINT(MI: *MI, MCIL&: MCInstLowering); |
2272 | |
2273 | case TargetOpcode::FAULTING_OP: |
2274 | return LowerFAULTING_OP(FaultingMI: *MI, MCIL&: MCInstLowering); |
2275 | |
2276 | case TargetOpcode::FENTRY_CALL: |
2277 | return LowerFENTRY_CALL(MI: *MI, MCIL&: MCInstLowering); |
2278 | |
2279 | case TargetOpcode::PATCHABLE_OP: |
2280 | return LowerPATCHABLE_OP(MI: *MI, MCIL&: MCInstLowering); |
2281 | |
2282 | case TargetOpcode::STACKMAP: |
2283 | return LowerSTACKMAP(MI: *MI); |
2284 | |
2285 | case TargetOpcode::PATCHPOINT: |
2286 | return LowerPATCHPOINT(MI: *MI, MCIL&: MCInstLowering); |
2287 | |
2288 | case TargetOpcode::PATCHABLE_FUNCTION_ENTER: |
2289 | return LowerPATCHABLE_FUNCTION_ENTER(MI: *MI, MCIL&: MCInstLowering); |
2290 | |
2291 | case TargetOpcode::PATCHABLE_RET: |
2292 | return LowerPATCHABLE_RET(MI: *MI, MCIL&: MCInstLowering); |
2293 | |
2294 | case TargetOpcode::PATCHABLE_TAIL_CALL: |
2295 | return LowerPATCHABLE_TAIL_CALL(MI: *MI, MCIL&: MCInstLowering); |
2296 | |
2297 | case TargetOpcode::PATCHABLE_EVENT_CALL: |
2298 | return LowerPATCHABLE_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering); |
2299 | |
2300 | case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: |
2301 | return LowerPATCHABLE_TYPED_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering); |
2302 | |
2303 | case X86::MORESTACK_RET: |
2304 | EmitAndCountInstruction(Inst&: MCInstBuilder(getRetOpcode(Subtarget: *Subtarget))); |
2305 | return; |
2306 | |
2307 | case X86::KCFI_CHECK: |
2308 | return LowerKCFI_CHECK(MI: *MI); |
2309 | |
2310 | case X86::ASAN_CHECK_MEMACCESS: |
2311 | return LowerASAN_CHECK_MEMACCESS(MI: *MI); |
2312 | |
2313 | case X86::MORESTACK_RET_RESTORE_R10: |
2314 | // Return, then restore R10. |
2315 | EmitAndCountInstruction(Inst&: MCInstBuilder(getRetOpcode(Subtarget: *Subtarget))); |
2316 | EmitAndCountInstruction( |
2317 | MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX)); |
2318 | return; |
2319 | |
2320 | case X86::SEH_PushReg: |
2321 | case X86::SEH_SaveReg: |
2322 | case X86::SEH_SaveXMM: |
2323 | case X86::SEH_StackAlloc: |
2324 | case X86::SEH_StackAlign: |
2325 | case X86::SEH_SetFrame: |
2326 | case X86::SEH_PushFrame: |
2327 | case X86::SEH_EndPrologue: |
2328 | EmitSEHInstruction(MI); |
2329 | return; |
2330 | |
2331 | case X86::SEH_Epilogue: { |
2332 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?" ); |
2333 | MachineBasicBlock::const_iterator MBBI(MI); |
2334 | // Check if preceded by a call and emit nop if so. |
2335 | for (MBBI = PrevCrossBBInst(MBBI); |
2336 | MBBI != MachineBasicBlock::const_iterator(); |
2337 | MBBI = PrevCrossBBInst(MBBI)) { |
2338 | // Pseudo instructions that aren't a call are assumed to not emit any |
2339 | // code. If they do, we worst case generate unnecessary noops after a |
2340 | // call. |
2341 | if (MBBI->isCall() || !MBBI->isPseudo()) { |
2342 | if (MBBI->isCall()) |
2343 | EmitAndCountInstruction(MCInstBuilder(X86::NOOP)); |
2344 | break; |
2345 | } |
2346 | } |
2347 | return; |
2348 | } |
2349 | case X86::UBSAN_UD1: |
2350 | EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm) |
2351 | .addReg(X86::EAX) |
2352 | .addReg(X86::EAX) |
2353 | .addImm(1) |
2354 | .addReg(X86::NoRegister) |
2355 | .addImm(MI->getOperand(0).getImm()) |
2356 | .addReg(X86::NoRegister)); |
2357 | return; |
2358 | case X86::CALL64pcrel32: |
2359 | if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11)) |
2360 | EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX)); |
2361 | break; |
2362 | } |
2363 | |
2364 | MCInst TmpInst; |
2365 | MCInstLowering.Lower(MI, OutMI&: TmpInst); |
2366 | |
2367 | // Stackmap shadows cannot include branch targets, so we can count the bytes |
2368 | // in a call towards the shadow, but must ensure that the no thread returns |
2369 | // in to the stackmap shadow. The only way to achieve this is if the call |
2370 | // is at the end of the shadow. |
2371 | if (MI->isCall()) { |
2372 | // Count then size of the call towards the shadow |
2373 | SMShadowTracker.count(Inst&: TmpInst, STI: getSubtargetInfo(), CodeEmitter: CodeEmitter.get()); |
2374 | // Then flush the shadow so that we fill with nops before the call, not |
2375 | // after it. |
2376 | SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo()); |
2377 | // Then emit the call |
2378 | OutStreamer->emitInstruction(Inst: TmpInst, STI: getSubtargetInfo()); |
2379 | return; |
2380 | } |
2381 | |
2382 | EmitAndCountInstruction(Inst&: TmpInst); |
2383 | } |
2384 | |