1 | //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains code to lower X86 MachineInstrs to their corresponding |
10 | // MCInst records. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "MCTargetDesc/X86ATTInstPrinter.h" |
15 | #include "MCTargetDesc/X86BaseInfo.h" |
16 | #include "MCTargetDesc/X86EncodingOptimization.h" |
17 | #include "MCTargetDesc/X86InstComments.h" |
18 | #include "MCTargetDesc/X86ShuffleDecode.h" |
19 | #include "MCTargetDesc/X86TargetStreamer.h" |
20 | #include "X86AsmPrinter.h" |
21 | #include "X86MachineFunctionInfo.h" |
22 | #include "X86RegisterInfo.h" |
23 | #include "X86ShuffleDecodeConstantPool.h" |
24 | #include "X86Subtarget.h" |
25 | #include "llvm/ADT/SmallString.h" |
26 | #include "llvm/ADT/StringExtras.h" |
27 | #include "llvm/CodeGen/MachineConstantPool.h" |
28 | #include "llvm/CodeGen/MachineFunction.h" |
29 | #include "llvm/CodeGen/MachineModuleInfoImpls.h" |
30 | #include "llvm/CodeGen/MachineOperand.h" |
31 | #include "llvm/CodeGen/StackMaps.h" |
32 | #include "llvm/IR/DataLayout.h" |
33 | #include "llvm/IR/GlobalValue.h" |
34 | #include "llvm/IR/Mangler.h" |
35 | #include "llvm/MC/MCAsmInfo.h" |
36 | #include "llvm/MC/MCCodeEmitter.h" |
37 | #include "llvm/MC/MCContext.h" |
38 | #include "llvm/MC/MCExpr.h" |
39 | #include "llvm/MC/MCFixup.h" |
40 | #include "llvm/MC/MCInst.h" |
41 | #include "llvm/MC/MCInstBuilder.h" |
42 | #include "llvm/MC/MCSection.h" |
43 | #include "llvm/MC/MCSectionELF.h" |
44 | #include "llvm/MC/MCStreamer.h" |
45 | #include "llvm/MC/MCSymbol.h" |
46 | #include "llvm/MC/MCSymbolELF.h" |
47 | #include "llvm/MC/TargetRegistry.h" |
48 | #include "llvm/Target/TargetLoweringObjectFile.h" |
49 | #include "llvm/Target/TargetMachine.h" |
50 | #include "llvm/Transforms/Instrumentation/AddressSanitizer.h" |
51 | #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" |
52 | #include <string> |
53 | |
54 | using namespace llvm; |
55 | |
namespace {

/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
class X86MCInstLower {
  MCContext &Ctx;               // MC context of the current function.
  const MachineFunction &MF;    // Function whose instructions are lowered.
  const TargetMachine &TM;
  const MCAsmInfo &MAI;
  X86AsmPrinter &AsmPrinter;    // Printer used for symbol lookup and output.

public:
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);

  /// Lower a single operand. Returns std::nullopt for operands that have no
  /// MCInst equivalent (implicit register operands, register masks).
  std::optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
                                               const MachineOperand &MO) const;
  /// Lower MI into OutMI, applying X86-specific opcode rewrites and encoding
  /// optimizations afterwards.
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;

  /// Map a symbolic operand (global, external symbol, or MBB) to an MCSymbol,
  /// honoring target flags that alter the symbol name (stubs, dllimport).
  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
  /// Wrap Sym in the MCExpr demanded by MO's target flags (GOT/PLT/TLS
  /// variants, PIC-base subtraction) and fold in any constant offset.
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;

private:
  /// Convenience accessor for the Mach-O flavor of the MMI object-file info.
  MachineModuleInfoMachO &getMachOMMI() const;
};

} // end anonymous namespace
81 | |
82 | /// A RAII helper which defines a region of instructions which can't have |
83 | /// padding added between them for correctness. |
84 | struct NoAutoPaddingScope { |
85 | MCStreamer &OS; |
86 | const bool OldAllowAutoPadding; |
87 | NoAutoPaddingScope(MCStreamer &OS) |
88 | : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) { |
89 | changeAndComment(b: false); |
90 | } |
91 | ~NoAutoPaddingScope() { changeAndComment(b: OldAllowAutoPadding); } |
92 | void changeAndComment(bool b) { |
93 | if (b == OS.getAllowAutoPadding()) |
94 | return; |
95 | OS.setAllowAutoPadding(b); |
96 | if (b) |
97 | OS.emitRawComment(T: "autopadding" ); |
98 | else |
99 | OS.emitRawComment(T: "noautopadding" ); |
100 | } |
101 | }; |
102 | |
// Emit a minimal sequence of nops spanning NumBytes bytes.
// (Defined later in this file; forward-declared here for use by the
// stackmap shadow tracker above it.)
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);
106 | |
107 | void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, |
108 | const MCSubtargetInfo &STI, |
109 | MCCodeEmitter *CodeEmitter) { |
110 | if (InShadow) { |
111 | SmallString<256> Code; |
112 | SmallVector<MCFixup, 4> Fixups; |
113 | CodeEmitter->encodeInstruction(Inst, CB&: Code, Fixups, STI); |
114 | CurrentShadowSize += Code.size(); |
115 | if (CurrentShadowSize >= RequiredShadowSize) |
116 | InShadow = false; // The shadow is big enough. Stop counting. |
117 | } |
118 | } |
119 | |
120 | void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding( |
121 | MCStreamer &OutStreamer, const MCSubtargetInfo &STI) { |
122 | if (InShadow && CurrentShadowSize < RequiredShadowSize) { |
123 | InShadow = false; |
124 | emitX86Nops(OS&: OutStreamer, NumBytes: RequiredShadowSize - CurrentShadowSize, |
125 | Subtarget: &MF->getSubtarget<X86Subtarget>()); |
126 | } |
127 | } |
128 | |
129 | void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { |
130 | OutStreamer->emitInstruction(Inst, STI: getSubtargetInfo()); |
131 | SMShadowTracker.count(Inst, STI: getSubtargetInfo(), CodeEmitter: CodeEmitter.get()); |
132 | } |
133 | |
/// Capture the per-function context objects (MC context, target machine,
/// asm-info, printer) needed to lower this function's instructions.
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                               X86AsmPrinter &asmprinter)
    : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
      AsmPrinter(asmprinter) {}
138 | |
/// Return the Mach-O flavor of the machine-module-info, used to record
/// Darwin $non_lazy_ptr stub entries.
MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
  return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
}
142 | |
/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
  const Triple &TT = TM.getTargetTriple();
  // On ELF, prefer the local alias of a global where one exists, so the
  // reference can bypass GOT/PLT indirection.
  if (MO.isGlobal() && TT.isOSBinFormatELF())
    return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());

  const DataLayout &DL = MF.getDataLayout();
  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
         "Isn't a symbol reference");

  MCSymbol *Sym = nullptr;
  SmallString<128> Name;
  StringRef Suffix;

  // Some target flags change the symbol's name itself, by prefix or suffix.
  switch (MO.getTargetFlags()) {
  case X86II::MO_DLLIMPORT:
    // Handle dllimport linkage.
    Name += "__imp_";
    break;
  case X86II::MO_COFFSTUB:
    Name += ".refptr.";
    break;
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Suffix = "$non_lazy_ptr";
    break;
  }

  // Suffixed stub symbols are assembler-local, so carry the private global
  // prefix (e.g. "L" on Darwin).
  if (!Suffix.empty())
    Name += DL.getPrivateGlobalPrefix();

  if (MO.isGlobal()) {
    const GlobalValue *GV = MO.getGlobal();
    AsmPrinter.getNameWithPrefix(Name, GV);
  } else if (MO.isSymbol()) {
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
  } else if (MO.isMBB()) {
    // Basic blocks already have a symbol; no name mangling applies.
    assert(Suffix.empty());
    Sym = MO.getMBB()->getSymbol();
  }

  Name += Suffix;
  if (!Sym)
    Sym = Ctx.getOrCreateSymbol(Name);

  // If the target flags on the operand changes the name of the symbol, do that
  // before we return the symbol.
  switch (MO.getTargetFlags()) {
  default:
    break;
  case X86II::MO_COFFSTUB: {
    // Record a COFF ".refptr" stub pointing at the real symbol; the stub
    // itself is emitted later by the asm printer.
    MachineModuleInfoCOFF &MMICOFF =
        MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
    MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()), true);
    }
    break;
  }
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
    // Record a Darwin $non_lazy_ptr stub; the boolean marks whether the
    // referenced global is external (anything not internal linkage).
    MachineModuleInfoImpl::StubValueTy &StubSym =
        getMachOMMI().getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()),
          !MO.getGlobal()->hasInternalLinkage());
    }
    break;
  }
  }

  return Sym;
}
221 | |
MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
                                             MCSymbol *Sym) const {
  // FIXME: We would like an efficient form for this, so we don't have to do a
  // lot of extra uniquing.
  const MCExpr *Expr = nullptr;
  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;

  // Map the operand's target flag to the relocation variant (@GOT, @PLT,
  // @TPOFF, ...) the symbol reference must carry. A few flags instead build
  // a full expression (PIC-base subtraction) directly into Expr.
  switch (MO.getTargetFlags()) {
  default:
    llvm_unreachable("Unknown target flag on GV operand");
  case X86II::MO_NO_FLAG: // No flag.
  // These affect the name of the symbol, not any suffix.
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DLLIMPORT:
  case X86II::MO_COFFSTUB:
    break;

  case X86II::MO_TLVP:
    RefKind = MCSymbolRefExpr::VK_TLVP;
    break;
  case X86II::MO_TLVP_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    break;
  case X86II::MO_SECREL:
    RefKind = MCSymbolRefExpr::VK_SECREL;
    break;
  case X86II::MO_TLSGD:
    RefKind = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86II::MO_TLSLD:
    RefKind = MCSymbolRefExpr::VK_TLSLD;
    break;
  case X86II::MO_TLSLDM:
    RefKind = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86II::MO_GOTTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
    break;
  case X86II::MO_INDNTPOFF:
    RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
    break;
  case X86II::MO_TPOFF:
    RefKind = MCSymbolRefExpr::VK_TPOFF;
    break;
  case X86II::MO_DTPOFF:
    RefKind = MCSymbolRefExpr::VK_DTPOFF;
    break;
  case X86II::MO_NTPOFF:
    RefKind = MCSymbolRefExpr::VK_NTPOFF;
    break;
  case X86II::MO_GOTNTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
    break;
  case X86II::MO_GOTPCREL:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL;
    break;
  case X86II::MO_GOTPCREL_NORELAX:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
    break;
  case X86II::MO_GOT:
    RefKind = MCSymbolRefExpr::VK_GOT;
    break;
  case X86II::MO_GOTOFF:
    RefKind = MCSymbolRefExpr::VK_GOTOFF;
    break;
  case X86II::MO_PLT:
    RefKind = MCSymbolRefExpr::VK_PLT;
    break;
  case X86II::MO_ABS8:
    RefKind = MCSymbolRefExpr::VK_X86_ABS8;
    break;
  case X86II::MO_PIC_BASE_OFFSET:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    if (MO.isJTI()) {
      assert(MAI.doesSetDirectiveSuppressReloc());
      // If .set directive is supported, use it to reduce the number of
      // relocations the assembler will generate for differences between
      // local labels. This is only safe when the symbols are in the same
      // section so we are restricting it to jumptable references.
      MCSymbol *Label = Ctx.createTempSymbol();
      AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
      Expr = MCSymbolRefExpr::create(Label, Ctx);
    }
    break;
  }

  // The simple cases above only picked a variant kind; build the symbol
  // reference now.
  if (!Expr)
    Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);

  // Fold a constant operand offset into the expression. Jump-table and MBB
  // operands never carry an offset.
  if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
    Expr = MCBinaryExpr::createAdd(
        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
  return MCOperand::createExpr(Expr);
}
323 | |
324 | static unsigned getRetOpcode(const X86Subtarget &Subtarget) { |
325 | return Subtarget.is64Bit() ? X86::RET64 : X86::RET32; |
326 | } |
327 | |
/// Lower a single MachineOperand to an MCOperand. Returns std::nullopt for
/// operands (implicit registers, register masks) that have no MC equivalent.
std::optional<MCOperand>
X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
                                    const MachineOperand &MO) const {
  switch (MO.getType()) {
  default:
    // Dump the offending instruction before aborting, to aid debugging.
    MI->print(errs());
    llvm_unreachable("unknown operand type");
  case MachineOperand::MO_Register:
    // Ignore all implicit register operands.
    if (MO.isImplicit())
      return std::nullopt;
    return MCOperand::createReg(MO.getReg());
  case MachineOperand::MO_Immediate:
    return MCOperand::createImm(MO.getImm());
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_ExternalSymbol:
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
  case MachineOperand::MO_MCSymbol:
    return LowerSymbolOperand(MO, MO.getMCSymbol());
  case MachineOperand::MO_JumpTableIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
  case MachineOperand::MO_ConstantPoolIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
  case MachineOperand::MO_BlockAddress:
    return LowerSymbolOperand(
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
  case MachineOperand::MO_RegisterMask:
    // Ignore call clobbers.
    return std::nullopt;
  }
}
360 | |
361 | // Replace TAILJMP opcodes with their equivalent opcodes that have encoding |
362 | // information. |
363 | static unsigned convertTailJumpOpcode(unsigned Opcode) { |
364 | switch (Opcode) { |
365 | case X86::TAILJMPr: |
366 | Opcode = X86::JMP32r; |
367 | break; |
368 | case X86::TAILJMPm: |
369 | Opcode = X86::JMP32m; |
370 | break; |
371 | case X86::TAILJMPr64: |
372 | Opcode = X86::JMP64r; |
373 | break; |
374 | case X86::TAILJMPm64: |
375 | Opcode = X86::JMP64m; |
376 | break; |
377 | case X86::TAILJMPr64_REX: |
378 | Opcode = X86::JMP64r_REX; |
379 | break; |
380 | case X86::TAILJMPm64_REX: |
381 | Opcode = X86::JMP64m_REX; |
382 | break; |
383 | case X86::TAILJMPd: |
384 | case X86::TAILJMPd64: |
385 | Opcode = X86::JMP_1; |
386 | break; |
387 | case X86::TAILJMPd_CC: |
388 | case X86::TAILJMPd64_CC: |
389 | Opcode = X86::JCC_1; |
390 | break; |
391 | } |
392 | |
393 | return Opcode; |
394 | } |
395 | |
/// Lower MI to OutMI: translate each operand, try the generic MC-level
/// encoding optimizations, then apply per-opcode fixups (pseudo expansion,
/// tail-jump conversion, prefix flags).
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
  OutMI.setOpcode(MI->getOpcode());

  for (const MachineOperand &MO : MI->operands())
    if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
      OutMI.addOperand(*MaybeMCOp);

  // Try shared MC-layer rewrites first (shorter encodings, fixed-register
  // forms); if one fires it fully determines the output instruction.
  bool In64BitMode = AsmPrinter.getSubtarget().is64Bit();
  if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) ||
      X86::optimizeShiftRotateWithImmediateOne(OutMI) ||
      X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) ||
      X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) ||
      X86::optimizeMOV(OutMI, In64BitMode) ||
      X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI))
    return;

  // Handle a few special cases to eliminate operand modifiers.
  switch (OutMI.getOpcode()) {
  case X86::LEA64_32r:
  case X86::LEA64r:
  case X86::LEA16r:
  case X86::LEA32r:
    // LEA should have a segment register, but it must be empty.
    assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
           "Unexpected # of LEA operands");
    assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
           "LEA has segment specified!");
    break;
  case X86::MULX32Hrr:
  case X86::MULX32Hrm:
  case X86::MULX64Hrr:
  case X86::MULX64Hrm: {
    // Turn into regular MULX by duplicating the destination.
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
    case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
    case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
    case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
    }
    OutMI.setOpcode(NewOpc);
    // Duplicate the destination.
    unsigned DestReg = OutMI.getOperand(0).getReg();
    OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
    break;
  }
  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses. As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
  case X86::CALL64r:
  case X86::CALL64pcrel32:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    break;
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // These pseudos expand to a plain return at the MC level.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CLEANUPRET: {
    // Replace CLEANUPRET with the appropriate RET.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CATCHRET: {
    // Replace CATCHRET with the appropriate RET.
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
    unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX;
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(Subtarget));
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
    break;
  }
  // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
  // instruction.
  case X86::TAILJMPr:
  case X86::TAILJMPr64:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    // Conditional tail jumps carry the target plus a condition-code operand.
    assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPm:
  case X86::TAILJMPm64:
  case X86::TAILJMPm64_REX:
    assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
           "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::MASKMOVDQU:
  case X86::VMASKMOVDQU:
    // In 64-bit mode these use EDI and need an address-size override prefix.
    if (In64BitMode)
      OutMI.setFlags(X86::IP_HAS_AD_SIZE);
    break;
  case X86::BSF16rm:
  case X86::BSF16rr:
  case X86::BSF32rm:
  case X86::BSF32rr:
  case X86::BSF64rm:
  case X86::BSF64rr: {
    // Add a REP prefix to BSF instructions so that new processors can
    // recognize as TZCNT, which has better performance than BSF.
    // BSF and TZCNT have different interpretations on ZF bit. So make sure
    // it won't be used later.
    const MachineOperand *FlagDef =
        MI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
    if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead())
      OutMI.setFlags(X86::IP_HAS_REPEAT);
    break;
  }
  default:
    break;
  }
}
519 | |
/// Expand the TLS_addr/TLS_base_addr/TLS_desc pseudos into the exact
/// instruction sequences (lea + call) that linkers pattern-match for TLS
/// relaxation. The sequences are padding-sensitive, hence the
/// NoAutoPaddingScope.
void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);
  bool Is64Bits = getSubtarget().is64Bit();
  bool Is64BitsLP64 = getSubtarget().isTarget64BitLP64();
  MCContext &Ctx = OutStreamer->getContext();

  // Choose the TLS access model's relocation variant from the pseudo opcode.
  MCSymbolRefExpr::VariantKind SRVK;
  switch (MI.getOpcode()) {
  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86::TLS_base_addr32:
    SRVK = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSLD;
    break;
  case X86::TLS_desc32:
  case X86::TLS_desc64:
    SRVK = MCSymbolRefExpr::VK_TLSDESC;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);

  // Before binutils 2.41, ld has a bogus TLS relaxation error when the GD/LD
  // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
  // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
  // only using GOT when GOTPCRELX is enabled.
  // TODO Delete the workaround when rustc no longer relies on the hack
  bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                Ctx.getTargetOptions()->X86RelaxRelocations;

  if (SRVK == MCSymbolRefExpr::VK_TLSDESC) {
    // TLSDESC: lea sym@tlsdesc, then an indirect call through sym@tlscall.
    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(
        MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)),
        MCSymbolRefExpr::VK_TLSCALL, Ctx);
    EmitAndCountInstruction(
        MCInstBuilder(Is64BitsLP64 ? X86::LEA64r : X86::LEA32r)
            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
            .addReg(Is64Bits ? X86::RIP : X86::EBX)
            .addImm(1)
            .addReg(0)
            .addExpr(Sym)
            .addReg(0));
    EmitAndCountInstruction(
        MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m)
            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
            .addImm(1)
            .addReg(0)
            .addExpr(Expr)
            .addReg(0));
  } else if (Is64Bits) {
    // 64-bit GD sequences must have a fixed length for linker relaxation, so
    // the GD form is padded with data16/rex prefixes per the psABI.
    bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
    if (NeedsPadding && Is64BitsLP64)
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
                                .addReg(X86::RDI)
                                .addReg(X86::RIP)
                                .addImm(1)
                                .addReg(0)
                                .addExpr(Sym)
                                .addReg(0));
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
    if (NeedsPadding) {
      if (!UseGot)
        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
    }
    if (UseGot) {
      // Indirect call through the GOT entry of __tls_get_addr.
      const MCExpr *Expr = MCSymbolRefExpr::create(
          TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
                                  .addReg(X86::RIP)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALL64pcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  } else {
    // 32-bit: the GD form uses an index register so the instruction has the
    // exact length the linker's relaxation patterns expect.
    if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(0)
                                  .addImm(1)
                                  .addReg(X86::EBX)
                                  .addExpr(Sym)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Sym)
                                  .addReg(0));
    }

    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
    if (UseGot) {
      // Indirect call through the GOT, addressed off EBX (the GOT pointer).
      const MCExpr *Expr =
          MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALLpcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  }
}
649 | |
650 | /// Emit the largest nop instruction smaller than or equal to \p NumBytes |
651 | /// bytes. Return the size of nop emitted. |
652 | static unsigned emitNop(MCStreamer &OS, unsigned NumBytes, |
653 | const X86Subtarget *Subtarget) { |
654 | // Determine the longest nop which can be efficiently decoded for the given |
655 | // target cpu. 15-bytes is the longest single NOP instruction, but some |
656 | // platforms can't decode the longest forms efficiently. |
657 | unsigned MaxNopLength = 1; |
658 | if (Subtarget->is64Bit()) { |
659 | // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the |
660 | // IndexReg/BaseReg below need to be updated. |
661 | if (Subtarget->hasFeature(X86::TuningFast7ByteNOP)) |
662 | MaxNopLength = 7; |
663 | else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP)) |
664 | MaxNopLength = 15; |
665 | else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP)) |
666 | MaxNopLength = 11; |
667 | else |
668 | MaxNopLength = 10; |
669 | } if (Subtarget->is32Bit()) |
670 | MaxNopLength = 2; |
671 | |
672 | // Cap a single nop emission at the profitable value for the target |
673 | NumBytes = std::min(a: NumBytes, b: MaxNopLength); |
674 | |
675 | unsigned NopSize; |
676 | unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg; |
677 | IndexReg = Displacement = SegmentReg = 0; |
678 | BaseReg = X86::RAX; |
679 | ScaleVal = 1; |
680 | switch (NumBytes) { |
681 | case 0: |
682 | llvm_unreachable("Zero nops?" ); |
683 | break; |
684 | case 1: |
685 | NopSize = 1; |
686 | Opc = X86::NOOP; |
687 | break; |
688 | case 2: |
689 | NopSize = 2; |
690 | Opc = X86::XCHG16ar; |
691 | break; |
692 | case 3: |
693 | NopSize = 3; |
694 | Opc = X86::NOOPL; |
695 | break; |
696 | case 4: |
697 | NopSize = 4; |
698 | Opc = X86::NOOPL; |
699 | Displacement = 8; |
700 | break; |
701 | case 5: |
702 | NopSize = 5; |
703 | Opc = X86::NOOPL; |
704 | Displacement = 8; |
705 | IndexReg = X86::RAX; |
706 | break; |
707 | case 6: |
708 | NopSize = 6; |
709 | Opc = X86::NOOPW; |
710 | Displacement = 8; |
711 | IndexReg = X86::RAX; |
712 | break; |
713 | case 7: |
714 | NopSize = 7; |
715 | Opc = X86::NOOPL; |
716 | Displacement = 512; |
717 | break; |
718 | case 8: |
719 | NopSize = 8; |
720 | Opc = X86::NOOPL; |
721 | Displacement = 512; |
722 | IndexReg = X86::RAX; |
723 | break; |
724 | case 9: |
725 | NopSize = 9; |
726 | Opc = X86::NOOPW; |
727 | Displacement = 512; |
728 | IndexReg = X86::RAX; |
729 | break; |
730 | default: |
731 | NopSize = 10; |
732 | Opc = X86::NOOPW; |
733 | Displacement = 512; |
734 | IndexReg = X86::RAX; |
735 | SegmentReg = X86::CS; |
736 | break; |
737 | } |
738 | |
739 | unsigned NumPrefixes = std::min(a: NumBytes - NopSize, b: 5U); |
740 | NopSize += NumPrefixes; |
741 | for (unsigned i = 0; i != NumPrefixes; ++i) |
742 | OS.emitBytes(Data: "\x66" ); |
743 | |
744 | switch (Opc) { |
745 | default: llvm_unreachable("Unexpected opcode" ); |
746 | case X86::NOOP: |
747 | OS.emitInstruction(MCInstBuilder(Opc), *Subtarget); |
748 | break; |
749 | case X86::XCHG16ar: |
750 | OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), |
751 | *Subtarget); |
752 | break; |
753 | case X86::NOOPL: |
754 | case X86::NOOPW: |
755 | OS.emitInstruction(MCInstBuilder(Opc) |
756 | .addReg(Reg: BaseReg) |
757 | .addImm(Val: ScaleVal) |
758 | .addReg(Reg: IndexReg) |
759 | .addImm(Val: Displacement) |
760 | .addReg(Reg: SegmentReg), |
761 | *Subtarget); |
762 | break; |
763 | } |
764 | assert(NopSize <= NumBytes && "We overemitted?" ); |
765 | return NopSize; |
766 | } |
767 | |
768 | /// Emit the optimal amount of multi-byte nops on X86. |
769 | static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, |
770 | const X86Subtarget *Subtarget) { |
771 | unsigned NopsToEmit = NumBytes; |
772 | (void)NopsToEmit; |
773 | while (NumBytes) { |
774 | NumBytes -= emitNop(OS, NumBytes, Subtarget); |
775 | assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!" ); |
776 | } |
777 | } |
778 | |
/// Lower a STATEPOINT pseudo: either emit the requested number of patchable
/// nop bytes, or emit the actual call, then record the statepoint label for
/// the stackmap section.
void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    // A nop-sled statepoint: reserve space that can be patched later.
    emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
  } else {
    // Lower call target and choose correct opcode
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address. You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate. You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      // FIXME: Add retpoline support and remove this.
      if (Subtarget->useIndirectThunkCalls())
        report_fatal_error("Lowering register statepoints with thunks not "
                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
    default:
      llvm_unreachable("Unsupported operand type in statepoint call target");
      break;
    }

    // Emit call
    MCInst CallInst;
    CallInst.setOpcode(CallOpcode);
    CallInst.addOperand(CallTargetMCOp);
    OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
  }

  // Record our statepoint node in the same section used by STACKMAP
  // and PATCHPOINT
  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordStatepoint(*MILabel, MI);
}
839 | |
/// Lower a FAULTING_OP pseudo: emit the wrapped instruction preceded by a
/// label, and record (fault kind, faulting label, handler label) in the
/// fault-map section so the runtime can recover from the expected fault.
void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
                                     X86MCInstLower &MCIL) {
  // FAULTING_LOAD_OP <def>, <fault kind>, <MBB handler>,
  // <opcode>, <operands>

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  Register DefRegister = FaultingMI.getOperand(0).getReg();
  FaultMaps::FaultKind FK =
      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
  unsigned Opcode = FaultingMI.getOperand(3).getImm();
  unsigned OperandsBeginIdx = 4;

  // Label the instruction that may fault so the fault map can reference it.
  auto &Ctx = OutStreamer->getContext();
  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(FaultingLabel);

  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);

  // Rebuild the real instruction from the pseudo's trailing operands.
  MCInst MI;
  MI.setOpcode(Opcode);

  if (DefRegister != X86::NoRegister)
    MI.addOperand(MCOperand::createReg(DefRegister));

  for (const MachineOperand &MO :
       llvm::drop_begin(FaultingMI.operands(), OperandsBeginIdx))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, MO))
      MI.addOperand(*MaybeOperand);

  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
  OutStreamer->emitInstruction(MI, getSubtargetInfo());
}
875 | |
876 | void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, |
877 | X86MCInstLower &MCIL) { |
878 | bool Is64Bits = Subtarget->is64Bit(); |
879 | MCContext &Ctx = OutStreamer->getContext(); |
880 | MCSymbol *fentry = Ctx.getOrCreateSymbol(Name: "__fentry__" ); |
881 | const MCSymbolRefExpr *Op = |
882 | MCSymbolRefExpr::create(Symbol: fentry, Kind: MCSymbolRefExpr::VK_None, Ctx); |
883 | |
884 | EmitAndCountInstruction( |
885 | MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32) |
886 | .addExpr(Op)); |
887 | } |
888 | |
// Lower a KCFI_CHECK pseudo: verify that the indirect-call target held in
// operand 0 is preceded in memory by the expected KCFI type hash
// (operand 1), and trap on a mismatch. The check is emitted immediately
// before the call instruction it guards.
void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
  assert(std::next(MI.getIterator())->isCall() &&
         "KCFI_CHECK not followed by a call instruction" );

  // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop()
  // returns a 1-byte X86::NOOP, which means the offset is the same in
  // bytes. This assumes that patchable-function-prefix is the same for all
  // functions.
  const MachineFunction &MF = *MI.getMF();
  int64_t PrefixNops = 0;
  // Result intentionally ignored: PrefixNops stays 0 when the attribute is
  // absent or not a valid integer.
  (void)MF.getFunction()
      .getFnAttribute(Kind: "patchable-function-prefix" )
      .getValueAsString()
      .getAsInteger(Radix: 10, Result&: PrefixNops);

  // KCFI allows indirect calls to any location that's preceded by a valid
  // type identifier. To avoid encoding the full constant into an instruction,
  // and thus emitting potential call target gadgets at each indirect call
  // site, load a negated constant to a register and compare that to the
  // expected value at the call target.
  const Register AddrReg = MI.getOperand(i: 0).getReg();
  const uint32_t Type = MI.getOperand(i: 1).getImm();
  // The check is immediately before the call. If the call target is in R10,
  // we can clobber R11 for the check instead.
  unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D;
  EmitAndCountInstruction(
      MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type)));
  // add TempReg, [AddrReg - (PrefixNops + 4)]: adds the hash stored before
  // the target to the negated expected hash; the sum is zero on a match.
  EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm)
                              .addReg(X86::NoRegister)
                              .addReg(TempReg)
                              .addReg(AddrReg)
                              .addImm(1)
                              .addReg(X86::NoRegister)
                              .addImm(-(PrefixNops + 4))
                              .addReg(X86::NoRegister));

  // Jump over the trap when the hashes matched (ZF set by the add).
  MCSymbol *Pass = OutContext.createTempSymbol();
  EmitAndCountInstruction(
      MCInstBuilder(X86::JCC_1)
          .addExpr(MCSymbolRefExpr::create(Pass, OutContext))
          .addImm(X86::COND_E));

  // Mismatch path: trap, and record the trap location for the runtime.
  MCSymbol *Trap = OutContext.createTempSymbol();
  OutStreamer->emitLabel(Symbol: Trap);
  EmitAndCountInstruction(MCInstBuilder(X86::TRAP));
  emitKCFITrapEntry(MF, Symbol: Trap);
  OutStreamer->emitLabel(Symbol: Pass);
}
937 | |
938 | void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) { |
939 | // FIXME: Make this work on non-ELF. |
940 | if (!TM.getTargetTriple().isOSBinFormatELF()) { |
941 | report_fatal_error(reason: "llvm.asan.check.memaccess only supported on ELF" ); |
942 | return; |
943 | } |
944 | |
945 | const auto &Reg = MI.getOperand(i: 0).getReg(); |
946 | ASanAccessInfo AccessInfo(MI.getOperand(i: 1).getImm()); |
947 | |
948 | uint64_t ShadowBase; |
949 | int MappingScale; |
950 | bool OrShadowOffset; |
951 | getAddressSanitizerParams(TargetTriple: Triple(TM.getTargetTriple()), LongSize: 64, |
952 | IsKasan: AccessInfo.CompileKernel, ShadowBase: &ShadowBase, |
953 | MappingScale: &MappingScale, OrShadowOffset: &OrShadowOffset); |
954 | |
955 | StringRef Name = AccessInfo.IsWrite ? "store" : "load" ; |
956 | StringRef Op = OrShadowOffset ? "or" : "add" ; |
957 | std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" + |
958 | Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" + |
959 | TM.getMCRegisterInfo()->getName(RegNo: Reg.asMCReg())) |
960 | .str(); |
961 | if (OrShadowOffset) |
962 | report_fatal_error( |
963 | reason: "OrShadowOffset is not supported with optimized callbacks" ); |
964 | |
965 | EmitAndCountInstruction( |
966 | MCInstBuilder(X86::CALL64pcrel32) |
967 | .addExpr(MCSymbolRefExpr::create( |
968 | OutContext.getOrCreateSymbol(SymName), OutContext))); |
969 | } |
970 | |
// Lower a PATCHABLE_OP pseudo: ensure the following real instruction is at
// least `minsize` bytes long so it can be safely hot-patched; when it is
// shorter, emit padding (a NOP sequence, or the MSVC `mov edi, edi` idiom)
// in front of it.
void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                      X86MCInstLower &MCIL) {
  // PATCHABLE_OP minsize

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Find the next non-meta instruction; that is the one being patched over.
  auto NextMI = std::find_if(first: std::next(x: MI.getIterator()),
                             last: MI.getParent()->end().getInstrIterator(),
                             pred: [](auto &II) { return !II.isMetaInstruction(); });

  SmallString<256> Code;
  unsigned MinSize = MI.getOperand(i: 0).getImm();

  if (NextMI != MI.getParent()->end() && !NextMI->isInlineAsm()) {
    // Lower the next MachineInstr to find its byte size.
    // If the next instruction is inline assembly, we skip lowering it for now,
    // and assume we should always generate NOPs.
    MCInst MCI;
    MCIL.Lower(MI: &*NextMI, OutMI&: MCI);

    SmallVector<MCFixup, 4> Fixups;
    CodeEmitter->encodeInstruction(Inst: MCI, CB&: Code, Fixups, STI: getSubtargetInfo());
  }

  // Only pad when the encoded instruction is smaller than the requested
  // minimum (Code stays empty for inline asm, so that always pads).
  if (Code.size() < MinSize) {
    if (MinSize == 2 && Subtarget->is32Bit() &&
        Subtarget->isTargetWindowsMSVC() &&
        (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3" )) {
      // For compatibility reasons, when targetting MSVC, it is important to
      // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
      // rely specifically on this pattern to be able to patch a function.
      // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
      OutStreamer->emitInstruction(
          MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
          *Subtarget);
    } else {
      // Emit a NOP sequence of exactly MinSize bytes.
      unsigned NopSize = emitNop(OS&: *OutStreamer, NumBytes: MinSize, Subtarget);
      assert(NopSize == MinSize && "Could not implement MinSize!" );
      (void)NopSize;
    }
  }
}
1013 | |
1014 | // Lower a stackmap of the form: |
1015 | // <id>, <shadowBytes>, ... |
1016 | void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { |
1017 | SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo()); |
1018 | |
1019 | auto &Ctx = OutStreamer->getContext(); |
1020 | MCSymbol *MILabel = Ctx.createTempSymbol(); |
1021 | OutStreamer->emitLabel(Symbol: MILabel); |
1022 | |
1023 | SM.recordStackMap(L: *MILabel, MI); |
1024 | unsigned NumShadowBytes = MI.getOperand(i: 1).getImm(); |
1025 | SMShadowTracker.reset(RequiredSize: NumShadowBytes); |
1026 | } |
1027 | |
// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
// Emits a label for the stackmap record, materializes and calls a non-null
// target via a scratch register, then NOP-pads up to <numBytes> total.
void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64" );

  SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo());

  // The patch region must keep its exact layout; no automatic padding.
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(Symbol: MILabel);
  SM.recordPatchPoint(L: *MILabel, MI);

  PatchPointOpers opers(&MI);
  unsigned ScratchIdx = opers.getNextScratchIdx();
  unsigned EncodedBytes = 0;
  const MachineOperand &CalleeMO = opers.getCallTarget();

  // Check for null target. If target is non-null (i.e. is non-zero or is
  // symbolic) then emit a call.
  if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
    MCOperand CalleeMCOp;
    switch (CalleeMO.getType()) {
    default:
      /// FIXME: Add a verifier check for bad callee types.
      llvm_unreachable("Unrecognized callee operand type." );
    case MachineOperand::MO_Immediate:
      if (CalleeMO.getImm())
        CalleeMCOp = MCOperand::createImm(Val: CalleeMO.getImm());
      break;
    case MachineOperand::MO_ExternalSymbol:
    case MachineOperand::MO_GlobalAddress:
      CalleeMCOp = MCIL.LowerSymbolOperand(MO: CalleeMO,
                                           Sym: MCIL.GetSymbolFromOperand(MO: CalleeMO));
      break;
    }

    // Emit MOV to materialize the target address and the CALL to target.
    // This is encoded with 12-13 bytes, depending on which register is used
    // (extended registers R8-R15 cost one extra encoding byte, presumably
    // the REX prefix on the MOV -- TODO confirm).
    Register ScratchReg = MI.getOperand(i: ScratchIdx).getReg();
    if (X86II::isX86_64ExtendedReg(RegNo: ScratchReg))
      EncodedBytes = 13;
    else
      EncodedBytes = 12;

    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
    // FIXME: Add retpoline support and remove this.
    if (Subtarget->useIndirectThunkCalls())
      report_fatal_error(
          reason: "Lowering patchpoint with thunks not yet implemented." );
    EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
  }

  // Emit padding: NOPs filling the remainder of the requested patch region.
  unsigned NumBytes = opers.getNumPatchBytes();
  assert(NumBytes >= EncodedBytes &&
         "Patchpoint can't request size less than the length of a call." );

  emitX86Nops(OS&: *OutStreamer, NumBytes: NumBytes - EncodedBytes, Subtarget);
}
1091 | |
// Lower a PATCHABLE_EVENT_CALL pseudo into an XRay "custom event" sled: a
// fixed-size, initially-skipped region that the XRay runtime can patch into
// a live call to __xray_CustomEvent.
void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64" );

  // The sled's byte layout must be exact; suppress automatic padding.
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                        // jump across the instrumentation sled
  //   ...                           // set up arguments in register
  //   callq __xray_CustomEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayCustomEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol(Name: "xray_event_sled_" , AlwaysAddSuffix: true);
  OutStreamer->AddComment(T: "# XRay Custom Event Log" );
  OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo());
  OutStreamer->emitLabel(Symbol: CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way do force the relative jump.
  OutStreamer->emitBinaryData(Data: "\xeb\x0f" );

  // The two event arguments are placed in %rdi and %rsi -- the first two
  // SysV argument registers -- so the patched-in trampoline call receives
  // them directly. (An earlier comment here said %rcx/%rdx, contradicting
  // DestRegs below.)
  const Register DestRegs[] = {X86::RDI, X86::RSI};
  bool UsedMask[] = {false, false};
  // Filled out in loop.
  Register SrcRegs[] = {0, 0};

  // Then we put the operands in the %rdi and %rsi registers. We spill the
  // values in the register before we clobber them, and mark them as used in
  // UsedMask. In case the arguments are already in the correct register, we
  // emit nops appropriately sized to keep the sled the same size in every
  // situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I))) {
      assert(Op->isReg() && "Only support arguments in registers" );
      // Widen to the containing 64-bit register.
      SrcRegs[I] = getX86SubSuperRegister(Reg: Op->getReg(), Size: 64);
      assert(SrcRegs[I].isValid() && "Invalid operand" );
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        // Argument already in place: pad so the sled size stays constant.
        emitX86Nops(OS&: *OutStreamer, NumBytes: 4, Subtarget);
      }
    }

  // Now that the register values are stashed, mov arguments into place.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten over the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (SrcRegs[I] != DestRegs[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_CustomEvent" );
  MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers. Iterate in reverse so pops
  // mirror the pushes above; 1-byte nops keep the sled size constant for
  // slots that were never spilled.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(OS&: *OutStreamer, NumBytes: 1, Subtarget);

  OutStreamer->AddComment(T: "xray custom event end." );

  // Record the sled version. Version 0 of this sled was spelled differently, so
  // we let the runtime handle the different offsets we're using. Version 2
  // changed the absolute address to a PC-relative address.
  recordSled(Sled: CurSled, MI, Kind: SledKind::CUSTOM_EVENT, Version: 2);
}
1188 | |
// Lower a PATCHABLE_TYPED_EVENT_CALL pseudo into an XRay "typed event"
// sled: like the custom-event sled, but with three arguments and the
// __xray_TypedEvent trampoline.
void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
                                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64" );

  // The sled's byte layout must be exact; suppress automatic padding.
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                       // jump across the instrumentation sled
  //   ...                          // set up arguments in register
  //   callq __xray_TypedEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayTypedEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol(Name: "xray_typed_event_sled_" , AlwaysAddSuffix: true);
  OutStreamer->AddComment(T: "# XRay Typed Event Log" );
  OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo());
  OutStreamer->emitLabel(Symbol: CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way do force the relative jump.
  OutStreamer->emitBinaryData(Data: "\xeb\x14" );

  // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
  // so we'll work with those. Or we may be called via SystemV, in which case
  // we don't have to do any translation.
  const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
  bool UsedMask[] = {false, false, false};

  // Will fill out src regs in the loop.
  Register SrcRegs[] = {0, 0, 0};

  // Then we put the operands in the SystemV registers. We spill the values in
  // the registers before we clobber them, and mark them as used in UsedMask.
  // In case the arguments are already in the correct register, we emit nops
  // appropriately sized to keep the sled the same size in every situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I))) {
      // TODO: Is register only support adequate?
      assert(Op->isReg() && "Only supports arguments in registers" );
      // Widen to the containing 64-bit register.
      SrcRegs[I] = getX86SubSuperRegister(Reg: Op->getReg(), Size: 64);
      assert(SrcRegs[I].isValid() && "Invalid operand" );
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        // Argument already in place: pad so the sled size stays constant.
        emitX86Nops(OS&: *OutStreamer, NumBytes: 4, Subtarget);
      }
    }

  // In the above loop we only stash all of the destination registers or emit
  // nops if the arguments are already in the right place. Doing the actual
  // moving is postponed until after all the registers are stashed so nothing
  // is clobbered. We've already added nops to account for the size of mov and
  // push if the register is in the right place, so we only have to worry about
  // emitting movs.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten over the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (UsedMask[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_TypedEvent" );
  MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers. Iterate in reverse so pops
  // mirror the pushes above; 1-byte nops keep the sled size constant for
  // slots that were never spilled.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(OS&: *OutStreamer, NumBytes: 1, Subtarget);

  OutStreamer->AddComment(T: "xray typed event end." );

  // Record the sled version.
  recordSled(Sled: CurSled, MI, Kind: SledKind::TYPED_EVENT, Version: 2);
}
1290 | |
// Lower a PATCHABLE_FUNCTION_ENTER pseudo. When the function carries a
// "patchable-function-entry" attribute, emit exactly that many NOPs and no
// XRay sled; otherwise emit the 11-byte XRay entry sled.
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                  X86MCInstLower &MCIL) {

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  const Function &F = MF->getFunction();
  if (F.hasFnAttribute(Kind: "patchable-function-entry" )) {
    unsigned Num;
    // A malformed attribute value produces no patchable region at all.
    if (F.getFnAttribute(Kind: "patchable-function-entry" )
            .getValueAsString()
            .getAsInteger(Radix: 10, Result&: Num))
      return;
    emitX86Nops(OS&: *OutStreamer, NumBytes: Num, Subtarget);
    return;
  }
  // We want to emit the following pattern:
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   jmp .tmpN
  //   # 9 bytes worth of noops
  //
  // We need the 9 bytes because at runtime, we'd be patching over the full 11
  // bytes with the following pattern:
  //
  //   mov %r10, <function id, 32-bit>   // 6 bytes
  //   call <relative offset, 32-bits>   // 5 bytes
  //
  auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true);
  OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo());
  OutStreamer->emitLabel(Symbol: CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way do force the relative jump.
  OutStreamer->emitBytes(Data: "\xeb\x09" );
  emitX86Nops(OS&: *OutStreamer, NumBytes: 9, Subtarget);
  recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_ENTER, Version: 2);
}
1330 | |
1331 | void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, |
1332 | X86MCInstLower &MCIL) { |
1333 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1334 | |
1335 | // Since PATCHABLE_RET takes the opcode of the return statement as an |
1336 | // argument, we use that to emit the correct form of the RET that we want. |
1337 | // i.e. when we see this: |
1338 | // |
1339 | // PATCHABLE_RET X86::RET ... |
1340 | // |
1341 | // We should emit the RET followed by sleds. |
1342 | // |
1343 | // .p2align 1, ... |
1344 | // .Lxray_sled_N: |
1345 | // ret # or equivalent instruction |
1346 | // # 10 bytes worth of noops |
1347 | // |
1348 | // This just makes sure that the alignment for the next instruction is 2. |
1349 | auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true); |
1350 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1351 | OutStreamer->emitLabel(Symbol: CurSled); |
1352 | unsigned OpCode = MI.getOperand(i: 0).getImm(); |
1353 | MCInst Ret; |
1354 | Ret.setOpcode(OpCode); |
1355 | for (auto &MO : drop_begin(RangeOrContainer: MI.operands())) |
1356 | if (auto MaybeOperand = MCIL.LowerMachineOperand(MI: &MI, MO)) |
1357 | Ret.addOperand(Op: *MaybeOperand); |
1358 | OutStreamer->emitInstruction(Inst: Ret, STI: getSubtargetInfo()); |
1359 | emitX86Nops(OS&: *OutStreamer, NumBytes: 10, Subtarget); |
1360 | recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_EXIT, Version: 2); |
1361 | } |
1362 | |
// Lower a PATCHABLE_TAIL_CALL pseudo: emit an entry-style XRay sled first,
// then the real tail jump rebuilt from operand 0 (its opcode) and the
// remaining operands.
void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
                                             X86MCInstLower &MCIL) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Like PATCHABLE_RET, we have the actual instruction in the operands to this
  // instruction so we lower that particular instruction and its operands.
  // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
  // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
  // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
  // tail call much like how we have it in PATCHABLE_RET.
  auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true);
  OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo());
  OutStreamer->emitLabel(Symbol: CurSled);
  auto Target = OutContext.createTempSymbol();

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way do force the relative jump.
  OutStreamer->emitBytes(Data: "\xeb\x09" );
  emitX86Nops(OS&: *OutStreamer, NumBytes: 9, Subtarget);
  OutStreamer->emitLabel(Symbol: Target);
  recordSled(Sled: CurSled, MI, Kind: SledKind::TAIL_CALL, Version: 2);

  // Translate the pseudo tail-jump opcode to the plain jump it encodes.
  unsigned OpCode = MI.getOperand(i: 0).getImm();
  OpCode = convertTailJumpOpcode(Opcode: OpCode);
  MCInst TC;
  TC.setOpcode(OpCode);

  // Before emitting the instruction, add a comment to indicate that this is
  // indeed a tail call.
  OutStreamer->AddComment(T: "TAILCALL" );
  for (auto &MO : drop_begin(RangeOrContainer: MI.operands()))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(MI: &MI, MO))
      TC.addOperand(Op: *MaybeOperand);
  OutStreamer->emitInstruction(Inst: TC, STI: getSubtargetInfo());
}
1399 | |
1400 | // Returns instruction preceding MBBI in MachineFunction. |
1401 | // If MBBI is the first instruction of the first basic block, returns null. |
1402 | static MachineBasicBlock::const_iterator |
1403 | PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) { |
1404 | const MachineBasicBlock *MBB = MBBI->getParent(); |
1405 | while (MBBI == MBB->begin()) { |
1406 | if (MBB == &MBB->getParent()->front()) |
1407 | return MachineBasicBlock::const_iterator(); |
1408 | MBB = MBB->getPrevNode(); |
1409 | MBBI = MBB->end(); |
1410 | } |
1411 | --MBBI; |
1412 | return MBBI; |
1413 | } |
1414 | |
1415 | static unsigned getSrcIdx(const MachineInstr* MI, unsigned SrcIdx) { |
1416 | if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) { |
1417 | // Skip mask operand. |
1418 | ++SrcIdx; |
1419 | if (X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) { |
1420 | // Skip passthru operand. |
1421 | ++SrcIdx; |
1422 | } |
1423 | } |
1424 | return SrcIdx; |
1425 | } |
1426 | |
1427 | static void printDstRegisterName(raw_ostream &CS, const MachineInstr *MI, |
1428 | unsigned SrcOpIdx) { |
1429 | const MachineOperand &DstOp = MI->getOperand(i: 0); |
1430 | CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg()); |
1431 | |
1432 | // Handle AVX512 MASK/MASXZ write mask comments. |
1433 | // MASK: zmmX {%kY} |
1434 | // MASKZ: zmmX {%kY} {z} |
1435 | if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) { |
1436 | const MachineOperand &WriteMaskOp = MI->getOperand(i: SrcOpIdx - 1); |
1437 | StringRef Mask = X86ATTInstPrinter::getRegisterName(Reg: WriteMaskOp.getReg()); |
1438 | CS << " {%" << Mask << "}" ; |
1439 | if (!X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) { |
1440 | CS << " {z}" ; |
1441 | } |
1442 | } |
1443 | } |
1444 | |
// Prints a decoded shuffle mask, e.g. "xmm0[0,1],zero,mem[3,2]". Mask
// values < e select from Src1, values >= e select from Src2; sentinel
// values mark zeroed or undef elements. Consecutive elements drawn from
// the same source are folded into a single bracketed span.
static void printShuffleMask(raw_ostream &CS, StringRef Src1Name,
                             StringRef Src2Name, ArrayRef<int> Mask) {
  // One source operand, fix the mask to print all elements in one span.
  SmallVector<int, 8> ShuffleMask(Mask);
  if (Src1Name == Src2Name)
    for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
      if (ShuffleMask[i] >= e)
        ShuffleMask[i] -= e;

  for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
    if (i != 0)
      CS << "," ;
    if (ShuffleMask[i] == SM_SentinelZero) {
      CS << "zero" ;
      continue;
    }

    // Otherwise, it must come from src1 or src2. Print the span of elements
    // that comes from this src.
    bool isSrc1 = ShuffleMask[i] < (int)e;
    CS << (isSrc1 ? Src1Name : Src2Name) << '[';

    bool IsFirst = true;
    // Greedily consume every consecutive element drawn from the same
    // source (undef elements stay inside the current span).
    while (i != e && ShuffleMask[i] != SM_SentinelZero &&
           (ShuffleMask[i] < (int)e) == isSrc1) {
      if (!IsFirst)
        CS << ',';
      else
        IsFirst = false;
      if (ShuffleMask[i] == SM_SentinelUndef)
        CS << "u" ;
      else
        CS << ShuffleMask[i] % (int)e;
      ++i;
    }
    CS << ']';
    --i; // For loop increments element #.
  }
}
1484 | |
1485 | static std::string (const MachineInstr *MI, unsigned SrcOp1Idx, |
1486 | unsigned SrcOp2Idx, ArrayRef<int> Mask) { |
1487 | std::string ; |
1488 | |
1489 | const MachineOperand &SrcOp1 = MI->getOperand(i: SrcOp1Idx); |
1490 | const MachineOperand &SrcOp2 = MI->getOperand(i: SrcOp2Idx); |
1491 | StringRef Src1Name = SrcOp1.isReg() |
1492 | ? X86ATTInstPrinter::getRegisterName(Reg: SrcOp1.getReg()) |
1493 | : "mem" ; |
1494 | StringRef Src2Name = SrcOp2.isReg() |
1495 | ? X86ATTInstPrinter::getRegisterName(Reg: SrcOp2.getReg()) |
1496 | : "mem" ; |
1497 | |
1498 | raw_string_ostream CS(Comment); |
1499 | printDstRegisterName(CS, MI, SrcOpIdx: SrcOp1Idx); |
1500 | CS << " = " ; |
1501 | printShuffleMask(CS, Src1Name, Src2Name, Mask); |
1502 | CS.flush(); |
1503 | |
1504 | return Comment; |
1505 | } |
1506 | |
1507 | static void printConstant(const APInt &Val, raw_ostream &CS, |
1508 | bool PrintZero = false) { |
1509 | if (Val.getBitWidth() <= 64) { |
1510 | CS << (PrintZero ? 0ULL : Val.getZExtValue()); |
1511 | } else { |
1512 | // print multi-word constant as (w0,w1) |
1513 | CS << "(" ; |
1514 | for (int i = 0, N = Val.getNumWords(); i < N; ++i) { |
1515 | if (i > 0) |
1516 | CS << "," ; |
1517 | CS << (PrintZero ? 0ULL : Val.getRawData()[i]); |
1518 | } |
1519 | CS << ")" ; |
1520 | } |
1521 | } |
1522 | |
1523 | static void printConstant(const APFloat &Flt, raw_ostream &CS, |
1524 | bool PrintZero = false) { |
1525 | SmallString<32> Str; |
1526 | // Force scientific notation to distinguish from integers. |
1527 | if (PrintZero) |
1528 | APFloat::getZero(Sem: Flt.getSemantics()).toString(Str, FormatPrecision: 0, FormatMaxPadding: 0); |
1529 | else |
1530 | Flt.toString(Str, FormatPrecision: 0, FormatMaxPadding: 0); |
1531 | CS << Str; |
1532 | } |
1533 | |
1534 | static void printConstant(const Constant *COp, unsigned BitWidth, |
1535 | raw_ostream &CS, bool PrintZero = false) { |
1536 | if (isa<UndefValue>(Val: COp)) { |
1537 | CS << "u" ; |
1538 | } else if (auto *CI = dyn_cast<ConstantInt>(Val: COp)) { |
1539 | printConstant(Val: CI->getValue(), CS, PrintZero); |
1540 | } else if (auto *CF = dyn_cast<ConstantFP>(Val: COp)) { |
1541 | printConstant(Flt: CF->getValueAPF(), CS, PrintZero); |
1542 | } else if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: COp)) { |
1543 | Type *EltTy = CDS->getElementType(); |
1544 | bool IsInteger = EltTy->isIntegerTy(); |
1545 | bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy(); |
1546 | unsigned EltBits = EltTy->getPrimitiveSizeInBits(); |
1547 | unsigned E = std::min(a: BitWidth / EltBits, b: CDS->getNumElements()); |
1548 | assert((BitWidth % EltBits) == 0 && "Element size mismatch" ); |
1549 | for (unsigned I = 0; I != E; ++I) { |
1550 | if (I != 0) |
1551 | CS << "," ; |
1552 | if (IsInteger) |
1553 | printConstant(Val: CDS->getElementAsAPInt(i: I), CS, PrintZero); |
1554 | else if (IsFP) |
1555 | printConstant(Flt: CDS->getElementAsAPFloat(i: I), CS, PrintZero); |
1556 | else |
1557 | CS << "?" ; |
1558 | } |
1559 | } else if (auto *CV = dyn_cast<ConstantVector>(Val: COp)) { |
1560 | unsigned EltBits = CV->getType()->getScalarSizeInBits(); |
1561 | unsigned E = std::min(a: BitWidth / EltBits, b: CV->getNumOperands()); |
1562 | assert((BitWidth % EltBits) == 0 && "Element size mismatch" ); |
1563 | for (unsigned I = 0; I != E; ++I) { |
1564 | if (I != 0) |
1565 | CS << "," ; |
1566 | printConstant(COp: CV->getOperand(i_nocapture: I), BitWidth: EltBits, CS, PrintZero); |
1567 | } |
1568 | } else { |
1569 | CS << "?" ; |
1570 | } |
1571 | } |
1572 | |
1573 | static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer, |
1574 | int SclWidth, int VecWidth, |
1575 | const char *) { |
1576 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1577 | |
1578 | std::string ; |
1579 | raw_string_ostream CS(Comment); |
1580 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1581 | CS << " = " ; |
1582 | |
1583 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx)) { |
1584 | CS << "[" ; |
1585 | printConstant(COp: C, BitWidth: SclWidth, CS); |
1586 | for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) { |
1587 | CS << "," ; |
1588 | printConstant(COp: C, BitWidth: SclWidth, CS, PrintZero: true); |
1589 | } |
1590 | CS << "]" ; |
1591 | OutStreamer.AddComment(T: CS.str()); |
1592 | return; // early-out |
1593 | } |
1594 | |
1595 | // We didn't find a constant load, fallback to a shuffle mask decode. |
1596 | CS << ShuffleComment; |
1597 | OutStreamer.AddComment(T: CS.str()); |
1598 | } |
1599 | |
1600 | static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer, |
1601 | int Repeats, int BitWidth) { |
1602 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1603 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx)) { |
1604 | std::string ; |
1605 | raw_string_ostream CS(Comment); |
1606 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1607 | CS << " = [" ; |
1608 | for (int l = 0; l != Repeats; ++l) { |
1609 | if (l != 0) |
1610 | CS << "," ; |
1611 | printConstant(COp: C, BitWidth, CS); |
1612 | } |
1613 | CS << "]" ; |
1614 | OutStreamer.AddComment(T: CS.str()); |
1615 | } |
1616 | } |
1617 | |
1618 | static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1619 | int SrcEltBits, int DstEltBits, bool IsSext) { |
1620 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1621 | auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx); |
1622 | if (C && C->getType()->getScalarSizeInBits() == unsigned(SrcEltBits)) { |
1623 | if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: C)) { |
1624 | int NumElts = CDS->getNumElements(); |
1625 | std::string ; |
1626 | raw_string_ostream CS(Comment); |
1627 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1628 | CS << " = [" ; |
1629 | for (int i = 0; i != NumElts; ++i) { |
1630 | if (i != 0) |
1631 | CS << "," ; |
1632 | if (CDS->getElementType()->isIntegerTy()) { |
1633 | APInt Elt = CDS->getElementAsAPInt(i); |
1634 | Elt = IsSext ? Elt.sext(width: DstEltBits) : Elt.zext(width: DstEltBits); |
1635 | printConstant(Val: Elt, CS); |
1636 | } else |
1637 | CS << "?" ; |
1638 | } |
1639 | CS << "]" ; |
1640 | OutStreamer.AddComment(T: CS.str()); |
1641 | return true; |
1642 | } |
1643 | } |
1644 | |
1645 | return false; |
1646 | } |
1647 | static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1648 | int SrcEltBits, int DstEltBits) { |
1649 | printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: true); |
1650 | } |
1651 | static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1652 | int SrcEltBits, int DstEltBits) { |
1653 | if (printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: false)) |
1654 | return; |
1655 | |
1656 | // We didn't find a constant load, fallback to a shuffle mask decode. |
1657 | std::string ; |
1658 | raw_string_ostream CS(Comment); |
1659 | printDstRegisterName(CS, MI, SrcOpIdx: getSrcIdx(MI, SrcIdx: 1)); |
1660 | CS << " = " ; |
1661 | |
1662 | SmallVector<int> Mask; |
1663 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1664 | assert((Width % DstEltBits) == 0 && (DstEltBits % SrcEltBits) == 0 && |
1665 | "Illegal extension ratio" ); |
1666 | DecodeZeroExtendMask(SrcScalarBits: SrcEltBits, DstScalarBits: DstEltBits, NumDstElts: Width / DstEltBits, IsAnyExtend: false, ShuffleMask&: Mask); |
1667 | printShuffleMask(CS, Src1Name: "mem" , Src2Name: "" , Mask); |
1668 | |
1669 | OutStreamer.AddComment(T: CS.str()); |
1670 | } |
1671 | |
1672 | void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { |
1673 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?" ); |
1674 | assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) && |
1675 | "SEH_ instruction Windows and UEFI only" ); |
1676 | |
1677 | // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86. |
1678 | if (EmitFPOData) { |
1679 | X86TargetStreamer *XTS = |
1680 | static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()); |
1681 | switch (MI->getOpcode()) { |
1682 | case X86::SEH_PushReg: |
1683 | XTS->emitFPOPushReg(Reg: MI->getOperand(i: 0).getImm()); |
1684 | break; |
1685 | case X86::SEH_StackAlloc: |
1686 | XTS->emitFPOStackAlloc(StackAlloc: MI->getOperand(i: 0).getImm()); |
1687 | break; |
1688 | case X86::SEH_StackAlign: |
1689 | XTS->emitFPOStackAlign(Align: MI->getOperand(i: 0).getImm()); |
1690 | break; |
1691 | case X86::SEH_SetFrame: |
1692 | assert(MI->getOperand(1).getImm() == 0 && |
1693 | ".cv_fpo_setframe takes no offset" ); |
1694 | XTS->emitFPOSetFrame(Reg: MI->getOperand(i: 0).getImm()); |
1695 | break; |
1696 | case X86::SEH_EndPrologue: |
1697 | XTS->emitFPOEndPrologue(); |
1698 | break; |
1699 | case X86::SEH_SaveReg: |
1700 | case X86::SEH_SaveXMM: |
1701 | case X86::SEH_PushFrame: |
1702 | llvm_unreachable("SEH_ directive incompatible with FPO" ); |
1703 | break; |
1704 | default: |
1705 | llvm_unreachable("expected SEH_ instruction" ); |
1706 | } |
1707 | return; |
1708 | } |
1709 | |
1710 | // Otherwise, use the .seh_ directives for all other Windows platforms. |
1711 | switch (MI->getOpcode()) { |
1712 | case X86::SEH_PushReg: |
1713 | OutStreamer->emitWinCFIPushReg(Register: MI->getOperand(i: 0).getImm()); |
1714 | break; |
1715 | |
1716 | case X86::SEH_SaveReg: |
1717 | OutStreamer->emitWinCFISaveReg(Register: MI->getOperand(i: 0).getImm(), |
1718 | Offset: MI->getOperand(i: 1).getImm()); |
1719 | break; |
1720 | |
1721 | case X86::SEH_SaveXMM: |
1722 | OutStreamer->emitWinCFISaveXMM(Register: MI->getOperand(i: 0).getImm(), |
1723 | Offset: MI->getOperand(i: 1).getImm()); |
1724 | break; |
1725 | |
1726 | case X86::SEH_StackAlloc: |
1727 | OutStreamer->emitWinCFIAllocStack(Size: MI->getOperand(i: 0).getImm()); |
1728 | break; |
1729 | |
1730 | case X86::SEH_SetFrame: |
1731 | OutStreamer->emitWinCFISetFrame(Register: MI->getOperand(i: 0).getImm(), |
1732 | Offset: MI->getOperand(i: 1).getImm()); |
1733 | break; |
1734 | |
1735 | case X86::SEH_PushFrame: |
1736 | OutStreamer->emitWinCFIPushFrame(Code: MI->getOperand(i: 0).getImm()); |
1737 | break; |
1738 | |
1739 | case X86::SEH_EndPrologue: |
1740 | OutStreamer->emitWinCFIEndProlog(); |
1741 | break; |
1742 | |
1743 | default: |
1744 | llvm_unreachable("expected SEH_ instruction" ); |
1745 | } |
1746 | } |
1747 | |
1748 | static void (const MachineInstr *MI, |
1749 | MCStreamer &OutStreamer) { |
1750 | switch (MI->getOpcode()) { |
1751 | // Lower PSHUFB and VPERMILP normally but add a comment if we can find |
1752 | // a constant shuffle mask. We won't be able to do this at the MC layer |
1753 | // because the mask isn't an immediate. |
1754 | case X86::PSHUFBrm: |
1755 | case X86::VPSHUFBrm: |
1756 | case X86::VPSHUFBYrm: |
1757 | case X86::VPSHUFBZ128rm: |
1758 | case X86::VPSHUFBZ128rmk: |
1759 | case X86::VPSHUFBZ128rmkz: |
1760 | case X86::VPSHUFBZ256rm: |
1761 | case X86::VPSHUFBZ256rmk: |
1762 | case X86::VPSHUFBZ256rmkz: |
1763 | case X86::VPSHUFBZrm: |
1764 | case X86::VPSHUFBZrmk: |
1765 | case X86::VPSHUFBZrmkz: { |
1766 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1767 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1768 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1769 | SmallVector<int, 64> Mask; |
1770 | DecodePSHUFBMask(C, Width, ShuffleMask&: Mask); |
1771 | if (!Mask.empty()) |
1772 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1773 | } |
1774 | break; |
1775 | } |
1776 | |
1777 | case X86::VPERMILPSrm: |
1778 | case X86::VPERMILPSYrm: |
1779 | case X86::VPERMILPSZ128rm: |
1780 | case X86::VPERMILPSZ128rmk: |
1781 | case X86::VPERMILPSZ128rmkz: |
1782 | case X86::VPERMILPSZ256rm: |
1783 | case X86::VPERMILPSZ256rmk: |
1784 | case X86::VPERMILPSZ256rmkz: |
1785 | case X86::VPERMILPSZrm: |
1786 | case X86::VPERMILPSZrmk: |
1787 | case X86::VPERMILPSZrmkz: { |
1788 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1789 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1790 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1791 | SmallVector<int, 16> Mask; |
1792 | DecodeVPERMILPMask(C, ElSize: 32, Width, ShuffleMask&: Mask); |
1793 | if (!Mask.empty()) |
1794 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1795 | } |
1796 | break; |
1797 | } |
1798 | case X86::VPERMILPDrm: |
1799 | case X86::VPERMILPDYrm: |
1800 | case X86::VPERMILPDZ128rm: |
1801 | case X86::VPERMILPDZ128rmk: |
1802 | case X86::VPERMILPDZ128rmkz: |
1803 | case X86::VPERMILPDZ256rm: |
1804 | case X86::VPERMILPDZ256rmk: |
1805 | case X86::VPERMILPDZ256rmkz: |
1806 | case X86::VPERMILPDZrm: |
1807 | case X86::VPERMILPDZrmk: |
1808 | case X86::VPERMILPDZrmkz: { |
1809 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1810 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1811 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1812 | SmallVector<int, 16> Mask; |
1813 | DecodeVPERMILPMask(C, ElSize: 64, Width, ShuffleMask&: Mask); |
1814 | if (!Mask.empty()) |
1815 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1816 | } |
1817 | break; |
1818 | } |
1819 | |
1820 | case X86::VPERMIL2PDrm: |
1821 | case X86::VPERMIL2PSrm: |
1822 | case X86::VPERMIL2PDYrm: |
1823 | case X86::VPERMIL2PSYrm: { |
1824 | assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) && |
1825 | "Unexpected number of operands!" ); |
1826 | |
1827 | const MachineOperand &CtrlOp = MI->getOperand(i: MI->getNumOperands() - 1); |
1828 | if (!CtrlOp.isImm()) |
1829 | break; |
1830 | |
1831 | unsigned ElSize; |
1832 | switch (MI->getOpcode()) { |
1833 | default: llvm_unreachable("Invalid opcode" ); |
1834 | case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break; |
1835 | case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break; |
1836 | } |
1837 | |
1838 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 3)) { |
1839 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1840 | SmallVector<int, 16> Mask; |
1841 | DecodeVPERMIL2PMask(C, M2Z: (unsigned)CtrlOp.getImm(), ElSize, Width, ShuffleMask&: Mask); |
1842 | if (!Mask.empty()) |
1843 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: 1, SrcOp2Idx: 2, Mask)); |
1844 | } |
1845 | break; |
1846 | } |
1847 | |
1848 | case X86::VPPERMrrm: { |
1849 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 3)) { |
1850 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1851 | SmallVector<int, 16> Mask; |
1852 | DecodeVPPERMMask(C, Width, ShuffleMask&: Mask); |
1853 | if (!Mask.empty()) |
1854 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: 1, SrcOp2Idx: 2, Mask)); |
1855 | } |
1856 | break; |
1857 | } |
1858 | |
1859 | case X86::MMX_MOVQ64rm: { |
1860 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 1)) { |
1861 | std::string ; |
1862 | raw_string_ostream CS(Comment); |
1863 | const MachineOperand &DstOp = MI->getOperand(i: 0); |
1864 | CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg()) << " = " ; |
1865 | if (auto *CF = dyn_cast<ConstantFP>(Val: C)) { |
1866 | CS << "0x" << toString(I: CF->getValueAPF().bitcastToAPInt(), Radix: 16, Signed: false); |
1867 | OutStreamer.AddComment(T: CS.str()); |
1868 | } |
1869 | } |
1870 | break; |
1871 | } |
1872 | |
1873 | #define MASK_AVX512_CASE(Instr) \ |
1874 | case Instr: \ |
1875 | case Instr##k: \ |
1876 | case Instr##kz: |
1877 | |
1878 | case X86::MOVSDrm: |
1879 | case X86::VMOVSDrm: |
1880 | MASK_AVX512_CASE(X86::VMOVSDZrm) |
1881 | case X86::MOVSDrm_alt: |
1882 | case X86::VMOVSDrm_alt: |
1883 | case X86::VMOVSDZrm_alt: |
1884 | case X86::MOVQI2PQIrm: |
1885 | case X86::VMOVQI2PQIrm: |
1886 | case X86::VMOVQI2PQIZrm: |
1887 | printZeroUpperMove(MI, OutStreamer, SclWidth: 64, VecWidth: 128, ShuffleComment: "mem[0],zero" ); |
1888 | break; |
1889 | |
1890 | MASK_AVX512_CASE(X86::VMOVSHZrm) |
1891 | case X86::VMOVSHZrm_alt: |
1892 | printZeroUpperMove(MI, OutStreamer, SclWidth: 16, VecWidth: 128, |
1893 | ShuffleComment: "mem[0],zero,zero,zero,zero,zero,zero,zero" ); |
1894 | break; |
1895 | |
1896 | case X86::MOVSSrm: |
1897 | case X86::VMOVSSrm: |
1898 | MASK_AVX512_CASE(X86::VMOVSSZrm) |
1899 | case X86::MOVSSrm_alt: |
1900 | case X86::VMOVSSrm_alt: |
1901 | case X86::VMOVSSZrm_alt: |
1902 | case X86::MOVDI2PDIrm: |
1903 | case X86::VMOVDI2PDIrm: |
1904 | case X86::VMOVDI2PDIZrm: |
1905 | printZeroUpperMove(MI, OutStreamer, SclWidth: 32, VecWidth: 128, ShuffleComment: "mem[0],zero,zero,zero" ); |
1906 | break; |
1907 | |
1908 | #define MOV_CASE(Prefix, Suffix) \ |
1909 | case X86::Prefix##MOVAPD##Suffix##rm: \ |
1910 | case X86::Prefix##MOVAPS##Suffix##rm: \ |
1911 | case X86::Prefix##MOVUPD##Suffix##rm: \ |
1912 | case X86::Prefix##MOVUPS##Suffix##rm: \ |
1913 | case X86::Prefix##MOVDQA##Suffix##rm: \ |
1914 | case X86::Prefix##MOVDQU##Suffix##rm: |
1915 | |
1916 | #define MOV_AVX512_CASE(Suffix, Postfix) \ |
1917 | case X86::VMOVDQA64##Suffix##rm##Postfix: \ |
1918 | case X86::VMOVDQA32##Suffix##rm##Postfix: \ |
1919 | case X86::VMOVDQU64##Suffix##rm##Postfix: \ |
1920 | case X86::VMOVDQU32##Suffix##rm##Postfix: \ |
1921 | case X86::VMOVDQU16##Suffix##rm##Postfix: \ |
1922 | case X86::VMOVDQU8##Suffix##rm##Postfix: \ |
1923 | case X86::VMOVAPS##Suffix##rm##Postfix: \ |
1924 | case X86::VMOVAPD##Suffix##rm##Postfix: \ |
1925 | case X86::VMOVUPS##Suffix##rm##Postfix: \ |
1926 | case X86::VMOVUPD##Suffix##rm##Postfix: |
1927 | |
1928 | #define CASE_128_MOV_RM() \ |
1929 | MOV_CASE(, ) /* SSE */ \ |
1930 | MOV_CASE(V, ) /* AVX-128 */ \ |
1931 | MOV_AVX512_CASE(Z128, ) \ |
1932 | MOV_AVX512_CASE(Z128, k) \ |
1933 | MOV_AVX512_CASE(Z128, kz) |
1934 | |
1935 | #define CASE_256_MOV_RM() \ |
1936 | MOV_CASE(V, Y) /* AVX-256 */ \ |
1937 | MOV_AVX512_CASE(Z256, ) \ |
1938 | MOV_AVX512_CASE(Z256, k) \ |
1939 | MOV_AVX512_CASE(Z256, kz) \ |
1940 | |
1941 | #define CASE_512_MOV_RM() \ |
1942 | MOV_AVX512_CASE(Z, ) \ |
1943 | MOV_AVX512_CASE(Z, k) \ |
1944 | MOV_AVX512_CASE(Z, kz) \ |
1945 | |
1946 | // For loads from a constant pool to a vector register, print the constant |
1947 | // loaded. |
1948 | CASE_128_MOV_RM() |
1949 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 128); |
1950 | break; |
1951 | CASE_256_MOV_RM() |
1952 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 256); |
1953 | break; |
1954 | CASE_512_MOV_RM() |
1955 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 512); |
1956 | break; |
1957 | case X86::VBROADCASTF128rm: |
1958 | case X86::VBROADCASTI128rm: |
1959 | MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm) |
1960 | MASK_AVX512_CASE(X86::VBROADCASTF64X2Z128rm) |
1961 | MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm) |
1962 | MASK_AVX512_CASE(X86::VBROADCASTI64X2Z128rm) |
1963 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 128); |
1964 | break; |
1965 | MASK_AVX512_CASE(X86::VBROADCASTF32X4rm) |
1966 | MASK_AVX512_CASE(X86::VBROADCASTF64X2rm) |
1967 | MASK_AVX512_CASE(X86::VBROADCASTI32X4rm) |
1968 | MASK_AVX512_CASE(X86::VBROADCASTI64X2rm) |
1969 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 128); |
1970 | break; |
1971 | MASK_AVX512_CASE(X86::VBROADCASTF32X8rm) |
1972 | MASK_AVX512_CASE(X86::VBROADCASTF64X4rm) |
1973 | MASK_AVX512_CASE(X86::VBROADCASTI32X8rm) |
1974 | MASK_AVX512_CASE(X86::VBROADCASTI64X4rm) |
1975 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 256); |
1976 | break; |
1977 | |
1978 | // For broadcast loads from a constant pool to a vector register, repeatedly |
1979 | // print the constant loaded. |
1980 | case X86::MOVDDUPrm: |
1981 | case X86::VMOVDDUPrm: |
1982 | MASK_AVX512_CASE(X86::VMOVDDUPZ128rm) |
1983 | case X86::VPBROADCASTQrm: |
1984 | MASK_AVX512_CASE(X86::VPBROADCASTQZ128rm) |
1985 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 64); |
1986 | break; |
1987 | case X86::VBROADCASTSDYrm: |
1988 | MASK_AVX512_CASE(X86::VBROADCASTSDZ256rm) |
1989 | case X86::VPBROADCASTQYrm: |
1990 | MASK_AVX512_CASE(X86::VPBROADCASTQZ256rm) |
1991 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 64); |
1992 | break; |
1993 | MASK_AVX512_CASE(X86::VBROADCASTSDZrm) |
1994 | MASK_AVX512_CASE(X86::VPBROADCASTQZrm) |
1995 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 64); |
1996 | break; |
1997 | case X86::VBROADCASTSSrm: |
1998 | MASK_AVX512_CASE(X86::VBROADCASTSSZ128rm) |
1999 | case X86::VPBROADCASTDrm: |
2000 | MASK_AVX512_CASE(X86::VPBROADCASTDZ128rm) |
2001 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 32); |
2002 | break; |
2003 | case X86::VBROADCASTSSYrm: |
2004 | MASK_AVX512_CASE(X86::VBROADCASTSSZ256rm) |
2005 | case X86::VPBROADCASTDYrm: |
2006 | MASK_AVX512_CASE(X86::VPBROADCASTDZ256rm) |
2007 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 32); |
2008 | break; |
2009 | MASK_AVX512_CASE(X86::VBROADCASTSSZrm) |
2010 | MASK_AVX512_CASE(X86::VPBROADCASTDZrm) |
2011 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 32); |
2012 | break; |
2013 | case X86::VPBROADCASTWrm: |
2014 | MASK_AVX512_CASE(X86::VPBROADCASTWZ128rm) |
2015 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 16); |
2016 | break; |
2017 | case X86::VPBROADCASTWYrm: |
2018 | MASK_AVX512_CASE(X86::VPBROADCASTWZ256rm) |
2019 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 16); |
2020 | break; |
2021 | MASK_AVX512_CASE(X86::VPBROADCASTWZrm) |
2022 | printBroadcast(MI, OutStreamer, Repeats: 32, BitWidth: 16); |
2023 | break; |
2024 | case X86::VPBROADCASTBrm: |
2025 | MASK_AVX512_CASE(X86::VPBROADCASTBZ128rm) |
2026 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 8); |
2027 | break; |
2028 | case X86::VPBROADCASTBYrm: |
2029 | MASK_AVX512_CASE(X86::VPBROADCASTBZ256rm) |
2030 | printBroadcast(MI, OutStreamer, Repeats: 32, BitWidth: 8); |
2031 | break; |
2032 | MASK_AVX512_CASE(X86::VPBROADCASTBZrm) |
2033 | printBroadcast(MI, OutStreamer, Repeats: 64, BitWidth: 8); |
2034 | break; |
2035 | |
2036 | #define MOVX_CASE(Prefix, Ext, Type, Suffix, Postfix) \ |
2037 | case X86::Prefix##PMOV##Ext##Type##Suffix##rm##Postfix: |
2038 | |
2039 | #define CASE_MOVX_RM(Ext, Type) \ |
2040 | MOVX_CASE(, Ext, Type, , ) \ |
2041 | MOVX_CASE(V, Ext, Type, , ) \ |
2042 | MOVX_CASE(V, Ext, Type, Y, ) \ |
2043 | MOVX_CASE(V, Ext, Type, Z128, ) \ |
2044 | MOVX_CASE(V, Ext, Type, Z128, k ) \ |
2045 | MOVX_CASE(V, Ext, Type, Z128, kz ) \ |
2046 | MOVX_CASE(V, Ext, Type, Z256, ) \ |
2047 | MOVX_CASE(V, Ext, Type, Z256, k ) \ |
2048 | MOVX_CASE(V, Ext, Type, Z256, kz ) \ |
2049 | MOVX_CASE(V, Ext, Type, Z, ) \ |
2050 | MOVX_CASE(V, Ext, Type, Z, k ) \ |
2051 | MOVX_CASE(V, Ext, Type, Z, kz ) |
2052 | |
2053 | CASE_MOVX_RM(SX, BD) |
2054 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 32); |
2055 | break; |
2056 | CASE_MOVX_RM(SX, BQ) |
2057 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 64); |
2058 | break; |
2059 | CASE_MOVX_RM(SX, BW) |
2060 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 16); |
2061 | break; |
2062 | CASE_MOVX_RM(SX, DQ) |
2063 | printSignExtend(MI, OutStreamer, SrcEltBits: 32, DstEltBits: 64); |
2064 | break; |
2065 | CASE_MOVX_RM(SX, WD) |
2066 | printSignExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 32); |
2067 | break; |
2068 | CASE_MOVX_RM(SX, WQ) |
2069 | printSignExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 64); |
2070 | break; |
2071 | |
2072 | CASE_MOVX_RM(ZX, BD) |
2073 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 32); |
2074 | break; |
2075 | CASE_MOVX_RM(ZX, BQ) |
2076 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 64); |
2077 | break; |
2078 | CASE_MOVX_RM(ZX, BW) |
2079 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 16); |
2080 | break; |
2081 | CASE_MOVX_RM(ZX, DQ) |
2082 | printZeroExtend(MI, OutStreamer, SrcEltBits: 32, DstEltBits: 64); |
2083 | break; |
2084 | CASE_MOVX_RM(ZX, WD) |
2085 | printZeroExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 32); |
2086 | break; |
2087 | CASE_MOVX_RM(ZX, WQ) |
2088 | printZeroExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 64); |
2089 | break; |
2090 | } |
2091 | } |
2092 | |
2093 | void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { |
2094 | // FIXME: Enable feature predicate checks once all the test pass. |
2095 | // X86_MC::verifyInstructionPredicates(MI->getOpcode(), |
2096 | // Subtarget->getFeatureBits()); |
2097 | |
2098 | X86MCInstLower MCInstLowering(*MF, *this); |
2099 | const X86RegisterInfo *RI = |
2100 | MF->getSubtarget<X86Subtarget>().getRegisterInfo(); |
2101 | |
2102 | if (MI->getOpcode() == X86::OR64rm) { |
2103 | for (auto &Opd : MI->operands()) { |
2104 | if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) == |
2105 | "swift_async_extendedFramePointerFlags" ) { |
2106 | ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true; |
2107 | } |
2108 | } |
2109 | } |
2110 | |
2111 | // Add comments for values loaded from constant pool. |
2112 | if (OutStreamer->isVerboseAsm()) |
2113 | addConstantComments(MI, OutStreamer&: *OutStreamer); |
2114 | |
2115 | // Add a comment about EVEX compression |
2116 | if (TM.Options.MCOptions.ShowMCEncoding) { |
2117 | if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY) |
2118 | OutStreamer->AddComment(T: "EVEX TO LEGACY Compression " , EOL: false); |
2119 | else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX) |
2120 | OutStreamer->AddComment(T: "EVEX TO VEX Compression " , EOL: false); |
2121 | else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX) |
2122 | OutStreamer->AddComment(T: "EVEX TO EVEX Compression " , EOL: false); |
2123 | } |
2124 | |
2125 | switch (MI->getOpcode()) { |
2126 | case TargetOpcode::DBG_VALUE: |
2127 | llvm_unreachable("Should be handled target independently" ); |
2128 | |
2129 | case X86::EH_RETURN: |
2130 | case X86::EH_RETURN64: { |
2131 | // Lower these as normal, but add some comments. |
2132 | Register Reg = MI->getOperand(i: 0).getReg(); |
2133 | OutStreamer->AddComment(T: StringRef("eh_return, addr: %" ) + |
2134 | X86ATTInstPrinter::getRegisterName(Reg)); |
2135 | break; |
2136 | } |
2137 | case X86::CLEANUPRET: { |
2138 | // Lower these as normal, but add some comments. |
2139 | OutStreamer->AddComment(T: "CLEANUPRET" ); |
2140 | break; |
2141 | } |
2142 | |
2143 | case X86::CATCHRET: { |
2144 | // Lower these as normal, but add some comments. |
2145 | OutStreamer->AddComment(T: "CATCHRET" ); |
2146 | break; |
2147 | } |
2148 | |
2149 | case X86::ENDBR32: |
2150 | case X86::ENDBR64: { |
2151 | // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for |
2152 | // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be |
2153 | // non-empty. If MI is the initial ENDBR, place the |
2154 | // __patchable_function_entries label after ENDBR. |
2155 | if (CurrentPatchableFunctionEntrySym && |
2156 | CurrentPatchableFunctionEntrySym == CurrentFnBegin && |
2157 | MI == &MF->front().front()) { |
2158 | MCInst Inst; |
2159 | MCInstLowering.Lower(MI, OutMI&: Inst); |
2160 | EmitAndCountInstruction(Inst); |
2161 | CurrentPatchableFunctionEntrySym = createTempSymbol(Name: "patch" ); |
2162 | OutStreamer->emitLabel(Symbol: CurrentPatchableFunctionEntrySym); |
2163 | return; |
2164 | } |
2165 | break; |
2166 | } |
2167 | |
2168 | case X86::TAILJMPd64: |
2169 | if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11)) |
2170 | EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX)); |
2171 | [[fallthrough]]; |
2172 | case X86::TAILJMPr: |
2173 | case X86::TAILJMPm: |
2174 | case X86::TAILJMPd: |
2175 | case X86::TAILJMPd_CC: |
2176 | case X86::TAILJMPr64: |
2177 | case X86::TAILJMPm64: |
2178 | case X86::TAILJMPd64_CC: |
2179 | case X86::TAILJMPr64_REX: |
2180 | case X86::TAILJMPm64_REX: |
2181 | // Lower these as normal, but add some comments. |
2182 | OutStreamer->AddComment(T: "TAILCALL" ); |
2183 | break; |
2184 | |
2185 | case X86::TLS_addr32: |
2186 | case X86::TLS_addr64: |
2187 | case X86::TLS_addrX32: |
2188 | case X86::TLS_base_addr32: |
2189 | case X86::TLS_base_addr64: |
2190 | case X86::TLS_base_addrX32: |
2191 | case X86::TLS_desc32: |
2192 | case X86::TLS_desc64: |
2193 | return LowerTlsAddr(MCInstLowering, MI: *MI); |
2194 | |
2195 | case X86::MOVPC32r: { |
2196 | // This is a pseudo op for a two instruction sequence with a label, which |
2197 | // looks like: |
2198 | // call "L1$pb" |
2199 | // "L1$pb": |
2200 | // popl %esi |
2201 | |
2202 | // Emit the call. |
2203 | MCSymbol *PICBase = MF->getPICBaseSymbol(); |
2204 | // FIXME: We would like an efficient form for this, so we don't have to do a |
2205 | // lot of extra uniquing. |
2206 | EmitAndCountInstruction( |
2207 | MCInstBuilder(X86::CALLpcrel32) |
2208 | .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); |
2209 | |
2210 | const X86FrameLowering *FrameLowering = |
2211 | MF->getSubtarget<X86Subtarget>().getFrameLowering(); |
2212 | bool hasFP = FrameLowering->hasFP(MF: *MF); |
2213 | |
2214 | // TODO: This is needed only if we require precise CFA. |
2215 | bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && |
2216 | !OutStreamer->getDwarfFrameInfos().back().End; |
2217 | |
2218 | int stackGrowth = -RI->getSlotSize(); |
2219 | |
2220 | if (HasActiveDwarfFrame && !hasFP) { |
2221 | OutStreamer->emitCFIAdjustCfaOffset(Adjustment: -stackGrowth); |
2222 | MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true); |
2223 | } |
2224 | |
2225 | // Emit the label. |
2226 | OutStreamer->emitLabel(Symbol: PICBase); |
2227 | |
2228 | // popl $reg |
2229 | EmitAndCountInstruction( |
2230 | MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg())); |
2231 | |
2232 | if (HasActiveDwarfFrame && !hasFP) { |
2233 | OutStreamer->emitCFIAdjustCfaOffset(Adjustment: stackGrowth); |
2234 | } |
2235 | return; |
2236 | } |
2237 | |
2238 | case X86::ADD32ri: { |
2239 | // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. |
2240 | if (MI->getOperand(i: 2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) |
2241 | break; |
2242 | |
2243 | // Okay, we have something like: |
2244 | // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) |
2245 | |
2246 | // For this, we want to print something like: |
2247 | // MYGLOBAL + (. - PICBASE) |
2248 | // However, we can't generate a ".", so just emit a new label here and refer |
2249 | // to it. |
2250 | MCSymbol *DotSym = OutContext.createTempSymbol(); |
2251 | OutStreamer->emitLabel(Symbol: DotSym); |
2252 | |
2253 | // Now that we have emitted the label, lower the complex operand expression. |
2254 | MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MO: MI->getOperand(i: 2)); |
2255 | |
2256 | const MCExpr *DotExpr = MCSymbolRefExpr::create(Symbol: DotSym, Ctx&: OutContext); |
2257 | const MCExpr *PICBase = |
2258 | MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx&: OutContext); |
2259 | DotExpr = MCBinaryExpr::createSub(LHS: DotExpr, RHS: PICBase, Ctx&: OutContext); |
2260 | |
2261 | DotExpr = MCBinaryExpr::createAdd( |
2262 | LHS: MCSymbolRefExpr::create(Symbol: OpSym, Ctx&: OutContext), RHS: DotExpr, Ctx&: OutContext); |
2263 | |
2264 | EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri) |
2265 | .addReg(MI->getOperand(0).getReg()) |
2266 | .addReg(MI->getOperand(1).getReg()) |
2267 | .addExpr(DotExpr)); |
2268 | return; |
2269 | } |
2270 | case TargetOpcode::STATEPOINT: |
2271 | return LowerSTATEPOINT(MI: *MI, MCIL&: MCInstLowering); |
2272 | |
2273 | case TargetOpcode::FAULTING_OP: |
2274 | return LowerFAULTING_OP(FaultingMI: *MI, MCIL&: MCInstLowering); |
2275 | |
2276 | case TargetOpcode::FENTRY_CALL: |
2277 | return LowerFENTRY_CALL(MI: *MI, MCIL&: MCInstLowering); |
2278 | |
2279 | case TargetOpcode::PATCHABLE_OP: |
2280 | return LowerPATCHABLE_OP(MI: *MI, MCIL&: MCInstLowering); |
2281 | |
2282 | case TargetOpcode::STACKMAP: |
2283 | return LowerSTACKMAP(MI: *MI); |
2284 | |
2285 | case TargetOpcode::PATCHPOINT: |
2286 | return LowerPATCHPOINT(MI: *MI, MCIL&: MCInstLowering); |
2287 | |
2288 | case TargetOpcode::PATCHABLE_FUNCTION_ENTER: |
2289 | return LowerPATCHABLE_FUNCTION_ENTER(MI: *MI, MCIL&: MCInstLowering); |
2290 | |
2291 | case TargetOpcode::PATCHABLE_RET: |
2292 | return LowerPATCHABLE_RET(MI: *MI, MCIL&: MCInstLowering); |
2293 | |
2294 | case TargetOpcode::PATCHABLE_TAIL_CALL: |
2295 | return LowerPATCHABLE_TAIL_CALL(MI: *MI, MCIL&: MCInstLowering); |
2296 | |
2297 | case TargetOpcode::PATCHABLE_EVENT_CALL: |
2298 | return LowerPATCHABLE_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering); |
2299 | |
2300 | case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: |
2301 | return LowerPATCHABLE_TYPED_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering); |
2302 | |
2303 | case X86::MORESTACK_RET: |
2304 | EmitAndCountInstruction(Inst&: MCInstBuilder(getRetOpcode(Subtarget: *Subtarget))); |
2305 | return; |
2306 | |
2307 | case X86::KCFI_CHECK: |
2308 | return LowerKCFI_CHECK(MI: *MI); |
2309 | |
2310 | case X86::ASAN_CHECK_MEMACCESS: |
2311 | return LowerASAN_CHECK_MEMACCESS(MI: *MI); |
2312 | |
2313 | case X86::MORESTACK_RET_RESTORE_R10: |
2314 | // Return, then restore R10. |
2315 | EmitAndCountInstruction(Inst&: MCInstBuilder(getRetOpcode(Subtarget: *Subtarget))); |
2316 | EmitAndCountInstruction( |
2317 | MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX)); |
2318 | return; |
2319 | |
2320 | case X86::SEH_PushReg: |
2321 | case X86::SEH_SaveReg: |
2322 | case X86::SEH_SaveXMM: |
2323 | case X86::SEH_StackAlloc: |
2324 | case X86::SEH_StackAlign: |
2325 | case X86::SEH_SetFrame: |
2326 | case X86::SEH_PushFrame: |
2327 | case X86::SEH_EndPrologue: |
2328 | EmitSEHInstruction(MI); |
2329 | return; |
2330 | |
2331 | case X86::SEH_Epilogue: { |
2332 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?" ); |
2333 | MachineBasicBlock::const_iterator MBBI(MI); |
2334 | // Check if preceded by a call and emit nop if so. |
2335 | for (MBBI = PrevCrossBBInst(MBBI); |
2336 | MBBI != MachineBasicBlock::const_iterator(); |
2337 | MBBI = PrevCrossBBInst(MBBI)) { |
2338 | // Pseudo instructions that aren't a call are assumed to not emit any |
2339 | // code. If they do, we worst case generate unnecessary noops after a |
2340 | // call. |
2341 | if (MBBI->isCall() || !MBBI->isPseudo()) { |
2342 | if (MBBI->isCall()) |
2343 | EmitAndCountInstruction(MCInstBuilder(X86::NOOP)); |
2344 | break; |
2345 | } |
2346 | } |
2347 | return; |
2348 | } |
2349 | case X86::UBSAN_UD1: |
2350 | EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm) |
2351 | .addReg(X86::EAX) |
2352 | .addReg(X86::EAX) |
2353 | .addImm(1) |
2354 | .addReg(X86::NoRegister) |
2355 | .addImm(MI->getOperand(0).getImm()) |
2356 | .addReg(X86::NoRegister)); |
2357 | return; |
2358 | case X86::CALL64pcrel32: |
2359 | if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11)) |
2360 | EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX)); |
2361 | break; |
2362 | } |
2363 | |
2364 | MCInst TmpInst; |
2365 | MCInstLowering.Lower(MI, OutMI&: TmpInst); |
2366 | |
2367 | // Stackmap shadows cannot include branch targets, so we can count the bytes |
2368 | // in a call towards the shadow, but must ensure that the no thread returns |
2369 | // in to the stackmap shadow. The only way to achieve this is if the call |
2370 | // is at the end of the shadow. |
2371 | if (MI->isCall()) { |
2372 | // Count then size of the call towards the shadow |
2373 | SMShadowTracker.count(Inst&: TmpInst, STI: getSubtargetInfo(), CodeEmitter: CodeEmitter.get()); |
2374 | // Then flush the shadow so that we fill with nops before the call, not |
2375 | // after it. |
2376 | SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo()); |
2377 | // Then emit the call |
2378 | OutStreamer->emitInstruction(Inst: TmpInst, STI: getSubtargetInfo()); |
2379 | return; |
2380 | } |
2381 | |
2382 | EmitAndCountInstruction(Inst&: TmpInst); |
2383 | } |
2384 | |