1//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides AMDGPU specific target streamer methods.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPUTargetStreamer.h"
14#include "AMDGPUMCKernelDescriptor.h"
15#include "AMDGPUPTNote.h"
16#include "AMDKernelCodeT.h"
17#include "Utils/AMDGPUBaseInfo.h"
18#include "Utils/AMDKernelCodeTUtils.h"
19#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
20#include "llvm/BinaryFormat/ELF.h"
21#include "llvm/MC/MCAssembler.h"
22#include "llvm/MC/MCContext.h"
23#include "llvm/MC/MCELFStreamer.h"
24#include "llvm/MC/MCObjectWriter.h"
25#include "llvm/MC/MCSectionELF.h"
26#include "llvm/MC/MCSubtargetInfo.h"
27#include "llvm/Support/AMDGPUMetadata.h"
28#include "llvm/Support/AMDHSAKernelDescriptor.h"
29#include "llvm/Support/Casting.h"
30#include "llvm/Support/CommandLine.h"
31#include "llvm/Support/FormattedStream.h"
32#include "llvm/TargetParser/TargetParser.h"
33
34using namespace llvm;
35using namespace llvm::AMDGPU;
36
37//===----------------------------------------------------------------------===//
38// AMDGPUTargetStreamer
39//===----------------------------------------------------------------------===//
40
// Testing knob: forces the generic_v<N> version field written into the ELF
// e_flags (consumed by getEFlagsV6). 0 means "not forced" — the version is
// derived from the target CPU instead.
static cl::opt<unsigned>
    ForceGenericVersion("amdgpu-force-generic-version",
                        cl::desc("Force a specific generic_v<N> flag to be "
                                 "added. For testing purposes only."),
                        cl::ReallyHidden, cl::init(Val: 0));
46
47bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
48 msgpack::Document HSAMetadataDoc;
49 if (!HSAMetadataDoc.fromYAML(S: HSAMetadataString))
50 return false;
51 return EmitHSAMetadata(HSAMetadata&: HSAMetadataDoc, Strict: false);
52}
53
/// Map an ELF e_flags EF_AMDGPU_MACH_* machine value back to the canonical
/// GPU name. Unrecognized values fall back to GK_NONE. The AMDGCN name table
/// is consulted first; only if it has no entry is the R600 table used.
StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
  AMDGPU::GPUKind AK;

  // clang-format off
  switch (ElfMach) {
  case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
  case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
  case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
  case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
  case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
  case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
  case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
  case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
  case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
  case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
  case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
  case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
  case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
  case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
  case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
  case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
  // Unknown machine values degrade to GK_NONE rather than asserting, since
  // ElfMach may come from an object file produced elsewhere.
  default: AK = GK_NONE; break;
  }
  // clang-format on

  // Prefer the AMDGCN spelling; fall back to R600 when AMDGCN has no name
  // for this kind.
  StringRef GPUName = getArchNameAMDGCN(AK);
  if (GPUName != "")
    return GPUName;
  return getArchNameR600(AK);
}
133
/// Map a GPU name to its ELF e_flags EF_AMDGPU_MACH_* machine value — the
/// inverse of getArchNameFromElfMach. The name is parsed first as an AMDGCN
/// arch and, failing that, as an R600 arch; unknown names end up as GK_NONE
/// and therefore EF_AMDGPU_MACH_NONE.
unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
  AMDGPU::GPUKind AK = parseArchAMDGCN(CPU: GPU);
  if (AK == AMDGPU::GPUKind::GK_NONE)
    AK = parseArchR600(CPU: GPU);

  // Fully covered switch over GPUKind (no default) so that adding a new kind
  // without updating this table is a compile-time warning.
  // clang-format off
  switch (AK) {
  case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
  case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
  case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
  case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
  case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
  case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
  case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
  case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
  case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
  case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
  case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
  case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
  case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
  case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
  case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
  case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
  case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
  case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
  case GK_GFX602: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
  case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
  case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
  case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
  case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
  case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
  case GK_GFX705: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
  case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
  case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
  case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
  case GK_GFX805: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
  case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
  case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
  case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
  case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
  case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
  case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
  case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
  case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
  case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
  case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
  case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
  case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
  case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
  case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
  case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
  case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013;
  case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
  case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
  case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
  case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
  case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034;
  case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035;
  case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036;
  case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100;
  case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101;
  case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102;
  case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
  case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
  case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
  case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
  case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
  case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
  case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
  case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
  case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
  case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
  }
  // clang-format on

  llvm_unreachable("unknown GPU");
}
211
212//===----------------------------------------------------------------------===//
213// AMDGPUTargetAsmStreamer
214//===----------------------------------------------------------------------===//
215
// Textual-assembly flavor of the target streamer: everything is printed as
// assembler directives to OS instead of being encoded into an object file.
AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
                                                 formatted_raw_ostream &OS)
    : AMDGPUTargetStreamer(S), OS(OS) { }
219
220// A hook for emitting stuff at the end.
221// We use it for emitting the accumulated PAL metadata as directives.
222// The PAL metadata is reset after it is emitted.
223void AMDGPUTargetAsmStreamer::finish() {
224 std::string S;
225 getPALMetadata()->toString(S);
226 OS << S;
227
228 // Reset the pal metadata so its data will not affect a compilation that
229 // reuses this object.
230 getPALMetadata()->reset();
231}
232
233void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
234 OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
235}
236
237void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
238 unsigned COV) {
239 AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion(COV);
240 OS << "\t.amdhsa_code_object_version " << COV << '\n';
241}
242
243void
244AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
245 OS << "\t.amd_kernel_code_t\n";
246 dumpAmdKernelCode(C: &Header, OS, tab: "\t\t");
247 OS << "\t.end_amd_kernel_code_t\n";
248}
249
250void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
251 unsigned Type) {
252 switch (Type) {
253 default: llvm_unreachable("Invalid AMDGPU symbol type");
254 case ELF::STT_AMDGPU_HSA_KERNEL:
255 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
256 break;
257 }
258}
259
260void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
261 Align Alignment) {
262 OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
263 << Alignment.value() << '\n';
264}
265
266bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
267 OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
268 return true;
269}
270
271bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
272 msgpack::Document &HSAMetadataDoc, bool Strict) {
273 HSAMD::V3::MetadataVerifier Verifier(Strict);
274 if (!Verifier.verify(HSAMetadataRoot&: HSAMetadataDoc.getRoot()))
275 return false;
276
277 std::string HSAMetadataString;
278 raw_string_ostream StrOS(HSAMetadataString);
279 HSAMetadataDoc.toYAML(OS&: StrOS);
280
281 OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
282 OS << StrOS.str() << '\n';
283 OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
284 return true;
285}
286
// Emit the kernarg-preload compatibility header placed before the kernel
// entry point: one s_trap (or s_endpgm when trapping is disabled) followed by
// 63 words of 0xbf800000 (s_nop 0) filler — 64 instruction words in total.
// Firmware that does not support preloading kernel arguments executes this
// header and traps/ends instead of running code that expects preloaded SGPRs.
bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
    const MCSubtargetInfo &STI, bool TrapEnabled) {
  OS << (TrapEnabled ? "\ts_trap 2" : "\ts_endpgm")
     << " ; Kernarg preload header. Trap with incompatible firmware that "
        "doesn't support preloading kernel arguments.\n";
  OS << "\t.fill 63, 4, 0xbf800000 ; s_nop 0\n";
  return true;
}
295
296bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
297 const uint32_t Encoded_s_code_end = 0xbf9f0000;
298 const uint32_t Encoded_s_nop = 0xbf800000;
299 uint32_t Encoded_pad = Encoded_s_code_end;
300
301 // Instruction cache line size in bytes.
302 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
303 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
304
305 // Extra padding amount in bytes to support prefetch mode 3.
306 unsigned FillSize = 3 * CacheLineSize;
307
308 if (AMDGPU::isGFX90A(STI)) {
309 Encoded_pad = Encoded_s_nop;
310 FillSize = 16 * CacheLineSize;
311 }
312
313 OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
314 OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
315 return true;
316}
317
// Print a complete .amdhsa_kernel / .end_amdhsa_kernel block for one kernel.
// Each descriptor field is decoded out of the MCExpr-valued kernel-descriptor
// words (compute_pgm_rsrc1/2/3, kernel_code_properties, kernarg_preload) and
// printed as the matching .amdhsa_* directive. Which directives appear
// depends on the subtarget generation (IVersion), the code-object version,
// and features such as architected flat scratch and kernarg preloading —
// the directive order here mirrors what the asm parser accepts.
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const MCKernelDescriptor &KD, uint64_t NextVGPR, uint64_t NextSGPR,
    bool ReserveVCC, bool ReserveFlatScr) {
  IsaVersion IVersion = getIsaVersion(GPU: STI.getCPU());
  const MCAsmInfo *MAI = getContext().getAsmInfo();

  OS << "\t.amdhsa_kernel " << KernelName << '\n';

  // Extract (Src >> Shift) & (Mask >> Shift) from a descriptor word and print
  // it after Directive — as a plain integer when the expression folds to a
  // constant, otherwise as the symbolic expression.
  auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
                        StringRef Directive) {
    int64_t IVal;
    OS << "\t\t" << Directive << ' ';
    const MCExpr *pgm_rsrc1_bits =
        MCKernelDescriptor::bits_get(Src: Expr, Shift, Mask, Ctx&: getContext());
    if (pgm_rsrc1_bits->evaluateAsAbsolute(Res&: IVal))
      OS << static_cast<uint64_t>(IVal);
    else
      pgm_rsrc1_bits->print(OS, MAI);
    OS << '\n';
  };

  // Whole-word fields: segment sizes and kernarg size.
  OS << "\t\t.amdhsa_group_segment_fixed_size ";
  KD.group_segment_fixed_size->print(OS, MAI);
  OS << '\n';

  OS << "\t\t.amdhsa_private_segment_fixed_size ";
  KD.private_segment_fixed_size->print(OS, MAI);
  OS << '\n';

  OS << "\t\t.amdhsa_kernarg_size ";
  KD.kernarg_size->print(OS, MAI);
  OS << '\n';

  // User SGPR setup. Some directives only exist when flat scratch is not
  // architected (i.e. the init SGPRs are software-managed).
  PrintField(
      KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");

  if (!hasArchitectedFlatScratch(STI))
    PrintField(
        KD.kernel_code_properties,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
        ".amdhsa_user_sgpr_private_segment_buffer");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
             ".amdhsa_user_sgpr_dispatch_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
             ".amdhsa_user_sgpr_queue_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
             ".amdhsa_user_sgpr_kernarg_segment_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
             ".amdhsa_user_sgpr_dispatch_id");
  if (!hasArchitectedFlatScratch(STI))
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
               ".amdhsa_user_sgpr_flat_scratch_init");
  if (hasKernargPreload(STI)) {
    PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
               amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
               ".amdhsa_user_sgpr_kernarg_preload_length");
    PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
               amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
               ".amdhsa_user_sgpr_kernarg_preload_offset");
  }
  PrintField(
      KD.kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
      ".amdhsa_user_sgpr_private_segment_size");
  if (IVersion.Major >= 10)
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
               ".amdhsa_wavefront_size32");
  if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
               ".amdhsa_uses_dynamic_stack");
  // Same bit, but the directive is spelled differently depending on whether
  // flat scratch is architected.
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
             (hasArchitectedFlatScratch(STI)
                  ? ".amdhsa_enable_private_segment"
                  : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
  // System SGPR / VGPR id enables.
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
             ".amdhsa_system_sgpr_workgroup_id_x");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
             ".amdhsa_system_sgpr_workgroup_id_y");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
             ".amdhsa_system_sgpr_workgroup_id_z");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
             ".amdhsa_system_sgpr_workgroup_info");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
             ".amdhsa_system_vgpr_workitem_id");

  // These directives are required.
  OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
  OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';

  if (AMDGPU::isGFX90A(STI)) {
    // MCExpr equivalent of taking the (accum_offset + 1) * 4.
    const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
        Src: KD.compute_pgm_rsrc3,
        Shift: amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
        Mask: amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx&: getContext());
    accum_bits = MCBinaryExpr::createAdd(
        LHS: accum_bits, RHS: MCConstantExpr::create(Value: 1, Ctx&: getContext()), Ctx&: getContext());
    accum_bits = MCBinaryExpr::createMul(
        LHS: accum_bits, RHS: MCConstantExpr::create(Value: 4, Ctx&: getContext()), Ctx&: getContext());
    OS << "\t\t.amdhsa_accum_offset ";
    int64_t IVal;
    if (accum_bits->evaluateAsAbsolute(Res&: IVal)) {
      OS << static_cast<uint64_t>(IVal);
    } else {
      accum_bits->print(OS, MAI);
    }
    OS << '\n';
  }

  // Reservation directives are only printed when they deviate from the
  // default (VCC/flat-scratch reserved).
  if (!ReserveVCC)
    OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
  if (IVersion.Major >= 7 && !ReserveFlatScr && !hasArchitectedFlatScratch(STI))
    OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';

  switch (CodeObjectVersion) {
  default:
    break;
  case AMDGPU::AMDHSA_COV4:
  case AMDGPU::AMDHSA_COV5:
    if (getTargetID()->isXnackSupported())
      OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
    break;
  }

  // Float mode bits from compute_pgm_rsrc1.
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
             ".amdhsa_float_round_mode_32");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
             ".amdhsa_float_round_mode_16_64");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
             ".amdhsa_float_denorm_mode_32");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
             ".amdhsa_float_denorm_mode_16_64");
  // Generation-gated bits: dx10_clamp/ieee_mode exist only up to GFX11.
  if (IVersion.Major < 12) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
               ".amdhsa_dx10_clamp");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
               ".amdhsa_ieee_mode");
  }
  if (IVersion.Major >= 9) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
               ".amdhsa_fp16_overflow");
  }
  if (AMDGPU::isGFX90A(STI))
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
  if (IVersion.Major >= 10) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
               ".amdhsa_workgroup_processor_mode");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
               ".amdhsa_memory_ordered");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
               ".amdhsa_forward_progress");
  }
  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
               ".amdhsa_shared_vgpr_count");
  }
  if (IVersion.Major >= 12) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
               ".amdhsa_round_robin_scheduling");
  }
  // Exception enable bits from compute_pgm_rsrc2.
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
      ".amdhsa_exception_fp_ieee_invalid_op");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
      ".amdhsa_exception_fp_denorm_src");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
      ".amdhsa_exception_fp_ieee_div_zero");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
      ".amdhsa_exception_fp_ieee_overflow");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
      ".amdhsa_exception_fp_ieee_underflow");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
      ".amdhsa_exception_fp_ieee_inexact");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
      ".amdhsa_exception_int_div_zero");

  OS << "\t.end_amdhsa_kernel\n";
}
574
575//===----------------------------------------------------------------------===//
576// AMDGPUTargetELFStreamer
577//===----------------------------------------------------------------------===//
578
// Object-emission flavor of the target streamer: information is recorded
// into the ELF output (e_flags, notes, symbol attributes) rather than printed
// as textual directives.
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
                                                 const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
582
583MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
584 return static_cast<MCELFStreamer &>(Streamer);
585}
586
587// A hook for emitting stuff at the end.
588// We use it for emitting the accumulated PAL metadata as a .note record.
589// The PAL metadata is reset after it is emitted.
// Final hook for object emission: fix up the ELF header (e_flags and ABI
// version), then emit the accumulated PAL metadata as a .note record and
// reset it. Order matters: header state must be set on the assembler before
// emission completes.
void AMDGPUTargetELFStreamer::finish() {
  MCAssembler &MCA = getStreamer().getAssembler();
  MCA.setELFHeaderEFlags(getEFlags());
  MCA.getWriter().setOverrideABIVersion(
      getELFABIVersion(OS: STI.getTargetTriple(), CodeObjectVersion));

  // Serialize the PAL metadata; nothing to emit if it is empty.
  std::string Blob;
  const char *Vendor = getPALMetadata()->getVendor();
  unsigned Type = getPALMetadata()->getType();
  getPALMetadata()->toBlob(Type, S&: Blob);
  if (Blob.empty())
    return;
  EmitNote(Name: Vendor, DescSize: MCConstantExpr::create(Value: Blob.size(), Ctx&: getContext()), NoteType: Type,
           EmitDesc: [&](MCELFStreamer &OS) { OS.emitBytes(Data: Blob); });

  // Reset the pal metadata so its data will not affect a compilation that
  // reuses this object.
  getPALMetadata()->reset();
}
609
// Emit one ELF note record into the AMDGPU note section: the standard
// namesz/descsz/type header, the name, then the caller-provided desc bytes,
// with 4-byte zero padding after both name and desc as the ELF note format
// requires. DescSZ is an MCExpr so the desc size may be a label difference
// resolved at layout time.
void AMDGPUTargetELFStreamer::EmitNote(
    StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
    function_ref<void(MCELFStreamer &)> EmitDesc) {
  auto &S = getStreamer();
  auto &Context = S.getContext();

  // namesz counts the terminating NUL.
  auto NameSZ = Name.size() + 1;

  unsigned NoteFlags = 0;
  // TODO Apparently, this is currently needed for OpenCL as mentioned in
  // https://reviews.llvm.org/D74995
  if (isHsaAbi(STI))
    NoteFlags = ELF::SHF_ALLOC;

  // Switch into the note section just for this record, then restore the
  // previous section.
  S.pushSection();
  S.switchSection(
      Section: Context.getELFSection(Section: ElfNote::SectionName, Type: ELF::SHT_NOTE, Flags: NoteFlags));
  S.emitInt32(Value: NameSZ);                                  // namesz
  S.emitValue(Value: DescSZ, Size: 4);                              // descz
  S.emitInt32(Value: NoteType);                               // type
  S.emitBytes(Data: Name);                                    // name
  S.emitValueToAlignment(Align(4), 0, 1, 0);              // padding 0
  EmitDesc(S);                                            // desc
  S.emitValueToAlignment(Align(4), 0, 1, 0);              // padding 0
  S.popSection();
}
636
637unsigned AMDGPUTargetELFStreamer::getEFlags() {
638 switch (STI.getTargetTriple().getArch()) {
639 default:
640 llvm_unreachable("Unsupported Arch");
641 case Triple::r600:
642 return getEFlagsR600();
643 case Triple::amdgcn:
644 return getEFlagsAMDGCN();
645 }
646}
647
648unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
649 assert(STI.getTargetTriple().getArch() == Triple::r600);
650
651 return getElfMach(GPU: STI.getCPU());
652}
653
// Dispatch on the OS component of the triple to pick the e_flags layout.
// Note the default case deliberately falls through to UnknownOS instead of
// asserting, because some tests carry unexpected OS strings (e.g. "mingw").
unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
  assert(STI.getTargetTriple().getArch() == Triple::amdgcn);

  switch (STI.getTargetTriple().getOS()) {
  default:
    // TODO: Why do some tests have "mingw" listed as OS?
    // llvm_unreachable("Unsupported OS");
  case Triple::UnknownOS:
    return getEFlagsUnknownOS();
  case Triple::AMDHSA:
    return getEFlagsAMDHSA();
  case Triple::AMDPAL:
    return getEFlagsAMDPAL();
  case Triple::Mesa3D:
    return getEFlagsMesa3D();
  }
}
671
// Unknown-OS targets use the v3 e_flags layout. The OS assertion is disabled
// for the same reason as the default case in getEFlagsAMDGCN.
unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
  // TODO: Why do some tests have "mingw" listed as OS?
  // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);

  return getEFlagsV3();
}
678
679unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
680 assert(isHsaAbi(STI));
681
682 if (CodeObjectVersion >= 6)
683 return getEFlagsV6();
684 return getEFlagsV4();
685}
686
687unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
688 assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);
689
690 return getEFlagsV3();
691}
692
693unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
694 assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);
695
696 return getEFlagsV3();
697}
698
699unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
700 unsigned EFlagsV3 = 0;
701
702 // mach.
703 EFlagsV3 |= getElfMach(GPU: STI.getCPU());
704
705 // xnack.
706 if (getTargetID()->isXnackOnOrAny())
707 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
708 // sramecc.
709 if (getTargetID()->isSramEccOnOrAny())
710 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;
711
712 return EFlagsV3;
713}
714
// Compose the code-object-v4 e_flags word: the machine id plus the
// four-state (unsupported/any/off/on) xnack and sramecc fields — v4 widened
// these from the single on/off bits used by v3.
unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
  unsigned EFlagsV4 = 0;

  // mach.
  EFlagsV4 |= getElfMach(GPU: STI.getCPU());

  // xnack.
  switch (getTargetID()->getXnackSetting()) {
  case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Any:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Off:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::On:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;
    break;
  }
  // sramecc.
  switch (getTargetID()->getSramEccSetting()) {
  case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Any:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Off:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::On:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4;
    break;
  }

  return EFlagsV4;
}
754
// Compose the code-object-v6 e_flags word: the full v4 layout plus the
// generic-target version field. The version comes from the testing override
// (-amdgpu-force-generic-version) when set, otherwise from the generic GPU
// kind; non-generic targets leave the field at 0.
unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
  unsigned Flags = getEFlagsV4();

  unsigned Version = ForceGenericVersion;
  if (!Version) {
    switch (parseArchAMDGCN(CPU: STI.getCPU())) {
    case AMDGPU::GK_GFX9_GENERIC:
      Version = GenericVersion::GFX9;
      break;
    case AMDGPU::GK_GFX10_1_GENERIC:
      Version = GenericVersion::GFX10_1;
      break;
    case AMDGPU::GK_GFX10_3_GENERIC:
      Version = GenericVersion::GFX10_3;
      break;
    case AMDGPU::GK_GFX11_GENERIC:
      Version = GenericVersion::GFX11;
      break;
    default:
      break;
    }
  }

  // Versions start at 1.
  if (Version) {
    if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX)
      report_fatal_error(reason: "Cannot encode generic code object version " +
                         Twine(Version) +
                         " - no ELF flag can represent this version!");
    Flags |= (Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET);
  }

  return Flags;
}
789
// Intentionally a no-op for ELF output: the target is encoded via the ELF
// e_flags/notes rather than a textual .amdgcn_target directive.
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
791
792void
793AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
794
795 MCStreamer &OS = getStreamer();
796 OS.pushSection();
797 OS.emitBytes(Data: StringRef((const char*)&Header, sizeof(Header)));
798 OS.popSection();
799}
800
801void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
802 unsigned Type) {
803 MCSymbolELF *Symbol = cast<MCSymbolELF>(
804 Val: getStreamer().getContext().getOrCreateSymbol(Name: SymbolName));
805 Symbol->setType(Type);
806}
807
808void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
809 Align Alignment) {
810 MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Val: Symbol);
811 SymbolELF->setType(ELF::STT_OBJECT);
812
813 if (!SymbolELF->isBindingSet()) {
814 SymbolELF->setBinding(ELF::STB_GLOBAL);
815 SymbolELF->setExternal(true);
816 }
817
818 if (SymbolELF->declareCommon(Size, Alignment, Target: true)) {
819 report_fatal_error(reason: "Symbol: " + Symbol->getName() +
820 " redeclared as different type");
821 }
822
823 SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
824 SymbolELF->setSize(MCConstantExpr::create(Value: Size, Ctx&: getContext()));
825}
826
827bool AMDGPUTargetELFStreamer::EmitISAVersion() {
828 // Create two labels to mark the beginning and end of the desc field
829 // and a MCExpr to calculate the size of the desc field.
830 auto &Context = getContext();
831 auto *DescBegin = Context.createTempSymbol();
832 auto *DescEnd = Context.createTempSymbol();
833 auto *DescSZ = MCBinaryExpr::createSub(
834 LHS: MCSymbolRefExpr::create(Symbol: DescEnd, Ctx&: Context),
835 RHS: MCSymbolRefExpr::create(Symbol: DescBegin, Ctx&: Context), Ctx&: Context);
836
837 EmitNote(Name: ElfNote::NoteNameV2, DescSZ, NoteType: ELF::NT_AMD_HSA_ISA_NAME,
838 EmitDesc: [&](MCELFStreamer &OS) {
839 OS.emitLabel(Symbol: DescBegin);
840 OS.emitBytes(Data: getTargetID()->toString());
841 OS.emitLabel(Symbol: DescEnd);
842 });
843 return true;
844}
845
846bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
847 bool Strict) {
848 HSAMD::V3::MetadataVerifier Verifier(Strict);
849 if (!Verifier.verify(HSAMetadataRoot&: HSAMetadataDoc.getRoot()))
850 return false;
851
852 std::string HSAMetadataString;
853 HSAMetadataDoc.writeToBlob(Blob&: HSAMetadataString);
854
855 // Create two labels to mark the beginning and end of the desc field
856 // and a MCExpr to calculate the size of the desc field.
857 auto &Context = getContext();
858 auto *DescBegin = Context.createTempSymbol();
859 auto *DescEnd = Context.createTempSymbol();
860 auto *DescSZ = MCBinaryExpr::createSub(
861 LHS: MCSymbolRefExpr::create(Symbol: DescEnd, Ctx&: Context),
862 RHS: MCSymbolRefExpr::create(Symbol: DescBegin, Ctx&: Context), Ctx&: Context);
863
864 EmitNote(Name: ElfNote::NoteNameV3, DescSZ, NoteType: ELF::NT_AMDGPU_METADATA,
865 EmitDesc: [&](MCELFStreamer &OS) {
866 OS.emitLabel(Symbol: DescBegin);
867 OS.emitBytes(Data: HSAMetadataString);
868 OS.emitLabel(Symbol: DescEnd);
869 });
870 return true;
871}
872
873bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader(
874 const MCSubtargetInfo &STI, bool TrapEnabled) {
875 const uint32_t Encoded_s_nop = 0xbf800000;
876 const uint32_t Encoded_s_trap = 0xbf920002;
877 const uint32_t Encoded_s_endpgm = 0xbf810000;
878 const uint32_t TrapInstr = TrapEnabled ? Encoded_s_trap : Encoded_s_endpgm;
879 MCStreamer &OS = getStreamer();
880 OS.emitInt32(Value: TrapInstr);
881 for (int i = 0; i < 63; ++i) {
882 OS.emitInt32(Value: Encoded_s_nop);
883 }
884 return true;
885}
886
887bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
888 const uint32_t Encoded_s_code_end = 0xbf9f0000;
889 const uint32_t Encoded_s_nop = 0xbf800000;
890 uint32_t Encoded_pad = Encoded_s_code_end;
891
892 // Instruction cache line size in bytes.
893 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
894 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
895
896 // Extra padding amount in bytes to support prefetch mode 3.
897 unsigned FillSize = 3 * CacheLineSize;
898
899 if (AMDGPU::isGFX90A(STI)) {
900 Encoded_pad = Encoded_s_nop;
901 FillSize = 16 * CacheLineSize;
902 }
903
904 MCStreamer &OS = getStreamer();
905 OS.pushSection();
906 OS.emitValueToAlignment(Alignment: Align(CacheLineSize), Value: Encoded_pad, ValueSize: 4);
907 for (unsigned I = 0; I < FillSize; I += 4)
908 OS.emitInt32(Value: Encoded_pad);
909 OS.popSection();
910 return true;
911}
912
// Emits the amdhsa kernel descriptor object for KernelName, field by field in
// the exact layout of amdhsa::kernel_descriptor_t, under the <name>.kd symbol.
// NOTE: the emission order below must match the struct layout; do not reorder.
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const MCKernelDescriptor &KernelDescriptor, uint64_t NextVGPR,
    uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {
  auto &Streamer = getStreamer();
  auto &Context = Streamer.getContext();

  // The kernel code symbol is the kernel entry point; the descriptor lives
  // under a separate "<name>.kd" symbol.
  MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
      Val: Context.getOrCreateSymbol(Name: Twine(KernelName)));
  MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
      Val: Context.getOrCreateSymbol(Name: Twine(KernelName) + Twine(".kd")));

  // Copy kernel descriptor symbol's binding, other and visibility from the
  // kernel code symbol.
  KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
  KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
  KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
  // Kernel descriptor symbol's type and size are fixed.
  KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
  KernelDescriptorSymbol->setSize(
      MCConstantExpr::create(Value: sizeof(amdhsa::kernel_descriptor_t), Ctx&: Context));

  // The visibility of the kernel code symbol must be protected or less to allow
  // static relocations from the kernel descriptor to be used.
  if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
    KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);

  // Emit each descriptor field, sized exactly as in kernel_descriptor_t.
  Streamer.emitLabel(Symbol: KernelDescriptorSymbol);
  Streamer.emitValue(
      Value: KernelDescriptor.group_segment_fixed_size,
      Size: sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size));
  Streamer.emitValue(
      Value: KernelDescriptor.private_segment_fixed_size,
      Size: sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size));
  Streamer.emitValue(Value: KernelDescriptor.kernarg_size,
                     Size: sizeof(amdhsa::kernel_descriptor_t::kernarg_size));

  // Reserved bytes are zero-filled.
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i)
    Streamer.emitInt8(Value: 0u);

  // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
  // expression being created is:
  //   (start of kernel code) - (start of kernel descriptor)
  // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
  Streamer.emitValue(
      Value: MCBinaryExpr::createSub(
          LHS: MCSymbolRefExpr::create(Symbol: KernelCodeSymbol,
                                   Kind: MCSymbolRefExpr::VK_AMDGPU_REL64, Ctx&: Context),
          RHS: MCSymbolRefExpr::create(Symbol: KernelDescriptorSymbol,
                                   Kind: MCSymbolRefExpr::VK_None, Ctx&: Context),
          Ctx&: Context),
      Size: sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset));
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i)
    Streamer.emitInt8(Value: 0u);
  Streamer.emitValue(Value: KernelDescriptor.compute_pgm_rsrc3,
                     Size: sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3));
  Streamer.emitValue(Value: KernelDescriptor.compute_pgm_rsrc1,
                     Size: sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1));
  Streamer.emitValue(Value: KernelDescriptor.compute_pgm_rsrc2,
                     Size: sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2));
  Streamer.emitValue(
      Value: KernelDescriptor.kernel_code_properties,
      Size: sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties));
  Streamer.emitValue(Value: KernelDescriptor.kernarg_preload,
                     Size: sizeof(amdhsa::kernel_descriptor_t::kernarg_preload));
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
    Streamer.emitInt8(Value: 0u);
}
981

source code of llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp