NVPTX.cpp source code [clang/lib/CodeGen/Targets/NVPTX.cpp]

1	//===- NVPTX.cpp ----------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "ABIInfoImpl.h"
10	#include "TargetInfo.h"
11	#include "llvm/IR/IntrinsicsNVPTX.h"
12
13	using namespace clang;
14	using namespace clang::CodeGen;
15
16	//===----------------------------------------------------------------------===//
17	// NVPTX ABI Implementation
18	//===----------------------------------------------------------------------===//
19
20	namespace {
21
22	class NVPTXTargetCodeGenInfo;
23
24	class NVPTXABIInfo : public ABIInfo {
25	NVPTXTargetCodeGenInfo &CGInfo;
26
27	public:
28	NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
29	: ABIInfo (CGT), CGInfo(Info) {}
30
31	ABIArgInfo classifyReturnType(QualType RetTy) const;
32	ABIArgInfo classifyArgumentType(QualType Ty) const;
33
34	void computeInfo(CGFunctionInfo &FI) const override;
35	Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
36	QualType Ty) const override;
37	bool isUnsupportedType(QualType T) const;
38	ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
39	};
40
41	class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
42	public:
43	NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
44	: TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
45
46	void setTargetAttributes(const Decl D, llvm::GlobalValue GV,
47	CodeGen::CodeGenModule &M) const override;
48	bool shouldEmitStaticExternCAliases() const override;
49
50	llvm::Constant getNullPointer(const* CodeGen::CodeGenModule &CGM,
51	llvm::PointerType *T,
52	QualType QT) const override;
53
54	llvm::Type getCUDADeviceBuiltinSurfaceDeviceType() const* override {
55	// On the device side, surface reference is represented as an object handle
56	// in 64-bit integer.
57	return llvm::Type::getInt64Ty(C&: getABIInfo().getVMContext());
58	}
59
60	llvm::Type getCUDADeviceBuiltinTextureDeviceType() const* override {
61	// On the device side, texture reference is represented as an object handle
62	// in 64-bit integer.
63	return llvm::Type::getInt64Ty(C&: getABIInfo().getVMContext());
64	}
65
66	bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
67	LValue Src) const override {
68	emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
69	return true;
70	}
71
72	bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
73	LValue Src) const override {
74	emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
75	return true;
76	}
77
78	// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
79	// resulting MDNode to the nvvm.annotations MDNode.
80	static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
81	int Operand);
82
83	private:
84	static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
85	LValue Src) {
86	llvm::Value Handle = nullptr*;
87	llvm::Constant *C =
88	llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).emitRawPointer(CGF));
89	// Lookup `addrspacecast` through the constant pointer if any.
90	if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
91	C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
92	if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
93	// Load the handle from the specific global variable using
94	// `nvvm.texsurf.handle.internal` intrinsic.
95	Handle = CGF.EmitRuntimeCall(
96	CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
97	{GV->getType()}),
98	{GV}, "texsurf_handle");
99	} else
100	Handle = CGF.EmitLoadOfScalar(lvalue: Src, Loc: SourceLocation ());
101	CGF.EmitStoreOfScalar(value: Handle, lvalue: Dst);
102	}
103	};
104
105	/// Checks if the type is unsupported directly by the current target.
106	bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
107	ASTContext &Context = getContext();
108	if (!Context.getTargetInfo().hasFloat16Type() && T ->isFloat16Type())
109	return true;
110	if (!Context.getTargetInfo().hasFloat128Type() &&
111	(T ->isFloat128Type() \|\|
112	(T ->isRealFloatingType() && Context.getTypeSize(T) == `128`)))
113	return true;
114	if (const auto *EIT = T->getAs<BitIntType>())
115	return EIT->getNumBits() >
116	(Context.getTargetInfo().hasInt128Type() ? `128U` : `64U`);
117	if (!Context.getTargetInfo().hasInt128Type() && T ->isIntegerType() &&
118	Context.getTypeSize(T) > `64U`)
119	return true;
120	if (const auto *AT = T->getAsArrayTypeUnsafe())
121	return isUnsupportedType(T: AT->getElementType());
122	const auto *RT = T ->getAs<RecordType>();
123	if (!RT)
124	return false;
125	const RecordDecl *RD = RT->getDecl();
126
127	// If this is a C++ record, check the bases first.
128	if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
129	for (const CXXBaseSpecifier &I : CXXRD->bases())
130	if (isUnsupportedType(I.getType()))
131	return true;
132
133	for (const FieldDecl *I : RD->fields())
134	if (isUnsupportedType(I->getType()))
135	return true;
136	return false;
137	}
138
139	/// Coerce the given type into an array with maximum allowed size of elements.
140	ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
141	unsigned MaxSize) const {
142	// Alignment and Size are measured in bits.
143	const uint64_t Size = getContext().getTypeSize(T: Ty);
144	const uint64_t Alignment = getContext().getTypeAlign(T: Ty);
145	const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
146	llvm::Type *IntType = llvm::Type::getIntNTy(C&: getVMContext(), N: Div);
147	const uint64_t NumElements = (Size + Div - `1`) / Div;
148	return ABIArgInfo::getDirect(T: llvm::ArrayType::get(ElementType: IntType, NumElements));
149	}
150
151	ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
152	if (RetTy ->isVoidType())
153	return ABIArgInfo::getIgnore();
154
155	if (getContext().getLangOpts().OpenMP &&
156	getContext().getLangOpts().OpenMPIsTargetDevice &&
157	isUnsupportedType(T: RetTy))
158	return coerceToIntArrayWithLimit(Ty: RetTy, MaxSize: `64`);
159
160	// note: this is different from default ABI
161	if (!RetTy ->isScalarType())
162	return ABIArgInfo::getDirect();
163
164	// Treat an enum type as its underlying type.
165	if (const EnumType *EnumTy = RetTy ->getAs<EnumType>())
166	RetTy = EnumTy->getDecl()->getIntegerType();
167
168	return (isPromotableIntegerTypeForABI(Ty: RetTy) ? ABIArgInfo::getExtend(Ty: RetTy)
169	: ABIArgInfo::getDirect());
170	}
171
172	ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
173	// Treat an enum type as its underlying type.
174	if (const EnumType *EnumTy = Ty ->getAs<EnumType>())
175	Ty = EnumTy->getDecl()->getIntegerType();
176
177	// Return aggregates type as indirect by value
178	if (isAggregateTypeForABI(T: Ty)) {
179	// Under CUDA device compilation, tex/surf builtin types are replaced with
180	// object types and passed directly.
181	if (getContext().getLangOpts().CUDAIsDevice) {
182	if (Ty ->isCUDADeviceBuiltinSurfaceType())
183	return ABIArgInfo::getDirect(
184	T: CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
185	if (Ty ->isCUDADeviceBuiltinTextureType())
186	return ABIArgInfo::getDirect(
187	T: CGInfo.getCUDADeviceBuiltinTextureDeviceType());
188	}
189	return getNaturalAlignIndirect(Ty, / byval / ByVal: true);
190	}
191
192	if (const auto *EIT = Ty->getAs<BitIntType>()) {
193	if ((EIT->getNumBits() > `128`) \|\|
194	(!getContext().getTargetInfo().hasInt128Type() &&
195	EIT->getNumBits() > `64`))
196	return getNaturalAlignIndirect(Ty, / byval / ByVal: true);
197	}
198
199	return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
200	: ABIArgInfo::getDirect());
201	}
202
203	void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
204	if (!getCXXABI().classifyReturnType(FI))
205	FI.getReturnInfo() = classifyReturnType(RetTy: FI.getReturnType());
206	for (auto &I : FI.arguments())
207	I.info = classifyArgumentType(I.type);
208
209	// Always honor user-specified calling convention.
210	if (FI.getCallingConvention() != llvm::CallingConv::C)
211	return;
212
213	FI.setEffectiveCallingConvention(getRuntimeCC());
214	}
215
216	Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
217	QualType Ty) const {
218	llvm_unreachable("NVPTX does not support varargs");
219	}
220
221	void NVPTXTargetCodeGenInfo::setTargetAttributes(
222	const Decl D, llvm::GlobalValue GV, CodeGen::CodeGenModule &M) const {
223	if (GV->isDeclaration())
224	return;
225	const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
226	if (VD) {
227	if (M.getLangOpts().CUDA) {
228	if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
229	addNVVMMetadata(GV, Name: "surface", Operand: `1`);
230	else if (VD->getType()->isCUDADeviceBuiltinTextureType())
231	addNVVMMetadata(GV, Name: "texture", Operand: `1`);
232	return;
233	}
234	}
235
236	const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
237	if (!FD) return;
238
239	llvm::Function *F = cast<llvm::Function>(GV);
240
241	// Perform special handling in OpenCL mode
242	if (M.getLangOpts().OpenCL) {
243	// Use OpenCL function attributes to check for kernel functions
244	// By default, all functions are device functions
245	if (FD->hasAttr<OpenCLKernelAttr>()) {
246	// OpenCL __kernel functions get kernel metadata
247	// Create !{<func-ref>, metadata !"kernel", i32 1} node
248	addNVVMMetadata(GV: F, Name: "kernel", Operand: `1`);
249	// And kernel functions are not subject to inlining
250	F->addFnAttr(llvm::Attribute::NoInline);
251	}
252	}
253
254	// Perform special handling in CUDA mode.
255	if (M.getLangOpts().CUDA) {
256	// CUDA __global__ functions get a kernel metadata entry. Since
257	// __global__ functions cannot be called from the device, we do not
258	// need to set the noinline attribute.
259	if (FD->hasAttr<CUDAGlobalAttr>()) {
260	// Create !{<func-ref>, metadata !"kernel", i32 1} node
261	addNVVMMetadata(GV: F, Name: "kernel", Operand: `1`);
262	}
263	if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
264	M.handleCUDALaunchBoundsAttr(F, Attr);
265	}
266
267	// Attach kernel metadata directly if compiling for NVPTX.
268	if (FD->hasAttr<NVPTXKernelAttr>()) {
269	addNVVMMetadata(GV: F, Name: "kernel", Operand: `1`);
270	}
271	}
272
273	void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
274	StringRef Name, int Operand) {
275	llvm::Module *M = GV->getParent();
276	llvm::LLVMContext &Ctx = M->getContext();
277
278	// Get "nvvm.annotations" metadata node
279	llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata(Name: "nvvm.annotations");
280
281	llvm::Metadata *MDVals[] = {
282	llvm::ConstantAsMetadata::get(C: GV), llvm::MDString::get(Context&: Ctx, Str: Name),
283	llvm::ConstantAsMetadata::get(
284	C: llvm::ConstantInt::get(Ty: llvm::Type::getInt32Ty(C&: Ctx), V: Operand))};
285	// Append metadata to nvvm.annotations
286	MD->addOperand(M: llvm::MDNode::get(Ctx, MDVals));
287	}
288
289	bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
290	return false;
291	}
292
293	llvm::Constant *
294	NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
295	llvm::PointerType *PT,
296	QualType QT) const {
297	auto &Ctx = CGM.getContext();
298	if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(AS: LangAS::opencl_local))
299	return llvm::ConstantPointerNull::get(T: PT);
300
301	auto NPT = llvm::PointerType::get(
302	C&: PT->getContext(), AddressSpace: Ctx.getTargetAddressSpace(AS: LangAS::opencl_generic));
303	return llvm::ConstantExpr::getAddrSpaceCast(
304	C: llvm::ConstantPointerNull::get(T: NPT), Ty: PT);
305	}
306	}
307
308	void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
309	const CUDALaunchBoundsAttr *Attr,
310	int32_t *MaxThreadsVal,
311	int32_t *MinBlocksVal,
312	int32_t *MaxClusterRankVal) {
313	// Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
314	llvm::APSInt MaxThreads(`32`);
315	MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
316	if (MaxThreads > `0`) {
317	if (MaxThreadsVal)
318	*MaxThreadsVal = MaxThreads.getExtValue();
319	if (F) {
320	// Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
321	NVPTXTargetCodeGenInfo::addNVVMMetadata(GV: F, Name: "maxntidx",
322	Operand: MaxThreads.getExtValue());
323	}
324	}
325
326	// min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
327	// was not specified in __launch_bounds__ or if the user specified a 0 value,
328	// we don't have to add a PTX directive.
329	if (Attr->getMinBlocks()) {
330	llvm::APSInt MinBlocks(`32`);
331	MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
332	if (MinBlocks > `0`) {
333	if (MinBlocksVal)
334	*MinBlocksVal = MinBlocks.getExtValue();
335	if (F) {
336	// Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
337	NVPTXTargetCodeGenInfo::addNVVMMetadata(GV: F, Name: "minctasm",
338	Operand: MinBlocks.getExtValue());
339	}
340	}
341	}
342	if (Attr->getMaxBlocks()) {
343	llvm::APSInt MaxBlocks(`32`);
344	MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
345	if (MaxBlocks > `0`) {
346	if (MaxClusterRankVal)
347	*MaxClusterRankVal = MaxBlocks.getExtValue();
348	if (F) {
349	// Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
350	NVPTXTargetCodeGenInfo::addNVVMMetadata(GV: F, Name: "maxclusterrank",
351	Operand: MaxBlocks.getExtValue());
352	}
353	}
354	}
355	}
356
357	std::unique_ptr<TargetCodeGenInfo>
358	CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
359	return std::make_unique<NVPTXTargetCodeGenInfo>(args&: CGM.getTypes());
360	}
361

source code of clang/lib/CodeGen/Targets/NVPTX.cpp