SemaCUDA.cpp source code [clang/lib/Sema/SemaCUDA.cpp]

1	//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	/// \file
9	/// This file implements semantic analysis for CUDA constructs.
10	///
11	//===----------------------------------------------------------------------===//
12
13	#include "clang/Sema/SemaCUDA.h"
14	#include "clang/AST/ASTContext.h"
15	#include "clang/AST/Decl.h"
16	#include "clang/AST/ExprCXX.h"
17	#include "clang/Basic/Cuda.h"
18	#include "clang/Basic/TargetInfo.h"
19	#include "clang/Lex/Preprocessor.h"
20	#include "clang/Sema/Lookup.h"
21	#include "clang/Sema/ScopeInfo.h"
22	#include "clang/Sema/Sema.h"
23	#include "clang/Sema/SemaDiagnostic.h"
24	#include "clang/Sema/SemaInternal.h"
25	#include "clang/Sema/Template.h"
26	#include "llvm/ADT/STLForwardCompat.h"
27	#include "llvm/ADT/SmallVector.h"
28	#include <optional>
29	using namespace clang;
30
31	SemaCUDA::SemaCUDA(Sema &S) : SemaBase (S) {}
32
33	template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
34	if (!D)
35	return false;
36	if (auto *A = D->getAttr<AttrT>())
37	return !A->isImplicit();
38	return false;
39	}
40
41	void SemaCUDA::PushForceHostDevice() {
42	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
43	ForceHostDeviceDepth++;
44	}
45
46	bool SemaCUDA::PopForceHostDevice() {
47	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
48	if (ForceHostDeviceDepth == `0`)
49	return false;
50	ForceHostDeviceDepth--;
51	return true;
52	}
53
54	ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
55	MultiExprArg ExecConfig,
56	SourceLocation GGGLoc) {
57	FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl();
58	if (!ConfigDecl)
59	return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
60	<< getConfigureFuncName());
61	QualType ConfigQTy = ConfigDecl->getType();
62
63	DeclRefExpr ConfigDR = new* (getASTContext()) DeclRefExpr(
64	getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
65	SemaRef.MarkFunctionReferenced(Loc: LLLLoc, Func: ConfigDecl);
66
67	return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
68	/IsExecConfig=/true);
69	}
70
71	CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
72	bool HasHostAttr = false;
73	bool HasDeviceAttr = false;
74	bool HasGlobalAttr = false;
75	bool HasInvalidTargetAttr = false;
76	for (const ParsedAttr &AL : Attrs) {
77	switch (AL.getKind()) {
78	case ParsedAttr::AT_CUDAGlobal:
79	HasGlobalAttr = true;
80	break;
81	case ParsedAttr::AT_CUDAHost:
82	HasHostAttr = true;
83	break;
84	case ParsedAttr::AT_CUDADevice:
85	HasDeviceAttr = true;
86	break;
87	case ParsedAttr::AT_CUDAInvalidTarget:
88	HasInvalidTargetAttr = true;
89	break;
90	default:
91	break;
92	}
93	}
94
95	if (HasInvalidTargetAttr)
96	return CUDAFunctionTarget::InvalidTarget;
97
98	if (HasGlobalAttr)
99	return CUDAFunctionTarget::Global;
100
101	if (HasHostAttr && HasDeviceAttr)
102	return CUDAFunctionTarget::HostDevice;
103
104	if (HasDeviceAttr)
105	return CUDAFunctionTarget::Device;
106
107	return CUDAFunctionTarget::Host;
108	}
109
110	template <typename A>
111	static bool hasAttr(const Decl D, bool* IgnoreImplicitAttr) {
112	return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
113	return isa<A>(Attribute) &&
114	!(IgnoreImplicitAttr && Attribute->isImplicit());
115	});
116	}
117
118	SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
119	SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
120	: S(S_) {
121	SavedCtx = S.CurCUDATargetCtx;
122	assert(K == SemaCUDA::CTCK_InitGlobalVar);
123	auto *VD = dyn_cast_or_null<VarDecl>(Val: D);
124	if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
125	auto Target = CUDAFunctionTarget::Host;
126	if ((hasAttr<CUDADeviceAttr>(VD, /IgnoreImplicit=/true) &&
127	!hasAttr<CUDAHostAttr>(VD, /IgnoreImplicit=/true)) \|\|
128	hasAttr<CUDASharedAttr>(VD, /IgnoreImplicit=/true) \|\|
129	hasAttr<CUDAConstantAttr>(VD, /IgnoreImplicit=/true))
130	Target = CUDAFunctionTarget::Device;
131	S.CurCUDATargetCtx = {Target, K, VD};
132	}
133	}
134
135	/// IdentifyTarget - Determine the CUDA compilation target for this function
136	CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
137	bool IgnoreImplicitHDAttr) {
138	// Code that lives outside a function gets the target from CurCUDATargetCtx.
139	if (D == nullptr)
140	return CurCUDATargetCtx.Target;
141
142	if (D->hasAttr<CUDAInvalidTargetAttr>())
143	return CUDAFunctionTarget::InvalidTarget;
144
145	if (D->hasAttr<CUDAGlobalAttr>())
146	return CUDAFunctionTarget::Global;
147
148	if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
149	if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
150	return CUDAFunctionTarget::HostDevice;
151	return CUDAFunctionTarget::Device;
152	} else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
153	return CUDAFunctionTarget::Host;
154	} else if ((D->isImplicit() \|\| !D->isUserProvided()) &&
155	!IgnoreImplicitHDAttr) {
156	// Some implicit declarations (like intrinsic functions) are not marked.
157	// Set the most lenient target on them for maximal flexibility.
158	return CUDAFunctionTarget::HostDevice;
159	}
160
161	return CUDAFunctionTarget::Host;
162	}
163
164	/// IdentifyTarget - Determine the CUDA compilation target for this variable.
165	SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
166	if (Var->hasAttr<HIPManagedAttr>())
167	return CVT_Unified;
168	// Only constexpr and const variabless with implicit constant attribute
169	// are emitted on both sides. Such variables are promoted to device side
170	// only if they have static constant intializers on device side.
171	if ((Var->isConstexpr() \|\| Var->getType().isConstQualified()) &&
172	Var->hasAttr<CUDAConstantAttr>() &&
173	!hasExplicitAttr<CUDAConstantAttr>(Var))
174	return CVT_Both;
175	if (Var->hasAttr<CUDADeviceAttr>() \|\| Var->hasAttr<CUDAConstantAttr>() \|\|
176	Var->hasAttr<CUDASharedAttr>() \|\|
177	Var->getType()->isCUDADeviceBuiltinSurfaceType() \|\|
178	Var->getType()->isCUDADeviceBuiltinTextureType())
179	return CVT_Device;
180	// Function-scope static variable without explicit device or constant
181	// attribute are emitted
182	// - on both sides in host device functions
183	// - on device side in device or global functions
184	if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
185	switch (IdentifyTarget(FD)) {
186	case CUDAFunctionTarget::HostDevice:
187	return CVT_Both;
188	case CUDAFunctionTarget::Device:
189	case CUDAFunctionTarget::Global:
190	return CVT_Device;
191	default:
192	return CVT_Host;
193	}
194	}
195	return CVT_Host;
196	}
197
198	// CUDA Call preference table*
199	//
200	// F - from,
201	// T - to
202	// Ph - preference in host mode
203	// Pd - preference in device mode
204	// H - handled in (x)
205	// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
206	//
207	// \| F \| T \| Ph \| Pd \| H \|
208	// \|----+----+-----+-----+-----+
209	// \| d \| d \| N \| N \| (c) \|
210	// \| d \| g \| -- \| -- \| (a) \|
211	// \| d \| h \| -- \| -- \| (e) \|
212	// \| d \| hd \| HD \| HD \| (b) \|
213	// \| g \| d \| N \| N \| (c) \|
214	// \| g \| g \| -- \| -- \| (a) \|
215	// \| g \| h \| -- \| -- \| (e) \|
216	// \| g \| hd \| HD \| HD \| (b) \|
217	// \| h \| d \| -- \| -- \| (e) \|
218	// \| h \| g \| N \| N \| (c) \|
219	// \| h \| h \| N \| N \| (c) \|
220	// \| h \| hd \| HD \| HD \| (b) \|
221	// \| hd \| d \| WS \| SS \| (d) \|
222	// \| hd \| g \| SS \| -- \|(d/a)\|
223	// \| hd \| h \| SS \| WS \| (d) \|
224	// \| hd \| hd \| HD \| HD \| (b) \|
225
226	SemaCUDA::CUDAFunctionPreference
227	SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
228	const FunctionDecl *Callee) {
229	assert(Callee && "Callee must be valid.");
230
231	// Treat ctor/dtor as host device function in device var initializer to allow
232	// trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
233	// will be diagnosed by checkAllowedInitializer.
234	if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
235	CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
236	(isa<CXXConstructorDecl>(Val: Callee) \|\| isa<CXXDestructorDecl>(Val: Callee)))
237	return CFP_HostDevice;
238
239	CUDAFunctionTarget CallerTarget = IdentifyTarget(D: Caller);
240	CUDAFunctionTarget CalleeTarget = IdentifyTarget(D: Callee);
241
242	// If one of the targets is invalid, the check always fails, no matter what
243	// the other target is.
244	if (CallerTarget == CUDAFunctionTarget::InvalidTarget \|\|
245	CalleeTarget == CUDAFunctionTarget::InvalidTarget)
246	return CFP_Never;
247
248	// (a) Can't call global from some contexts until we support CUDA's
249	// dynamic parallelism.
250	if (CalleeTarget == CUDAFunctionTarget::Global &&
251	(CallerTarget == CUDAFunctionTarget::Global \|\|
252	CallerTarget == CUDAFunctionTarget::Device))
253	return CFP_Never;
254
255	// (b) Calling HostDevice is OK for everyone.
256	if (CalleeTarget == CUDAFunctionTarget::HostDevice)
257	return CFP_HostDevice;
258
259	// (c) Best case scenarios
260	if (CalleeTarget == CallerTarget \|\|
261	(CallerTarget == CUDAFunctionTarget::Host &&
262	CalleeTarget == CUDAFunctionTarget::Global) \|\|
263	(CallerTarget == CUDAFunctionTarget::Global &&
264	CalleeTarget == CUDAFunctionTarget::Device))
265	return CFP_Native;
266
267	// HipStdPar mode is special, in that assessing whether a device side call to
268	// a host target is deferred to a subsequent pass, and cannot unambiguously be
269	// adjudicated in the AST, hence we optimistically allow them to pass here.
270	if (getLangOpts().HIPStdPar &&
271	(CallerTarget == CUDAFunctionTarget::Global \|\|
272	CallerTarget == CUDAFunctionTarget::Device \|\|
273	CallerTarget == CUDAFunctionTarget::HostDevice) &&
274	CalleeTarget == CUDAFunctionTarget::Host)
275	return CFP_HostDevice;
276
277	// (d) HostDevice behavior depends on compilation mode.
278	if (CallerTarget == CUDAFunctionTarget::HostDevice) {
279	// It's OK to call a compilation-mode matching function from an HD one.
280	if ((getLangOpts().CUDAIsDevice &&
281	CalleeTarget == CUDAFunctionTarget::Device) \|\|
282	(!getLangOpts().CUDAIsDevice &&
283	(CalleeTarget == CUDAFunctionTarget::Host \|\|
284	CalleeTarget == CUDAFunctionTarget::Global)))
285	return CFP_SameSide;
286
287	// Calls from HD to non-mode-matching functions (i.e., to host functions
288	// when compiling in device mode or to device functions when compiling in
289	// host mode) are allowed at the sema level, but eventually rejected if
290	// they're ever codegened. TODO: Reject said calls earlier.
291	return CFP_WrongSide;
292	}
293
294	// (e) Calling across device/host boundary is not something you should do.
295	if ((CallerTarget == CUDAFunctionTarget::Host &&
296	CalleeTarget == CUDAFunctionTarget::Device) \|\|
297	(CallerTarget == CUDAFunctionTarget::Device &&
298	CalleeTarget == CUDAFunctionTarget::Host) \|\|
299	(CallerTarget == CUDAFunctionTarget::Global &&
300	CalleeTarget == CUDAFunctionTarget::Host))
301	return CFP_Never;
302
303	llvm_unreachable("All cases should've been handled by now.");
304	}
305
306	template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
307	if (!D)
308	return false;
309	if (auto *A = D->getAttr<AttrT>())
310	return A->isImplicit();
311	return D->isImplicit();
312	}
313
314	bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) {
315	bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
316	bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
317	return IsImplicitDevAttr && IsImplicitHostAttr;
318	}
319
320	void SemaCUDA::EraseUnwantedMatches(
321	const FunctionDecl *Caller,
322	SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
323	if (Matches.size() <= `1`)
324	return;
325
326	using Pair = std::pair<DeclAccessPair, FunctionDecl*>;
327
328	// Gets the CUDA function preference for a call from Caller to Match.
329	auto GetCFP = [&](const Pair &Match) {
330	return IdentifyPreference(Caller, Callee: Match.second);
331	};
332
333	// Find the best call preference among the functions in Matches.
334	CUDAFunctionPreference BestCFP = GetCFP (*std::max_element(
335	first: Matches.begin(), last: Matches.end(),
336	comp: [&](const Pair &M1, const Pair &M2) { return GetCFP (M1) < GetCFP (M2); }));
337
338	// Erase all functions with lower priority.
339	llvm::erase_if(C&: Matches,
340	P: [&](const Pair &Match) { return GetCFP (Match) < BestCFP; });
341	}
342
343	/// When an implicitly-declared special member has to invoke more than one
344	/// base/field special member, conflicts may occur in the targets of these
345	/// members. For example, if one base's member __host__ and another's is
346	/// __device__, it's a conflict.
347	/// This function figures out if the given targets \param Target1 and
348	/// \param Target2 conflict, and if they do not it fills in
349	/// \param ResolvedTarget with a target that resolves for both calls.
350	/// \return true if there's a conflict, false otherwise.
351	static bool
352	resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
353	CUDAFunctionTarget Target2,
354	CUDAFunctionTarget *ResolvedTarget) {
355	// Only free functions and static member functions may be global.
356	assert(Target1 != CUDAFunctionTarget::Global);
357	assert(Target2 != CUDAFunctionTarget::Global);
358
359	if (Target1 == CUDAFunctionTarget::HostDevice) {
360	*ResolvedTarget = Target2;
361	} else if (Target2 == CUDAFunctionTarget::HostDevice) {
362	*ResolvedTarget = Target1;
363	} else if (Target1 != Target2) {
364	return true;
365	} else {
366	*ResolvedTarget = Target1;
367	}
368
369	return false;
370	}
371
372	bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
373	CXXSpecialMemberKind CSM,
374	CXXMethodDecl *MemberDecl,
375	bool ConstRHS,
376	bool Diagnose) {
377	// If the defaulted special member is defined lexically outside of its
378	// owning class, or the special member already has explicit device or host
379	// attributes, do not infer.
380	bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
381	bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
382	bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
383	bool HasExplicitAttr =
384	(HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) \|\|
385	(HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
386	if (!InClass \|\| HasExplicitAttr)
387	return false;
388
389	std::optional<CUDAFunctionTarget> InferredTarget;
390
391	// We're going to invoke special member lookup; mark that these special
392	// members are called from this one, and not from its caller.
393	Sema::ContextRAII MethodContext(SemaRef, MemberDecl);
394
395	// Look for special members in base classes that should be invoked from here.
396	// Infer the target of this member base on the ones it should call.
397	// Skip direct and indirect virtual bases for abstract classes.
398	llvm::SmallVector<const CXXBaseSpecifier *, `16`> Bases;
399	for (const auto &B : ClassDecl->bases()) {
400	if (!B.isVirtual()) {
401	Bases.push_back(Elt: &B);
402	}
403	}
404
405	if (!ClassDecl->isAbstract()) {
406	llvm::append_range(C&: Bases, R: llvm::make_pointer_range(Range: ClassDecl->vbases()));
407	}
408
409	for (const auto *B : Bases) {
410	const RecordType *BaseType = B->getType()->getAs<RecordType>();
411	if (!BaseType) {
412	continue;
413	}
414
415	CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(Val: BaseType->getDecl());
416	Sema::SpecialMemberOverloadResult SMOR =
417	SemaRef.LookupSpecialMember(D: BaseClassDecl, SM: CSM,
418	/ ConstArg / ConstRHS,
419	/ VolatileArg / false,
420	/ RValueThis / false,
421	/ ConstThis / false,
422	/ VolatileThis / false);
423
424	if (!SMOR.getMethod())
425	continue;
426
427	CUDAFunctionTarget BaseMethodTarget = IdentifyTarget(SMOR.getMethod());
428	if (!InferredTarget) {
429	InferredTarget = BaseMethodTarget;
430	} else {
431	bool ResolutionError = resolveCalleeCUDATargetConflict(
432	Target1: InferredTarget, Target2: BaseMethodTarget, ResolvedTarget: &InferredTarget);
433	if (ResolutionError) {
434	if (Diagnose) {
435	Diag(ClassDecl->getLocation(),
436	diag::note_implicit_member_target_infer_collision)
437	<< (unsigned)CSM << llvm::to_underlying(*InferredTarget)
438	<< llvm::to_underlying(BaseMethodTarget);
439	}
440	MemberDecl->addAttr(
441	CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
442	return true;
443	}
444	}
445	}
446
447	// Same as for bases, but now for special members of fields.
448	for (const auto *F : ClassDecl->fields()) {
449	if (F->isInvalidDecl()) {
450	continue;
451	}
452
453	const RecordType *FieldType =
454	getASTContext().getBaseElementType(F->getType())->getAs<RecordType>();
455	if (!FieldType) {
456	continue;
457	}
458
459	CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
460	Sema::SpecialMemberOverloadResult SMOR =
461	SemaRef.LookupSpecialMember(FieldRecDecl, CSM,
462	/ ConstArg / ConstRHS && !F->isMutable(),
463	/ VolatileArg / false,
464	/ RValueThis / false,
465	/ ConstThis / false,
466	/ VolatileThis / false);
467
468	if (!SMOR.getMethod())
469	continue;
470
471	CUDAFunctionTarget FieldMethodTarget = IdentifyTarget(SMOR.getMethod());
472	if (!InferredTarget) {
473	InferredTarget = FieldMethodTarget;
474	} else {
475	bool ResolutionError = resolveCalleeCUDATargetConflict(
476	InferredTarget, FieldMethodTarget, &InferredTarget);
477	if (ResolutionError) {
478	if (Diagnose) {
479	Diag(ClassDecl->getLocation(),
480	diag::note_implicit_member_target_infer_collision)
481	<< (unsigned)CSM << llvm::to_underlying(*InferredTarget)
482	<< llvm::to_underlying(FieldMethodTarget);
483	}
484	MemberDecl->addAttr(
485	CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
486	return true;
487	}
488	}
489	}
490
491
492	// If no target was inferred, mark this member as __host__ __device__;
493	// it's the least restrictive option that can be invoked from any target.
494	bool NeedsH = true, NeedsD = true;
495	if (InferredTarget) {
496	if (*InferredTarget == CUDAFunctionTarget::Device)
497	NeedsH = false;
498	else if (*InferredTarget == CUDAFunctionTarget::Host)
499	NeedsD = false;
500	}
501
502	// We either setting attributes first time, or the inferred ones must match
503	// previously set ones.
504	if (NeedsD && !HasD)
505	MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
506	if (NeedsH && !HasH)
507	MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
508
509	return false;
510	}
511
512	bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
513	if (!CD->isDefined() && CD->isTemplateInstantiation())
514	SemaRef.InstantiateFunctionDefinition(PointOfInstantiation: Loc, Function: CD->getFirstDecl());
515
516	// (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
517	// empty at a point in the translation unit, if it is either a
518	// trivial constructor
519	if (CD->isTrivial())
520	return true;
521
522	// ... or it satisfies all of the following conditions:
523	// The constructor function has been defined.
524	// The constructor function has no parameters,
525	// and the function body is an empty compound statement.
526	if (!(CD->hasTrivialBody() && CD->getNumParams() == `0`))
527	return false;
528
529	// Its class has no virtual functions and no virtual base classes.
530	if (CD->getParent()->isDynamicClass())
531	return false;
532
533	// Union ctor does not call ctors of its data members.
534	if (CD->getParent()->isUnion())
535	return true;
536
537	// The only form of initializer allowed is an empty constructor.
538	// This will recursively check all base classes and member initializers
539	if (!llvm::all_of(Range: CD->inits(), P: [&](const CXXCtorInitializer *CI) {
540	if (const CXXConstructExpr *CE =
541	dyn_cast<CXXConstructExpr>(Val: CI->getInit()))
542	return isEmptyConstructor(Loc, CD: CE->getConstructor());
543	return false;
544	}))
545	return false;
546
547	return true;
548	}
549
550	bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
551	// No destructor -> no problem.
552	if (!DD)
553	return true;
554
555	if (!DD->isDefined() && DD->isTemplateInstantiation())
556	SemaRef.InstantiateFunctionDefinition(PointOfInstantiation: Loc, Function: DD->getFirstDecl());
557
558	// (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
559	// empty at a point in the translation unit, if it is either a
560	// trivial constructor
561	if (DD->isTrivial())
562	return true;
563
564	// ... or it satisfies all of the following conditions:
565	// The destructor function has been defined.
566	// and the function body is an empty compound statement.
567	if (!DD->hasTrivialBody())
568	return false;
569
570	const CXXRecordDecl *ClassDecl = DD->getParent();
571
572	// Its class has no virtual functions and no virtual base classes.
573	if (ClassDecl->isDynamicClass())
574	return false;
575
576	// Union does not have base class and union dtor does not call dtors of its
577	// data members.
578	if (DD->getParent()->isUnion())
579	return true;
580
581	// Only empty destructors are allowed. This will recursively check
582	// destructors for all base classes...
583	if (!llvm::all_of(Range: ClassDecl->bases(), P: [&](const CXXBaseSpecifier &BS) {
584	if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
585	return isEmptyDestructor(Loc, DD: RD->getDestructor());
586	return true;
587	}))
588	return false;
589
590	// ... and member fields.
591	if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
592	if (CXXRecordDecl *RD = Field->getType()
593	->getBaseElementTypeUnsafe()
594	->getAsCXXRecordDecl())
595	return isEmptyDestructor(Loc, RD->getDestructor());
596	return true;
597	}))
598	return false;
599
600	return true;
601	}
602
603	namespace {
604	enum CUDAInitializerCheckKind {
605	CICK_DeviceOrConstant, // Check initializer for device/constant variable
606	CICK_Shared, // Check initializer for shared variable
607	};
608
609	bool IsDependentVar(VarDecl *VD) {
610	if (VD->getType()->isDependentType())
611	return true;
612	if (const auto *Init = VD->getInit())
613	return Init->isValueDependent();
614	return false;
615	}
616
617	// Check whether a variable has an allowed initializer for a CUDA device side
618	// variable with global storage. \p VD may be a host variable to be checked for
619	// potential promotion to device side variable.
620	//
621	// CUDA/HIP allows only empty constructors as initializers for global
622	// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
623	// __shared__ variables whether they are local or not (they all are implicitly
624	// static in CUDA). One exception is that CUDA allows constant initializers
625	// for __constant__ and __device__ variables.
626	bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD,
627	CUDAInitializerCheckKind CheckKind) {
628	assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
629	assert(!IsDependentVar(VD) && "do not check dependent var");
630	const Expr *Init = VD->getInit();
631	auto IsEmptyInit = [&](const Expr *Init) {
632	if (!Init)
633	return true;
634	if (const auto *CE = dyn_cast<CXXConstructExpr>(Val: Init)) {
635	return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor());
636	}
637	return false;
638	};
639	auto IsConstantInit = [&](const Expr *Init) {
640	assert(Init);
641	ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(),
642	/NoWronSidedVars=/true);
643	return Init->isConstantInitializer(S.getASTContext(),
644	VD->getType()->isReferenceType());
645	};
646	auto HasEmptyDtor = [&](VarDecl *VD) {
647	if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
648	return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor());
649	return true;
650	};
651	if (CheckKind == CICK_Shared)
652	return IsEmptyInit(Init) && HasEmptyDtor(VD);
653	return S.getLangOpts().GPUAllowDeviceInit \|\|
654	((IsEmptyInit(Init) \|\| IsConstantInit(Init)) && HasEmptyDtor(VD));
655	}
656	} // namespace
657
658	void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
659	// Return early if VD is inside a non-instantiated template function since
660	// the implicit constructor is not defined yet.
661	if (const FunctionDecl *FD =
662	dyn_cast_or_null<FunctionDecl>(VD->getDeclContext()))
663	if (FD->isDependentContext())
664	return;
665
666	// Do not check dependent variables since the ctor/dtor/initializer are not
667	// determined. Do it after instantiation.
668	if (VD->isInvalidDecl() \|\| !VD->hasInit() \|\| !VD->hasGlobalStorage() \|\|
669	IsDependentVar(VD))
670	return;
671	const Expr *Init = VD->getInit();
672	bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
673	bool IsDeviceOrConstantVar =
674	!IsSharedVar &&
675	(VD->hasAttr<CUDADeviceAttr>() \|\| VD->hasAttr<CUDAConstantAttr>());
676	if (IsDeviceOrConstantVar \|\| IsSharedVar) {
677	if (HasAllowedCUDADeviceStaticInitializer(
678	S&: *this, VD, CheckKind: IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
679	return;
680	Diag(VD->getLocation(),
681	IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
682	<< Init->getSourceRange();
683	VD->setInvalidDecl();
684	} else {
685	// This is a host-side global variable. Check that the initializer is
686	// callable from the host side.
687	const FunctionDecl InitFn = nullptr*;
688	if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Val: Init)) {
689	InitFn = CE->getConstructor();
690	} else if (const CallExpr *CE = dyn_cast<CallExpr>(Val: Init)) {
691	InitFn = CE->getDirectCallee();
692	}
693	if (InitFn) {
694	CUDAFunctionTarget InitFnTarget = IdentifyTarget(D: InitFn);
695	if (InitFnTarget != CUDAFunctionTarget::Host &&
696	InitFnTarget != CUDAFunctionTarget::HostDevice) {
697	Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
698	<< llvm::to_underlying(InitFnTarget) << InitFn;
699	Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
700	VD->setInvalidDecl();
701	}
702	}
703	}
704	}
705
706	void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
707	const FunctionDecl *Callee) {
708	FunctionDecl Caller = SemaRef.getCurFunctionDecl(/AllowLambda=/*true);
709	if (!Caller)
710	return;
711
712	if (!isImplicitHostDeviceFunction(D: Callee))
713	return;
714
715	CUDAFunctionTarget CallerTarget = IdentifyTarget(D: Caller);
716
717	// Record whether an implicit host device function is used on device side.
718	if (CallerTarget != CUDAFunctionTarget::Device &&
719	CallerTarget != CUDAFunctionTarget::Global &&
720	(CallerTarget != CUDAFunctionTarget::HostDevice \|\|
721	(isImplicitHostDeviceFunction(D: Caller) &&
722	!getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(V: Caller))))
723	return;
724
725	getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(V: Callee);
726	}
727
728	// With -fcuda-host-device-constexpr, an unattributed constexpr function is
729	// treated as implicitly __host__ __device__, unless:
730	// it is a variadic function (device-side variadic functions are not*
731	// allowed), or
732	// a __device__ function with this signature was already declared, in which*
733	// case in which case we output an error, unless the __device__ decl is in a
734	// system header, in which case we leave the constexpr function unattributed.
735	//
736	// In addition, all function decls are treated as __host__ __device__ when
737	// ForceHostDeviceDepth > 0 (corresponding to code within a
738	// #pragma clang force_cuda_host_device_begin/end
739	// pair).
740	void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD,
741	const LookupResult &Previous) {
742	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
743
744	if (ForceHostDeviceDepth > `0`) {
745	if (!NewD->hasAttr<CUDAHostAttr>())
746	NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
747	if (!NewD->hasAttr<CUDADeviceAttr>())
748	NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
749	return;
750	}
751
752	// If a template function has no host/device/global attributes,
753	// make it implicitly host device function.
754	if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
755	!NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
756	!NewD->hasAttr<CUDAGlobalAttr>() &&
757	(NewD->getDescribedFunctionTemplate() \|\|
758	NewD->isFunctionTemplateSpecialization())) {
759	NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
760	NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
761	return;
762	}
763
764	if (!getLangOpts().CUDAHostDeviceConstexpr \|\| !NewD->isConstexpr() \|\|
765	NewD->isVariadic() \|\| NewD->hasAttr<CUDAHostAttr>() \|\|
766	NewD->hasAttr<CUDADeviceAttr>() \|\| NewD->hasAttr<CUDAGlobalAttr>())
767	return;
768
769	// Is D a __device__ function with the same signature as NewD, ignoring CUDA
770	// attributes?
771	auto IsMatchingDeviceFn = [&](NamedDecl *D) {
772	if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(Val: D))
773	D = Using->getTargetDecl();
774	FunctionDecl *OldD = D->getAsFunction();
775	return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
776	!OldD->hasAttr<CUDAHostAttr>() &&
777	!SemaRef.IsOverload(NewD, OldD,
778	/ UseMemberUsingDeclRules = / false,
779	/ ConsiderCudaAttrs = / false);
780	};
781	auto It = llvm::find_if(Range: Previous, P: IsMatchingDeviceFn);
782	if (It != Previous.end()) {
783	// We found a __device__ function with the same name and signature as NewD
784	// (ignoring CUDA attrs). This is an error unless that function is defined
785	// in a system header, in which case we simply return without making NewD
786	// host+device.
787	NamedDecl Match = It;
788	if (!SemaRef.getSourceManager().isInSystemHeader(Loc: Match->getLocation())) {
789	Diag(NewD->getLocation(),
790	diag::err_cuda_unattributed_constexpr_cannot_overload_device)
791	<< NewD;
792	Diag(Match->getLocation(),
793	diag::note_cuda_conflicting_device_function_declared_here);
794	}
795	return;
796	}
797
798	NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
799	NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
800	}
801
802	// TODO: `__constant__` memory may be a limited resource for certain targets.
803	// A safeguard may be needed at the end of compilation pipeline if
804	// `__constant__` memory usage goes beyond limit.
805	void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) {
806	// Do not promote dependent variables since the cotr/dtor/initializer are
807	// not determined. Do it after instantiation.
808	if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
809	!VD->hasAttr<CUDASharedAttr>() &&
810	(VD->isFileVarDecl() \|\| VD->isStaticDataMember()) &&
811	!IsDependentVar(VD) &&
812	((VD->isConstexpr() \|\| VD->getType().isConstQualified()) &&
813	HasAllowedCUDADeviceStaticInitializer(*this, VD,
814	CICK_DeviceOrConstant))) {
815	VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
816	}
817	}
818
819	SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
820	unsigned DiagID) {
821	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
822	FunctionDecl *CurFunContext =
823	SemaRef.getCurFunctionDecl(/AllowLambda=/true);
824	SemaDiagnosticBuilder::Kind DiagKind = [&] {
825	if (!CurFunContext)
826	return SemaDiagnosticBuilder::K_Nop;
827	switch (CurrentTarget()) {
828	case CUDAFunctionTarget::Global:
829	case CUDAFunctionTarget::Device:
830	return SemaDiagnosticBuilder::K_Immediate;
831	case CUDAFunctionTarget::HostDevice:
832	// An HD function counts as host code if we're compiling for host, and
833	// device code if we're compiling for device. Defer any errors in device
834	// mode until the function is known-emitted.
835	if (!getLangOpts().CUDAIsDevice)
836	return SemaDiagnosticBuilder::K_Nop;
837	if (SemaRef.IsLastErrorImmediate &&
838	getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID))
839	return SemaDiagnosticBuilder::K_Immediate;
840	return (SemaRef.getEmissionStatus(Decl: CurFunContext) ==
841	Sema::FunctionEmissionStatus::Emitted)
842	? SemaDiagnosticBuilder::K_ImmediateWithCallStack
843	: SemaDiagnosticBuilder::K_Deferred;
844	default:
845	return SemaDiagnosticBuilder::K_Nop;
846	}
847	}();
848	return SemaDiagnosticBuilder (DiagKind, Loc, DiagID, CurFunContext, SemaRef);
849	}
850
851	Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc,
852	unsigned DiagID) {
853	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
854	FunctionDecl *CurFunContext =
855	SemaRef.getCurFunctionDecl(/AllowLambda=/true);
856	SemaDiagnosticBuilder::Kind DiagKind = [&] {
857	if (!CurFunContext)
858	return SemaDiagnosticBuilder::K_Nop;
859	switch (CurrentTarget()) {
860	case CUDAFunctionTarget::Host:
861	return SemaDiagnosticBuilder::K_Immediate;
862	case CUDAFunctionTarget::HostDevice:
863	// An HD function counts as host code if we're compiling for host, and
864	// device code if we're compiling for device. Defer any errors in device
865	// mode until the function is known-emitted.
866	if (getLangOpts().CUDAIsDevice)
867	return SemaDiagnosticBuilder::K_Nop;
868	if (SemaRef.IsLastErrorImmediate &&
869	getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID))
870	return SemaDiagnosticBuilder::K_Immediate;
871	return (SemaRef.getEmissionStatus(Decl: CurFunContext) ==
872	Sema::FunctionEmissionStatus::Emitted)
873	? SemaDiagnosticBuilder::K_ImmediateWithCallStack
874	: SemaDiagnosticBuilder::K_Deferred;
875	default:
876	return SemaDiagnosticBuilder::K_Nop;
877	}
878	}();
879	return SemaDiagnosticBuilder (DiagKind, Loc, DiagID, CurFunContext, SemaRef);
880	}
881
882	bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
883	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
884	assert(Callee && "Callee may not be null.");
885
886	const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
887	if (ExprEvalCtx.isUnevaluated() \|\| ExprEvalCtx.isConstantEvaluated())
888	return true;
889
890	// FIXME: Is bailing out early correct here? Should we instead assume that
891	// the caller is a global initializer?
892	FunctionDecl Caller = SemaRef.getCurFunctionDecl(/AllowLambda=/*true);
893	if (!Caller)
894	return true;
895
896	// If the caller is known-emitted, mark the callee as known-emitted.
897	// Otherwise, mark the call in our call graph so we can traverse it later.
898	bool CallerKnownEmitted = SemaRef.getEmissionStatus(Decl: Caller) ==
899	Sema::FunctionEmissionStatus::Emitted;
900	SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
901	CallerKnownEmitted] {
902	switch (IdentifyPreference(Caller, Callee)) {
903	case CFP_Never:
904	case CFP_WrongSide:
905	assert(Caller && "Never/wrongSide calls require a non-null caller");
906	// If we know the caller will be emitted, we know this wrong-side call
907	// will be emitted, so it's an immediate error. Otherwise, defer the
908	// error until we know the caller is emitted.
909	return CallerKnownEmitted
910	? SemaDiagnosticBuilder::K_ImmediateWithCallStack
911	: SemaDiagnosticBuilder::K_Deferred;
912	default:
913	return SemaDiagnosticBuilder::K_Nop;
914	}
915	}();
916
917	if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
918	// For -fgpu-rdc, keep track of external kernels used by host functions.
919	if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
920	Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
921	(!Caller \|\| (!Caller->getDescribedFunctionTemplate() &&
922	getASTContext().GetGVALinkageForFunction(Caller) ==
923	GVA_StrongExternal)))
924	getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
925	return true;
926	}
927
928	// Avoid emitting this error twice for the same location. Using a hashtable
929	// like this is unfortunate, but because we must continue parsing as normal
930	// after encountering a deferred error, it's otherwise very tricky for us to
931	// ensure that we only emit this deferred error once.
932	if (!LocsWithCUDACallDiags.insert(V: {.FD: Caller, .Loc: Loc}).second)
933	return true;
934
935	SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller,
936	SemaRef)
937	<< llvm::to_underlying(IdentifyTarget(Callee)) << /function/ `0` << Callee
938	<< llvm::to_underlying(IdentifyTarget(Caller));
939	if (!Callee->getBuiltinID())
940	SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
941	diag::note_previous_decl, Caller, SemaRef)
942	<< Callee;
943	return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
944	DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
945	}
946
947	// Check the wrong-sided reference capture of lambda for CUDA/HIP.
948	// A lambda function may capture a stack variable by reference when it is
949	// defined and uses the capture by reference when the lambda is called. When
950	// the capture and use happen on different sides, the capture is invalid and
951	// should be diagnosed.
952	void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee,
953	const sema::Capture &Capture) {
954	// In host compilation we only need to check lambda functions emitted on host
955	// side. In such lambda functions, a reference capture is invalid only
956	// if the lambda structure is populated by a device function or kernel then
957	// is passed to and called by a host function. However that is impossible,
958	// since a device function or kernel can only call a device function, also a
959	// kernel cannot pass a lambda back to a host function since we cannot
960	// define a kernel argument type which can hold the lambda before the lambda
961	// itself is defined.
962	if (!getLangOpts().CUDAIsDevice)
963	return;
964
965	// File-scope lambda can only do init captures for global variables, which
966	// results in passing by value for these global variables.
967	FunctionDecl Caller = SemaRef.getCurFunctionDecl(/AllowLambda=/*true);
968	if (!Caller)
969	return;
970
971	// In device compilation, we only need to check lambda functions which are
972	// emitted on device side. For such lambdas, a reference capture is invalid
973	// only if the lambda structure is populated by a host function then passed
974	// to and called in a device function or kernel.
975	bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
976	bool CallerIsHost =
977	!Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
978	bool ShouldCheck = CalleeIsDevice && CallerIsHost;
979	if (!ShouldCheck \|\| !Capture.isReferenceCapture())
980	return;
981	auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
982	if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
983	SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
984	diag::err_capture_bad_target, Callee, SemaRef)
985	<< Capture.getVariable();
986	} else if (Capture.isThisCapture()) {
987	// Capture of this pointer is allowed since this pointer may be pointing to
988	// managed memory which is accessible on both device and host sides. It only
989	// results in invalid memory access if this pointer points to memory not
990	// accessible on device side.
991	SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
992	diag::warn_maybe_capture_bad_target_this_ptr, Callee,
993	SemaRef);
994	}
995	}
996
997	void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) {
998	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
999	if (Method->hasAttr<CUDAHostAttr>() \|\| Method->hasAttr<CUDADeviceAttr>())
1000	return;
1001	Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
1002	Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
1003	}
1004
1005	void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD,
1006	const LookupResult &Previous) {
1007	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
1008	CUDAFunctionTarget NewTarget = IdentifyTarget(D: NewFD);
1009	for (NamedDecl *OldND : Previous) {
1010	FunctionDecl *OldFD = OldND->getAsFunction();
1011	if (!OldFD)
1012	continue;
1013
1014	CUDAFunctionTarget OldTarget = IdentifyTarget(D: OldFD);
1015	// Don't allow HD and global functions to overload other functions with the
1016	// same signature. We allow overloading based on CUDA attributes so that
1017	// functions can have different implementations on the host and device, but
1018	// HD/global functions "exist" in some sense on both the host and device, so
1019	// should have the same implementation on both sides.
1020	if (NewTarget != OldTarget &&
1021	((NewTarget == CUDAFunctionTarget::HostDevice &&
1022	!(getLangOpts().OffloadImplicitHostDeviceTemplates &&
1023	isImplicitHostDeviceFunction(D: NewFD) &&
1024	OldTarget == CUDAFunctionTarget::Device)) \|\|
1025	(OldTarget == CUDAFunctionTarget::HostDevice &&
1026	!(getLangOpts().OffloadImplicitHostDeviceTemplates &&
1027	isImplicitHostDeviceFunction(D: OldFD) &&
1028	NewTarget == CUDAFunctionTarget::Device)) \|\|
1029	(NewTarget == CUDAFunctionTarget::Global) \|\|
1030	(OldTarget == CUDAFunctionTarget::Global)) &&
1031	!SemaRef.IsOverload(New: NewFD, Old: OldFD, / UseMemberUsingDeclRules = / false,
1032	/ ConsiderCudaAttrs = / false)) {
1033	Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
1034	<< llvm::to_underlying(NewTarget) << NewFD->getDeclName()
1035	<< llvm::to_underlying(OldTarget) << OldFD;
1036	Diag(OldFD->getLocation(), diag::note_previous_declaration);
1037	NewFD->setInvalidDecl();
1038	break;
1039	}
1040	}
1041	}
1042
1043	template <typename AttrTy>
1044	static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
1045	const FunctionDecl &TemplateFD) {
1046	if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
1047	AttrTy *Clone = Attribute->clone(S.Context);
1048	Clone->setInherited(true);
1049	FD->addAttr(A: Clone);
1050	}
1051	}
1052
1053	void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD,
1054	const FunctionTemplateDecl &TD) {
1055	const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
1056	copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD);
1057	copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD);
1058	copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD);
1059	}
1060
1061	std::string SemaCUDA::getConfigureFuncName() const {
1062	if (getLangOpts().HIP)
1063	return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
1064	: "hipConfigureCall";
1065
1066	// New CUDA kernel launch sequence.
1067	if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(),
1068	CudaFeature::CUDA_USES_NEW_LAUNCH))
1069	return "__cudaPushCallConfiguration";
1070
1071	// Legacy CUDA kernel configuration call
1072	return "cudaConfigureCall";
1073	}
1074

source code of clang/lib/Sema/SemaCUDA.cpp