1 | //===----- SemaCUDA.h ----- Semantic Analysis for CUDA constructs ---------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This file declares semantic analysis for CUDA constructs. |
10 | /// |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_CLANG_SEMA_SEMACUDA_H |
14 | #define LLVM_CLANG_SEMA_SEMACUDA_H |
15 | |
16 | #include "clang/AST/Decl.h" |
17 | #include "clang/AST/DeclCXX.h" |
18 | #include "clang/AST/Redeclarable.h" |
19 | #include "clang/Basic/Cuda.h" |
20 | #include "clang/Basic/SourceLocation.h" |
21 | #include "clang/Sema/Lookup.h" |
22 | #include "clang/Sema/Ownership.h" |
23 | #include "clang/Sema/ParsedAttr.h" |
24 | #include "clang/Sema/Scope.h" |
25 | #include "clang/Sema/ScopeInfo.h" |
26 | #include "clang/Sema/SemaBase.h" |
27 | #include "llvm/ADT/DenseMap.h" |
28 | #include "llvm/ADT/SmallVector.h" |
29 | #include <string> |
30 | |
31 | namespace clang { |
32 | |
33 | enum class CUDAFunctionTarget; |
34 | |
35 | class SemaCUDA : public SemaBase { |
36 | public: |
37 | SemaCUDA(Sema &S); |
38 | |
39 | /// Increments our count of the number of times we've seen a pragma forcing |
40 | /// functions to be __host__ __device__. So long as this count is greater |
41 | /// than zero, all functions encountered will be __host__ __device__. |
42 | void PushForceHostDevice(); |
43 | |
44 | /// Decrements our count of the number of times we've seen a pragma forcing |
45 | /// functions to be __host__ __device__. Returns false if the count is 0 |
46 | /// before incrementing, so you can emit an error. |
47 | bool PopForceHostDevice(); |
48 | |
49 | ExprResult ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc, |
50 | MultiExprArg ExecConfig, |
51 | SourceLocation GGGLoc); |
52 | |
53 | /// A pair of a canonical FunctionDecl and a SourceLocation. When used as the |
54 | /// key in a hashtable, both the FD and location are hashed. |
55 | struct FunctionDeclAndLoc { |
56 | CanonicalDeclPtr<const FunctionDecl> FD; |
57 | SourceLocation Loc; |
58 | }; |
59 | |
60 | /// FunctionDecls and SourceLocations for which CheckCall has emitted a |
61 | /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the |
62 | /// same deferred diag twice. |
63 | llvm::DenseSet<FunctionDeclAndLoc> LocsWithCUDACallDiags; |
64 | |
65 | /// An inverse call graph, mapping known-emitted functions to one of their |
66 | /// known-emitted callers (plus the location of the call). |
67 | /// |
68 | /// Functions that we can tell a priori must be emitted aren't added to this |
69 | /// map. |
70 | llvm::DenseMap</* Callee = */ CanonicalDeclPtr<const FunctionDecl>, |
71 | /* Caller = */ FunctionDeclAndLoc> |
72 | DeviceKnownEmittedFns; |
73 | |
74 | /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current |
75 | /// context is "used as device code". |
76 | /// |
77 | /// - If CurContext is a __host__ function, does not emit any diagnostics |
78 | /// unless \p EmitOnBothSides is true. |
79 | /// - If CurContext is a __device__ or __global__ function, emits the |
80 | /// diagnostics immediately. |
81 | /// - If CurContext is a __host__ __device__ function and we are compiling for |
82 | /// the device, creates a diagnostic which is emitted if and when we realize |
83 | /// that the function will be codegen'ed. |
84 | /// |
85 | /// Example usage: |
86 | /// |
87 | /// // Variable-length arrays are not allowed in CUDA device code. |
88 | /// if (DiagIfDeviceCode(Loc, diag::err_cuda_vla) << CurrentTarget()) |
89 | /// return ExprError(); |
90 | /// // Otherwise, continue parsing as normal. |
91 | SemaDiagnosticBuilder DiagIfDeviceCode(SourceLocation Loc, unsigned DiagID); |
92 | |
93 | /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current |
94 | /// context is "used as host code". |
95 | /// |
96 | /// Same as DiagIfDeviceCode, with "host" and "device" switched. |
97 | SemaDiagnosticBuilder DiagIfHostCode(SourceLocation Loc, unsigned DiagID); |
98 | |
99 | /// Determines whether the given function is a CUDA device/host/kernel/etc. |
100 | /// function. |
101 | /// |
102 | /// Use this rather than examining the function's attributes yourself -- you |
103 | /// will get it wrong. Returns CUDAFunctionTarget::Host if D is null. |
104 | CUDAFunctionTarget IdentifyTarget(const FunctionDecl *D, |
105 | bool IgnoreImplicitHDAttr = false); |
106 | CUDAFunctionTarget IdentifyTarget(const ParsedAttributesView &Attrs); |
107 | |
108 | enum CUDAVariableTarget { |
109 | CVT_Device, /// Emitted on device side with a shadow variable on host side |
110 | CVT_Host, /// Emitted on host side only |
111 | CVT_Both, /// Emitted on both sides with different addresses |
112 | CVT_Unified, /// Emitted as a unified address, e.g. managed variables |
113 | }; |
114 | /// Determines whether the given variable is emitted on host or device side. |
115 | CUDAVariableTarget IdentifyTarget(const VarDecl *D); |
116 | |
117 | /// Defines kinds of CUDA global host/device context where a function may be |
118 | /// called. |
119 | enum CUDATargetContextKind { |
120 | CTCK_Unknown, /// Unknown context |
121 | CTCK_InitGlobalVar, /// Function called during global variable |
122 | /// initialization |
123 | }; |
124 | |
125 | /// Define the current global CUDA host/device context where a function may be |
126 | /// called. Only used when a function is called outside of any functions. |
127 | struct CUDATargetContext { |
128 | CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice; |
129 | CUDATargetContextKind Kind = CTCK_Unknown; |
130 | Decl *D = nullptr; |
131 | } CurCUDATargetCtx; |
132 | |
133 | struct { |
134 | SemaCUDA &; |
135 | SemaCUDA::CUDATargetContext ; |
136 | (SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, |
137 | Decl *D); |
138 | () { S.CurCUDATargetCtx = SavedCtx; } |
139 | }; |
140 | |
141 | /// Gets the CUDA target for the current context. |
142 | CUDAFunctionTarget CurrentTarget() { |
143 | return IdentifyTarget(D: dyn_cast<FunctionDecl>(Val: SemaRef.CurContext)); |
144 | } |
145 | |
146 | static bool isImplicitHostDeviceFunction(const FunctionDecl *D); |
147 | |
148 | // CUDA function call preference. Must be ordered numerically from |
149 | // worst to best. |
150 | enum CUDAFunctionPreference { |
151 | CFP_Never, // Invalid caller/callee combination. |
152 | CFP_WrongSide, // Calls from host-device to host or device |
153 | // function that do not match current compilation |
154 | // mode. |
155 | CFP_HostDevice, // Any calls to host/device functions. |
156 | CFP_SameSide, // Calls from host-device to host or device |
157 | // function matching current compilation mode. |
158 | CFP_Native, // host-to-host or device-to-device calls. |
159 | }; |
160 | |
161 | /// Identifies relative preference of a given Caller/Callee |
162 | /// combination, based on their host/device attributes. |
163 | /// \param Caller function which needs address of \p Callee. |
164 | /// nullptr in case of global context. |
165 | /// \param Callee target function |
166 | /// |
167 | /// \returns preference value for particular Caller/Callee combination. |
168 | CUDAFunctionPreference IdentifyPreference(const FunctionDecl *Caller, |
169 | const FunctionDecl *Callee); |
170 | |
171 | /// Determines whether Caller may invoke Callee, based on their CUDA |
172 | /// host/device attributes. Returns false if the call is not allowed. |
173 | /// |
174 | /// Note: Will return true for CFP_WrongSide calls. These may appear in |
175 | /// semantically correct CUDA programs, but only if they're never codegen'ed. |
176 | bool IsAllowedCall(const FunctionDecl *Caller, const FunctionDecl *Callee) { |
177 | return IdentifyPreference(Caller, Callee) != CFP_Never; |
178 | } |
179 | |
180 | /// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD, |
181 | /// depending on FD and the current compilation settings. |
182 | void maybeAddHostDeviceAttrs(FunctionDecl *FD, const LookupResult &Previous); |
183 | |
184 | /// May add implicit CUDAConstantAttr attribute to VD, depending on VD |
185 | /// and current compilation settings. |
186 | void MaybeAddConstantAttr(VarDecl *VD); |
187 | |
188 | /// Check whether we're allowed to call Callee from the current context. |
189 | /// |
190 | /// - If the call is never allowed in a semantically-correct program |
191 | /// (CFP_Never), emits an error and returns false. |
192 | /// |
193 | /// - If the call is allowed in semantically-correct programs, but only if |
194 | /// it's never codegen'ed (CFP_WrongSide), creates a deferred diagnostic to |
195 | /// be emitted if and when the caller is codegen'ed, and returns true. |
196 | /// |
197 | /// Will only create deferred diagnostics for a given SourceLocation once, |
198 | /// so you can safely call this multiple times without generating duplicate |
199 | /// deferred errors. |
200 | /// |
201 | /// - Otherwise, returns true without emitting any diagnostics. |
202 | bool CheckCall(SourceLocation Loc, FunctionDecl *Callee); |
203 | |
204 | void CheckLambdaCapture(CXXMethodDecl *D, const sema::Capture &Capture); |
205 | |
206 | /// Set __device__ or __host__ __device__ attributes on the given lambda |
207 | /// operator() method. |
208 | /// |
209 | /// CUDA lambdas by default is host device function unless it has explicit |
210 | /// host or device attribute. |
211 | void SetLambdaAttrs(CXXMethodDecl *Method); |
212 | |
213 | /// Record \p FD if it is a CUDA/HIP implicit host device function used on |
214 | /// device side in device compilation. |
215 | void RecordImplicitHostDeviceFuncUsedByDevice(const FunctionDecl *FD); |
216 | |
217 | /// Finds a function in \p Matches with highest calling priority |
218 | /// from \p Caller context and erases all functions with lower |
219 | /// calling priority. |
220 | void EraseUnwantedMatches( |
221 | const FunctionDecl *Caller, |
222 | llvm::SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> |
223 | &Matches); |
224 | |
225 | /// Given a implicit special member, infer its CUDA target from the |
226 | /// calls it needs to make to underlying base/field special members. |
227 | /// \param ClassDecl the class for which the member is being created. |
228 | /// \param CSM the kind of special member. |
229 | /// \param MemberDecl the special member itself. |
230 | /// \param ConstRHS true if this is a copy operation with a const object on |
231 | /// its RHS. |
232 | /// \param Diagnose true if this call should emit diagnostics. |
233 | /// \return true if there was an error inferring. |
234 | /// The result of this call is implicit CUDA target attribute(s) attached to |
235 | /// the member declaration. |
236 | bool inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, |
237 | CXXSpecialMemberKind CSM, |
238 | CXXMethodDecl *MemberDecl, |
239 | bool ConstRHS, bool Diagnose); |
240 | |
241 | /// \return true if \p CD can be considered empty according to CUDA |
242 | /// (E.2.3.1 in CUDA 7.5 Programming guide). |
243 | bool isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD); |
244 | bool isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *CD); |
245 | |
246 | // \brief Checks that initializers of \p Var satisfy CUDA restrictions. In |
247 | // case of error emits appropriate diagnostic and invalidates \p Var. |
248 | // |
249 | // \details CUDA allows only empty constructors as initializers for global |
250 | // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all |
251 | // __shared__ variables whether they are local or not (they all are implicitly |
252 | // static in CUDA). One exception is that CUDA allows constant initializers |
253 | // for __constant__ and __device__ variables. |
254 | void checkAllowedInitializer(VarDecl *VD); |
255 | |
256 | /// Check whether NewFD is a valid overload for CUDA. Emits |
257 | /// diagnostics and invalidates NewFD if not. |
258 | void checkTargetOverload(FunctionDecl *NewFD, const LookupResult &Previous); |
259 | /// Copies target attributes from the template TD to the function FD. |
260 | void inheritTargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD); |
261 | |
262 | /// Returns the name of the launch configuration function. This is the name |
263 | /// of the function that will be called to configure kernel call, with the |
264 | /// parameters specified via <<<>>>. |
265 | std::string getConfigureFuncName() const; |
266 | |
267 | private: |
268 | unsigned ForceHostDeviceDepth = 0; |
269 | |
270 | friend class ASTReader; |
271 | friend class ASTWriter; |
272 | }; |
273 | |
274 | } // namespace clang |
275 | |
276 | namespace llvm { |
277 | // Hash a FunctionDeclAndLoc by looking at both its FunctionDecl and its |
278 | // SourceLocation. |
279 | template <> struct DenseMapInfo<clang::SemaCUDA::FunctionDeclAndLoc> { |
280 | using FunctionDeclAndLoc = clang::SemaCUDA::FunctionDeclAndLoc; |
281 | using FDBaseInfo = |
282 | DenseMapInfo<clang::CanonicalDeclPtr<const clang::FunctionDecl>>; |
283 | |
284 | static FunctionDeclAndLoc getEmptyKey() { |
285 | return {.FD: FDBaseInfo::getEmptyKey(), .Loc: clang::SourceLocation()}; |
286 | } |
287 | |
288 | static FunctionDeclAndLoc getTombstoneKey() { |
289 | return {.FD: FDBaseInfo::getTombstoneKey(), .Loc: clang::SourceLocation()}; |
290 | } |
291 | |
292 | static unsigned getHashValue(const FunctionDeclAndLoc &FDL) { |
293 | return hash_combine(args: FDBaseInfo::getHashValue(P: FDL.FD), |
294 | args: FDL.Loc.getHashValue()); |
295 | } |
296 | |
297 | static bool isEqual(const FunctionDeclAndLoc &LHS, |
298 | const FunctionDeclAndLoc &RHS) { |
299 | return LHS.FD == RHS.FD && LHS.Loc == RHS.Loc; |
300 | } |
301 | }; |
302 | } // namespace llvm |
303 | |
304 | #endif // LLVM_CLANG_SEMA_SEMACUDA_H |
305 | |