1 | //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This contains code to emit OpenMP nodes as LLVM code. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "CGCleanup.h" |
14 | #include "CGOpenMPRuntime.h" |
15 | #include "CodeGenFunction.h" |
16 | #include "CodeGenModule.h" |
17 | #include "TargetInfo.h" |
18 | #include "clang/AST/ASTContext.h" |
19 | #include "clang/AST/Attr.h" |
20 | #include "clang/AST/DeclOpenMP.h" |
21 | #include "clang/AST/OpenMPClause.h" |
22 | #include "clang/AST/Stmt.h" |
23 | #include "clang/AST/StmtOpenMP.h" |
24 | #include "clang/AST/StmtVisitor.h" |
25 | #include "clang/Basic/OpenMPKinds.h" |
26 | #include "clang/Basic/PrettyStackTrace.h" |
27 | #include "llvm/Frontend/OpenMP/OMPConstants.h" |
28 | #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" |
29 | #include "llvm/IR/Constants.h" |
30 | #include "llvm/IR/Instructions.h" |
31 | #include "llvm/Support/AtomicOrdering.h" |
32 | using namespace clang; |
33 | using namespace CodeGen; |
34 | using namespace llvm::omp; |
35 | |
36 | static const VarDecl *getBaseDecl(const Expr *Ref); |
37 | |
38 | namespace { |
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  /// Emits the pre-init statements attached to the directive's clauses.
  /// Declarations marked with OMPCaptureNoInitAttr get storage allocated (and
  /// cleanups registered) but their initializers are deliberately skipped.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate the variable without emitting its initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  /// Maps captured variables to the addresses they should resolve to inside
  /// the inlined region.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is already captured by the current lambda,
  /// captured statement, or enclosing block.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  /// \param CapturedRegion If set, the captures of the corresponding captured
  /// statement are privatized so that references inside the inlined region
  /// resolve through the enclosing context; if llvm::None, only the clause
  /// pre-init statements are emitted.
  /// \param EmitPreInitStmt Whether to emit clause pre-init statements.
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        // Build a reference to the variable as seen from the enclosing
        // context; emitting it yields the address the capture refers to.
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
101 | |
102 | /// Lexical scope for OpenMP parallel construct, that handles correct codegen |
103 | /// for captured expressions. |
104 | class OMPParallelScope final : public OMPLexicalScope { |
105 | bool EmitPreInitStmt(const OMPExecutableDirective &S) { |
106 | OpenMPDirectiveKind Kind = S.getDirectiveKind(); |
107 | return !(isOpenMPTargetExecutionDirective(Kind) || |
108 | isOpenMPLoopBoundSharingDirective(Kind)) && |
109 | isOpenMPParallelDirective(Kind); |
110 | } |
111 | |
112 | public: |
113 | OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
114 | : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, |
115 | EmitPreInitStmt(S)) {} |
116 | }; |
117 | |
118 | /// Lexical scope for OpenMP teams construct, that handles correct codegen |
119 | /// for captured expressions. |
120 | class OMPTeamsScope final : public OMPLexicalScope { |
121 | bool EmitPreInitStmt(const OMPExecutableDirective &S) { |
122 | OpenMPDirectiveKind Kind = S.getDirectiveKind(); |
123 | return !isOpenMPTargetExecutionDirective(Kind) && |
124 | isOpenMPTeamsDirective(Kind); |
125 | } |
126 | |
127 | public: |
128 | OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
129 | : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, |
130 | EmitPreInitStmt(S)) {} |
131 | }; |
132 | |
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  /// Emits everything the loop's pre-condition and bounds computation needs:
  /// temporary storage for loop counters, undef placeholders for privatized
  /// variables, __range/__end helpers for C++ range-for loops, and the
  /// directive's pre-init declarations.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      // Give every loop counter a fresh temporary so the pre-condition can be
      // evaluated without touching the original variables.
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          // Skip variables already handled as loop counters above.
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(
                                OrigVD->getType().getNonReferenceType()))),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            // Returning false keeps the walk going over all loop levels.
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    // Drop the temporary counter mappings; subsequent codegen uses the real
    // (or properly privatized) variables again.
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
195 | |
/// Lexical scope used when emitting OpenMP directives in simd-only mode:
/// emits clause pre-inits and privatizes all captured variables so the
/// associated statement can be emitted inline.
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  /// Maps captured variables to the addresses they should resolve to inside
  /// the inlined region.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is already captured by the current lambda,
  /// captured statement, or enclosing block.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    // Emit clause pre-init declarations and the captured-expression decls
    // introduced by use_device_ptr/use_device_addr clauses.
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate the variable without emitting its initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    // Walk the (possibly nested) captured statements and privatize every
    // captured variable so references resolve to the enclosing context.
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress(CGF);
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};
278 | |
279 | } // namespace |
280 | |
281 | static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, |
282 | const OMPExecutableDirective &S, |
283 | const RegionCodeGenTy &CodeGen); |
284 | |
285 | LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { |
286 | if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) { |
287 | if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) { |
288 | OrigVD = OrigVD->getCanonicalDecl(); |
289 | bool IsCaptured = |
290 | LambdaCaptureFields.lookup(OrigVD) || |
291 | (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) || |
292 | (CurCodeDecl && isa<BlockDecl>(CurCodeDecl)); |
293 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured, |
294 | OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); |
295 | return EmitLValue(&DRE); |
296 | } |
297 | } |
298 | return EmitLValue(E); |
299 | } |
300 | |
301 | llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { |
302 | ASTContext &C = getContext(); |
303 | llvm::Value *Size = nullptr; |
304 | auto SizeInChars = C.getTypeSizeInChars(Ty); |
305 | if (SizeInChars.isZero()) { |
306 | // getTypeSizeInChars() returns 0 for a VLA. |
307 | while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) { |
308 | VlaSizePair VlaSize = getVLASize(VAT); |
309 | Ty = VlaSize.Type; |
310 | Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) |
311 | : VlaSize.NumElts; |
312 | } |
313 | SizeInChars = C.getTypeSizeInChars(Ty); |
314 | if (SizeInChars.isZero()) |
315 | return llvm::ConstantInt::get(SizeTy, /*V=*/0); |
316 | return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); |
317 | } |
318 | return CGM.getSize(SizeInChars); |
319 | } |
320 | |
/// Collects the llvm::Values for the captures of \p S, in field order, into
/// \p CapturedVars for passing to the outlined function. VLA sizes are passed
/// as values, 'this' as the current CXXThisValue, by-copy captures as values
/// (round-tripped through a uintptr temporary when not pointer-typed, since
/// the runtime only handles pointer-sized arguments), and by-reference
/// captures as addresses.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  // Walk capture inits, record fields, and captures in lockstep; they are
  // guaranteed to be parallel sequences.
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // VLA dimension: pass the previously computed size value.
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        // View the uintptr temporary through a pointer of the field's type so
        // the value can be stored with its natural representation.
        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}
366 | |
367 | static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, |
368 | QualType DstType, StringRef Name, |
369 | LValue AddrLV) { |
370 | ASTContext &Ctx = CGF.getContext(); |
371 | |
372 | llvm::Value *CastedPtr = CGF.EmitScalarConversion( |
373 | AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(), |
374 | Ctx.getPointerType(DstType), Loc); |
375 | Address TmpAddr = |
376 | CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) |
377 | .getAddress(CGF); |
378 | return TmpAddr; |
379 | } |
380 | |
381 | static QualType getCanonicalParamType(ASTContext &C, QualType T) { |
382 | if (T->isLValueReferenceType()) |
383 | return C.getLValueReferenceType( |
384 | getCanonicalParamType(C, T.getNonReferenceType()), |
385 | /*SpelledAsLValue=*/false); |
386 | if (T->isPointerType()) |
387 | return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); |
388 | if (const ArrayType *A = T->getAsArrayTypeUnsafe()) { |
389 | if (const auto *VLA = dyn_cast<VariableArrayType>(A)) |
390 | return getCanonicalParamType(C, VLA->getElementType()); |
391 | if (!A->isVariablyModifiedType()) |
392 | return C.getCanonicalType(T); |
393 | } |
394 | return C.getCanonicalParamType(T); |
395 | } |
396 | |
397 | namespace { |
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  // Note: RegisterCastedArgsOnly is forced to false when no uintptr casting
  // is performed, since there are then no casted arguments to register.
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
419 | } // namespace |
420 | |
/// Builds the declaration and prologue of the outlined function for the
/// captured statement in \p FO, and starts emitting its body.
///
/// On return, \p Args holds the outlined function's parameters,
/// \p LocalAddrs maps parameter decls to (captured variable, local address)
/// pairs, \p VLASizes maps parameters to (size expression, size value) pairs,
/// and \p CXXThisValue is set if 'this' is captured. Two passes are made over
/// the capture record's fields: the first builds the parameter list, the
/// second (after StartFunction) maps each parameter back to the variable or
/// VLA size it carries. Both passes must visit fields/captures in the same
/// order.
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  // Copy the leading CapturedDecl parameters that precede the context
  // parameter; capture fields are spliced in at the context position.
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    // For the debug (non-casted) version, create a synthetic FunctionDecl to
    // own ParmVarDecls so debug info gets proper parameter locations.
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  // Append the trailing CapturedDecl parameters that follow the context
  // parameter.
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  // Second pass: map each capture parameter to the variable/VLA size it
  // carries. Cnt starts at the context parameter position because capture
  // fields were spliced in there during the first pass.
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // VLA size parameter: possibly cast back from uintptr, then load the
      // size value and record it for the size expression.
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      // By-reference capture: dereference the argument to reach the
      // variable's storage.
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
595 | |
/// Generates the outlined function for the captured statement \p S.
///
/// When debug info is enabled (with at least reduced detail), two functions
/// are emitted: a "_debug__" version with the original parameter types, and a
/// wrapper with uintptr-cast parameters (as the runtime expects) that simply
/// forwards to the debug version. Otherwise the single casted function is
/// returned directly.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  // Privatize the captured variables so the body's references resolve to the
  // outlined function's parameters.
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Emit the wrapper: same helper name, uintptr-cast parameters, only casted
  // args/VLA sizes registered. It reloads each argument and calls F.
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      // Complex values must be passed through an address of the callee's
      // expected pointer type before loading.
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            LV.getAddress(WrapperCGF),
            PI->getType()->getPointerTo(
                LV.getAddress(WrapperCGF).getAddressSpace())));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        // VLA size already loaded during the wrapper prologue.
        CallArg = EI->second.second;
      } else {
        // Plain pass-through parameter: reload from its local slot.
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
681 | |
682 | //===----------------------------------------------------------------------===// |
683 | // OpenMP Directive Emission |
684 | //===----------------------------------------------------------------------===// |
/// Copies an array element by element from \p SrcAddr to \p DestAddr.
///
/// Drills down to the base element type of \p OriginalType, then emits a
/// while-do loop over all elements, invoking \p CopyGen with the current
/// (destination, source) element addresses on each iteration. The loop is
/// skipped entirely when the array is empty (e.g. a zero-length VLA
/// dimension).
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // Make the source pointer type match the (drilled-down) destination type.
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  // Skip the loop body entirely for an empty array.
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointers; the
  // back-edge incoming values are added after the increment below.
  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
      Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Close the PHI cycles using the block CopyGen left us in, which may not be
  // BodyBB if the copy emitted control flow of its own.
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
745 | |
746 | void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, |
747 | Address SrcAddr, const VarDecl *DestVD, |
748 | const VarDecl *SrcVD, const Expr *Copy) { |
749 | if (OriginalType->isArrayType()) { |
750 | const auto *BO = dyn_cast<BinaryOperator>(Copy); |
751 | if (BO && BO->getOpcode() == BO_Assign) { |
752 | // Perform simple memcpy for simple copying. |
753 | LValue Dest = MakeAddrLValue(DestAddr, OriginalType); |
754 | LValue Src = MakeAddrLValue(SrcAddr, OriginalType); |
755 | EmitAggregateAssign(Dest, Src, OriginalType); |
756 | } else { |
757 | // For arrays with complex element types perform element by element |
758 | // copying. |
759 | EmitOMPAggregateAssign( |
760 | DestAddr, SrcAddr, OriginalType, |
761 | [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { |
762 | // Working with the single array element, so have to remap |
763 | // destination and source variables to corresponding array |
764 | // elements. |
765 | CodeGenFunction::OMPPrivateScope Remap(*this); |
766 | Remap.addPrivate(DestVD, [DestElement]() { return DestElement; }); |
767 | Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; }); |
768 | (void)Remap.Privatize(); |
769 | EmitIgnoredExpr(Copy); |
770 | }); |
771 | } |
772 | } else { |
773 | // Remap pseudo source variable to private copy. |
774 | CodeGenFunction::OMPPrivateScope Remap(*this); |
775 | Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; }); |
776 | Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; }); |
777 | (void)Remap.Privatize(); |
778 | // Emit copying of the whole variable. |
779 | EmitIgnoredExpr(Copy); |
780 | } |
781 | } |
782 | |
/// Emits privatization for 'firstprivate' clauses on directive \p D: creates
/// private copies initialized from the original variables and registers them
/// in \p PrivateScope.
///
/// \returns true iff at least one firstprivate variable is also listed in a
/// 'lastprivate' clause and at least one copy was emitted; callers use this
/// to know the lastprivate final update must see these copies.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  // Codegen may already be terminated in the current block; nothing to emit.
  if (!HaveInsertPoint())
    return false;
  // True when emitting device code for a target execution directive; constant
  // firstprivates captured by reference get special handling below.
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  // Variables that also appear in 'lastprivate' clauses, mapped to the
  // lastprivate modifier (e.g. 'conditional') used for them.
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      // If the variable was captured by value into the outlined region (and
      // is not also lastprivate, not a reference, and not specially
      // allocated), the capture itself already serves as the firstprivate
      // copy: record it and skip the explicit copy.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target regions,
      // captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
                                                                    OrigVD);
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      // Privatize each variable only once, even if it is listed in several
      // firstprivate clauses.
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate." );
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  // Non-trivial construction: copy the array element by
                  // element, evaluating the copy initializer per element.
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  // Seed the conditional-lastprivate variable with the value
                  // of the just-emitted firstprivate copy, then redirect VD
                  // to the special storage.
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private" );
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
940 | |
941 | void CodeGenFunction::EmitOMPPrivateClause( |
942 | const OMPExecutableDirective &D, |
943 | CodeGenFunction::OMPPrivateScope &PrivateScope) { |
944 | if (!HaveInsertPoint()) |
945 | return; |
946 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
947 | for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { |
948 | auto IRef = C->varlist_begin(); |
949 | for (const Expr *IInit : C->private_copies()) { |
950 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
951 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
952 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); |
953 | bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() { |
954 | // Emit private VarDecl with copy init. |
955 | EmitDecl(*VD); |
956 | return GetAddrOfLocalVar(VD); |
957 | }); |
958 | assert(IsRegistered && "private var already registered as private" ); |
959 | // Silence the warning about unused variable. |
960 | (void)IsRegistered; |
961 | } |
962 | ++IRef; |
963 | } |
964 | } |
965 | } |
966 | |
/// Emits the 'copyin' clause for directive \p D: copies the master thread's
/// threadprivate variables into the threadprivate copies of the other
/// threads in the team.
///
/// \returns true if any copy code was emitted (the caller is then expected
/// to emit the required barrier after the copies).
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // The generated code has the following structure:
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      // Copy each variable only once, even if listed in several clauses.
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!" );
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          // Drop the temporary mapping created by EmitLValue for the capture.
          LocalDeclMap.erase(VD);
        } else {
          // Without TLS the master copy is the static/global variable itself.
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        // Emit the master-thread check once, before the first copy.
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master" );
          CopyEnd = createBasicBlock("copyin.not.master.end" );
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt =
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
          auto *PrivateAddrInt =
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
          // On the master thread the private address equals the master
          // address, so the copy region is skipped there.
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
1037 | |
/// Emits initialization for 'lastprivate' clauses on directive \p D:
/// registers the addresses of the original variables (for the final copy
/// back) and, where needed, creates the private copies in \p PrivateScope.
///
/// \returns true if the directive has at least one 'lastprivate' clause, so
/// the caller knows to emit the final update.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // Loop control variables of a simd directive are privatized by the simd
  // codegen itself; collect them so they are not privatized twice here.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops (outside of simd-only mode) handle lastprivates in the
    // runtime support library; only the flag above is needed.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Map the pseudo destination variable to the address of the original
        // variable so the final copy-back can find it.
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
                                                              OrigVD]() {
            if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
              // Conditional lastprivates use runtime-managed storage instead
              // of a plain local copy.
              Address VDAddr =
                  CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
                                                                        OrigVD);
              setAddrOfLocalVar(VD, VDAddr);
              return VDAddr;
            }
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private" );
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
1104 | |
/// Emits the final copy-back for 'lastprivate' clauses on directive \p D:
/// on the last iteration (guarded by \p IsLastIterCond, if non-null) the
/// private copies are stored back to the original variables. \p NoFinals
/// suppresses the emission of loop-counter final-value updates.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then" );
    DoneBB = createBasicBlock(".omp.lastprivate.done" );
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  // For loop directives, map each loop counter to its 'final' update
  // expression (or mark it as already handled when NoFinals is set).
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      // Copy each variable back only once, even if listed in several clauses.
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // References are copied back through their referenced storage.
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1194 | |
1195 | void CodeGenFunction::EmitOMPReductionClauseInit( |
1196 | const OMPExecutableDirective &D, |
1197 | CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) { |
1198 | if (!HaveInsertPoint()) |
1199 | return; |
1200 | SmallVector<const Expr *, 4> Shareds; |
1201 | SmallVector<const Expr *, 4> Privates; |
1202 | SmallVector<const Expr *, 4> ReductionOps; |
1203 | SmallVector<const Expr *, 4> LHSs; |
1204 | SmallVector<const Expr *, 4> RHSs; |
1205 | OMPTaskDataTy Data; |
1206 | SmallVector<const Expr *, 4> TaskLHSs; |
1207 | SmallVector<const Expr *, 4> TaskRHSs; |
1208 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1209 | if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan)) |
1210 | continue; |
1211 | Shareds.append(C->varlist_begin(), C->varlist_end()); |
1212 | Privates.append(C->privates().begin(), C->privates().end()); |
1213 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
1214 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1215 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1216 | if (C->getModifier() == OMPC_REDUCTION_task) { |
1217 | Data.ReductionVars.append(C->privates().begin(), C->privates().end()); |
1218 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
1219 | Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); |
1220 | Data.ReductionOps.append(C->reduction_ops().begin(), |
1221 | C->reduction_ops().end()); |
1222 | TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1223 | TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1224 | } |
1225 | } |
1226 | ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); |
1227 | unsigned Count = 0; |
1228 | auto *ILHS = LHSs.begin(); |
1229 | auto *IRHS = RHSs.begin(); |
1230 | auto *IPriv = Privates.begin(); |
1231 | for (const Expr *IRef : Shareds) { |
1232 | const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); |
1233 | // Emit private VarDecl with reduction init. |
1234 | RedCG.emitSharedOrigLValue(*this, Count); |
1235 | RedCG.emitAggregateType(*this, Count); |
1236 | AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); |
1237 | RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), |
1238 | RedCG.getSharedLValue(Count), |
1239 | [&Emission](CodeGenFunction &CGF) { |
1240 | CGF.EmitAutoVarInit(Emission); |
1241 | return true; |
1242 | }); |
1243 | EmitAutoVarCleanups(Emission); |
1244 | Address BaseAddr = RedCG.adjustPrivateAddress( |
1245 | *this, Count, Emission.getAllocatedAddress()); |
1246 | bool IsRegistered = PrivateScope.addPrivate( |
1247 | RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; }); |
1248 | assert(IsRegistered && "private var already registered as private" ); |
1249 | // Silence the warning about unused variable. |
1250 | (void)IsRegistered; |
1251 | |
1252 | const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); |
1253 | const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); |
1254 | QualType Type = PrivateVD->getType(); |
1255 | bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef); |
1256 | if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { |
1257 | // Store the address of the original variable associated with the LHS |
1258 | // implicit variable. |
1259 | PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() { |
1260 | return RedCG.getSharedLValue(Count).getAddress(*this); |
1261 | }); |
1262 | PrivateScope.addPrivate( |
1263 | RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); }); |
1264 | } else if ((isaOMPArraySectionExpr && Type->isScalarType()) || |
1265 | isa<ArraySubscriptExpr>(IRef)) { |
1266 | // Store the address of the original variable associated with the LHS |
1267 | // implicit variable. |
1268 | PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() { |
1269 | return RedCG.getSharedLValue(Count).getAddress(*this); |
1270 | }); |
1271 | PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() { |
1272 | return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), |
1273 | ConvertTypeForMem(RHSVD->getType()), |
1274 | "rhs.begin" ); |
1275 | }); |
1276 | } else { |
1277 | QualType Type = PrivateVD->getType(); |
1278 | bool IsArray = getContext().getAsArrayType(Type) != nullptr; |
1279 | Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this); |
1280 | // Store the address of the original variable associated with the LHS |
1281 | // implicit variable. |
1282 | if (IsArray) { |
1283 | OriginalAddr = Builder.CreateElementBitCast( |
1284 | OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin" ); |
1285 | } |
1286 | PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; }); |
1287 | PrivateScope.addPrivate( |
1288 | RHSVD, [this, PrivateVD, RHSVD, IsArray]() { |
1289 | return IsArray |
1290 | ? Builder.CreateElementBitCast( |
1291 | GetAddrOfLocalVar(PrivateVD), |
1292 | ConvertTypeForMem(RHSVD->getType()), "rhs.begin" ) |
1293 | : GetAddrOfLocalVar(PrivateVD); |
1294 | }); |
1295 | } |
1296 | ++ILHS; |
1297 | ++IRHS; |
1298 | ++IPriv; |
1299 | ++Count; |
1300 | } |
1301 | if (!Data.ReductionVars.empty()) { |
1302 | Data.IsReductionWithTaskMod = true; |
1303 | Data.IsWorksharingReduction = |
1304 | isOpenMPWorksharingDirective(D.getDirectiveKind()); |
1305 | llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit( |
1306 | *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data); |
1307 | const Expr *TaskRedRef = nullptr; |
1308 | switch (D.getDirectiveKind()) { |
1309 | case OMPD_parallel: |
1310 | TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr(); |
1311 | break; |
1312 | case OMPD_for: |
1313 | TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr(); |
1314 | break; |
1315 | case OMPD_sections: |
1316 | TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr(); |
1317 | break; |
1318 | case OMPD_parallel_for: |
1319 | TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr(); |
1320 | break; |
1321 | case OMPD_parallel_master: |
1322 | TaskRedRef = |
1323 | cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr(); |
1324 | break; |
1325 | case OMPD_parallel_sections: |
1326 | TaskRedRef = |
1327 | cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr(); |
1328 | break; |
1329 | case OMPD_target_parallel: |
1330 | TaskRedRef = |
1331 | cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr(); |
1332 | break; |
1333 | case OMPD_target_parallel_for: |
1334 | TaskRedRef = |
1335 | cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr(); |
1336 | break; |
1337 | case OMPD_distribute_parallel_for: |
1338 | TaskRedRef = |
1339 | cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr(); |
1340 | break; |
1341 | case OMPD_teams_distribute_parallel_for: |
1342 | TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D) |
1343 | .getTaskReductionRefExpr(); |
1344 | break; |
1345 | case OMPD_target_teams_distribute_parallel_for: |
1346 | TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D) |
1347 | .getTaskReductionRefExpr(); |
1348 | break; |
1349 | case OMPD_simd: |
1350 | case OMPD_for_simd: |
1351 | case OMPD_section: |
1352 | case OMPD_single: |
1353 | case OMPD_master: |
1354 | case OMPD_critical: |
1355 | case OMPD_parallel_for_simd: |
1356 | case OMPD_task: |
1357 | case OMPD_taskyield: |
1358 | case OMPD_barrier: |
1359 | case OMPD_taskwait: |
1360 | case OMPD_taskgroup: |
1361 | case OMPD_flush: |
1362 | case OMPD_depobj: |
1363 | case OMPD_scan: |
1364 | case OMPD_ordered: |
1365 | case OMPD_atomic: |
1366 | case OMPD_teams: |
1367 | case OMPD_target: |
1368 | case OMPD_cancellation_point: |
1369 | case OMPD_cancel: |
1370 | case OMPD_target_data: |
1371 | case OMPD_target_enter_data: |
1372 | case OMPD_target_exit_data: |
1373 | case OMPD_taskloop: |
1374 | case OMPD_taskloop_simd: |
1375 | case OMPD_master_taskloop: |
1376 | case OMPD_master_taskloop_simd: |
1377 | case OMPD_parallel_master_taskloop: |
1378 | case OMPD_parallel_master_taskloop_simd: |
1379 | case OMPD_distribute: |
1380 | case OMPD_target_update: |
1381 | case OMPD_distribute_parallel_for_simd: |
1382 | case OMPD_distribute_simd: |
1383 | case OMPD_target_parallel_for_simd: |
1384 | case OMPD_target_simd: |
1385 | case OMPD_teams_distribute: |
1386 | case OMPD_teams_distribute_simd: |
1387 | case OMPD_teams_distribute_parallel_for_simd: |
1388 | case OMPD_target_teams: |
1389 | case OMPD_target_teams_distribute: |
1390 | case OMPD_target_teams_distribute_parallel_for_simd: |
1391 | case OMPD_target_teams_distribute_simd: |
1392 | case OMPD_declare_target: |
1393 | case OMPD_end_declare_target: |
1394 | case OMPD_threadprivate: |
1395 | case OMPD_allocate: |
1396 | case OMPD_declare_reduction: |
1397 | case OMPD_declare_mapper: |
1398 | case OMPD_declare_simd: |
1399 | case OMPD_requires: |
1400 | case OMPD_declare_variant: |
1401 | case OMPD_begin_declare_variant: |
1402 | case OMPD_end_declare_variant: |
1403 | case OMPD_unknown: |
1404 | default: |
1405 | llvm_unreachable("Enexpected directive with task reductions." ); |
1406 | } |
1407 | |
1408 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl()); |
1409 | EmitVarDecl(*VD); |
1410 | EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD), |
1411 | /*Volatile=*/false, TaskRedRef->getType()); |
1412 | } |
1413 | } |
1414 | |
1415 | void CodeGenFunction::EmitOMPReductionClauseFinal( |
1416 | const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { |
1417 | if (!HaveInsertPoint()) |
1418 | return; |
1419 | llvm::SmallVector<const Expr *, 8> Privates; |
1420 | llvm::SmallVector<const Expr *, 8> LHSExprs; |
1421 | llvm::SmallVector<const Expr *, 8> RHSExprs; |
1422 | llvm::SmallVector<const Expr *, 8> ReductionOps; |
1423 | bool HasAtLeastOneReduction = false; |
1424 | bool IsReductionWithTaskMod = false; |
1425 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1426 | // Do not emit for inscan reductions. |
1427 | if (C->getModifier() == OMPC_REDUCTION_inscan) |
1428 | continue; |
1429 | HasAtLeastOneReduction = true; |
1430 | Privates.append(C->privates().begin(), C->privates().end()); |
1431 | LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1432 | RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1433 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
1434 | IsReductionWithTaskMod = |
1435 | IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task; |
1436 | } |
1437 | if (HasAtLeastOneReduction) { |
1438 | if (IsReductionWithTaskMod) { |
1439 | CGM.getOpenMPRuntime().emitTaskReductionFini( |
1440 | *this, D.getBeginLoc(), |
1441 | isOpenMPWorksharingDirective(D.getDirectiveKind())); |
1442 | } |
1443 | bool WithNowait = D.getSingleClause<OMPNowaitClause>() || |
1444 | isOpenMPParallelDirective(D.getDirectiveKind()) || |
1445 | ReductionKind == OMPD_simd; |
1446 | bool SimpleReduction = ReductionKind == OMPD_simd; |
1447 | // Emit nowait reduction if nowait clause is present or directive is a |
1448 | // parallel directive (it always has implicit barrier). |
1449 | CGM.getOpenMPRuntime().emitReduction( |
1450 | *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps, |
1451 | {WithNowait, SimpleReduction, ReductionKind}); |
1452 | } |
1453 | } |
1454 | |
1455 | static void emitPostUpdateForReductionClause( |
1456 | CodeGenFunction &CGF, const OMPExecutableDirective &D, |
1457 | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
1458 | if (!CGF.HaveInsertPoint()) |
1459 | return; |
1460 | llvm::BasicBlock *DoneBB = nullptr; |
1461 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1462 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) { |
1463 | if (!DoneBB) { |
1464 | if (llvm::Value *Cond = CondGen(CGF)) { |
1465 | // If the first post-update expression is found, emit conditional |
1466 | // block if it was requested. |
1467 | llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu" ); |
1468 | DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done" ); |
1469 | CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); |
1470 | CGF.EmitBlock(ThenBB); |
1471 | } |
1472 | } |
1473 | CGF.EmitIgnoredExpr(PostUpdate); |
1474 | } |
1475 | } |
1476 | if (DoneBB) |
1477 | CGF.EmitBlock(DoneBB, /*IsFinished=*/true); |
1478 | } |
1479 | |
1480 | namespace { |
1481 | /// Codegen lambda for appending distribute lower and upper bounds to outlined |
1482 | /// parallel function. This is necessary for combined constructs such as |
1483 | /// 'distribute parallel for' |
1484 | typedef llvm::function_ref<void(CodeGenFunction &, |
1485 | const OMPExecutableDirective &, |
1486 | llvm::SmallVectorImpl<llvm::Value *> &)> |
1487 | CodeGenBoundParametersTy; |
1488 | } // anonymous namespace |
1489 | |
1490 | static void |
1491 | checkForLastprivateConditionalUpdate(CodeGenFunction &CGF, |
1492 | const OMPExecutableDirective &S) { |
1493 | if (CGF.getLangOpts().OpenMP < 50) |
1494 | return; |
1495 | llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls; |
1496 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
1497 | for (const Expr *Ref : C->varlists()) { |
1498 | if (!Ref->getType()->isScalarType()) |
1499 | continue; |
1500 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1501 | if (!DRE) |
1502 | continue; |
1503 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1504 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1505 | } |
1506 | } |
1507 | for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
1508 | for (const Expr *Ref : C->varlists()) { |
1509 | if (!Ref->getType()->isScalarType()) |
1510 | continue; |
1511 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1512 | if (!DRE) |
1513 | continue; |
1514 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1515 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1516 | } |
1517 | } |
1518 | for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { |
1519 | for (const Expr *Ref : C->varlists()) { |
1520 | if (!Ref->getType()->isScalarType()) |
1521 | continue; |
1522 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1523 | if (!DRE) |
1524 | continue; |
1525 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1526 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1527 | } |
1528 | } |
1529 | // Privates should ne analyzed since they are not captured at all. |
1530 | // Task reductions may be skipped - tasks are ignored. |
1531 | // Firstprivates do not return value but may be passed by reference - no need |
1532 | // to check for updated lastprivate conditional. |
1533 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
1534 | for (const Expr *Ref : C->varlists()) { |
1535 | if (!Ref->getType()->isScalarType()) |
1536 | continue; |
1537 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1538 | if (!DRE) |
1539 | continue; |
1540 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1541 | } |
1542 | } |
1543 | CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional( |
1544 | CGF, S, PrivateDecls); |
1545 | } |
1546 | |
1547 | static void emitCommonOMPParallelDirective( |
1548 | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
1549 | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, |
1550 | const CodeGenBoundParametersTy &CodeGenBoundParameters) { |
1551 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); |
1552 | llvm::Function *OutlinedFn = |
1553 | CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( |
1554 | S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); |
1555 | if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { |
1556 | CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); |
1557 | llvm::Value *NumThreads = |
1558 | CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), |
1559 | /*IgnoreResultAssign=*/true); |
1560 | CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( |
1561 | CGF, NumThreads, NumThreadsClause->getBeginLoc()); |
1562 | } |
1563 | if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { |
1564 | CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); |
1565 | CGF.CGM.getOpenMPRuntime().emitProcBindClause( |
1566 | CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc()); |
1567 | } |
1568 | const Expr *IfCond = nullptr; |
1569 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
1570 | if (C->getNameModifier() == OMPD_unknown || |
1571 | C->getNameModifier() == OMPD_parallel) { |
1572 | IfCond = C->getCondition(); |
1573 | break; |
1574 | } |
1575 | } |
1576 | |
1577 | OMPParallelScope Scope(CGF, S); |
1578 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
1579 | // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk |
1580 | // lower and upper bounds with the pragma 'for' chunking mechanism. |
1581 | // The following lambda takes care of appending the lower and upper bound |
1582 | // parameters when necessary |
1583 | CodeGenBoundParameters(CGF, S, CapturedVars); |
1584 | CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); |
1585 | CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, |
1586 | CapturedVars, IfCond); |
1587 | } |
1588 | |
1589 | static bool isAllocatableDecl(const VarDecl *VD) { |
1590 | const VarDecl *CVD = VD->getCanonicalDecl(); |
1591 | if (!CVD->hasAttr<OMPAllocateDeclAttr>()) |
1592 | return false; |
1593 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
1594 | // Use the default allocation. |
1595 | return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || |
1596 | AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && |
1597 | !AA->getAllocator()); |
1598 | } |
1599 | |
/// No-op CodeGenBoundParametersTy callback: intentionally appends nothing.
/// Used for directives that do not need distribute loop bounds forwarded to
/// the outlined parallel function (see emitCommonOMPParallelDirective).
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1603 | |
// Returns the address for a local variable that must be allocated through an
// OpenMP allocator (see isAllocatableDecl), creating the storage via
// OMPBuilder.createOMPAlloc and registering a matching createOMPFree cleanup.
// Returns Address::invalid() for variables that use default allocation.
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  // Only variables with a non-default OMP allocator are handled here.
  if (!isAllocatableDecl(CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // Variably-modified (VLA-like) type: the size is a runtime value.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Statically-sized type: round the constant size up to the alignment.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator." );
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);

  // Allocate through the runtime...
  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      CGF.Builder, Size, Allocator,
      getNameWithSeparators({CVD->getName(), ".void.addr" }, "." , "." ));
  // ...and pair it with a free call registered as a normal+EH cleanup so the
  // storage is released however the scope is exited.
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  // Cast the runtime's raw pointer back to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getNameWithSeparators({CVD->getName(), ".addr" }, "." , "." ));
  return Address(Addr, Align);
}
1653 | |
1654 | Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate( |
1655 | CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, |
1656 | SourceLocation Loc) { |
1657 | CodeGenModule &CGM = CGF.CGM; |
1658 | if (CGM.getLangOpts().OpenMPUseTLS && |
1659 | CGM.getContext().getTargetInfo().isTLSSupported()) |
1660 | return VDAddr; |
1661 | |
1662 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1663 | |
1664 | llvm::Type *VarTy = VDAddr.getElementType(); |
1665 | llvm::Value *Data = |
1666 | CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy); |
1667 | llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)); |
1668 | std::string Suffix = getNameWithSeparators({"cache" , "" }); |
1669 | llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix); |
1670 | |
1671 | llvm::CallInst *ThreadPrivateCacheCall = |
1672 | OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName); |
1673 | |
1674 | return Address(ThreadPrivateCacheCall, VDAddr.getAlignment()); |
1675 | } |
1676 | |
1677 | std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators( |
1678 | ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) { |
1679 | SmallString<128> Buffer; |
1680 | llvm::raw_svector_ostream OS(Buffer); |
1681 | StringRef Sep = FirstSeparator; |
1682 | for (StringRef Part : Parts) { |
1683 | OS << Sep << Part; |
1684 | Sep = Separator; |
1685 | } |
1686 | return OS.str().str(); |
1687 | } |
1688 | void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { |
1689 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
1690 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1691 | // Check if we have any if clause associated with the directive. |
1692 | llvm::Value *IfCond = nullptr; |
1693 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
1694 | IfCond = EmitScalarExpr(C->getCondition(), |
1695 | /*IgnoreResultAssign=*/true); |
1696 | |
1697 | llvm::Value *NumThreads = nullptr; |
1698 | if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) |
1699 | NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(), |
1700 | /*IgnoreResultAssign=*/true); |
1701 | |
1702 | ProcBindKind ProcBind = OMP_PROC_BIND_default; |
1703 | if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) |
1704 | ProcBind = ProcBindClause->getProcBindKind(); |
1705 | |
1706 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
1707 | |
    // The cleanup callback that finalizes all variables at the given location,
    // thus calls destructors etc.
1710 | auto FiniCB = [this](InsertPointTy IP) { |
1711 | OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
1712 | }; |
1713 | |
1714 | // Privatization callback that performs appropriate action for |
1715 | // shared/private/firstprivate/lastprivate/copyin/... variables. |
1716 | // |
1717 | // TODO: This defaults to shared right now. |
1718 | auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, |
1719 | llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { |
1720 | // The next line is appropriate only for variables (Val) with the |
1721 | // data-sharing attribute "shared". |
1722 | ReplVal = &Val; |
1723 | |
1724 | return CodeGenIP; |
1725 | }; |
1726 | |
1727 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); |
1728 | const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); |
1729 | |
1730 | auto BodyGenCB = [ParallelRegionBodyStmt, |
1731 | this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, |
1732 | llvm::BasicBlock &ContinuationBB) { |
1733 | OMPBuilderCBHelpers:: |
---|