//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

static const VarDecl *getBaseDecl(const Expr *Ref);

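// Illustrative note (not from the original source): several clauses (e.g.
// num_threads, if, schedule) capture their expressions into helper variables
// via OMPClauseWithPreInit. For example, given
//   #pragma omp parallel num_threads(N + 1)
// Sema creates a pre-init declaration holding the value of 'N + 1', and the
// scopes below emit that declaration before the region body is generated.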
namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for OpenMP parallel construct that handles correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

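// Illustrative note (not from the original source): for a range-based loop
// such as
//   #pragma omp for
//   for (auto X : Vec) ...
// the loop's init statement and the implicit '__range' and '__end' variables
// are emitted up front (see doForAllLoops below) so that the loop bounds can
// be computed before the privatized counters are set up.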
/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(
                                OrigVD->getType().getNonReferenceType()))),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they
    // are not used in simd-only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress(CGF);
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

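// Illustrative note (not from the original source): for a VLA such as
//   int A[N][M];
// the loop below accumulates N * M with no-unsigned-wrap multiplies, and the
// final size is that product times the size of the innermost non-VLA element
// type, i.e. N * M * sizeof(int).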
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size =
          Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress(CGF);
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. We can pass the VLA type sizes to the outlined
    // function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

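// Illustrative note (not from the original source): when debug info is
// enabled, GenerateOpenMPCapturedStmtFunction below emits two functions: a
// "<helper>_debug__" body that keeps the original parameter types (so the
// debugger sees real variables), plus a thin wrapper with the uintptr-based
// signature the OpenMP runtime expects, which simply forwards its arguments.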
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            LV.getAddress(WrapperCGF),
            PI->getType()->getPointerTo(
                LV.getAddress(WrapperCGF).getAddressSpace())));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
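// Illustrative sketch (not from the original source) of the IR loop emitted
// by EmitOMPAggregateAssign when copying an array element by element:
//
//   %isempty = icmp eq %dst.begin, %dst.end    ; "omp.arraycpy.isempty"
//   br i1 %isempty, label %omp.arraycpy.done, label %omp.arraycpy.body
//
// In omp.arraycpy.body, phi nodes advance the source and destination element
// pointers, CopyGen emits the per-element copy, and the loop exits once the
// incremented destination pointer reaches %dst.end.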
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

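// Illustrative note (not from the original source): for
//   #pragma omp parallel firstprivate(a)
// each thread gets a private copy of 'a' that is initialized from the
// original variable on entry to the region; the routine below emits those
// copies and registers them in the given private scope.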
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, e.g. omp for, omp simd, omp distribute.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit a copy for firstprivate constant variables captured by
      // reference in target regions.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
                                                                    OrigVD);
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code
        // with TLS support, the address is passed from the master thread as
        // a field in the captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread; if
          // it is, no data needs to be copied.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt =
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
          auto *PrivateAddrInt =
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in
      // the runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VD, C, OrigVD]() {
                if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable of a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()),
              PrivateVD, (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

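// Illustrative note (not from the original source): for
//   #pragma omp parallel reduction(+ : Sum)
// each thread gets a private 'Sum' initialized to the operation's identity
// (0 for '+'), and the per-thread values are combined into the original
// variable when the region ends. The routine below emits the private copies
// and the extra bookkeeping needed for task and inscan reduction modifiers.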
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD =
        cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(
            GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD,
                              [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
        return IsArray ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
      });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
1406 }
1407
1408 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1409 EmitVarDecl(*VD);
1410 EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1411 /*Volatile=*/false, TaskRedRef->getType());
1412 }
1413}
1414
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          *this, D.getBeginLoc(),
          isOpenMPWorksharingDirective(D.getDirectiveKind()));
    }
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

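/// Emits the post-update expressions attached to reduction clauses, guarded
/// by a condition when CondGen produces one. The emitted control flow is
/// roughly (illustrative):
///   br i1 %cond, label %.omp.reduction.pu, label %.omp.reduction.pu.done
/// with every post-update expression placed in the '.pu' block.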
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // On the first post-update expression found, emit the conditional
          // block if one was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to an
/// outlined parallel function. This is necessary for combined constructs
/// such as 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

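/// With OpenMP 5.0's conditional lastprivates, e.g. (illustrative)
///   #pragma omp parallel for lastprivate(conditional: x)
/// the sequentially last update of 'x' must survive the region, so updates of
/// candidate variables have to be tracked; this helper registers the
/// declarations that need checking.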
static void
checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  // Privates should not be analyzed since they are not captured at all.
  // Task reductions may be skipped - tasks are ignored.
  // Firstprivates do not return a value but may be passed by reference - no
  // need to check for updated lastprivate conditional.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
    }
  }
  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
      CGF, S, PrivateDecls);
}

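/// Common emission path for 'parallel' and the combined directives embedding
/// it: outlines the region, applies the num_threads/proc_bind/if clauses, and
/// lets CodeGenBoundParameters append extra arguments to the outlined call
/// (e.g. the distribute bounds for 'distribute parallel for').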
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    llvm::Value *NumThreads =
        CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                           /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
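  // Find the 'if' clause that applies to this parallel region, honoring an
  // optional directive-name modifier, e.g. (illustrative)
  //   #pragma omp parallel for if(parallel : use_threads)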
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute'
  // chunk's lower and upper bounds with the 'for' chunking mechanism. The
  // following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond);
}

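/// Returns true if the variable must be allocated through an OpenMP
/// allocator, e.g. (illustrative)
///   int x;
///   #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc)
/// Variables on the default (or null) allocator without an explicit allocator
/// expression keep the normal alloca-based allocation.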
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Fall back to the default allocation when the default or null allocator is
  // requested without an explicit allocator expression.
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
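    // For instance, with size = 10 and align = 8: (10 + 7) / 8 = 2 units,
    // i.e. 2 * 8 = 16 bytes.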
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert it to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      CGF.Builder, Size, Allocator,
      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
  return Address(Addr, Align);
}

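/// Returns the address of the current thread's copy of VD. When TLS is
/// usable the original address is already thread-local; otherwise the runtime
/// maintains a per-variable cache, named '<mangled name>.cache.' below, that
/// is consulted on each access.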
Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
    SourceLocation Loc) {
  CodeGenModule &CGM = CGF.CGM;
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Data =
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
  std::string Suffix = getNameWithSeparators({"cache", ""});
  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);

  llvm::CallInst *ThreadPrivateCacheCall =
      OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);

  return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
}

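/// Joins Parts, prefixing the first part with FirstSeparator and each
/// subsequent part with Separator; e.g. (illustrative)
///   getNameWithSeparators({"x", "addr"}, ".", "_") yields ".x_addr".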
std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
    ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str().str();
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location
    // and thus calls destructors, etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    auto BodyGenCB = [ParallelRegionBodyStmt,
                      this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                            llvm::BasicBlock &ContinuationBB) {
      OMPBuilderCBHelpers::