//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// such as the 'for', 'sections', and 'atomic' directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
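
// Illustrative note (not part of kmp.h): these flags combine bitwise, so the
// ident_t for an implicit barrier at the end of a 'single' construct would
// typically carry
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_SINGLE  // 0x02 | 0x140 == 0x142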

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                             The string is composed of semi-colon separated
///                             fields which describe the source file,
///                             the function and a pair of line numbers that
///                             delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
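
// For illustration (not taken from kmp.h): clang encodes psource as
//   ";file;function;line;column;;"
// e.g. ";example.c;foo;10;3;;" for a hypothetical file and function; see
// getIdentStringFromSourceLocation() below for the exact format.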

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
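
// A sketch of how schedule clauses map onto these values (illustrative, not
// an exhaustive table):
//   schedule(static)                  -> OMP_sch_static
//   schedule(dynamic, 4)              -> OMP_sch_dynamic_chunked
//   ordered + schedule(runtime)       -> OMP_ord_runtime
//   schedule(nonmonotonic: guided, 4) -> OMP_sch_guided_chunked |
//                                        OMP_sch_modifier_nonmonotonic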

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
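
// For reference: in a 'declare reduction' initializer clause, 'omp_priv'
// names the private copy being initialized and 'omp_orig' the original
// variable, e.g. (hypothetical type T):
//   #pragma omp declare reduction(merge : T : omp_out.join(omp_in)) \
//       initializer(omp_priv = T(omp_orig))
// The DRD initializer handled above corresponds to such a clause.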

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
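
// The emitted control flow is roughly (a sketch of the CFG, not literal IR):
//   entry:               %isempty = icmp eq %dest.begin, %dest.end
//                        br %isempty, %omp.arrayinit.done, %omp.arrayinit.body
//   omp.arrayinit.body:  phi over the dest (and, for UDRs, src) element,
//                        initialize one element, advance the pointers,
//                        br %done, %omp.arrayinit.done, %omp.arrayinit.body
//   omp.arrayinit.done:  continue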

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if 'omp requires
  // unified_shared_memory' was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
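
// As an illustration (hypothetical reduction over int), a declaration such as
//   #pragma omp declare reduction(myadd : int : omp_out += omp_in) \
//       initializer(omp_priv = 0)
// produces a combiner helper whose two pointer parameters stand in for
// omp_in/omp_out, and an initializer helper that stores 0 into the private
// copy.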

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of Clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will be no need
    // to push and pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}
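
// For example (host targets, where the platform-specific separator is usually
// '.'), outlining a parallel region of a function 'foo' yields a helper named
// "foo.omp_outlined". This is an illustration of the naming scheme; the exact
// separator is target-dependent.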

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1365
1366llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1367 SourceLocation Loc,
1368 unsigned Flags, bool EmitLoc) {
1369 uint32_t SrcLocStrSize;
1370 llvm::Constant *SrcLocStr;
1371 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1372 llvm::codegenoptions::NoDebugInfo) ||
1373 Loc.isInvalid()) {
1374 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1375 } else {
1376 std::string FunctionName;
1377 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
1378 FunctionName = FD->getQualifiedNameAsString();
1379 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1380 const char *FileName = PLoc.getFilename();
1381 unsigned Line = PLoc.getLine();
1382 unsigned Column = PLoc.getColumn();
1383 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1384 Column, SrcLocStrSize);
1385 }
1386 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1387 return OMPBuilder.getOrCreateIdent(
1388 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1389}
1390
1391llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1392 SourceLocation Loc) {
1393 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1394 // If the OpenMPIRBuilder is used, we need to use it for all thread id calls,
1395 // as the clang invariants used below might be broken.
1396 if (CGM.getLangOpts().OpenMPIRBuilder) {
1397 SmallString<128> Buffer;
1398 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1399 uint32_t SrcLocStrSize;
1400 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1401 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1402 return OMPBuilder.getOrCreateThreadID(
1403 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1404 }
1405
1406 llvm::Value *ThreadID = nullptr;
1407 // Check whether we've already cached a load of the thread id in this
1408 // function.
1409 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1410 if (I != OpenMPLocThreadIDMap.end()) {
1411 ThreadID = I->second.ThreadID;
1412 if (ThreadID != nullptr)
1413 return ThreadID;
1414 }
1415 // If exceptions are enabled, do not use the parameter to avoid a possible crash.
1416 if (auto *OMPRegionInfo =
1417 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1418 if (OMPRegionInfo->getThreadIDVariable()) {
1419 // Check if this is an outlined function with the thread id passed as an
1420 // argument.
1420 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1421 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1422 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1423 !CGF.getLangOpts().CXXExceptions ||
1424 CGF.Builder.GetInsertBlock() == TopBlock ||
1425 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1426 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1427 TopBlock ||
1428 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1429 CGF.Builder.GetInsertBlock()) {
1430 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1431 // If the value was loaded in the entry block, cache it and use it
1432 // everywhere in the function.
1433 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1434 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1435 Elem.second.ThreadID = ThreadID;
1436 }
1437 return ThreadID;
1438 }
1439 }
1440 }
1441
1442 // This is not an outlined function region - need to call kmp_int32
1443 // __kmpc_global_thread_num(ident_t *loc).
1444 // Generate the thread id value and cache it for reuse across the
1445 // function.
1446 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1447 if (!Elem.second.ServiceInsertPt)
1448 setLocThreadIdInsertPt(CGF);
1449 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1450 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1451 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
1452 llvm::CallInst *Call = CGF.Builder.CreateCall(
1453 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1454 OMPRTL___kmpc_global_thread_num),
1455 emitUpdateLocation(CGF, Loc));
1456 Call->setCallingConv(CGF.getRuntimeCC());
1457 Elem.second.ThreadID = Call;
1458 return Call;
1459}
1460
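// Drops all per-function OpenMP state once CGF.CurFn has been emitted: the
// cached thread id and service insert point, the user-defined reduction and
// mapper entries registered for this function, and the lastprivate-conditional
// and untied-task bookkeeping.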
1461void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1462 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1463 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1464 clearLocThreadIdInsertPt(CGF);
1465 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1466 }
1467 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1468 for (const auto *D : FunctionUDRMap[CGF.CurFn])
1469 UDRMap.erase(D);
1470 FunctionUDRMap.erase(CGF.CurFn);
1471 }
1472 auto I = FunctionUDMMap.find(CGF.CurFn);
1473 if (I != FunctionUDMMap.end()) {
1474 for (const auto *D : I->second)
1475 UDMMap.erase(D);
1476 FunctionUDMMap.erase(I);
1477 }
1478 LastprivateConditionalToTypes.erase(CGF.CurFn);
1479 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1480}
1481
1482llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1483 return OMPBuilder.IdentPtr;
1484}
1485
1486llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1487 if (!Kmpc_MicroTy) {
1488 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1489 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1490 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1491 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1492 }
1493 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1494}
1495
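/// Translate the device_type of a 'declare target' variable into the
/// offload-entry device clause kind used by the OffloadEntriesInfoManager.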
1496llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1497convertDeviceClause(const VarDecl *VD) {
1498 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1499 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1500 if (!DevTy)
1501 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1502
1503 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1504 case OMPDeclareTargetDeclAttr::DT_Host:
1505 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1506 break;
1507 case OMPDeclareTargetDeclAttr::DT_NoHost:
1508 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1509 break;
1510 case OMPDeclareTargetDeclAttr::DT_Any:
1511 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1512 break;
1513 default:
1514 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1515 break;
1516 }
1517}
1518
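/// Translate the map type of a 'declare target' variable (to/enter/link) into
/// the corresponding offload-entry global-var entry kind.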
1519llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1520convertCaptureClause(const VarDecl *VD) {
1521 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1522 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1523 if (!MapType)
1524 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1525 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1526 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1527 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1528 break;
1529 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1530 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1531 break;
1532 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1533 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1534 break;
1535 default:
1536 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1537 break;
1538 }
1539}
1540
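// Computes the unique target-region entry info (file and line) for a location.
// If the presumed file name (which may come from a #line directive) cannot be
// resolved to an on-disk unique ID, fall back to the spelling location by
// re-querying without line directives.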
1541static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1542 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1543 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1544
1545 auto FileInfoCallBack = [&]() {
1546 SourceManager &SM = CGM.getContext().getSourceManager();
1547 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1548
1549 llvm::sys::fs::UniqueID ID;
1550 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1551 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1552 }
1553
1554 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1555 };
1556
1557 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1558}
1559
1560Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1561 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1562
1563 auto LinkageForVariable = [&VD, this]() {
1564 return CGM.getLLVMLinkageVarDefinition(VD);
1565 };
1566
1567 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1568
1569 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1570 CGM.getContext().getPointerType(VD->getType()));
1571 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1572 convertCaptureClause(VD), convertDeviceClause(VD),
1573 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1574 VD->isExternallyVisible(),
1575 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1576 VD->getCanonicalDecl()->getBeginLoc()),
1577 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1578 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1579 LinkageForVariable);
1580
1581 if (!addr)
1582 return Address::invalid();
1583 return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1584}
1585
1586llvm::Constant *
1587CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1588 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1589 !CGM.getContext().getTargetInfo().isTLSSupported());
1590 // Lookup the entry, lazily creating it if necessary.
1591 std::string Suffix = getName({"cache", ""});
1592 return OMPBuilder.getOrCreateInternalVariable(
1593 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1594}
1595
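// For targets without TLS support this emits, roughly (a sketch of the shape
// of the call, not verbatim IR):
//   %copy = call i8* @__kmpc_threadprivate_cached(%ident_t* %loc, i32 %gtid,
//                                                 i8* %var, i64 %size,
//                                                 i8*** @var.cache)
// and the returned pointer is used as the address of the thread-private copy.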
1596Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1597 const VarDecl *VD,
1598 Address VDAddr,
1599 SourceLocation Loc) {
1600 if (CGM.getLangOpts().OpenMPUseTLS &&
1601 CGM.getContext().getTargetInfo().isTLSSupported())
1602 return VDAddr;
1603
1604 llvm::Type *VarTy = VDAddr.getElementType();
1605 llvm::Value *Args[] = {
1606 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1607 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1608 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1609 getOrCreateThreadPrivateCache(VD)};
1610 return Address(
1611 CGF.EmitRuntimeCall(
1612 OMPBuilder.getOrCreateRuntimeFunction(
1613 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1614 Args),
1615 CGF.Int8Ty, VDAddr.getAlignment());
1616}
1617
1618void CGOpenMPRuntime::emitThreadPrivateVarInit(
1619 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1620 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1621 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1622 // library.
1623 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1624 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1625 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1626 OMPLoc);
1627 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1628 // to register constructor/destructor for variable.
1629 llvm::Value *Args[] = {
1630 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1631 Ctor, CopyCtor, Dtor};
1632 CGF.EmitRuntimeCall(
1633 OMPBuilder.getOrCreateRuntimeFunction(
1634 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1635 Args);
1636}
1637
1638llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1639 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1640 bool PerformInit, CodeGenFunction *CGF) {
1641 if (CGM.getLangOpts().OpenMPUseTLS &&
1642 CGM.getContext().getTargetInfo().isTLSSupported())
1643 return nullptr;
1644
1645 VD = VD->getDefinition(CGM.getContext());
1646 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1647 QualType ASTTy = VD->getType();
1648
1649 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1650 const Expr *Init = VD->getAnyInitializer();
1651 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1652 // Generate function that re-emits the declaration's initializer into the
1653 // threadprivate copy of the variable VD
1654 CodeGenFunction CtorCGF(CGM);
1655 FunctionArgList Args;
1656 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1657 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1658 ImplicitParamKind::Other);
1659 Args.push_back(&Dst);
1660
1661 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1662 CGM.getContext().VoidPtrTy, Args);
1663 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1664 std::string Name = getName({"__kmpc_global_ctor_", ""});
1665 llvm::Function *Fn =
1666 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1667 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1668 Args, Loc, Loc);
1669 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1670 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1671 CGM.getContext().VoidPtrTy, Dst.getLocation());
1672 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1673 VDAddr.getAlignment());
1674 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1675 /*IsInitializer=*/true);
1676 ArgVal = CtorCGF.EmitLoadOfScalar(
1677 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1678 CGM.getContext().VoidPtrTy, Dst.getLocation());
1679 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1680 CtorCGF.FinishFunction();
1681 Ctor = Fn;
1682 }
1683 if (VD->getType().isDestructedType() != QualType::DK_none) {
1684 // Generate function that emits destructor call for the threadprivate copy
1685 // of the variable VD
1686 CodeGenFunction DtorCGF(CGM);
1687 FunctionArgList Args;
1688 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1689 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1690 ImplicitParamKind::Other);
1691 Args.push_back(&Dst);
1692
1693 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1694 CGM.getContext().VoidTy, Args);
1695 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1696 std::string Name = getName({"__kmpc_global_dtor_", ""});
1697 llvm::Function *Fn =
1698 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1699 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1700 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1701 Loc, Loc);
1702 // Create a scope with an artificial location for the body of this function.
1703 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1704 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1705 DtorCGF.GetAddrOfLocalVar(&Dst),
1706 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1707 DtorCGF.emitDestroy(
1708 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1709 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1710 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1711 DtorCGF.FinishFunction();
1712 Dtor = Fn;
1713 }
1714 // Do not emit init function if it is not required.
1715 if (!Ctor && !Dtor)
1716 return nullptr;
1717
1718 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1719 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1720 /*isVarArg=*/false)
1721 ->getPointerTo();
1722 // Copying constructor for the threadprivate variable.
1723 // Must be NULL - the runtime reserves this parameter and currently requires
1724 // it to always be NULL; otherwise it fires an assertion.
1725 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1726 if (Ctor == nullptr) {
1727 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1728 /*isVarArg=*/false)
1729 ->getPointerTo();
1730 Ctor = llvm::Constant::getNullValue(CtorTy);
1731 }
1732 if (Dtor == nullptr) {
1733 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1734 /*isVarArg=*/false)
1735 ->getPointerTo();
1736 Dtor = llvm::Constant::getNullValue(DtorTy);
1737 }
1738 if (!CGF) {
1739 auto *InitFunctionTy =
1740 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1741 std::string Name = getName({"__omp_threadprivate_init_", ""});
1742 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1743 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1744 CodeGenFunction InitCGF(CGM);
1745 FunctionArgList ArgList;
1746 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1747 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1748 Loc, Loc);
1749 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1750 InitCGF.FinishFunction();
1751 return InitFunction;
1752 }
1753 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1754 }
1755 return nullptr;
1756}
1757
1758void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1759 llvm::GlobalValue *GV) {
1760 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1761 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1762
1763 // We only need to handle active 'indirect' declare target functions.
1764 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1765 return;
1766
1767 // Get a mangled name to store the new device global in.
1768 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1769 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1770 SmallString<128> Name;
1771 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1772
1773 // We need to generate a new global to hold the address of the indirectly
1774 // called device function. Doing this allows us to keep the visibility and
1775 // linkage of the associated function unchanged while allowing the runtime to
1776 // access its value.
1777 llvm::GlobalValue *Addr = GV;
1778 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1779 Addr = new llvm::GlobalVariable(
1780 CGM.getModule(), CGM.VoidPtrTy,
1781 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1782 nullptr, llvm::GlobalValue::NotThreadLocal,
1783 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1784 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1785 }
1786
1787 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1788 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1789 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1790 llvm::GlobalValue::WeakODRLinkage);
1791}
1792
1793Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1794 QualType VarType,
1795 StringRef Name) {
1796 std::string Suffix = getName({"artificial", ""});
1797 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1798 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1799 VarLVType, Twine(Name).concat(Suffix).str());
1800 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1801 CGM.getTarget().isTLSSupported()) {
1802 GAddr->setThreadLocal(/*Val=*/true);
1803 return Address(GAddr, GAddr->getValueType(),
1804 CGM.getContext().getTypeAlignInChars(VarType));
1805 }
1806 std::string CacheSuffix = getName({"cache", ""});
1807 llvm::Value *Args[] = {
1808 emitUpdateLocation(CGF, SourceLocation()),
1809 getThreadID(CGF, SourceLocation()),
1810 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1811 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1812 /*isSigned=*/false),
1813 OMPBuilder.getOrCreateInternalVariable(
1814 CGM.VoidPtrPtrTy,
1815 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1816 return Address(
1817 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1818 CGF.EmitRuntimeCall(
1819 OMPBuilder.getOrCreateRuntimeFunction(
1820 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1821 Args),
1822 VarLVType->getPointerTo(/*AddrSpace=*/0)),
1823 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1824}
1825
1826void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1827 const RegionCodeGenTy &ThenGen,
1828 const RegionCodeGenTy &ElseGen) {
1829 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1830
1831 // If the condition constant folds and can be elided, try to avoid emitting
1832 // the condition and the dead arm of the if/else.
1833 bool CondConstant;
1834 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1835 if (CondConstant)
1836 ThenGen(CGF);
1837 else
1838 ElseGen(CGF);
1839 return;
1840 }
1841
1842 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1843 // emit the conditional branch.
1844 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1845 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1846 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1847 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1848
1849 // Emit the 'then' code.
1850 CGF.EmitBlock(ThenBlock);
1851 ThenGen(CGF);
1852 CGF.EmitBranch(ContBlock);
1853 // Emit the 'else' code if present.
1854 // There is no need to emit a line number for an unconditional branch.
1855 (void)ApplyDebugLocation::CreateEmpty(CGF);
1856 CGF.EmitBlock(ElseBlock);
1857 ElseGen(CGF);
1858 // There is no need to emit a line number for an unconditional branch.
1859 (void)ApplyDebugLocation::CreateEmpty(CGF);
1860 CGF.EmitBranch(ContBlock);
1861 // Emit the continuation block for code after the if.
1862 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1863}
1864
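// Lowers a 'parallel' region to, in outline (a sketch of the emitted control
// flow, not verbatim IR):
//   if (IfCond) // or unconditionally when there is no 'if' clause
//     __kmpc_fork_call(loc, nargs, microtask, <captured vars>);
//   else {
//     __kmpc_serialized_parallel(loc, gtid);
//     OutlinedFn(&gtid, &zero_bound, <captured vars>);
//     __kmpc_end_serialized_parallel(loc, gtid);
//   }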
1865void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1866 llvm::Function *OutlinedFn,
1867 ArrayRef<llvm::Value *> CapturedVars,
1868 const Expr *IfCond,
1869 llvm::Value *NumThreads) {
1870 if (!CGF.HaveInsertPoint())
1871 return;
1872 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1873 auto &M = CGM.getModule();
1874 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1875 this](CodeGenFunction &CGF, PrePostActionTy &) {
1876 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1877 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1878 llvm::Value *Args[] = {
1879 RTLoc,
1880 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1881 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1882 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1883 RealArgs.append(std::begin(Args), std::end(Args));
1884 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1885
1886 llvm::FunctionCallee RTLFn =
1887 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1888 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1889 };
1890 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1891 this](CodeGenFunction &CGF, PrePostActionTy &) {
1892 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1893 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1894 // Build calls:
1895 // __kmpc_serialized_parallel(&Loc, GTid);
1896 llvm::Value *Args[] = {RTLoc, ThreadID};
1897 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1898 M, OMPRTL___kmpc_serialized_parallel),
1899 Args);
1900
1901 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1902 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1903 Address ZeroAddrBound =
1904 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1905 /*Name=*/".bound.zero.addr");
1906 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1907 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1908 // The thread id for serialized parallel regions is 0.
1909 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1910 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1911 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1912
1913 // Ensure we do not inline the function. This is trivially true for the ones
1914 // passed to __kmpc_fork_call, but the ones called in serialized regions
1915 // could be inlined. This is not perfect, but it is closer to the invariant
1916 // we want, namely, every data environment starts with a new function.
1917 // TODO: We should pass the if condition to the runtime function and do the
1918 // handling there. Much cleaner code.
1919 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1920 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1921 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1922
1923 // __kmpc_end_serialized_parallel(&Loc, GTid);
1924 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1925 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1926 M, OMPRTL___kmpc_end_serialized_parallel),
1927 EndArgs);
1928 };
1929 if (IfCond) {
1930 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1931 } else {
1932 RegionCodeGenTy ThenRCG(ThenGen);
1933 ThenRCG(CGF);
1934 }
1935}
1936
1937 // If we're inside an (outlined) parallel region, use the region info's
1938 // thread-ID variable (it is passed as the first argument of the outlined
1939 // function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1940 // region but in regular serial code, get the thread ID by calling kmp_int32
1941 // __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
1942 // temporary, and return the address of that temporary.
1943Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1944 SourceLocation Loc) {
1945 if (auto *OMPRegionInfo =
1946 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1947 if (OMPRegionInfo->getThreadIDVariable())
1948 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
1949
1950 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1951 QualType Int32Ty =
1952 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1953 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1954 CGF.EmitStoreOfScalar(ThreadID,
1955 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1956
1957 return ThreadIDTemp;
1958}
1959
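// Returns (creating on first use) the internal global lock variable, typically
// named "gomp_critical_user_<name>.var", that __kmpc_critical and
// __kmpc_end_critical use as the lock for this named critical region.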
1960llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1961 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1962 std::string Name = getName({Prefix, "var"});
1963 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1964}
1965
1966namespace {
1967/// Common pre(post)-action for different OpenMP constructs.
1968class CommonActionTy final : public PrePostActionTy {
1969 llvm::FunctionCallee EnterCallee;
1970 ArrayRef<llvm::Value *> EnterArgs;
1971 llvm::FunctionCallee ExitCallee;
1972 ArrayRef<llvm::Value *> ExitArgs;
1973 bool Conditional;
1974 llvm::BasicBlock *ContBlock = nullptr;
1975
1976public:
1977 CommonActionTy(llvm::FunctionCallee EnterCallee,
1978 ArrayRef<llvm::Value *> EnterArgs,
1979 llvm::FunctionCallee ExitCallee,
1980 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1981 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1982 ExitArgs(ExitArgs), Conditional(Conditional) {}
1983 void Enter(CodeGenFunction &CGF) override {
1984 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1985 if (Conditional) {
1986 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1987 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1988 ContBlock = CGF.createBasicBlock("omp_if.end");
1989 // Generate the branch (If-stmt)
1990 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1991 CGF.EmitBlock(ThenBlock);
1992 }
1993 }
1994 void Done(CodeGenFunction &CGF) {
1995 // Emit the rest of the blocks/branches.
1996 CGF.EmitBranch(ContBlock);
1997 CGF.EmitBlock(ContBlock, true);
1998 }
1999 void Exit(CodeGenFunction &CGF) override {
2000 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2001 }
2002};
2003} // anonymous namespace
2004
2005void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2006 StringRef CriticalName,
2007 const RegionCodeGenTy &CriticalOpGen,
2008 SourceLocation Loc, const Expr *Hint) {
2009 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2010 // CriticalOpGen();
2011 // __kmpc_end_critical(ident_t *, gtid, Lock);
2012 // Prepare arguments and build a call to __kmpc_critical
2013 if (!CGF.HaveInsertPoint())
2014 return;
2015 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2016 getCriticalRegionLock(CriticalName)};
2017 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2018 std::end(Args));
2019 if (Hint) {
2020 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2021 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2022 }
2023 CommonActionTy Action(
2024 OMPBuilder.getOrCreateRuntimeFunction(
2025 CGM.getModule(),
2026 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2027 EnterArgs,
2028 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2029 OMPRTL___kmpc_end_critical),
2030 Args);
2031 CriticalOpGen.setAction(Action);
2032 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2033}
2034
2035void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2036 const RegionCodeGenTy &MasterOpGen,
2037 SourceLocation Loc) {
2038 if (!CGF.HaveInsertPoint())
2039 return;
2040 // if(__kmpc_master(ident_t *, gtid)) {
2041 // MasterOpGen();
2042 // __kmpc_end_master(ident_t *, gtid);
2043 // }
2044 // Prepare arguments and build a call to __kmpc_master
2045 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2046 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2047 CGM.getModule(), OMPRTL___kmpc_master),
2048 Args,
2049 OMPBuilder.getOrCreateRuntimeFunction(
2050 CGM.getModule(), OMPRTL___kmpc_end_master),
2051 Args,
2052 /*Conditional=*/true);
2053 MasterOpGen.setAction(Action);
2054 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2055 Action.Done(CGF);
2056}
2057
2058void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2059 const RegionCodeGenTy &MaskedOpGen,
2060 SourceLocation Loc, const Expr *Filter) {
2061 if (!CGF.HaveInsertPoint())
2062 return;
2063 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2064 // MaskedOpGen();
2065 // __kmpc_end_masked(ident_t *, gtid);
2066 // }
2067 // Prepare arguments and build a call to __kmpc_masked
2068 llvm::Value *FilterVal = Filter
2069 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2070 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2071 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2072 FilterVal};
2073 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2074 getThreadID(CGF, Loc)};
2075 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2076 CGM.getModule(), OMPRTL___kmpc_masked),
2077 Args,
2078 OMPBuilder.getOrCreateRuntimeFunction(
2079 CGM.getModule(), OMPRTL___kmpc_end_masked),
2080 ArgsEnd,
2081 /*Conditional=*/true);
2082 MaskedOpGen.setAction(Action);
2083 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2084 Action.Done(CGF);
2085}
2086
2087void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2088 SourceLocation Loc) {
2089 if (!CGF.HaveInsertPoint())
2090 return;
2091 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2092 OMPBuilder.createTaskyield(CGF.Builder);
2093 } else {
2094 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2095 llvm::Value *Args[] = {
2096 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2097 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2098 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2099 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2100 Args);
2101 }
2102
2103 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2104 Region->emitUntiedSwitch(CGF);
2105}
2106
2107void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2108 const RegionCodeGenTy &TaskgroupOpGen,
2109 SourceLocation Loc) {
2110 if (!CGF.HaveInsertPoint())
2111 return;
2112 // __kmpc_taskgroup(ident_t *, gtid);
2113 // TaskgroupOpGen();
2114 // __kmpc_end_taskgroup(ident_t *, gtid);
2115 // Prepare arguments and build a call to __kmpc_taskgroup
2116 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2117 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2118 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2119 Args,
2120 OMPBuilder.getOrCreateRuntimeFunction(
2121 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2122 Args);
2123 TaskgroupOpGen.setAction(Action);
2124 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2125}
2126
2127/// Given an array of pointers to variables, project the address of a
2128/// given variable.
2129static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2130 unsigned Index, const VarDecl *Var) {
2131 // Pull out the pointer to the variable.
2132 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2133 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2134
2135 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2136 return Address(
2137 CGF.Builder.CreateBitCast(
2138 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2139 ElemTy, CGF.getContext().getDeclAlign(Var));
2140}
2141
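// Emits the helper passed to __kmpc_copyprivate:
//   void copy_func(void *LHSArg, void *RHSArg)
// where both arguments point to arrays of void* holding the addresses of the
// copyprivate variables; each element is copied using its AssignmentOps entry.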
2142static llvm::Value *emitCopyprivateCopyFunction(
2143 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2144 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2145 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2146 SourceLocation Loc) {
2147 ASTContext &C = CGM.getContext();
2148 // void copy_func(void *LHSArg, void *RHSArg);
2149 FunctionArgList Args;
2150 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2151 ImplicitParamKind::Other);
2152 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2153 ImplicitParamKind::Other);
2154 Args.push_back(&LHSArg);
2155 Args.push_back(&RHSArg);
2156 const auto &CGFI =
2157 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2158 std::string Name =
2159 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2160 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2161 llvm::GlobalValue::InternalLinkage, Name,
2162 &CGM.getModule());
2163 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2164 Fn->setDoesNotRecurse();
2165 CodeGenFunction CGF(CGM);
2166 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2167 // Dest = (void*[n])(LHSArg);
2168 // Src = (void*[n])(RHSArg);
2169 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2170 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2171 ArgsElemType->getPointerTo()),
2172 ArgsElemType, CGF.getPointerAlign());
2173 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2174 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2175 ArgsElemType->getPointerTo()),
2176 ArgsElemType, CGF.getPointerAlign());
2177 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2178 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2179 // ...
2180 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2181 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2182 const auto *DestVar =
2183 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2184 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2185
2186 const auto *SrcVar =
2187 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2188 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2189
2190 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2191 QualType Type = VD->getType();
2192 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2193 }
2194 CGF.FinishFunction();
2195 return Fn;
2196}
2197
2198void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2199 const RegionCodeGenTy &SingleOpGen,
2200 SourceLocation Loc,
2201 ArrayRef<const Expr *> CopyprivateVars,
2202 ArrayRef<const Expr *> SrcExprs,
2203 ArrayRef<const Expr *> DstExprs,
2204 ArrayRef<const Expr *> AssignmentOps) {
2205 if (!CGF.HaveInsertPoint())
2206 return;
2207 assert(CopyprivateVars.size() == SrcExprs.size() &&
2208 CopyprivateVars.size() == DstExprs.size() &&
2209 CopyprivateVars.size() == AssignmentOps.size());
2210 ASTContext &C = CGM.getContext();
2211 // int32 did_it = 0;
2212 // if(__kmpc_single(ident_t *, gtid)) {
2213 // SingleOpGen();
2214 // __kmpc_end_single(ident_t *, gtid);
2215 // did_it = 1;
2216 // }
2217 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2218 // <copy_func>, did_it);
2219
2220 Address DidIt = Address::invalid();
2221 if (!CopyprivateVars.empty()) {
2222 // int32 did_it = 0;
2223 QualType KmpInt32Ty =
2224 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2225 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2226 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2227 }
2228 // Prepare arguments and build a call to __kmpc_single
2229 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2230 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2231 CGM.getModule(), OMPRTL___kmpc_single),
2232 Args,
2233 OMPBuilder.getOrCreateRuntimeFunction(
2234 CGM.getModule(), OMPRTL___kmpc_end_single),
2235 Args,
2236 /*Conditional=*/true);
2237 SingleOpGen.setAction(Action);
2238 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2239 if (DidIt.isValid()) {
2240 // did_it = 1;
2241 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2242 }
2243 Action.Done(CGF);
2244 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2245 // <copy_func>, did_it);
2246 if (DidIt.isValid()) {
2247 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2248 QualType CopyprivateArrayTy = C.getConstantArrayType(
2249 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2250 /*IndexTypeQuals=*/0);
2251 // Create a list of all private variables for copyprivate.
2252 Address CopyprivateList =
2253 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2254 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2255 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2256 CGF.Builder.CreateStore(
2257 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2258 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2259 CGF.VoidPtrTy),
2260 Elem);
2261 }
2262 // Build a function that copies private values from the single region to all
2263 // other threads in the corresponding parallel region.
2264 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2265 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2266 SrcExprs, DstExprs, AssignmentOps, Loc);
2267 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2268 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2269 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2270 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2271 llvm::Value *Args[] = {
2272 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2273 getThreadID(CGF, Loc), // i32 <gtid>
2274 BufSize, // size_t <buf_size>
2275 CL.getPointer(), // void *<copyprivate list>
2276 CpyFn, // void (*) (void *, void *) <copy_func>
2277 DidItVal // i32 did_it
2278 };
2279 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2280 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2281 Args);
2282 }
2283}
2284
2285void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2286 const RegionCodeGenTy &OrderedOpGen,
2287 SourceLocation Loc, bool IsThreads) {
2288 if (!CGF.HaveInsertPoint())
2289 return;
2290 // __kmpc_ordered(ident_t *, gtid);
2291 // OrderedOpGen();
2292 // __kmpc_end_ordered(ident_t *, gtid);
2293 // Prepare arguments and build a call to __kmpc_ordered
2294 if (IsThreads) {
2295 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2296 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2297 CGM.getModule(), OMPRTL___kmpc_ordered),
2298 Args,
2299 OMPBuilder.getOrCreateRuntimeFunction(
2300 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2301 Args);
2302 OrderedOpGen.setAction(Action);
2303 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2304 return;
2305 }
2306 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2307}
2308
2309unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2310 unsigned Flags;
2311 if (Kind == OMPD_for)
2312 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2313 else if (Kind == OMPD_sections)
2314 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2315 else if (Kind == OMPD_single)
2316 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2317 else if (Kind == OMPD_barrier)
2318 Flags = OMP_IDENT_BARRIER_EXPL;
2319 else
2320 Flags = OMP_IDENT_BARRIER_IMPL;
2321 return Flags;
2322}
2323
2324void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2325 CodeGenFunction &CGF, const OMPLoopDirective &S,
2326 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2327 // Check if the loop directive is actually a doacross loop directive. In this
2328 // case choose schedule(static, 1).
2329 if (llvm::any_of(
2330 S.getClausesOfKind<OMPOrderedClause>(),
2331 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2332 ScheduleKind = OMPC_SCHEDULE_static;
2333 // Chunk size is 1 in this case.
2334 llvm::APInt ChunkSize(32, 1);
2335 ChunkExpr = IntegerLiteral::Create(
2336 CGF.getContext(), ChunkSize,
2337 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2338 SourceLocation());
2339 }
2340}
2341
2342void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2343 OpenMPDirectiveKind Kind, bool EmitChecks,
2344 bool ForceSimpleCall) {
2345 // Check if we should use the OMPBuilder
2346 auto *OMPRegionInfo =
2347 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2348 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2349 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2350 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2351 return;
2352 }
2353
2354 if (!CGF.HaveInsertPoint())
2355 return;
2356 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2357 // thread_id);
2358 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2361 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2362 getThreadID(CGF, Loc)};
2363 if (OMPRegionInfo) {
2364 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2365 llvm::Value *Result = CGF.EmitRuntimeCall(
2366 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2367 OMPRTL___kmpc_cancel_barrier),
2368 Args);
2369 if (EmitChecks) {
2370 // if (__kmpc_cancel_barrier()) {
2371 // exit from construct;
2372 // }
2373 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2374 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2375 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2376 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2377 CGF.EmitBlock(ExitBB);
2378 // exit from construct;
2379 CodeGenFunction::JumpDest CancelDestination =
2380 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2381 CGF.EmitBranchThroughCleanup(CancelDestination);
2382 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2383 }
2384 return;
2385 }
2386 }
2387 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2388 CGM.getModule(), OMPRTL___kmpc_barrier),
2389 Args);
2390}
2391
2392void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2393 Expr *ME, bool IsFatal) {
2394 llvm::Value *MVL =
2395 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2396 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2397 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2398 // *message)
2399 llvm::Value *Args[] = {
2400 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*EmitLoc=*/true),
2401 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2402 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2403 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2404 CGM.getModule(), OMPRTL___kmpc_error),
2405 Args);
2406}
2407
2408/// Map the OpenMP loop schedule to the runtime enumeration.
2409static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2410 bool Chunked, bool Ordered) {
2411 switch (ScheduleKind) {
2412 case OMPC_SCHEDULE_static:
2413 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2414 : (Ordered ? OMP_ord_static : OMP_sch_static);
2415 case OMPC_SCHEDULE_dynamic:
2416 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2417 case OMPC_SCHEDULE_guided:
2418 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2419 case OMPC_SCHEDULE_runtime:
2420 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2421 case OMPC_SCHEDULE_auto:
2422 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2423 case OMPC_SCHEDULE_unknown:
2424 assert(!Chunked && "chunk was specified but schedule kind not known");
2425 return Ordered ? OMP_ord_static : OMP_sch_static;
2426 }
2427 llvm_unreachable("Unexpected runtime schedule");
2428}
2429
2430/// Map the OpenMP distribute schedule to the runtime enumeration.
2431static OpenMPSchedType
2432getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2433 // Only the 'static' schedule kind is allowed for dist_schedule.
2434 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2435}
2436
2437bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2438 bool Chunked) const {
2439 OpenMPSchedType Schedule =
2440 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2441 return Schedule == OMP_sch_static;
2442}
2443
2444bool CGOpenMPRuntime::isStaticNonchunked(
2445 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2446 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2447 return Schedule == OMP_dist_sch_static;
2448}
2449
2450bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2451 bool Chunked) const {
2452 OpenMPSchedType Schedule =
2453 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2454 return Schedule == OMP_sch_static_chunked;
2455}
2456
2457bool CGOpenMPRuntime::isStaticChunked(
2458 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2459 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2460 return Schedule == OMP_dist_sch_static_chunked;
2461}
2462
2463bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2464 OpenMPSchedType Schedule =
2465 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2466 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2467 return Schedule != OMP_sch_static;
2468}
2469
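// ORs the schedule enumeration with the monotonic/nonmonotonic modifier bits
// expected by the runtime; e.g. schedule(nonmonotonic: dynamic) becomes
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic. The 'simd' modifier
// instead upgrades a static chunked schedule to the balanced chunked variant.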
2470static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2471 OpenMPScheduleClauseModifier M1,
2472 OpenMPScheduleClauseModifier M2) {
2473 int Modifier = 0;
2474 switch (M1) {
2475 case OMPC_SCHEDULE_MODIFIER_monotonic:
2476 Modifier = OMP_sch_modifier_monotonic;
2477 break;
2478 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2479 Modifier = OMP_sch_modifier_nonmonotonic;
2480 break;
2481 case OMPC_SCHEDULE_MODIFIER_simd:
2482 if (Schedule == OMP_sch_static_chunked)
2483 Schedule = OMP_sch_static_balanced_chunked;
2484 break;
2485 case OMPC_SCHEDULE_MODIFIER_last:
2486 case OMPC_SCHEDULE_MODIFIER_unknown:
2487 break;
2488 }
2489 switch (M2) {
2490 case OMPC_SCHEDULE_MODIFIER_monotonic:
2491 Modifier = OMP_sch_modifier_monotonic;
2492 break;
2493 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2494 Modifier = OMP_sch_modifier_nonmonotonic;
2495 break;
2496 case OMPC_SCHEDULE_MODIFIER_simd:
2497 if (Schedule == OMP_sch_static_chunked)
2498 Schedule = OMP_sch_static_balanced_chunked;
2499 break;
2500 case OMPC_SCHEDULE_MODIFIER_last:
2501 case OMPC_SCHEDULE_MODIFIER_unknown:
2502 break;
2503 }
2504 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2505 // If the static schedule kind is specified or if the ordered clause is
2506 // specified, and if the nonmonotonic modifier is not specified, the effect is
2507 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2508 // modifier is specified, the effect is as if the nonmonotonic modifier is
2509 // specified.
2510 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2511 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2512 Schedule == OMP_sch_static_balanced_chunked ||
2513 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2514 Schedule == OMP_dist_sch_static_chunked ||
2515 Schedule == OMP_dist_sch_static))
2516 Modifier = OMP_sch_modifier_nonmonotonic;
2517 }
2518 return Schedule | Modifier;
2519}
2520
2521void CGOpenMPRuntime::emitForDispatchInit(
2522 CodeGenFunction &CGF, SourceLocation Loc,
2523 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2524 bool Ordered, const DispatchRTInput &DispatchValues) {
2525 if (!CGF.HaveInsertPoint())
2526 return;
2527 OpenMPSchedType Schedule = getRuntimeSchedule(
2528 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2529 assert(Ordered ||
2530 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2531 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2532 Schedule != OMP_sch_static_balanced_chunked));
2533 // Call __kmpc_dispatch_init(
2534 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2535 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2536 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2537
2538 // If the chunk was not specified in the clause, use the default value 1.
2539 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2540 : CGF.Builder.getIntN(IVSize, 1);
2541 llvm::Value *Args[] = {
2542 emitUpdateLocation(CGF, Loc),
2543 getThreadID(CGF, Loc),
2544 CGF.Builder.getInt32(addMonoNonMonoModifier(
2545 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2546 DispatchValues.LB, // Lower
2547 DispatchValues.UB, // Upper
2548 CGF.Builder.getIntN(IVSize, 1), // Stride
2549 Chunk // Chunk
2550 };
2551 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2552 Args);
2553}
2554
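// Shared helper for emitForStaticInit and emitDistributeStaticInit: verifies
// the schedule/chunk combination and emits the __kmpc_for_static_init_<4|8>[u]
// (or GPU distribute variant) call using the loop-bound pointers from Values.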
2555static void emitForStaticInitCall(
2556 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2557 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2558 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2559 const CGOpenMPRuntime::StaticRTInput &Values) {
2560 if (!CGF.HaveInsertPoint())
2561 return;
2562
2563 assert(!Values.Ordered);
2564 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2565 Schedule == OMP_sch_static_balanced_chunked ||
2566 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2567 Schedule == OMP_dist_sch_static ||
2568 Schedule == OMP_dist_sch_static_chunked);
2569
2570 // Call __kmpc_for_static_init(
2571 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2572 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2573 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2574 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2575 llvm::Value *Chunk = Values.Chunk;
2576 if (Chunk == nullptr) {
2577 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2578 Schedule == OMP_dist_sch_static) &&
2579 "expected static non-chunked schedule");
2580 // If the chunk was not specified in the clause, use the default value 1.
2581 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2582 } else {
2583 assert((Schedule == OMP_sch_static_chunked ||
2584 Schedule == OMP_sch_static_balanced_chunked ||
2585 Schedule == OMP_ord_static_chunked ||
2586 Schedule == OMP_dist_sch_static_chunked) &&
2587 "expected static chunked schedule");
2588 }
2589 llvm::Value *Args[] = {
2590 UpdateLocation,
2591 ThreadId,
2592 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2593 M2)), // Schedule type
2594 Values.IL.getPointer(), // &isLastIter
2595 Values.LB.getPointer(), // &LB
2596 Values.UB.getPointer(), // &UB
2597 Values.ST.getPointer(), // &Stride
2598 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2599 Chunk // Chunk
2600 };
2601 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2602}
2603
2604void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2605 SourceLocation Loc,
2606 OpenMPDirectiveKind DKind,
2607 const OpenMPScheduleTy &ScheduleKind,
2608 const StaticRTInput &Values) {
2609 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2610 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2611 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2612 "Expected loop-based or sections-based directive.");
2613 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2614 isOpenMPLoopDirective(DKind)
2615 ? OMP_IDENT_WORK_LOOP
2616 : OMP_IDENT_WORK_SECTIONS);
2617 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2618 llvm::FunctionCallee StaticInitFunction =
2619 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2620 false);
2621 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2622 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2623 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2624}
2625
2626void CGOpenMPRuntime::emitDistributeStaticInit(
2627 CodeGenFunction &CGF, SourceLocation Loc,
2628 OpenMPDistScheduleClauseKind SchedKind,
2629 const CGOpenMPRuntime::StaticRTInput &Values) {
2630 OpenMPSchedType ScheduleNum =
2631 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2632 llvm::Value *UpdatedLocation =
2633 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2634 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2635 llvm::FunctionCallee StaticInitFunction;
2636 bool isGPUDistribute =
2637 CGM.getLangOpts().OpenMPIsTargetDevice &&
2638 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2639 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2640 Values.IVSize, Values.IVSigned, isGPUDistribute);
2641
2642 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2643 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2644 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2645}
2646
2647void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2648 SourceLocation Loc,
2649 OpenMPDirectiveKind DKind) {
2650 if (!CGF.HaveInsertPoint())
2651 return;
2652 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2653 llvm::Value *Args[] = {
2654 emitUpdateLocation(CGF, Loc,
2655 isOpenMPDistributeDirective(DKind)
2656 ? OMP_IDENT_WORK_DISTRIBUTE
2657 : isOpenMPLoopDirective(DKind)
2658 ? OMP_IDENT_WORK_LOOP
2659 : OMP_IDENT_WORK_SECTIONS),
2660 getThreadID(CGF, Loc)};
2661 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2662 if (isOpenMPDistributeDirective(DKind) &&
2663 CGM.getLangOpts().OpenMPIsTargetDevice &&
2664 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2665 CGF.EmitRuntimeCall(
2666 OMPBuilder.getOrCreateRuntimeFunction(
2667 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2668 Args);
2669 else
2670 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2671 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2672 Args);
2673}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
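
// Usage sketch (assumed shape of the loop codegen that consumes this helper):
// a dynamically scheduled loop is typically driven as
//   while (__kmpc_dispatch_next_4(&loc, tid, &last, &lb, &ub, &st)) {
//     for (i = lb; i <= ub; i += st) body(i);
//   }
// emitForNext returns the runtime call's result converted to a boolean.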

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}
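
// Example (illustrative): '#pragma omp parallel num_threads(4)' reaches this
// helper with NumThreads == i32 4, producing
//   __kmpc_push_num_threads(&loc, tid, 4);
// shortly before the fork call for the parallel region.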

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}
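
// Example (illustrative): given
//   int x;
//   #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc)
// isAllocatableDecl(x) is true, so the task privatization code below stores a
// pointer to the separately allocated storage rather than the value itself.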

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

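/// Emit a helper that runs destructors for all destructible fields of the
/// task's privates record. A sketch of the emitted function (names
/// illustrative; the actual symbol is internal):
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
///                                 kmp_task_t_with_privates *tt) {
///   priv1.~T1(); ... privn.~Tn(); // for each destructible private
/// }
/// \endcode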
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}
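
// Example (illustrative): 'firstprivate(s)' where 's' is a std::string yields
// a destructible private copy, so checkDestructorsRequired returns true and
// the task is allocated with the destructors flag set, letting the runtime
// invoke the destructor helper when the task completes.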

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
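
// Sketch of the control flow this scope generates for a clause such as
// 'affinity(iterator(i = 0 : n) : a[i])' (illustrative pseudo-code):
//   counter = 0;
// cont:
//   if (counter < n) goto body; else goto exit;
// body:
//   i = begin + counter * step;
//   ... clause payload emitted inside the scope ...
//   counter = counter + 1; goto cont;   // emitted by the destructor
// exit: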

static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
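
// Example (illustrative): for an array shaping expression '([n][m])p' the
// size is sizeof(*p) * n * m; for an array section the size is the byte
// distance from the section's first element to one past its last element.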

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
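  // Example (derived from the flag values above): a tied task carrying a
  // priority clause and at least one destructible private is allocated with
  //   Flags = TiedFlag | DestructorsFlag | PriorityFlag; // 0x1|0x8|0x20 = 0x29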
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamKind::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
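/// On a typical 64-bit target the record corresponds to (a sketch matching
/// the three fields added below; the runtime's own definition is
/// authoritative):
/// \code
/// struct kmp_depend_info {
///   intptr_t base_addr; // address of the dependence object
///   size_t len;         // size of the dependence object in bytes
///   uint8_t flags;      // bool-width integer holding the dependence kind
/// };
/// \endcode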
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

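/// Returns the number of dependence records stored in a depobj together with
/// an lvalue for its first record. Since emitDepobjDependClause() stores the
/// element count in an extra record in front of the array, conceptually this
/// reads:
/// \code
///   numDeps = deps[-1].base_addr; // deps points at the first real record
/// \endcode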
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF).withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

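/// Fills one kmp_depend_info record per dependence expression and advances
/// the position \p Pos (a compile-time counter, or a runtime lvalue when
/// iterators are involved). Per record, conceptually:
/// \code
///   deps[pos].base_addr = (intptr_t)&<expr>;
///   deps[pos].len = sizeof(<expr>);
///   deps[pos].flags = <runtime dependence kind>;
///   ++pos;
/// \endcode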
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

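/// Returns, for each depobj in a 'depend(depobj: ...)' clause, the number of
/// kmp_depend_info records it stores (read via getDepobjElements()). The
/// results are used to pre-size the combined dependence array.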
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

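/// Copies the kmp_depend_info records of each depobj in the clause into the
/// combined dependence array at the current position \p PosLVal, and advances
/// the position by the number of records copied.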
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcpy the dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += numDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

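/// Builds the combined kmp_depend_info array for all 'depend' clauses on a
/// task and returns the total number of records together with the array
/// address. Records are emitted in three passes: regular dependences first,
/// then regular dependences with iterators, and finally records copied out of
/// depobj handles (see the three loops below).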
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamKind::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}

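/// Allocates (via __kmpc_alloc) and fills the kmp_depend_info array backing
/// an 'omp depobj' object. One extra leading record is reserved; its
/// base_addr field stores the number of dependence records, and the returned
/// address points past it at the first real record.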
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
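  // Resulting layout (a sketch): the extra leading record is a bookkeeping
  // slot, and the address handed back to the caller points past it:
  //   deps[0].base_addr = <number of records>;
  //   deps[1..N]        = <dependence records>;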
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}

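/// Frees the kmp_depend_info array of a depobj. The stored pointer refers to
/// the first real record, so it is shifted back by one record to the start of
/// the allocation before calling __kmpc_free.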
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
                                           Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
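  // Conceptually, the loop emitted below is (a sketch):
  //   el = &deps[0]; end = &deps[numDeps];
  //   do {
  //     el->flags = <new dependence kind>;
  //     ++el;
  //   } while (el != end);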
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
  // dependence list is not empty.
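  // Overall lowering when an if clause is present (a sketch of the two
  // branches emitted via ThenCodeGen/ElseCodeGen below):
  //   if (<IfCond>) {
  //     __kmpc_omp_task{_with_deps}(loc, gtid, new_task, ...);
  //   } else {
  //     __kmpc_omp_taskwait_deps_51(...); // only if dependences are present
  //     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
  //     proxy_task_entry(gtid, new_task);
  //     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
  //   }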
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
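  // The lower bound, upper bound and stride evaluated below are stored into
  // the kmp_task_t payload so the runtime can partition the iteration space
  // among the generated tasks.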
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
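/// Conceptually emits (a sketch):
/// \code
/// for (i = 0; i < NumElements; ++i)
///   RedOpGen(LHS[i], RHS[i]); // e.g. LHS[i] = LHS[i] op RHS[i]
/// \endcode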
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression, emit it as
/// is; otherwise treat it as the combiner of a user-defined reduction (UDR)
/// decl and emit it as a call of the UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code is emitted for the reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //  break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
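/// For example, the size variable for a reduction item 'x' may be named
/// something like "reduction_size.x_1234" (the trailing number is the raw
/// source-location encoding, so actual values vary).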
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

5236/// Emits reduction initializer function:
5237/// \code
5238/// void @.red_init(void* %arg, void* %orig) {
5239/// %0 = bitcast void* %arg to <type>*
5240/// store <type> <init>, <type>* %0
5241/// ret void
5242/// }
5243/// \endcode
5244static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5245 SourceLocation Loc,
5246 ReductionCodeGen &RCG, unsigned N) {
5247 ASTContext &C = CGM.getContext();
5248 QualType VoidPtrTy = C.VoidPtrTy;
5249 VoidPtrTy.addRestrict();
5250 FunctionArgList Args;
5251 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5252 ImplicitParamKind::Other);
5253 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5254 ImplicitParamKind::Other);
5255 Args.emplace_back(Args: &Param);
5256 Args.emplace_back(Args: &ParamOrig);
5257 const auto &FnInfo =
5258 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5259 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5260 std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_init", ""});
5261 auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
5262 N: Name, M: &CGM.getModule());
5263 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
5264 Fn->setDoesNotRecurse();
5265 CodeGenFunction CGF(CGM);
5266 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: Fn, FnInfo: FnInfo, Args, Loc, StartLoc: Loc);
5267 QualType PrivateType = RCG.getPrivateType(N);
5268 Address PrivateAddr = CGF.EmitLoadOfPointer(
5269 Ptr: CGF.GetAddrOfLocalVar(&Param).withElementType(
5270 ElemTy: CGF.ConvertTypeForMem(T: PrivateType)->getPointerTo()),
5271 PtrTy: C.getPointerType(T: PrivateType)->castAs<PointerType>());
5272 llvm::Value *Size = nullptr;
5273 // If the size of the reduction item is non-constant, load it from global
5274 // threadprivate variable.
5275 if (RCG.getSizes(N).second) {
5276 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5277 CGF, VarType: CGM.getContext().getSizeType(),
5278 Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
5279 Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
5280 Ty: CGM.getContext().getSizeType(), Loc);
5281 }
5282 RCG.emitAggregateType(CGF, N, Size);
5283 Address OrigAddr = Address::invalid();
  // If the initializer uses the initializer from the declare reduction
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
5287 if (RCG.usesReductionInitializer(N)) {
5288 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5289 OrigAddr = CGF.EmitLoadOfPointer(
5290 Ptr: SharedAddr,
5291 PtrTy: CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5292 }
5293 // Emit the initializer:
5294 // %0 = bitcast void* %arg to <type>*
5295 // store <type> <init>, <type>* %0
5296 RCG.emitInitialization(CGF, N, PrivateAddr, SharedAddr: OrigAddr,
5297 DefaultInit: [](CodeGenFunction &) { return false; });
5298 CGF.FinishFunction();
5299 return Fn;
5300}
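
// Rough illustration, assuming a simple scalar '+' reduction: a construct
// such as
//   #pragma omp taskgroup task_reduction(+ : sum)
// leads to an initializer like the one above that stores the reduction
// identity (0 for '+') into the per-task private copy pointed to by %arg.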
5301
5302/// Emits reduction combiner function:
5303/// \code
5304/// void @.red_comb(void* %arg0, void* %arg1) {
5305/// %lhs = bitcast void* %arg0 to <type>*
5306/// %rhs = bitcast void* %arg1 to <type>*
5307/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5308/// store <type> %2, <type>* %lhs
5309/// ret void
5310/// }
5311/// \endcode
5312static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5313 SourceLocation Loc,
5314 ReductionCodeGen &RCG, unsigned N,
5315 const Expr *ReductionOp,
5316 const Expr *LHS, const Expr *RHS,
5317 const Expr *PrivateRef) {
5318 ASTContext &C = CGM.getContext();
5319 const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHS)->getDecl());
5320 const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHS)->getDecl());
5321 FunctionArgList Args;
5322 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5323 C.VoidPtrTy, ImplicitParamKind::Other);
5324 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5325 ImplicitParamKind::Other);
5326 Args.emplace_back(Args: &ParamInOut);
5327 Args.emplace_back(Args: &ParamIn);
5328 const auto &FnInfo =
5329 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5330 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5331 std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_comb", ""});
5332 auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
5333 N: Name, M: &CGM.getModule());
5334 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
5335 Fn->setDoesNotRecurse();
5336 CodeGenFunction CGF(CGM);
5337 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: Fn, FnInfo: FnInfo, Args, Loc, StartLoc: Loc);
5338 llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from the
  // global threadprivate variable.
5341 if (RCG.getSizes(N).second) {
5342 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5343 CGF, VarType: CGM.getContext().getSizeType(),
5344 Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
5345 Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
5346 Ty: CGM.getContext().getSizeType(), Loc);
5347 }
5348 RCG.emitAggregateType(CGF, N, Size);
5349 // Remap lhs and rhs variables to the addresses of the function arguments.
5350 // %lhs = bitcast void* %arg0 to <type>*
5351 // %rhs = bitcast void* %arg1 to <type>*
5352 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5353 PrivateScope.addPrivate(
5354 LocalVD: LHSVD,
5355 // Pull out the pointer to the variable.
5356 Addr: CGF.EmitLoadOfPointer(
5357 Ptr: CGF.GetAddrOfLocalVar(&ParamInOut)
5358 .withElementType(
5359 ElemTy: CGF.ConvertTypeForMem(T: LHSVD->getType())->getPointerTo()),
5360 PtrTy: C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5361 PrivateScope.addPrivate(
5362 LocalVD: RHSVD,
5363 // Pull out the pointer to the variable.
5364 Addr: CGF.EmitLoadOfPointer(
5365 Ptr: CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5366 ElemTy: CGF.ConvertTypeForMem(T: RHSVD->getType())->getPointerTo()),
5367 PtrTy: C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5368 PrivateScope.Privatize();
5369 // Emit the combiner body:
5370 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5371 // store <type> %2, <type>* %lhs
5372 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5373 CGF, ReductionOp, PrivateRef, LHS: cast<DeclRefExpr>(Val: LHS),
5374 RHS: cast<DeclRefExpr>(Val: RHS));
5375 CGF.FinishFunction();
5376 return Fn;
5377}
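
// Continuing the illustrative '+' reduction, the combiner emitted above is
// conceptually equivalent to
//   *(T *)arg0 = *(T *)arg0 + *(T *)arg1;
// where T is the type of the reduction item and arg0/arg1 are the in/out and
// in arguments, respectively.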
5378
5379/// Emits reduction finalizer function:
5380/// \code
5381/// void @.red_fini(void* %arg) {
5382/// %0 = bitcast void* %arg to <type>*
5383/// <destroy>(<type>* %0)
5384/// ret void
5385/// }
5386/// \endcode
5387static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5388 SourceLocation Loc,
5389 ReductionCodeGen &RCG, unsigned N) {
5390 if (!RCG.needCleanups(N))
5391 return nullptr;
5392 ASTContext &C = CGM.getContext();
5393 FunctionArgList Args;
5394 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5395 ImplicitParamKind::Other);
5396 Args.emplace_back(Args: &Param);
5397 const auto &FnInfo =
5398 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5399 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5400 std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_fini", ""});
5401 auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
5402 N: Name, M: &CGM.getModule());
5403 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
5404 Fn->setDoesNotRecurse();
5405 CodeGenFunction CGF(CGM);
5406 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: Fn, FnInfo: FnInfo, Args, Loc, StartLoc: Loc);
5407 Address PrivateAddr = CGF.EmitLoadOfPointer(
5408 Ptr: CGF.GetAddrOfLocalVar(&Param), PtrTy: C.VoidPtrTy.castAs<PointerType>());
5409 llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from the
  // global threadprivate variable.
5412 if (RCG.getSizes(N).second) {
5413 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5414 CGF, VarType: CGM.getContext().getSizeType(),
5415 Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
5416 Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
5417 Ty: CGM.getContext().getSizeType(), Loc);
5418 }
5419 RCG.emitAggregateType(CGF, N, Size);
5420 // Emit the finalizer body:
5421 // <destroy>(<type>* %0)
5422 RCG.emitCleanups(CGF, N, PrivateAddr);
5423 CGF.FinishFunction(EndLoc: Loc);
5424 return Fn;
5425}
5426
5427llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5428 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5429 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5430 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5431 return nullptr;
5432
5433 // Build typedef struct:
5434 // kmp_taskred_input {
5435 // void *reduce_shar; // shared reduction item
5436 // void *reduce_orig; // original reduction item used for initialization
5437 // size_t reduce_size; // size of data item
5438 // void *reduce_init; // data initialization routine
5439 // void *reduce_fini; // data finalization routine
5440 // void *reduce_comb; // data combiner routine
5441 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5442 // } kmp_taskred_input_t;
5443 ASTContext &C = CGM.getContext();
5444 RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_taskred_input_t");
5445 RD->startDefinition();
5446 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5447 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5448 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5449 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5450 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5451 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5452 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5453 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5454 RD->completeDefinition();
5455 QualType RDType = C.getRecordType(Decl: RD);
5456 unsigned Size = Data.ReductionVars.size();
5457 llvm::APInt ArraySize(/*numBits=*/64, Size);
5458 QualType ArrayRDType =
5459 C.getConstantArrayType(EltTy: RDType, ArySize: ArraySize, SizeExpr: nullptr,
5460 ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
5462 Address TaskRedInput = CGF.CreateMemTemp(T: ArrayRDType, Name: ".rd_input.");
5463 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5464 Data.ReductionCopies, Data.ReductionOps);
5465 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
5467 llvm::Value *Idxs[] = {llvm::ConstantInt::get(Ty: CGM.SizeTy, /*V=*/0),
5468 llvm::ConstantInt::get(Ty: CGM.SizeTy, V: Cnt)};
5469 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5470 ElemTy: TaskRedInput.getElementType(), Ptr: TaskRedInput.getPointer(), IdxList: Idxs,
5471 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5472 Name: ".rd_input.gep.");
5473 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(V: GEP, T: RDType);
5474 // ElemLVal.reduce_shar = &Shareds[Cnt];
5475 LValue SharedLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SharedFD);
5476 RCG.emitSharedOrigLValue(CGF, N: Cnt);
5477 llvm::Value *Shared = RCG.getSharedLValue(N: Cnt).getPointer(CGF);
5478 CGF.EmitStoreOfScalar(value: Shared, lvalue: SharedLVal);
5479 // ElemLVal.reduce_orig = &Origs[Cnt];
5480 LValue OrigLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: OrigFD);
5481 llvm::Value *Orig = RCG.getOrigLValue(N: Cnt).getPointer(CGF);
5482 CGF.EmitStoreOfScalar(value: Orig, lvalue: OrigLVal);
5483 RCG.emitAggregateType(CGF, N: Cnt);
5484 llvm::Value *SizeValInChars;
5485 llvm::Value *SizeVal;
5486 std::tie(args&: SizeValInChars, args&: SizeVal) = RCG.getSizes(N: Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because the runtime does not provide a way to pass the
    // sizes of VLAs/array sections to the initializer/combiner/finalizer
    // functions. Instead, threadprivate global variables are used to store
    // these values and make them available to those functions.
5492 bool DelayedCreation = !!SizeVal;
5493 SizeValInChars = CGF.Builder.CreateIntCast(V: SizeValInChars, DestTy: CGM.SizeTy,
5494 /*isSigned=*/false);
5495 LValue SizeLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SizeFD);
5496 CGF.EmitStoreOfScalar(value: SizeValInChars, lvalue: SizeLVal);
5497 // ElemLVal.reduce_init = init;
5498 LValue InitLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: InitFD);
5499 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, N: Cnt);
5500 CGF.EmitStoreOfScalar(value: InitAddr, lvalue: InitLVal);
5501 // ElemLVal.reduce_fini = fini;
5502 LValue FiniLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FiniFD);
5503 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, N: Cnt);
5504 llvm::Value *FiniAddr =
5505 Fini ? Fini : llvm::ConstantPointerNull::get(T: CGM.VoidPtrTy);
5506 CGF.EmitStoreOfScalar(value: FiniAddr, lvalue: FiniLVal);
5507 // ElemLVal.reduce_comb = comb;
5508 LValue CombLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: CombFD);
5509 llvm::Value *CombAddr = emitReduceCombFunction(
5510 CGM, Loc, RCG, N: Cnt, ReductionOp: Data.ReductionOps[Cnt], LHS: LHSExprs[Cnt],
5511 RHS: RHSExprs[Cnt], PrivateRef: Data.ReductionCopies[Cnt]);
5512 CGF.EmitStoreOfScalar(value: CombAddr, lvalue: CombLVal);
5513 // ElemLVal.flags = 0;
5514 LValue FlagsLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FlagsFD);
5515 if (DelayedCreation) {
5516 CGF.EmitStoreOfScalar(
5517 value: llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/1, /*isSigned=*/IsSigned: true),
5518 lvalue: FlagsLVal);
5519 } else
5520 CGF.EmitNullInitialization(DestPtr: FlagsLVal.getAddress(CGF),
5521 Ty: FlagsLVal.getType());
5522 }
5523 if (Data.IsReductionWithTaskMod) {
5524 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5525 // is_ws, int num, void *data);
5526 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5527 llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
5528 DestTy: CGM.IntTy, /*isSigned=*/true);
5529 llvm::Value *Args[] = {
5530 IdentTLoc, GTid,
5531 llvm::ConstantInt::get(Ty: CGM.IntTy, V: Data.IsWorksharingReduction ? 1 : 0,
5532 /*isSigned=*/IsSigned: true),
5533 llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
5534 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5535 V: TaskRedInput.getPointer(), DestTy: CGM.VoidPtrTy)};
5536 return CGF.EmitRuntimeCall(
5537 callee: OMPBuilder.getOrCreateRuntimeFunction(
5538 M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_modifier_init),
5539 args: Args);
5540 }
5541 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5542 llvm::Value *Args[] = {
5543 CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc), DestTy: CGM.IntTy,
5544 /*isSigned=*/true),
5545 llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
5546 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: TaskRedInput.getPointer(),
5547 DestTy: CGM.VoidPtrTy)};
5548 return CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
5549 M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_init),
5550 args: Args);
5551}
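
// Illustrative sketch of what this emits for two reduction items without a
// reduction modifier (value names are descriptive, not the actual IR names):
//   %.rd_input. = alloca [2 x %struct.kmp_taskred_input_t]
//   ; ...fill reduce_shar/orig/size/init/fini/comb/flags of both elements...
//   %tg = call ptr @__kmpc_taskred_init(i32 %gtid, i32 2, ptr %.rd_input.)
// The returned %tg is the taskgroup handle later passed to
// __kmpc_task_reduction_get_th_data.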
5552
5553void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5554 SourceLocation Loc,
5555 bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
5558 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5559 llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
5560 DestTy: CGM.IntTy, /*isSigned=*/true);
5561 llvm::Value *Args[] = {IdentTLoc, GTid,
5562 llvm::ConstantInt::get(Ty: CGM.IntTy,
5563 V: IsWorksharingReduction ? 1 : 0,
5564 /*isSigned=*/IsSigned: true)};
5565 (void)CGF.EmitRuntimeCall(
5566 callee: OMPBuilder.getOrCreateRuntimeFunction(
5567 M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_modifier_fini),
5568 args: Args);
5569}
5570
5571void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5572 SourceLocation Loc,
5573 ReductionCodeGen &RCG,
5574 unsigned N) {
5575 auto Sizes = RCG.getSizes(N);
  // Emit a threadprivate global variable if the size of the reduction item is
  // non-constant (Sizes.second != nullptr).
5578 if (Sizes.second) {
5579 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(V: Sizes.second, DestTy: CGM.SizeTy,
5580 /*isSigned=*/false);
5581 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5582 CGF, VarType: CGM.getContext().getSizeType(),
5583 Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
5584 CGF.Builder.CreateStore(Val: SizeVal, Addr: SizeAddr, /*IsVolatile=*/false);
5585 }
5586}
5587
5588Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5589 SourceLocation Loc,
5590 llvm::Value *ReductionsPtr,
5591 LValue SharedLVal) {
5592 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5593 // *d);
5594 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
5595 DestTy: CGM.IntTy,
5596 /*isSigned=*/true),
5597 ReductionsPtr,
5598 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5599 V: SharedLVal.getPointer(CGF), DestTy: CGM.VoidPtrTy)};
5600 return Address(
5601 CGF.EmitRuntimeCall(
5602 callee: OMPBuilder.getOrCreateRuntimeFunction(
5603 M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_get_th_data),
5604 args: Args),
5605 CGF.Int8Ty, SharedLVal.getAlignment());
5606}
5607
5608void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5609 const OMPTaskDataTy &Data) {
5610 if (!CGF.HaveInsertPoint())
5611 return;
5612
5613 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5614 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5615 OMPBuilder.createTaskwait(Loc: CGF.Builder);
5616 } else {
5617 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5618 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5619 auto &M = CGM.getModule();
5620 Address DependenciesArray = Address::invalid();
5621 llvm::Value *NumOfElements;
5622 std::tie(args&: NumOfElements, args&: DependenciesArray) =
5623 emitDependClause(CGF, Dependencies: Data.Dependences, Loc);
5624 if (!Data.Dependences.empty()) {
5625 llvm::Value *DepWaitTaskArgs[7];
5626 DepWaitTaskArgs[0] = UpLoc;
5627 DepWaitTaskArgs[1] = ThreadID;
5628 DepWaitTaskArgs[2] = NumOfElements;
5629 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5630 DepWaitTaskArgs[4] = CGF.Builder.getInt32(C: 0);
5631 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
5632 DepWaitTaskArgs[6] =
5633 llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Data.HasNowaitClause);
5634
5635 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5636
      // Dependence info is specified, so build the call
      //   void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      //       kmp_int32 ndeps, kmp_depend_info_t *dep_list,
      //       kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      //       kmp_int32 has_no_wait);
5641 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
5642 M, FnID: OMPRTL___kmpc_omp_taskwait_deps_51),
5643 args: DepWaitTaskArgs);
5644
5645 } else {
5646
5647 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5648 // global_tid);
5649 llvm::Value *Args[] = {UpLoc, ThreadID};
5650 // Ignore return result until untied tasks are supported.
5651 CGF.EmitRuntimeCall(
5652 callee: OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_omp_taskwait),
5653 args: Args);
5654 }
5655 }
5656
5657 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
5658 Region->emitUntiedSwitch(CGF);
5659}
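
// Illustrative example: for
//   #pragma omp taskwait depend(in : x)
// this emits a __kmpc_omp_taskwait_deps_51 call with ndeps = 1, whereas a
// plain `#pragma omp taskwait` lowers to __kmpc_omp_taskwait (or to the
// OpenMPIRBuilder's createTaskwait on the IRBuilder path).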
5660
5661void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5662 OpenMPDirectiveKind InnerKind,
5663 const RegionCodeGenTy &CodeGen,
5664 bool HasCancel) {
5665 if (!CGF.HaveInsertPoint())
5666 return;
5667 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5668 InnerKind != OMPD_critical &&
5669 InnerKind != OMPD_master &&
5670 InnerKind != OMPD_masked);
5671 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5672}
5673
5674namespace {
5675enum RTCancelKind {
5676 CancelNoreq = 0,
5677 CancelParallel = 1,
5678 CancelLoop = 2,
5679 CancelSections = 3,
5680 CancelTaskgroup = 4
5681};
5682} // anonymous namespace
5683
5684static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5685 RTCancelKind CancelKind = CancelNoreq;
5686 if (CancelRegion == OMPD_parallel)
5687 CancelKind = CancelParallel;
5688 else if (CancelRegion == OMPD_for)
5689 CancelKind = CancelLoop;
5690 else if (CancelRegion == OMPD_sections)
5691 CancelKind = CancelSections;
5692 else {
5693 assert(CancelRegion == OMPD_taskgroup);
5694 CancelKind = CancelTaskgroup;
5695 }
5696 return CancelKind;
5697}
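
// For example, `#pragma omp cancel for` reaches the callers below with
// CancelRegion == OMPD_for, which maps to CancelLoop (2), matching the
// kmp_int32 cncl_kind argument expected by __kmpc_cancel and
// __kmpc_cancellationpoint.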
5698
5699void CGOpenMPRuntime::emitCancellationPointCall(
5700 CodeGenFunction &CGF, SourceLocation Loc,
5701 OpenMPDirectiveKind CancelRegion) {
5702 if (!CGF.HaveInsertPoint())
5703 return;
5704 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5705 // global_tid, kmp_int32 cncl_kind);
5706 if (auto *OMPRegionInfo =
5707 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
5708 // For 'cancellation point taskgroup', the task region info may not have a
5709 // cancel. This may instead happen in another adjacent task.
5710 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5711 llvm::Value *Args[] = {
5712 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5713 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The return value indicates whether cancellation was activated.
5715 llvm::Value *Result = CGF.EmitRuntimeCall(
5716 OMPBuilder.getOrCreateRuntimeFunction(
5717 M&: CGM.getModule(), FnID: OMPRTL___kmpc_cancellationpoint),
5718 Args);
5719 // if (__kmpc_cancellationpoint()) {
5720 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5721 // exit from construct;
5722 // }
5723 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
5724 llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
5725 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
5726 CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
5727 CGF.EmitBlock(BB: ExitBB);
5728 if (CancelRegion == OMPD_parallel)
5729 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5730 // exit from construct;
5731 CodeGenFunction::JumpDest CancelDest =
5732 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5733 CGF.EmitBranchThroughCleanup(Dest: CancelDest);
5734 CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
5735 }
5736 }
5737}
5738
5739void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5740 const Expr *IfCond,
5741 OpenMPDirectiveKind CancelRegion) {
5742 if (!CGF.HaveInsertPoint())
5743 return;
5744 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5745 // kmp_int32 cncl_kind);
5746 auto &M = CGM.getModule();
5747 if (auto *OMPRegionInfo =
5748 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
5749 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5750 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5751 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5752 llvm::Value *Args[] = {
5753 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5754 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The return value indicates whether cancellation was activated.
5756 llvm::Value *Result = CGF.EmitRuntimeCall(
5757 OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_cancel), Args);
5758 // if (__kmpc_cancel()) {
5759 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5760 // exit from construct;
5761 // }
5762 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
5763 llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
5764 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
5765 CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
5766 CGF.EmitBlock(BB: ExitBB);
5767 if (CancelRegion == OMPD_parallel)
5768 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5769 // exit from construct;
5770 CodeGenFunction::JumpDest CancelDest =
5771 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5772 CGF.EmitBranchThroughCleanup(Dest: CancelDest);
5773 CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
5774 };
5775 if (IfCond) {
5776 emitIfClause(CGF, Cond: IfCond, ThenGen,
5777 ElseGen: [](CodeGenFunction &, PrePostActionTy &) {});
5778 } else {
5779 RegionCodeGenTy ThenRCG(ThenGen);
5780 ThenRCG(CGF);
5781 }
5782 }
5783}
5784
5785namespace {
5786/// Cleanup action for uses_allocators support.
5787class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5788 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5789
5790public:
5791 OMPUsesAllocatorsActionTy(
5792 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5793 : Allocators(Allocators) {}
5794 void Enter(CodeGenFunction &CGF) override {
5795 if (!CGF.HaveInsertPoint())
5796 return;
5797 for (const auto &AllocatorData : Allocators) {
5798 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5799 CGF, Allocator: AllocatorData.first, AllocatorTraits: AllocatorData.second);
5800 }
5801 }
5802 void Exit(CodeGenFunction &CGF) override {
5803 if (!CGF.HaveInsertPoint())
5804 return;
5805 for (const auto &AllocatorData : Allocators) {
5806 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5807 Allocator: AllocatorData.first);
5808 }
5809 }
5810};
5811} // namespace
5812
5813void CGOpenMPRuntime::emitTargetOutlinedFunction(
5814 const OMPExecutableDirective &D, StringRef ParentName,
5815 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5816 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5817 assert(!ParentName.empty() && "Invalid target entry parent name!");
5818 HasEmittedTargetRegion = true;
5819 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5820 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5821 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5822 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5823 if (!D.AllocatorTraits)
5824 continue;
5825 Allocators.emplace_back(Args: D.Allocator, Args: D.AllocatorTraits);
5826 }
5827 }
5828 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5829 CodeGen.setAction(UsesAllocatorAction);
5830 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5831 IsOffloadEntry, CodeGen);
5832}
5833
5834void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5835 const Expr *Allocator,
5836 const Expr *AllocatorTraits) {
5837 llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
5838 ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
5839 // Use default memspace handle.
5840 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
5841 llvm::Value *NumTraits = llvm::ConstantInt::get(
5842 CGF.IntTy, cast<ConstantArrayType>(
5843 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5844 ->getSize()
5845 .getLimitedValue());
5846 LValue AllocatorTraitsLVal = CGF.EmitLValue(E: AllocatorTraits);
5847 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5848 Addr: AllocatorTraitsLVal.getAddress(CGF), Ty: CGF.VoidPtrPtrTy, ElementTy: CGF.VoidPtrTy);
5849 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5850 AllocatorTraitsLVal.getBaseInfo(),
5851 AllocatorTraitsLVal.getTBAAInfo());
5852 llvm::Value *Traits = Addr.getPointer();
5853
5854 llvm::Value *AllocatorVal =
5855 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
5856 M&: CGM.getModule(), FnID: OMPRTL___kmpc_init_allocator),
5857 args: {ThreadId, MemSpaceHandle, NumTraits, Traits});
5858 // Store to allocator.
5859 CGF.EmitAutoVarAlloca(var: *cast<VarDecl>(
5860 Val: cast<DeclRefExpr>(Val: Allocator->IgnoreParenImpCasts())->getDecl()));
5861 LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
5862 AllocatorVal =
5863 CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: CGF.getContext().VoidPtrTy,
5864 DstTy: Allocator->getType(), Loc: Allocator->getExprLoc());
5865 CGF.EmitStoreOfScalar(value: AllocatorVal, lvalue: AllocatorLVal);
5866}
5867
5868void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5869 const Expr *Allocator) {
5870 llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
5871 ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
5872 LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
5873 llvm::Value *AllocatorVal =
5874 CGF.EmitLoadOfScalar(lvalue: AllocatorLVal, Loc: Allocator->getExprLoc());
5875 AllocatorVal = CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: Allocator->getType(),
5876 DstTy: CGF.getContext().VoidPtrTy,
5877 Loc: Allocator->getExprLoc());
5878 (void)CGF.EmitRuntimeCall(
5879 callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
5880 FnID: OMPRTL___kmpc_destroy_allocator),
5881 args: {ThreadId, AllocatorVal});
5882}
5883
5884void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
5885 const OMPExecutableDirective &D, CodeGenFunction &CGF,
5886 int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
5887 int32_t &MaxTeamsVal) {
5888
5889 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
5890 getNumThreadsExprForTargetDirective(CGF, D, UpperBound&: MaxThreadsVal,
5891 /*UpperBoundOnly=*/true);
5892
5893 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5894 for (auto *A : C->getAttrs()) {
5895 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5896 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5897 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
5898 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
5899 &AttrMinBlocksVal, &AttrMaxBlocksVal);
5900 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
5901 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
5902 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
5903 &AttrMaxThreadsVal);
5904 else
5905 continue;
5906
5907 MinThreadsVal = std::max(a: MinThreadsVal, b: AttrMinThreadsVal);
5908 if (AttrMaxThreadsVal > 0)
5909 MaxThreadsVal = MaxThreadsVal > 0
5910 ? std::min(a: MaxThreadsVal, b: AttrMaxThreadsVal)
5911 : AttrMaxThreadsVal;
5912 MinTeamsVal = std::max(a: MinTeamsVal, b: AttrMinBlocksVal);
5913 if (AttrMaxBlocksVal > 0)
5914 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(a: MaxTeamsVal, b: AttrMaxBlocksVal)
5915 : AttrMaxBlocksVal;
5916 }
5917 }
5918}
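
// Sketch of the merging rule, assuming a CUDA launch-bounds attribute on the
// directive: __launch_bounds__(128, 2) contributes AttrMaxThreadsVal = 128
// and AttrMinBlocksVal = 2, so MaxThreadsVal becomes min(MaxThreadsVal, 128)
// (or 128 if previously unbounded) and MinTeamsVal becomes
// max(MinTeamsVal, 2).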
5919
5920void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5921 const OMPExecutableDirective &D, StringRef ParentName,
5922 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5923 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5924
5925 llvm::TargetRegionEntryInfo EntryInfo =
5926 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, BeginLoc: D.getBeginLoc(), ParentName);
5927
5928 CodeGenFunction CGF(CGM, true);
5929 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5930 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
5931 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
5932
5933 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5934 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5935 return CGF.GenerateOpenMPCapturedStmtFunction(S: CS, Loc: D.getBeginLoc());
5936 };
5937
5938 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateFunctionCallback&: GenerateOutlinedFunction,
5939 IsOffloadEntry, OutlinedFn, OutlinedFnID);
5940
5941 if (!OutlinedFn)
5942 return;
5943
5944 CGM.getTargetCodeGenInfo().setTargetAttributes(D: nullptr, GV: OutlinedFn, M&: CGM);
5945
5946 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5947 for (auto *A : C->getAttrs()) {
5948 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
5949 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
5950 }
5951 }
5952}
5953
5954/// Checks if the expression is constant or does not have non-trivial function
5955/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
5957 // We can skip constant expressions.
5958 // We can skip expressions with trivial calls or simple expressions.
5959 return (E->isEvaluatable(Ctx, AllowSideEffects: Expr::SE_AllowUndefinedBehavior) ||
5960 !E->hasNonTrivialCall(Ctx)) &&
5961 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5962}
5963
5964const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
5965 const Stmt *Body) {
5966 const Stmt *Child = Body->IgnoreContainers();
5967 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Val: Child)) {
5968 Child = nullptr;
5969 for (const Stmt *S : C->body()) {
5970 if (const auto *E = dyn_cast<Expr>(Val: S)) {
5971 if (isTrivial(Ctx, E))
5972 continue;
5973 }
5974 // Some of the statements can be ignored.
5975 if (isa<AsmStmt>(Val: S) || isa<NullStmt>(Val: S) || isa<OMPFlushDirective>(Val: S) ||
5976 isa<OMPBarrierDirective>(Val: S) || isa<OMPTaskyieldDirective>(Val: S))
5977 continue;
5978 // Analyze declarations.
5979 if (const auto *DS = dyn_cast<DeclStmt>(Val: S)) {
5980 if (llvm::all_of(Range: DS->decls(), P: [](const Decl *D) {
5981 if (isa<EmptyDecl>(Val: D) || isa<DeclContext>(Val: D) ||
5982 isa<TypeDecl>(Val: D) || isa<PragmaCommentDecl>(Val: D) ||
5983 isa<PragmaDetectMismatchDecl>(Val: D) || isa<UsingDecl>(Val: D) ||
5984 isa<UsingDirectiveDecl>(Val: D) ||
5985 isa<OMPDeclareReductionDecl>(Val: D) ||
5986 isa<OMPThreadPrivateDecl>(Val: D) || isa<OMPAllocateDecl>(Val: D))
5987 return true;
5988 const auto *VD = dyn_cast<VarDecl>(Val: D);
5989 if (!VD)
5990 return false;
5991 return VD->hasGlobalStorage() || !VD->isUsed();
5992 }))
5993 continue;
5994 }
5995 // Found multiple children - cannot get the one child only.
5996 if (Child)
5997 return nullptr;
5998 Child = S;
5999 }
6000 if (Child)
6001 Child = Child->IgnoreContainers();
6002 }
6003 return Child;
6004}
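
// For example, given a captured body
//   { int unused; ; #pragma omp parallel for ... }
// the unused local declaration and the null statement are ignored and the
// `parallel for` directive is returned as the single child, while a body
// containing two non-ignorable statements yields nullptr.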
6005
6006const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6007 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6008 int32_t &MaxTeamsVal) {
6009
6010 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6011 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6012 "Expected target-based executable directive.");
6013 switch (DirectiveKind) {
6014 case OMPD_target: {
6015 const auto *CS = D.getInnermostCapturedStmt();
6016 const auto *Body =
6017 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6018 const Stmt *ChildStmt =
6019 CGOpenMPRuntime::getSingleCompoundChild(Ctx&: CGF.getContext(), Body);
6020 if (const auto *NestedDir =
6021 dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
6022 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6023 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6024 const Expr *NumTeams =
6025 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6026 if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
6027 if (auto Constant =
6028 NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
6029 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6030 return NumTeams;
6031 }
6032 MinTeamsVal = MaxTeamsVal = 0;
6033 return nullptr;
6034 }
      // Any other nested directive, including 'parallel' and 'simd'
      // directives, implies a single team.
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
6042 }
    // A value of -1 is used later to check whether no teams region needs to
    // be emitted.
6044 MinTeamsVal = MaxTeamsVal = -1;
6045 return nullptr;
6046 }
6047 case OMPD_target_teams_loop:
6048 case OMPD_target_teams:
6049 case OMPD_target_teams_distribute:
6050 case OMPD_target_teams_distribute_simd:
6051 case OMPD_target_teams_distribute_parallel_for:
6052 case OMPD_target_teams_distribute_parallel_for_simd: {
6053 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6054 const Expr *NumTeams =
6055 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6056 if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
6057 if (auto Constant = NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
6058 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6059 return NumTeams;
6060 }
6061 MinTeamsVal = MaxTeamsVal = 0;
6062 return nullptr;
6063 }
6064 case OMPD_target_parallel:
6065 case OMPD_target_parallel_for:
6066 case OMPD_target_parallel_for_simd:
6067 case OMPD_target_parallel_loop:
6068 case OMPD_target_simd:
6069 MinTeamsVal = MaxTeamsVal = 1;
6070 return nullptr;
6071 case OMPD_parallel:
6072 case OMPD_for:
6073 case OMPD_parallel_for:
6074 case OMPD_parallel_loop:
6075 case OMPD_parallel_master:
6076 case OMPD_parallel_sections:
6077 case OMPD_for_simd:
6078 case OMPD_parallel_for_simd:
6079 case OMPD_cancel:
6080 case OMPD_cancellation_point:
6081 case OMPD_ordered:
6082 case OMPD_threadprivate:
6083 case OMPD_allocate:
6084 case OMPD_task:
6085 case OMPD_simd:
6086 case OMPD_tile:
6087 case OMPD_unroll:
6088 case OMPD_sections:
6089 case OMPD_section:
6090 case OMPD_single:
6091 case OMPD_master:
6092 case OMPD_critical:
6093 case OMPD_taskyield:
6094 case OMPD_barrier:
6095 case OMPD_taskwait:
6096 case OMPD_taskgroup:
6097 case OMPD_atomic:
6098 case OMPD_flush:
6099 case OMPD_depobj:
6100 case OMPD_scan:
6101 case OMPD_teams:
6102 case OMPD_target_data:
6103 case OMPD_target_exit_data:
6104 case OMPD_target_enter_data:
6105 case OMPD_distribute:
6106 case OMPD_distribute_simd:
6107 case OMPD_distribute_parallel_for:
6108 case OMPD_distribute_parallel_for_simd:
6109 case OMPD_teams_distribute:
6110 case OMPD_teams_distribute_simd:
6111 case OMPD_teams_distribute_parallel_for:
6112 case OMPD_teams_distribute_parallel_for_simd:
6113 case OMPD_target_update:
6114 case OMPD_declare_simd:
6115 case OMPD_declare_variant:
6116 case OMPD_begin_declare_variant:
6117 case OMPD_end_declare_variant:
6118 case OMPD_declare_target:
6119 case OMPD_end_declare_target:
6120 case OMPD_declare_reduction:
6121 case OMPD_declare_mapper:
6122 case OMPD_taskloop:
6123 case OMPD_taskloop_simd:
6124 case OMPD_master_taskloop:
6125 case OMPD_master_taskloop_simd:
6126 case OMPD_parallel_master_taskloop:
6127 case OMPD_parallel_master_taskloop_simd:
6128 case OMPD_requires:
6129 case OMPD_metadirective:
6130 case OMPD_unknown:
6131 break;
6132 default:
6133 break;
6134 }
6135 llvm_unreachable("Unexpected directive kind.");
6136}
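
// As an illustration, for
//   #pragma omp target teams num_teams(8)
// this returns the num_teams expression and sets
// MinTeamsVal = MaxTeamsVal = 8, whereas `#pragma omp target parallel`
// returns nullptr with both values set to 1.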
6137
6138llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6139 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6140 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6141 "Clauses associated with the teams directive expected to be emitted "
6142 "only for the host!");
6143 CGBuilderTy &Bld = CGF.Builder;
6144 int32_t MinNT = -1, MaxNT = -1;
6145 const Expr *NumTeams =
6146 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal&: MinNT, MaxTeamsVal&: MaxNT);
6147 if (NumTeams != nullptr) {
6148 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6149
6150 switch (DirectiveKind) {
6151 case OMPD_target: {
6152 const auto *CS = D.getInnermostCapturedStmt();
6153 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6154 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6155 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
6156 /*IgnoreResultAssign*/ true);
6157 return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
6158 /*isSigned=*/true);
6159 }
6160 case OMPD_target_teams:
6161 case OMPD_target_teams_distribute:
6162 case OMPD_target_teams_distribute_simd:
6163 case OMPD_target_teams_distribute_parallel_for:
6164 case OMPD_target_teams_distribute_parallel_for_simd: {
6165 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6166 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
6167 /*IgnoreResultAssign*/ true);
6168 return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
6169 /*isSigned=*/true);
6170 }
6171 default:
6172 break;
6173 }
6174 }
6175
  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6177 return llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: MinNT);
6178}
6179
/// Check for a num threads constant value (stored in \p UpperBound), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
/// corresponding expression evaluation is not performed.
6184static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6185 const Expr **E, int32_t &UpperBound,
6186 bool UpperBoundOnly, llvm::Value **CondVal) {
6187 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6188 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6189 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
6190 if (!Dir)
6191 return;
6192
6193 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle the if clause. If present, the number of threads is calculated
    // as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6196 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6197 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6198 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6199 const OMPIfClause *IfClause = nullptr;
6200 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6201 if (C->getNameModifier() == OMPD_unknown ||
6202 C->getNameModifier() == OMPD_parallel) {
6203 IfClause = C;
6204 break;
6205 }
6206 }
6207 if (IfClause) {
6208 const Expr *CondExpr = IfClause->getCondition();
6209 bool Result;
6210 if (CondExpr->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
6211 if (!Result) {
6212 UpperBound = 1;
6213 return;
6214 }
6215 } else {
6216 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
          // Evaluate the condition even when there are no pre-init
          // declarations; it is needed by the caller in all non-constant
          // cases.
          *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6230 }
6231 }
6232 }
    // Check the value of the num_threads clause iff the if clause was not
    // specified or did not evaluate to false.
6235 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6236 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6237 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6238 const auto *NumThreadsClause =
6239 Dir->getSingleClause<OMPNumThreadsClause>();
6240 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6241 if (NTExpr->isIntegerConstantExpr(Ctx: CGF.getContext()))
6242 if (auto Constant = NTExpr->getIntegerConstantExpr(Ctx: CGF.getContext()))
        UpperBound =
            UpperBound > 0
                ? std::min(UpperBound,
                           static_cast<int32_t>(Constant->getZExtValue()))
                : static_cast<int32_t>(Constant->getZExtValue());
      // If we haven't found an upper bound, remember we saw a thread-limiting
      // clause.
6250 if (UpperBound == -1)
6251 UpperBound = 0;
6252 if (!E)
6253 return;
6254 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6255 if (const auto *PreInit =
6256 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6257 for (const auto *I : PreInit->decls()) {
6258 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6259 CGF.EmitVarDecl(cast<VarDecl>(*I));
6260 } else {
6261 CodeGenFunction::AutoVarEmission Emission =
6262 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6263 CGF.EmitAutoVarCleanups(Emission);
6264 }
6265 }
6266 }
6267 *E = NTExpr;
6268 }
6269 return;
6270 }
6271 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6272 UpperBound = 1;
6273}
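
// Example: for a nested
//   #pragma omp parallel if(c) num_threads(4)
// with a non-constant condition `c`, *CondVal receives the emitted value of
// `c` and UpperBound becomes 4, so callers can materialize `c ? 4 : 1` as
// the effective number of threads.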
6274
6275const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6276 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6277 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6278 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6279 "Clauses associated with the teams directive expected to be emitted "
6280 "only for the host!");
6281 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6282 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6283 "Expected target-based executable directive.");
6284
6285 const Expr *NT = nullptr;
6286 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6287
6288 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6289 if (E->isIntegerConstantExpr(Ctx: CGF.getContext())) {
6290 if (auto Constant = E->getIntegerConstantExpr(Ctx: CGF.getContext()))
      UpperBound = UpperBound > 0
                       ? std::min(UpperBound,
                                  int32_t(Constant->getZExtValue()))
                       : int32_t(Constant->getZExtValue());
6294 }
    // If we haven't found an upper bound, remember we saw a thread-limiting
    // clause.
6297 if (UpperBound == -1)
6298 UpperBound = 0;
6299 if (EPtr)
6300 *EPtr = E;
6301 };
6302
6303 auto ReturnSequential = [&]() {
6304 UpperBound = 1;
6305 return NT;
6306 };
6307
6308 switch (DirectiveKind) {
6309 case OMPD_target: {
6310 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6311 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6312 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6313 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
    // TODO: The standard is not clear on how to resolve two thread limit
    // clauses; pick the teams one if it's present, otherwise the target one.
6316 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6317 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6318 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6319 ThreadLimitClause = TLC;
6320 if (ThreadLimitExpr) {
6321 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6322 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6323 CodeGenFunction::LexicalScope Scope(
6324 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6325 if (const auto *PreInit =
6326 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6327 for (const auto *I : PreInit->decls()) {
6328 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6329 CGF.EmitVarDecl(cast<VarDecl>(*I));
6330 } else {
6331 CodeGenFunction::AutoVarEmission Emission =
6332 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6333 CGF.EmitAutoVarCleanups(Emission);
6334 }
6335 }
6336 }
6337 }
6338 }
6339 }
6340 if (ThreadLimitClause)
6341 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6342 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6343 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6344 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6345 CS = Dir->getInnermostCapturedStmt();
6346 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6347 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6348 Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
6349 }
6350 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6351 CS = Dir->getInnermostCapturedStmt();
6352 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6353 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6354 return ReturnSequential();
6355 }
6356 return NT;
6357 }
6358 case OMPD_target_teams: {
6359 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6360 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6361 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6362 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6363 }
6364 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6365 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6366 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6367 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6368 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6369 if (Dir->getDirectiveKind() == OMPD_distribute) {
6370 CS = Dir->getInnermostCapturedStmt();
6371 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6372 }
6373 }
6374 return NT;
6375 }
6376 case OMPD_target_teams_distribute:
6377 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6378 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6379 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6380 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6381 }
6382 getNumThreads(CGF, CS: D.getInnermostCapturedStmt(), E: NTPtr, UpperBound,
6383 UpperBoundOnly, CondVal);
6384 return NT;
6385 case OMPD_target_teams_loop:
6386 case OMPD_target_parallel_loop:
6387 case OMPD_target_parallel:
6388 case OMPD_target_parallel_for:
6389 case OMPD_target_parallel_for_simd:
6390 case OMPD_target_teams_distribute_parallel_for:
6391 case OMPD_target_teams_distribute_parallel_for_simd: {
6392 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6393 const OMPIfClause *IfClause = nullptr;
6394 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6395 if (C->getNameModifier() == OMPD_unknown ||
6396 C->getNameModifier() == OMPD_parallel) {
6397 IfClause = C;
6398 break;
6399 }
6400 }
6401 if (IfClause) {
6402 const Expr *Cond = IfClause->getCondition();
6403 bool Result;
6404 if (Cond->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
6405 if (!Result)
6406 return ReturnSequential();
6407 } else {
6408 CodeGenFunction::RunCleanupsScope Scope(CGF);
6409 *CondVal = CGF.EvaluateExprAsBool(E: Cond);
6410 }
6411 }
6412 }
6413 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6414 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6415 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6416 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6417 }
6418 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6419 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6420 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6421 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6422 return NumThreadsClause->getNumThreads();
6423 }
6424 return NT;
6425 }
6426 case OMPD_target_teams_distribute_simd:
6427 case OMPD_target_simd:
6428 return ReturnSequential();
6429 default:
6430 break;
6431 }
6432 llvm_unreachable("Unsupported directive kind.");
6433}
6434
6435llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6436 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6437 llvm::Value *NumThreadsVal = nullptr;
6438 llvm::Value *CondVal = nullptr;
6439 llvm::Value *ThreadLimitVal = nullptr;
6440 const Expr *ThreadLimitExpr = nullptr;
6441 int32_t UpperBound = -1;
6442
6443 const Expr *NT = getNumThreadsExprForTargetDirective(
6444 CGF, D, UpperBound, /* UpperBoundOnly */ false, CondVal: &CondVal,
6445 ThreadLimitExpr: &ThreadLimitExpr);
6446
6447 // Thread limit expressions are used below, emit them.
6448 if (ThreadLimitExpr) {
6449 ThreadLimitVal =
6450 CGF.EmitScalarExpr(E: ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6451 ThreadLimitVal = CGF.Builder.CreateIntCast(V: ThreadLimitVal, DestTy: CGF.Int32Ty,
6452 /*isSigned=*/false);
6453 }
6454
  // Generate the num threads expression.
6456 if (UpperBound == 1) {
6457 NumThreadsVal = CGF.Builder.getInt32(C: UpperBound);
6458 } else if (NT) {
6459 NumThreadsVal = CGF.EmitScalarExpr(E: NT, /*IgnoreResultAssign=*/true);
6460 NumThreadsVal = CGF.Builder.CreateIntCast(V: NumThreadsVal, DestTy: CGF.Int32Ty,
6461 /*isSigned=*/false);
6462 } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We already handled the thread limit expression.
6465 NumThreadsVal = ThreadLimitVal;
6466 ThreadLimitVal = nullptr;
6467 } else {
6468 // Default to "0" which means runtime choice.
6469 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6470 NumThreadsVal = CGF.Builder.getInt32(C: 0);
6471 }
6472
  // Handle the if clause. If present, the number of threads is calculated as
  // <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6475 if (CondVal) {
6476 CodeGenFunction::RunCleanupsScope Scope(CGF);
6477 NumThreadsVal = CGF.Builder.CreateSelect(C: CondVal, True: NumThreadsVal,
6478 False: CGF.Builder.getInt32(C: 1));
6479 }
6480
  // If both the thread limit and the num threads expression were present,
  // take the minimum.
6483 if (ThreadLimitVal) {
6484 NumThreadsVal = CGF.Builder.CreateSelect(
6485 C: CGF.Builder.CreateICmpULT(LHS: ThreadLimitVal, RHS: NumThreadsVal),
6486 True: ThreadLimitVal, False: NumThreadsVal);
6487 }
6488
6489 return NumThreadsVal;
6490}
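
// Putting it together for
//   #pragma omp target parallel num_threads(n) thread_limit(m)
// the emitted value is conceptually the unsigned minimum of m and n,
// realized as an icmp ult plus select, with the if-clause selection
// (cond ? n : 1) applied first when an if clause is present.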
6491
6492namespace {
6493LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6494
6495// Utility to handle information from clauses associated with a given
6496// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6497// It provides a convenient interface to obtain the information and generate
6498// code for that information.
6499class MappableExprsHandler {
6500public:
6501 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6502 static unsigned getFlagMemberOffset() {
6503 unsigned Offset = 0;
6504 for (uint64_t Remain =
6505 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6506 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6507 !(Remain & 1); Remain = Remain >> 1)
6508 Offset++;
6509 return Offset;
6510 }
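
  // For instance, if OMP_MAP_MEMBER_OF occupies the top 16 bits of a 64-bit
  // flag word (bits 48..63), the loop above returns 48, i.e. the amount by
  // which a zero-based member index must be shifted left to be placed in the
  // MEMBER_OF field.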
6511
6512 /// Class that holds debugging information for a data mapping to be passed to
6513 /// the runtime library.
6514 class MappingExprInfo {
6515 /// The variable declaration used for the data mapping.
6516 const ValueDecl *MapDecl = nullptr;
6517 /// The original expression used in the map clause, or null if there is
6518 /// none.
6519 const Expr *MapExpr = nullptr;
6520
6521 public:
6522 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6523 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6524
6525 const ValueDecl *getMapDecl() const { return MapDecl; }
6526 const Expr *getMapExpr() const { return MapExpr; }
6527 };
6528
6529 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6530 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6531 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6532 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6533 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6534 using MapNonContiguousArrayTy =
6535 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6536 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6537 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6538
6539 /// This structure contains combined information generated for mappable
6540 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6541 /// mappers, and non-contiguous information.
6542 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6543 MapExprsArrayTy Exprs;
6544 MapValueDeclsArrayTy Mappers;
6545 MapValueDeclsArrayTy DevicePtrDecls;
6546
6547 /// Append arrays in \a CurInfo.
6548 void append(MapCombinedInfoTy &CurInfo) {
6549 Exprs.append(in_start: CurInfo.Exprs.begin(), in_end: CurInfo.Exprs.end());
6550 DevicePtrDecls.append(in_start: CurInfo.DevicePtrDecls.begin(),
6551 in_end: CurInfo.DevicePtrDecls.end());
6552 Mappers.append(in_start: CurInfo.Mappers.begin(), in_end: CurInfo.Mappers.end());
6553 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6554 }
6555 };
6556
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
6559 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6560 /// HE(FieldIndex, Pointer)}
6561 struct StructRangeInfoTy {
6562 MapCombinedInfoTy PreliminaryMapData;
6563 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6564 0, Address::invalid()};
6565 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6566 0, Address::invalid()};
6567 Address Base = Address::invalid();
6568 Address LB = Address::invalid();
6569 bool IsArraySection = false;
6570 bool HasCompleteRecord = false;
6571 };
6572
6573private:
  /// Information gathered from a map-like clause for a single mappable
  /// expression, including how a device pointer has to be returned.
6575 struct MapInfo {
6576 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6577 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6578 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6579 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6580 bool ReturnDevicePointer = false;
6581 bool IsImplicit = false;
6582 const ValueDecl *Mapper = nullptr;
6583 const Expr *VarRef = nullptr;
6584 bool ForDeviceAddr = false;
6585
6586 MapInfo() = default;
6587 MapInfo(
6588 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6589 OpenMPMapClauseKind MapType,
6590 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6591 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6592 bool ReturnDevicePointer, bool IsImplicit,
6593 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6594 bool ForDeviceAddr = false)
6595 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6596 MotionModifiers(MotionModifiers),
6597 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6598 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6599 };
6600
6601 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6602 /// member and there is no map information about it, then emission of that
6603 /// entry is deferred until the whole struct has been processed.
6604 struct DeferredDevicePtrEntryTy {
6605 const Expr *IE = nullptr;
6606 const ValueDecl *VD = nullptr;
6607 bool ForDeviceAddr = false;
6608
6609 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6610 bool ForDeviceAddr)
6611 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6612 };
6613
  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
6616 llvm::PointerUnion<const OMPExecutableDirective *,
6617 const OMPDeclareMapperDecl *>
6618 CurDir;
6619
6620 /// Function the directive is being generated for.
6621 CodeGenFunction &CGF;
6622
6623 /// Set of all first private variables in the current directive.
6624 /// bool data is set to true if the variable is implicitly marked as
6625 /// firstprivate, false otherwise.
6626 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6627
6628 /// Map between device pointer declarations and their expression components.
6629 /// The key value for declarations in 'this' is null.
6630 llvm::DenseMap<
6631 const ValueDecl *,
6632 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6633 DevPointersMap;
6634
6635 /// Map between device addr declarations and their expression components.
6636 /// The key value for declarations in 'this' is null.
6637 llvm::DenseMap<
6638 const ValueDecl *,
6639 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6640 HasDevAddrsMap;
6641
6642 /// Map between lambda declarations and their map type.
6643 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6644
6645 llvm::Value *getExprTypeSize(const Expr *E) const {
6646 QualType ExprTy = E->getType().getCanonicalType();
6647
6648 // Calculate the size for array shaping expression.
6649 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(Val: E)) {
6650 llvm::Value *Size =
6651 CGF.getTypeSize(Ty: OAE->getBase()->getType()->getPointeeType());
6652 for (const Expr *SE : OAE->getDimensions()) {
6653 llvm::Value *Sz = CGF.EmitScalarExpr(E: SE);
6654 Sz = CGF.EmitScalarConversion(Src: Sz, SrcTy: SE->getType(),
6655 DstTy: CGF.getContext().getSizeType(),
6656 Loc: SE->getExprLoc());
6657 Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: Sz);
6658 }
6659 return Size;
6660 }
6661
6662 // Reference types are ignored for mapping purposes.
6663 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6664 ExprTy = RefTy->getPointeeType().getCanonicalType();
6665
6666 // Given that an array section is considered a built-in type, we need to
6667 // do the calculation based on the length of the section instead of relying
6668 // on CGF.getTypeSize(E->getType()).
6669 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(Val: E)) {
6670 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6671 Base: OAE->getBase()->IgnoreParenImpCasts())
6672 .getCanonicalType();
6673
6674 // If there is no length associated with the expression and lower bound is
6675 // not specified too, that means we are using the whole length of the
6676 // base.
6677 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6678 !OAE->getLowerBound())
6679 return CGF.getTypeSize(Ty: BaseTy);
6680
6681 llvm::Value *ElemSize;
6682 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6683 ElemSize = CGF.getTypeSize(Ty: PTy->getPointeeType().getCanonicalType());
6684 } else {
6685 const auto *ATy = cast<ArrayType>(Val: BaseTy.getTypePtr());
6686 assert(ATy && "Expecting array type if not a pointer type.");
6687 ElemSize = CGF.getTypeSize(Ty: ATy->getElementType().getCanonicalType());
6688 }
6689
6690 // If we don't have a length at this point, that is because we have an
6691 // array section with a single element.
6692 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6693 return ElemSize;
6694
6695 if (const Expr *LenExpr = OAE->getLength()) {
6696 llvm::Value *LengthVal = CGF.EmitScalarExpr(E: LenExpr);
6697 LengthVal = CGF.EmitScalarConversion(Src: LengthVal, SrcTy: LenExpr->getType(),
6698 DstTy: CGF.getContext().getSizeType(),
6699 Loc: LenExpr->getExprLoc());
6700 return CGF.Builder.CreateNUWMul(LHS: LengthVal, RHS: ElemSize);
6701 }
6702 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6703 OAE->getLowerBound() && "expected array_section[lb:].");
6704 // Size = sizetype - lb * elemtype;
6705 llvm::Value *LengthVal = CGF.getTypeSize(Ty: BaseTy);
6706 llvm::Value *LBVal = CGF.EmitScalarExpr(E: OAE->getLowerBound());
6707 LBVal = CGF.EmitScalarConversion(Src: LBVal, SrcTy: OAE->getLowerBound()->getType(),
6708 DstTy: CGF.getContext().getSizeType(),
6709 Loc: OAE->getLowerBound()->getExprLoc());
6710 LBVal = CGF.Builder.CreateNUWMul(LHS: LBVal, RHS: ElemSize);
6711 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LHS: LengthVal, RHS: LBVal);
6712 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LHS: LengthVal, RHS: LBVal);
6713 LengthVal = CGF.Builder.CreateSelect(
6714 C: Cmp, True: TrueVal, False: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 0));
6715 return LengthVal;
6716 }
6717 return CGF.getTypeSize(Ty: ExprTy);
6718 }
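
  // A sketch of what the branch above computes for an open-ended section
  // (example numbers only, no IR shown): for
  //   int arr[100];
  //   #pragma omp target update to(arr[40:])
  // the size is max(sizeof(arr) - 40 * sizeof(int), 0) = 240 bytes; the
  // select clamps the result to zero when the lower bound lies past the end
  // of the array.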

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }
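
  // For illustration only (a hypothetical clause, not code from this file):
  // an explicit 'map(always, close, tofrom: x)' comes out of getMapTypeBits
  // as
  //   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE
  // with OMP_MAP_PTR_AND_OBJ / OMP_MAP_TARGET_PARAM OR'ed in only when the
  // caller sets the corresponding Add*Flag arguments.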

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size other than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
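
  // A few illustrative answers (hypothetical declarations):
  //   int a[8];    a[0:8] -> true  (constant length other than 1)
  //   int a[8];    a[3]   -> false (not an array section at all)
  //   int a[8];    a[3:1] -> false (length provably 1)
  //   int *p, n;   p[0:n] -> true  (length not provably 1)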

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo,
      MapCombinedInfoTy &StructBaseCombinedInfo,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool GenerateAllInfoForClauses,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = std::nullopt) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a = &i;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (**) allocate contiguous space needed to fit all mapped members even if
    //      we allocate space for members not mapped (in this example,
    //      s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //      them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)
    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have
    // a pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member
    // expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    // We need to check if we will be encountering any member expressions
    // (MEs). If we do not encounter any ME it means we will be mapping the
    // whole struct. In that case we need to skip adding an entry for the
    // struct to the CombinedInfo list and instead add an entry to the
    // StructBaseCombinedInfo list only when generating all info for clauses.
    bool IsMappingWholeStruct = true;
    if (!GenerateAllInfoForClauses) {
      IsMappingWholeStruct = false;
    } else {
      for (auto TempI = I; TempI != CE; ++TempI) {
        const MemberExpr *PossibleME =
            dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
        if (PossibleME) {
          IsMappingWholeStruct = false;
          break;
        }
      }
    }

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer &&
                               !(UO && UO->getOpcode() != UO_Deref) && !BO &&
                               !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer());
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        // Skip adding an entry in the CurInfo of this combined entry if the
        // whole struct is currently being mapped. The struct needs to be
        // added in the first position before any data internal to the struct
        // is being mapped.
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          if (!IsMappingWholeStruct) {
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
          } else {
            StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            StructBaseCombinedInfo.BasePointers.push_back(BP.getPointer());
            StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
            StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            StructBaseCombinedInfo.Pointers.push_back(LB.getPointer());
            StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
                IsNonContiguous ? DimSize : 1);
          }

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          if (!IsMappingWholeStruct)
            CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
          else
            StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
                                                               : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                         OpenMPOffloadMappingFlags::OMP_MAP_FROM |
                         OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
                         OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
                         OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          if (!IsMappingWholeStruct)
            CombinedInfo.Types.push_back(Flags);
          else
            StructBaseCombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran over the whole component list without finding a member
    // expression, allocate the space for the complete record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // For supporting stride in array section, we need to initialize the first
    // dimension size as 1, first offset as 0, and first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last one.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for next iteration.
        if (ElementType) {
          // When the base is a pointer, we need to remove one level of
          // indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get dimension value except for the last dimension since we don't need
      // it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Running product of dimension sizes.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous maps. Notice that offset, count, and
    // stride are only meaningful for an array section, so entries are only
    // recorded for array sections (and array subscripts).
    // Also, the lengths of the offsets, counts, and strides arrays are not
    // the same as those of pointers, base_pointers, sizes, or dims. Instead,
    // they match the number of non-contiguous declarations in the target
    // update to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once an outer dimension is an array section, all the
        // inner dimensions are constructed as array sections too. However,
        // for a case like arr[0:2][2], Clang constructs the inner dimension
        // as an array section even though it is not one according to the
        // spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //         Offset  Count  Stride
      //    D0     0       1       4   (int)               <- dummy dimension
      //    D1     0       2       8   (2 * (1) * 4)
      //    D2     1       2      20   (1 * (1 * 5) * 4)
      //    D3     0       2     200   (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda), use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  }
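
  // Rough sketch of the resulting flags (illustrative captures only):
  //   int *p; firstprivate(p) -> OMP_MAP_TO | OMP_MAP_PTR_AND_OBJ
  //   int x;  firstprivate(x) -> OMP_MAP_PRIVATE | OMP_MAP_TO
  //   a lambda in map(to: l)  -> whatever getMapTypeBits derives from the
  //                              user-specified map type and modifiers
  //   any other capture       -> OMP_MAP_TO | OMP_MAP_FROM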

  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
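
  // A hedged example of the traversal order (hypothetical types): given
  //   struct A { int a; };
  //   struct B : A { int b; };
  // getPlainLayout(B, Layout, /*AsBase=*/false) first recurses into the
  // non-virtual base and appends A::a, then appends B::b, i.e. it yields the
  // fields in LLVM struct layout order with empty bases, zero-size fields,
  // and bitfields skipped.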

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), std::nullopt,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                std::nullopt, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information
    // for an entry in the use_device_ptr and use_device_addr list, we create
    // one with map type 'alloc' and zero size section. It is the user's fault
    // if that was not mapped before. If there is no map information and the
    // pointer is a struct member, then we defer the emission of that entry
    // until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                    std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };

    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };
7958
    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
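    // For illustration only (hypothetical user code):
    //   #pragma omp target data map(to: P[0:N]) use_device_ptr(P)
    // finds existing map info for 'P' and marks it RETURN_PARAM, whereas
    //   #pragma omp target data use_device_ptr(Q)
    // with no map for 'Q' gets the zero-size 'alloc' entry described above.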
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }

    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Current struct information:
      MapCombinedInfoTy CurInfo;
      // Current struct base information:
      MapCombinedInfoTy StructBaseCurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              StructBaseCurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, StructBaseCurInfo, PartialStruct,
              /*IsFirstComponentList=*/false, L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
              L.VarRef);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either CurInfo or
            // StructBaseCurInfo and error if no value was added to either of
            // them:
            assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        StructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If StructBaseCurInfo has been updated this iteration then work
            // on the first new entry added to it, i.e. make sure that when
            // multiple values are added to any of the lists, the first value
            // added is being modified by the assignments below (not the last
            // value added).
            if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
              StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
                  RelevantVD;
              StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              StructBaseCurInfo.Types[StructBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            } else {
              CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
              CurInfo.DevicePointers[CurrentBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              CurInfo.Types[CurrentBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            }
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // Unify entries in one list making sure the struct mapping precedes the
      // individual fields:
      MapCombinedInfoTy UnionCurInfo;
      UnionCurInfo.append(StructBaseCurInfo);
      UnionCurInfo.append(CurInfo);

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        UnionCurInfo.NonContigInfo.Dims.push_back(0);
        // Emit a combined entry:
        emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(UnionCurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
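  ///
  /// For illustration only (hypothetical user code), a case that produces
  /// such a combined entry is a struct with individually mapped members:
  /// \code
  /// struct S { int A; double B; } Val;
  /// #pragma omp target map(tofrom: Val.A, Val.B)
  /// { ... }
  /// \endcode
  /// Here a combined entry spanning from the lowest mapped element (Val.A) to
  /// the highest (Val.B) is emitted in addition to the per-member entries.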
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which
      // the non-static data member function is invoked, the variable is
      // treated as if the this[:1] expression had appeared in a map clause
      // with a map-type of tofrom.
      // Emit this[:1]
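      // For illustration only (hypothetical user code): inside a member
      // function of a class that has a base class,
      //   void C::foo() {
      //   #pragma omp target map(M)   // M is a non-static data member
      //     { ... }
      //   }
      // is handled as if map(tofrom: this[:1]) had been written.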
      CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.getPointer();
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM when generating info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates to a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
                              SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of a user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
                                llvm::OpenMPIRBuilder &OMPBuilder) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
                              OMPBuilder);
  }

  /// Emit capture info for lambdas for variables captured by reference.
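  ///
  /// A sketch, for illustration only (hypothetical user code): a lambda that
  /// captures 'X' by reference and is mapped to a target region needs a
  /// PTR_AND_OBJ entry for the capture field so the device copy of the lambda
  /// points at the device copy of 'X':
  /// \code
  /// int X = 0;
  /// auto L = [&X]() { return X; };
  /// #pragma omp target map(to: L)
  /// { L(); }
  /// \endcode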
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag =
          OMPBuilder.getMemberOfFlag(TgtIdx);
      OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields listed in is_device_ptr, store them in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not correct if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
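    // For illustration only (hypothetical user code): with
    //   #pragma omp target map(tofrom: S) map(to: S.A)
    // the component list for 'S.A' overlaps the one for 'S', so 'S.A' is
    // recorded below as an overlapped sub-component of the 'S' entry.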
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // The lists overlap if, for at least one of the two component lists,
        // we reached the head of the list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section,
          // dereference, etc.), it is not an overlap.
          // Same if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through the other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
            StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
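  ///
  /// For illustration only (hypothetical user code): with no explicit map
  /// clauses,
  /// \code
  /// int N = 10;
  /// int *P = Buf;
  /// #pragma omp target
  /// { N += P[0]; }
  /// \endcode
  /// 'N' is passed by copy as a LITERAL target parameter, while the pointer
  /// 'P' gets an implicit zero-size entry.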
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime that captures passed by value are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
  };
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                    FillInfoMap);
  }

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
                                  /*IsNonContiguous=*/true, DeviceAddrCB,
                                  CustomMapperCB);
}

/// Check for inner distribute directive.
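///
/// For illustration only (hypothetical user code): given
/// \code
/// #pragma omp target
/// #pragma omp teams
/// #pragma omp distribute
/// for (int I = 0; I < N; ++I) ...
/// \endcode
/// the nested 'distribute' directive is returned for the 'target' directive.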
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, just treat 'target teams loop' as if it's distributed.
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info, OMPBuilder);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
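    // For illustration only (hypothetical example): if this mapper is invoked
    // from a construct with map(from: ...), a member the mapper declares with
    // map(to: ...) decays to 'alloc' per the table above.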
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expected a valid mapper function.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
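
// Illustrative user code that exercises the codegen above (a hypothetical
// example, not taken from a test):
//
//   struct Vec { int Len; double *Data; };
//   #pragma omp declare mapper(id : Vec V) map(V.Len) map(V.Data[0 : V.Len])
//
// The emitted mapper function walks the mapped array section and, for each
// element, pushes one component per map clause through
// __tgt_push_mapper_component, or recurses into a nested mapper function.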

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
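
// In pseudocode, the guard emitted above around the push is:
//   IsInit:  if ((Size > 1 || (Base != Begin && PTR_AND_OBJ)) && !DELETE)
//              __tgt_push_mapper_component(...);
//   !IsInit: if (Size > 1 && DELETE)
//              __tgt_push_mapper_component(...);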

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}

static void
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                       const OMPExecutableDirective &D,
                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                       bool RequiresOuterTask, const CapturedStmt &CS,
                       bool OffloadingMandatory, CodeGenFunction &CGF) {
  if (OffloadingMandatory) {
    CGF.Builder.CreateUnreachable();
  } else {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
                                         CapturedVars);
  }
}

static llvm::Value *emitDeviceID(
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    CodeGenFunction &CGF) {
  // Emit device ID if any.
  llvm::Value *DeviceID;
  if (Device.getPointer()) {
    assert((Device.getInt() == OMPC_DEVICE_unknown ||
            Device.getInt() == OMPC_DEVICE_device_num) &&
           "Expected device_num modifier.");
    llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
    DeviceID =
        CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }
  return DeviceID;
}
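
// For instance, 'device(device_num : N)' or plain 'device(N)' yields N
// sign-extended to i64, while an absent device clause yields the sentinel
// OMP_DEVICEID_UNDEF so the runtime can pick the default device.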

static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
                                      CodeGenFunction &CGF) {
  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);

  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                             /*isSigned=*/false);
  }
  return DynCGroupMem;
}
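
// This handles the LLVM extension clause 'ompx_dyn_cgroup_mem'. For example
// (a hypothetical use):
//   #pragma omp target ompx_dyn_cgroup_mem(1024)
// requests 1024 bytes of dynamic group memory for the kernel launch; without
// the clause the size defaults to 0.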

static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          OMPBuilder, nullptr,
          !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
  // Map any list items in a map clause that were not captures because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer();
    llvm::Value *PointersArray = InputInfo.PointersArray.getPointer();
    llvm::Value *SizesArray = InputInfo.SizesArray.getPointer();
    llvm::Value *MappersArray = InputInfo.MappersArray.getPointer();

    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        /*MapTypesArrayEnd=*/nullptr, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}
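
// Note: emitKernelLaunch above is expected to lower to a __tgt_target_kernel
// call guarded so that a failed launch falls back to the host path provided
// by EmitTargetCallFallbackCB; the exact IR shape is owned by the
// OpenMPIRBuilder.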

static void
emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                   const OMPExecutableDirective &D,
                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                   bool RequiresOuterTask, const CapturedStmt &CS,
                   bool OffloadingMandatory, CodeGenFunction &CGF) {

  // Notify that the host version must be executed.
  auto &&ElseGen =
      [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory, CGF);
      };

  if (RequiresOuterTask) {
    CodeGenFunction::OMPTargetDataInfo InputInfo;
    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
  } else {
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
  }
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on the
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}
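
// For example, given the illustrative declaration
//   #pragma omp declare target device_type(nohost)
//   void DeviceOnly();
//   #pragma omp end declare target
// isAssumedToBeNotEmitted() returns true for DeviceOnly when IsDevice is
// false, so a host compilation skips emitting its body.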

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition
  // and do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
      CGM.getTypes().ConvertTypeForMem(
          CGM.getContext().getPointerType(VD->getType())),
      Addr);

  for (auto *Ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(Ref);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected a target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}
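
// For example, the illustrative directive
//   #pragma omp requires unified_shared_memory atomic_default_mem_order(seq_cst)
// sets HasRequiresUnifiedSharedMemory and selects
// llvm::AtomicOrdering::SequentiallyConsistent, which is what
// getDefaultMemoryOrdering() below then reports.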

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fall back to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected a predefined allocator for variables with "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice ||
      (OMPBuilder.OffloadInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least one target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OMPBuilder.OffloadInfoManager.empty()) &&
           "Target or declare target region expected.");
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(
                            CGM.Int64Ty, OMPBuilder.Config.getRequiresFlags()));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit).
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit).
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
                                    ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
                      ThreadLimitArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct.
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
}
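
// createTargetData above brackets the region body with paired
// __tgt_target_data_begin_mapper / __tgt_target_data_end_mapper calls,
// guarded by the if-clause value when one is present; the three BodyGenTy
// phases let this caller emit the body with or without device-address
// privatization.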

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10495
10496namespace {
10497 /// Kind of parameter in a function with 'declare simd' directive.
10498enum ParamKindTy {
10499 Linear,
10500 LinearRef,
10501 LinearUVal,
10502 LinearVal,
10503 Uniform,
10504 Vector,
10505};
10506/// Attribute set of the parameter.
10507struct ParamAttrTy {
10508 ParamKindTy Kind = Vector;
10509 llvm::APSInt StrideOrArg;
10510 llvm::APSInt Alignment;
10511 bool HasVarStride = false;
10512};
10513} // namespace
10514
10515static unsigned evaluateCDTSize(const FunctionDecl *FD,
10516 ArrayRef<ParamAttrTy> ParamAttrs) {
10517 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10518 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10519 // of that clause. The VLEN value must be power of 2.
10520 // In other case the notion of the function`s "characteristic data type" (CDT)
10521 // is used to compute the vector length.
10522 // CDT is defined in the following order:
10523 // a) For non-void function, the CDT is the return type.
10524 // b) If the function has any non-uniform, non-linear parameters, then the
10525 // CDT is the type of the first such parameter.
10526 // c) If the CDT determined by a) or b) above is struct, union, or class
10527 // type which is pass-by-value (except for the type that maps to the
10528 // built-in complex data type), the characteristic data type is int.
10529 // d) If none of the above three cases is applicable, the CDT is int.
10530 // The VLEN is then determined based on the CDT and the size of vector
10531 // register of that ISA for which current vector version is generated. The
10532 // VLEN is computed using the formula below:
10533 // VLEN = sizeof(vector_register) / sizeof(CDT),
10534 // where vector register size specified in section 3.2.1 Registers and the
10535 // Stack Frame of original AMD64 ABI document.
10536 QualType RetType = FD->getReturnType();
10537 if (RetType.isNull())
10538 return 0;
10539 ASTContext &C = FD->getASTContext();
10540 QualType CDT;
10541 if (!RetType.isNull() && !RetType->isVoidType()) {
10542 CDT = RetType;
10543 } else {
10544 unsigned Offset = 0;
10545 if (const auto *MD = dyn_cast<CXXMethodDecl>(Val: FD)) {
10546 if (ParamAttrs[Offset].Kind == Vector)
10547 CDT = C.getPointerType(T: C.getRecordType(MD->getParent()));
10548 ++Offset;
10549 }
10550 if (CDT.isNull()) {
10551 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10552 if (ParamAttrs[I + Offset].Kind == Vector) {
10553 CDT = FD->getParamDecl(i: I)->getType();
10554 break;
10555 }
10556 }
10557 }
10558 }
10559 if (CDT.isNull())
10560 CDT = C.IntTy;
10561 CDT = CDT->getCanonicalTypeUnqualified();
10562 if (CDT->isRecordType() || CDT->isUnionType())
10563 CDT = C.IntTy;
10564 return C.getTypeSize(CDT);
10565}
10566
10567/// Mangle the parameter part of the vector function name according to
10568/// each parameter's OpenMP classification. The mangling function is
10569/// defined in section 4.5 of the AAVFABI (2021Q1).
10570static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10571 SmallString<256> Buffer;
10572 llvm::raw_svector_ostream Out(Buffer);
10573 for (const auto &ParamAttr : ParamAttrs) {
10574 switch (ParamAttr.Kind) {
10575 case Linear:
10576 Out << 'l';
10577 break;
10578 case LinearRef:
10579 Out << 'R';
10580 break;
10581 case LinearUVal:
10582 Out << 'U';
10583 break;
10584 case LinearVal:
10585 Out << 'L';
10586 break;
10587 case Uniform:
10588 Out << 'u';
10589 break;
10590 case Vector:
10591 Out << 'v';
10592 break;
10593 }
10594 if (ParamAttr.HasVarStride)
10595 Out << "s" << ParamAttr.StrideOrArg;
10596 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10597 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10598 // Don't print the step value if it is not present or if it is
10599 // equal to 1.
10600 if (ParamAttr.StrideOrArg < 0)
10601 Out << 'n' << -ParamAttr.StrideOrArg;
10602 else if (ParamAttr.StrideOrArg != 1)
10603 Out << ParamAttr.StrideOrArg;
10604 }
10605
10606 if (!!ParamAttr.Alignment)
10607 Out << 'a' << ParamAttr.Alignment;
10608 }
10609
10610 return std::string(Out.str());
10611}
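// Illustrative example (hypothetical signature): for
//   #pragma omp declare simd uniform(u) linear(l:2) aligned(a:16)
//   void foo(int u, int l, double *a);
// the parameter tokens are "u", "l2" and "va16", so this function
// returns "ul2va16".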
10612
10613static void
10614emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10615 const llvm::APSInt &VLENVal,
10616 ArrayRef<ParamAttrTy> ParamAttrs,
10617 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10618 struct ISADataTy {
10619 char ISA;
10620 unsigned VecRegSize;
10621 };
10622 ISADataTy ISAData[] = {
10623 {'b', 128}, // SSE
10624 {'c', 256}, // AVX
10625 {'d', 256}, // AVX2
10626 {'e', 512}, // AVX512
10627 };
10636 llvm::SmallVector<char, 2> Masked;
10637 switch (State) {
10638 case OMPDeclareSimdDeclAttr::BS_Undefined:
10639 Masked.push_back('N');
10640 Masked.push_back('M');
10641 break;
10642 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10643 Masked.push_back('N');
10644 break;
10645 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10646 Masked.push_back('M');
10647 break;
10648 }
10649 for (char Mask : Masked) {
10650 for (const ISADataTy &Data : ISAData) {
10651 SmallString<256> Buffer;
10652 llvm::raw_svector_ostream Out(Buffer);
10653 Out << "_ZGV" << Data.ISA << Mask;
10654 if (!VLENVal) {
10655 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10656 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10657 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10658 } else {
10659 Out << VLENVal;
10660 }
10661 Out << mangleVectorParameters(ParamAttrs);
10662 Out << '_' << Fn->getName();
10663 Fn->addFnAttr(Out.str());
10664 }
10665 }
10666}
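// For instance (illustrative; `f` stands for the LLVM name of the function):
// `#pragma omp declare simd notinbranch` on `double f(double x)` has
// CDT = double (64 bits), so the loop above attaches the attributes
// "_ZGVbN2v_f", "_ZGVcN4v_f", "_ZGVdN4v_f" and "_ZGVeN8v_f",
// one per ISA, with VLEN = VecRegSize / CDT size.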
10667
10668// These are the functions that are needed to mangle the name of the
10669// vector functions generated by the compiler, according to the rules
10670// defined in the "Vector Function ABI specifications for AArch64",
10671// available at
10672// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10673
10674/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10675static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10676 QT = QT.getCanonicalType();
10677
10678 if (QT->isVoidType())
10679 return false;
10680
10681 if (Kind == ParamKindTy::Uniform)
10682 return false;
10683
10684 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10685 return false;
10686
10687 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10688 !QT->isReferenceType())
10689 return false;
10690
10691 return true;
10692}
10693
10694/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10695static bool getAArch64PBV(QualType QT, ASTContext &C) {
10696 QT = QT.getCanonicalType();
10697 unsigned Size = C.getTypeSize(QT);
10698
10699 // Only scalars and complex types up to 16 bytes wide set PBV to true.
10700 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10701 return false;
10702
10703 if (QT->isFloatingType())
10704 return true;
10705
10706 if (QT->isIntegerType())
10707 return true;
10708
10709 if (QT->isPointerType())
10710 return true;
10711
10712 // TODO: Add support for complex types (section 3.1.2, item 2).
10713
10714 return false;
10715}
10716
10717/// Computes the lane size (LS) of a return type or of an input parameter,
10718/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10719/// TODO: Add support for references, section 3.2.1, item 1.
10720static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10721 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10722 QualType PTy = QT.getCanonicalType()->getPointeeType();
10723 if (getAArch64PBV(PTy, C))
10724 return C.getTypeSize(PTy);
10725 }
10726 if (getAArch64PBV(QT, C))
10727 return C.getTypeSize(QT);
10728
10729 return C.getTypeSize(C.getUIntPtrType());
10730}
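// A rough sketch of the rules above: a uniform `int *` parameter whose
// pointee is PBV contributes the pointee size (32 bits); a PBV `double`
// contributes 64 bits; anything else falls back to the size of uintptr_t.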
10731
10732// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10733// signature of the scalar function, as defined in 3.2.2 of the
10734// AAVFABI.
10735static std::tuple<unsigned, unsigned, bool>
10736getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10737 QualType RetType = FD->getReturnType().getCanonicalType();
10738
10739 ASTContext &C = FD->getASTContext();
10740
10741 bool OutputBecomesInput = false;
10742
10743 llvm::SmallVector<unsigned, 8> Sizes;
10744 if (!RetType->isVoidType()) {
10745 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10746 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10747 OutputBecomesInput = true;
10748 }
10749 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10750 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10751 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10752 }
10753
10754 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10755 // The LS of a function parameter / return value can only be a power
10756 // of 2, starting from 8 bits, up to 128.
10757 assert(llvm::all_of(Sizes,
10758 [](unsigned Size) {
10759 return Size == 8 || Size == 16 || Size == 32 ||
10760 Size == 64 || Size == 128;
10761 }) &&
10762 "Invalid size");
10763
10764 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10765 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10766 OutputBecomesInput);
10767}
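// E.g. (illustrative): for `double f(float x)` with a vector `x`, the lane
// sizes are {64, 32}, so NDS = 32 and WDS = 64, and OutputBecomesInput stays
// false because `double` is PBV.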
10768
10769// Function used to add the attribute. The parameter `VLEN` is
10770// templated to allow the use of "x" when targeting scalable functions
10771// for SVE.
10772template <typename T>
10773static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10774 char ISA, StringRef ParSeq,
10775 StringRef MangledName, bool OutputBecomesInput,
10776 llvm::Function *Fn) {
10777 SmallString<256> Buffer;
10778 llvm::raw_svector_ostream Out(Buffer);
10779 Out << Prefix << ISA << LMask << VLEN;
10780 if (OutputBecomesInput)
10781 Out << "v";
10782 Out << ParSeq << "_" << MangledName;
10783 Fn->addFnAttr(Out.str());
10784}
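// E.g. (illustrative, for a function whose LLVM name is `f` with a single
// vector parameter): addAArch64VectorName(4, "N", "_ZGV", 'n', "v", "f", ...)
// attaches "_ZGVnN4v_f", while the SVE form with VLEN = "x" and LMask = "M"
// attaches "_ZGVsMxv_f".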
10785
10786// Helper function to generate the Advanced SIMD names depending on
10787// the value of the NDS when simdlen is not present.
10788static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10789 StringRef Prefix, char ISA,
10790 StringRef ParSeq, StringRef MangledName,
10791 bool OutputBecomesInput,
10792 llvm::Function *Fn) {
10793 switch (NDS) {
10794 case 8:
10795 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10796 OutputBecomesInput, Fn);
10797 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10798 OutputBecomesInput, Fn);
10799 break;
10800 case 16:
10801 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10802 OutputBecomesInput, Fn);
10803 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10804 OutputBecomesInput, Fn);
10805 break;
10806 case 32:
10807 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10808 OutputBecomesInput, Fn);
10809 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10810 OutputBecomesInput, Fn);
10811 break;
10812 case 64:
10813 case 128:
10814 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10815 OutputBecomesInput, Fn);
10816 break;
10817 default:
10818 llvm_unreachable("Scalar type is too wide.");
10819 }
10820}
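// In other words (a sketch of the table above): both the 64-bit and the
// 128-bit Advanced SIMD vector lengths are emitted, i.e. VLEN = 64 / NDS
// and VLEN = 128 / NDS; for an NDS of 64 or 128 only the 128-bit form exists.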
10821
10822/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10823static void emitAArch64DeclareSimdFunction(
10824 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10825 ArrayRef<ParamAttrTy> ParamAttrs,
10826 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10827 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10828
10829 // Get basic data for building the vector signature.
10830 const auto Data = getNDSWDS(FD, ParamAttrs);
10831 const unsigned NDS = std::get<0>(Data);
10832 const unsigned WDS = std::get<1>(Data);
10833 const bool OutputBecomesInput = std::get<2>(Data);
10834
10835 // Check the values provided via `simdlen` by the user.
10836 // 1. A `simdlen(1)` doesn't produce vector signatures.
10837 if (UserVLEN == 1) {
10838 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10839 DiagnosticsEngine::Warning,
10840 "The clause simdlen(1) has no effect when targeting aarch64.");
10841 CGM.getDiags().Report(SLoc, DiagID);
10842 return;
10843 }
10844
10845 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10846 // Advanced SIMD output.
10847 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10848 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10849 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10850 "power of 2 when targeting Advanced SIMD.");
10851 CGM.getDiags().Report(SLoc, DiagID);
10852 return;
10853 }
10854
10855 // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
10856 // limits.
10857 if (ISA == 's' && UserVLEN != 0) {
10858 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10859 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10860 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10861 "lanes in the architectural constraints "
10862 "for SVE (min is 128-bit, max is "
10863 "2048-bit, by steps of 128-bit)");
10864 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10865 return;
10866 }
10867 }
10868
10869 // Sort out parameter sequence.
10870 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10871 StringRef Prefix = "_ZGV";
10872 // Generate simdlen from user input (if any).
10873 if (UserVLEN) {
10874 if (ISA == 's') {
10875 // SVE generates only a masked function.
10876 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10877 OutputBecomesInput, Fn);
10878 } else {
10879 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10880 // Advanced SIMD generates one or two functions, depending on
10881 // the `[not]inbranch` clause.
10882 switch (State) {
10883 case OMPDeclareSimdDeclAttr::BS_Undefined:
10884 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10885 OutputBecomesInput, Fn);
10886 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10887 OutputBecomesInput, Fn);
10888 break;
10889 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10890 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10891 OutputBecomesInput, Fn);
10892 break;
10893 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10894 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10895 OutputBecomesInput, Fn);
10896 break;
10897 }
10898 }
10899 } else {
10900 // If no user simdlen is provided, follow the AAVFABI rules for
10901 // generating the vector length.
10902 if (ISA == 's') {
10903 // SVE, section 3.4.1, item 1.
10904 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10905 OutputBecomesInput, Fn);
10906 } else {
10907 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10908 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10909 // two vector names depending on the use of the clause
10910 // `[not]inbranch`.
10911 switch (State) {
10912 case OMPDeclareSimdDeclAttr::BS_Undefined:
10913 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10914 OutputBecomesInput, Fn);
10915 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10916 OutputBecomesInput, Fn);
10917 break;
10918 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10919 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10920 OutputBecomesInput, Fn);
10921 break;
10922 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10923 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10924 OutputBecomesInput, Fn);
10925 break;
10926 }
10927 }
10928 }
10929}
10930
10931void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10932 llvm::Function *Fn) {
10933 ASTContext &C = CGM.getContext();
10934 FD = FD->getMostRecentDecl();
10935 while (FD) {
10936 // Map params to their positions in function decl.
10937 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10938 if (isa<CXXMethodDecl>(FD))
10939 ParamPositions.try_emplace(FD, 0);
10940 unsigned ParamPos = ParamPositions.size();
10941 for (const ParmVarDecl *P : FD->parameters()) {
10942 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10943 ++ParamPos;
10944 }
10945 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10946 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10947 // Mark uniform parameters.
10948 for (const Expr *E : Attr->uniforms()) {
10949 E = E->IgnoreParenImpCasts();
10950 unsigned Pos;
10951 if (isa<CXXThisExpr>(E)) {
10952 Pos = ParamPositions[FD];
10953 } else {
10954 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10955 ->getCanonicalDecl();
10956 auto It = ParamPositions.find(PVD);
10957 assert(It != ParamPositions.end() && "Function parameter not found");
10958 Pos = It->second;
10959 }
10960 ParamAttrs[Pos].Kind = Uniform;
10961 }
10962 // Get alignment info.
10963 auto *NI = Attr->alignments_begin();
10964 for (const Expr *E : Attr->aligneds()) {
10965 E = E->IgnoreParenImpCasts();
10966 unsigned Pos;
10967 QualType ParmTy;
10968 if (isa<CXXThisExpr>(E)) {
10969 Pos = ParamPositions[FD];
10970 ParmTy = E->getType();
10971 } else {
10972 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10973 ->getCanonicalDecl();
10974 auto It = ParamPositions.find(PVD);
10975 assert(It != ParamPositions.end() && "Function parameter not found");
10976 Pos = It->second;
10977 ParmTy = PVD->getType();
10978 }
10979 ParamAttrs[Pos].Alignment =
10980 (*NI)
10981 ? (*NI)->EvaluateKnownConstInt(C)
10982 : llvm::APSInt::getUnsigned(
10983 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10984 .getQuantity());
10985 ++NI;
10986 }
10987 // Mark linear parameters.
10988 auto *SI = Attr->steps_begin();
10989 auto *MI = Attr->modifiers_begin();
10990 for (const Expr *E : Attr->linears()) {
10991 E = E->IgnoreParenImpCasts();
10992 unsigned Pos;
10993 bool IsReferenceType = false;
10994 // Rescaling factor needed to compute the linear parameter
10995 // value in the mangled name.
10996 unsigned PtrRescalingFactor = 1;
10997 if (isa<CXXThisExpr>(E)) {
10998 Pos = ParamPositions[FD];
10999 auto *P = cast<PointerType>(E->getType());
11000 PtrRescalingFactor = CGM.getContext()
11001 .getTypeSizeInChars(P->getPointeeType())
11002 .getQuantity();
11003 } else {
11004 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11005 ->getCanonicalDecl();
11006 auto It = ParamPositions.find(PVD);
11007 assert(It != ParamPositions.end() && "Function parameter not found");
11008 Pos = It->second;
11009 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11010 PtrRescalingFactor = CGM.getContext()
11011 .getTypeSizeInChars(P->getPointeeType())
11012 .getQuantity();
11013 else if (PVD->getType()->isReferenceType()) {
11014 IsReferenceType = true;
11015 PtrRescalingFactor =
11016 CGM.getContext()
11017 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11018 .getQuantity();
11019 }
11020 }
11021 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11022 if (*MI == OMPC_LINEAR_ref)
11023 ParamAttr.Kind = LinearRef;
11024 else if (*MI == OMPC_LINEAR_uval)
11025 ParamAttr.Kind = LinearUVal;
11026 else if (IsReferenceType)
11027 ParamAttr.Kind = LinearVal;
11028 else
11029 ParamAttr.Kind = Linear;
11030 // Assuming a stride of 1, for `linear` without modifiers.
11031 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11032 if (*SI) {
11033 Expr::EvalResult Result;
11034 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11035 if (const auto *DRE =
11036 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11037 if (const auto *StridePVD =
11038 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11039 ParamAttr.HasVarStride = true;
11040 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11041 assert(It != ParamPositions.end() &&
11042 "Function parameter not found");
11043 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11044 }
11045 }
11046 } else {
11047 ParamAttr.StrideOrArg = Result.Val.getInt();
11048 }
11049 }
11050 // If we are using a linear clause on a pointer, we need to
11051 // rescale the value of linear_step with the byte size of the
11052 // pointee type.
11053 if (!ParamAttr.HasVarStride &&
11054 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11055 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11056 ++SI;
11057 ++MI;
11058 }
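 // For example (illustrative): `linear(p:2)` on a parameter `int *p` is
 // mangled with a step of 2 * sizeof(int) = 8, i.e. as "l8", while
 // `linear(i:2)` on a plain `int i` keeps the step 2 and is mangled "l2".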
11059 llvm::APSInt VLENVal;
11060 SourceLocation ExprLoc;
11061 const Expr *VLENExpr = Attr->getSimdlen();
11062 if (VLENExpr) {
11063 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11064 ExprLoc = VLENExpr->getExprLoc();
11065 }
11066 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11067 if (CGM.getTriple().isX86()) {
11068 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11069 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11070 unsigned VLEN = VLENVal.getExtValue();
11071 StringRef MangledName = Fn->getName();
11072 if (CGM.getTarget().hasFeature("sve"))
11073 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11074 MangledName, 's', 128, Fn, ExprLoc);
11075 else if (CGM.getTarget().hasFeature("neon"))
11076 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11077 MangledName, 'n', 128, Fn, ExprLoc);
11078 }
11079 }
11080 FD = FD->getPreviousDecl();
11081 }
11082}
11083
11084namespace {
11085/// Cleanup action for doacross support.
11086class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11087public:
11088 static const int DoacrossFinArgs = 2;
11089
11090private:
11091 llvm::FunctionCallee RTLFn;
11092 llvm::Value *Args[DoacrossFinArgs];
11093
11094public:
11095 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11096 ArrayRef<llvm::Value *> CallArgs)
11097 : RTLFn(RTLFn) {
11098 assert(CallArgs.size() == DoacrossFinArgs);
11099 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11100 }
11101 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11102 if (!CGF.HaveInsertPoint())
11103 return;
11104 CGF.EmitRuntimeCall(RTLFn, Args);
11105 }
11106};
11107} // namespace
11108
11109void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11110 const OMPLoopDirective &D,
11111 ArrayRef<Expr *> NumIterations) {
11112 if (!CGF.HaveInsertPoint())
11113 return;
11114
11115 ASTContext &C = CGM.getContext();
11116 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11117 RecordDecl *RD;
11118 if (KmpDimTy.isNull()) {
11119 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11120 // kmp_int64 lo; // lower
11121 // kmp_int64 up; // upper
11122 // kmp_int64 st; // stride
11123 // };
11124 RD = C.buildImplicitRecord("kmp_dim");
11125 RD->startDefinition();
11126 addFieldToRecordDecl(C, RD, Int64Ty);
11127 addFieldToRecordDecl(C, RD, Int64Ty);
11128 addFieldToRecordDecl(C, RD, Int64Ty);
11129 RD->completeDefinition();
11130 KmpDimTy = C.getRecordType(RD);
11131 } else {
11132 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11133 }
11134 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11135 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11136 ArraySizeModifier::Normal, 0);
11137
11138 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11139 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11140 enum { LowerFD = 0, UpperFD, StrideFD };
11141 // Fill dims with data.
11142 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11143 LValue DimsLVal = CGF.MakeAddrLValue(
11144 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11145 // dims.upper = num_iterations;
11146 LValue UpperLVal = CGF.EmitLValueForField(
11147 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11148 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11149 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11150 Int64Ty, NumIterations[I]->getExprLoc());
11151 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11152 // dims.stride = 1;
11153 LValue StrideLVal = CGF.EmitLValueForField(
11154 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11155 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11156 StrideLVal);
11157 }
11158
11159 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11160 // kmp_int32 num_dims, struct kmp_dim * dims);
11161 llvm::Value *Args[] = {
11162 emitUpdateLocation(CGF, D.getBeginLoc()),
11163 getThreadID(CGF, D.getBeginLoc()),
11164 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11165 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11166 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11167 CGM.VoidPtrTy)};
11168
11169 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11170 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11171 CGF.EmitRuntimeCall(RTLFn, Args);
11172 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11173 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11174 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11175 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11176 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11177 llvm::ArrayRef(FiniArgs));
11178}
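// As a sketch, for `#pragma omp for ordered(2)` over an I x J loop nest the
// code above materializes roughly:
//   kmp_dim dims[2] = {{0, I, 1}, {0, J, 1}}; // lo/up/st per dimension
//   __kmpc_doacross_init(&loc, gtid, 2, dims);
// and registers a cleanup that emits __kmpc_doacross_fini(&loc, gtid) when
// the region is left.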
11179
11180template <typename T>
11181static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11182 const T *C, llvm::Value *ULoc,
11183 llvm::Value *ThreadID) {
11184 QualType Int64Ty =
11185 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11186 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11187 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11188 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11189 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11190 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11191 const Expr *CounterVal = C->getLoopData(I);
11192 assert(CounterVal);
11193 llvm::Value *CntVal = CGF.EmitScalarConversion(
11194 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11195 CounterVal->getExprLoc());
11196 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11197 /*Volatile=*/false, Int64Ty);
11198 }
11199 llvm::Value *Args[] = {
11200 ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11201 llvm::FunctionCallee RTLFn;
11202 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11203 OMPDoacrossKind<T> ODK;
11204 if (ODK.isSource(C)) {
11205 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11206 OMPRTL___kmpc_doacross_post);
11207 } else {
11208 assert(ODK.isSink(C) && "Expect sink modifier.");
11209 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11210 OMPRTL___kmpc_doacross_wait);
11211 }
11212 CGF.EmitRuntimeCall(RTLFn, Args);
11213}
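// Sketch of the resulting calls (assuming a 2-deep doacross nest):
//   #pragma omp ordered depend(source)       -> __kmpc_doacross_post(loc, tid, vec)
//   #pragma omp ordered depend(sink: i-1, j) -> __kmpc_doacross_wait(loc, tid, vec)
// where `vec` holds the loop-counter values converted to kmp_int64 above.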
11214
11215void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11216 const OMPDependClause *C) {
11217 return EmitDoacrossOrdered<OMPDependClause>(
11218 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11219 getThreadID(CGF, C->getBeginLoc()));
11220}
11221
11222void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11223 const OMPDoacrossClause *C) {
11224 return EmitDoacrossOrdered<OMPDoacrossClause>(
11225 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11226 getThreadID(CGF, C->getBeginLoc()));
11227}
11228
11229void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11230 llvm::FunctionCallee Callee,
11231 ArrayRef<llvm::Value *> Args) const {
11232 assert(Loc.isValid() && "Outlined function call location must be valid.");
11233 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11234
11235 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11236 if (Fn->doesNotThrow()) {
11237 CGF.EmitNounwindRuntimeCall(Fn, Args);
11238 return;
11239 }
11240 }
11241 CGF.EmitRuntimeCall(Callee, Args);
11242}
11243
11244void CGOpenMPRuntime::emitOutlinedFunctionCall(
11245 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11246 ArrayRef<llvm::Value *> Args) const {
11247 emitCall(CGF, Loc, OutlinedFn, Args);
11248}
11249
11250void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11251 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11252 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11253 HasEmittedDeclareTargetRegion = true;
11254}
11255
11256Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11257 const VarDecl *NativeParam,
11258 const VarDecl *TargetParam) const {
11259 return CGF.GetAddrOfLocalVar(NativeParam);
11260}
11261
11262/// Return allocator value from expression, or return a null allocator (default
11263/// when no allocator specified).
11264static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11265 const Expr *Allocator) {
11266 llvm::Value *AllocVal;
11267 if (Allocator) {
11268 AllocVal = CGF.EmitScalarExpr(Allocator);
11269 // According to the standard, the original allocator type is an enum
11270 // (integer). Convert to pointer type, if required.
11271 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11272 CGF.getContext().VoidPtrTy,
11273 Allocator->getExprLoc());
11274 } else {
11275 // If no allocator specified, it defaults to the null allocator.
11276 AllocVal = llvm::Constant::getNullValue(
11277 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11278 }
11279 return AllocVal;
11280}
11281
11282/// Return the alignment from an allocate directive if present.
11283static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11284 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11285
11286 if (!AllocateAlignment)
11287 return nullptr;
11288
11289 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11290}
11291
11292Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11293 const VarDecl *VD) {
11294 if (!VD)
11295 return Address::invalid();
11296 Address UntiedAddr = Address::invalid();
11297 Address UntiedRealAddr = Address::invalid();
11298 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11299 if (It != FunctionToUntiedTaskStackMap.end()) {
11300 const UntiedLocalVarsAddressesMap &UntiedData =
11301 UntiedLocalVarsStack[It->second];
11302 auto I = UntiedData.find(VD);
11303 if (I != UntiedData.end()) {
11304 UntiedAddr = I->second.first;
11305 UntiedRealAddr = I->second.second;
11306 }
11307 }
11308 const VarDecl *CVD = VD->getCanonicalDecl();
11309 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11310 // Use the default allocation.
11311 if (!isAllocatableDecl(VD))
11312 return UntiedAddr;
11313 llvm::Value *Size;
11314 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11315 if (CVD->getType()->isVariablyModifiedType()) {
11316 Size = CGF.getTypeSize(CVD->getType());
11317 // Align the size: ((size + align - 1) / align) * align
11318 Size = CGF.Builder.CreateNUWAdd(
11319 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11320 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11321 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11322 } else {
11323 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11324 Size = CGM.getSize(Sz.alignTo(Align));
11325 }
11326 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11327 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11328 const Expr *Allocator = AA->getAllocator();
11329 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11330 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11331 SmallVector<llvm::Value *, 4> Args;
11332 Args.push_back(ThreadID);
11333 if (Alignment)
11334 Args.push_back(Alignment);
11335 Args.push_back(Size);
11336 Args.push_back(AllocVal);
11337 llvm::omp::RuntimeFunction FnID =
11338 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11339 llvm::Value *Addr = CGF.EmitRuntimeCall(
11340 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11341 getName({CVD->getName(), ".void.addr"}));
11342 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11343 CGM.getModule(), OMPRTL___kmpc_free);
11344 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11345 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11346 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11347 if (UntiedAddr.isValid())
11348 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11349
11350 // Cleanup action for allocate support.
11351 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11352 llvm::FunctionCallee RTLFn;
11353 SourceLocation::UIntTy LocEncoding;
11354 Address Addr;
11355 const Expr *AllocExpr;
11356
11357 public:
11358 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11359 SourceLocation::UIntTy LocEncoding, Address Addr,
11360 const Expr *AllocExpr)
11361 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11362 AllocExpr(AllocExpr) {}
11363 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11364 if (!CGF.HaveInsertPoint())
11365 return;
11366 llvm::Value *Args[3];
11367 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11368 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11369 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11370 Addr.getPointer(), CGF.VoidPtrTy);
11371 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11372 Args[2] = AllocVal;
11373 CGF.EmitRuntimeCall(RTLFn, Args);
11374 }
11375 };
11376 Address VDAddr =
11377 UntiedRealAddr.isValid()
11378 ? UntiedRealAddr
11379 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11380 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11381 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11382 VDAddr, Allocator);
11383 if (UntiedRealAddr.isValid())
11384 if (auto *Region =
11385 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11386 Region->emitUntiedSwitch(CGF);
11387 return VDAddr;
11388 }
11389 return UntiedAddr;
11390}
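// Illustrative effect (assuming `int a;` declared with
// `#pragma omp allocate(a) allocator(omp_high_bw_mem_alloc)`): the code
// above emits, roughly,
//   void *a.void.addr = __kmpc_alloc(gtid, 4, allocator);
//   int *a.addr = (int *)a.void.addr;
// (or __kmpc_aligned_alloc when an `align` modifier is present) and pushes a
// cleanup that emits `__kmpc_free(gtid, a.void.addr, allocator)` at scope
// exit.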
11391
11392bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11393 const VarDecl *VD) const {
11394 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11395 if (It == FunctionToUntiedTaskStackMap.end())
11396 return false;
11397 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11398}
11399
11400CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11401 CodeGenModule &CGM, const OMPLoopDirective &S)
11402 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11403 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11404 if (!NeedToPush)
11405 return;
11406 NontemporalDeclsSet &DS =
11407 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11408 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11409 for (const Stmt *Ref : C->private_refs()) {
11410 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11411 const ValueDecl *VD;
11412 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11413 VD = DRE->getDecl();
11414 } else {
11415 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11416 assert((ME->isImplicitCXXThis() ||
11417 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11418 "Expected member of current class.");
11419 VD = ME->getMemberDecl();
11420 }
11421 DS.insert(VD);
11422 }
11423 }
11424}
11425
11426CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11427 if (!NeedToPush)
11428 return;
11429 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11430}
11431
11432CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11433 CodeGenFunction &CGF,
11434 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11435 std::pair<Address, Address>> &LocalVars)
11436 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11437 if (!NeedToPush)
11438 return;
11439 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11440 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11441 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11442}
11443
11444CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11445 if (!NeedToPush)
11446 return;
11447 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11448}
11449
11450bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11451 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11452
11453 return llvm::any_of(
11454 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11455 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11456}
11457
11458void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11459 const OMPExecutableDirective &S,
11460 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11461 const {
11462 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11463 // Vars in target/task regions must be excluded completely.
11464 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11465 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11466 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11467 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11468 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11469 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11470 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11471 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11472 }
11473 }
11474 // Exclude vars in private clauses.
11475 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11476 for (const Expr *Ref : C->varlists()) {
11477 if (!Ref->getType()->isScalarType())
11478 continue;
11479 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11480 if (!DRE)
11481 continue;
11482 NeedToCheckForLPCs.insert(DRE->getDecl());
11483 }
11484 }
11485 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11486 for (const Expr *Ref : C->varlists()) {
11487 if (!Ref->getType()->isScalarType())
11488 continue;
11489 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11490 if (!DRE)
11491 continue;
11492 NeedToCheckForLPCs.insert(DRE->getDecl());
11493 }
11494 }
11495 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11496 for (const Expr *Ref : C->varlists()) {
11497 if (!Ref->getType()->isScalarType())
11498 continue;
11499 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11500 if (!DRE)
11501 continue;
11502 NeedToCheckForLPCs.insert(DRE->getDecl());
11503 }
11504 }
11505 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11506 for (const Expr *Ref : C->varlists()) {
11507 if (!Ref->getType()->isScalarType())
11508 continue;
11509 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11510 if (!DRE)
11511 continue;
11512 NeedToCheckForLPCs.insert(DRE->getDecl());
11513 }
11514 }
11515 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11516 for (const Expr *Ref : C->varlists()) {
11517 if (!Ref->getType()->isScalarType())
11518 continue;
11519 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11520 if (!DRE)
11521 continue;
11522 NeedToCheckForLPCs.insert(DRE->getDecl());
11523 }
11524 }
11525 for (const Decl *VD : NeedToCheckForLPCs) {
11526 for (const LastprivateConditionalData &Data :
11527 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11528 if (Data.DeclToUniqueName.count(VD) > 0) {
11529 if (!Data.Disabled)
11530 NeedToAddForLPCsAsDisabled.insert(VD);
11531 break;
11532 }
11533 }
11534 }
11535}
11536
11537CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11538 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11539 : CGM(CGF.CGM),
11540 Action((CGM.getLangOpts().OpenMP >= 50 &&
11541 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11542 [](const OMPLastprivateClause *C) {
11543 return C->getKind() ==
11544 OMPC_LASTPRIVATE_conditional;
11545 }))
11546 ? ActionToDo::PushAsLastprivateConditional
11547 : ActionToDo::DoNotPush) {
11548 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11549 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11550 return;
11551 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11552 "Expected a push action.");
11553 LastprivateConditionalData &Data =
11554 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11555 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11556 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11557 continue;
11558
11559 for (const Expr *Ref : C->varlists()) {
11560 Data.DeclToUniqueName.insert(std::make_pair(
11561 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11562 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11563 }
11564 }
11565 Data.IVLVal = IVLVal;
11566 Data.Fn = CGF.CurFn;
11567}
11568
11569CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11570 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11571 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11572 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11573 if (CGM.getLangOpts().OpenMP < 50)
11574 return;
11575 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11576 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11577 if (!NeedToAddForLPCsAsDisabled.empty()) {
11578 Action = ActionToDo::DisableLastprivateConditional;
11579 LastprivateConditionalData &Data =
11580 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11581 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11582 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11583 Data.Fn = CGF.CurFn;
11584 Data.Disabled = true;
11585 }
11586}
11587
11588CGOpenMPRuntime::LastprivateConditionalRAII
11589CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11590 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11591 return LastprivateConditionalRAII(CGF, S);
11592}
11593
11594CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11595 if (CGM.getLangOpts().OpenMP < 50)
11596 return;
11597 if (Action == ActionToDo::DisableLastprivateConditional) {
11598 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11599 "Expected list of disabled private vars.");
11600 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11601 }
11602 if (Action == ActionToDo::PushAsLastprivateConditional) {
11603 assert(
11604 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11605 "Expected list of lastprivate conditional vars.");
11606 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11607 }
11608}
11609
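// A sketch of the private copy built below for a lastprivate conditional
// variable `a` of type T:
//   struct lastprivate.conditional { T a; char Fired; } priv_a;
// Only the `Fired` flag is zero-initialized here; inner regions set it to 1
// whenever `a` is written, see checkAndEmitLastprivateConditional.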
11610Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11611 const VarDecl *VD) {
11612 ASTContext &C = CGM.getContext();
11613 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11614 if (I == LastprivateConditionalToTypes.end())
11615 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11616 QualType NewType;
11617 const FieldDecl *VDField;
11618 const FieldDecl *FiredField;
11619 LValue BaseLVal;
11620 auto VI = I->getSecond().find(VD);
11621 if (VI == I->getSecond().end()) {
11622 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
11623 RD->startDefinition();
11624 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11625 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11626 RD->completeDefinition();
11627 NewType = C.getRecordType(RD);
11628 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11629 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11630 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11631 } else {
11632 NewType = std::get<0>(VI->getSecond());
11633 VDField = std::get<1>(VI->getSecond());
11634 FiredField = std::get<2>(VI->getSecond());
11635 BaseLVal = std::get<3>(VI->getSecond());
11636 }
11637 LValue FiredLVal =
11638 CGF.EmitLValueForField(BaseLVal, FiredField);
11639 CGF.EmitStoreOfScalar(
11640 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11641 FiredLVal);
11642 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11643}
11644
11645namespace {
11646/// Checks if the lastprivate conditional variable is referenced in LHS.
11647class LastprivateConditionalRefChecker final
11648 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11649 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11650 const Expr *FoundE = nullptr;
11651 const Decl *FoundD = nullptr;
11652 StringRef UniqueDeclName;
11653 LValue IVLVal;
11654 llvm::Function *FoundFn = nullptr;
11655 SourceLocation Loc;
11656
11657public:
11658 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11659 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11660 llvm::reverse(LPM)) {
11661 auto It = D.DeclToUniqueName.find(E->getDecl());
11662 if (It == D.DeclToUniqueName.end())
11663 continue;
11664 if (D.Disabled)
11665 return false;
11666 FoundE = E;
11667 FoundD = E->getDecl()->getCanonicalDecl();
11668 UniqueDeclName = It->second;
11669 IVLVal = D.IVLVal;
11670 FoundFn = D.Fn;
11671 break;
11672 }
11673 return FoundE == E;
11674 }
11675 bool VisitMemberExpr(const MemberExpr *E) {
11676 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11677 return false;
11678 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11679 llvm::reverse(LPM)) {
11680 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11681 if (It == D.DeclToUniqueName.end())
11682 continue;
11683 if (D.Disabled)
11684 return false;
11685 FoundE = E;
11686 FoundD = E->getMemberDecl()->getCanonicalDecl();
11687 UniqueDeclName = It->second;
11688 IVLVal = D.IVLVal;
11689 FoundFn = D.Fn;
11690 break;
11691 }
11692 return FoundE == E;
11693 }
11694 bool VisitStmt(const Stmt *S) {
11695 for (const Stmt *Child : S->children()) {
11696 if (!Child)
11697 continue;
11698 if (const auto *E = dyn_cast<Expr>(Child))
11699 if (!E->isGLValue())
11700 continue;
11701 if (Visit(Child))
11702 return true;
11703 }
11704 return false;
11705 }
11706 explicit LastprivateConditionalRefChecker(
11707 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11708 : LPM(LPM) {}
11709 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11710 getFoundData() const {
11711 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11712 }
11713};
11714} // namespace
11715
11716void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11717 LValue IVLVal,
11718 StringRef UniqueDeclName,
11719 LValue LVal,
11720 SourceLocation Loc) {
11721 // Last updated loop counter for the lastprivate conditional var.
11722 // int<xx> last_iv = 0;
11723 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11724 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11725 LLIVTy, getName({UniqueDeclName, "iv"}));
11726 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11727 IVLVal.getAlignment().getAsAlign());
11728 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
11729
11730 // Last value of the lastprivate conditional.
11731 // decltype(priv_a) last_a;
11732 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11733 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11734 Last->setAlignment(LVal.getAlignment().getAsAlign());
11735 LValue LastLVal = CGF.MakeAddrLValue(
11736 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
11737
11738 // Global loop counter. Required to handle inner parallel-for regions.
11739 // iv
11740 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11741
11742 // #pragma omp critical(a)
11743 // if (last_iv <= iv) {
11744 // last_iv = iv;
11745 // last_a = priv_a;
11746 // }
11747 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11748 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11749 Action.Enter(CGF);
11750 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11751 // (last_iv <= iv) ? Check if the variable is updated and store new
11752 // value in global var.
11753 llvm::Value *CmpRes;
11754 if (IVLVal.getType()->isSignedIntegerType()) {
11755 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11756 } else {
11757 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11758 "Loop iteration variable must be integer.");
11759 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11760 }
11761 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11762 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11763 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11764 // {
11765 CGF.EmitBlock(ThenBB);
11766
11767 // last_iv = iv;
11768 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11769
11770 // last_a = priv_a;
11771 switch (CGF.getEvaluationKind(LVal.getType())) {
11772 case TEK_Scalar: {
11773 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11774 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11775 break;
11776 }
11777 case TEK_Complex: {
11778 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11779 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11780 break;
11781 }
11782 case TEK_Aggregate:
11783 llvm_unreachable(
11784 "Aggregates are not supported in lastprivate conditional.");
11785 }
11786 // }
11787 CGF.EmitBranch(ExitBB);
11788 // There is no need to emit a line number for an unconditional branch.
11789 (void)ApplyDebugLocation::CreateEmpty(CGF);
11790 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11791 };
11792
11793 if (CGM.getLangOpts().OpenMPSimd) {
11794 // Do not emit as a critical region as no parallel region could be emitted.
11795 RegionCodeGenTy ThenRCG(CodeGen);
11796 ThenRCG(CGF);
11797 } else {
11798 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11799 }
11800}
11801
11802void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11803 const Expr *LHS) {
11804 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11805 return;
11806 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11807 if (!Checker.Visit(LHS))
11808 return;
11809 const Expr *FoundE;
11810 const Decl *FoundD;
11811 StringRef UniqueDeclName;
11812 LValue IVLVal;
11813 llvm::Function *FoundFn;
11814 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11815 Checker.getFoundData();
11816 if (FoundFn != CGF.CurFn) {
11817 // Special codegen for inner parallel regions.
11818 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11819 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11820 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11821 "Lastprivate conditional is not found in outer region.");
11822 QualType StructTy = std::get<0>(It->getSecond());
11823 const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
11824 LValue PrivLVal = CGF.EmitLValue(FoundE);
11825 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11826 PrivLVal.getAddress(CGF),
11827 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11828 CGF.ConvertTypeForMem(StructTy));
11829 LValue BaseLVal =
11830 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11831 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11832 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11833 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11834 FiredLVal, llvm::AtomicOrdering::Unordered,
11835 /*IsVolatile=*/true, /*isInit=*/false);
11836 return;
11837 }
11838
11839 // Private address of the lastprivate conditional in the current context.
11840 // priv_a
11841 LValue LVal = CGF.EmitLValue(FoundE);
11842 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11843 FoundE->getExprLoc());
11844}
11845
11846void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11847 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11848 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11849 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11850 return;
11851 auto Range = llvm::reverse(LastprivateConditionalStack);
11852 auto It = llvm::find_if(
11853 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11854 if (It == Range.end() || It->Fn != CGF.CurFn)
11855 return;
11856 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11857 assert(LPCI != LastprivateConditionalToTypes.end() &&
11858 "Lastprivates must be registered already.");
11859 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11860 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11861 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11862 for (const auto &Pair : It->DeclToUniqueName) {
11863 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11864 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11865 continue;
11866 auto I = LPCI->getSecond().find(Pair.first);
11867 assert(I != LPCI->getSecond().end() &&
11868 "Lastprivate must be registered already.");
11869 // bool Cmp = priv_a.Fired != 0;
11870 LValue BaseLVal = std::get<3>(I->getSecond());
11871 LValue FiredLVal =
11872 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11873 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11874 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11875 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11876 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11877 // if (Cmp) {
11878 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11879 CGF.EmitBlock(ThenBB);
11880 Address Addr = CGF.GetAddrOfLocalVar(VD);
11881 LValue LVal;
11882 if (VD->getType()->isReferenceType())
11883 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11884 AlignmentSource::Decl);
11885 else
11886 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11887 AlignmentSource::Decl);
11888 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11889 D.getBeginLoc());
11890 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11891 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
11892 // }
11893 }
11894}
11895
11896void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11897 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11898 SourceLocation Loc) {
11899 if (CGF.getLangOpts().OpenMP < 50)
11900 return;
11901 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11902 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11903 "Unknown lastprivate conditional variable.");
11904 StringRef UniqueName = It->second;
11905 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11906 // The variable was not updated in the region - exit.
11907 if (!GV)
11908 return;
11909 LValue LPLVal = CGF.MakeAddrLValue(
11910 Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
11911 PrivLVal.getType().getNonReferenceType());
11912 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11913 CGF.EmitStoreOfScalar(Res, PrivLVal);
11914}
11915
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
    llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen,
    SourceLocation Loc, bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

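// Reductions are the one construct still lowered in SIMD-only mode: the
// caller guarantees a "simple" reduction that needs no runtime support, so
// the default lowering from the base class applies unchanged.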
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

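// No offloading happens in SIMD-only mode, so no global needs special
// 'target' handling; returning false lets the caller emit it normally.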
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}