1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "CGCXXABI.h"
15#include "CGCleanup.h"
16#include "CGRecordLayout.h"
17#include "CodeGenFunction.h"
18#include "clang/AST/APValue.h"
19#include "clang/AST/Attr.h"
20#include "clang/AST/Decl.h"
21#include "clang/AST/OpenMPClause.h"
22#include "clang/AST/StmtOpenMP.h"
23#include "clang/AST/StmtVisitor.h"
24#include "clang/Basic/BitmaskEnum.h"
25#include "clang/Basic/FileManager.h"
26#include "clang/Basic/OpenMPKinds.h"
27#include "clang/Basic/SourceManager.h"
28#include "clang/CodeGen/ConstantInitBuilder.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/SetOperations.h"
31#include "llvm/ADT/StringExtras.h"
32#include "llvm/Bitcode/BitcodeReader.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/GlobalValue.h"
36#include "llvm/IR/Value.h"
37#include "llvm/Support/AtomicOrdering.h"
38#include "llvm/Support/Format.h"
39#include "llvm/Support/raw_ostream.h"
40#include <cassert>
41#include <numeric>
42
43using namespace clang;
44using namespace CodeGen;
45using namespace llvm::omp;
46
47namespace {
48/// Base class for handling code generation inside OpenMP regions.
49class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
50public:
51 /// Kinds of OpenMP regions used in codegen.
52 enum CGOpenMPRegionKind {
53 /// Region with outlined function for standalone 'parallel'
54 /// directive.
55 ParallelOutlinedRegion,
56 /// Region with outlined function for standalone 'task' directive.
57 TaskOutlinedRegion,
58 /// Region for constructs that do not require function outlining,
59 /// like 'for', 'sections', 'atomic' etc. directives.
60 InlinedRegion,
61 /// Region with outlined function for standalone 'target' directive.
62 TargetRegion,
63 };
64
65 CGOpenMPRegionInfo(const CapturedStmt &CS,
66 const CGOpenMPRegionKind RegionKind,
67 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
68 bool HasCancel)
69 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
70 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
71
72 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
73 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
74 bool HasCancel)
75 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
76 Kind(Kind), HasCancel(HasCancel) {}
77
78 /// Get a variable or parameter for storing global thread id
79 /// inside OpenMP construct.
80 virtual const VarDecl *getThreadIDVariable() const = 0;
81
82 /// Emit the captured statement body.
83 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
84
85 /// Get an LValue for the current ThreadID variable.
86 /// \return LValue for thread id variable. This LValue always has type int32*.
87 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
88
89 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
90
91 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
92
93 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
94
95 bool hasCancel() const { return HasCancel; }
96
97 static bool classof(const CGCapturedStmtInfo *Info) {
98 return Info->getKind() == CR_OpenMP;
99 }
100
101 ~CGOpenMPRegionInfo() override = default;
102
103protected:
104 CGOpenMPRegionKind RegionKind;
105 RegionCodeGenTy CodeGen;
106 OpenMPDirectiveKind Kind;
107 bool HasCancel;
108};
109
110/// API for captured statement code generation in OpenMP constructs.
111class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
112public:
113 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
114 const RegionCodeGenTy &CodeGen,
115 OpenMPDirectiveKind Kind, bool HasCancel,
116 StringRef HelperName)
117 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
118 HasCancel),
119 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
120 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
121 }
122
123 /// Get a variable or parameter for storing global thread id
124 /// inside OpenMP construct.
125 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
126
127 /// Get the name of the capture helper.
128 StringRef getHelperName() const override { return HelperName; }
129
130 static bool classof(const CGCapturedStmtInfo *Info) {
131 return CGOpenMPRegionInfo::classof(Info) &&
132 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
133 ParallelOutlinedRegion;
134 }
135
136private:
137 /// A variable or parameter storing global thread id for OpenMP
138 /// constructs.
139 const VarDecl *ThreadIDVar;
140 StringRef HelperName;
141};
142
143/// API for captured statement code generation in OpenMP constructs.
144class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
145public:
146 class UntiedTaskActionTy final : public PrePostActionTy {
147 bool Untied;
148 const VarDecl *PartIDVar;
149 const RegionCodeGenTy UntiedCodeGen;
150 llvm::SwitchInst *UntiedSwitch = nullptr;
151
152 public:
153 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
154 const RegionCodeGenTy &UntiedCodeGen)
155 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
156 void Enter(CodeGenFunction &CGF) override {
157 if (Untied) {
158 // Emit task switching point.
159 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
160 CGF.GetAddrOfLocalVar(PartIDVar),
161 PartIDVar->getType()->castAs<PointerType>());
162 llvm::Value *Res =
163 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
164 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
165 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
166 CGF.EmitBlock(DoneBB);
167 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
168 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
169 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
170 CGF.Builder.GetInsertBlock());
171 emitUntiedSwitch(CGF);
172 }
173 }
174 void emitUntiedSwitch(CodeGenFunction &CGF) const {
175 if (Untied) {
176 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
177 CGF.GetAddrOfLocalVar(PartIDVar),
178 PartIDVar->getType()->castAs<PointerType>());
179 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
180 PartIdLVal);
181 UntiedCodeGen(CGF);
182 CodeGenFunction::JumpDest CurPoint =
183 CGF.getJumpDestInCurrentScope(".untied.next.");
184 CGF.EmitBranch(CGF.ReturnBlock.getBlock());
185 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
186 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
187 CGF.Builder.GetInsertBlock());
188 CGF.EmitBranchThroughCleanup(CurPoint);
189 CGF.EmitBlock(CurPoint.getBlock());
190 }
191 }
192 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
193 };
194 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
195 const VarDecl *ThreadIDVar,
196 const RegionCodeGenTy &CodeGen,
197 OpenMPDirectiveKind Kind, bool HasCancel,
198 const UntiedTaskActionTy &Action)
199 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
200 ThreadIDVar(ThreadIDVar), Action(Action) {
201 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
202 }
203
204 /// Get a variable or parameter for storing global thread id
205 /// inside OpenMP construct.
206 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
207
208 /// Get an LValue for the current ThreadID variable.
209 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
210
211 /// Get the name of the capture helper.
212 StringRef getHelperName() const override { return ".omp_outlined."; }
213
214 void emitUntiedSwitch(CodeGenFunction &CGF) override {
215 Action.emitUntiedSwitch(CGF);
216 }
217
218 static bool classof(const CGCapturedStmtInfo *Info) {
219 return CGOpenMPRegionInfo::classof(Info) &&
220 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
221 TaskOutlinedRegion;
222 }
223
224private:
225 /// A variable or parameter storing global thread id for OpenMP
226 /// constructs.
227 const VarDecl *ThreadIDVar;
228 /// Action for emitting code for untied tasks.
229 const UntiedTaskActionTy &Action;
230};
231
232/// API for inlined captured statement code generation in OpenMP
233/// constructs.
234class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
235public:
236 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
237 const RegionCodeGenTy &CodeGen,
238 OpenMPDirectiveKind Kind, bool HasCancel)
239 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
240 OldCSI(OldCSI),
241 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
242
243 // Retrieve the value of the context parameter.
244 llvm::Value *getContextValue() const override {
245 if (OuterRegionInfo)
246 return OuterRegionInfo->getContextValue();
247 llvm_unreachable("No context value for inlined OpenMP region");
248 }
249
250 void setContextValue(llvm::Value *V) override {
251 if (OuterRegionInfo) {
252 OuterRegionInfo->setContextValue(V);
253 return;
254 }
255 llvm_unreachable("No context value for inlined OpenMP region");
256 }
257
258 /// Lookup the captured field decl for a variable.
259 const FieldDecl *lookup(const VarDecl *VD) const override {
260 if (OuterRegionInfo)
261 return OuterRegionInfo->lookup(VD);
262 // If there is no outer outlined region,no need to lookup in a list of
263 // captured variables, we can use the original one.
264 return nullptr;
265 }
266
267 FieldDecl *getThisFieldDecl() const override {
268 if (OuterRegionInfo)
269 return OuterRegionInfo->getThisFieldDecl();
270 return nullptr;
271 }
272
273 /// Get a variable or parameter for storing global thread id
274 /// inside OpenMP construct.
275 const VarDecl *getThreadIDVariable() const override {
276 if (OuterRegionInfo)
277 return OuterRegionInfo->getThreadIDVariable();
278 return nullptr;
279 }
280
281 /// Get an LValue for the current ThreadID variable.
282 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
283 if (OuterRegionInfo)
284 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
285 llvm_unreachable("No LValue for inlined OpenMP construct");
286 }
287
288 /// Get the name of the capture helper.
289 StringRef getHelperName() const override {
290 if (auto *OuterRegionInfo = getOldCSI())
291 return OuterRegionInfo->getHelperName();
292 llvm_unreachable("No helper name for inlined OpenMP construct");
293 }
294
295 void emitUntiedSwitch(CodeGenFunction &CGF) override {
296 if (OuterRegionInfo)
297 OuterRegionInfo->emitUntiedSwitch(CGF);
298 }
299
300 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
301
302 static bool classof(const CGCapturedStmtInfo *Info) {
303 return CGOpenMPRegionInfo::classof(Info) &&
304 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
305 }
306
307 ~CGOpenMPInlinedRegionInfo() override = default;
308
309private:
310 /// CodeGen info about outer OpenMP region.
311 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
312 CGOpenMPRegionInfo *OuterRegionInfo;
313};
314
315/// API for captured statement code generation in OpenMP target
316/// constructs. For this captures, implicit parameters are used instead of the
317/// captured fields. The name of the target region has to be unique in a given
318/// application so it is provided by the client, because only the client has
319/// the information to generate that.
320class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
321public:
322 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
323 const RegionCodeGenTy &CodeGen, StringRef HelperName)
324 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
325 /*HasCancel=*/false),
326 HelperName(HelperName) {}
327
328 /// This is unused for target regions because each starts executing
329 /// with a single thread.
330 const VarDecl *getThreadIDVariable() const override { return nullptr; }
331
332 /// Get the name of the capture helper.
333 StringRef getHelperName() const override { return HelperName; }
334
335 static bool classof(const CGCapturedStmtInfo *Info) {
336 return CGOpenMPRegionInfo::classof(Info) &&
337 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
338 }
339
340private:
341 StringRef HelperName;
342};
343
344static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
345 llvm_unreachable("No codegen for expressions");
346}
347/// API for generation of expressions captured in a innermost OpenMP
348/// region.
349class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350public:
351 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353 OMPD_unknown,
354 /*HasCancel=*/false),
355 PrivScope(CGF) {
356 // Make sure the globals captured in the provided statement are local by
357 // using the privatization logic. We assume the same variable is not
358 // captured more than once.
359 for (const auto &C : CS.captures()) {
360 if (!C.capturesVariable() && !C.capturesVariableByCopy())
361 continue;
362
363 const VarDecl *VD = C.getCapturedVar();
364 if (VD->isLocalVarDeclOrParm())
365 continue;
366
367 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368 /*RefersToEnclosingVariableOrCapture=*/false,
369 VD->getType().getNonReferenceType(), VK_LValue,
370 C.getLocation());
371 PrivScope.addPrivate(
372 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373 }
374 (void)PrivScope.Privatize();
375 }
376
377 /// Lookup the captured field decl for a variable.
378 const FieldDecl *lookup(const VarDecl *VD) const override {
379 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380 return FD;
381 return nullptr;
382 }
383
384 /// Emit the captured statement body.
385 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386 llvm_unreachable("No body for expressions");
387 }
388
389 /// Get a variable or parameter for storing global thread id
390 /// inside OpenMP construct.
391 const VarDecl *getThreadIDVariable() const override {
392 llvm_unreachable("No thread id for expressions");
393 }
394
395 /// Get the name of the capture helper.
396 StringRef getHelperName() const override {
397 llvm_unreachable("No helper name for expressions");
398 }
399
400 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401
402private:
403 /// Private scope to capture global variables.
404 CodeGenFunction::OMPPrivateScope PrivScope;
405};
406
407/// RAII for emitting code of OpenMP constructs.
408class InlinedOpenMPRegionRAII {
409 CodeGenFunction &CGF;
410 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
411 FieldDecl *LambdaThisCaptureField = nullptr;
412 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
413 bool NoInheritance = false;
414
415public:
416 /// Constructs region for combined constructs.
417 /// \param CodeGen Code generation sequence for combined directives. Includes
418 /// a list of functions used for code generation of implicitly inlined
419 /// regions.
420 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
421 OpenMPDirectiveKind Kind, bool HasCancel,
422 bool NoInheritance = true)
423 : CGF(CGF), NoInheritance(NoInheritance) {
424 // Start emission for the construct.
425 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
426 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
427 if (NoInheritance) {
428 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
429 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
430 CGF.LambdaThisCaptureField = nullptr;
431 BlockInfo = CGF.BlockInfo;
432 CGF.BlockInfo = nullptr;
433 }
434 }
435
436 ~InlinedOpenMPRegionRAII() {
437 // Restore original CapturedStmtInfo only if we're done with code emission.
438 auto *OldCSI =
439 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
440 delete CGF.CapturedStmtInfo;
441 CGF.CapturedStmtInfo = OldCSI;
442 if (NoInheritance) {
443 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
444 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
445 CGF.BlockInfo = BlockInfo;
446 }
447 }
448};
449
450/// Values for bit flags used in the ident_t to describe the fields.
451/// All enumeric elements are named and described in accordance with the code
452/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
453enum OpenMPLocationFlags : unsigned {
454 /// Use trampoline for internal microtask.
455 OMP_IDENT_IMD = 0x01,
456 /// Use c-style ident structure.
457 OMP_IDENT_KMPC = 0x02,
458 /// Atomic reduction option for kmpc_reduce.
459 OMP_ATOMIC_REDUCE = 0x10,
460 /// Explicit 'barrier' directive.
461 OMP_IDENT_BARRIER_EXPL = 0x20,
462 /// Implicit barrier in code.
463 OMP_IDENT_BARRIER_IMPL = 0x40,
464 /// Implicit barrier in 'for' directive.
465 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
466 /// Implicit barrier in 'sections' directive.
467 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
468 /// Implicit barrier in 'single' directive.
469 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
470 /// Call of __kmp_for_static_init for static loop.
471 OMP_IDENT_WORK_LOOP = 0x200,
472 /// Call of __kmp_for_static_init for sections.
473 OMP_IDENT_WORK_SECTIONS = 0x400,
474 /// Call of __kmp_for_static_init for distribute.
475 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
476 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
477};
478
479namespace {
480LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
481/// Values for bit flags for marking which requires clauses have been used.
482enum OpenMPOffloadingRequiresDirFlags : int64_t {
483 /// flag undefined.
484 OMP_REQ_UNDEFINED = 0x000,
485 /// no requires clause present.
486 OMP_REQ_NONE = 0x001,
487 /// reverse_offload clause.
488 OMP_REQ_REVERSE_OFFLOAD = 0x002,
489 /// unified_address clause.
490 OMP_REQ_UNIFIED_ADDRESS = 0x004,
491 /// unified_shared_memory clause.
492 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
493 /// dynamic_allocators clause.
494 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
495 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
496};
497
/// Device IDs with a reserved meaning.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID used when the device was not defined; the runtime should get
  /// it from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1
};
503} // anonymous namespace
504
/// Indices of the fields of the ident structure that describes a source
/// location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// Not really used in Fortran any more
  IdentField_Reserved_2 = 2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3 = 3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
545
/// Schedule types for 'omp for' loops. The enumerators are taken from the
/// enum sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// The default schedule is the unordered static one.
  OMP_sch_default = OMP_sch_static,

  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30
};
577
578/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
579/// region.
580class CleanupTy final : public EHScopeStack::Cleanup {
581 PrePostActionTy *Action;
582
583public:
584 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
585 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
586 if (!CGF.HaveInsertPoint())
587 return;
588 Action->Exit(CGF);
589 }
590};
591
592} // anonymous namespace
593
594void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
595 CodeGenFunction::RunCleanupsScope Scope(CGF);
596 if (PrePostAction) {
597 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
598 Callback(CodeGen, CGF, *PrePostAction);
599 } else {
600 PrePostActionTy Action;
601 Callback(CodeGen, CGF, Action);
602 }
603}
604
605/// Check if the combiner is a call to UDR combiner and if it is so return the
606/// UDR decl used for reduction.
607static const OMPDeclareReductionDecl *
608getReductionInit(const Expr *ReductionOp) {
609 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611 if (const auto *DRE =
612 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614 return DRD;
615 return nullptr;
616}
617
618static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
619 const OMPDeclareReductionDecl *DRD,
620 const Expr *InitOp,
621 Address Private, Address Original,
622 QualType Ty) {
623 if (DRD->getInitializer()) {
624 std::pair<llvm::Function *, llvm::Function *> Reduction =
625 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
626 const auto *CE = cast<CallExpr>(InitOp);
627 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
628 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
629 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
630 const auto *LHSDRE =
631 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
632 const auto *RHSDRE =
633 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
634 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
635 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
636 [=]() { return Private; });
637 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
638 [=]() { return Original; });
639 (void)PrivateScope.Privatize();
640 RValue Func = RValue::get(Reduction.second);
641 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642 CGF.EmitIgnoredExpr(InitOp);
643 } else {
644 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646 auto *GV = new llvm::GlobalVariable(
647 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648 llvm::GlobalValue::PrivateLinkage, Init, Name);
649 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650 RValue InitRVal;
651 switch (CGF.getEvaluationKind(Ty)) {
652 case TEK_Scalar:
653 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654 break;
655 case TEK_Complex:
656 InitRVal =
657 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
658 break;
659 case TEK_Aggregate: {
660 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663 /*IsInitializer=*/false);
664 return;
665 }
666 }
667 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
668 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670 /*IsInitializer=*/false);
671 }
672}
673
674/// Emit initialization of arrays of complex types.
675/// \param DestAddr Address of the array.
676/// \param Type Type of array.
677/// \param Init Initial expression of array.
678/// \param SrcAddr Address of the original array.
679static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
680 QualType Type, bool EmitDeclareReductionInit,
681 const Expr *Init,
682 const OMPDeclareReductionDecl *DRD,
683 Address SrcAddr = Address::invalid()) {
684 // Perform element-by-element initialization.
685 QualType ElementTy;
686
687 // Drill down to the base element type on both arrays.
688 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
689 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
690 DestAddr =
691 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
692 if (DRD)
693 SrcAddr =
694 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
695
696 llvm::Value *SrcBegin = nullptr;
697 if (DRD)
698 SrcBegin = SrcAddr.getPointer();
699 llvm::Value *DestBegin = DestAddr.getPointer();
700 // Cast from pointer to array type to pointer to single element.
701 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
702 // The basic structure here is a while-do loop.
703 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
704 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
705 llvm::Value *IsEmpty =
706 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
707 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
708
709 // Enter the loop body, making that address the current address.
710 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
711 CGF.EmitBlock(BodyBB);
712
713 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
714
715 llvm::PHINode *SrcElementPHI = nullptr;
716 Address SrcElementCurrent = Address::invalid();
717 if (DRD) {
718 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
719 "omp.arraycpy.srcElementPast");
720 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
721 SrcElementCurrent =
722 Address(SrcElementPHI,
723 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
724 }
725 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
726 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
727 DestElementPHI->addIncoming(DestBegin, EntryBB);
728 Address DestElementCurrent =
729 Address(DestElementPHI,
730 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
731
732 // Emit copy.
733 {
734 CodeGenFunction::RunCleanupsScope InitScope(CGF);
735 if (EmitDeclareReductionInit) {
736 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
737 SrcElementCurrent, ElementTy);
738 } else
739 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
740 /*IsInitializer=*/false);
741 }
742
743 if (DRD) {
744 // Shift the address forward by one element.
745 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
746 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
747 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
748 }
749
750 // Shift the address forward by one element.
751 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
752 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
753 // Check whether we've reached the end.
754 llvm::Value *Done =
755 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
756 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
757 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
758
759 // Done.
760 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
761}
762
763LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
764 return CGF.EmitOMPSharedLValue(E);
765}
766
767LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
768 const Expr *E) {
769 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
770 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
771 return LValue();
772}
773
774void ReductionCodeGen::emitAggregateInitialization(
775 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
776 const OMPDeclareReductionDecl *DRD) {
777 // Emit VarDecl with copy init for arrays.
778 // Get the address of the original variable captured in current
779 // captured region.
780 const auto *PrivateVD =
781 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
782 bool EmitDeclareReductionInit =
783 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
784 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
785 EmitDeclareReductionInit,
786 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
787 : PrivateVD->getInit(),
788 DRD, SharedLVal.getAddress(CGF));
789}
790
791ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
792 ArrayRef<const Expr *> Origs,
793 ArrayRef<const Expr *> Privates,
794 ArrayRef<const Expr *> ReductionOps) {
795 ClausesData.reserve(Shareds.size());
796 SharedAddresses.reserve(Shareds.size());
797 Sizes.reserve(Shareds.size());
798 BaseDecls.reserve(Shareds.size());
799 const auto *IOrig = Origs.begin();
800 const auto *IPriv = Privates.begin();
801 const auto *IRed = ReductionOps.begin();
802 for (const Expr *Ref : Shareds) {
803 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
804 std::advance(IOrig, 1);
805 std::advance(IPriv, 1);
806 std::advance(IRed, 1);
807 }
808}
809
810void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
811 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
812 "Number of generated lvalues must be exactly N.");
813 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
814 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
815 SharedAddresses.emplace_back(First, Second);
816 if (ClausesData[N].Shared == ClausesData[N].Ref) {
817 OrigAddresses.emplace_back(First, Second);
818 } else {
819 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
820 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
821 OrigAddresses.emplace_back(First, Second);
822 }
823}
824
825void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
826 const auto *PrivateVD =
827 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
828 QualType PrivateType = PrivateVD->getType();
829 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
830 if (!PrivateType->isVariablyModifiedType()) {
831 Sizes.emplace_back(
832 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
833 nullptr);
834 return;
835 }
836 llvm::Value *Size;
837 llvm::Value *SizeInChars;
838 auto *ElemType =
839 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
840 ->getElementType();
841 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
842 if (AsArraySection) {
843 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
844 OrigAddresses[N].first.getPointer(CGF));
845 Size = CGF.Builder.CreateNUWAdd(
846 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
847 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
848 } else {
849 SizeInChars =
850 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
851 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
852 }
853 Sizes.emplace_back(SizeInChars, Size);
854 CodeGenFunction::OpaqueValueMapping OpaqueMap(
855 CGF,
856 cast<OpaqueValueExpr>(
857 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
858 RValue::get(Size));
859 CGF.EmitVariablyModifiedType(PrivateType);
860}
861
862void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
863 llvm::Value *Size) {
864 const auto *PrivateVD =
865 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
866 QualType PrivateType = PrivateVD->getType();
867 if (!PrivateType->isVariablyModifiedType()) {
868 assert(!Size && !Sizes[N].second &&
869 "Size should be nullptr for non-variably modified reduction "
870 "items.");
871 return;
872 }
873 CodeGenFunction::OpaqueValueMapping OpaqueMap(
874 CGF,
875 cast<OpaqueValueExpr>(
876 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
877 RValue::get(Size));
878 CGF.EmitVariablyModifiedType(PrivateType);
879}
880
881void ReductionCodeGen::emitInitialization(
882 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
883 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
884 assert(SharedAddresses.size() > N && "No variable was generated");
885 const auto *PrivateVD =
886 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
887 const OMPDeclareReductionDecl *DRD =
888 getReductionInit(ClausesData[N].ReductionOp);
889 QualType PrivateType = PrivateVD->getType();
890 PrivateAddr = CGF.Builder.CreateElementBitCast(
891 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
892 QualType SharedType = SharedAddresses[N].first.getType();
893 SharedLVal = CGF.MakeAddrLValue(
894 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
895 CGF.ConvertTypeForMem(SharedType)),
896 SharedType, SharedAddresses[N].first.getBaseInfo(),
897 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
898 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
899 if (DRD && DRD->getInitializer())
900 (void)DefaultInit(CGF);
901 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
902 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
903 (void)DefaultInit(CGF);
904 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
905 PrivateAddr, SharedLVal.getAddress(CGF),
906 SharedLVal.getType());
907 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
908 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
909 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
910 PrivateVD->getType().getQualifiers(),
911 /*IsInitializer=*/false);
912 }
913}
914
915bool ReductionCodeGen::needCleanups(unsigned N) {
916 const auto *PrivateVD =
917 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
918 QualType PrivateType = PrivateVD->getType();
919 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
920 return DTorKind != QualType::DK_none;
921}
922
923void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
924 Address PrivateAddr) {
925 const auto *PrivateVD =
926 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
927 QualType PrivateType = PrivateVD->getType();
928 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
929 if (needCleanups(N)) {
930 PrivateAddr = CGF.Builder.CreateElementBitCast(
931 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
932 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
933 }
934}
935
936static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
937 LValue BaseLV) {
938 BaseTy = BaseTy.getNonReferenceType();
939 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
940 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
941 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
942 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
943 } else {
944 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
945 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
946 }
947 BaseTy = BaseTy->getPointeeType();
948 }
949 return CGF.MakeAddrLValue(
950 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
951 CGF.ConvertTypeForMem(ElTy)),
952 BaseLV.getType(), BaseLV.getBaseInfo(),
953 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
954}
955
956static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
957 llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
958 llvm::Value *Addr) {
959 Address Tmp = Address::invalid();
960 Address TopTmp = Address::invalid();
961 Address MostTopTmp = Address::invalid();
962 BaseTy = BaseTy.getNonReferenceType();
963 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
964 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
965 Tmp = CGF.CreateMemTemp(BaseTy);
966 if (TopTmp.isValid())
967 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
968 else
969 MostTopTmp = Tmp;
970 TopTmp = Tmp;
971 BaseTy = BaseTy->getPointeeType();
972 }
973 llvm::Type *Ty = BaseLVType;
974 if (Tmp.isValid())
975 Ty = Tmp.getElementType();
976 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
977 if (Tmp.isValid()) {
978 CGF.Builder.CreateStore(Addr, Tmp);
979 return MostTopTmp;
980 }
981 return Address(Addr, BaseLVAlignment);
982}
983
984static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
985 const VarDecl *OrigVD = nullptr;
986 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
987 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
988 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
989 Base = TempOASE->getBase()->IgnoreParenImpCasts();
990 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
991 Base = TempASE->getBase()->IgnoreParenImpCasts();
992 DE = cast<DeclRefExpr>(Base);
993 OrigVD = cast<VarDecl>(DE->getDecl());
994 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
995 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
996 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
997 Base = TempASE->getBase()->IgnoreParenImpCasts();
998 DE = cast<DeclRefExpr>(Base);
999 OrigVD = cast<VarDecl>(DE->getDecl());
1000 }
1001 return OrigVD;
1002}
1003
1004Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1005 Address PrivateAddr) {
1006 const DeclRefExpr *DE;
1007 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1008 BaseDecls.emplace_back(OrigVD);
1009 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1010 LValue BaseLValue =
1011 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1012 OriginalBaseLValue);
1013 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1014 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1015 llvm::Value *PrivatePointer =
1016 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1017 PrivateAddr.getPointer(),
1018 SharedAddresses[N].first.getAddress(CGF).getType());
1019 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1020 return castToBase(CGF, OrigVD->getType(),
1021 SharedAddresses[N].first.getType(),
1022 OriginalBaseLValue.getAddress(CGF).getType(),
1023 OriginalBaseLValue.getAlignment(), Ptr);
1024 }
1025 BaseDecls.emplace_back(
1026 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1027 return PrivateAddr;
1028}
1029
1030bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1031 const OMPDeclareReductionDecl *DRD =
1032 getReductionInit(ClausesData[N].ReductionOp);
1033 return DRD && DRD->getInitializer();
1034}
1035
1036LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1037 return CGF.EmitLoadOfPointerLValue(
1038 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1039 getThreadIDVariable()->getType()->castAs<PointerType>());
1040}
1041
1042void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1043 if (!CGF.HaveInsertPoint())
1044 return;
1045 // 1.2.2 OpenMP Language Terminology
1046 // Structured block - An executable statement with a single entry at the
1047 // top and a single exit at the bottom.
1048 // The point of exit cannot be a branch out of the structured block.
1049 // longjmp() and throw() must not violate the entry/exit criteria.
1050 CGF.EHStack.pushTerminate();
1051 if (S)
1052 CGF.incrementProfileCounter(S);
1053 CodeGen(CGF);
1054 CGF.EHStack.popTerminate();
1055}
1056
1057LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1058 CodeGenFunction &CGF) {
1059 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1060 getThreadIDVariable()->getType(),
1061 AlignmentSource::Decl);
1062}
1063
1064static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1065 QualType FieldTy) {
1066 auto *Field = FieldDecl::Create(
1067 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1068 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1069 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1070 Field->setAccess(AS_public);
1071 DC->addDecl(Field);
1072 return Field;
1073}
1074
1075CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1076 StringRef Separator)
1077 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1078 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1079 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1080
1081 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1082 OMPBuilder.initialize();
1083 loadOffloadInfoMetadata();
1084}
1085
1086void CGOpenMPRuntime::clear() {
1087 InternalVars.clear();
1088 // Clean non-target variable declarations possibly used only in debug info.
1089 for (const auto &Data : EmittedNonTargetVariables) {
1090 if (!Data.getValue().pointsToAliveValue())
1091 continue;
1092 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1093 if (!GV)
1094 continue;
1095 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1096 continue;
1097 GV->eraseFromParent();
1098 }
1099}
1100
1101std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1102 SmallString<128> Buffer;
1103 llvm::raw_svector_ostream OS(Buffer);
1104 StringRef Sep = FirstSeparator;
1105 for (StringRef Part : Parts) {
1106 OS << Sep << Part;
1107 Sep = Separator;
1108 }
1109 return std::string(OS.str());
1110}
1111
1112static llvm::Function *
1113emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1114 const Expr *CombinerInitializer, const VarDecl *In,
1115 const VarDecl *Out, bool IsCombiner) {
1116 // void .omp_combiner.(Ty *in, Ty *out);
1117 ASTContext &C = CGM.getContext();
1118 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1119 FunctionArgList Args;
1120 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1121 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1122 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1123 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1124 Args.push_back(&OmpOutParm);
1125 Args.push_back(&OmpInParm);
1126 const CGFunctionInfo &FnInfo =
1127 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1128 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1129 std::string Name = CGM.getOpenMPRuntime().getName(
1130 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1131 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1132 Name, &CGM.getModule());
1133 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1134 if (CGM.getLangOpts().Optimize) {
1135 Fn->removeFnAttr(llvm::Attribute::NoInline);
1136 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1137 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1138 }
1139 CodeGenFunction CGF(CGM);
1140 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1141 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1142 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1143 Out->getLocation());
1144 CodeGenFunction::OMPPrivateScope Scope(CGF);
1145 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1146 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1147 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1148 .getAddress(CGF);
1149 });
1150 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1151 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1152 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1153 .getAddress(CGF);
1154 });
1155 (void)Scope.Privatize();
1156 if (!IsCombiner && Out->hasInit() &&
1157 !CGF.isTrivialInitializer(Out->getInit())) {
1158 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1159 Out->getType().getQualifiers(),
1160 /*IsInitializer=*/true);
1161 }
1162 if (CombinerInitializer)
1163 CGF.EmitIgnoredExpr(CombinerInitializer);
1164 Scope.ForceCleanup();
1165 CGF.FinishFunction();
1166 return Fn;
1167}
1168
1169void CGOpenMPRuntime::emitUserDefinedReduction(
1170 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1171 if (UDRMap.count(D) > 0)
1172 return;
1173 llvm::Function *Combiner = emitCombinerOrInitializer(
1174 CGM, D->getType(), D->getCombiner(),
1175 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1176 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1177 /*IsCombiner=*/true);
1178 llvm::Function *Initializer = nullptr;
1179 if (const Expr *Init = D->getInitializer()) {
1180 Initializer = emitCombinerOrInitializer(
1181 CGM, D->getType(),
1182 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1183 : nullptr,
1184 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1185 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1186 /*IsCombiner=*/false);
1187 }
1188 UDRMap.try_emplace(D, Combiner, Initializer);
1189 if (CGF) {
1190 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1191 Decls.second.push_back(D);
1192 }
1193}
1194
1195std::pair<llvm::Function *, llvm::Function *>
1196CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1197 auto I = UDRMap.find(D);
1198 if (I != UDRMap.end())
1199 return I->second;
1200 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1201 return UDRMap.lookup(D);
1202}
1203
1204namespace {
1205// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1206// Builder if one is present.
1207struct PushAndPopStackRAII {
1208 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1209 bool HasCancel, llvm::omp::Directive Kind)
1210 : OMPBuilder(OMPBuilder) {
1211 if (!OMPBuilder)
1212 return;
1213
1214 // The following callback is the crucial part of clangs cleanup process.
1215 //
1216 // NOTE:
1217 // Once the OpenMPIRBuilder is used to create parallel regions (and
1218 // similar), the cancellation destination (Dest below) is determined via
1219 // IP. That means if we have variables to finalize we split the block at IP,
1220 // use the new block (=BB) as destination to build a JumpDest (via
1221 // getJumpDestInCurrentScope(BB)) which then is fed to
1222 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1223 // to push & pop an FinalizationInfo object.
1224 // The FiniCB will still be needed but at the point where the
1225 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1226 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1227 assert(IP.getBlock()->end() == IP.getPoint() &&
1228 "Clang CG should cause non-terminated block!");
1229 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1230 CGF.Builder.restoreIP(IP);
1231 CodeGenFunction::JumpDest Dest =
1232 CGF.getOMPCancelDestination(OMPD_parallel);
1233 CGF.EmitBranchThroughCleanup(Dest);
1234 };
1235
1236 // TODO: Remove this once we emit parallel regions through the
1237 // OpenMPIRBuilder as it can do this setup internally.
1238 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1239 OMPBuilder->pushFinalizationCB(std::move(FI));
1240 }
1241 ~PushAndPopStackRAII() {
1242 if (OMPBuilder)
1243 OMPBuilder->popFinalizationCB();
1244 }
1245 llvm::OpenMPIRBuilder *OMPBuilder;
1246};
1247} // namespace
1248
1249static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1250 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1251 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1252 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1253 assert(ThreadIDVar->getType()->isPointerType() &&
1254 "thread id variable must be of type kmp_int32 *");
1255 CodeGenFunction CGF(CGM, true);
1256 bool HasCancel = false;
1257 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1258 HasCancel = OPD->hasCancel();
1259 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1260 HasCancel = OPD->hasCancel();
1261 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1262 HasCancel = OPSD->hasCancel();
1263 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1264 HasCancel = OPFD->hasCancel();
1265 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1266 HasCancel = OPFD->hasCancel();
1267 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1268 HasCancel = OPFD->hasCancel();
1269 else if (const auto *OPFD =
1270 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1271 HasCancel = OPFD->hasCancel();
1272 else if (const auto *OPFD =
1273 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1274 HasCancel = OPFD->hasCancel();
1275
1276 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1277 // parallel region to make cancellation barriers work properly.
1278 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1279 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1280 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1281 HasCancel, OutlinedHelperName);
1282 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1283 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1284}
1285
1286llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1287 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1288 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1289 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1290 return emitParallelOrTeamsOutlinedFunction(
1291 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1292}
1293
1294llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1295 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1296 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1297 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1298 return emitParallelOrTeamsOutlinedFunction(
1299 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1300}
1301
1302llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1303 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1304 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1305 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1306 bool Tied, unsigned &NumberOfParts) {
1307 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1308 PrePostActionTy &) {
1309 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1310 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1311 llvm::Value *TaskArgs[] = {
1312 UpLoc, ThreadID,
1313 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1314 TaskTVar->getType()->castAs<PointerType>())
1315 .getPointer(CGF)};
1316 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1317 CGM.getModule(), OMPRTL___kmpc_omp_task),
1318 TaskArgs);
1319 };
1320 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1321 UntiedCodeGen);
1322 CodeGen.setAction(Action);
1323 assert(!ThreadIDVar->getType()->isPointerType() &&
1324 "thread id variable must be of type kmp_int32 for tasks");
1325 const OpenMPDirectiveKind Region =
1326 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1327 : OMPD_task;
1328 const CapturedStmt *CS = D.getCapturedStmt(Region);
1329 bool HasCancel = false;
1330 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1331 HasCancel = TD->hasCancel();
1332 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1333 HasCancel = TD->hasCancel();
1334 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1335 HasCancel = TD->hasCancel();
1336 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1337 HasCancel = TD->hasCancel();
1338
1339 CodeGenFunction CGF(CGM, true);
1340 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1341 InnermostKind, HasCancel, Action);
1342 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1343 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1344 if (!Tied)
1345 NumberOfParts = Action.getNumberOfParts();
1346 return Res;
1347}
1348
1349static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1350 const RecordDecl *RD, const CGRecordLayout &RL,
1351 ArrayRef<llvm::Constant *> Data) {
1352 llvm::StructType *StructTy = RL.getLLVMType();
1353 unsigned PrevIdx = 0;
1354 ConstantInitBuilder CIBuilder(CGM);
1355 auto DI = Data.begin();
1356 for (const FieldDecl *FD : RD->fields()) {
1357 unsigned Idx = RL.getLLVMFieldNo(FD);
1358 // Fill the alignment.
1359 for (unsigned I = PrevIdx; I < Idx; ++I)
1360 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1361 PrevIdx = Idx + 1;
1362 Fields.add(*DI);
1363 ++DI;
1364 }
1365}
1366
1367template <class... As>
1368static llvm::GlobalVariable *
1369createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1370 ArrayRef<llvm::Constant *> Data, const Twine &Name,
1371 As &&... Args) {
1372 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1373 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1374 ConstantInitBuilder CIBuilder(CGM);
1375 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1376 buildStructValue(Fields, CGM, RD, RL, Data);
1377 return Fields.finishAndCreateGlobal(
1378 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1379 std::forward<As>(Args)...);
1380}
1381
1382template <typename T>
1383static void
1384createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1385 ArrayRef<llvm::Constant *> Data,
1386 T &Parent) {
1387 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1388 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1389 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1390 buildStructValue(Fields, CGM, RD, RL, Data);
1391 Fields.finishAndAddTo(Parent);
1392}
1393
1394void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1395 bool AtCurrentPoint) {
1396 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1397 assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1398
1399 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1400 if (AtCurrentPoint) {
1401 Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1402 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1403 } else {
1404 Elem.second.ServiceInsertPt =
1405 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1406 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1407 }
1408}
1409
1410void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1411 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1412 if (Elem.second.ServiceInsertPt) {
1413 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1414 Elem.second.ServiceInsertPt = nullptr;
1415 Ptr->eraseFromParent();
1416 }
1417}
1418
1419static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1420 SourceLocation Loc,
1421 SmallString<128> &Buffer) {
1422 llvm::raw_svector_ostream OS(Buffer);
1423 // Build debug location
1424 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1425 OS << ";" << PLoc.getFilename() << ";";
1426 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1427 OS << FD->getQualifiedNameAsString();
1428 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1429 return OS.str();
1430}
1431
1432llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1433 SourceLocation Loc,
1434 unsigned Flags) {
1435 llvm::Constant *SrcLocStr;
1436 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1437 Loc.isInvalid()) {
1438 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1439 } else {
1440 std::string FunctionName = "";
1441 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1442 FunctionName = FD->getQualifiedNameAsString();
1443 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1444 const char *FileName = PLoc.getFilename();
1445 unsigned Line = PLoc.getLine();
1446 unsigned Column = PLoc.getColumn();
1447 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1448 Line, Column);
1449 }
1450 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1451 return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1452 Reserved2Flags);
1453}
1454
1455llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1456 SourceLocation Loc) {
1457 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1458 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1459 // the clang invariants used below might be broken.
1460 if (CGM.getLangOpts().OpenMPIRBuilder) {
1461 SmallString<128> Buffer;
1462 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1463 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1464 getIdentStringFromSourceLocation(CGF, Loc, Buffer));
1465 return OMPBuilder.getOrCreateThreadID(
1466 OMPBuilder.getOrCreateIdent(SrcLocStr));
1467 }
1468
1469 llvm::Value *ThreadID = nullptr;
1470 // Check whether we've already cached a load of the thread id in this
1471 // function.
1472 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1473 if (I != OpenMPLocThreadIDMap.end()) {
1474 ThreadID = I->second.ThreadID;
1475 if (ThreadID != nullptr)
1476 return ThreadID;
1477 }
1478 // If exceptions are enabled, do not use parameter to avoid possible crash.
1479 if (auto *OMPRegionInfo =
1480 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1481 if (OMPRegionInfo->getThreadIDVariable()) {
1482 // Check if this an outlined function with thread id passed as argument.
1483 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1484 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1485 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1486 !CGF.getLangOpts().CXXExceptions ||
1487 CGF.Builder.GetInsertBlock() == TopBlock ||
1488 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1489 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1490 TopBlock ||
1491 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1492 CGF.Builder.GetInsertBlock()) {
1493 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1494 // If value loaded in entry block, cache it and use it everywhere in
1495 // function.
1496 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1498 Elem.second.ThreadID = ThreadID;
1499 }
1500 return ThreadID;
1501 }
1502 }
1503 }
1504
1505 // This is not an outlined function region - need to call __kmpc_int32
1506 // kmpc_global_thread_num(ident_t *loc).
1507 // Generate thread id value and cache this value for use across the
1508 // function.
1509 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1510 if (!Elem.second.ServiceInsertPt)
1511 setLocThreadIdInsertPt(CGF);
1512 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1513 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1514 llvm::CallInst *Call = CGF.Builder.CreateCall(
1515 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1516 OMPRTL___kmpc_global_thread_num),
1517 emitUpdateLocation(CGF, Loc));
1518 Call->setCallingConv(CGF.getRuntimeCC());
1519 Elem.second.ThreadID = Call;
1520 return Call;
1521}
1522
1523void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1524 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1525 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1526 clearLocThreadIdInsertPt(CGF);
1527 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1528 }
1529 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1530 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1531 UDRMap.erase(D);
1532 FunctionUDRMap.erase(CGF.CurFn);
1533 }
1534 auto I = FunctionUDMMap.find(CGF.CurFn);
1535 if (I != FunctionUDMMap.end()) {
1536 for(const auto *D : I->second)
1537 UDMMap.erase(D);
1538 FunctionUDMMap.erase(I);
1539 }
1540 LastprivateConditionalToTypes.erase(CGF.CurFn);
1541 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1542}
1543
1544llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1545 return OMPBuilder.IdentPtr;
1546}
1547
1548llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1549 if (!Kmpc_MicroTy) {
1550 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1551 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1552 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1553 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1554 }
1555 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1556}
1557
1558llvm::FunctionCallee
1559CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1560 assert((IVSize == 32 || IVSize == 64) &&
1561 "IV size is not compatible with the omp runtime");
1562 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1563 : "__kmpc_for_static_init_4u")
1564 : (IVSigned ? "__kmpc_for_static_init_8"
1565 : "__kmpc_for_static_init_8u");
1566 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1567 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1568 llvm::Type *TypeParams[] = {
1569 getIdentTyPointerTy(), // loc
1570 CGM.Int32Ty, // tid
1571 CGM.Int32Ty, // schedtype
1572 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1573 PtrTy, // p_lower
1574 PtrTy, // p_upper
1575 PtrTy, // p_stride
1576 ITy, // incr
1577 ITy // chunk
1578 };
1579 auto *FnTy =
1580 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1581 return CGM.CreateRuntimeFunction(FnTy, Name);
1582}
1583
1584llvm::FunctionCallee
1585CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1586 assert((IVSize == 32 || IVSize == 64) &&
1587 "IV size is not compatible with the omp runtime");
1588 StringRef Name =
1589 IVSize == 32
1590 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1591 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1592 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1593 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1594 CGM.Int32Ty, // tid
1595 CGM.Int32Ty, // schedtype
1596 ITy, // lower
1597 ITy, // upper
1598 ITy, // stride
1599 ITy // chunk
1600 };
1601 auto *FnTy =
1602 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1603 return CGM.CreateRuntimeFunction(FnTy, Name);
1604}
1605
1606llvm::FunctionCallee
1607CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1608 assert((IVSize == 32 || IVSize == 64) &&
1609 "IV size is not compatible with the omp runtime");
1610 StringRef Name =
1611 IVSize == 32
1612 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1613 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1614 llvm::Type *TypeParams[] = {
1615 getIdentTyPointerTy(), // loc
1616 CGM.Int32Ty, // tid
1617 };
1618 auto *FnTy =
1619 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1620 return CGM.CreateRuntimeFunction(FnTy, Name);
1621}
1622
1623llvm::FunctionCallee
1624CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1625 assert((IVSize == 32 || IVSize == 64) &&
1626 "IV size is not compatible with the omp runtime");
1627 StringRef Name =
1628 IVSize == 32
1629 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1630 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1631 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1632 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1633 llvm::Type *TypeParams[] = {
1634 getIdentTyPointerTy(), // loc
1635 CGM.Int32Ty, // tid
1636 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1637 PtrTy, // p_lower
1638 PtrTy, // p_upper
1639 PtrTy // p_stride
1640 };
1641 auto *FnTy =
1642 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1643 return CGM.CreateRuntimeFunction(FnTy, Name);
1644}
1645
1646/// Obtain information that uniquely identifies a target entry. This
1647/// consists of the file and device IDs as well as line number associated with
1648/// the relevant entry source location.
1649static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1650 unsigned &DeviceID, unsigned &FileID,
1651 unsigned &LineNum) {
1652 SourceManager &SM = C.getSourceManager();
1653
1654 // The loc should be always valid and have a file ID (the user cannot use
1655 // #pragma directives in macros)
1656
1657 assert(Loc.isValid() && "Source location is expected to be always valid.");
1658
1659 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1660 assert(PLoc.isValid() && "Source location is expected to be always valid.");
1661
1662 llvm::sys::fs::UniqueID ID;
1663 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1664 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1665 assert(PLoc.isValid() && "Source location is expected to be always valid.");
1666 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1667 SM.getDiagnostics().Report(diag::err_cannot_open_file)
1668 << PLoc.getFilename() << EC.message();
1669 }
1670
1671 DeviceID = ID.getDevice();
1672 FileID = ID.getFile();
1673 LineNum = PLoc.getLine();
1674}
1675
1676Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1677 if (CGM.getLangOpts().OpenMPSimd)
1678 return Address::invalid();
1679 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1680 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1681 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1682 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1683 HasRequiresUnifiedSharedMemory))) {
1684 SmallString<64> PtrName;
1685 {
1686 llvm::raw_svector_ostream OS(PtrName);
1687 OS << CGM.getMangledName(GlobalDecl(VD));
1688 if (!VD->isExternallyVisible()) {
1689 unsigned DeviceID, FileID, Line;
1690 getTargetEntryUniqueInfo(CGM.getContext(),
1691 VD->getCanonicalDecl()->getBeginLoc(),
1692 DeviceID, FileID, Line);
1693 OS << llvm::format("_%x", FileID);
1694 }
1695 OS << "_decl_tgt_ref_ptr";
1696 }
1697 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1698 if (!Ptr) {
1699 QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1700 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1701 PtrName);
1702
1703 auto *GV = cast<llvm::GlobalVariable>(Ptr);
1704 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1705
1706 if (!CGM.getLangOpts().OpenMPIsDevice)
1707 GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1708 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1709 }
1710 return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1711 }
1712 return Address::invalid();
1713}
1714
1715llvm::Constant *
1716CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1717 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1718 !CGM.getContext().getTargetInfo().isTLSSupported());
1719 // Lookup the entry, lazily creating it if necessary.
1720 std::string Suffix = getName({"cache", ""});
1721 return getOrCreateInternalVariable(
1722 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1723}
1724
1725Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1726 const VarDecl *VD,
1727 Address VDAddr,
1728 SourceLocation Loc) {
1729 if (CGM.getLangOpts().OpenMPUseTLS &&
1730 CGM.getContext().getTargetInfo().isTLSSupported())
1731 return VDAddr;
1732
1733 llvm::Type *VarTy = VDAddr.getElementType();
1734 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1735 CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1736 CGM.Int8PtrTy),
1737 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1738 getOrCreateThreadPrivateCache(VD)};
1739 return Address(CGF.EmitRuntimeCall(
1740 OMPBuilder.getOrCreateRuntimeFunction(
1741 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1742 Args),
1743 VDAddr.getAlignment());
1744}
1745
1746void CGOpenMPRuntime::emitThreadPrivateVarInit(
1747 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1748 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1749 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1750 // library.
1751 llvm::