1//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the OpenMPIRBuilder class and helpers used as a convenient
10// way to create LLVM instructions for OpenMP directives.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Allocator.h"
#include "llvm/TargetParser/Triple.h"
#include <forward_list>
#include <map>
#include <optional>
#include <tuple>
26
27namespace llvm {
28class CanonicalLoopInfo;
29struct TargetRegionEntryInfo;
30class OffloadEntriesInfoManager;
31class OpenMPIRBuilder;
32
/// Move the instructions after an InsertPoint to the beginning of another
/// BasicBlock.
///
/// The instructions after \p IP are moved to the beginning of \p New which must
/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
/// \p New will be added such that there is no semantic change. Otherwise, the
/// \p IP insert block remains degenerate and it is up to the caller to insert a
/// terminator.
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
              bool CreateBranch);

/// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
/// insert location will stick to after the instruction before the insertion
/// point (instead of moving with the instruction the InsertPoint stores
/// internally).
void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);

/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
/// (missing the terminator).
///
/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
/// is true, a branch to the new successor will be created such that
/// semantically there is no change; otherwise the block of the insertion point
/// remains degenerate and it is the caller's responsibility to insert a
/// terminator. Returns the new successor block.
BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
                    llvm::Twine Name = {});

/// Split a BasicBlock at \p Builder's insertion point, even if the block is
/// degenerate (missing the terminator). Its new insert location will stick to
/// after the instruction before the insertion point (instead of moving with the
/// instruction the InsertPoint stores internally).
BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
                    llvm::Twine Name = {});

/// Split a BasicBlock at \p Builder's insertion point, even if the block is
/// degenerate (missing the terminator). Its new insert location will stick to
/// after the instruction before the insertion point (instead of moving with the
/// instruction the InsertPoint stores internally).
///
/// Overload for IRBuilder<>. Unlike the IRBuilderBase overload, \p Name has no
/// default argument here.
BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);

/// Like splitBB, but reuses the current block's name for the new name.
///
/// \p Suffix defaults to ".split"; presumably it is appended to the current
/// block's name to form the name of the new block (confirm against the
/// implementation).
BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
                              llvm::Twine Suffix = ".split");
78
79/// Captures attributes that affect generating LLVM-IR using the
80/// OpenMPIRBuilder and related classes. Note that not all attributes are
81/// required for all classes or functions. In some use cases the configuration
82/// is not necessary at all, because because the only functions that are called
83/// are ones that are not dependent on the configuration.
84class OpenMPIRBuilderConfig {
85public:
86 /// Flag to define whether to generate code for the role of the OpenMP host
87 /// (if set to false) or device (if set to true) in an offloading context. It
88 /// is set when the -fopenmp-is-target-device compiler frontend option is
89 /// specified.
90 std::optional<bool> IsTargetDevice;
91
92 /// Flag for specifying if the compilation is done for an accelerator. It is
93 /// set according to the architecture of the target triple and currently only
94 /// true when targeting AMDGPU or NVPTX. Today, these targets can only perform
95 /// the role of an OpenMP target device, so `IsTargetDevice` must also be true
96 /// if `IsGPU` is true. This restriction might be lifted if an accelerator-
97 /// like target with the ability to work as the OpenMP host is added, or if
98 /// the capabilities of the currently supported GPU architectures are
99 /// expanded.
100 std::optional<bool> IsGPU;
101
102 // Flag for specifying if offloading is mandatory.
103 std::optional<bool> OpenMPOffloadMandatory;
104
105 /// First separator used between the initial two parts of a name.
106 std::optional<StringRef> FirstSeparator;
107 /// Separator used between all of the rest consecutive parts of s name
108 std::optional<StringRef> Separator;
109
110 OpenMPIRBuilderConfig();
111 OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU,
112 bool OpenMPOffloadMandatory,
113 bool HasRequiresReverseOffload,
114 bool HasRequiresUnifiedAddress,
115 bool HasRequiresUnifiedSharedMemory,
116 bool HasRequiresDynamicAllocators);
117
118 // Getters functions that assert if the required values are not present.
119 bool isTargetDevice() const {
120 assert(IsTargetDevice.has_value() && "IsTargetDevice is not set");
121 return *IsTargetDevice;
122 }
123
124 bool isGPU() const {
125 assert(IsGPU.has_value() && "IsGPU is not set");
126 return *IsGPU;
127 }
128
129 bool openMPOffloadMandatory() const {
130 assert(OpenMPOffloadMandatory.has_value() &&
131 "OpenMPOffloadMandatory is not set");
132 return *OpenMPOffloadMandatory;
133 }
134
135 bool hasRequiresFlags() const { return RequiresFlags; }
136 bool hasRequiresReverseOffload() const;
137 bool hasRequiresUnifiedAddress() const;
138 bool hasRequiresUnifiedSharedMemory() const;
139 bool hasRequiresDynamicAllocators() const;
140
141 /// Returns requires directive clauses as flags compatible with those expected
142 /// by libomptarget.
143 int64_t getRequiresFlags() const;
144
145 // Returns the FirstSeparator if set, otherwise use the default separator
146 // depending on isGPU
147 StringRef firstSeparator() const {
148 if (FirstSeparator.has_value())
149 return *FirstSeparator;
150 if (isGPU())
151 return "_";
152 return ".";
153 }
154
155 // Returns the Separator if set, otherwise use the default separator depending
156 // on isGPU
157 StringRef separator() const {
158 if (Separator.has_value())
159 return *Separator;
160 if (isGPU())
161 return "$";
162 return ".";
163 }
164
165 void setIsTargetDevice(bool Value) { IsTargetDevice = Value; }
166 void setIsGPU(bool Value) { IsGPU = Value; }
167 void setOpenMPOffloadMandatory(bool Value) { OpenMPOffloadMandatory = Value; }
168 void setFirstSeparator(StringRef FS) { FirstSeparator = FS; }
169 void setSeparator(StringRef S) { Separator = S; }
170
171 void setHasRequiresReverseOffload(bool Value);
172 void setHasRequiresUnifiedAddress(bool Value);
173 void setHasRequiresUnifiedSharedMemory(bool Value);
174 void setHasRequiresDynamicAllocators(bool Value);
175
176private:
177 /// Flags for specifying which requires directive clauses are present.
178 int64_t RequiresFlags;
179};
180
181/// Data structure to contain the information needed to uniquely identify
182/// a target entry.
183struct TargetRegionEntryInfo {
184 std::string ParentName;
185 unsigned DeviceID;
186 unsigned FileID;
187 unsigned Line;
188 unsigned Count;
189
190 TargetRegionEntryInfo() : DeviceID(0), FileID(0), Line(0), Count(0) {}
191 TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID,
192 unsigned FileID, unsigned Line, unsigned Count = 0)
193 : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line),
194 Count(Count) {}
195
196 static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
197 StringRef ParentName,
198 unsigned DeviceID, unsigned FileID,
199 unsigned Line, unsigned Count);
200
201 bool operator<(const TargetRegionEntryInfo RHS) const {
202 return std::make_tuple(args: ParentName, args: DeviceID, args: FileID, args: Line, args: Count) <
203 std::make_tuple(args: RHS.ParentName, args: RHS.DeviceID, args: RHS.FileID, args: RHS.Line,
204 args: RHS.Count);
205 }
206};
207
208/// Class that manages information about offload code regions and data
209class OffloadEntriesInfoManager {
210 /// Number of entries registered so far.
211 OpenMPIRBuilder *OMPBuilder;
212 unsigned OffloadingEntriesNum = 0;
213
214public:
215 /// Base class of the entries info.
216 class OffloadEntryInfo {
217 public:
218 /// Kind of a given entry.
219 enum OffloadingEntryInfoKinds : unsigned {
220 /// Entry is a target region.
221 OffloadingEntryInfoTargetRegion = 0,
222 /// Entry is a declare target variable.
223 OffloadingEntryInfoDeviceGlobalVar = 1,
224 /// Invalid entry info.
225 OffloadingEntryInfoInvalid = ~0u
226 };
227
228 protected:
229 OffloadEntryInfo() = delete;
230 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
231 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
232 uint32_t Flags)
233 : Flags(Flags), Order(Order), Kind(Kind) {}
234 ~OffloadEntryInfo() = default;
235
236 public:
237 bool isValid() const { return Order != ~0u; }
238 unsigned getOrder() const { return Order; }
239 OffloadingEntryInfoKinds getKind() const { return Kind; }
240 uint32_t getFlags() const { return Flags; }
241 void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
242 Constant *getAddress() const { return cast_or_null<Constant>(Val: Addr); }
243 void setAddress(Constant *V) {
244 assert(!Addr.pointsToAliveValue() && "Address has been set before!");
245 Addr = V;
246 }
247 static bool classof(const OffloadEntryInfo *Info) { return true; }
248
249 private:
250 /// Address of the entity that has to be mapped for offloading.
251 WeakTrackingVH Addr;
252
253 /// Flags associated with the device global.
254 uint32_t Flags = 0u;
255
256 /// Order this entry was emitted.
257 unsigned Order = ~0u;
258
259 OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid;
260 };
261
262 /// Return true if a there are no entries defined.
263 bool empty() const;
264 /// Return number of entries defined so far.
265 unsigned size() const { return OffloadingEntriesNum; }
266
267 OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {}
268
269 //
270 // Target region entries related.
271 //
272
273 /// Kind of the target registry entry.
274 enum OMPTargetRegionEntryKind : uint32_t {
275 /// Mark the entry as target region.
276 OMPTargetRegionEntryTargetRegion = 0x0,
277 };
278
279 /// Target region entries info.
280 class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo {
281 /// Address that can be used as the ID of the entry.
282 Constant *ID = nullptr;
283
284 public:
285 OffloadEntryInfoTargetRegion()
286 : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {}
287 explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
288 Constant *ID,
289 OMPTargetRegionEntryKind Flags)
290 : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags),
291 ID(ID) {
292 setAddress(Addr);
293 }
294
295 Constant *getID() const { return ID; }
296 void setID(Constant *V) {
297 assert(!ID && "ID has been set before!");
298 ID = V;
299 }
300 static bool classof(const OffloadEntryInfo *Info) {
301 return Info->getKind() == OffloadingEntryInfoTargetRegion;
302 }
303 };
304
305 /// Initialize target region entry.
306 /// This is ONLY needed for DEVICE compilation.
307 void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo,
308 unsigned Order);
309 /// Register target region entry.
310 void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
311 Constant *Addr, Constant *ID,
312 OMPTargetRegionEntryKind Flags);
313 /// Return true if a target region entry with the provided information
314 /// exists.
315 bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
316 bool IgnoreAddressId = false) const;
317
318 // Return the Name based on \a EntryInfo using the next available Count.
319 void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
320 const TargetRegionEntryInfo &EntryInfo);
321
322 /// brief Applies action \a Action on all registered entries.
323 typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
324 const OffloadEntryInfoTargetRegion &)>
325 OffloadTargetRegionEntryInfoActTy;
326 void
327 actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action);
328
329 //
330 // Device global variable entries related.
331 //
332
333 /// Kind of the global variable entry..
334 enum OMPTargetGlobalVarEntryKind : uint32_t {
335 /// Mark the entry as a to declare target.
336 OMPTargetGlobalVarEntryTo = 0x0,
337 /// Mark the entry as a to declare target link.
338 OMPTargetGlobalVarEntryLink = 0x1,
339 /// Mark the entry as a declare target enter.
340 OMPTargetGlobalVarEntryEnter = 0x2,
341 /// Mark the entry as having no declare target entry kind.
342 OMPTargetGlobalVarEntryNone = 0x3,
343 /// Mark the entry as a declare target indirect global.
344 OMPTargetGlobalVarEntryIndirect = 0x8,
345 };
346
347 /// Kind of device clause for declare target variables
348 /// and functions
349 /// NOTE: Currently not used as a part of a variable entry
350 /// used for Flang and Clang to interface with the variable
351 /// related registration functions
352 enum OMPTargetDeviceClauseKind : uint32_t {
353 /// The target is marked for all devices
354 OMPTargetDeviceClauseAny = 0x0,
355 /// The target is marked for non-host devices
356 OMPTargetDeviceClauseNoHost = 0x1,
357 /// The target is marked for host devices
358 OMPTargetDeviceClauseHost = 0x2,
359 /// The target is marked as having no clause
360 OMPTargetDeviceClauseNone = 0x3
361 };
362
363 /// Device global variable entries info.
364 class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo {
365 /// Type of the global variable.
366 int64_t VarSize;
367 GlobalValue::LinkageTypes Linkage;
368 const std::string VarName;
369
370 public:
371 OffloadEntryInfoDeviceGlobalVar()
372 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {}
373 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
374 OMPTargetGlobalVarEntryKind Flags)
375 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {}
376 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
377 int64_t VarSize,
378 OMPTargetGlobalVarEntryKind Flags,
379 GlobalValue::LinkageTypes Linkage,
380 const std::string &VarName)
381 : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
382 VarSize(VarSize), Linkage(Linkage), VarName(VarName) {
383 setAddress(Addr);
384 }
385
386 int64_t getVarSize() const { return VarSize; }
387 StringRef getVarName() const { return VarName; }
388 void setVarSize(int64_t Size) { VarSize = Size; }
389 GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
390 void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
391 static bool classof(const OffloadEntryInfo *Info) {
392 return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
393 }
394 };
395
396 /// Initialize device global variable entry.
397 /// This is ONLY used for DEVICE compilation.
398 void initializeDeviceGlobalVarEntryInfo(StringRef Name,
399 OMPTargetGlobalVarEntryKind Flags,
400 unsigned Order);
401
402 /// Register device global variable entry.
403 void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr,
404 int64_t VarSize,
405 OMPTargetGlobalVarEntryKind Flags,
406 GlobalValue::LinkageTypes Linkage);
407 /// Checks if the variable with the given name has been registered already.
408 bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const {
409 return OffloadEntriesDeviceGlobalVar.count(Key: VarName) > 0;
410 }
411 /// Applies action \a Action on all registered entries.
412 typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
413 OffloadDeviceGlobalVarEntryInfoActTy;
414 void actOnDeviceGlobalVarEntriesInfo(
415 const OffloadDeviceGlobalVarEntryInfoActTy &Action);
416
417private:
418 /// Return the count of entries at a particular source location.
419 unsigned
420 getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
421
422 /// Update the count of entries at a particular source location.
423 void
424 incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
425
426 static TargetRegionEntryInfo
427 getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
428 return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
429 EntryInfo.FileID, EntryInfo.Line, 0);
430 }
431
432 // Count of entries at a location.
433 std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
434
435 // Storage for target region entries kind.
436 typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
437 OffloadEntriesTargetRegionTy;
438 OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
439 /// Storage for device global variable entries kind. The storage is to be
440 /// indexed by mangled name.
441 typedef StringMap<OffloadEntryInfoDeviceGlobalVar>
442 OffloadEntriesDeviceGlobalVarTy;
443 OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
444};
445
446/// An interface to create LLVM-IR for OpenMP directives.
447///
448/// Each OpenMP directive has a corresponding public generator method.
449class OpenMPIRBuilder {
450public:
  /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
  /// not have an effect on \p M (see initialize).
  ///
  /// \c Builder is constructed from \p M's context, \c OffloadInfoManager is
  /// handed a pointer to this builder, and \c T is built from \p M's target
  /// triple.
  OpenMPIRBuilder(Module &M)
      : M(M), Builder(M.getContext()), OffloadInfoManager(this),
        T(Triple(M.getTargetTriple())) {}
456 ~OpenMPIRBuilder();
457
458 /// Initialize the internal state, this will put structures types and
459 /// potentially other helpers into the underlying module. Must be called
460 /// before any other method and only once! This internal state includes types
461 /// used in the OpenMPIRBuilder generated from OMPKinds.def.
462 void initialize();
463
464 void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
465
466 /// Finalize the underlying module, e.g., by outlining regions.
467 /// \param Fn The function to be finalized. If not used,
468 /// all functions are finalized.
469 void finalize(Function *Fn = nullptr);
470
471 /// Add attributes known for \p FnID to \p Fn.
472 void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
473
474 /// Type used throughout for insertion points.
475 using InsertPointTy = IRBuilder<>::InsertPoint;
476
  /// Create a name using the platform specific separators.
478 /// \param Parts parts of the final name that needs separation
479 /// The created name has a first separator between the first and second part
480 /// and a second separator between all other parts.
481 /// E.g. with FirstSeparator "$" and Separator "." and
482 /// parts: "p1", "p2", "p3", "p4"
483 /// The resulting name is "p1$p2.p3.p4"
484 /// The separators are retrieved from the OpenMPIRBuilderConfig.
485 std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const;
486
487 /// Callback type for variable finalization (think destructors).
488 ///
489 /// \param CodeGenIP is the insertion point at which the finalization code
490 /// should be placed.
491 ///
492 /// A finalize callback knows about all objects that need finalization, e.g.
493 /// destruction, when the scope of the currently generated construct is left
494 /// at the time, and location, the callback is invoked.
495 using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
496
  /// Element type pushed onto the finalization stack by pushFinalizationCB:
  /// a finalization callback together with the directive it belongs to.
  struct FinalizationInfo {
    /// The finalization callback provided by the last in-flight invocation of
    /// createXXXX for the directive of kind DK.
    FinalizeCallbackTy FiniCB;

    /// The directive kind of the innermost directive that has an associated
    /// region which might require finalization when it is left.
    omp::Directive DK;

    /// Flag to indicate if the directive is cancellable.
    bool IsCancellable;
  };
509
510 /// Push a finalization callback on the finalization stack.
511 ///
512 /// NOTE: Temporary solution until Clang CG is gone.
513 void pushFinalizationCB(const FinalizationInfo &FI) {
514 FinalizationStack.push_back(Elt: FI);
515 }
516
517 /// Pop the last finalization callback from the finalization stack.
518 ///
519 /// NOTE: Temporary solution until Clang CG is gone.
520 void popFinalizationCB() { FinalizationStack.pop_back(); }
521
522 /// Callback type for body (=inner region) code generation
523 ///
524 /// The callback takes code locations as arguments, each describing a
525 /// location where additional instructions can be inserted.
526 ///
527 /// The CodeGenIP may be in the middle of a basic block or point to the end of
528 /// it. The basic block may have a terminator or be degenerate. The callback
529 /// function may just insert instructions at that position, but also split the
530 /// block (without the Before argument of BasicBlock::splitBasicBlock such
  /// that the identity of the split predecessor block is preserved) and insert
532 /// additional control flow, including branches that do not lead back to what
533 /// follows the CodeGenIP. Note that since the callback is allowed to split
534 /// the block, callers must assume that InsertPoints to positions in the
535 /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
536 /// such InsertPoints need to be preserved, it can split the block itself
537 /// before calling the callback.
538 ///
539 /// AllocaIP and CodeGenIP must not point to the same position.
540 ///
541 /// \param AllocaIP is the insertion point at which new alloca instructions
542 /// should be placed. The BasicBlock it is pointing to must
543 /// not be split.
544 /// \param CodeGenIP is the insertion point at which the body code should be
545 /// placed.
546 using BodyGenCallbackTy =
547 function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
548
  // This is created primarily for the sections construct, as llvm::function_ref
  // (BodyGenCallbackTy) is not storable: as described in the comments of the
  // function_ref class, a function_ref contains a non-owning reference to the
  // callable.
553 using StorableBodyGenCallbackTy =
554 std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
555
556 /// Callback type for loop body code generation.
557 ///
558 /// \param CodeGenIP is the insertion point where the loop's body code must be
559 /// placed. This will be a dedicated BasicBlock with a
560 /// conditional branch from the loop condition check and
561 /// terminated with an unconditional branch to the loop
562 /// latch.
563 /// \param IndVar is the induction variable usable at the insertion point.
564 using LoopBodyGenCallbackTy =
565 function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
566
567 /// Callback type for variable privatization (think copy & default
568 /// constructor).
569 ///
570 /// \param AllocaIP is the insertion point at which new alloca instructions
571 /// should be placed.
572 /// \param CodeGenIP is the insertion point at which the privatization code
573 /// should be placed.
574 /// \param Original The value being copied/created, should not be used in the
575 /// generated IR.
576 /// \param Inner The equivalent of \p Original that should be used in the
577 /// generated IR; this is equal to \p Original if the value is
578 /// a pointer and can thus be passed directly, otherwise it is
579 /// an equivalent but different value.
580 /// \param ReplVal The replacement value, thus a copy or new created version
581 /// of \p Inner.
582 ///
583 /// \returns The new insertion point where code generation continues and
584 /// \p ReplVal the replacement value.
585 using PrivatizeCallbackTy = function_ref<InsertPointTy(
586 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
587 Value &Inner, Value *&ReplVal)>;
588
589 /// Description of a LLVM-IR insertion point (IP) and a debug/source location
590 /// (filename, line, column, ...).
591 struct LocationDescription {
592 LocationDescription(const IRBuilderBase &IRB)
593 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
594 LocationDescription(const InsertPointTy &IP) : IP(IP) {}
595 LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
596 : IP(IP), DL(DL) {}
597 InsertPointTy IP;
598 DebugLoc DL;
599 };
600
601 /// Emitter methods for OpenMP directives.
602 ///
603 ///{
604
605 /// Generator for '#omp barrier'
606 ///
607 /// \param Loc The location where the barrier directive was encountered.
608 /// \param DK The kind of directive that caused the barrier.
609 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
610 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
611 /// should be checked and acted upon.
612 ///
613 /// \returns The insertion point after the barrier.
614 InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
615 bool ForceSimpleCall = false,
616 bool CheckCancelFlag = true);
617
618 /// Generator for '#omp cancel'
619 ///
620 /// \param Loc The location where the directive was encountered.
621 /// \param IfCondition The evaluated 'if' clause expression, if any.
  /// \param CanceledDirective The kind of directive that is canceled.
623 ///
624 /// \returns The insertion point after the barrier.
625 InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
626 omp::Directive CanceledDirective);
627
628 /// Generator for '#omp parallel'
629 ///
630 /// \param Loc The insert and source location description.
631 /// \param AllocaIP The insertion points to be used for alloca instructions.
632 /// \param BodyGenCB Callback that will generate the region code.
633 /// \param PrivCB Callback to copy a given variable (think copy constructor).
634 /// \param FiniCB Callback to finalize variable copies.
635 /// \param IfCondition The evaluated 'if' clause expression, if any.
636 /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
637 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
638 /// \param IsCancellable Flag to indicate a cancellable parallel region.
639 ///
640 /// \returns The insertion position *after* the parallel.
641 IRBuilder<>::InsertPoint
642 createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
643 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
644 FinalizeCallbackTy FiniCB, Value *IfCondition,
645 Value *NumThreads, omp::ProcBindKind ProcBind,
646 bool IsCancellable);
647
648 /// Generator for the control flow structure of an OpenMP canonical loop.
649 ///
650 /// This generator operates on the logical iteration space of the loop, i.e.
651 /// the caller only has to provide a loop trip count of the loop as defined by
652 /// base language semantics. The trip count is interpreted as an unsigned
653 /// integer. The induction variable passed to \p BodyGenCB will be of the same
654 /// type and run from 0 to \p TripCount - 1. It is up to the callback to
655 /// convert the logical iteration variable to the loop counter variable in the
656 /// loop body.
657 ///
658 /// \param Loc The insert and source location description. The insert
659 /// location can be between two instructions or the end of a
660 /// degenerate block (e.g. a BB under construction).
661 /// \param BodyGenCB Callback that will generate the loop body code.
662 /// \param TripCount Number of iterations the loop body is executed.
663 /// \param Name Base name used to derive BB and instruction names.
664 ///
665 /// \returns An object representing the created control flow structure which
666 /// can be used for loop-associated directives.
667 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
668 LoopBodyGenCallbackTy BodyGenCB,
669 Value *TripCount,
670 const Twine &Name = "loop");
671
672 /// Generator for the control flow structure of an OpenMP canonical loop.
673 ///
674 /// Instead of a logical iteration space, this allows specifying user-defined
675 /// loop counter values using increment, upper- and lower bounds. To
676 /// disambiguate the terminology when counting downwards, instead of lower
677 /// bounds we use \p Start for the loop counter value in the first body
678 /// iteration.
679 ///
680 /// Consider the following limitations:
681 ///
  ///  * A loop counter space over all integer values of its bit-width cannot be
  ///    represented (e.g. using uint8_t, the loop trip count of 256 cannot be
  ///    stored into an 8 bit integer):
685 ///
686 /// DO I = 0, 255, 1
687 ///
688 /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
689 /// effectively counting downwards:
690 ///
691 /// for (uint8_t i = 100u; i > 0; i += 127u)
692 ///
693 ///
694 /// TODO: May need to add additional parameters to represent:
695 ///
696 /// * Allow representing downcounting with unsigned integers.
697 ///
698 /// * Sign of the step and the comparison operator might disagree:
699 ///
700 /// for (int i = 0; i < 42; i -= 1u)
701 ///
702 //
703 /// \param Loc The insert and source location description.
704 /// \param BodyGenCB Callback that will generate the loop body code.
  /// \param Start     Value of the loop counter for the first iteration.
706 /// \param Stop Loop counter values past this will stop the loop.
707 /// \param Step Loop counter increment after each iteration; negative
708 /// means counting down.
709 /// \param IsSigned Whether Start, Stop and Step are signed integers.
710 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
711 /// counter.
712 /// \param ComputeIP Insertion point for instructions computing the trip
713 /// count. Can be used to ensure the trip count is available
714 /// at the outermost loop of a loop nest. If not set,
715 /// defaults to the preheader of the generated loop.
716 /// \param Name Base name used to derive BB and instruction names.
717 ///
718 /// \returns An object representing the created control flow structure which
719 /// can be used for loop-associated directives.
720 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
721 LoopBodyGenCallbackTy BodyGenCB,
722 Value *Start, Value *Stop, Value *Step,
723 bool IsSigned, bool InclusiveStop,
724 InsertPointTy ComputeIP = {},
725 const Twine &Name = "loop");
726
  /// Collapse a loop nest into a single loop.
  ///
  /// Merges the loops of a loop nest into a single CanonicalLoopInfo
  /// representation that has the same number of innermost loop iterations as
  /// the original loop nest. The induction variables of the input loops are
  /// derived from the collapsed loop's induction variable. This is intended to
  /// be used to implement OpenMP's collapse clause. Before applying a
  /// directive, collapseLoops normalizes a loop nest to contain only a single
  /// loop, so that the directive's implementation does not need to handle
  /// multiple loops itself. This does not remove the need for directives to
  /// handle loop nests in every case, such as for the ordered(<n>) clause or
  /// the simd schedule-clause modifier of the worksharing-loop directive.
  ///
  /// Example:
  /// \code
  ///   for (int i = 0; i < 7; ++i) // Canonical loop "i"
  ///     for (int j = 0; j < 9; ++j) // Canonical loop "j"
  ///       body(i, j);
  /// \endcode
  ///
  /// After collapsing with Loops={i,j}, the loop is changed to
  /// \code
  ///   for (int ij = 0; ij < 63; ++ij) {
  ///     int i = ij / 9;
  ///     int j = ij % 9;
  ///     body(i, j);
  ///   }
  /// \endcode
  ///
  /// In the current implementation, the following limitations apply:
  ///
  ///  * All input loops have an induction variable of the same type.
  ///
  ///  * The collapsed loop will have the same trip count integer type as the
  ///    input loops. Therefore it is possible that the collapsed loop cannot
  ///    represent all iterations of the input loops. For instance, assuming a
  ///    32-bit integer type and two input loops both iterating 2^16 times, the
  ///    theoretical trip count of the collapsed loop would be 2^32 iterations,
  ///    which cannot be represented in a 32-bit integer. Behavior is undefined
  ///    in this case.
  ///
  ///  * The trip counts of every input loop must be available at \p ComputeIP.
  ///    Non-rectangular loops are not yet supported.
  ///
  ///  * At each nest level, code between a surrounding loop and its nested loop
  ///    is hoisted into the loop body, and such code will be executed more
  ///    often than before collapsing (or not at all if any inner loop has a
  ///    trip count of 0). This is permitted by the OpenMP specification.
  ///
  /// \param DL        Debug location for instructions added for collapsing,
  ///                  such as instructions to compute/derive the input loops'
  ///                  induction variables.
  /// \param Loops     Loops in the loop nest to collapse. Loops are specified
  ///                  from outermost-to-innermost and every control flow of a
  ///                  loop's body must pass through its directly nested loop.
  /// \param ComputeIP Where additional instructions that compute the collapsed
  ///                  trip count are inserted. If not set, defaults to before
  ///                  the generated loop.
  ///
  /// \returns The CanonicalLoopInfo object representing the collapsed loop.
  CanonicalLoopInfo *collapseLoops(DebugLoc DL,
                                   ArrayRef<CanonicalLoopInfo *> Loops,
                                   InsertPointTy ComputeIP);
790
  /// Get the default simd alignment value for the given target.
  ///
  /// \param TargetTriple Target triple.
  /// \param Features     StringMap which describes extra CPU features.
  ///
  /// \returns The default alignment value. NOTE(review): the unit (bits vs.
  ///          bytes) is not visible from this declaration; confirm against
  ///          the implementation.
  static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
                                            const StringMap<bool> &Features);
797
  /// Retrieve (or create if non-existent) the address of a declare
  /// target variable, used in conjunction with registerTargetGlobalVariable
  /// to create declare target global variables.
  ///
  /// \param CaptureClause - enumerator corresponding to the OpenMP capture
  /// clause used in conjunction with the variable being registered (link,
  /// to, enter).
  /// \param DeviceClause - enumerator corresponding to the OpenMP device
  /// clause used in conjunction with the variable being registered (nohost,
  /// host, any).
  /// \param IsDeclaration - boolean stating if the variable being registered
  /// is a declaration only and not a definition.
  /// \param IsExternallyVisible - boolean stating if the variable is externally
  /// visible.
  /// \param EntryInfo - Unique entry information for the value, generated
  /// using getTargetEntryUniqueInfo, used to name generated pointer references
  /// to the declare target variable.
  /// \param MangledName - the mangled name of the variable being registered.
  /// \param GeneratedRefs - references generated by invocations of
  /// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar;
  /// these are required by Clang for bookkeeping.
  /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled.
  /// \param TargetTriple - The OpenMP device target triple(s) we are compiling
  /// for.
  /// \param LlvmPtrTy - The type of the variable we are generating or
  /// retrieving an address for.
  /// \param GlobalInitializer - a lambda function which creates a constant
  /// used for initializing a pointer reference to the variable in certain
  /// cases. If a nullptr is passed, it will default to utilising the original
  /// variable to initialize the pointer reference.
  /// \param VariableLinkage - a lambda function which returns the variable's
  /// linkage type. If unspecified and a nullptr is given, it will instead
  /// utilise the linkage stored on the existing global variable in the
  /// LLVMModule.
  Constant *getAddrOfDeclareTargetVar(
      OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
      OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
      bool IsDeclaration, bool IsExternallyVisible,
      TargetRegionEntryInfo EntryInfo, StringRef MangledName,
      std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
      std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
      std::function<Constant *()> GlobalInitializer,
      std::function<GlobalValue::LinkageTypes()> VariableLinkage);
841
  /// Registers a target variable for device or host.
  ///
  /// \param CaptureClause - enumerator corresponding to the OpenMP capture
  /// clause used in conjunction with the variable being registered (link,
  /// to, enter).
  /// \param DeviceClause - enumerator corresponding to the OpenMP device
  /// clause used in conjunction with the variable being registered (nohost,
  /// host, any).
  /// \param IsDeclaration - boolean stating if the variable being registered
  /// is a declaration only and not a definition.
  /// \param IsExternallyVisible - boolean stating if the variable is externally
  /// visible.
  /// \param EntryInfo - Unique entry information for the value, generated
  /// using getTargetEntryUniqueInfo, used to name generated pointer references
  /// to the declare target variable.
  /// \param MangledName - the mangled name of the variable being registered.
  /// \param GeneratedRefs - references generated by invocations of
  /// registerTargetGlobalVariable; these are required by Clang for
  /// bookkeeping.
  /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled.
  /// \param TargetTriple - The OpenMP device target triple(s) we are compiling
  /// for.
  /// \param GlobalInitializer - a lambda function which creates a constant
  /// used for initializing a pointer reference to the variable in certain
  /// cases. If a nullptr is passed, it will default to utilising the original
  /// variable to initialize the pointer reference.
  /// \param VariableLinkage - a lambda function which returns the variable's
  /// linkage type. If unspecified and a nullptr is given, it will instead
  /// utilise the linkage stored on the existing global variable in the
  /// LLVMModule.
  /// \param LlvmPtrTy - The type of the variable we are generating or
  /// retrieving an address for.
  /// \param Addr - the original llvm value (addr) of the variable to be
  /// registered.
  void registerTargetGlobalVariable(
      OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
      OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
      bool IsDeclaration, bool IsExternallyVisible,
      TargetRegionEntryInfo EntryInfo, StringRef MangledName,
      std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
      std::vector<Triple> TargetTriple,
      std::function<Constant *()> GlobalInitializer,
      std::function<GlobalValue::LinkageTypes()> VariableLinkage,
      Type *LlvmPtrTy, Constant *Addr);
886
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  ///
  /// NOTE(review): presumably a bit offset within the mapping-flags value;
  /// confirm against the implementation.
  unsigned getFlagMemberOffset();
889
  /// Get the OMP_MAP_MEMBER_OF flag with extra bits reserved based on
  /// the position given.
  /// \param Position - A value indicating the position of the parent
  /// of the member in the kernel argument structure, often retrieved
  /// from the parent's position in the combined information vectors used
  /// to generate the structure itself. Multiple children (members) of
  /// the same parent will use the same returned member flag.
  omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position);
898
  /// Given an initial flag set, this function modifies it to contain
  /// the passed-in MemberOfFlag generated by the getMemberOfFlag
  /// function. The result depends on the flag bits already set in the
  /// original flag set.
  /// \param Flags - The original set of flags to be modified (in place) with
  /// the passed-in MemberOfFlag.
  /// \param MemberOfFlag - A modified OMP_MAP_MEMBER_OF flag, as produced by
  /// getMemberOfFlag, which adjusts the flag bits based on the member's
  /// position in its parent.
  void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags,
                              omp::OpenMPOffloadMappingFlags MemberOfFlag);
910
911private:
  /// Modifies the canonical loop to be a statically-scheduled workshare loop
  /// which is executed on the device.
  ///
  /// This takes a \p CLI representing a canonical loop, such as the one
  /// created by createCanonicalLoop, and emits additional instructions to
  /// turn it into a workshare loop. In particular, it emits a call in the
  /// preheader to the OpenMP device runtime function which handles
  /// worksharing of the loop body iterations.
  ///
  /// \param DL       Debug location for instructions added for the
  ///                 workshare-loop construct itself.
  /// \param CLI      A descriptor of the canonical loop to workshare.
  /// \param AllocaIP An insertion point for Alloca instructions usable in the
  ///                 preheader of the loop.
  /// \param LoopType Information about the type of loop worksharing.
  ///                 It corresponds to the type of the loop workshare OpenMP
  ///                 pragma.
  ///
  /// \returns Point where to insert code after the workshare construct.
  InsertPointTy applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
                                         InsertPointTy AllocaIP,
                                         omp::WorksharingLoopType LoopType);
933
  /// Modifies the canonical loop to be a statically-scheduled workshare loop.
  ///
  /// This takes a \p CLI representing a canonical loop, such as the one
  /// created by createCanonicalLoop, and emits additional instructions to
  /// turn it into a workshare loop. In particular, it calls an OpenMP
  /// runtime function in the preheader to obtain the loop bounds to be used in
  /// the current thread, updates the relevant instructions in the canonical
  /// loop and calls an OpenMP runtime finalization function after the loop.
  ///
  /// \param DL       Debug location for instructions added for the
  ///                 workshare-loop construct itself.
  /// \param CLI      A descriptor of the canonical loop to workshare.
  /// \param AllocaIP An insertion point for Alloca instructions usable in the
  ///                 preheader of the loop.
  /// \param NeedsBarrier Indicates whether a barrier must be inserted after
  ///                     the loop.
  ///
  /// \returns Point where to insert code after the workshare construct.
  InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                         InsertPointTy AllocaIP,
                                         bool NeedsBarrier);
955
  /// Modifies the canonical loop to be a statically-scheduled workshare loop
  /// with a user-specified chunk size.
  ///
  /// \param DL           Debug location for instructions added for the
  ///                     workshare-loop construct itself.
  /// \param CLI          A descriptor of the canonical loop to workshare.
  /// \param AllocaIP     An insertion point for Alloca instructions usable in
  ///                     the preheader of the loop.
  /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
  ///                     loop.
  /// \param ChunkSize    The user-specified chunk size.
  ///
  /// \returns Point where to insert code after the workshare construct.
  InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
                                                CanonicalLoopInfo *CLI,
                                                InsertPointTy AllocaIP,
                                                bool NeedsBarrier,
                                                Value *ChunkSize);
974
  /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
  ///
  /// This takes a \p CLI representing a canonical loop, such as the one
  /// created by createCanonicalLoop, and emits additional instructions to
  /// turn it into a workshare loop. In particular, it calls an OpenMP
  /// runtime function in the preheader to obtain, and then in each iteration
  /// to update, the loop counter.
  ///
  /// \param DL       Debug location for instructions added for the
  ///                 workshare-loop construct itself.
  /// \param CLI      A descriptor of the canonical loop to workshare.
  /// \param AllocaIP An insertion point for Alloca instructions usable in the
  ///                 preheader of the loop.
  /// \param SchedType Type of scheduling to be passed to the init function.
  /// \param NeedsBarrier Indicates whether a barrier must be inserted after
  ///                     the loop.
  /// \param Chunk    The size of loop chunk considered as a unit when
  ///                 scheduling. If nullptr, defaults to 1.
  ///
  /// \returns Point where to insert code after the workshare construct.
  InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                          InsertPointTy AllocaIP,
                                          omp::OMPScheduleType SchedType,
                                          bool NeedsBarrier,
                                          Value *Chunk = nullptr);
1000
  /// Create an alternative version of the loop to support the if clause.
  ///
  /// The OpenMP if clause can require generating a second loop. This loop
  /// will be executed when the if clause condition is not met. createIfVersion
  /// adds a branch instruction to the copied loop, taken if \p IfCond is not
  /// met.
  ///
  /// \param Loop       Original loop which should be versioned.
  /// \param IfCond     Value which corresponds to the if clause condition.
  /// \param VMap       Value to value map to define the relation between
  ///                   original and copied loop values and loop blocks.
  /// \param NamePrefix Optional name prefix for the if.then and if.else
  ///                   blocks.
  void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
                       ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
1014
1015public:
  /// Modifies the canonical loop to be a workshare loop.
  ///
  /// This takes a \p CLI representing a canonical loop, such as the one
  /// created by createCanonicalLoop, and emits additional instructions to
  /// turn it into a workshare loop. In particular, it calls an OpenMP
  /// runtime function in the preheader to obtain the loop bounds to be used in
  /// the current thread, updates the relevant instructions in the canonical
  /// loop and calls an OpenMP runtime finalization function after the loop.
  ///
  /// The concrete transformation is done by applyStaticWorkshareLoop,
  /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
  /// on the value of \p SchedKind and \p ChunkSize.
  ///
  /// \param DL       Debug location for instructions added for the
  ///                 workshare-loop construct itself.
  /// \param CLI      A descriptor of the canonical loop to workshare.
  /// \param AllocaIP An insertion point for Alloca instructions usable in the
  ///                 preheader of the loop.
  /// \param NeedsBarrier Indicates whether a barrier must be inserted after
  ///                     the loop.
  /// \param SchedKind Scheduling algorithm to use.
  /// \param ChunkSize The chunk size for the inner loop.
  /// \param HasSimdModifier Whether the simd modifier is present in the
  ///                        schedule clause.
  /// \param HasMonotonicModifier Whether the monotonic modifier is present in
  ///                             the schedule clause.
  /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
  ///                                present in the schedule clause.
  /// \param HasOrderedClause Whether the (parameterless) ordered clause is
  ///                         present.
  /// \param LoopType Information about the type of loop worksharing.
  ///                 It corresponds to the type of the loop workshare OpenMP
  ///                 pragma.
  ///
  /// \returns Point where to insert code after the workshare construct.
  InsertPointTy applyWorkshareLoop(
      DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
      bool NeedsBarrier,
      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
      Value *ChunkSize = nullptr, bool HasSimdModifier = false,
      bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
      bool HasOrderedClause = false,
      omp::WorksharingLoopType LoopType =
          omp::WorksharingLoopType::ForStaticLoop);
1059
  /// Tile a loop nest.
  ///
  /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
  /// \p Loops must be perfectly nested, from outermost to innermost loop
  /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
  /// of every loop and every tile size must be usable in the outermost
  /// loop's preheader. This implies that the loop nest is rectangular.
  ///
  /// Example:
  /// \code
  ///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
  ///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
  ///         body(i, j);
  /// \endcode
  ///
  /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
  /// \code
  ///   for (int i1 = 0; i1 < 3; ++i1)
  ///     for (int j1 = 0; j1 < 2; ++j1)
  ///       for (int i2 = 0; i2 < 5; ++i2)
  ///         for (int j2 = 0; j2 < 7; ++j2)
  ///           body(i1*5+i2, j1*7+j2);
  /// \endcode
  ///
  /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
  /// are referred to as the floor loops, and the loops i2 and j2 as the tile
  /// loops. Tiling also handles non-constant trip counts, non-constant tile
  /// sizes and trip counts that are not multiples of the tile size. In the
  /// latter case the tile loop of the last floor-loop iteration will have
  /// fewer iterations than specified as its tile size.
  ///
  ///
  /// \param DL        Debug location for instructions added by tiling, for
  ///                  instance the floor- and tile trip count computation.
  /// \param Loops     Loops to tile. The CanonicalLoopInfo objects are
  ///                  invalidated by this method, i.e. should not be used
  ///                  after tiling.
  /// \param TileSizes For each loop in \p Loops, the tile size for that
  ///                  dimension.
  ///
  /// \returns A list of generated loops. Contains twice as many loops as the
  ///          input loop nest; the first half are the floor loops and the
  ///          second half are the tile loops.
  std::vector<CanonicalLoopInfo *>
  tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
            ArrayRef<Value *> TileSizes);
1106
  /// Fully unroll a loop.
  ///
  /// Instead of unrolling the loop immediately (and duplicating its body
  /// instructions), the unrolling is deferred to LLVM's LoopUnrollPass by
  /// adding loop metadata.
  ///
  /// \param DL   Debug location for instructions added by unrolling.
  /// \param Loop The loop to unroll. The loop will be invalidated.
  void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
1116
  /// Fully or partially unroll a loop. Whether and how the loop is unrolled is
  /// determined by LLVM's LoopUnrollPass.
  ///
  /// \param DL   Debug location for instructions added by unrolling.
  /// \param Loop The loop to unroll. The loop will be invalidated.
  void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
1123
  /// Partially unroll a loop.
  ///
  /// The CanonicalLoopInfo of the unrolled loop, for use with a chained
  /// loop-associated directive, can be requested using \p UnrolledCLI. Not
  /// needing the CanonicalLoopInfo allows more efficient code generation by
  /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
  /// A loop-associated directive applied to the unrolled loop needs to know the
  /// new trip count, which means that if using a heuristically determined
  /// unroll factor (\p Factor == 0), that factor must be computed immediately.
  /// We are using the same logic as the LoopUnrollPass to derive the unroll
  /// factor, but that logic assumes that some canonicalization has taken place
  /// (e.g. Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will
  /// perform better when the unrolled loop's CanonicalLoopInfo is not needed.
  ///
  /// \param DL          Debug location for instructions added by unrolling.
  /// \param Loop        The loop to unroll. The loop will be invalidated.
  /// \param Factor      The factor to unroll the loop by. A factor of 0
  ///                    indicates that a heuristic should be used to determine
  ///                    the unroll-factor.
  /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
  ///                    partially unrolled loop. Otherwise, uses loop metadata
  ///                    to defer unrolling to the LoopUnrollPass.
  void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
                         CanonicalLoopInfo **UnrolledCLI);
1148
  /// Add metadata to simd-ize a loop. If \p IfCond is not nullptr, the loop
  /// is cloned. The metadata which prevents vectorization is added to
  /// the cloned loop. The cloned loop is executed when \p IfCond evaluates
  /// to false.
  ///
  /// \param Loop        The loop to simd-ize.
  /// \param AlignedVars The map which contains pairs of a pointer
  ///                    and its corresponding alignment.
  /// \param IfCond      The value which corresponds to the if clause
  ///                    condition.
  /// \param Order       The enum to map the order clause.
  /// \param Simdlen     The Simdlen length to apply to the simd loop.
  /// \param Safelen     The Safelen length to apply to the simd loop.
  void applySimd(CanonicalLoopInfo *Loop,
                 MapVector<Value *, Value *> AlignedVars, Value *IfCond,
                 omp::OrderKind Order, ConstantInt *Simdlen,
                 ConstantInt *Safelen);
1166
  /// Generator for '#pragma omp flush'.
  ///
  /// \param Loc The location where the flush directive was encountered.
  void createFlush(const LocationDescription &Loc);
1171
  /// Generator for '#pragma omp taskwait'.
  ///
  /// \param Loc The location where the taskwait directive was encountered.
  void createTaskwait(const LocationDescription &Loc);
1176
  /// Generator for '#pragma omp taskyield'.
  ///
  /// \param Loc The location where the taskyield directive was encountered.
  void createTaskyield(const LocationDescription &Loc);
1181
1182 /// A struct to pack the relevant information for an OpenMP depend clause.
1183 struct DependData {
1184 omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown;
1185 Type *DepValueType;
1186 Value *DepVal;
1187 explicit DependData() = default;
1188 DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType,
1189 Value *DepVal)
1190 : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
1191 };
1192
  /// Generator for `#pragma omp task`.
  ///
  /// \param Loc The location where the task construct was encountered.
  /// \param AllocaIP The insertion point to be used for alloca instructions.
  /// \param BodyGenCB Callback that will generate the region code.
  /// \param Tied True if the task is tied, false if the task is untied.
  /// \param Final i1 value which is `true` if the task is final, `false` if the
  ///              task is not final.
  /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
  ///                    task is generated, and the encountering thread must
  ///                    suspend the current task region, for which execution
  ///                    cannot be resumed until execution of the structured
  ///                    block that is associated with the generated task is
  ///                    completed.
  /// \param Dependencies DependData objects holding the information of the
  ///                     dependencies specified by the 'depend' clause.
  InsertPointTy createTask(const LocationDescription &Loc,
                           InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
                           bool Tied = true, Value *Final = nullptr,
                           Value *IfCondition = nullptr,
                           SmallVector<DependData> Dependencies = {});
1212
  /// Generator for the taskgroup construct.
  ///
  /// \param Loc The location where the taskgroup construct was encountered.
  /// \param AllocaIP The insertion point to be used for alloca instructions.
  /// \param BodyGenCB Callback that will generate the region code.
  InsertPointTy createTaskgroup(const LocationDescription &Loc,
                                InsertPointTy AllocaIP,
                                BodyGenCallbackTy BodyGenCB);
1221
  /// Callback type returning a (std::string, uint64_t) tuple. As used by
  /// getTargetEntryUniqueInfo, the tuple holds the file name a target entry
  /// resides in and the line number of that entry.
  using FileIdentifierInfoCallbackTy =
      std::function<std::tuple<std::string, uint64_t>()>;
1224
  /// Creates a unique info for a target entry from the file name and line
  /// number provided by \p CallBack.
  ///
  /// \param CallBack A callback function which should return the file name the
  /// entry resides in as well as the line number of the target entry.
  /// \param ParentName The name of the parent the target entry resides in, if
  /// any.
  static TargetRegionEntryInfo
  getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
                           StringRef ParentName = "");
1235
  /// Functions used to generate reductions. Such functions take two Values
  /// representing LHS and RHS of the reduction, respectively, and a reference
  /// to the value that is updated to refer to the reduction result.
  ///
  /// NOTE(review): the leading InsertPointTy argument is presumably where the
  /// reduction code is to be emitted and the returned InsertPointTy the point
  /// after the emitted code -- confirm against the callers.
  using ReductionGenTy =
      function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
1241
  /// Functions used to generate atomic reductions. Such functions take the
  /// element type of the reduction operands followed by two Values
  /// representing pointers to the LHS and RHS of the reduction. They are
  /// expected to atomically update the LHS to the reduced value.
  ///
  /// NOTE(review): the leading InsertPointTy argument is presumably where the
  /// reduction code is to be emitted and the returned InsertPointTy the point
  /// after the emitted code -- confirm against the callers.
  using AtomicReductionGenTy =
      function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
1248
1249 /// Information about an OpenMP reduction.
1250 struct ReductionInfo {
1251 ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
1252 ReductionGenTy ReductionGen,
1253 AtomicReductionGenTy AtomicReductionGen)
1254 : ElementType(ElementType), Variable(Variable),
1255 PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
1256 AtomicReductionGen(AtomicReductionGen) {}
1257
1258 /// Reduction element type, must match pointee type of variable.
1259 Type *ElementType;
1260
1261 /// Reduction variable of pointer type.
1262 Value *Variable;
1263
1264 /// Thread-private partial reduction variable.
1265 Value *PrivateVariable;
1266
1267 /// Callback for generating the reduction body. The IR produced by this will
1268 /// be used to combine two values in a thread-safe context, e.g., under
1269 /// lock or within the same thread, and therefore need not be atomic.
1270 ReductionGenTy ReductionGen;
1271
1272 /// Callback for generating the atomic reduction body, may be null. The IR
1273 /// produced by this will be used to atomically combine two values during
1274 /// reduction. If null, the implementation will use the non-atomic version
1275 /// along with the appropriate synchronization mechanisms.
1276 AtomicReductionGenTy AtomicReductionGen;
1277 };
1278
1279 // TODO: provide atomic and non-atomic reduction generators for reduction
1280 // operators defined by the OpenMP specification.
1281
  /// Generator for '#omp reduction'.
  ///
  /// Emits the IR instructing the runtime to perform the specific kind of
  /// reductions. Expects reduction variables to have been privatized and
  /// initialized to reduction-neutral values separately. Emits the calls to
  /// runtime functions as well as the reduction function and the basic blocks
  /// performing the reduction atomically and non-atomically.
  ///
  /// The code emitted for the following:
  ///
  /// \code
  ///   type var_1;
  ///   type var_2;
  ///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
  ///   /* body */;
  /// \endcode
  ///
  /// corresponds to the following sketch.
  ///
  /// \code
  /// void _outlined_par() {
  ///   // N is the number of different reductions.
  ///   void *red_array[] = {privatized_var_1, privatized_var_2, ...};
  ///   switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
  ///                        _omp_reduction_func,
  ///                        _gomp_critical_user.reduction.var)) {
  ///   case 1: {
  ///     var_1 = var_1 <reduction-op> privatized_var_1;
  ///     var_2 = var_2 <reduction-op> privatized_var_2;
  ///     // ...
  ///    __kmpc_end_reduce(...);
  ///     break;
  ///   }
  ///   case 2: {
  ///     _Atomic<ReductionOp>(var_1, privatized_var_1);
  ///     _Atomic<ReductionOp>(var_2, privatized_var_2);
  ///     // ...
  ///     break;
  ///   }
  ///   default: break;
  ///   }
  /// }
  ///
  /// void _omp_reduction_func(void **lhs, void **rhs) {
  ///   *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
  ///   *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
  ///   // ...
  /// }
  /// \endcode
  ///
  /// \param Loc                The location where the reduction was
  ///                           encountered. Must be within the associated
  ///                           directive and after the last local access to the
  ///                           reduction variables.
  /// \param AllocaIP           An insertion point suitable for allocas usable
  ///                           in reductions.
  /// \param ReductionInfos     A list of info on each reduction variable.
  /// \param IsNoWait           A flag set if the reduction is marked as nowait.
  InsertPointTy createReductions(const LocationDescription &Loc,
                                 InsertPointTy AllocaIP,
                                 ArrayRef<ReductionInfo> ReductionInfos,
                                 bool IsNoWait = false);
1344
1345 ///}
1346
1347 /// Return the insertion point used by the underlying IRBuilder.
1348 InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
1349
1350 /// Update the internal location to \p Loc.
1351 bool updateToLocation(const LocationDescription &Loc) {
1352 Builder.restoreIP(IP: Loc.IP);
1353 Builder.SetCurrentDebugLocation(Loc.DL);
1354 return Loc.IP.getBlock() != nullptr;
1355 }
1356
  /// Return the function declaration for the runtime function with \p FnID.
  ///
  /// \param M    The module to operate on (presumably where the declaration is
  ///             looked up or created -- confirm against the implementation).
  /// \param FnID Identifier of the requested OpenMP runtime function.
  FunctionCallee getOrCreateRuntimeFunction(Module &M,
                                            omp::RuntimeFunction FnID);
1360
  /// Return the runtime function with \p FnID as a Function pointer.
  ///
  /// NOTE(review): presumably the Function-typed counterpart of
  /// getOrCreateRuntimeFunction; which module it uses is not visible from
  /// this declaration -- confirm against the implementation.
  Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
1362
  /// Return the (LLVM-IR) string describing the source location \p LocStr.
  ///
  /// \param SrcLocStrSize Passed by non-const reference; presumably receives
  ///                      the size of the returned string -- confirm.
  Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
1365
  /// Return the (LLVM-IR) string describing the default source location.
  ///
  /// \param SrcLocStrSize Passed by non-const reference; presumably receives
  ///                      the size of the returned string -- confirm.
  Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);
1368
  /// Return the (LLVM-IR) string describing the source location identified by
  /// the arguments (\p FunctionName, \p FileName, \p Line and \p Column).
  ///
  /// \param SrcLocStrSize Passed by non-const reference; presumably receives
  ///                      the size of the returned string -- confirm.
  Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
                                 unsigned Line, unsigned Column,
                                 uint32_t &SrcLocStrSize);
1374
  /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
  /// a fallback if \p DL does not specify the function name.
  ///
  /// \param SrcLocStrSize Passed by non-const reference; presumably receives
  ///                      the size of the returned string -- confirm.
  Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
                                 Function *F = nullptr);
1379
  /// Return the (LLVM-IR) string describing the source location \p Loc.
  ///
  /// \param SrcLocStrSize Passed by non-const reference; presumably receives
  ///                      the size of the returned string -- confirm.
  Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
                                 uint32_t &SrcLocStrSize);
1383
  /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
  ///
  /// \param SrcLocStrSize Size associated with \p SrcLocStr (presumably the
  ///                      value produced by getOrCreateSrcLocStr -- confirm).
  /// \param Reserve2Flags Additional flags, 0 by default.
  ///
  /// TODO: Create an enum class for the Reserve2Flags.
  Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
                             omp::IdentFlag Flags = omp::IdentFlag(0),
                             unsigned Reserve2Flags = 0);
1389
  /// Create a hidden global flag named \p Name in the module, with initial
  /// value \p Value.
  GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
1393
  /// Generate control flow and cleanup for cancellation.
  ///
  /// \param CancelFlag Flag indicating if the cancellation is performed.
  /// \param CanceledDirective The kind of directive that is canceled.
  /// \param ExitCB Extra code to be generated in the exit block.
  void emitCancelationCheckImpl(Value *CancelFlag,
                                omp::Directive CanceledDirective,
                                FinalizeCallbackTy ExitCB = {});
1402
/// Generate a target region entry call.
///
/// \param Loc The location at which the request originated and is fulfilled.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param Return Return value of the created function returned by reference.
/// \param Ident Not documented in the original comment; presumably the
///        ident_t* describing the source location -- TODO confirm.
/// \param DeviceID Identifier for the device via the 'device' clause.
/// \param NumTeams Number of teams for the region via the 'num_teams' clause
///        or 0 if unspecified and -1 if there is no 'teams' clause.
/// \param NumThreads Number of threads via the 'thread_limit' clause.
/// \param HostPtr Pointer to the host-side pointer of the target kernel.
/// \param KernelArgs Array of arguments to the kernel.
InsertPointTy emitTargetKernel(const LocationDescription &Loc,
                               InsertPointTy AllocaIP, Value *&Return,
                               Value *Ident, Value *DeviceID, Value *NumTeams,
                               Value *NumThreads, Value *HostPtr,
                               ArrayRef<Value *> KernelArgs);
1419
/// Generate a barrier runtime call.
///
/// \param Loc The location at which the request originated and is fulfilled.
/// \param DK The directive which caused the barrier.
/// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
/// \param CheckCancelFlag Flag to indicate a cancel barrier return value
///        should be checked and acted upon.
///
/// \returns The insertion point after the barrier.
InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
                              omp::Directive DK, bool ForceSimpleCall,
                              bool CheckCancelFlag);
1432
/// Generate a flush runtime call. Nothing is returned to the caller.
///
/// \param Loc The location at which the request originated and is fulfilled.
void emitFlush(const LocationDescription &Loc);
1437
/// The finalization stack made up of finalize callbacks currently in-flight,
/// wrapped into FinalizationInfo objects that also reference the
/// finalization target block and the kind of cancellable directive.
SmallVector<FinalizationInfo, 8> FinalizationStack;
1442
1443 /// Return true if the last entry in the finalization stack is of kind \p DK
1444 /// and cancellable.
1445 bool isLastFinalizationInfoCancellable(omp::Directive DK) {
1446 return !FinalizationStack.empty() &&
1447 FinalizationStack.back().IsCancellable &&
1448 FinalizationStack.back().DK == DK;
1449 }
1450
/// Generate a taskwait runtime call. Nothing is returned to the caller.
///
/// \param Loc The location at which the request originated and is fulfilled.
void emitTaskwaitImpl(const LocationDescription &Loc);
1455
/// Generate a taskyield runtime call. Nothing is returned to the caller.
///
/// \param Loc The location at which the request originated and is fulfilled.
void emitTaskyieldImpl(const LocationDescription &Loc);
1460
/// Return the current thread ID.
///
/// \param Ident The ident (ident_t*) describing the query origin.
///
/// \returns A Value holding the thread ID.
Value *getOrCreateThreadID(Value *Ident);
1465
/// The OpenMPIRBuilder configuration.
OpenMPIRBuilderConfig Config;

/// The underlying LLVM-IR module.
Module &M;

/// The LLVM-IR Builder used to create IR.
IRBuilder<> Builder;

/// Map to remember source location strings.
StringMap<Constant *> SrcLocStrMap;

/// Map to remember existing ident_t*, keyed by a (Constant *, uint64_t)
/// pair.
DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;

/// Info manager to keep track of target regions.
OffloadEntriesInfoManager OffloadInfoManager;

/// The target triple of the underlying module.
const Triple T;
1486
1487 /// Helper that contains information about regions we need to outline
1488 /// during finalization.
1489 struct OutlineInfo {
1490 using PostOutlineCBTy = std::function<void(Function &)>;
1491 PostOutlineCBTy PostOutlineCB;
1492 BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
1493 SmallVector<Value *, 2> ExcludeArgsFromAggregate;
1494
1495 /// Collect all blocks in between EntryBB and ExitBB in both the given
1496 /// vector and set.
1497 void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
1498 SmallVectorImpl<BasicBlock *> &BlockVector);
1499
1500 /// Return the function that contains the region to be outlined.
1501 Function *getFunction() const { return EntryBB->getParent(); }
1502 };
1503
/// Collection of regions that need to be outlined during finalization.
SmallVector<OutlineInfo, 16> OutlineInfos;

/// Collection of owned canonical loop objects that eventually need to be
/// freed.
std::forward_list<CanonicalLoopInfo> LoopInfos;
1510
1511 /// Add a new region that will be outlined later.
1512 void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(Args&: OI); }
1513
/// An ordered map of auto-generated variables to their unique names.
/// It stores variables with the following names:
///   1) ".gomp_critical_user_" + <critical_section_name> + ".var" for
///      "omp critical" directives;
///   2) <mangled_name_for_global_var> + ".cache." for the cache for
///      threadprivate variables.
StringMap<GlobalVariable *, BumpPtrAllocator> InternalVars;
1520
/// Computes the size of type in bytes.
///
/// \param BasePtr NOTE(review): the relation between this value and the
///                measured type is not documented here -- confirm against
///                the definition.
Value *getSizeInBytes(Value *BasePtr);
1523
/// Emit a branch from the current block to the Target block only if
/// the current block has a terminator.
///
/// NOTE(review): the comment on emitBlock says a branch is added when the
/// current block does *not* have a terminator, so the condition above looks
/// inverted -- confirm against the definition.
void emitBranch(BasicBlock *Target);
1527
/// If \p BB has no use then delete it and return. Else place \p BB after the
/// current block, if possible, or else at the end of the function. Also add
/// a branch from the current block to \p BB if the current block does not
/// have a terminator.
///
/// \param BB         The block to emit.
/// \param CurFn      The function used when \p BB has to be placed at the
///                   end of the function.
/// \param IsFinished Defaults to false; presumably marks \p BB as complete
///                   so that an unused block may be deleted -- TODO confirm.
void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false);
1532
/// Emits code for an OpenMP 'if' clause using the specified
/// BodyGenCallbackTy callbacks. Here is the logic:
/// \code
///   if (Cond) {
///     ThenGen();
///   } else {
///     ElseGen();
///   }
/// \endcode
///
/// \param Cond     The condition value.
/// \param ThenGen  Callback generating the code for the true case.
/// \param ElseGen  Callback generating the code for the false case.
/// \param AllocaIP Defaults to an empty insertion point; presumably
///                 forwarded to the callbacks for alloca instructions --
///                 TODO confirm.
void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
                  BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {});
1542
/// Create the global variable holding the offload mappings information.
///
/// \param Mappings The mapping values to store.
/// \param VarName  Presumably the name given to the created global --
///                 TODO confirm.
GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
                                      std::string VarName);
1546
/// Create the global variable holding the offload names information.
///
/// \param Names   The name constants to store.
/// \param VarName Presumably the name given to the created global --
///                TODO confirm.
GlobalVariable *
createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
                      std::string VarName);
1551
/// Bundle of three alloca instructions passed by reference to
/// createMapperAllocas and emitMapperCall. All members start out as nullptr.
struct MapperAllocas {
  // Presumably backs the array of base pointers -- TODO confirm.
  AllocaInst *ArgsBase = nullptr;
  // Presumably backs the array of pointers -- TODO confirm.
  AllocaInst *Args = nullptr;
  // Presumably backs the array of sizes -- TODO confirm.
  AllocaInst *ArgSizes = nullptr;
};
1557
/// Create the alloca instructions used in calls to mapper functions.
///
/// \param Loc           The source location description.
/// \param AllocaIP      The insertion point to be used for alloca
///                      instructions.
/// \param NumOperands   Number of operands; presumably the element count of
///                      each created array -- TODO confirm.
/// \param MapperAllocas Passed by non-const reference; presumably receives
///                      the created allocas -- TODO confirm.
void createMapperAllocas(const LocationDescription &Loc,
                         InsertPointTy AllocaIP, unsigned NumOperands,
                         struct MapperAllocas &MapperAllocas);
1562
/// Create the call for the target mapper function.
///
/// \param Loc The source location description.
/// \param MapperFunc Function to be called.
/// \param SrcLocInfo Source location information global.
/// \param MaptypesArg The argument types.
/// \param MapnamesArg The argument names.
/// \param MapperAllocas The AllocaInst used for the call.
/// \param DeviceID Device ID for the call.
/// \param NumOperands Number of operands in the call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
                    Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
                    struct MapperAllocas &MapperAllocas, int64_t DeviceID,
                    unsigned NumOperands);
1576
1577 /// Container for the arguments used to pass data to the runtime library.
1578 struct TargetDataRTArgs {
1579 /// The array of base pointer passed to the runtime library.
1580 Value *BasePointersArray = nullptr;
1581 /// The array of section pointers passed to the runtime library.
1582 Value *PointersArray = nullptr;
1583 /// The array of sizes passed to the runtime library.
1584 Value *SizesArray = nullptr;
1585 /// The array of map types passed to the runtime library for the beginning
1586 /// of the region or for the entire region if there are no separate map
1587 /// types for the region end.
1588 Value *MapTypesArray = nullptr;
1589 /// The array of map types passed to the runtime library for the end of the
1590 /// region, or nullptr if there are no separate map types for the region
1591 /// end.
1592 Value *MapTypesArrayEnd = nullptr;
1593 /// The array of user-defined mappers passed to the runtime library.
1594 Value *MappersArray = nullptr;
1595 /// The array of original declaration names of mapped pointers sent to the
1596 /// runtime library for debugging
1597 Value *MapNamesArray = nullptr;
1598
1599 explicit TargetDataRTArgs() {}
1600 explicit TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray,
1601 Value *SizesArray, Value *MapTypesArray,
1602 Value *MapTypesArrayEnd, Value *MappersArray,
1603 Value *MapNamesArray)
1604 : BasePointersArray(BasePointersArray), PointersArray(PointersArray),
1605 SizesArray(SizesArray), MapTypesArray(MapTypesArray),
1606 MapTypesArrayEnd(MapTypesArrayEnd), MappersArray(MappersArray),
1607 MapNamesArray(MapNamesArray) {}
1608 };
1609
/// Data structure that contains the needed information to construct the
/// kernel args vector. The members have no default initializers; the
/// constructor below sets all of them.
struct TargetKernelArgs {
  /// Number of arguments passed to the runtime library.
  unsigned NumTargetItems;
  /// Arguments passed to the runtime library.
  TargetDataRTArgs RTArgs;
  /// The number of iterations.
  Value *NumIterations;
  /// The number of teams.
  Value *NumTeams;
  /// The number of threads.
  Value *NumThreads;
  /// The size of the dynamic shared memory.
  Value *DynCGGroupMem;
  /// True if the kernel has 'no wait' clause.
  bool HasNoWait;

  /// Constructor for TargetKernelArgs; copies each argument into the member
  /// of the same name.
  TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs,
                   Value *NumIterations, Value *NumTeams, Value *NumThreads,
                   Value *DynCGGroupMem, bool HasNoWait)
      : NumTargetItems(NumTargetItems), RTArgs(RTArgs),
        NumIterations(NumIterations), NumTeams(NumTeams),
        NumThreads(NumThreads), DynCGGroupMem(DynCGGroupMem),
        HasNoWait(HasNoWait) {}
};
1637
/// Create the kernel args vector used by emitTargetKernel. This function
/// creates various constant values that are used in the resulting args
/// vector.
///
/// \param KernelArgs The kernel argument description to read from.
/// \param Builder    The builder used to create the constant values.
/// \param ArgsVector Output vector receiving the resulting arguments.
static void getKernelArgsVector(TargetKernelArgs &KernelArgs,
                                IRBuilderBase &Builder,
                                SmallVector<Value *> &ArgsVector);
1644
1645 /// Struct that keeps the information that should be kept throughout
1646 /// a 'target data' region.
1647 class TargetDataInfo {
1648 /// Set to true if device pointer information have to be obtained.
1649 bool RequiresDevicePointerInfo = false;
1650 /// Set to true if Clang emits separate runtime calls for the beginning and
1651 /// end of the region. These calls might have separate map type arrays.
1652 bool SeparateBeginEndCalls = false;
1653
1654 public:
1655 TargetDataRTArgs RTArgs;
1656
1657 SmallMapVector<const Value *, std::pair<Value *, Value *>, 4>
1658 DevicePtrInfoMap;
1659
1660 /// Indicate whether any user-defined mapper exists.
1661 bool HasMapper = false;
1662 /// The total number of pointers passed to the runtime library.
1663 unsigned NumberOfPtrs = 0u;
1664
1665 explicit TargetDataInfo() {}
1666 explicit TargetDataInfo(bool RequiresDevicePointerInfo,
1667 bool SeparateBeginEndCalls)
1668 : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
1669 SeparateBeginEndCalls(SeparateBeginEndCalls) {}
1670 /// Clear information about the data arrays.
1671 void clearArrayInfo() {
1672 RTArgs = TargetDataRTArgs();
1673 HasMapper = false;
1674 NumberOfPtrs = 0u;
1675 }
1676 /// Return true if the current target data information has valid arrays.
1677 bool isValid() {
1678 return RTArgs.BasePointersArray && RTArgs.PointersArray &&
1679 RTArgs.SizesArray && RTArgs.MapTypesArray &&
1680 (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs;
1681 }
1682 bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
1683 bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
1684 };
1685
/// Kind of device information attached to a mapped entry; element type of
/// MapDeviceInfoArrayTy.
/// NOTE(review): the exact meaning of Pointer vs. Address is not documented
/// here -- confirm against the users.
enum class DeviceInfoTy { None, Pointer, Address };
/// Small-vector aliases used for the per-entry arrays of MapInfosTy.
using MapValuesArrayTy = SmallVector<Value *, 4>;
using MapDeviceInfoArrayTy = SmallVector<DeviceInfoTy, 4>;
using MapFlagsArrayTy = SmallVector<omp::OpenMPOffloadMappingFlags, 4>;
using MapNamesArrayTy = SmallVector<Constant *, 4>;
using MapDimArrayTy = SmallVector<uint64_t, 4>;
/// A vector of value vectors, used for the non-contiguous descriptors.
using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
1693
1694 /// This structure contains combined information generated for mappable
1695 /// clauses, including base pointers, pointers, sizes, map types, user-defined
1696 /// mappers, and non-contiguous information.
1697 struct MapInfosTy {
1698 struct StructNonContiguousInfo {
1699 bool IsNonContiguous = false;
1700 MapDimArrayTy Dims;
1701 MapNonContiguousArrayTy Offsets;
1702 MapNonContiguousArrayTy Counts;
1703 MapNonContiguousArrayTy Strides;
1704 };
1705 MapValuesArrayTy BasePointers;
1706 MapValuesArrayTy Pointers;
1707 MapDeviceInfoArrayTy DevicePointers;
1708 MapValuesArrayTy Sizes;
1709 MapFlagsArrayTy Types;
1710 MapNamesArrayTy Names;
1711 StructNonContiguousInfo NonContigInfo;
1712
1713 /// Append arrays in \a CurInfo.
1714 void append(MapInfosTy &CurInfo) {
1715 BasePointers.append(in_start: CurInfo.BasePointers.begin(),
1716 in_end: CurInfo.BasePointers.end());
1717 Pointers.append(in_start: CurInfo.Pointers.begin(), in_end: CurInfo.Pointers.end());
1718 DevicePointers.append(in_start: CurInfo.DevicePointers.begin(),
1719 in_end: CurInfo.DevicePointers.end());
1720 Sizes.append(in_start: CurInfo.Sizes.begin(), in_end: CurInfo.Sizes.end());
1721 Types.append(in_start: CurInfo.Types.begin(), in_end: CurInfo.Types.end());
1722 Names.append(in_start: CurInfo.Names.begin(), in_end: CurInfo.Names.end());
1723 NonContigInfo.Dims.append(in_start: CurInfo.NonContigInfo.Dims.begin(),
1724 in_end: CurInfo.NonContigInfo.Dims.end());
1725 NonContigInfo.Offsets.append(in_start: CurInfo.NonContigInfo.Offsets.begin(),
1726 in_end: CurInfo.NonContigInfo.Offsets.end());
1727 NonContigInfo.Counts.append(in_start: CurInfo.NonContigInfo.Counts.begin(),
1728 in_end: CurInfo.NonContigInfo.Counts.end());
1729 NonContigInfo.Strides.append(in_start: CurInfo.NonContigInfo.Strides.begin(),
1730 in_end: CurInfo.NonContigInfo.Strides.end());
1731 }
1732 };
1733
/// Callback function type for functions emitting the host fallback code that
/// is executed when the kernel launch fails. It takes an insertion point as
/// parameter where the code should be emitted. It returns an insertion point
/// that points right after the emitted code.
using EmitFallbackCallbackTy = function_ref<InsertPointTy(InsertPointTy)>;
1739
/// Generate a target region entry call and host fallback call.
///
/// \param Loc The location at which the request originated and is fulfilled.
/// \param OutlinedFn The outlined kernel function.
/// \param OutlinedFnID The outlined function ID.
/// \param EmitTargetCallFallbackCB Callback function to generate host
///        fallback code.
/// \param Args Data structure holding information about the kernel arguments.
/// \param DeviceID Identifier for the device via the 'device' clause.
/// \param RTLoc Source location identifier.
/// \param AllocaIP The insertion point to be used for alloca instructions.
InsertPointTy emitKernelLaunch(
    const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
    EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
    Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);
1755
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// \p ForEndCall, emit map types to be passed for the end of the region
/// instead of the beginning.
///
/// \param Builder    The builder used to emit the arguments.
/// \param RTArgs     Passed by non-const reference; presumably receives the
///                   emitted arguments -- TODO confirm.
/// \param Info       The target data information providing the arrays.
/// \param EmitDebug  Defaults to false; presumably controls whether the map
///                   names array is passed -- TODO confirm.
/// \param ForEndCall Defaults to false; see above.
void emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                  OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
                                  OpenMPIRBuilder::TargetDataInfo &Info,
                                  bool EmitDebug = false,
                                  bool ForEndCall = false);
1765
/// Emit an array of struct descriptors to be assigned to the offload args.
///
/// \param AllocaIP     The insertion point to be used for alloca
///                     instructions.
/// \param CodeGenIP    The insertion point at which the remaining code is
///                     emitted.
/// \param CombinedInfo The combined map information.
/// \param Info         The target data information.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP,
                                 InsertPointTy CodeGenIP,
                                 MapInfosTy &CombinedInfo,
                                 TargetDataInfo &Info);
1771
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \param AllocaIP        The insertion point to be used for alloca
///                        instructions.
/// \param CodeGenIP       The insertion point at which the remaining code is
///                        emitted.
/// \param CombinedInfo    The combined map information.
/// \param Info            The target data information.
/// \param IsNonContiguous Defaults to false.
/// \param DeviceAddrCB    Optional callback taking an unsigned index and a
///                        Value; defaults to nullptr. NOTE(review): when it
///                        is invoked is not documented here -- confirm.
/// \param CustomMapperCB  Optional callback mapping an unsigned index to a
///                        Value; defaults to nullptr. NOTE(review): same
///                        caveat as above.
void emitOffloadingArrays(
    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
    TargetDataInfo &Info, bool IsNonContiguous = false,
    function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
    function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
1780
/// Creates an offloading entry for the provided entry ID \a ID, address
/// \a Addr, size \a Size, and flags \a Flags.
///
/// The unnamed fifth parameter is a linkage type and \p Name defaults to the
/// empty string. NOTE(review): how the linkage and name are applied is not
/// documented here -- confirm against the definition.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
                        int32_t Flags, GlobalValue::LinkageTypes,
                        StringRef Name = "");
1786
/// The kind of errors that can occur when emitting the offload entries and
/// metadata. Values of this enum are the first argument of
/// EmitMetadataErrorReportFunctionTy callbacks.
enum EmitMetadataErrorKind {
  EMIT_MD_TARGET_REGION_ERROR,
  EMIT_MD_DECLARE_TARGET_ERROR,
  EMIT_MD_GLOBAL_VAR_LINK_ERROR
};
1794
/// Callback function type taking the kind of error and a
/// TargetRegionEntryInfo; createOffloadEntriesAndInfoMetadata accepts a
/// callback of this type.
using EmitMetadataErrorReportFunctionTy =
    std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
1798
/// Emit the offloading entries and metadata so that the device codegen side
/// can easily figure out what to emit. The produced metadata looks like
/// this:
///
///   !omp_offload.info = !{!1, ...}
///
/// We only generate metadata for functions that contain target regions.
///
/// \param ErrorReportFunction Callback receiving an EmitMetadataErrorKind
///        and a TargetRegionEntryInfo.
void createOffloadEntriesAndInfoMetadata(
    EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
1808
1809public:
/// Generator for __kmpc_copyprivate
///
/// \param Loc The source location description.
/// \param BufSize Number of elements in the buffer.
/// \param CpyBuf List of pointers to data to be copied.
/// \param CpyFn Function to call for copying data.
/// \param DidIt Flag variable; 1 for 'single' thread, 0 otherwise.
///
/// \return The insertion position *after* the CopyPrivate call.
InsertPointTy createCopyPrivate(const LocationDescription &Loc,
                                llvm::Value *BufSize, llvm::Value *CpyBuf,
                                llvm::Value *CpyFn, llvm::Value *DidIt);
1823
/// Generator for '#omp single'
///
/// \param Loc The source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsNowait If false, a barrier is emitted.
/// \param DidIt Local variable used as a flag to indicate 'single' thread.
///
/// \returns The insertion position *after* the single call.
InsertPointTy createSingle(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, bool IsNowait,
                           llvm::Value *DidIt);
1837
/// Generator for '#omp master'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
///
/// \returns The insertion position *after* the master region.
InsertPointTy createMaster(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB);
1848
/// Generator for '#omp masked'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param Filter Not documented in the original comment; presumably the
///        value of the 'filter' clause -- TODO confirm.
///
/// \returns The insertion position *after* the masked region.
InsertPointTy createMasked(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, Value *Filter);
1859
/// Generator for '#omp critical'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
/// \param CriticalName Name of the lock used by the critical directive.
/// \param HintInst Hint instruction for the hint clause associated with
///        critical.
///
/// \returns The insertion position *after* the critical region.
InsertPointTy createCritical(const LocationDescription &Loc,
                             BodyGenCallbackTy BodyGenCB,
                             FinalizeCallbackTy FiniCB,
                             StringRef CriticalName, Value *HintInst);
1873
/// Generator for '#omp ordered depend (source | sink)'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param NumLoops The number of loops in the depend clause.
/// \param StoreValues The values that will be stored in the vector address.
/// \param Name The name of the alloca instruction.
/// \param IsDependSource If true, depend source; otherwise, depend sink.
///
/// \return The insertion position *after* the ordered directive.
InsertPointTy createOrderedDepend(const LocationDescription &Loc,
                                  InsertPointTy AllocaIP, unsigned NumLoops,
                                  ArrayRef<llvm::Value *> StoreValues,
                                  const Twine &Name, bool IsDependSource);
1888
/// Generator for '#omp ordered [threads | simd]'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsThreads If true, with threads clause or without clause;
///        otherwise, with simd clause.
///
/// \returns The insertion position *after* the ordered region.
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
                                       BodyGenCallbackTy BodyGenCB,
                                       FinalizeCallbackTy FiniCB,
                                       bool IsThreads);
1902
/// Generator for '#omp sections'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param SectionCBs Callbacks that will generate the body of each section.
/// \param PrivCB Callback to copy a given variable (think copy constructor).
/// \param FiniCB Callback to finalize variable copies.
/// \param IsCancellable Flag to indicate a cancellable parallel region.
/// \param IsNowait If true, the barrier that ensures all sections are
///        executed before moving forward will not be generated.
///
/// \returns The insertion position *after* the sections.
InsertPointTy createSections(const LocationDescription &Loc,
                             InsertPointTy AllocaIP,
                             ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
                             PrivatizeCallbackTy PrivCB,
                             FinalizeCallbackTy FiniCB, bool IsCancellable,
                             bool IsNowait);
1920
/// Generator for '#omp section'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
///
/// \returns The insertion position *after* the section.
InsertPointTy createSection(const LocationDescription &Loc,
                            BodyGenCallbackTy BodyGenCB,
                            FinalizeCallbackTy FiniCB);
1930
/// Generator for `#omp teams`
///
/// \param Loc The location where the teams construct was encountered.
/// \param BodyGenCB Callback that will generate the region code.
/// \param NumTeamsLower Lower bound on the number of teams. If this is
///        nullptr, it is as if the lower bound is specified as equal to the
///        upper bound. If this is non-null, then the upper bound must also
///        be non-null.
/// \param NumTeamsUpper Upper bound on the number of teams.
/// \param ThreadLimit Limit on the number of threads that may participate in
///        a contention group created by each team.
/// \param IfExpr The integer argument value of the if condition on the
///        teams clause.
InsertPointTy
createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
            Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr,
            Value *ThreadLimit = nullptr, Value *IfExpr = nullptr);
1947
/// Generate conditional branch and relevant BasicBlocks through which private
/// threads copy the 'copyin' variables from Master copy to threadprivate
/// copies.
///
/// \param IP Insertion block for the copyin conditional.
/// \param MasterAddr A pointer to the master variable.
/// \param PrivateAddr A pointer to the threadprivate variable.
/// \param IntPtrTy Pointer size type.
/// \param BranchtoEnd Create a branch between the copyin.not.master blocks
///        and the copy.in.end block; defaults to true.
///
/// \returns The insertion point where the copying operation is to be emitted.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
                                       Value *PrivateAddr,
                                       llvm::IntegerType *IntPtrTy,
                                       bool BranchtoEnd = true);
1964
/// Create a runtime call for kmpc_Alloc
///
/// \param Loc The insert and source location description.
/// \param Size Size of the allocated memory space.
/// \param Allocator Allocator information instruction.
/// \param Name Name of the call instruction for OMP_alloc; defaults to the
///        empty string.
///
/// \returns CallInst to the OMP_Alloc call.
CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
                         Value *Allocator, std::string Name = "");
1975
/// Create a runtime call for kmpc_free
///
/// \param Loc The insert and source location description.
/// \param Addr Address of the memory space to be freed.
/// \param Allocator Allocator information instruction.
/// \param Name Name of the call instruction for OMP_Free; defaults to the
///        empty string.
///
/// \returns CallInst to the OMP_Free call.
CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
                        Value *Allocator, std::string Name = "");
1986
/// Create a runtime call for kmpc_threadprivate_cached
///
/// \param Loc The insert and source location description.
/// \param Pointer Pointer to the data to be cached.
/// \param Size Size of the data to be cached.
/// \param Name Name of the call instruction; defaults to the empty string.
///
/// \returns CallInst to the thread private cache call.
CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
                                    llvm::Value *Pointer,
                                    llvm::ConstantInt *Size,
                                    const llvm::Twine &Name = Twine(""));
1999
/// Create a runtime call for __tgt_interop_init
///
/// \param Loc The insert and source location description.
/// \param InteropVar Variable to be allocated.
/// \param InteropType Type of interop operation.
/// \param Device Device to which offloading will occur.
/// \param NumDependences Number of dependence variables.
/// \param DependenceAddress Pointer to dependence variables.
/// \param HaveNowaitClause Whether a nowait clause exists.
///
/// \returns CallInst to the __tgt_interop_init call.
CallInst *createOMPInteropInit(const LocationDescription &Loc,
                               Value *InteropVar,
                               omp::OMPInteropType InteropType, Value *Device,
                               Value *NumDependences,
                               Value *DependenceAddress,
                               bool HaveNowaitClause);
2017
/// Create a runtime call for __tgt_interop_destroy
///
/// \param Loc The insert and source location description.
/// \param InteropVar Variable to be allocated.
/// \param Device Device to which offloading will occur.
/// \param NumDependences Number of dependence variables.
/// \param DependenceAddress Pointer to dependence variables.
/// \param HaveNowaitClause Whether a nowait clause exists.
///
/// \returns CallInst to the __tgt_interop_destroy call.
CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
                                  Value *InteropVar, Value *Device,
                                  Value *NumDependences,
                                  Value *DependenceAddress,
                                  bool HaveNowaitClause);
2033
/// Create a runtime call for __tgt_interop_use
///
/// \param Loc The insert and source location description.
/// \param InteropVar Variable to be allocated.
/// \param Device Device to which offloading will occur.
/// \param NumDependences Number of dependence variables.
/// \param DependenceAddress Pointer to dependence variables.
/// \param HaveNowaitClause Whether a nowait clause exists.
///
/// \returns CallInst to the __tgt_interop_use call.
CallInst *createOMPInteropUse(const LocationDescription &Loc,
                              Value *InteropVar, Value *Device,
                              Value *NumDependences, Value *DependenceAddress,
                              bool HaveNowaitClause);
2048
2049 /// The `omp target` interface
2050 ///
2051 /// For more information about the usage of this interface,
2052 /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
2053 ///
2054 ///{
2055
/// Create a runtime call for kmpc_target_init
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
/// \param MinThreadsVal Minimal number of threads, or 0.
/// \param MaxThreadsVal Maximal number of threads, or 0.
/// \param MinTeamsVal Minimal number of teams, or 0.
/// \param MaxTeamsVal Maximal number of teams, or 0.
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
                               int32_t MinThreadsVal = 0,
                               int32_t MaxThreadsVal = 0,
                               int32_t MinTeamsVal = 0,
                               int32_t MaxTeamsVal = 0);
2069
/// Create a runtime call for kmpc_target_deinit
///
/// \param Loc The insert and source location description.
/// \param TeamsReductionDataSize The maximal size of all the reduction data
///        for teams reduction; defaults to 0.
/// \param TeamsReductionBufferLength The number of elements (each of up to
///        \p TeamsReductionDataSize size), in the teams reduction buffer;
///        defaults to 1024.
void createTargetDeinit(const LocationDescription &Loc,
                        int32_t TeamsReductionDataSize = 0,
                        int32_t TeamsReductionBufferLength = 1024);
2080
2081 ///}
2082
2083 /// Helpers to read/write kernel annotations from the IR.
2084 ///
2085 ///{
2086
/// Read the bounds on threads for \p Kernel as a pair of values. Read will
/// return 0 if none is set.
static std::pair<int32_t, int32_t>
readThreadBoundsForKernel(const Triple &T, Function &Kernel);
/// Write the bounds \p LB and \p UB on threads for \p Kernel.
static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel,
                                       int32_t LB, int32_t UB);
2093
/// Read the bounds on teams for \p Kernel as a pair of values. Read will
/// return 0 if none is set.
static std::pair<int32_t, int32_t> readTeamBoundsForKernel(const Triple &T,
                                                           Function &Kernel);
/// Write the bounds \p LB and \p UB on teams for \p Kernel.
static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB,
                                int32_t UB);
2100 ///}
2101
2102private:
/// Sets the function attributes expected for the outlined function.
///
/// \param OutlinedFn The outlined function to update.
void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn);
2105
/// Creates the function ID/Address for the given outlined function.
/// In the case of an embedded device function the address of the function is
/// used, in the case of a non-offload function a constant is created.
///
/// \param OutlinedFn    The outlined function.
/// \param EntryFnIDName Presumably the name used for the created ID --
///                      TODO confirm.
Constant *createOutlinedFunctionID(Function *OutlinedFn,
                                   StringRef EntryFnIDName);
2111
/// Creates the region entry address for the outlined function.
///
/// \param OutlinedFunction The outlined function.
/// \param EntryFnName      Presumably the name used for the created entry --
///                         TODO confirm.
Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
                                      StringRef EntryFnName);
2115
2116public:
/// Type of the callbacks used to generate a function with the given name:
/// the callback receives the function name and returns the Function.
using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
2119
2120 /// Create a unique name for the entry function using the source location
2121 /// information of the current target region. The name will be something like:
2122 ///
2123 /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
2124 ///
2125 /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
2126 /// mangled name of the function that encloses the target region and BB is the
2127 /// line number of the target region. CC is a count added when more than one
2128 /// region is located at the same location.
2129 ///
2130 /// If this target outline function is not an offload entry, we don't need to
2131 /// register it. This may happen if it is guarded by an if clause that is
2132 /// false at compile time, or no target archs have been specified.
2133 ///
2134 /// The created target region ID is used by the runtime library to identify
2135 /// the current target region, so it only has to be unique and not
2136 /// necessarily point to anything. It could be the pointer to the outlined
2137 /// function that implements the target region, but we aren't using that so
2138 /// that the compiler doesn't need to keep that, and could therefore inline
2139 /// the host function if proven worthwhile during optimization. On the other
2140 /// hand, if emitting code for the device, the ID has to be the function
2141 /// address so that it can be retrieved from the offloading entry and launched
2142 /// by the runtime library. We also mark the outlined function to have
2143 /// external linkage in case we are emitting code for the device, because
2144 /// these functions will be entry points to the device.
2145 ///
2146 /// \param EntryInfo The entry information about the function
2147 /// \param GenerateFunctionCallback The callback function to generate the code
2148 /// \param IsOffloadEntry Whether the outlined function is an offload entry
2149 /// \param OutlinedFn Reference to the pointer to the outlined function
2150 /// \param OutlinedFnID Reference to the target region ID to be created
2151 void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
2152 FunctionGenCallback &GenerateFunctionCallback,
2153 bool IsOffloadEntry, Function *&OutlinedFn,
2154 Constant *&OutlinedFnID);
2155
2156 /// Registers the given function and sets up the attributes of the function.
2157 ///
2158 /// \param EntryInfo The entry information about the function
2159 /// \param OutlinedFunction Pointer to the outlined function
2160 /// \param EntryFnName Name of the outlined function
2161 /// \param EntryFnIDName Name of the ID to be created
2162 ///
2163 /// \return The FunctionID.
2164 Constant *registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
2165 Function *OutlinedFunction,
2166 StringRef EntryFnName,
2167 StringRef EntryFnIDName);
2168
2169 /// Type of BodyGen to use for region codegen.
2170 ///
2171 /// Priv: If device pointer privatization is required, emit the body of the
2172 /// region here. It will have to be duplicated: with and without
2173 /// privatization.
2174 /// DupNoPriv: If we need device pointer privatization, we need
2175 /// to emit the body of the region with no privatization in the 'else' branch
2176 /// of the conditional.
2177 /// NoPriv: If we don't require privatization of device
2178 /// pointers, we emit the body in between the runtime calls. This avoids
2179 /// duplicating the body code.
2180 enum BodyGenTy { Priv, DupNoPriv, NoPriv };
2181
2182 /// Callback type for creating the map infos for the kernel parameters; the
2183 /// callback returns them as a MapInfosTy reference. \param CodeGenIP is the
2184 /// insertion point where code should be generated, if any.
2185 using GenMapInfoCallbackTy =
2186 function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
2187
2188 /// Generator for '#omp target data'
2189 ///
2190 /// \param Loc The location where the target data construct was encountered.
2191 /// \param AllocaIP The insertion points to be used for alloca instructions.
2192 /// \param CodeGenIP The insertion point at which the target directive code
2193 /// should be placed.
2194 /// \param DeviceID Stores the DeviceID from the device clause.
2195 /// \param IfCond Value which corresponds to the if clause condition.
2196 /// \param Info Stores all information related to the Target Data directive.
2197 /// \param GenMapInfoCB Callback that populates the MapInfos and returns.
2198 /// \param MapperFunc Optional omp::RuntimeFunction (defaults to nullptr);
2199 /// presumably selects the mapper runtime call to emit -- TODO confirm.
2200 /// \param BodyGenCB Optional Callback to generate the region code.
2201 /// \param DeviceAddrCB Optional callback to generate code related to
2202 /// use_device_ptr and use_device_addr.
2203 /// \param CustomMapperCB Optional callback generating code for custom mappers.
2204 /// \param SrcLocInfo Optional source location info value (defaults to nullptr).
2205 OpenMPIRBuilder::InsertPointTy createTargetData(
2206 const LocationDescription &Loc, InsertPointTy AllocaIP,
2207 InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
2208 TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
2209 omp::RuntimeFunction *MapperFunc = nullptr,
2210 function_ref<InsertPointTy(InsertPointTy CodeGenIP,
2211 BodyGenTy BodyGenType)>
2212 BodyGenCB = nullptr,
2213 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
2214 function_ref<Value *(unsigned int)> CustomMapperCB = nullptr,
2215 Value *SrcLocInfo = nullptr);
2216
2217 using TargetBodyGenCallbackTy = function_ref<InsertPointTy(
2218 InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
2219
2220 using TargetGenArgAccessorsCallbackTy = function_ref<InsertPointTy(
2221 Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP,
2222 InsertPointTy CodeGenIP)>;
2223
2224 /// Generator for '#omp target'
2225 ///
2226 /// \param Loc Where the target construct was encountered.
2227 /// \param AllocaIP The insertion points to be used for alloca instructions.
2228 /// \param CodeGenIP Insertion point where the outlined function call is emitted.
2229 /// \param EntryInfo The entry information about the function.
2230 /// \param NumTeams Number of teams specified in the num_teams clause.
2231 /// \param NumThreads Number of threads specified in the thread_limit clause.
2232 /// \param Inputs Input values passed as arguments to the outlined function.
2233 /// \param GenMapInfoCB Callback that populates the MapInfos and returns.
2234 /// \param BodyGenCB Callback that will generate the region code.
2235 /// \param ArgAccessorFuncCB Callback that will generate accessor
2236 /// instructions for passed-in target arguments where necessary.
2237 InsertPointTy createTarget(const LocationDescription &Loc,
2238 OpenMPIRBuilder::InsertPointTy AllocaIP,
2239 OpenMPIRBuilder::InsertPointTy CodeGenIP,
2240 TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
2241 int32_t NumThreads,
2242 SmallVectorImpl<Value *> &Inputs,
2243 GenMapInfoCallbackTy GenMapInfoCB,
2244 TargetBodyGenCallbackTy BodyGenCB,
2245 TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB);
2246
2247 /// Returns the __kmpc_for_static_init_* runtime function for the specified
2248 /// size \a IVSize and sign \a IVSigned. Will create a distribute call
2249 /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set.
2250 FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned,
2251 bool IsGPUDistribute);
2252
2253 /// Returns the __kmpc_dispatch_init_* runtime function for the specified
2254 /// size \a IVSize and sign \a IVSigned.
2255 FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned);
2256
2257 /// Returns the __kmpc_dispatch_next_* runtime function for the specified
2258 /// size \a IVSize and sign \a IVSigned.
2259 FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned);
2260
2261 /// Returns the __kmpc_dispatch_fini_* runtime function for the specified
2262 /// size \a IVSize and sign \a IVSigned.
2263 FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned);
2264
2265 /// Declarations for LLVM-IR types (simple, array, function and structure) are
2266 /// generated below. Their names are defined and used in OMPKinds.def. Here
2267 /// we provide the declarations, the initializeTypes function will provide the
2268 /// values.
2269 ///
2270 ///{
2271#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
2272#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
2273 ArrayType *VarName##Ty = nullptr; \
2274 PointerType *VarName##PtrTy = nullptr;
2275#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
2276 FunctionType *VarName = nullptr; \
2277 PointerType *VarName##Ptr = nullptr;
2278#define OMP_STRUCT_TYPE(VarName, StrName, ...) \
2279 StructType *VarName = nullptr; \
2280 PointerType *VarName##Ptr = nullptr;
2281#include "llvm/Frontend/OpenMP/OMPKinds.def"
2282
2283 ///}
2284
2285private:
2286 /// Create all simple and struct types exposed by the runtime and remember the
2287 /// llvm::PointerTypes of them for later use (NOTE(review): \p M undocumented).
2288 void initializeTypes(Module &M);
2289
2290 /// Common interface for generating entry calls for OMP Directives.
2291 /// If the directive has a region/body, it will set the insertion
2292 /// point to the body.
2293 ///
2294 /// \param OMPD Directive to generate entry blocks for
2295 /// \param EntryCall Call to the entry OMP Runtime Function
2296 /// \param ExitBB block where the region ends.
2297 /// \param Conditional indicate if the entry call result will be used
2298 /// to evaluate a conditional of whether a thread will execute
2299 /// body code or not.
2300 ///
2301 /// \return The insertion position in exit block
2302 InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
2303 BasicBlock *ExitBB,
2304 bool Conditional = false);
2305
2306 /// Common interface to finalize the region.
2307 ///
2308 /// \param OMPD Directive to generate exiting code for
2309 /// \param FinIP Insertion point for emitting finalization code and exit call
2310 /// \param ExitCall Call to the ending OMP Runtime Function
2311 /// \param HasFinalize indicate if the directive will require finalization
2312 /// and has a finalization callback in the stack that
2313 /// should be called (defaults to true).
2314 ///
2315 /// \return The insertion position in exit block
2316 InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
2317 InsertPointTy FinIP,
2318 Instruction *ExitCall,
2319 bool HasFinalize = true);
2320
2321 /// Common interface to generate OMP inlined regions.
2322 ///
2323 /// \param OMPD Directive to generate inlined region for
2324 /// \param EntryCall Call to the entry OMP Runtime Function
2325 /// \param ExitCall Call to the ending OMP Runtime Function
2326 /// \param BodyGenCB Body code generation callback.
2327 /// \param FiniCB Finalization Callback. Will be called when finalizing region
2328 /// \param Conditional indicate if the entry call result will be used
2329 /// to evaluate a conditional of whether a thread will execute
2330 /// body code or not.
2331 /// \param HasFinalize indicate if the directive will require finalization
2332 /// and has a finalization callback in the stack that
2333 /// should be called.
2334 /// \param IsCancellable if HasFinalize is set to true, indicate if the
2335 /// directive should be cancellable.
2336 /// \return The insertion point after the region
2337
2338 InsertPointTy
2339 EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
2340 Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
2341 FinalizeCallbackTy FiniCB, bool Conditional = false,
2342 bool HasFinalize = true, bool IsCancellable = false);
2343
2344 /// Get the name built from \p Parts using the platform-specific separators.
2345 /// \param Parts different parts of the final name that need separation
2346 /// \param FirstSeparator First separator used between the initial two
2347 /// parts of the name.
2348 /// \param Separator separator used between all of the remaining consecutive
2349 /// parts of the name
2350 static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
2351 StringRef FirstSeparator,
2352 StringRef Separator);
2353
2354 /// Returns the corresponding lock object for the specified critical region
2355 /// name. If the lock object does not exist it is created, otherwise the
2356 /// reference to the existing copy is returned.
2357 /// \param CriticalName Name of the critical region.
2358 /// \return The lock object for \p CriticalName.
2359 Value *getOMPCriticalRegionLock(StringRef CriticalName);
2360
2361 /// Callback type for atomic expression update.
2362 /// Example:
2363 /// \code{.cpp}
2364 /// unsigned x = 0;
2365 /// #pragma omp atomic update
2366 /// x = Expr(x_old); //Expr() is any legal operation
2367 /// \endcode
2368 ///
2369 /// \param XOld the value of the atomic memory address to use for update
2370 /// \param IRB reference to the IRBuilder to use
2371 ///
2372 /// \returns Value to update X to.
2373 using AtomicUpdateCallbackTy =
2374 const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
2375
2376private:
2377 enum AtomicKind { Read, Write, Update, Capture, Compare }; // OpenMP atomic operation kinds.
2378
2379 /// Determine whether to emit flush or not
2380 ///
2381 /// \param Loc The insert and source location description.
2382 /// \param AO The required atomic ordering
2383 /// \param AK The OpenMP atomic operation kind used.
2384 ///
2385 /// \returns whether a flush was emitted or not
2386 bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
2387 AtomicOrdering AO, AtomicKind AK);
2388
2389 /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X
2390 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2391 /// Only Scalar data types.
2392 ///
2393 /// \param AllocaIP The insertion point to be used for alloca
2394 /// instructions.
2395 /// \param X The target atomic pointer to be updated
2396 /// \param XElemTy The element type of the atomic pointer.
2397 /// \param Expr The value to update X with.
2398 /// \param AO Atomic ordering of the generated atomic
2399 /// instructions.
2400 /// \param RMWOp The binary operation used for update. If
2401 /// operation is not supported by atomicRMW,
2402 /// or belong to {FADD, FSUB, BAD_BINOP}.
2403 /// Then a `cmpExch` based atomic will be generated.
2404 /// \param UpdateOp Code generator for complex expressions that cannot be
2405 /// expressed through atomicrmw instruction.
2406 /// \param VolatileX true if \a X is volatile.
2407 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2408 /// update expression, false otherwise.
2409 /// (e.g. true for X = X BinOp Expr)
2410 ///
2411 /// \returns A pair of the old value of X before the update, and the value
2412 /// used for the update.
2413 std::pair<Value *, Value *>
2414 emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
2415 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
2416 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
2417 bool IsXBinopExpr);
2418
2419 /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
2420 ///
2421 /// \return The instruction
2422 Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
2423 AtomicRMWInst::BinOp RMWOp);
2424
2425public:
2426 /// A struct to pack relevant information while generating atomic ops.
2427 struct AtomicOpValue {
2428 Value *Var = nullptr; // presumably the pointer to the atomic variable -- TODO confirm
2429 Type *ElemTy = nullptr; // presumably the element type behind Var -- TODO confirm
2430 bool IsSigned = false;
2431 bool IsVolatile = false;
2432 };
2433
2434 /// Emit atomic read for: V = X --- Only scalar data types.
2435 ///
2436 /// \param Loc The insert and source location description.
2437 /// \param X The target pointer to be atomically read.
2438 /// \param V Memory address where to store the atomically read
2439 /// value.
2440 /// \param AO Atomic ordering of the generated atomic
2441 /// instructions.
2442 ///
2443 /// \return Insertion point after generated atomic read IR.
2444 InsertPointTy createAtomicRead(const LocationDescription &Loc,
2445 AtomicOpValue &X, AtomicOpValue &V,
2446 AtomicOrdering AO);
2447
2448 /// Emit atomic write for: X = Expr --- Only scalar data types.
2449 ///
2450 /// \param Loc The insert and source location description.
2451 /// \param X The target pointer to be atomically written to.
2452 /// \param Expr The value to store.
2453 /// \param AO Atomic ordering of the generated atomic
2454 /// instructions.
2455 ///
2456 /// \return Insertion point after generated atomic write IR.
2457 InsertPointTy createAtomicWrite(const LocationDescription &Loc,
2458 AtomicOpValue &X, Value *Expr,
2459 AtomicOrdering AO);
2460
2461 /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X
2462 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2463 /// Only Scalar data types.
2464 ///
2465 /// \param Loc The insert and source location description.
2466 /// \param AllocaIP The insertion point to be used for alloca instructions.
2467 /// \param X The target atomic pointer to be updated
2468 /// \param Expr The value to update X with.
2469 /// \param AO Atomic ordering of the generated atomic instructions.
2470 /// \param RMWOp The binary operation used for update. If operation
2471 /// is not supported by atomicRMW, or belong to
2472 /// {FADD, FSUB, BAD_BINOP}. Then a `cmpExch` based
2473 /// atomic will be generated.
2474 /// \param UpdateOp Code generator for complex expressions that cannot be
2475 /// expressed through atomicrmw instruction.
2476 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2477 /// update expression, false otherwise.
2478 /// (e.g. true for X = X BinOp Expr)
2479 ///
2480 /// \return Insertion point after generated atomic update IR.
2481 InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
2482 InsertPointTy AllocaIP, AtomicOpValue &X,
2483 Value *Expr, AtomicOrdering AO,
2484 AtomicRMWInst::BinOp RMWOp,
2485 AtomicUpdateCallbackTy &UpdateOp,
2486 bool IsXBinopExpr);
2487
2488 /// Emit atomic capture for constructs: --- Only scalar data types
2489 /// V = X; X = X BinOp Expr ,
2490 /// X = X BinOp Expr; V = X,
2491 /// V = X; X = Expr BinOp X,
2492 /// X = Expr BinOp X; V = X,
2493 /// V = X; X = UpdateOp(X),
2494 /// X = UpdateOp(X); V = X,
2495 ///
2496 /// \param Loc The insert and source location description.
2497 /// \param AllocaIP The insertion point to be used for alloca instructions.
2498 /// \param X The target atomic pointer to be updated
2499 /// \param V Memory address where to store captured value
2500 /// \param Expr The value to update X with.
2501 /// \param AO Atomic ordering of the generated atomic instructions
2502 /// \param RMWOp The binary operation used for update. If
2503 /// operation is not supported by atomicRMW, or belong to
2504 /// {FADD, FSUB, BAD_BINOP}. Then a cmpExch based
2505 /// atomic will be generated.
2506 /// \param UpdateOp Code generator for complex expressions that cannot be
2507 /// expressed through atomicrmw instruction.
2508 /// \param UpdateExpr true if X is an in place update of the form
2509 /// X = X BinOp Expr or X = Expr BinOp X
2510 /// \param IsPostfixUpdate true if original value of 'x' must be stored in
2511 /// 'v', not an updated one.
2512 /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
2513 /// update expression, false otherwise.
2514 /// (e.g. true for X = X BinOp Expr)
2515 ///
2516 /// \return Insertion point after generated atomic capture IR.
2517 InsertPointTy
2518 createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
2519 AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
2520 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
2521 AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
2522 bool IsPostfixUpdate, bool IsXBinopExpr);
2523
2524 /// Emit atomic compare for constructs: --- Only scalar data types
2525 /// cond-expr-stmt:
2526 /// x = x ordop expr ? expr : x;
2527 /// x = expr ordop x ? expr : x;
2528 /// x = x == e ? d : x;
2529 /// x = e == x ? d : x; (this one is not in the spec)
2530 /// cond-update-stmt:
2531 /// if (x ordop expr) { x = expr; }
2532 /// if (expr ordop x) { x = expr; }
2533 /// if (x == e) { x = d; }
2534 /// if (e == x) { x = d; } (this one is not in the spec)
2535 /// conditional-update-capture-atomic:
2536 /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
2537 /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
2538 /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2539 /// IsFailOnly=true)
2540 /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
2541 /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2542 /// IsFailOnly=true)
2543 ///
2544 /// \param Loc The insert and source location description.
2545 /// \param X The target atomic pointer to be updated.
2546 /// \param V Memory address where to store captured value (for
2547 /// compare capture only).
2548 /// \param R Memory address where to store comparison result
2549 /// (for compare capture with '==' only).
2550 /// \param E The expected value ('e') for forms that use an
2551 /// equality comparison or an expression ('expr') for
2552 /// forms that use 'ordop' (logically an atomic maximum or
2553 /// minimum).
2554 /// \param D The desired value for forms that use an equality
2555 /// comparison. For forms that use 'ordop', it should be
2556 /// \p nullptr.
2557 /// \param AO Atomic ordering of the generated atomic instructions.
2558 /// \param Op Atomic compare operation. It can only be ==, <, or >.
2559 /// \param IsXBinopExpr True if the conditional statement is in the form where
2560 /// x is on LHS. It only matters for < or >.
2561 /// \param IsPostfixUpdate True if original value of 'x' must be stored in
2562 /// 'v', not an updated one (for compare capture
2563 /// only).
2564 /// \param IsFailOnly True if the original value of 'x' is stored to 'v'
2565 /// only when the comparison fails. This is only valid for
2566 /// the case the comparison is '=='.
2567 /// \param Failure (second overload only) presumably the ordering used when the comparison fails -- TODO confirm.
2568 /// \return Insertion point after generated atomic compare IR.
2569 InsertPointTy
2570 createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
2571 AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
2572 AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
2573 bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
2574 InsertPointTy createAtomicCompare(const LocationDescription &Loc,
2575 AtomicOpValue &X, AtomicOpValue &V,
2576 AtomicOpValue &R, Value *E, Value *D,
2577 AtomicOrdering AO,
2578 omp::OMPAtomicCompareOp Op,
2579 bool IsXBinopExpr, bool IsPostfixUpdate,
2580 bool IsFailOnly, AtomicOrdering Failure);
2581
2582 /// Create the control flow structure of a canonical OpenMP loop.
2583 ///
2584 /// The emitted loop will be disconnected, i.e. no edge to the loop's
2585 /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
2586 /// IRBuilder location is not preserved.
2587 ///
2588 /// \param DL DebugLoc used for the instructions in the skeleton.
2589 /// \param TripCount Value to be used for the trip count.
2590 /// \param F Function in which to insert the BasicBlocks.
2591 /// \param PreInsertBefore Where to insert BBs that execute before the body,
2592 /// typically the body itself.
2593 /// \param PostInsertBefore Where to insert BBs that execute after the body.
2594 /// \param Name Base name used to derive BB
2595 /// and instruction names (defaults to empty).
2596 ///
2597 /// \returns The CanonicalLoopInfo that represents the emitted loop.
2598 CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
2599 Function *F,
2600 BasicBlock *PreInsertBefore,
2601 BasicBlock *PostInsertBefore,
2602 const Twine &Name = {});
2603 /// Name string of the OMP offload info metadata.
2604 const std::string ompOffloadInfoName = "omp_offload.info";
2605
2606 /// Loads all the offload entries information from the host IR
2607 /// metadata. This function is only meant to be used with device code
2608 /// generation.
2609 ///
2610 /// \param M Module to load Metadata info from. Module passed may be
2611 /// loaded from bitcode file, i.e., different from OpenMPIRBuilder::M module.
2612 void loadOffloadInfoMetadata(Module &M);
2613
2614 /// Loads all the offload entries information from the host IR
2615 /// metadata read from the file passed in as the HostFilePath argument. This
2616 /// function is only meant to be used with device code generation.
2617 ///
2618 /// \param HostFilePath The path to the host IR file,
2619 /// used to load in offload metadata for the device, allowing host and device
2620 /// to maintain the same metadata mapping.
2621 void loadOffloadInfoMetadata(StringRef HostFilePath);
2622
2623 /// Gets (if a variable with the given name already exists) or creates an
2624 /// internal global variable with the specified Name. The created variable has
2625 /// linkage CommonLinkage by default and is initialized by null value.
2626 /// \param Ty Type of the global variable; must match if it already exists.
2627 /// \param Name Name of the variable.
2628 /// \param AddressSpace Address space for the variable (defaults to 0).
2629 GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
2630 unsigned AddressSpace = 0);
2631
2632 /// Create a global function to register OpenMP requires flags into the
2633 /// runtime, according to the `Config`.
2634 ///
2635 /// This function should be added to the list of constructors of the
2636 /// compilation unit in order to be called before other OpenMP runtime
2637 /// functions.
2638 /// \param Name Name of the created function.
2639 /// \return The created function.
2640 Function *createRegisterRequires(StringRef Name);
2641};
2642
2643/// Class to represent the control flow structure of an OpenMP canonical loop.
2644///
2645/// The control-flow structure is standardized for easy consumption by
2646/// directives associated with loops. For instance, the worksharing-loop
2647/// construct may change this control flow such that each loop iteration is
2648/// executed on only one thread. The constraints of a canonical loop in brief
2649/// are:
2650///
2651/// * The number of loop iterations must have been computed before entering the
2652/// loop.
2653///
2654/// * Has an (unsigned) logical induction variable that starts at zero and
2655/// increments by one.
2656///
2657/// * The loop's CFG itself has no side-effects. The OpenMP specification
2658/// itself allows side-effects, but the order in which they happen, including
2659/// how often or whether at all, is unspecified. We expect that the frontend
2660/// will emit those side-effect instructions somewhere (e.g. before the loop)
2661/// such that the CanonicalLoopInfo itself can be side-effect free.
2662///
2663/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
2664/// execution of a loop body that satisfies these constraints. It does NOT
2665/// represent arbitrary SESE regions that happen to contain a loop. Do not use
2666/// CanonicalLoopInfo for such purposes.
2667///
2668/// The control flow can be described as follows:
2669///
2670/// Preheader
2671/// |
2672/// /-> Header
2673/// | |
2674/// | Cond---\
2675/// | | |
2676/// | Body |
2677/// | | | |
2678/// | <...> |
2679/// | | | |
2680/// \--Latch |
2681/// |
2682/// Exit
2683/// |
2684/// After
2685///
2686/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
2687/// including) and end at AfterIP (at the After's first instruction, excluding).
2688/// That is, instructions in the Preheader and After blocks (except the
2689/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
2690/// side-effects. Typically, the Preheader is used to compute the loop's trip
2691/// count. The instructions from BodyIP (at the Body block's first instruction,
2692/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
2693/// control and thus can have side-effects. The body block is the single entry
2694/// point into the loop body, which may contain arbitrary control flow as long
2695/// as all control paths eventually branch to the Latch block.
2696///
2697/// TODO: Consider adding another standardized BasicBlock between Body CFG and
2698/// Latch to guarantee that there is only a single edge to the latch. It would
2699/// make loop transformations easier to not needing to consider multiple
2700/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
2701/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
2702/// executes after each body iteration.
2703///
2704/// There must be no loop-carried dependencies through llvm::Values. This is
2705/// equivalent to requiring that the Latch has no PHINode and the Header's only PHINode is
2706/// for the induction variable.
2707///
2708/// All code in Header, Cond, Latch and Exit (plus the terminator of the
2709/// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
2710/// by assertOK(). They are expected to not be modified unless explicitly
2711/// modifying the CanonicalLoopInfo through a method that applies an OpenMP
2712/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
2713/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
2714/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
2715/// anymore as its underlying control flow may not exist anymore.
2716/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
2717/// may also return a new CanonicalLoopInfo that can be passed to other
2718/// loop-associated construct implementing methods. These loop-transforming
2719/// methods may either create a new CanonicalLoopInfo usually using
2720/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
2721/// modify one of the input CanonicalLoopInfo and return it as representing the
2722/// modified loop. What is done is an implementation detail of
2723/// transformation-implementing method and callers should always assume that the
2724/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
2725/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
2726/// created by createCanonicalLoop, such that transforming methods do not have
2727/// to special case where the CanonicalLoopInfo originated from.
2728///
2729/// Generally, methods consuming CanonicalLoopInfo do not need an
2730/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
2731/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
2732/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
2733/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
2734/// any InsertPoint in the Preheader, After or Block can still be used after
2735/// calling such a method.
2736///
2737/// TODO: Provide mechanisms for exception handling and cancellation points.
2738///
2739/// Defined outside OpenMPIRBuilder because nested classes cannot be
2740/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
2741class CanonicalLoopInfo {
2742 friend class OpenMPIRBuilder;
2743
2744private:
2745 BasicBlock *Header = nullptr;
2746 BasicBlock *Cond = nullptr;
2747 BasicBlock *Latch = nullptr;
2748 BasicBlock *Exit = nullptr;
2749
2750 /// Add the control blocks of this loop to \p BBs.
2751 ///
2752 /// This does not include any block from the body, including the one returned
2753 /// by getBody().
2754 ///
2755 /// FIXME: This currently includes the Preheader and After blocks even though
2756 /// their content is (mostly) not under CanonicalLoopInfo's control.
2757 /// Re-evaluate whether this makes sense.
2758 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
2759
2760 /// Sets the number of loop iterations to the given value. This value must be
2761 /// valid in the condition block (i.e., defined in the preheader) and is
2762 /// interpreted as an unsigned integer.
2763 void setTripCount(Value *TripCount);
2764
2765 /// Replace all uses of the canonical induction variable in the loop body with
2766 /// a new one.
2767 ///
2768 /// The intended use case is to update the induction variable for an updated
2769 /// iteration space such that it can stay normalized in the 0...tripcount-1
2770 /// range.
2771 ///
2772 /// The \p Updater is called with the (presumable updated) current normalized
2773 /// induction variable and is expected to return the value that uses of the
2774 /// pre-updated induction values should use instead, typically dependent on
2775 /// the new induction variable. This is a lambda (instead of e.g. just passing
2776 /// the new value) to be able to distinguish the uses of the pre-updated
2777 /// induction variable and uses of the induction varible to compute the
2778 /// updated induction variable value.
2779 void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
2780
2781public:
2782 /// Returns whether this object currently represents the IR of a loop. If
2783 /// returning false, it may have been consumed by a loop transformation or not
2784 /// been intialized. Do not use in this case;
2785 bool isValid() const { return Header; }
2786
2787 /// The preheader ensures that there is only a single edge entering the loop.
2788 /// Code that must be execute before any loop iteration can be emitted here,
2789 /// such as computing the loop trip count and begin lifetime markers. Code in
2790 /// the preheader is not considered part of the canonical loop.
2791 BasicBlock *getPreheader() const;
2792
2793 /// The header is the entry for each iteration. In the canonical control flow,
2794 /// it only contains the PHINode for the induction variable.
2795 BasicBlock *getHeader() const {
2796 assert(isValid() && "Requires a valid canonical loop");
2797 return Header;
2798 }
2799
2800 /// The condition block computes whether there is another loop iteration. If
2801 /// yes, branches to the body; otherwise to the exit block.
2802 BasicBlock *getCond() const {
2803 assert(isValid() && "Requires a valid canonical loop");
2804 return Cond;
2805 }
2806
2807 /// The body block is the single entry for a loop iteration and not controlled
2808 /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
2809 /// eventually branch to the \p Latch block.
2810 BasicBlock *getBody() const {
2811 assert(isValid() && "Requires a valid canonical loop");
2812 return cast<BranchInst>(Val: Cond->getTerminator())->getSuccessor(i: 0);
2813 }
2814
2815 /// Reaching the latch indicates the end of the loop body code. In the
2816 /// canonical control flow, it only contains the increment of the induction
2817 /// variable.
2818 BasicBlock *getLatch() const {
2819 assert(isValid() && "Requires a valid canonical loop");
2820 return Latch;
2821 }
2822
2823 /// Reaching the exit indicates no more iterations are being executed.
2824 BasicBlock *getExit() const {
2825 assert(isValid() && "Requires a valid canonical loop");
2826 return Exit;
2827 }
2828
2829 /// The after block is intended for clean-up code such as lifetime end
2830 /// markers. It is separate from the exit block to ensure, analogous to the
2831 /// preheader, it having just a single entry edge and being free from PHI
2832 /// nodes should there be multiple loop exits (such as from break
2833 /// statements/cancellations).
2834 BasicBlock *getAfter() const {
2835 assert(isValid() && "Requires a valid canonical loop");
2836 return Exit->getSingleSuccessor();
2837 }
2838
2839 /// Returns the llvm::Value containing the number of loop iterations. It must
2840 /// be valid in the preheader and always interpreted as an unsigned integer of
2841 /// any bit-width.
2842 Value *getTripCount() const {
2843 assert(isValid() && "Requires a valid canonical loop");
2844 Instruction *CmpI = &Cond->front();
2845 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
2846 return CmpI->getOperand(i: 1);
2847 }
2848
2849 /// Returns the instruction representing the current logical induction
2850 /// variable. Always unsigned, always starting at 0 with an increment of one.
2851 Instruction *getIndVar() const {
2852 assert(isValid() && "Requires a valid canonical loop");
2853 Instruction *IndVarPHI = &Header->front();
2854 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
2855 return IndVarPHI;
2856 }
2857
2858 /// Return the type of the induction variable (and the trip count).
2859 Type *getIndVarType() const {
2860 assert(isValid() && "Requires a valid canonical loop");
2861 return getIndVar()->getType();
2862 }
2863
2864 /// Return the insertion point for user code before the loop.
2865 OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
2866 assert(isValid() && "Requires a valid canonical loop");
2867 BasicBlock *Preheader = getPreheader();
2868 return {Preheader, std::prev(x: Preheader->end())};
2869 };
2870
2871 /// Return the insertion point for user code in the body.
2872 OpenMPIRBuilder::InsertPointTy getBodyIP() const {
2873 assert(isValid() && "Requires a valid canonical loop");
2874 BasicBlock *Body = getBody();
2875 return {Body, Body->begin()};
2876 };
2877
2878 /// Return the insertion point for user code after the loop.
2879 OpenMPIRBuilder::InsertPointTy getAfterIP() const {
2880 assert(isValid() && "Requires a valid canonical loop");
2881 BasicBlock *After = getAfter();
2882 return {After, After->begin()};
2883 };
2884
2885 Function *getFunction() const {
2886 assert(isValid() && "Requires a valid canonical loop");
2887 return Header->getParent();
2888 }
2889
2890 /// Consistency self-check.
2891 void assertOK() const;
2892
2893 /// Invalidate this loop. That is, the underlying IR does not fulfill the
2894 /// requirements of an OpenMP canonical loop anymore.
2895 void invalidate();
2896};
2897
2898} // end namespace llvm
2899
2900#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
2901

source code of llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h