//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the OpenMPIRBuilder class and helpers used as a convenient
// way to create LLVM instructions for OpenMP directives.
//
//===----------------------------------------------------------------------===//
13 | |
14 | #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H |
15 | #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H |
16 | |
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Allocator.h"
#include "llvm/TargetParser/Triple.h"
#include <forward_list>
#include <map>
#include <optional>
#include <tuple>
26 | |
27 | namespace llvm { |
// Forward declarations of types that are defined later in this header (or
// elsewhere) but referenced before their definition.
class CanonicalLoopInfo;
struct TargetRegionEntryInfo;
class OffloadEntriesInfoManager;
class OpenMPIRBuilder;
32 | |
33 | /// Move the instruction after an InsertPoint to the beginning of another |
34 | /// BasicBlock. |
35 | /// |
36 | /// The instructions after \p IP are moved to the beginning of \p New which must |
37 | /// not have any PHINodes. If \p CreateBranch is true, a branch instruction to |
38 | /// \p New will be added such that there is no semantic change. Otherwise, the |
39 | /// \p IP insert block remains degenerate and it is up to the caller to insert a |
40 | /// terminator. |
41 | void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, |
42 | bool CreateBranch); |
43 | |
44 | /// Splice a BasicBlock at an IRBuilder's current insertion point. Its new |
45 | /// insert location will stick to after the instruction before the insertion |
46 | /// point (instead of moving with the instruction the InsertPoint stores |
47 | /// internally). |
48 | void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch); |
49 | |
50 | /// Split a BasicBlock at an InsertPoint, even if the block is degenerate |
51 | /// (missing the terminator). |
52 | /// |
53 | /// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed |
54 | /// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch |
55 | /// is true, a branch to the new successor will new created such that |
56 | /// semantically there is no change; otherwise the block of the insertion point |
57 | /// remains degenerate and it is the caller's responsibility to insert a |
58 | /// terminator. Returns the new successor block. |
59 | BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, |
60 | llvm::Twine Name = {}); |
61 | |
62 | /// Split a BasicBlock at \p Builder's insertion point, even if the block is |
63 | /// degenerate (missing the terminator). Its new insert location will stick to |
64 | /// after the instruction before the insertion point (instead of moving with the |
65 | /// instruction the InsertPoint stores internally). |
66 | BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch, |
67 | llvm::Twine Name = {}); |
68 | |
69 | /// Split a BasicBlock at \p Builder's insertion point, even if the block is |
70 | /// degenerate (missing the terminator). Its new insert location will stick to |
71 | /// after the instruction before the insertion point (instead of moving with the |
72 | /// instruction the InsertPoint stores internally). |
73 | BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name); |
74 | |
75 | /// Like splitBB, but reuses the current block's name for the new name. |
76 | BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, |
77 | llvm::Twine Suffix = ".split" ); |
78 | |
79 | /// Captures attributes that affect generating LLVM-IR using the |
80 | /// OpenMPIRBuilder and related classes. Note that not all attributes are |
81 | /// required for all classes or functions. In some use cases the configuration |
82 | /// is not necessary at all, because because the only functions that are called |
83 | /// are ones that are not dependent on the configuration. |
84 | class OpenMPIRBuilderConfig { |
85 | public: |
86 | /// Flag to define whether to generate code for the role of the OpenMP host |
87 | /// (if set to false) or device (if set to true) in an offloading context. It |
88 | /// is set when the -fopenmp-is-target-device compiler frontend option is |
89 | /// specified. |
90 | std::optional<bool> IsTargetDevice; |
91 | |
92 | /// Flag for specifying if the compilation is done for an accelerator. It is |
93 | /// set according to the architecture of the target triple and currently only |
94 | /// true when targeting AMDGPU or NVPTX. Today, these targets can only perform |
95 | /// the role of an OpenMP target device, so `IsTargetDevice` must also be true |
96 | /// if `IsGPU` is true. This restriction might be lifted if an accelerator- |
97 | /// like target with the ability to work as the OpenMP host is added, or if |
98 | /// the capabilities of the currently supported GPU architectures are |
99 | /// expanded. |
100 | std::optional<bool> IsGPU; |
101 | |
102 | // Flag for specifying if offloading is mandatory. |
103 | std::optional<bool> OpenMPOffloadMandatory; |
104 | |
105 | /// First separator used between the initial two parts of a name. |
106 | std::optional<StringRef> FirstSeparator; |
107 | /// Separator used between all of the rest consecutive parts of s name |
108 | std::optional<StringRef> Separator; |
109 | |
110 | OpenMPIRBuilderConfig(); |
111 | OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU, |
112 | bool OpenMPOffloadMandatory, |
113 | bool HasRequiresReverseOffload, |
114 | bool HasRequiresUnifiedAddress, |
115 | bool HasRequiresUnifiedSharedMemory, |
116 | bool HasRequiresDynamicAllocators); |
117 | |
118 | // Getters functions that assert if the required values are not present. |
119 | bool isTargetDevice() const { |
120 | assert(IsTargetDevice.has_value() && "IsTargetDevice is not set" ); |
121 | return *IsTargetDevice; |
122 | } |
123 | |
124 | bool isGPU() const { |
125 | assert(IsGPU.has_value() && "IsGPU is not set" ); |
126 | return *IsGPU; |
127 | } |
128 | |
129 | bool openMPOffloadMandatory() const { |
130 | assert(OpenMPOffloadMandatory.has_value() && |
131 | "OpenMPOffloadMandatory is not set" ); |
132 | return *OpenMPOffloadMandatory; |
133 | } |
134 | |
135 | bool hasRequiresFlags() const { return RequiresFlags; } |
136 | bool hasRequiresReverseOffload() const; |
137 | bool hasRequiresUnifiedAddress() const; |
138 | bool hasRequiresUnifiedSharedMemory() const; |
139 | bool hasRequiresDynamicAllocators() const; |
140 | |
141 | /// Returns requires directive clauses as flags compatible with those expected |
142 | /// by libomptarget. |
143 | int64_t getRequiresFlags() const; |
144 | |
145 | // Returns the FirstSeparator if set, otherwise use the default separator |
146 | // depending on isGPU |
147 | StringRef firstSeparator() const { |
148 | if (FirstSeparator.has_value()) |
149 | return *FirstSeparator; |
150 | if (isGPU()) |
151 | return "_" ; |
152 | return "." ; |
153 | } |
154 | |
155 | // Returns the Separator if set, otherwise use the default separator depending |
156 | // on isGPU |
157 | StringRef separator() const { |
158 | if (Separator.has_value()) |
159 | return *Separator; |
160 | if (isGPU()) |
161 | return "$" ; |
162 | return "." ; |
163 | } |
164 | |
165 | void setIsTargetDevice(bool Value) { IsTargetDevice = Value; } |
166 | void setIsGPU(bool Value) { IsGPU = Value; } |
167 | void setOpenMPOffloadMandatory(bool Value) { OpenMPOffloadMandatory = Value; } |
168 | void setFirstSeparator(StringRef FS) { FirstSeparator = FS; } |
169 | void setSeparator(StringRef S) { Separator = S; } |
170 | |
171 | void setHasRequiresReverseOffload(bool Value); |
172 | void setHasRequiresUnifiedAddress(bool Value); |
173 | void setHasRequiresUnifiedSharedMemory(bool Value); |
174 | void setHasRequiresDynamicAllocators(bool Value); |
175 | |
176 | private: |
177 | /// Flags for specifying which requires directive clauses are present. |
178 | int64_t RequiresFlags; |
179 | }; |
180 | |
181 | /// Data structure to contain the information needed to uniquely identify |
182 | /// a target entry. |
183 | struct TargetRegionEntryInfo { |
184 | std::string ParentName; |
185 | unsigned DeviceID; |
186 | unsigned FileID; |
187 | unsigned Line; |
188 | unsigned Count; |
189 | |
190 | TargetRegionEntryInfo() : DeviceID(0), FileID(0), Line(0), Count(0) {} |
191 | TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, |
192 | unsigned FileID, unsigned Line, unsigned Count = 0) |
193 | : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line), |
194 | Count(Count) {} |
195 | |
196 | static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name, |
197 | StringRef ParentName, |
198 | unsigned DeviceID, unsigned FileID, |
199 | unsigned Line, unsigned Count); |
200 | |
201 | bool operator<(const TargetRegionEntryInfo RHS) const { |
202 | return std::make_tuple(args: ParentName, args: DeviceID, args: FileID, args: Line, args: Count) < |
203 | std::make_tuple(args: RHS.ParentName, args: RHS.DeviceID, args: RHS.FileID, args: RHS.Line, |
204 | args: RHS.Count); |
205 | } |
206 | }; |
207 | |
208 | /// Class that manages information about offload code regions and data |
209 | class OffloadEntriesInfoManager { |
210 | /// Number of entries registered so far. |
211 | OpenMPIRBuilder *OMPBuilder; |
212 | unsigned OffloadingEntriesNum = 0; |
213 | |
214 | public: |
215 | /// Base class of the entries info. |
216 | class OffloadEntryInfo { |
217 | public: |
218 | /// Kind of a given entry. |
219 | enum OffloadingEntryInfoKinds : unsigned { |
220 | /// Entry is a target region. |
221 | OffloadingEntryInfoTargetRegion = 0, |
222 | /// Entry is a declare target variable. |
223 | OffloadingEntryInfoDeviceGlobalVar = 1, |
224 | /// Invalid entry info. |
225 | OffloadingEntryInfoInvalid = ~0u |
226 | }; |
227 | |
228 | protected: |
229 | OffloadEntryInfo() = delete; |
230 | explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {} |
231 | explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, |
232 | uint32_t Flags) |
233 | : Flags(Flags), Order(Order), Kind(Kind) {} |
234 | ~OffloadEntryInfo() = default; |
235 | |
236 | public: |
237 | bool isValid() const { return Order != ~0u; } |
238 | unsigned getOrder() const { return Order; } |
239 | OffloadingEntryInfoKinds getKind() const { return Kind; } |
240 | uint32_t getFlags() const { return Flags; } |
241 | void setFlags(uint32_t NewFlags) { Flags = NewFlags; } |
242 | Constant *getAddress() const { return cast_or_null<Constant>(Val: Addr); } |
243 | void setAddress(Constant *V) { |
244 | assert(!Addr.pointsToAliveValue() && "Address has been set before!" ); |
245 | Addr = V; |
246 | } |
247 | static bool classof(const OffloadEntryInfo *Info) { return true; } |
248 | |
249 | private: |
250 | /// Address of the entity that has to be mapped for offloading. |
251 | WeakTrackingVH Addr; |
252 | |
253 | /// Flags associated with the device global. |
254 | uint32_t Flags = 0u; |
255 | |
256 | /// Order this entry was emitted. |
257 | unsigned Order = ~0u; |
258 | |
259 | OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid; |
260 | }; |
261 | |
262 | /// Return true if a there are no entries defined. |
263 | bool empty() const; |
264 | /// Return number of entries defined so far. |
265 | unsigned size() const { return OffloadingEntriesNum; } |
266 | |
267 | OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {} |
268 | |
269 | // |
270 | // Target region entries related. |
271 | // |
272 | |
273 | /// Kind of the target registry entry. |
274 | enum OMPTargetRegionEntryKind : uint32_t { |
275 | /// Mark the entry as target region. |
276 | OMPTargetRegionEntryTargetRegion = 0x0, |
277 | }; |
278 | |
279 | /// Target region entries info. |
280 | class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo { |
281 | /// Address that can be used as the ID of the entry. |
282 | Constant *ID = nullptr; |
283 | |
284 | public: |
285 | OffloadEntryInfoTargetRegion() |
286 | : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {} |
287 | explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, |
288 | Constant *ID, |
289 | OMPTargetRegionEntryKind Flags) |
290 | : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags), |
291 | ID(ID) { |
292 | setAddress(Addr); |
293 | } |
294 | |
295 | Constant *getID() const { return ID; } |
296 | void setID(Constant *V) { |
297 | assert(!ID && "ID has been set before!" ); |
298 | ID = V; |
299 | } |
300 | static bool classof(const OffloadEntryInfo *Info) { |
301 | return Info->getKind() == OffloadingEntryInfoTargetRegion; |
302 | } |
303 | }; |
304 | |
305 | /// Initialize target region entry. |
306 | /// This is ONLY needed for DEVICE compilation. |
307 | void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, |
308 | unsigned Order); |
309 | /// Register target region entry. |
310 | void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, |
311 | Constant *Addr, Constant *ID, |
312 | OMPTargetRegionEntryKind Flags); |
313 | /// Return true if a target region entry with the provided information |
314 | /// exists. |
315 | bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, |
316 | bool IgnoreAddressId = false) const; |
317 | |
318 | // Return the Name based on \a EntryInfo using the next available Count. |
319 | void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name, |
320 | const TargetRegionEntryInfo &EntryInfo); |
321 | |
322 | /// brief Applies action \a Action on all registered entries. |
323 | typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo, |
324 | const OffloadEntryInfoTargetRegion &)> |
325 | OffloadTargetRegionEntryInfoActTy; |
326 | void |
327 | actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action); |
328 | |
329 | // |
330 | // Device global variable entries related. |
331 | // |
332 | |
333 | /// Kind of the global variable entry.. |
334 | enum OMPTargetGlobalVarEntryKind : uint32_t { |
335 | /// Mark the entry as a to declare target. |
336 | OMPTargetGlobalVarEntryTo = 0x0, |
337 | /// Mark the entry as a to declare target link. |
338 | OMPTargetGlobalVarEntryLink = 0x1, |
339 | /// Mark the entry as a declare target enter. |
340 | OMPTargetGlobalVarEntryEnter = 0x2, |
341 | /// Mark the entry as having no declare target entry kind. |
342 | OMPTargetGlobalVarEntryNone = 0x3, |
343 | /// Mark the entry as a declare target indirect global. |
344 | OMPTargetGlobalVarEntryIndirect = 0x8, |
345 | }; |
346 | |
347 | /// Kind of device clause for declare target variables |
348 | /// and functions |
349 | /// NOTE: Currently not used as a part of a variable entry |
350 | /// used for Flang and Clang to interface with the variable |
351 | /// related registration functions |
352 | enum OMPTargetDeviceClauseKind : uint32_t { |
353 | /// The target is marked for all devices |
354 | OMPTargetDeviceClauseAny = 0x0, |
355 | /// The target is marked for non-host devices |
356 | OMPTargetDeviceClauseNoHost = 0x1, |
357 | /// The target is marked for host devices |
358 | OMPTargetDeviceClauseHost = 0x2, |
359 | /// The target is marked as having no clause |
360 | OMPTargetDeviceClauseNone = 0x3 |
361 | }; |
362 | |
363 | /// Device global variable entries info. |
364 | class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo { |
365 | /// Type of the global variable. |
366 | int64_t VarSize; |
367 | GlobalValue::LinkageTypes Linkage; |
368 | const std::string VarName; |
369 | |
370 | public: |
371 | OffloadEntryInfoDeviceGlobalVar() |
372 | : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {} |
373 | explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, |
374 | OMPTargetGlobalVarEntryKind Flags) |
375 | : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {} |
376 | explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, |
377 | int64_t VarSize, |
378 | OMPTargetGlobalVarEntryKind Flags, |
379 | GlobalValue::LinkageTypes Linkage, |
380 | const std::string &VarName) |
381 | : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags), |
382 | VarSize(VarSize), Linkage(Linkage), VarName(VarName) { |
383 | setAddress(Addr); |
384 | } |
385 | |
386 | int64_t getVarSize() const { return VarSize; } |
387 | StringRef getVarName() const { return VarName; } |
388 | void setVarSize(int64_t Size) { VarSize = Size; } |
389 | GlobalValue::LinkageTypes getLinkage() const { return Linkage; } |
390 | void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; } |
391 | static bool classof(const OffloadEntryInfo *Info) { |
392 | return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar; |
393 | } |
394 | }; |
395 | |
396 | /// Initialize device global variable entry. |
397 | /// This is ONLY used for DEVICE compilation. |
398 | void initializeDeviceGlobalVarEntryInfo(StringRef Name, |
399 | OMPTargetGlobalVarEntryKind Flags, |
400 | unsigned Order); |
401 | |
402 | /// Register device global variable entry. |
403 | void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, |
404 | int64_t VarSize, |
405 | OMPTargetGlobalVarEntryKind Flags, |
406 | GlobalValue::LinkageTypes Linkage); |
407 | /// Checks if the variable with the given name has been registered already. |
408 | bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const { |
409 | return OffloadEntriesDeviceGlobalVar.count(Key: VarName) > 0; |
410 | } |
411 | /// Applies action \a Action on all registered entries. |
412 | typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> |
413 | OffloadDeviceGlobalVarEntryInfoActTy; |
414 | void actOnDeviceGlobalVarEntriesInfo( |
415 | const OffloadDeviceGlobalVarEntryInfoActTy &Action); |
416 | |
417 | private: |
418 | /// Return the count of entries at a particular source location. |
419 | unsigned |
420 | getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const; |
421 | |
422 | /// Update the count of entries at a particular source location. |
423 | void |
424 | incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo); |
425 | |
426 | static TargetRegionEntryInfo |
427 | getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) { |
428 | return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID, |
429 | EntryInfo.FileID, EntryInfo.Line, 0); |
430 | } |
431 | |
432 | // Count of entries at a location. |
433 | std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount; |
434 | |
435 | // Storage for target region entries kind. |
436 | typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion> |
437 | OffloadEntriesTargetRegionTy; |
438 | OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion; |
439 | /// Storage for device global variable entries kind. The storage is to be |
440 | /// indexed by mangled name. |
441 | typedef StringMap<OffloadEntryInfoDeviceGlobalVar> |
442 | OffloadEntriesDeviceGlobalVarTy; |
443 | OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar; |
444 | }; |
445 | |
446 | /// An interface to create LLVM-IR for OpenMP directives. |
447 | /// |
448 | /// Each OpenMP directive has a corresponding public generator method. |
449 | class OpenMPIRBuilder { |
450 | public: |
451 | /// Create a new OpenMPIRBuilder operating on the given module \p M. This will |
452 | /// not have an effect on \p M (see initialize) |
453 | OpenMPIRBuilder(Module &M) |
454 | : M(M), Builder(M.getContext()), OffloadInfoManager(this), |
455 | T(Triple(M.getTargetTriple())) {} |
456 | ~OpenMPIRBuilder(); |
457 | |
/// Initialize the internal state, this will put structures types and
/// potentially other helpers into the underlying module. Must be called
/// before any other method and only once! This internal state includes types
/// used in the OpenMPIRBuilder generated from OMPKinds.def.
void initialize();
463 | |
464 | void setConfig(OpenMPIRBuilderConfig C) { Config = C; } |
465 | |
466 | /// Finalize the underlying module, e.g., by outlining regions. |
467 | /// \param Fn The function to be finalized. If not used, |
468 | /// all functions are finalized. |
469 | void finalize(Function *Fn = nullptr); |
470 | |
471 | /// Add attributes known for \p FnID to \p Fn. |
472 | void addAttributes(omp::RuntimeFunction FnID, Function &Fn); |
473 | |
474 | /// Type used throughout for insertion points. |
475 | using InsertPointTy = IRBuilder<>::InsertPoint; |
476 | |
477 | /// Get the create a name using the platform specific separators. |
478 | /// \param Parts parts of the final name that needs separation |
479 | /// The created name has a first separator between the first and second part |
480 | /// and a second separator between all other parts. |
481 | /// E.g. with FirstSeparator "$" and Separator "." and |
482 | /// parts: "p1", "p2", "p3", "p4" |
483 | /// The resulting name is "p1$p2.p3.p4" |
484 | /// The separators are retrieved from the OpenMPIRBuilderConfig. |
485 | std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const; |
486 | |
487 | /// Callback type for variable finalization (think destructors). |
488 | /// |
489 | /// \param CodeGenIP is the insertion point at which the finalization code |
490 | /// should be placed. |
491 | /// |
492 | /// A finalize callback knows about all objects that need finalization, e.g. |
493 | /// destruction, when the scope of the currently generated construct is left |
494 | /// at the time, and location, the callback is invoked. |
495 | using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>; |
496 | |
497 | struct FinalizationInfo { |
498 | /// The finalization callback provided by the last in-flight invocation of |
499 | /// createXXXX for the directive of kind DK. |
500 | FinalizeCallbackTy FiniCB; |
501 | |
502 | /// The directive kind of the innermost directive that has an associated |
503 | /// region which might require finalization when it is left. |
504 | omp::Directive DK; |
505 | |
506 | /// Flag to indicate if the directive is cancellable. |
507 | bool IsCancellable; |
508 | }; |
509 | |
510 | /// Push a finalization callback on the finalization stack. |
511 | /// |
512 | /// NOTE: Temporary solution until Clang CG is gone. |
513 | void pushFinalizationCB(const FinalizationInfo &FI) { |
514 | FinalizationStack.push_back(Elt: FI); |
515 | } |
516 | |
517 | /// Pop the last finalization callback from the finalization stack. |
518 | /// |
519 | /// NOTE: Temporary solution until Clang CG is gone. |
520 | void popFinalizationCB() { FinalizationStack.pop_back(); } |
521 | |
522 | /// Callback type for body (=inner region) code generation |
523 | /// |
524 | /// The callback takes code locations as arguments, each describing a |
525 | /// location where additional instructions can be inserted. |
526 | /// |
527 | /// The CodeGenIP may be in the middle of a basic block or point to the end of |
528 | /// it. The basic block may have a terminator or be degenerate. The callback |
529 | /// function may just insert instructions at that position, but also split the |
530 | /// block (without the Before argument of BasicBlock::splitBasicBlock such |
531 | /// that the identify of the split predecessor block is preserved) and insert |
532 | /// additional control flow, including branches that do not lead back to what |
533 | /// follows the CodeGenIP. Note that since the callback is allowed to split |
534 | /// the block, callers must assume that InsertPoints to positions in the |
535 | /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If |
536 | /// such InsertPoints need to be preserved, it can split the block itself |
537 | /// before calling the callback. |
538 | /// |
539 | /// AllocaIP and CodeGenIP must not point to the same position. |
540 | /// |
541 | /// \param AllocaIP is the insertion point at which new alloca instructions |
542 | /// should be placed. The BasicBlock it is pointing to must |
543 | /// not be split. |
544 | /// \param CodeGenIP is the insertion point at which the body code should be |
545 | /// placed. |
546 | using BodyGenCallbackTy = |
547 | function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; |
548 | |
549 | // This is created primarily for sections construct as llvm::function_ref |
550 | // (BodyGenCallbackTy) is not storable (as described in the comments of |
551 | // function_ref class - function_ref contains non-ownable reference |
552 | // to the callable. |
553 | using StorableBodyGenCallbackTy = |
554 | std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; |
555 | |
556 | /// Callback type for loop body code generation. |
557 | /// |
558 | /// \param CodeGenIP is the insertion point where the loop's body code must be |
559 | /// placed. This will be a dedicated BasicBlock with a |
560 | /// conditional branch from the loop condition check and |
561 | /// terminated with an unconditional branch to the loop |
562 | /// latch. |
563 | /// \param IndVar is the induction variable usable at the insertion point. |
564 | using LoopBodyGenCallbackTy = |
565 | function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>; |
566 | |
567 | /// Callback type for variable privatization (think copy & default |
568 | /// constructor). |
569 | /// |
570 | /// \param AllocaIP is the insertion point at which new alloca instructions |
571 | /// should be placed. |
572 | /// \param CodeGenIP is the insertion point at which the privatization code |
573 | /// should be placed. |
574 | /// \param Original The value being copied/created, should not be used in the |
575 | /// generated IR. |
576 | /// \param Inner The equivalent of \p Original that should be used in the |
577 | /// generated IR; this is equal to \p Original if the value is |
578 | /// a pointer and can thus be passed directly, otherwise it is |
579 | /// an equivalent but different value. |
580 | /// \param ReplVal The replacement value, thus a copy or new created version |
581 | /// of \p Inner. |
582 | /// |
583 | /// \returns The new insertion point where code generation continues and |
584 | /// \p ReplVal the replacement value. |
585 | using PrivatizeCallbackTy = function_ref<InsertPointTy( |
586 | InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, |
587 | Value &Inner, Value *&ReplVal)>; |
588 | |
589 | /// Description of a LLVM-IR insertion point (IP) and a debug/source location |
590 | /// (filename, line, column, ...). |
591 | struct LocationDescription { |
592 | LocationDescription(const IRBuilderBase &IRB) |
593 | : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {} |
594 | LocationDescription(const InsertPointTy &IP) : IP(IP) {} |
595 | LocationDescription(const InsertPointTy &IP, const DebugLoc &DL) |
596 | : IP(IP), DL(DL) {} |
597 | InsertPointTy IP; |
598 | DebugLoc DL; |
599 | }; |
600 | |
601 | /// Emitter methods for OpenMP directives. |
602 | /// |
603 | ///{ |
604 | |
605 | /// Generator for '#omp barrier' |
606 | /// |
607 | /// \param Loc The location where the barrier directive was encountered. |
608 | /// \param DK The kind of directive that caused the barrier. |
609 | /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier. |
610 | /// \param CheckCancelFlag Flag to indicate a cancel barrier return value |
611 | /// should be checked and acted upon. |
612 | /// |
613 | /// \returns The insertion point after the barrier. |
614 | InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, |
615 | bool ForceSimpleCall = false, |
616 | bool CheckCancelFlag = true); |
617 | |
618 | /// Generator for '#omp cancel' |
619 | /// |
620 | /// \param Loc The location where the directive was encountered. |
621 | /// \param IfCondition The evaluated 'if' clause expression, if any. |
622 | /// \param CanceledDirective The kind of directive that is cancled. |
623 | /// |
624 | /// \returns The insertion point after the barrier. |
625 | InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, |
626 | omp::Directive CanceledDirective); |
627 | |
628 | /// Generator for '#omp parallel' |
629 | /// |
630 | /// \param Loc The insert and source location description. |
631 | /// \param AllocaIP The insertion points to be used for alloca instructions. |
632 | /// \param BodyGenCB Callback that will generate the region code. |
633 | /// \param PrivCB Callback to copy a given variable (think copy constructor). |
634 | /// \param FiniCB Callback to finalize variable copies. |
635 | /// \param IfCondition The evaluated 'if' clause expression, if any. |
636 | /// \param NumThreads The evaluated 'num_threads' clause expression, if any. |
637 | /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). |
638 | /// \param IsCancellable Flag to indicate a cancellable parallel region. |
639 | /// |
640 | /// \returns The insertion position *after* the parallel. |
641 | IRBuilder<>::InsertPoint |
642 | createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, |
643 | BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, |
644 | FinalizeCallbackTy FiniCB, Value *IfCondition, |
645 | Value *NumThreads, omp::ProcBindKind ProcBind, |
646 | bool IsCancellable); |
647 | |
648 | /// Generator for the control flow structure of an OpenMP canonical loop. |
649 | /// |
650 | /// This generator operates on the logical iteration space of the loop, i.e. |
651 | /// the caller only has to provide a loop trip count of the loop as defined by |
652 | /// base language semantics. The trip count is interpreted as an unsigned |
653 | /// integer. The induction variable passed to \p BodyGenCB will be of the same |
654 | /// type and run from 0 to \p TripCount - 1. It is up to the callback to |
655 | /// convert the logical iteration variable to the loop counter variable in the |
656 | /// loop body. |
657 | /// |
658 | /// \param Loc The insert and source location description. The insert |
659 | /// location can be between two instructions or the end of a |
660 | /// degenerate block (e.g. a BB under construction). |
661 | /// \param BodyGenCB Callback that will generate the loop body code. |
662 | /// \param TripCount Number of iterations the loop body is executed. |
663 | /// \param Name Base name used to derive BB and instruction names. |
664 | /// |
665 | /// \returns An object representing the created control flow structure which |
666 | /// can be used for loop-associated directives. |
667 | CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, |
668 | LoopBodyGenCallbackTy BodyGenCB, |
669 | Value *TripCount, |
670 | const Twine &Name = "loop" ); |
671 | |
672 | /// Generator for the control flow structure of an OpenMP canonical loop. |
673 | /// |
674 | /// Instead of a logical iteration space, this allows specifying user-defined |
675 | /// loop counter values using increment, upper- and lower bounds. To |
676 | /// disambiguate the terminology when counting downwards, instead of lower |
677 | /// bounds we use \p Start for the loop counter value in the first body |
678 | /// iteration. |
679 | /// |
680 | /// Consider the following limitations: |
681 | /// |
682 | /// * A loop counter space over all integer values of its bit-width cannot be |
683 | /// represented. E.g using uint8_t, its loop trip count of 256 cannot be |
684 | /// stored into an 8 bit integer): |
685 | /// |
686 | /// DO I = 0, 255, 1 |
687 | /// |
688 | /// * Unsigned wrapping is only supported when wrapping only "once"; E.g. |
689 | /// effectively counting downwards: |
690 | /// |
691 | /// for (uint8_t i = 100u; i > 0; i += 127u) |
692 | /// |
693 | /// |
694 | /// TODO: May need to add additional parameters to represent: |
695 | /// |
696 | /// * Allow representing downcounting with unsigned integers. |
697 | /// |
698 | /// * Sign of the step and the comparison operator might disagree: |
699 | /// |
700 | /// for (int i = 0; i < 42; i -= 1u) |
701 | /// |
702 | // |
703 | /// \param Loc The insert and source location description. |
704 | /// \param BodyGenCB Callback that will generate the loop body code. |
705 | /// \param Start Value of the loop counter for the first iterations. |
706 | /// \param Stop Loop counter values past this will stop the loop. |
707 | /// \param Step Loop counter increment after each iteration; negative |
708 | /// means counting down. |
709 | /// \param IsSigned Whether Start, Stop and Step are signed integers. |
710 | /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop |
711 | /// counter. |
712 | /// \param ComputeIP Insertion point for instructions computing the trip |
713 | /// count. Can be used to ensure the trip count is available |
714 | /// at the outermost loop of a loop nest. If not set, |
715 | /// defaults to the preheader of the generated loop. |
716 | /// \param Name Base name used to derive BB and instruction names. |
717 | /// |
718 | /// \returns An object representing the created control flow structure which |
719 | /// can be used for loop-associated directives. |
720 | CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, |
721 | LoopBodyGenCallbackTy BodyGenCB, |
722 | Value *Start, Value *Stop, Value *Step, |
723 | bool IsSigned, bool InclusiveStop, |
724 | InsertPointTy ComputeIP = {}, |
725 | const Twine &Name = "loop" ); |
726 | |
727 | /// Collapse a loop nest into a single loop. |
728 | /// |
729 | /// Merges loops of a loop nest into a single CanonicalLoopNest representation |
730 | /// that has the same number of innermost loop iterations as the origin loop |
731 | /// nest. The induction variables of the input loops are derived from the |
732 | /// collapsed loop's induction variable. This is intended to be used to |
733 | /// implement OpenMP's collapse clause. Before applying a directive, |
734 | /// collapseLoops normalizes a loop nest to contain only a single loop and the |
735 | /// directive's implementation does not need to handle multiple loops itself. |
736 | /// This does not remove the need to handle all loop nest handling by |
737 | /// directives, such as the ordered(<n>) clause or the simd schedule-clause |
738 | /// modifier of the worksharing-loop directive. |
739 | /// |
740 | /// Example: |
741 | /// \code |
742 | /// for (int i = 0; i < 7; ++i) // Canonical loop "i" |
743 | /// for (int j = 0; j < 9; ++j) // Canonical loop "j" |
744 | /// body(i, j); |
745 | /// \endcode |
746 | /// |
747 | /// After collapsing with Loops={i,j}, the loop is changed to |
748 | /// \code |
749 | /// for (int ij = 0; ij < 63; ++ij) { |
750 | /// int i = ij / 9; |
751 | /// int j = ij % 9; |
752 | /// body(i, j); |
753 | /// } |
754 | /// \endcode |
755 | /// |
756 | /// In the current implementation, the following limitations apply: |
757 | /// |
758 | /// * All input loops have an induction variable of the same type. |
759 | /// |
760 | /// * The collapsed loop will have the same trip count integer type as the |
761 | /// input loops. Therefore it is possible that the collapsed loop cannot |
762 | /// represent all iterations of the input loops. For instance, assuming a |
763 | /// 32 bit integer type, and two input loops both iterating 2^16 times, the |
764 | /// theoretical trip count of the collapsed loop would be 2^32 iteration, |
765 | /// which cannot be represented in an 32-bit integer. Behavior is undefined |
766 | /// in this case. |
767 | /// |
768 | /// * The trip counts of every input loop must be available at \p ComputeIP. |
769 | /// Non-rectangular loops are not yet supported. |
770 | /// |
771 | /// * At each nest level, code between a surrounding loop and its nested loop |
772 | /// is hoisted into the loop body, and such code will be executed more |
773 | /// often than before collapsing (or not at all if any inner loop iteration |
774 | /// has a trip count of 0). This is permitted by the OpenMP specification. |
775 | /// |
776 | /// \param DL Debug location for instructions added for collapsing, |
777 | /// such as instructions to compute/derive the input loop's |
778 | /// induction variables. |
779 | /// \param Loops Loops in the loop nest to collapse. Loops are specified |
780 | /// from outermost-to-innermost and every control flow of a |
781 | /// loop's body must pass through its directly nested loop. |
782 | /// \param ComputeIP Where additional instruction that compute the collapsed |
783 | /// trip count. If not set, defaults to before the generated |
784 | /// loop. |
785 | /// |
786 | /// \returns The CanonicalLoopInfo object representing the collapsed loop. |
787 | CanonicalLoopInfo *collapseLoops(DebugLoc DL, |
788 | ArrayRef<CanonicalLoopInfo *> Loops, |
789 | InsertPointTy ComputeIP); |
790 | |
791 | /// Get the default alignment value for given target |
792 | /// |
793 | /// \param TargetTriple Target triple |
794 | /// \param Features StringMap which describes extra CPU features |
795 | static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, |
796 | const StringMap<bool> &Features); |
797 | |
798 | /// Retrieve (or create if non-existent) the address of a declare |
799 | /// target variable, used in conjunction with registerTargetGlobalVariable |
800 | /// to create declare target global variables. |
801 | /// |
802 | /// \param CaptureClause - enumerator corresponding to the OpenMP capture |
803 | /// clause used in conjunction with the variable being registered (link, |
804 | /// to, enter). |
805 | /// \param DeviceClause - enumerator corresponding to the OpenMP capture |
806 | /// clause used in conjunction with the variable being registered (nohost, |
807 | /// host, any) |
808 | /// \param IsDeclaration - boolean stating if the variable being registered |
809 | /// is a declaration-only and not a definition |
810 | /// \param IsExternallyVisible - boolean stating if the variable is externally |
811 | /// visible |
812 | /// \param EntryInfo - Unique entry information for the value generated |
813 | /// using getTargetEntryUniqueInfo, used to name generated pointer references |
814 | /// to the declare target variable |
815 | /// \param MangledName - the mangled name of the variable being registered |
816 | /// \param GeneratedRefs - references generated by invocations of |
817 | /// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar, |
818 | /// these are required by Clang for book keeping. |
819 | /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled |
820 | /// \param TargetTriple - The OpenMP device target triple we are compiling |
821 | /// for |
822 | /// \param LlvmPtrTy - The type of the variable we are generating or |
823 | /// retrieving an address for |
824 | /// \param GlobalInitializer - a lambda function which creates a constant |
825 | /// used for initializing a pointer reference to the variable in certain |
826 | /// cases. If a nullptr is passed, it will default to utilising the original |
827 | /// variable to initialize the pointer reference. |
828 | /// \param VariableLinkage - a lambda function which returns the variables |
829 | /// linkage type, if unspecified and a nullptr is given, it will instead |
830 | /// utilise the linkage stored on the existing global variable in the |
831 | /// LLVMModule. |
832 | Constant *getAddrOfDeclareTargetVar( |
833 | OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, |
834 | OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, |
835 | bool IsDeclaration, bool IsExternallyVisible, |
836 | TargetRegionEntryInfo EntryInfo, StringRef MangledName, |
837 | std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD, |
838 | std::vector<Triple> TargetTriple, Type *LlvmPtrTy, |
839 | std::function<Constant *()> GlobalInitializer, |
840 | std::function<GlobalValue::LinkageTypes()> VariableLinkage); |
841 | |
842 | /// Registers a target variable for device or host. |
843 | /// |
844 | /// \param CaptureClause - enumerator corresponding to the OpenMP capture |
845 | /// clause used in conjunction with the variable being registered (link, |
846 | /// to, enter). |
847 | /// \param DeviceClause - enumerator corresponding to the OpenMP capture |
848 | /// clause used in conjunction with the variable being registered (nohost, |
849 | /// host, any) |
850 | /// \param IsDeclaration - boolean stating if the variable being registered |
851 | /// is a declaration-only and not a definition |
852 | /// \param IsExternallyVisible - boolean stating if the variable is externally |
853 | /// visible |
854 | /// \param EntryInfo - Unique entry information for the value generated |
855 | /// using getTargetEntryUniqueInfo, used to name generated pointer references |
856 | /// to the declare target variable |
857 | /// \param MangledName - the mangled name of the variable being registered |
858 | /// \param GeneratedRefs - references generated by invocations of |
859 | /// registerTargetGlobalVariable these are required by Clang for book |
860 | /// keeping. |
861 | /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled |
862 | /// \param TargetTriple - The OpenMP device target triple we are compiling |
863 | /// for |
864 | /// \param GlobalInitializer - a lambda function which creates a constant |
865 | /// used for initializing a pointer reference to the variable in certain |
866 | /// cases. If a nullptr is passed, it will default to utilising the original |
867 | /// variable to initialize the pointer reference. |
868 | /// \param VariableLinkage - a lambda function which returns the variables |
869 | /// linkage type, if unspecified and a nullptr is given, it will instead |
870 | /// utilise the linkage stored on the existing global variable in the |
871 | /// LLVMModule. |
872 | /// \param LlvmPtrTy - The type of the variable we are generating or |
873 | /// retrieving an address for |
874 | /// \param Addr - the original llvm value (addr) of the variable to be |
875 | /// registered |
876 | void registerTargetGlobalVariable( |
877 | OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, |
878 | OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, |
879 | bool IsDeclaration, bool IsExternallyVisible, |
880 | TargetRegionEntryInfo EntryInfo, StringRef MangledName, |
881 | std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD, |
882 | std::vector<Triple> TargetTriple, |
883 | std::function<Constant *()> GlobalInitializer, |
884 | std::function<GlobalValue::LinkageTypes()> VariableLinkage, |
885 | Type *LlvmPtrTy, Constant *Addr); |
886 | |
/// Returns the offset of the OMP_MAP_MEMBER_OF field.
unsigned getFlagMemberOffset();
889 | |
890 | /// Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on |
891 | /// the position given. |
892 | /// \param Position - A value indicating the position of the parent |
893 | /// of the member in the kernel argument structure, often retrieved |
894 | /// by the parents position in the combined information vectors used |
895 | /// to generate the structure itself. Multiple children (member's of) |
896 | /// with the same parent will use the same returned member flag. |
897 | omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position); |
898 | |
899 | /// Given an initial flag set, this function modifies it to contain |
900 | /// the passed in MemberOfFlag generated from the getMemberOfFlag |
901 | /// function. The results are dependent on the existing flag bits |
902 | /// set in the original flag set. |
903 | /// \param Flags - The original set of flags to be modified with the |
904 | /// passed in MemberOfFlag. |
905 | /// \param MemberOfFlag - A modified OMP_MAP_MEMBER_OF flag, adjusted |
906 | /// slightly based on the getMemberOfFlag which adjusts the flag bits |
907 | /// based on the members position in its parent. |
908 | void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, |
909 | omp::OpenMPOffloadMappingFlags MemberOfFlag); |
910 | |
911 | private: |
912 | /// Modifies the canonical loop to be a statically-scheduled workshare loop |
913 | /// which is executed on the device |
914 | /// |
915 | /// This takes a \p CLI representing a canonical loop, such as the one |
916 | /// created by \see createCanonicalLoop and emits additional instructions to |
917 | /// turn it into a workshare loop. In particular, it calls to an OpenMP |
918 | /// runtime function in the preheader to call OpenMP device rtl function |
919 | /// which handles worksharing of loop body interations. |
920 | /// |
921 | /// \param DL Debug location for instructions added for the |
922 | /// workshare-loop construct itself. |
923 | /// \param CLI A descriptor of the canonical loop to workshare. |
924 | /// \param AllocaIP An insertion point for Alloca instructions usable in the |
925 | /// preheader of the loop. |
926 | /// \param LoopType Information about type of loop worksharing. |
927 | /// It corresponds to type of loop workshare OpenMP pragma. |
928 | /// |
929 | /// \returns Point where to insert code after the workshare construct. |
930 | InsertPointTy applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI, |
931 | InsertPointTy AllocaIP, |
932 | omp::WorksharingLoopType LoopType); |
933 | |
934 | /// Modifies the canonical loop to be a statically-scheduled workshare loop. |
935 | /// |
936 | /// This takes a \p LoopInfo representing a canonical loop, such as the one |
937 | /// created by \p createCanonicalLoop and emits additional instructions to |
938 | /// turn it into a workshare loop. In particular, it calls to an OpenMP |
939 | /// runtime function in the preheader to obtain the loop bounds to be used in |
940 | /// the current thread, updates the relevant instructions in the canonical |
941 | /// loop and calls to an OpenMP runtime finalization function after the loop. |
942 | /// |
943 | /// \param DL Debug location for instructions added for the |
944 | /// workshare-loop construct itself. |
945 | /// \param CLI A descriptor of the canonical loop to workshare. |
946 | /// \param AllocaIP An insertion point for Alloca instructions usable in the |
947 | /// preheader of the loop. |
948 | /// \param NeedsBarrier Indicates whether a barrier must be inserted after |
949 | /// the loop. |
950 | /// |
951 | /// \returns Point where to insert code after the workshare construct. |
952 | InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, |
953 | InsertPointTy AllocaIP, |
954 | bool NeedsBarrier); |
955 | |
956 | /// Modifies the canonical loop a statically-scheduled workshare loop with a |
957 | /// user-specified chunk size. |
958 | /// |
959 | /// \param DL Debug location for instructions added for the |
960 | /// workshare-loop construct itself. |
961 | /// \param CLI A descriptor of the canonical loop to workshare. |
962 | /// \param AllocaIP An insertion point for Alloca instructions usable in |
963 | /// the preheader of the loop. |
964 | /// \param NeedsBarrier Indicates whether a barrier must be inserted after the |
965 | /// loop. |
966 | /// \param ChunkSize The user-specified chunk size. |
967 | /// |
968 | /// \returns Point where to insert code after the workshare construct. |
969 | InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL, |
970 | CanonicalLoopInfo *CLI, |
971 | InsertPointTy AllocaIP, |
972 | bool NeedsBarrier, |
973 | Value *ChunkSize); |
974 | |
975 | /// Modifies the canonical loop to be a dynamically-scheduled workshare loop. |
976 | /// |
977 | /// This takes a \p LoopInfo representing a canonical loop, such as the one |
978 | /// created by \p createCanonicalLoop and emits additional instructions to |
979 | /// turn it into a workshare loop. In particular, it calls to an OpenMP |
980 | /// runtime function in the preheader to obtain, and then in each iteration |
981 | /// to update the loop counter. |
982 | /// |
983 | /// \param DL Debug location for instructions added for the |
984 | /// workshare-loop construct itself. |
985 | /// \param CLI A descriptor of the canonical loop to workshare. |
986 | /// \param AllocaIP An insertion point for Alloca instructions usable in the |
987 | /// preheader of the loop. |
988 | /// \param SchedType Type of scheduling to be passed to the init function. |
989 | /// \param NeedsBarrier Indicates whether a barrier must be insterted after |
990 | /// the loop. |
991 | /// \param Chunk The size of loop chunk considered as a unit when |
992 | /// scheduling. If \p nullptr, defaults to 1. |
993 | /// |
994 | /// \returns Point where to insert code after the workshare construct. |
995 | InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, |
996 | InsertPointTy AllocaIP, |
997 | omp::OMPScheduleType SchedType, |
998 | bool NeedsBarrier, |
999 | Value *Chunk = nullptr); |
1000 | |
1001 | /// Create alternative version of the loop to support if clause |
1002 | /// |
1003 | /// OpenMP if clause can require to generate second loop. This loop |
1004 | /// will be executed when if clause condition is not met. createIfVersion |
1005 | /// adds branch instruction to the copied loop if \p ifCond is not met. |
1006 | /// |
1007 | /// \param Loop Original loop which should be versioned. |
1008 | /// \param IfCond Value which corresponds to if clause condition |
1009 | /// \param VMap Value to value map to define relation between |
1010 | /// original and copied loop values and loop blocks. |
1011 | /// \param NamePrefix Optional name prefix for if.then if.else blocks. |
1012 | void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond, |
1013 | ValueToValueMapTy &VMap, const Twine &NamePrefix = "" ); |
1014 | |
1015 | public: |
1016 | /// Modifies the canonical loop to be a workshare loop. |
1017 | /// |
1018 | /// This takes a \p LoopInfo representing a canonical loop, such as the one |
1019 | /// created by \p createCanonicalLoop and emits additional instructions to |
1020 | /// turn it into a workshare loop. In particular, it calls to an OpenMP |
1021 | /// runtime function in the preheader to obtain the loop bounds to be used in |
1022 | /// the current thread, updates the relevant instructions in the canonical |
1023 | /// loop and calls to an OpenMP runtime finalization function after the loop. |
1024 | /// |
1025 | /// The concrete transformation is done by applyStaticWorkshareLoop, |
1026 | /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending |
1027 | /// on the value of \p SchedKind and \p ChunkSize. |
1028 | /// |
1029 | /// \param DL Debug location for instructions added for the |
1030 | /// workshare-loop construct itself. |
1031 | /// \param CLI A descriptor of the canonical loop to workshare. |
1032 | /// \param AllocaIP An insertion point for Alloca instructions usable in the |
1033 | /// preheader of the loop. |
1034 | /// \param NeedsBarrier Indicates whether a barrier must be insterted after |
1035 | /// the loop. |
1036 | /// \param SchedKind Scheduling algorithm to use. |
1037 | /// \param ChunkSize The chunk size for the inner loop. |
1038 | /// \param HasSimdModifier Whether the simd modifier is present in the |
1039 | /// schedule clause. |
1040 | /// \param HasMonotonicModifier Whether the monotonic modifier is present in |
1041 | /// the schedule clause. |
1042 | /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is |
1043 | /// present in the schedule clause. |
1044 | /// \param HasOrderedClause Whether the (parameterless) ordered clause is |
1045 | /// present. |
1046 | /// \param LoopType Information about type of loop worksharing. |
1047 | /// It corresponds to type of loop workshare OpenMP pragma. |
1048 | /// |
1049 | /// \returns Point where to insert code after the workshare construct. |
1050 | InsertPointTy applyWorkshareLoop( |
1051 | DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, |
1052 | bool NeedsBarrier, |
1053 | llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default, |
1054 | Value *ChunkSize = nullptr, bool HasSimdModifier = false, |
1055 | bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false, |
1056 | bool HasOrderedClause = false, |
1057 | omp::WorksharingLoopType LoopType = |
1058 | omp::WorksharingLoopType::ForStaticLoop); |
1059 | |
1060 | /// Tile a loop nest. |
1061 | /// |
1062 | /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in |
1063 | /// \p/ Loops must be perfectly nested, from outermost to innermost loop |
1064 | /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value |
1065 | /// of every loop and every tile sizes must be usable in the outermost |
1066 | /// loop's preheader. This implies that the loop nest is rectangular. |
1067 | /// |
1068 | /// Example: |
1069 | /// \code |
1070 | /// for (int i = 0; i < 15; ++i) // Canonical loop "i" |
1071 | /// for (int j = 0; j < 14; ++j) // Canonical loop "j" |
1072 | /// body(i, j); |
1073 | /// \endcode |
1074 | /// |
1075 | /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to |
1076 | /// \code |
1077 | /// for (int i1 = 0; i1 < 3; ++i1) |
1078 | /// for (int j1 = 0; j1 < 2; ++j1) |
1079 | /// for (int i2 = 0; i2 < 5; ++i2) |
1080 | /// for (int j2 = 0; j2 < 7; ++j2) |
1081 | /// body(i1*3+i2, j1*3+j2); |
1082 | /// \endcode |
1083 | /// |
1084 | /// The returned vector are the loops {i1,j1,i2,j2}. The loops i1 and j1 are |
1085 | /// referred to the floor, and the loops i2 and j2 are the tiles. Tiling also |
1086 | /// handles non-constant trip counts, non-constant tile sizes and trip counts |
1087 | /// that are not multiples of the tile size. In the latter case the tile loop |
1088 | /// of the last floor-loop iteration will have fewer iterations than specified |
1089 | /// as its tile size. |
1090 | /// |
1091 | /// |
1092 | /// @param DL Debug location for instructions added by tiling, for |
1093 | /// instance the floor- and tile trip count computation. |
1094 | /// @param Loops Loops to tile. The CanonicalLoopInfo objects are |
1095 | /// invalidated by this method, i.e. should not used after |
1096 | /// tiling. |
1097 | /// @param TileSizes For each loop in \p Loops, the tile size for that |
1098 | /// dimensions. |
1099 | /// |
1100 | /// \returns A list of generated loops. Contains twice as many loops as the |
1101 | /// input loop nest; the first half are the floor loops and the |
1102 | /// second half are the tile loops. |
1103 | std::vector<CanonicalLoopInfo *> |
1104 | tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, |
1105 | ArrayRef<Value *> TileSizes); |
1106 | |
1107 | /// Fully unroll a loop. |
1108 | /// |
1109 | /// Instead of unrolling the loop immediately (and duplicating its body |
1110 | /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop |
1111 | /// metadata. |
1112 | /// |
1113 | /// \param DL Debug location for instructions added by unrolling. |
1114 | /// \param Loop The loop to unroll. The loop will be invalidated. |
1115 | void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop); |
1116 | |
1117 | /// Fully or partially unroll a loop. How the loop is unrolled is determined |
1118 | /// using LLVM's LoopUnrollPass. |
1119 | /// |
1120 | /// \param DL Debug location for instructions added by unrolling. |
1121 | /// \param Loop The loop to unroll. The loop will be invalidated. |
1122 | void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop); |
1123 | |
1124 | /// Partially unroll a loop. |
1125 | /// |
1126 | /// The CanonicalLoopInfo of the unrolled loop for use with chained |
1127 | /// loop-associated directive can be requested using \p UnrolledCLI. Not |
1128 | /// needing the CanonicalLoopInfo allows more efficient code generation by |
1129 | /// deferring the actual unrolling to the LoopUnrollPass using loop metadata. |
1130 | /// A loop-associated directive applied to the unrolled loop needs to know the |
1131 | /// new trip count which means that if using a heuristically determined unroll |
1132 | /// factor (\p Factor == 0), that factor must be computed immediately. We are |
1133 | /// using the same logic as the LoopUnrollPass to derived the unroll factor, |
1134 | /// but which assumes that some canonicalization has taken place (e.g. |
1135 | /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform |
1136 | /// better when the unrolled loop's CanonicalLoopInfo is not needed. |
1137 | /// |
1138 | /// \param DL Debug location for instructions added by unrolling. |
1139 | /// \param Loop The loop to unroll. The loop will be invalidated. |
1140 | /// \param Factor The factor to unroll the loop by. A factor of 0 |
1141 | /// indicates that a heuristic should be used to determine |
1142 | /// the unroll-factor. |
1143 | /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the |
1144 | /// partially unrolled loop. Otherwise, uses loop metadata |
1145 | /// to defer unrolling to the LoopUnrollPass. |
1146 | void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, |
1147 | CanonicalLoopInfo **UnrolledCLI); |
1148 | |
1149 | /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop |
1150 | /// is cloned. The metadata which prevents vectorization is added to |
1151 | /// to the cloned loop. The cloned loop is executed when ifCond is evaluated |
1152 | /// to false. |
1153 | /// |
1154 | /// \param Loop The loop to simd-ize. |
1155 | /// \param AlignedVars The map which containts pairs of the pointer |
1156 | /// and its corresponding alignment. |
1157 | /// \param IfCond The value which corresponds to the if clause |
1158 | /// condition. |
1159 | /// \param Order The enum to map order clause. |
1160 | /// \param Simdlen The Simdlen length to apply to the simd loop. |
1161 | /// \param Safelen The Safelen length to apply to the simd loop. |
1162 | void applySimd(CanonicalLoopInfo *Loop, |
1163 | MapVector<Value *, Value *> AlignedVars, Value *IfCond, |
1164 | omp::OrderKind Order, ConstantInt *Simdlen, |
1165 | ConstantInt *Safelen); |
1166 | |
1167 | /// Generator for '#omp flush' |
1168 | /// |
1169 | /// \param Loc The location where the flush directive was encountered |
1170 | void createFlush(const LocationDescription &Loc); |
1171 | |
1172 | /// Generator for '#omp taskwait' |
1173 | /// |
1174 | /// \param Loc The location where the taskwait directive was encountered. |
1175 | void createTaskwait(const LocationDescription &Loc); |
1176 | |
1177 | /// Generator for '#omp taskyield' |
1178 | /// |
1179 | /// \param Loc The location where the taskyield directive was encountered. |
1180 | void createTaskyield(const LocationDescription &Loc); |
1181 | |
1182 | /// A struct to pack the relevant information for an OpenMP depend clause. |
1183 | struct DependData { |
1184 | omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown; |
1185 | Type *DepValueType; |
1186 | Value *DepVal; |
1187 | explicit DependData() = default; |
1188 | DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, |
1189 | Value *DepVal) |
1190 | : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {} |
1191 | }; |
1192 | |
1193 | /// Generator for `#omp task` |
1194 | /// |
1195 | /// \param Loc The location where the task construct was encountered. |
1196 | /// \param AllocaIP The insertion point to be used for alloca instructions. |
1197 | /// \param BodyGenCB Callback that will generate the region code. |
1198 | /// \param Tied True if the task is tied, false if the task is untied. |
1199 | /// \param Final i1 value which is `true` if the task is final, `false` if the |
1200 | /// task is not final. |
1201 | /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred |
1202 | /// task is generated, and the encountering thread must |
1203 | /// suspend the current task region, for which execution |
1204 | /// cannot be resumed until execution of the structured |
1205 | /// block that is associated with the generated task is |
1206 | /// completed. |
1207 | InsertPointTy createTask(const LocationDescription &Loc, |
1208 | InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, |
1209 | bool Tied = true, Value *Final = nullptr, |
1210 | Value *IfCondition = nullptr, |
1211 | SmallVector<DependData> Dependencies = {}); |
1212 | |
1213 | /// Generator for the taskgroup construct |
1214 | /// |
1215 | /// \param Loc The location where the taskgroup construct was encountered. |
1216 | /// \param AllocaIP The insertion point to be used for alloca instructions. |
1217 | /// \param BodyGenCB Callback that will generate the region code. |
1218 | InsertPointTy createTaskgroup(const LocationDescription &Loc, |
1219 | InsertPointTy AllocaIP, |
1220 | BodyGenCallbackTy BodyGenCB); |
1221 | |
/// Callback type that takes no arguments and returns a (std::string,
/// uint64_t) tuple. Used by getTargetEntryUniqueInfo, which expects the
/// filename a target entry resides in together with its line number.
using FileIdentifierInfoCallbackTy =
    std::function<std::tuple<std::string, uint64_t>()>;
1224 | |
1225 | /// Creates a unique info for a target entry when provided a filename and |
1226 | /// line number from. |
1227 | /// |
1228 | /// \param CallBack A callback function which should return filename the entry |
1229 | /// resides in as well as the line number for the target entry |
1230 | /// \param ParentName The name of the parent the target entry resides in, if |
1231 | /// any. |
1232 | static TargetRegionEntryInfo |
1233 | getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, |
1234 | StringRef ParentName = "" ); |
1235 | |
1236 | /// Functions used to generate reductions. Such functions take two Values |
1237 | /// representing LHS and RHS of the reduction, respectively, and a reference |
1238 | /// to the value that is updated to refer to the reduction result. |
1239 | using ReductionGenTy = |
1240 | function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>; |
1241 | |
1242 | /// Functions used to generate atomic reductions. Such functions take two |
1243 | /// Values representing pointers to LHS and RHS of the reduction, as well as |
1244 | /// the element type of these pointers. They are expected to atomically |
1245 | /// update the LHS to the reduced value. |
1246 | using AtomicReductionGenTy = |
1247 | function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>; |
1248 | |
/// Information about an OpenMP reduction.
///
/// Both generator callbacks are stored as function_ref objects.
/// NOTE(review): function_ref does not own the callable it refers to, so the
/// callables passed to the constructor presumably have to outlive this
/// object -- confirm against the users of createReductions.
struct ReductionInfo {
  /// Store the given element type, variables and generator callbacks
  /// unchanged in the corresponding members.
  ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
                ReductionGenTy ReductionGen,
                AtomicReductionGenTy AtomicReductionGen)
      : ElementType(ElementType), Variable(Variable),
        PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
        AtomicReductionGen(AtomicReductionGen) {}

  /// Reduction element type, must match pointee type of variable.
  Type *ElementType;

  /// Reduction variable of pointer type.
  Value *Variable;

  /// Thread-private partial reduction variable.
  Value *PrivateVariable;

  /// Callback for generating the reduction body. The IR produced by this will
  /// be used to combine two values in a thread-safe context, e.g., under
  /// lock or within the same thread, and therefore need not be atomic.
  ReductionGenTy ReductionGen;

  /// Callback for generating the atomic reduction body, may be null. The IR
  /// produced by this will be used to atomically combine two values during
  /// reduction. If null, the implementation will use the non-atomic version
  /// along with the appropriate synchronization mechanisms.
  AtomicReductionGenTy AtomicReductionGen;
};
1278 | |
1279 | // TODO: provide atomic and non-atomic reduction generators for reduction |
1280 | // operators defined by the OpenMP specification. |
1281 | |
1282 | /// Generator for '#omp reduction'. |
1283 | /// |
1284 | /// Emits the IR instructing the runtime to perform the specific kind of |
1285 | /// reductions. Expects reduction variables to have been privatized and |
1286 | /// initialized to reduction-neutral values separately. Emits the calls to |
1287 | /// runtime functions as well as the reduction function and the basic blocks |
1288 | /// performing the reduction atomically and non-atomically. |
1289 | /// |
1290 | /// The code emitted for the following: |
1291 | /// |
1292 | /// \code |
1293 | /// type var_1; |
1294 | /// type var_2; |
1295 | /// #pragma omp <directive> reduction(reduction-op:var_1,var_2) |
1296 | /// /* body */; |
1297 | /// \endcode |
1298 | /// |
1299 | /// corresponds to the following sketch. |
1300 | /// |
1301 | /// \code |
1302 | /// void _outlined_par() { |
1303 | /// // N is the number of different reductions. |
1304 | /// void *red_array[] = {privatized_var_1, privatized_var_2, ...}; |
1305 | /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array, |
1306 | /// _omp_reduction_func, |
1307 | /// _gomp_critical_user.reduction.var)) { |
1308 | /// case 1: { |
1309 | /// var_1 = var_1 <reduction-op> privatized_var_1; |
1310 | /// var_2 = var_2 <reduction-op> privatized_var_2; |
1311 | /// // ... |
1312 | /// __kmpc_end_reduce(...); |
1313 | /// break; |
1314 | /// } |
1315 | /// case 2: { |
1316 | /// _Atomic<ReductionOp>(var_1, privatized_var_1); |
1317 | /// _Atomic<ReductionOp>(var_2, privatized_var_2); |
1318 | /// // ... |
1319 | /// break; |
1320 | /// } |
1321 | /// default: break; |
1322 | /// } |
1323 | /// } |
1324 | /// |
1325 | /// void _omp_reduction_func(void **lhs, void **rhs) { |
1326 | /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0]; |
1327 | /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1]; |
1328 | /// // ... |
1329 | /// } |
1330 | /// \endcode |
1331 | /// |
/// \param Loc            The location where the reduction was
///                       encountered. Must be within the associated
///                       directive and after the last local access to the
///                       reduction variables.
1336 | /// \param AllocaIP An insertion point suitable for allocas usable |
1337 | /// in reductions. |
1338 | /// \param ReductionInfos A list of info on each reduction variable. |
1339 | /// \param IsNoWait A flag set if the reduction is marked as nowait. |
/// \returns An insertion point. NOTE(review): presumably the position after
///          the emitted reduction code -- confirm against the implementation.
InsertPointTy createReductions(const LocationDescription &Loc,
                               InsertPointTy AllocaIP,
                               ArrayRef<ReductionInfo> ReductionInfos,
                               bool IsNoWait = false);
1344 | |
1345 | ///} |
1346 | |
1347 | /// Return the insertion point used by the underlying IRBuilder. |
1348 | InsertPointTy getInsertionPoint() { return Builder.saveIP(); } |
1349 | |
1350 | /// Update the internal location to \p Loc. |
1351 | bool updateToLocation(const LocationDescription &Loc) { |
1352 | Builder.restoreIP(IP: Loc.IP); |
1353 | Builder.SetCurrentDebugLocation(Loc.DL); |
1354 | return Loc.IP.getBlock() != nullptr; |
1355 | } |
1356 | |
/// Return the function declaration for the runtime function with \p FnID.
///
/// \param M    The module passed by the caller. NOTE(review): presumably the
///             module in which the declaration is looked up or created --
///             confirm against the implementation.
/// \param FnID Identifier of the OpenMP runtime function.
FunctionCallee getOrCreateRuntimeFunction(Module &M,
                                          omp::RuntimeFunction FnID);
1360 | |
/// Variant of getOrCreateRuntimeFunction that takes only \p FnID and yields a
/// Function pointer instead of a FunctionCallee.
/// NOTE(review): presumably operates on the builder's own module -- confirm
/// against the implementation.
Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
1362 | |
/// Return the (LLVM-IR) string describing the source location \p LocStr.
/// \p SrcLocStrSize is passed by non-const reference. NOTE(review):
/// presumably it receives the size of the returned string -- confirm. The
/// same applies to every overload below.
Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);

/// Return the (LLVM-IR) string describing the default source location.
Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);

/// Return the (LLVM-IR) string describing the source location identified by
/// the arguments (\p FunctionName, \p FileName, \p Line and \p Column).
Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
                               unsigned Line, unsigned Column,
                               uint32_t &SrcLocStrSize);

/// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
/// fallback if \p DL does not specify the function name.
Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
                               Function *F = nullptr);

/// Return the (LLVM-IR) string describing the source location \p Loc.
Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
                               uint32_t &SrcLocStrSize);
1383 | |
/// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
/// \p SrcLocStrSize and \p Reserve2Flags are additional inputs to the
/// encoding; \p Flags and \p Reserve2Flags default to zero.
/// TODO: Create an enum class for the Reserve2Flags
Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
                           omp::IdentFlag Flags = omp::IdentFlag(0),
                           unsigned Reserve2Flags = 0);
1389 | |
/// Create a hidden global flag \p Name in the module with initial value \p
/// Value.
///
/// \returns The global created for the flag, as a GlobalValue.
GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
1393 | |
/// Generate control flow and cleanup for cancellation.
///
/// \param CancelFlag Flag indicating if the cancellation is performed.
/// \param CanceledDirective The kind of directive that is canceled.
/// \param ExitCB Extra code to be generated in the exit block. Defaults to
///               an empty callback.
void emitCancelationCheckImpl(Value *CancelFlag,
                              omp::Directive CanceledDirective,
                              FinalizeCallbackTy ExitCB = {});
1402 | |
/// Generate a target region entry call.
///
/// \param Loc The location at which the request originated and is fulfilled.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param Return Return value of the created function returned by reference.
/// \param Ident NOTE(review): undocumented in the original; presumably the
///        ident_t* source location descriptor passed to the runtime --
///        confirm against the implementation.
/// \param DeviceID Identifier for the device via the 'device' clause.
/// \param NumTeams Number of teams for the region via the 'num_teams' clause
///                 or 0 if unspecified and -1 if there is no 'teams' clause.
/// \param NumThreads Number of threads via the 'thread_limit' clause.
/// \param HostPtr Pointer to the host-side pointer of the target kernel.
/// \param KernelArgs Array of arguments to the kernel.
InsertPointTy emitTargetKernel(const LocationDescription &Loc,
                               InsertPointTy AllocaIP, Value *&Return,
                               Value *Ident, Value *DeviceID, Value *NumTeams,
                               Value *NumThreads, Value *HostPtr,
                               ArrayRef<Value *> KernelArgs);
1419 | |
/// Generate a barrier runtime call.
///
/// \param Loc The location at which the request originated and is fulfilled.
/// \param DK The directive which caused the barrier.
/// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
/// \param CheckCancelFlag Flag to indicate a cancel barrier return value
///                        should be checked and acted upon.
///
/// \returns The insertion point after the barrier.
InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
                              omp::Directive DK, bool ForceSimpleCall,
                              bool CheckCancelFlag);
1432 | |
/// Generate a flush runtime call.
///
/// Returns nothing; the only input is the location.
///
/// \param Loc The location at which the request originated and is fulfilled.
void emitFlush(const LocationDescription &Loc);
1437 | |
/// The finalization stack made up of finalize callbacks currently in-flight,
/// wrapped into FinalizationInfo objects that also reference the finalization
/// target block and the kind of cancellable directive.
/// The back of the vector is treated as the last (most recent) entry, see
/// isLastFinalizationInfoCancellable.
SmallVector<FinalizationInfo, 8> FinalizationStack;
1442 | |
1443 | /// Return true if the last entry in the finalization stack is of kind \p DK |
1444 | /// and cancellable. |
1445 | bool isLastFinalizationInfoCancellable(omp::Directive DK) { |
1446 | return !FinalizationStack.empty() && |
1447 | FinalizationStack.back().IsCancellable && |
1448 | FinalizationStack.back().DK == DK; |
1449 | } |
1450 | |
/// Generate a taskwait runtime call.
///
/// Returns nothing; the only input is the location.
///
/// \param Loc The location at which the request originated and is fulfilled.
void emitTaskwaitImpl(const LocationDescription &Loc);
1455 | |
/// Generate a taskyield runtime call.
///
/// Returns nothing; the only input is the location.
///
/// \param Loc The location at which the request originated and is fulfilled.
void emitTaskyieldImpl(const LocationDescription &Loc);
1460 | |
/// Return the current thread ID.
///
/// \param Ident The ident (ident_t*) describing the query origin.
///
/// \returns A Value for the thread ID. NOTE(review): the name suggests an
///          existing value may be reused instead of emitting a new query --
///          confirm against the implementation.
Value *getOrCreateThreadID(Value *Ident);
1465 | |
/// The OpenMPIRBuilder configuration.
OpenMPIRBuilderConfig Config;

/// The underlying LLVM-IR module. Held by reference, not owned.
Module &M;

/// The LLVM-IR Builder used to create IR.
IRBuilder<> Builder;

/// Map to remember source location strings.
StringMap<Constant *> SrcLocStrMap;

/// Map to remember existing ident_t*, keyed by a (Constant *, uint64_t) pair.
DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;

/// Info manager to keep track of target regions.
OffloadEntriesInfoManager OffloadInfoManager;

/// The target triple of the underlying module.
const Triple T;
1486 | |
1487 | /// Helper that contains information about regions we need to outline |
1488 | /// during finalization. |
1489 | struct OutlineInfo { |
1490 | using PostOutlineCBTy = std::function<void(Function &)>; |
1491 | PostOutlineCBTy PostOutlineCB; |
1492 | BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB; |
1493 | SmallVector<Value *, 2> ExcludeArgsFromAggregate; |
1494 | |
1495 | /// Collect all blocks in between EntryBB and ExitBB in both the given |
1496 | /// vector and set. |
1497 | void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet, |
1498 | SmallVectorImpl<BasicBlock *> &BlockVector); |
1499 | |
1500 | /// Return the function that contains the region to be outlined. |
1501 | Function *getFunction() const { return EntryBB->getParent(); } |
1502 | }; |
1503 | |
/// Collection of regions that need to be outlined during finalization.
/// New entries are appended by addOutlineInfo.
SmallVector<OutlineInfo, 16> OutlineInfos;

/// Collection of owned canonical loop objects that eventually need to be
/// freed.
std::forward_list<CanonicalLoopInfo> LoopInfos;
1510 | |
1511 | /// Add a new region that will be outlined later. |
1512 | void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(Args&: OI); } |
1513 | |
/// An ordered map of auto-generated variables to their unique names.
/// It stores variables with the following names: 1) ".gomp_critical_user_" +
/// <critical_section_name> + ".var" for "omp critical" directives; 2)
/// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
/// variables.
/// NOTE(review): the container is a StringMap keyed by name and backed by a
/// BumpPtrAllocator; "ordered" above may be stale wording -- confirm whether
/// any user relies on iteration order.
StringMap<GlobalVariable *, BumpPtrAllocator> InternalVars;
1520 | |
/// Computes the size of type in bytes.
///
/// \param BasePtr The value the size is computed from. NOTE(review): the
///                summary speaks of a type while the parameter is a pointer
///                value; presumably the size of the pointed-to object --
///                confirm against the implementation.
Value *getSizeInBytes(Value *BasePtr);
1523 | |
/// Emit a branch from the current block to the Target block only if
/// the current block has a terminator.
/// NOTE(review): the emitBlock comment says a branch is added when the
/// current block does *not* have a terminator; the wording here looks
/// inverted -- confirm against the implementation.
void emitBranch(BasicBlock *Target);
1527 | |
/// If BB has no use then delete it and return. Else place BB after the current
/// block, if possible, or else at the end of the function. Also add a branch
/// from current block to BB if current block does not have a terminator.
///
/// \param BB         The block to emit.
/// \param CurFn      The function handed in by the caller. NOTE(review):
///                   presumably the function BB is placed into -- confirm.
/// \param IsFinished NOTE(review): presumably selects the "delete if
///                   unused" behaviour described above -- confirm.
void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false);
1532 | |
/// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
/// Here is the logic:
/// \code
/// if (Cond) {
///   ThenGen();
/// } else {
///   ElseGen();
/// }
/// \endcode
///
/// \param Cond     The condition value of the 'if' clause.
/// \param ThenGen  Callback generating the code for the true case.
/// \param ElseGen  Callback generating the code for the false case.
/// \param AllocaIP Optional insertion point, default-constructed if omitted.
///                 NOTE(review): presumably forwarded to the callbacks for
///                 alloca placement -- confirm.
void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
                  BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {});
1542 | |
/// Create the global variable holding the offload mappings information.
///
/// \param Mappings The 64-bit mapping values to store.
/// \param VarName  NOTE(review): presumably the name given to the created
///                 global variable -- confirm.
GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
                                      std::string VarName);
1546 | |
/// Create the global variable holding the offload names information.
///
/// \param Names   The name constants to store.
/// \param VarName NOTE(review): presumably the name given to the created
///                global variable -- confirm.
GlobalVariable *
createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
                      std::string VarName);
1551 | |
/// The alloca instructions used for a call to a mapper function (filled in by
/// createMapperAllocas, consumed by emitMapperCall). All members start out
/// null.
struct MapperAllocas {
  /// NOTE(review): presumably the array of base pointers -- confirm.
  AllocaInst *ArgsBase = nullptr;
  /// NOTE(review): presumably the array of argument pointers -- confirm.
  AllocaInst *Args = nullptr;
  /// NOTE(review): presumably the array of argument sizes -- confirm.
  AllocaInst *ArgSizes = nullptr;
};
1557 | |
/// Create the alloca instructions used in calls to mapper functions.
///
/// \param Loc           The source location description.
/// \param AllocaIP      The insertion point to be used for alloca
///                      instructions.
/// \param NumOperands   Number of operands in the call.
/// \param MapperAllocas Passed by reference; receives the created allocas.
void createMapperAllocas(const LocationDescription &Loc,
                         InsertPointTy AllocaIP, unsigned NumOperands,
                         struct MapperAllocas &MapperAllocas);
1562 | |
/// Create the call for the target mapper function.
///
/// \param Loc The source location description.
/// \param MapperFunc Function to be called.
/// \param SrcLocInfo Source location information global.
/// \param MaptypesArg The argument types.
/// \param MapnamesArg The argument names.
/// \param MapperAllocas The AllocaInst used for the call.
/// \param DeviceID Device ID for the call.
/// \param NumOperands Number of operands in the call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
                    Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
                    struct MapperAllocas &MapperAllocas, int64_t DeviceID,
                    unsigned NumOperands);
1576 | |
/// Container for the arguments used to pass data to the runtime library.
/// Every member defaults to nullptr.
struct TargetDataRTArgs {
  /// The array of base pointers passed to the runtime library.
  Value *BasePointersArray = nullptr;
  /// The array of section pointers passed to the runtime library.
  Value *PointersArray = nullptr;
  /// The array of sizes passed to the runtime library.
  Value *SizesArray = nullptr;
  /// The array of map types passed to the runtime library for the beginning
  /// of the region or for the entire region if there are no separate map
  /// types for the region end.
  Value *MapTypesArray = nullptr;
  /// The array of map types passed to the runtime library for the end of the
  /// region, or nullptr if there are no separate map types for the region
  /// end.
  Value *MapTypesArrayEnd = nullptr;
  /// The array of user-defined mappers passed to the runtime library.
  Value *MappersArray = nullptr;
  /// The array of original declaration names of mapped pointers sent to the
  /// runtime library for debugging.
  Value *MapNamesArray = nullptr;

  /// Create an empty set of arguments; all arrays are null.
  explicit TargetDataRTArgs() {}
  /// Create a set of arguments from the given arrays, stored unchanged.
  explicit TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray,
                            Value *SizesArray, Value *MapTypesArray,
                            Value *MapTypesArrayEnd, Value *MappersArray,
                            Value *MapNamesArray)
      : BasePointersArray(BasePointersArray), PointersArray(PointersArray),
        SizesArray(SizesArray), MapTypesArray(MapTypesArray),
        MapTypesArrayEnd(MapTypesArrayEnd), MappersArray(MappersArray),
        MapNamesArray(MapNamesArray) {}
};
1609 | |
/// Data structure that contains the needed information to construct the
/// kernel args vector. None of the members has a default initializer; all of
/// them are set by the constructor.
struct TargetKernelArgs {
  /// Number of arguments passed to the runtime library.
  unsigned NumTargetItems;
  /// Arguments passed to the runtime library.
  TargetDataRTArgs RTArgs;
  /// The number of iterations.
  Value *NumIterations;
  /// The number of teams.
  Value *NumTeams;
  /// The number of threads.
  Value *NumThreads;
  /// The size of the dynamic shared memory.
  Value *DynCGGroupMem;
  /// True if the kernel has 'no wait' clause.
  bool HasNoWait;

  /// Constructor for TargetKernelArgs; stores every argument unchanged in the
  /// member of the same name.
  TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs,
                   Value *NumIterations, Value *NumTeams, Value *NumThreads,
                   Value *DynCGGroupMem, bool HasNoWait)
      : NumTargetItems(NumTargetItems), RTArgs(RTArgs),
        NumIterations(NumIterations), NumTeams(NumTeams),
        NumThreads(NumThreads), DynCGGroupMem(DynCGGroupMem),
        HasNoWait(HasNoWait) {}
};
1637 | |
/// Create the kernel args vector used by emitTargetKernel. This function
/// creates various constant values that are used in the resulting args
/// vector.
///
/// \param KernelArgs The information the vector is built from.
/// \param Builder    The IR builder used to create values.
/// \param ArgsVector Passed by reference; receives the resulting arguments.
static void getKernelArgsVector(TargetKernelArgs &KernelArgs,
                                IRBuilderBase &Builder,
                                SmallVector<Value *> &ArgsVector);
1644 | |
1645 | /// Struct that keeps the information that should be kept throughout |
1646 | /// a 'target data' region. |
1647 | class TargetDataInfo { |
1648 | /// Set to true if device pointer information have to be obtained. |
1649 | bool RequiresDevicePointerInfo = false; |
1650 | /// Set to true if Clang emits separate runtime calls for the beginning and |
1651 | /// end of the region. These calls might have separate map type arrays. |
1652 | bool SeparateBeginEndCalls = false; |
1653 | |
1654 | public: |
1655 | TargetDataRTArgs RTArgs; |
1656 | |
1657 | SmallMapVector<const Value *, std::pair<Value *, Value *>, 4> |
1658 | DevicePtrInfoMap; |
1659 | |
1660 | /// Indicate whether any user-defined mapper exists. |
1661 | bool HasMapper = false; |
1662 | /// The total number of pointers passed to the runtime library. |
1663 | unsigned NumberOfPtrs = 0u; |
1664 | |
1665 | explicit TargetDataInfo() {} |
1666 | explicit TargetDataInfo(bool RequiresDevicePointerInfo, |
1667 | bool SeparateBeginEndCalls) |
1668 | : RequiresDevicePointerInfo(RequiresDevicePointerInfo), |
1669 | SeparateBeginEndCalls(SeparateBeginEndCalls) {} |
1670 | /// Clear information about the data arrays. |
1671 | void clearArrayInfo() { |
1672 | RTArgs = TargetDataRTArgs(); |
1673 | HasMapper = false; |
1674 | NumberOfPtrs = 0u; |
1675 | } |
1676 | /// Return true if the current target data information has valid arrays. |
1677 | bool isValid() { |
1678 | return RTArgs.BasePointersArray && RTArgs.PointersArray && |
1679 | RTArgs.SizesArray && RTArgs.MapTypesArray && |
1680 | (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs; |
1681 | } |
1682 | bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; } |
1683 | bool separateBeginEndCalls() { return SeparateBeginEndCalls; } |
1684 | }; |
1685 | |
/// Kind of device information attached to a map entry (element type of
/// MapInfosTy::DevicePointers).
/// NOTE(review): presumably None = nothing requested, Pointer / Address =
/// device pointer / device address requested -- confirm against the users.
enum class DeviceInfoTy { None, Pointer, Address };
/// Array of IR values, e.g. base pointers, pointers or sizes.
using MapValuesArrayTy = SmallVector<Value *, 4>;
/// Array of device-information kinds.
using MapDeviceInfoArrayTy = SmallVector<DeviceInfoTy, 4>;
/// Array of offload mapping flags.
using MapFlagsArrayTy = SmallVector<omp::OpenMPOffloadMappingFlags, 4>;
/// Array of name constants.
using MapNamesArrayTy = SmallVector<Constant *, 4>;
/// Array of 64-bit dimension values.
using MapDimArrayTy = SmallVector<uint64_t, 4>;
/// Array of value arrays, used for the non-contiguous descriptors.
using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
1693 | |
1694 | /// This structure contains combined information generated for mappable |
1695 | /// clauses, including base pointers, pointers, sizes, map types, user-defined |
1696 | /// mappers, and non-contiguous information. |
1697 | struct MapInfosTy { |
1698 | struct StructNonContiguousInfo { |
1699 | bool IsNonContiguous = false; |
1700 | MapDimArrayTy Dims; |
1701 | MapNonContiguousArrayTy Offsets; |
1702 | MapNonContiguousArrayTy Counts; |
1703 | MapNonContiguousArrayTy Strides; |
1704 | }; |
1705 | MapValuesArrayTy BasePointers; |
1706 | MapValuesArrayTy Pointers; |
1707 | MapDeviceInfoArrayTy DevicePointers; |
1708 | MapValuesArrayTy Sizes; |
1709 | MapFlagsArrayTy Types; |
1710 | MapNamesArrayTy Names; |
1711 | StructNonContiguousInfo NonContigInfo; |
1712 | |
1713 | /// Append arrays in \a CurInfo. |
1714 | void append(MapInfosTy &CurInfo) { |
1715 | BasePointers.append(in_start: CurInfo.BasePointers.begin(), |
1716 | in_end: CurInfo.BasePointers.end()); |
1717 | Pointers.append(in_start: CurInfo.Pointers.begin(), in_end: CurInfo.Pointers.end()); |
1718 | DevicePointers.append(in_start: CurInfo.DevicePointers.begin(), |
1719 | in_end: CurInfo.DevicePointers.end()); |
1720 | Sizes.append(in_start: CurInfo.Sizes.begin(), in_end: CurInfo.Sizes.end()); |
1721 | Types.append(in_start: CurInfo.Types.begin(), in_end: CurInfo.Types.end()); |
1722 | Names.append(in_start: CurInfo.Names.begin(), in_end: CurInfo.Names.end()); |
1723 | NonContigInfo.Dims.append(in_start: CurInfo.NonContigInfo.Dims.begin(), |
1724 | in_end: CurInfo.NonContigInfo.Dims.end()); |
1725 | NonContigInfo.Offsets.append(in_start: CurInfo.NonContigInfo.Offsets.begin(), |
1726 | in_end: CurInfo.NonContigInfo.Offsets.end()); |
1727 | NonContigInfo.Counts.append(in_start: CurInfo.NonContigInfo.Counts.begin(), |
1728 | in_end: CurInfo.NonContigInfo.Counts.end()); |
1729 | NonContigInfo.Strides.append(in_start: CurInfo.NonContigInfo.Strides.begin(), |
1730 | in_end: CurInfo.NonContigInfo.Strides.end()); |
1731 | } |
1732 | }; |
1733 | |
/// Callback function type for functions emitting the host fallback code that
/// is executed when the kernel launch fails. It takes an insertion point as
/// parameter where the code should be emitted. It returns an insertion point
/// that points right after the emitted code.
using EmitFallbackCallbackTy = function_ref<InsertPointTy(InsertPointTy)>;
1739 | |
/// Generate a target region entry call and host fallback call.
///
/// \param Loc The location at which the request originated and is fulfilled.
/// \param OutlinedFn The outlined kernel function.
/// \param OutlinedFnID The outlined function ID.
/// \param EmitTargetCallFallbackCB Call back function to generate host
///        fallback code.
/// \param Args Data structure holding information about the kernel arguments.
/// \param DeviceID Identifier for the device via the 'device' clause.
/// \param RTLoc Source location identifier.
/// \param AllocaIP The insertion point to be used for alloca instructions.
InsertPointTy emitKernelLaunch(
    const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
    EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
    Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);
1755 | |
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead
/// of the beginning.
///
/// \param Builder    The IR builder used to emit the arguments.
/// \param RTArgs     Passed by reference; receives the emitted arguments.
/// \param Info       The target data information the arguments are based on.
/// \param EmitDebug  NOTE(review): presumably controls whether the map names
///                   array is emitted -- confirm.
/// \param ForEndCall Emit the map types for the end of the region.
void emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                  OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
                                  OpenMPIRBuilder::TargetDataInfo &Info,
                                  bool EmitDebug = false,
                                  bool ForEndCall = false);
1765 | |
/// Emit an array of struct descriptors to be assigned to the offload args.
///
/// \param AllocaIP     The insertion point to be used for alloca
///                     instructions.
/// \param CodeGenIP    The insertion point at which the code is emitted.
/// \param CombinedInfo The combined map information; its NonContigInfo member
///                     holds the non-contiguous data.
/// \param Info         The target data information of the region.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP,
                                 InsertPointTy CodeGenIP,
                                 MapInfosTy &CombinedInfo,
                                 TargetDataInfo &Info);
1771 | |
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
/// NOTE(review): the function returns void; the "by reference" result is
/// presumably reported through \p Info -- confirm.
///
/// \param AllocaIP        The insertion point to be used for alloca
///                        instructions.
/// \param CodeGenIP       The insertion point at which the code is emitted.
/// \param CombinedInfo    The combined map information.
/// \param Info            The target data information of the region.
/// \param IsNonContiguous Whether non-contiguous information is involved.
/// \param DeviceAddrCB    Optional callback taking an index and a Value.
/// \param CustomMapperCB  Optional callback mapping an index to a Value.
void emitOffloadingArrays(
    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
    TargetDataInfo &Info, bool IsNonContiguous = false,
    function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
    function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
1780 | |
/// Creates offloading entry for the provided entry ID \a ID, address \a
/// Addr, size \a Size, and flags \a Flags.
/// The unnamed fifth parameter is a linkage type, and \a Name is an optional
/// name that defaults to the empty string.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
                        int32_t Flags, GlobalValue::LinkageTypes,
                        StringRef Name = "");
1786 | |
/// The kind of errors that can occur when emitting the offload entries and
/// metadata. Passed to the EmitMetadataErrorReportFunctionTy callback.
enum EmitMetadataErrorKind {
  /// Error related to a target region entry.
  EMIT_MD_TARGET_REGION_ERROR,
  /// Error related to a declare target entry.
  EMIT_MD_DECLARE_TARGET_ERROR,
  /// Error related to a global variable link entry.
  EMIT_MD_GLOBAL_VAR_LINK_ERROR
};
1794 | |
/// Callback function type used to report errors while emitting the offload
/// entries and metadata. It receives the kind of error and a
/// TargetRegionEntryInfo.
using EmitMetadataErrorReportFunctionTy =
    std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
1798 | |
/// Emit the offloading entries and metadata so that the device codegen side
/// can easily figure out what to emit. The produced metadata looks like
/// this:
///
/// \code
/// !omp_offload.info = !{!1, ...}
/// \endcode
///
/// We only generate metadata for functions that contain target regions.
///
/// \param ErrorReportFunction Callback taken by reference; see
///        EmitMetadataErrorReportFunctionTy for its signature.
void createOffloadEntriesAndInfoMetadata(
    EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
1808 | |
1809 | public: |
/// Generator for __kmpc_copyprivate
///
/// \param Loc The source location description.
/// \param BufSize Number of elements in the buffer.
/// \param CpyBuf List of pointers to data to be copied.
/// \param CpyFn Function to call for copying data.
/// \param DidIt Flag variable; 1 for 'single' thread, 0 otherwise.
///
/// \return The insertion position *after* the CopyPrivate call.
InsertPointTy createCopyPrivate(const LocationDescription &Loc,
                                llvm::Value *BufSize, llvm::Value *CpyBuf,
                                llvm::Value *CpyFn, llvm::Value *DidIt);
1823 | |
/// Generator for '#omp single'
///
/// \param Loc The source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsNowait If false, a barrier is emitted.
/// \param DidIt Local variable used as a flag to indicate 'single' thread.
///
/// \returns The insertion position *after* the single call.
InsertPointTy createSingle(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, bool IsNowait,
                           llvm::Value *DidIt);
1837 | |
/// Generator for '#omp master'
///
/// Takes the same callbacks as createMasked, without the filter argument.
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
///
/// \returns The insertion position *after* the master.
InsertPointTy createMaster(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB);
1848 | |
/// Generator for '#omp masked'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param Filter NOTE(review): undocumented in the original; presumably the
///               value of the 'filter' clause selecting the executing
///               thread -- confirm against the implementation.
///
/// \returns The insertion position *after* the masked.
InsertPointTy createMasked(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, Value *Filter);
1859 | |
/// Generator for '#omp critical'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
/// \param CriticalName Name of the lock used by the critical directive.
/// \param HintInst Hint instruction for the hint clause associated with the
///                 critical directive.
///
/// \returns The insertion position *after* the critical.
InsertPointTy createCritical(const LocationDescription &Loc,
                             BodyGenCallbackTy BodyGenCB,
                             FinalizeCallbackTy FiniCB,
                             StringRef CriticalName, Value *HintInst);
1873 | |
/// Generator for '#omp ordered depend (source | sink)'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param NumLoops The number of loops in the depend clause.
/// \param StoreValues The values to be stored in the vector address.
/// \param Name The name of the alloca instruction.
/// \param IsDependSource If true, depend source; otherwise, depend sink.
///
/// \return The insertion position *after* the ordered.
InsertPointTy createOrderedDepend(const LocationDescription &Loc,
                                  InsertPointTy AllocaIP, unsigned NumLoops,
                                  ArrayRef<llvm::Value *> StoreValues,
                                  const Twine &Name, bool IsDependSource);
1888 | |
/// Generator for '#omp ordered [threads | simd]'
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsThreads If true, with threads clause or without clause;
///                  otherwise, with simd clause.
///
/// \returns The insertion position *after* the ordered.
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
                                       BodyGenCallbackTy BodyGenCB,
                                       FinalizeCallbackTy FiniCB,
                                       bool IsThreads);
1902 | |
/// Generator for '#omp sections'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion points to be used for alloca instructions.
/// \param SectionCBs Callbacks that will generate body of each section.
/// \param PrivCB Callback to copy a given variable (think copy constructor).
/// \param FiniCB Callback to finalize variable copies.
/// \param IsCancellable Flag to indicate a cancellable parallel region.
/// \param IsNowait If true, the barrier that ensures all sections are
///                 executed before moving forward will not be generated.
/// \returns The insertion position *after* the sections.
InsertPointTy createSections(const LocationDescription &Loc,
                             InsertPointTy AllocaIP,
                             ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
                             PrivatizeCallbackTy PrivCB,
                             FinalizeCallbackTy FiniCB, bool IsCancellable,
                             bool IsNowait);
1920 | |
/// Generator for '#omp section'
///
/// Generates a single section; createSections is the generator for the
/// enclosing '#omp sections'.
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
/// \returns The insertion position *after* the section.
InsertPointTy createSection(const LocationDescription &Loc,
                            BodyGenCallbackTy BodyGenCB,
                            FinalizeCallbackTy FiniCB);
1930 | |
/// Generator for `#omp teams`
///
/// \param Loc The location where the teams construct was encountered.
/// \param BodyGenCB Callback that will generate the region code.
/// \param NumTeamsLower Lower bound on number of teams. If this is nullptr,
///        it is as if lower bound is specified as equal to upperbound. If
///        this is non-null, then upperbound must also be non-null.
/// \param NumTeamsUpper Upper bound on the number of teams.
/// \param ThreadLimit Limit on the number of threads that may participate in
///        a contention group created by each team.
/// \param IfExpr The integer argument value of the if condition on the
///        teams clause.
///
/// All four Value arguments are optional and default to nullptr.
InsertPointTy
createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
            Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr,
            Value *ThreadLimit = nullptr, Value *IfExpr = nullptr);
1947 | |
1948 | /// Generate conditional branch and relevant BasicBlocks through which private |
1949 | /// threads copy the 'copyin' variables from Master copy to threadprivate |
1950 | /// copies. |
1951 | /// |
1952 | /// \param IP insertion block for copyin conditional |
1953 | /// \param MasterAddr a pointer to the master variable |
1954 | /// \param PrivateAddr a pointer to the threadprivate variable |
1955 | /// \param IntPtrTy Pointer size type |
1956 | /// \param BranchtoEnd Create a branch between the copyin.not.master blocks |
1957 | /// and copy.in.end block (defaults to true) |
1958 | /// |
1959 | /// \returns The insertion point where the copying operation is to be emitted. |
1960 | InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, |
1961 | Value *PrivateAddr, |
1962 | llvm::IntegerType *IntPtrTy, |
1963 | bool BranchtoEnd = true); |
1964 | |
1965 | /// Create a runtime call for kmpc_Alloc |
1966 | /// |
1967 | /// \param Loc The insert and source location description. |
1968 | /// \param Size Size of the memory space to allocate |
1969 | /// \param Allocator Allocator information instruction |
1970 | /// \param Name Name of the call instruction for OMP_alloc (defaults to "") |
1971 | /// |
1972 | /// \returns The CallInst for the OMP_Alloc call |
1973 | CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size, |
1974 | Value *Allocator, std::string Name = "" ); |
1975 | |
1976 | /// Create a runtime call for kmpc_free |
1977 | /// |
1978 | /// \param Loc The insert and source location description. |
1979 | /// \param Addr Address of the memory space to be freed |
1980 | /// \param Allocator Allocator information instruction |
1981 | /// \param Name Name of the call instruction for OMP_Free (defaults to "") |
1982 | /// |
1983 | /// \returns The CallInst for the OMP_Free call |
1984 | CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr, |
1985 | Value *Allocator, std::string Name = "" ); |
1986 | |
1987 | /// Create a runtime call for kmpc_threadprivate_cached |
1988 | /// |
1989 | /// \param Loc The insert and source location description. |
1990 | /// \param Pointer Pointer to the data to be cached |
1991 | /// \param Size Size of the data to be cached |
1992 | /// \param Name Name of the created call instruction (defaults to an empty Twine) |
1993 | /// |
1994 | /// \returns The CallInst for the thread private cache call. |
1995 | CallInst *createCachedThreadPrivate(const LocationDescription &Loc, |
1996 | llvm::Value *Pointer, |
1997 | llvm::ConstantInt *Size, |
1998 | const llvm::Twine &Name = Twine("" )); |
1999 | |
2000 | /// Create a runtime call for __tgt_interop_init |
2001 | /// |
2002 | /// \param Loc The insert and source location description. |
2003 | /// \param InteropVar variable to be allocated |
2004 | /// \param InteropType type of interop operation |
2005 | /// \param Device device to which offloading will occur |
2006 | /// \param NumDependences number of dependence variables |
2007 | /// \param DependenceAddress pointer to dependence variables |
2008 | /// \param HaveNowaitClause does nowait clause exist |
2009 | /// |
2010 | /// \returns CallInst to the __tgt_interop_init call |
2011 | CallInst *createOMPInteropInit(const LocationDescription &Loc, |
2012 | Value *InteropVar, |
2013 | omp::OMPInteropType InteropType, Value *Device, |
2014 | Value *NumDependences, |
2015 | Value *DependenceAddress, |
2016 | bool HaveNowaitClause); |
2017 | |
2018 | /// Create a runtime call for __tgt_interop_destroy |
2019 | /// |
2020 | /// \param Loc The insert and source location description. |
2021 | /// \param InteropVar the interop variable passed to the runtime call |
2022 | /// \param Device device to which offloading will occur |
2023 | /// \param NumDependences number of dependence variables |
2024 | /// \param DependenceAddress pointer to dependence variables |
2025 | /// \param HaveNowaitClause does nowait clause exist |
2026 | /// |
2027 | /// \returns CallInst to the __tgt_interop_destroy call |
2028 | CallInst *createOMPInteropDestroy(const LocationDescription &Loc, |
2029 | Value *InteropVar, Value *Device, |
2030 | Value *NumDependences, |
2031 | Value *DependenceAddress, |
2032 | bool HaveNowaitClause); |
2033 | |
2034 | /// Create a runtime call for __tgt_interop_use |
2035 | /// |
2036 | /// \param Loc The insert and source location description. |
2037 | /// \param InteropVar the interop variable passed to the runtime call |
2038 | /// \param Device device to which offloading will occur |
2039 | /// \param NumDependences number of dependence variables |
2040 | /// \param DependenceAddress pointer to dependence variables |
2041 | /// \param HaveNowaitClause does nowait clause exist |
2042 | /// |
2043 | /// \returns CallInst to the __tgt_interop_use call |
2044 | CallInst *createOMPInteropUse(const LocationDescription &Loc, |
2045 | Value *InteropVar, Value *Device, |
2046 | Value *NumDependences, Value *DependenceAddress, |
2047 | bool HaveNowaitClause); |
2048 | |
2049 | /// The `omp target` interface |
2050 | /// |
2051 | /// For more information about the usage of this interface, |
2052 | /// \see openmp/libomptarget/deviceRTLs/common/include/target.h |
2053 | /// |
2054 | ///{ |
2055 | |
2056 | /// Create a runtime call for kmpc_target_init |
2057 | /// |
2058 | /// \param Loc The insert and source location description. |
2059 | /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. |
2060 | /// \param MinThreadsVal Minimal number of threads, or 0 (the default). |
2061 | /// \param MaxThreadsVal Maximal number of threads, or 0 (the default). |
2062 | /// \param MinTeamsVal Minimal number of teams, or 0 (the default). |
2063 | /// \param MaxTeamsVal Maximal number of teams, or 0 (the default). |
2064 | InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, |
2065 | int32_t MinThreadsVal = 0, |
2066 | int32_t MaxThreadsVal = 0, |
2067 | int32_t MinTeamsVal = 0, |
2068 | int32_t MaxTeamsVal = 0); |
2069 | |
2070 | /// Create a runtime call for kmpc_target_deinit |
2071 | /// |
2072 | /// \param Loc The insert and source location description. |
2073 | /// \param TeamsReductionDataSize The maximal size of all the reduction data |
2074 | /// for teams reduction (defaults to 0). |
2075 | /// \param TeamsReductionBufferLength The number of elements (each of up to |
2076 | /// \p TeamsReductionDataSize size), in the teams reduction buffer (defaults to 1024). |
2077 | void createTargetDeinit(const LocationDescription &Loc, |
2078 | int32_t TeamsReductionDataSize = 0, |
2079 | int32_t TeamsReductionBufferLength = 1024); |
2080 | |
2081 | ///} |
2082 | |
2083 | /// Helpers to read/write kernel annotations from the IR. |
2084 | /// |
2085 | ///{ |
2086 | |
2087 | /// Read/write the bounds on threads for \p Kernel. Read will return 0 if none |
2088 | /// is set. For the write, \p LB and \p UB are the bounds to record. |
2089 | static std::pair<int32_t, int32_t> |
2090 | readThreadBoundsForKernel(const Triple &T, Function &Kernel); |
2091 | static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, |
2092 | int32_t LB, int32_t UB); |
2093 | |
2094 | /// Read/write the bounds on teams for \p Kernel. Read will return 0 if none |
2095 | /// is set. For the write, \p LB and \p UB are the bounds to record. |
2096 | static std::pair<int32_t, int32_t> readTeamBoundsForKernel(const Triple &T, |
2097 | Function &Kernel); |
2098 | static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, |
2099 | int32_t UB); |
2100 | ///} |
2101 | |
2102 | private: |
2103 | /// Sets the function attributes expected for the outlined function \p OutlinedFn. |
2104 | void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn); |
2105 | |
2106 | /// Creates the function ID/Address for the given outlined function \p OutlinedFn. |
2107 | /// In the case of an embedded device function the address of the function is |
2108 | /// used; in the case of a non-offload function a constant is created. \p EntryFnIDName is the name of the ID. |
2109 | Constant *createOutlinedFunctionID(Function *OutlinedFn, |
2110 | StringRef EntryFnIDName); |
2111 | |
2112 | /// Creates the region entry address for the outlined function \p OutlinedFunction, using the entry name \p EntryFnName. |
2113 | Constant *createTargetRegionEntryAddr(Function *OutlinedFunction, |
2114 | StringRef EntryFnName); |
2115 | |
2116 | public: |
2117 | /// Callback type that generates and returns a function with the given name. |
2118 | using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>; |
2119 | |
2120 | /// Create a unique name for the entry function using the source location |
2121 | /// information of the current target region. The name will be something like: |
2122 | /// |
2123 | /// __omp_offloading_DD_FFFF_PP_lBB[_CC] |
2124 | /// |
2125 | /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the |
2126 | /// mangled name of the function that encloses the target region and BB is the |
2127 | /// line number of the target region. CC is a count added when more than one |
2128 | /// region is located at the same location. |
2129 | /// |
2130 | /// If this target outline function is not an offload entry, we don't need to |
2131 | /// register it. This may happen if it is guarded by an if clause that is |
2132 | /// false at compile time, or no target archs have been specified. |
2133 | /// |
2134 | /// The created target region ID is used by the runtime library to identify |
2135 | /// the current target region, so it only has to be unique and not |
2136 | /// necessarily point to anything. It could be the pointer to the outlined |
2137 | /// function that implements the target region, but we aren't using that so |
2138 | /// that the compiler doesn't need to keep that, and could therefore inline |
2139 | /// the host function if proven worthwhile during optimization. On the other |
2140 | /// hand, if emitting code for the device, the ID has to be the function |
2141 | /// address so that it can be retrieved from the offloading entry and launched |
2142 | /// by the runtime library. We also mark the outlined function to have |
2143 | /// external linkage in case we are emitting code for the device, because |
2144 | /// these functions will be entry points to the device. |
2145 | /// |
2146 | /// \param EntryInfo The entry information about the function |
2147 | /// \param GenerateFunctionCallback The callback function to generate the code |
2148 | /// \param IsOffloadEntry Whether the outlined function is an offload entry (and so needs to be registered) |
2149 | /// \param OutlinedFn Reference to the pointer to the outlined function (presumably set by this call; confirm against the definition) |
2150 | /// \param OutlinedFnID Reference to the target region ID constant (presumably set by this call; confirm against the definition) |
2151 | void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, |
2152 | FunctionGenCallback &GenerateFunctionCallback, |
2153 | bool IsOffloadEntry, Function *&OutlinedFn, |
2154 | Constant *&OutlinedFnID); |
2155 | |
2156 | /// Registers the given function and sets up the attributes of the function. |
2157 | /// |
2158 | /// \param EntryInfo The entry information about the function |
2159 | /// \param OutlinedFunction Pointer to the outlined function |
2160 | /// \param EntryFnName Name of the outlined function |
2161 | /// \param EntryFnIDName Name of the ID to be created |
2162 | /// |
2163 | /// \returns The FunctionID. |
2164 | Constant *registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, |
2165 | Function *OutlinedFunction, |
2166 | StringRef EntryFnName, |
2167 | StringRef EntryFnIDName); |
2168 | |
2169 | /// Type of BodyGen to use for region codegen. |
2170 | /// |
2171 | /// - Priv: If device pointer privatization is required, emit the body of the |
2172 | /// region here. It will have to be duplicated: with and without |
2173 | /// privatization. |
2174 | /// - DupNoPriv: If we need device pointer privatization, we need |
2175 | /// to emit the body of the region with no privatization in the 'else' branch |
2176 | /// of the conditional. |
2177 | /// - NoPriv: If we don't require privatization of device |
2178 | /// pointers, we emit the body in between the runtime calls. This avoids |
2179 | /// duplicating the body code. |
2180 | enum BodyGenTy { Priv, DupNoPriv, NoPriv }; |
2181 | |
2182 | /// Callback type for creating the map infos for the kernel parameters; the callback returns them as a MapInfosTy reference. |
2183 | /// \param CodeGenIP is the insertion point where code should be generated, |
2184 | /// if any. |
2185 | using GenMapInfoCallbackTy = |
2186 | function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>; |
2187 | |
2188 | /// Generator for '#omp target data' |
2189 | /// |
2190 | /// \param Loc The location where the target data construct was encountered. |
2191 | /// \param AllocaIP The insertion point to be used for alloca instructions. |
2192 | /// \param CodeGenIP The insertion point at which the target directive code |
2193 | /// should be placed. |
2194 | /// \param DeviceID Stores the DeviceID from the device clause. |
2195 | /// \param IfCond Value which corresponds to the if clause condition. |
2196 | /// \param Info Stores all information related to the Target Data directive. |
2197 | /// \param GenMapInfoCB Callback that populates the MapInfos and returns. |
2198 | /// \param MapperFunc Optional runtime mapper function to emit (presumably selects the begin or end mapper call; TODO confirm). |
2199 | /// \param BodyGenCB Optional Callback to generate the region code. |
2200 | /// \param DeviceAddrCB Optional callback to generate code related to |
2201 | /// use_device_ptr and use_device_addr. |
2202 | /// \param CustomMapperCB Optional callback to generate code related to |
2203 | /// custom mappers. |
2204 | /// \param SrcLocInfo Optional source location info value (NOTE(review): semantics not visible here; confirm against the definition). |
2205 | OpenMPIRBuilder::InsertPointTy createTargetData( |
2206 | const LocationDescription &Loc, InsertPointTy AllocaIP, |
2207 | InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, |
2208 | TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, |
2209 | omp::RuntimeFunction *MapperFunc = nullptr, |
2210 | function_ref<InsertPointTy(InsertPointTy CodeGenIP, |
2211 | BodyGenTy BodyGenType)> |
2212 | BodyGenCB = nullptr, |
2213 | function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, |
2214 | function_ref<Value *(unsigned int)> CustomMapperCB = nullptr, |
2215 | Value *SrcLocInfo = nullptr); |
2216 | |
2217 | using TargetBodyGenCallbackTy = function_ref<InsertPointTy( |
2218 | InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; |
2219 | |
2220 | using TargetGenArgAccessorsCallbackTy = function_ref<InsertPointTy( |
2221 | Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, |
2222 | InsertPointTy CodeGenIP)>; |
2223 | |
2224 | /// Generator for '#omp target' |
2225 | /// |
2226 | /// \param Loc The location where the target construct was encountered. |
2227 | /// \param AllocaIP The insertion point to be used for alloca instructions. |
2228 | /// \param CodeGenIP The insertion point where the call to the outlined function should be emitted. |
2229 | /// \param EntryInfo The entry information about the function. |
2230 | /// \param NumTeams Number of teams specified in the num_teams clause. |
2231 | /// \param NumThreads Number of threads specified in the thread_limit clause. |
2232 | /// \param Inputs The input values to the region that will be passed as arguments to the outlined function. |
2233 | /// \param GenMapInfoCB Callback (GenMapInfoCallbackTy) that creates the map infos for the kernel parameters. |
2234 | /// \param BodyGenCB Callback (TargetBodyGenCallbackTy) that will generate the region code; it receives the alloca and codegen insertion points and returns an insertion point. |
2235 | /// \param ArgAccessorFuncCB Callback (TargetGenArgAccessorsCallbackTy) that will generate accessor |
2236 | /// instructions for passed-in target arguments where necessary. |
2237 | InsertPointTy createTarget(const LocationDescription &Loc, |
2238 | OpenMPIRBuilder::InsertPointTy AllocaIP, |
2239 | OpenMPIRBuilder::InsertPointTy CodeGenIP, |
2240 | TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, |
2241 | int32_t NumThreads, |
2242 | SmallVectorImpl<Value *> &Inputs, |
2243 | GenMapInfoCallbackTy GenMapInfoCB, |
2244 | TargetBodyGenCallbackTy BodyGenCB, |
2245 | TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB); |
2246 | |
2247 | /// Returns the __kmpc_for_static_init_* runtime function for the specified |
2248 | /// size \a IVSize and sign \a IVSigned. Will instead create a distribute call |
2249 | /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set. |
2250 | FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, |
2251 | bool IsGPUDistribute); |
2252 | |
2253 | /// Returns the __kmpc_dispatch_init_* runtime function for the specified |
2254 | /// size \a IVSize and sign \a IVSigned. |
2255 | FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned); |
2256 | |
2257 | /// Returns the __kmpc_dispatch_next_* runtime function for the specified |
2258 | /// size \a IVSize and sign \a IVSigned. |
2259 | FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned); |
2260 | |
2261 | /// Returns the __kmpc_dispatch_fini_* runtime function for the specified |
2262 | /// size \a IVSize and sign \a IVSigned. |
2263 | FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned); |
2264 | |
2265 | /// Declarations for LLVM-IR types (simple, array, function and structure) are |
2266 | /// generated below. Their names are defined and used in OMPKinds.def. Here |
2267 | /// we provide the declarations (each initialized to nullptr); the initializeTypes function will provide the |
2268 | /// values. |
2269 | /// |
2270 | ///{ |
2271 | #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr; |
2272 | #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ |
2273 | ArrayType *VarName##Ty = nullptr; \ |
2274 | PointerType *VarName##PtrTy = nullptr; |
2275 | #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ |
2276 | FunctionType *VarName = nullptr; \ |
2277 | PointerType *VarName##Ptr = nullptr; |
2278 | #define OMP_STRUCT_TYPE(VarName, StrName, ...) \ |
2279 | StructType *VarName = nullptr; \ |
2280 | PointerType *VarName##Ptr = nullptr; |
2281 | #include "llvm/Frontend/OpenMP/OMPKinds.def" |
2282 | |
2283 | ///} |
2284 | |
2285 | private: |
2286 | /// Create all simple and struct types exposed by the runtime and remember |
2287 | /// their llvm::PointerTypes for easy access later. |
2288 | void initializeTypes(Module &M); |
2289 | |
2290 | /// Common interface for generating entry calls for OMP Directives. |
2291 | /// If the directive has a region/body, it will set the insertion |
2292 | /// point to the body. |
2293 | /// |
2294 | /// \param OMPD Directive to generate entry blocks for |
2295 | /// \param EntryCall Call to the entry OMP Runtime Function |
2296 | /// \param ExitBB block where the region ends. |
2297 | /// \param Conditional indicates whether the entry call result will be used |
2298 | /// to evaluate a conditional of whether a thread will execute |
2299 | /// body code or not (defaults to false). |
2300 | /// |
2301 | /// \return The insertion position in exit block |
2302 | InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall, |
2303 | BasicBlock *ExitBB, |
2304 | bool Conditional = false); |
2305 | |
2306 | /// Common interface to finalize the region |
2307 | /// |
2308 | /// \param OMPD Directive to generate exiting code for |
2309 | /// \param FinIP Insertion point for emitting Finalization code and exit call |
2310 | /// \param ExitCall Call to the ending OMP Runtime Function |
2311 | /// \param HasFinalize indicates whether the directive will require finalization |
2312 | /// and has a finalization callback in the stack that |
2313 | /// should be called (defaults to true). |
2314 | /// |
2315 | /// \return The insertion position in exit block |
2316 | InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD, |
2317 | InsertPointTy FinIP, |
2318 | Instruction *ExitCall, |
2319 | bool HasFinalize = true); |
2320 | |
2321 | /// Common Interface to generate OMP inlined regions |
2322 | /// |
2323 | /// \param OMPD Directive to generate inlined region for |
2324 | /// \param EntryCall Call to the entry OMP Runtime Function |
2325 | /// \param ExitCall Call to the ending OMP Runtime Function |
2326 | /// \param BodyGenCB Body code generation callback. |
2327 | /// \param FiniCB Finalization Callback. Will be called when finalizing region |
2328 | /// \param Conditional indicates whether the entry call result will be used |
2329 | /// to evaluate a conditional of whether a thread will execute |
2330 | /// body code or not (defaults to false). |
2331 | /// \param HasFinalize indicates whether the directive will require finalization |
2332 | /// and has a finalization callback in the stack that |
2333 | /// should be called (defaults to true). |
2334 | /// \param IsCancellable if HasFinalize is set to true, indicates whether |
2335 | /// the directive should be cancellable (defaults to false). |
2336 | /// \return The insertion point after the region |
2337 | |
2338 | InsertPointTy |
2339 | EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall, |
2340 | Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, |
2341 | FinalizeCallbackTy FiniCB, bool Conditional = false, |
2342 | bool HasFinalize = true, bool IsCancellable = false); |
2343 | |
2344 | /// Get the final name formed from \p Parts using the given (platform-specific) name separators. |
2345 | /// \param Parts different parts of the final name that need separation |
2346 | /// \param FirstSeparator First separator used between the initial two |
2347 | /// parts of the name. |
2348 | /// \param Separator separator used between all of the remaining consecutive |
2349 | /// parts of the name |
2350 | static std::string getNameWithSeparators(ArrayRef<StringRef> Parts, |
2351 | StringRef FirstSeparator, |
2352 | StringRef Separator); |
2353 | |
2354 | /// Returns corresponding lock object for the specified critical region |
2355 | /// name. If the lock object does not exist it is created, otherwise the |
2356 | /// reference to the existing copy is returned. |
2357 | /// \param CriticalName Name of the critical region. |
2358 | /// \returns The lock object for the critical region. |
2359 | Value *getOMPCriticalRegionLock(StringRef CriticalName); |
2360 | |
2361 | /// Callback type for Atomic Expression update |
2362 | /// Example: |
2363 | /// \code{.cpp} |
2364 | /// unsigned x = 0; |
2365 | /// #pragma omp atomic update |
2366 | /// x = Expr(x_old); //Expr() is any legal operation |
2367 | /// \endcode |
2368 | /// |
2369 | /// \param XOld the value of the atomic memory address to use for update |
2370 | /// \param IRB reference to the IRBuilder to use |
2371 | /// |
2372 | /// \returns Value to update X to. |
2373 | using AtomicUpdateCallbackTy = |
2374 | const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>; |
2375 | |
2376 | private: |
2377 | enum AtomicKind { Read, Write, Update, Capture, Compare }; |
2378 | |
2379 | /// Determine whether to emit flush or not |
2380 | /// |
2381 | /// \param Loc The insert and source location description. |
2382 | /// \param AO The required atomic ordering |
2383 | /// \param AK The OpenMP atomic operation kind used (one of the AtomicKind values Read, Write, Update, Capture, Compare). |
2384 | /// |
2385 | /// \returns whether a flush was emitted or not |
2386 | bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc, |
2387 | AtomicOrdering AO, AtomicKind AK); |
2388 | |
2389 | /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X |
2390 | /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X) |
2391 | /// Only Scalar data types. |
2392 | /// |
2393 | /// \param AllocaIP The insertion point to be used for alloca |
2394 | /// instructions. |
2395 | /// \param X The target atomic pointer to be updated |
2396 | /// \param XElemTy The element type of the atomic pointer. |
2397 | /// \param Expr The value to update X with. |
2398 | /// \param AO Atomic ordering of the generated atomic |
2399 | /// instructions. |
2400 | /// \param RMWOp The binary operation used for update. If the |
2401 | /// operation is not supported by atomicRMW, |
2402 | /// or belongs to {FADD, FSUB, BAD_BINOP}, |
2403 | /// then a `cmpExch` based atomic will be generated. |
2404 | /// \param UpdateOp Code generator for complex expressions that cannot be |
2405 | /// expressed through atomicrmw instruction. |
2406 | /// \param VolatileX true if \a X is volatile. |
2407 | /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the |
2408 | /// update expression, false otherwise. |
2409 | /// (e.g. true for X = X BinOp Expr) |
2410 | /// |
2411 | /// \returns A pair of the old value of X before the update, and the value |
2412 | /// used for the update. |
2413 | std::pair<Value *, Value *> |
2414 | emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr, |
2415 | AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, |
2416 | AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, |
2417 | bool IsXBinopExpr); |
2418 | |
2419 | /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 . |
2420 | /// |
2421 | /// \returns The instruction |
2422 | Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2, |
2423 | AtomicRMWInst::BinOp RMWOp); |
2424 | |
2425 | public: |
2426 | /// A struct to pack relevant information while generating atomic Ops: the variable (Var), its element type (ElemTy), and its IsSigned / IsVolatile flags. All members default to null/false. |
2427 | struct AtomicOpValue { |
2428 | Value *Var = nullptr; |
2429 | Type *ElemTy = nullptr; |
2430 | bool IsSigned = false; |
2431 | bool IsVolatile = false; |
2432 | }; |
2433 | |
2434 | /// Emit atomic Read for : V = X --- Only Scalar data types. |
2435 | /// |
2436 | /// \param Loc The insert and source location description. |
2437 | /// \param X The target pointer to be atomically read |
2438 | /// \param V Memory address where to store the atomically read |
2439 | /// value |
2440 | /// \param AO Atomic ordering of the generated atomic |
2441 | /// instructions. |
2442 | /// |
2443 | /// \return Insertion point after the generated atomic read IR. |
2444 | InsertPointTy createAtomicRead(const LocationDescription &Loc, |
2445 | AtomicOpValue &X, AtomicOpValue &V, |
2446 | AtomicOrdering AO); |
2447 | |
2448 | /// Emit atomic write for : X = Expr --- Only Scalar data types. |
2449 | /// |
2450 | /// \param Loc The insert and source location description. |
2451 | /// \param X The target pointer to be atomically written to |
2452 | /// \param Expr The value to store. |
2453 | /// \param AO Atomic ordering of the generated atomic |
2454 | /// instructions. |
2455 | /// |
2456 | /// \return Insertion point after the generated atomic write IR. |
2457 | InsertPointTy createAtomicWrite(const LocationDescription &Loc, |
2458 | AtomicOpValue &X, Value *Expr, |
2459 | AtomicOrdering AO); |
2460 | |
2461 | /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X |
2462 | /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X) |
2463 | /// Only Scalar data types. |
2464 | /// |
2465 | /// \param Loc The insert and source location description. |
2466 | /// \param AllocaIP The insertion point to be used for alloca instructions. |
2467 | /// \param X The target atomic pointer to be updated |
2468 | /// \param Expr The value to update X with. |
2469 | /// \param AO Atomic ordering of the generated atomic instructions. |
2470 | /// \param RMWOp The binary operation used for update. If the operation |
2471 | /// is not supported by atomicRMW, or belongs to |
2472 | /// {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based |
2473 | /// atomic will be generated. |
2474 | /// \param UpdateOp Code generator for complex expressions that cannot be |
2475 | /// expressed through atomicrmw instruction. |
2476 | /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the |
2477 | /// update expression, false otherwise. |
2478 | /// (e.g. true for X = X BinOp Expr) |
2479 | /// |
2480 | /// \return Insertion point after the generated atomic update IR. |
2481 | InsertPointTy createAtomicUpdate(const LocationDescription &Loc, |
2482 | InsertPointTy AllocaIP, AtomicOpValue &X, |
2483 | Value *Expr, AtomicOrdering AO, |
2484 | AtomicRMWInst::BinOp RMWOp, |
2485 | AtomicUpdateCallbackTy &UpdateOp, |
2486 | bool IsXBinopExpr); |
2487 | |
2488 | /// Emit atomic update for constructs: --- Only Scalar data types |
2489 | /// V = X; X = X BinOp Expr , |
2490 | /// X = X BinOp Expr; V = X, |
2491 | /// V = X; X = Expr BinOp X, |
2492 | /// X = Expr BinOp X; V = X, |
2493 | /// V = X; X = UpdateOp(X), |
2494 | /// X = UpdateOp(X); V = X, |
2495 | /// |
2496 | /// \param Loc The insert and source location description. |
2497 | /// \param AllocaIP The insertion point to be used for alloca instructions. |
2498 | /// \param X The target atomic pointer to be updated |
2499 | /// \param V Memory address where to store captured value |
2500 | /// \param Expr The value to update X with. |
2501 | /// \param AO Atomic ordering of the generated atomic instructions |
2502 | /// \param RMWOp The binary operation used for update. If the |
2503 | /// operation is not supported by atomicRMW, or belongs to |
2504 | /// {FADD, FSUB, BAD_BINOP}, then a cmpExch based |
2505 | /// atomic will be generated. |
2506 | /// \param UpdateOp Code generator for complex expressions that cannot be |
2507 | /// expressed through atomicrmw instruction. |
2508 | /// \param UpdateExpr true if X is an in place update of the form |
2509 | /// X = X BinOp Expr or X = Expr BinOp X |
2510 | /// \param IsPostfixUpdate true if original value of 'x' must be stored in |
2511 | /// 'v', not an updated one. |
2512 | /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the |
2513 | /// update expression, false otherwise. |
2514 | /// (e.g. true for X = X BinOp Expr) |
2515 | /// |
2516 | /// \return Insertion point after the generated atomic capture IR. |
2517 | InsertPointTy |
2518 | createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, |
2519 | AtomicOpValue &X, AtomicOpValue &V, Value *Expr, |
2520 | AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, |
2521 | AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, |
2522 | bool IsPostfixUpdate, bool IsXBinopExpr); |
2523 | |
2524 | /// Emit atomic compare for constructs: --- Only scalar data types |
2525 | /// cond-expr-stmt: |
2526 | /// x = x ordop expr ? expr : x; |
2527 | /// x = expr ordop x ? expr : x; |
2528 | /// x = x == e ? d : x; |
2529 | /// x = e == x ? d : x; (this one is not in the spec) |
2530 | /// cond-update-stmt: |
2531 | /// if (x ordop expr) { x = expr; } |
2532 | /// if (expr ordop x) { x = expr; } |
2533 | /// if (x == e) { x = d; } |
2534 | /// if (e == x) { x = d; } (this one is not in the spec) |
2535 | /// conditional-update-capture-atomic: |
2536 | /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false) |
2537 | /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false) |
2538 | /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false, |
2539 | /// IsFailOnly=true) |
2540 | /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false) |
2541 | /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false, |
2542 | /// IsFailOnly=true) |
2543 | /// |
2544 | /// \param Loc The insert and source location description. |
2545 | /// \param X The target atomic pointer to be updated. |
2546 | /// \param V Memory address where to store captured value (for |
2547 | /// compare capture only). |
2548 | /// \param R Memory address where to store comparison result |
2549 | /// (for compare capture with '==' only). |
2550 | /// \param E The expected value ('e') for forms that use an |
2551 | /// equality comparison or an expression ('expr') for |
2552 | /// forms that use 'ordop' (logically an atomic maximum or |
2553 | /// minimum). |
2554 | /// \param D The desired value for forms that use an equality |
2555 | /// comparison. For forms that use 'ordop', it should be |
2556 | /// \p nullptr. |
2557 | /// \param AO Atomic ordering of the generated atomic instructions. |
2558 | /// \param Op Atomic compare operation. It can only be ==, <, or >. |
2559 | /// \param IsXBinopExpr True if the conditional statement is in the form where |
2560 | /// x is on LHS. It only matters for < or >. |
2561 | /// \param IsPostfixUpdate True if original value of 'x' must be stored in |
2562 | /// 'v', not an updated one (for compare capture |
2563 | /// only). |
2564 | /// \param IsFailOnly True if the original value of 'x' is stored to 'v' |
2565 | /// only when the comparison fails. This is only valid for |
2566 | /// the case the comparison is '=='. |
2567 | /// \param Failure (second overload only) Atomic ordering, presumably the one used when the compare-exchange fails (TODO confirm against the definition). |
2568 | /// \return Insertion point after the generated atomic compare IR. |
2569 | InsertPointTy |
2570 | createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, |
2571 | AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, |
2572 | AtomicOrdering AO, omp::OMPAtomicCompareOp Op, |
2573 | bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly); |
2574 | InsertPointTy createAtomicCompare(const LocationDescription &Loc, |
2575 | AtomicOpValue &X, AtomicOpValue &V, |
2576 | AtomicOpValue &R, Value *E, Value *D, |
2577 | AtomicOrdering AO, |
2578 | omp::OMPAtomicCompareOp Op, |
2579 | bool IsXBinopExpr, bool IsPostfixUpdate, |
2580 | bool IsFailOnly, AtomicOrdering Failure); |
2581 | |
2582 | /// Create the control flow structure of a canonical OpenMP loop. |
2583 | /// |
2584 | /// The emitted loop will be disconnected, i.e. no edge to the loop's |
2585 | /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's |
2586 | /// IRBuilder location is not preserved. |
2587 | /// |
2588 | /// \param DL DebugLoc used for the instructions in the skeleton. |
2589 | /// \param TripCount Value to be used for the trip count. |
2590 | /// \param F Function in which to insert the BasicBlocks. |
2591 | /// \param PreInsertBefore Where to insert BBs that execute before the body, |
2592 | /// typically the body itself. |
2593 | /// \param PostInsertBefore Where to insert BBs that execute after the body. |
2594 | /// \param Name Base name used to derive BB |
2595 | /// and instruction names. |
2596 | /// |
2597 | /// \returns The CanonicalLoopInfo that represents the emitted loop. |
2598 | CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount, |
2599 | Function *F, |
2600 | BasicBlock *PreInsertBefore, |
2601 | BasicBlock *PostInsertBefore, |
2602 | const Twine &Name = {}); |
/// Name string of the OpenMP offload info metadata.
const std::string ompOffloadInfoName{"omp_offload.info"};
2605 | |
2606 | /// Loads all the offload entries information from the host IR |
2607 | /// metadata. This function is only meant to be used with device code |
2608 | /// generation. |
2609 | /// |
2610 | /// \param M Module to load Metadata info from. Module passed maybe |
2611 | /// loaded from bitcode file, i.e, different from OpenMPIRBuilder::M module. |
2612 | void loadOffloadInfoMetadata(Module &M); |
2613 | |
2614 | /// Loads all the offload entries information from the host IR |
2615 | /// metadata read from the file passed in as the HostFilePath argument. This |
2616 | /// function is only meant to be used with device code generation. |
2617 | /// |
2618 | /// \param HostFilePath The path to the host IR file, |
2619 | /// used to load in offload metadata for the device, allowing host and device |
2620 | /// to maintain the same metadata mapping. |
2621 | void loadOffloadInfoMetadata(StringRef HostFilePath); |
2622 | |
2623 | /// Gets (if variable with the given name already exist) or creates |
2624 | /// internal global variable with the specified Name. The created variable has |
2625 | /// linkage CommonLinkage by default and is initialized by null value. |
2626 | /// \param Ty Type of the global variable. If it is exist already the type |
2627 | /// must be the same. |
2628 | /// \param Name Name of the variable. |
2629 | GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name, |
2630 | unsigned AddressSpace = 0); |
2631 | |
2632 | /// Create a global function to register OpenMP requires flags into the |
2633 | /// runtime, according to the `Config`. |
2634 | /// |
2635 | /// This function should be added to the list of constructors of the |
2636 | /// compilation unit in order to be called before other OpenMP runtime |
2637 | /// functions. |
2638 | /// |
2639 | /// \param Name Name of the created function. |
2640 | Function *createRegisterRequires(StringRef Name); |
2641 | }; |
2642 | |
2643 | /// Class to represented the control flow structure of an OpenMP canonical loop. |
2644 | /// |
2645 | /// The control-flow structure is standardized for easy consumption by |
2646 | /// directives associated with loops. For instance, the worksharing-loop |
2647 | /// construct may change this control flow such that each loop iteration is |
2648 | /// executed on only one thread. The constraints of a canonical loop in brief |
2649 | /// are: |
2650 | /// |
2651 | /// * The number of loop iterations must have been computed before entering the |
2652 | /// loop. |
2653 | /// |
2654 | /// * Has an (unsigned) logical induction variable that starts at zero and |
2655 | /// increments by one. |
2656 | /// |
2657 | /// * The loop's CFG itself has no side-effects. The OpenMP specification |
2658 | /// itself allows side-effects, but the order in which they happen, including |
2659 | /// how often or whether at all, is unspecified. We expect that the frontend |
2660 | /// will emit those side-effect instructions somewhere (e.g. before the loop) |
2661 | /// such that the CanonicalLoopInfo itself can be side-effect free. |
2662 | /// |
2663 | /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated |
2664 | /// execution of a loop body that satifies these constraints. It does NOT |
2665 | /// represent arbitrary SESE regions that happen to contain a loop. Do not use |
2666 | /// CanonicalLoopInfo for such purposes. |
2667 | /// |
2668 | /// The control flow can be described as follows: |
2669 | /// |
2670 | /// Preheader |
2671 | /// | |
2672 | /// /-> Header |
2673 | /// | | |
2674 | /// | Cond---\ |
2675 | /// | | | |
2676 | /// | Body | |
2677 | /// | | | | |
2678 | /// | <...> | |
2679 | /// | | | | |
2680 | /// \--Latch | |
2681 | /// | |
2682 | /// Exit |
2683 | /// | |
2684 | /// After |
2685 | /// |
2686 | /// The loop is thought to start at PreheaderIP (at the Preheader's terminator, |
2687 | /// including) and end at AfterIP (at the After's first instruction, excluding). |
2688 | /// That is, instructions in the Preheader and After blocks (except the |
2689 | /// Preheader's terminator) are out of CanonicalLoopInfo's control and may have |
2690 | /// side-effects. Typically, the Preheader is used to compute the loop's trip |
2691 | /// count. The instructions from BodyIP (at the Body block's first instruction, |
2692 | /// excluding) until the Latch are also considered outside CanonicalLoopInfo's |
2693 | /// control and thus can have side-effects. The body block is the single entry |
2694 | /// point into the loop body, which may contain arbitrary control flow as long |
2695 | /// as all control paths eventually branch to the Latch block. |
2696 | /// |
2697 | /// TODO: Consider adding another standardized BasicBlock between Body CFG and |
2698 | /// Latch to guarantee that there is only a single edge to the latch. It would |
2699 | /// make loop transformations easier to not needing to consider multiple |
2700 | /// predecessors of the latch (See redirectAllPredecessorsTo) and would give us |
2701 | /// an equivalant to PreheaderIP, AfterIP and BodyIP for inserting code that |
2702 | /// executes after each body iteration. |
2703 | /// |
2704 | /// There must be no loop-carried dependencies through llvm::Values. This is |
2705 | /// equivalant to that the Latch has no PHINode and the Header's only PHINode is |
2706 | /// for the induction variable. |
2707 | /// |
2708 | /// All code in Header, Cond, Latch and Exit (plus the terminator of the |
2709 | /// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked |
2710 | /// by assertOK(). They are expected to not be modified unless explicitly |
2711 | /// modifying the CanonicalLoopInfo through a methods that applies a OpenMP |
2712 | /// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop, |
2713 | /// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its |
2714 | /// basic blocks. After invalidation, the CanonicalLoopInfo must not be used |
2715 | /// anymore as its underlying control flow may not exist anymore. |
2716 | /// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop |
2717 | /// may also return a new CanonicalLoopInfo that can be passed to other |
2718 | /// loop-associated construct implementing methods. These loop-transforming |
2719 | /// methods may either create a new CanonicalLoopInfo usually using |
2720 | /// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and |
2721 | /// modify one of the input CanonicalLoopInfo and return it as representing the |
2722 | /// modified loop. What is done is an implementation detail of |
2723 | /// transformation-implementing method and callers should always assume that the |
2724 | /// CanonicalLoopInfo passed to it is invalidated and a new object is returned. |
2725 | /// Returned CanonicalLoopInfo have the same structure and guarantees as the one |
2726 | /// created by createCanonicalLoop, such that transforming methods do not have |
2727 | /// to special case where the CanonicalLoopInfo originated from. |
2728 | /// |
2729 | /// Generally, methods consuming CanonicalLoopInfo do not need an |
2730 | /// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the |
2731 | /// CanonicalLoopInfo to insert new or modify existing instructions. Unless |
2732 | /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate |
2733 | /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically, |
2734 | /// any InsertPoint in the Preheader, After or Block can still be used after |
2735 | /// calling such a method. |
2736 | /// |
2737 | /// TODO: Provide mechanisms for exception handling and cancellation points. |
2738 | /// |
2739 | /// Defined outside OpenMPIRBuilder because nested classes cannot be |
2740 | /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h. |
2741 | class CanonicalLoopInfo { |
2742 | friend class OpenMPIRBuilder; |
2743 | |
2744 | private: |
2745 | BasicBlock * = nullptr; |
2746 | BasicBlock *Cond = nullptr; |
2747 | BasicBlock *Latch = nullptr; |
2748 | BasicBlock *Exit = nullptr; |
2749 | |
2750 | /// Add the control blocks of this loop to \p BBs. |
2751 | /// |
2752 | /// This does not include any block from the body, including the one returned |
2753 | /// by getBody(). |
2754 | /// |
2755 | /// FIXME: This currently includes the Preheader and After blocks even though |
2756 | /// their content is (mostly) not under CanonicalLoopInfo's control. |
2757 | /// Re-evaluated whether this makes sense. |
2758 | void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs); |
2759 | |
2760 | /// Sets the number of loop iterations to the given value. This value must be |
2761 | /// valid in the condition block (i.e., defined in the preheader) and is |
2762 | /// interpreted as an unsigned integer. |
2763 | void setTripCount(Value *TripCount); |
2764 | |
2765 | /// Replace all uses of the canonical induction variable in the loop body with |
2766 | /// a new one. |
2767 | /// |
2768 | /// The intended use case is to update the induction variable for an updated |
2769 | /// iteration space such that it can stay normalized in the 0...tripcount-1 |
2770 | /// range. |
2771 | /// |
2772 | /// The \p Updater is called with the (presumable updated) current normalized |
2773 | /// induction variable and is expected to return the value that uses of the |
2774 | /// pre-updated induction values should use instead, typically dependent on |
2775 | /// the new induction variable. This is a lambda (instead of e.g. just passing |
2776 | /// the new value) to be able to distinguish the uses of the pre-updated |
2777 | /// induction variable and uses of the induction varible to compute the |
2778 | /// updated induction variable value. |
2779 | void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater); |
2780 | |
2781 | public: |
2782 | /// Returns whether this object currently represents the IR of a loop. If |
2783 | /// returning false, it may have been consumed by a loop transformation or not |
2784 | /// been intialized. Do not use in this case; |
2785 | bool isValid() const { return Header; } |
2786 | |
2787 | /// The preheader ensures that there is only a single edge entering the loop. |
2788 | /// Code that must be execute before any loop iteration can be emitted here, |
2789 | /// such as computing the loop trip count and begin lifetime markers. Code in |
2790 | /// the preheader is not considered part of the canonical loop. |
2791 | BasicBlock *() const; |
2792 | |
2793 | /// The header is the entry for each iteration. In the canonical control flow, |
2794 | /// it only contains the PHINode for the induction variable. |
2795 | BasicBlock *() const { |
2796 | assert(isValid() && "Requires a valid canonical loop" ); |
2797 | return Header; |
2798 | } |
2799 | |
2800 | /// The condition block computes whether there is another loop iteration. If |
2801 | /// yes, branches to the body; otherwise to the exit block. |
2802 | BasicBlock *getCond() const { |
2803 | assert(isValid() && "Requires a valid canonical loop" ); |
2804 | return Cond; |
2805 | } |
2806 | |
2807 | /// The body block is the single entry for a loop iteration and not controlled |
2808 | /// by CanonicalLoopInfo. It can contain arbitrary control flow but must |
2809 | /// eventually branch to the \p Latch block. |
2810 | BasicBlock *getBody() const { |
2811 | assert(isValid() && "Requires a valid canonical loop" ); |
2812 | return cast<BranchInst>(Val: Cond->getTerminator())->getSuccessor(i: 0); |
2813 | } |
2814 | |
2815 | /// Reaching the latch indicates the end of the loop body code. In the |
2816 | /// canonical control flow, it only contains the increment of the induction |
2817 | /// variable. |
2818 | BasicBlock *getLatch() const { |
2819 | assert(isValid() && "Requires a valid canonical loop" ); |
2820 | return Latch; |
2821 | } |
2822 | |
2823 | /// Reaching the exit indicates no more iterations are being executed. |
2824 | BasicBlock *getExit() const { |
2825 | assert(isValid() && "Requires a valid canonical loop" ); |
2826 | return Exit; |
2827 | } |
2828 | |
2829 | /// The after block is intended for clean-up code such as lifetime end |
2830 | /// markers. It is separate from the exit block to ensure, analogous to the |
2831 | /// preheader, it having just a single entry edge and being free from PHI |
2832 | /// nodes should there be multiple loop exits (such as from break |
2833 | /// statements/cancellations). |
2834 | BasicBlock *getAfter() const { |
2835 | assert(isValid() && "Requires a valid canonical loop" ); |
2836 | return Exit->getSingleSuccessor(); |
2837 | } |
2838 | |
2839 | /// Returns the llvm::Value containing the number of loop iterations. It must |
2840 | /// be valid in the preheader and always interpreted as an unsigned integer of |
2841 | /// any bit-width. |
2842 | Value *getTripCount() const { |
2843 | assert(isValid() && "Requires a valid canonical loop" ); |
2844 | Instruction *CmpI = &Cond->front(); |
2845 | assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount" ); |
2846 | return CmpI->getOperand(i: 1); |
2847 | } |
2848 | |
2849 | /// Returns the instruction representing the current logical induction |
2850 | /// variable. Always unsigned, always starting at 0 with an increment of one. |
2851 | Instruction *getIndVar() const { |
2852 | assert(isValid() && "Requires a valid canonical loop" ); |
2853 | Instruction *IndVarPHI = &Header->front(); |
2854 | assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI" ); |
2855 | return IndVarPHI; |
2856 | } |
2857 | |
2858 | /// Return the type of the induction variable (and the trip count). |
2859 | Type *getIndVarType() const { |
2860 | assert(isValid() && "Requires a valid canonical loop" ); |
2861 | return getIndVar()->getType(); |
2862 | } |
2863 | |
2864 | /// Return the insertion point for user code before the loop. |
2865 | OpenMPIRBuilder::InsertPointTy () const { |
2866 | assert(isValid() && "Requires a valid canonical loop" ); |
2867 | BasicBlock * = getPreheader(); |
2868 | return {Preheader, std::prev(x: Preheader->end())}; |
2869 | }; |
2870 | |
2871 | /// Return the insertion point for user code in the body. |
2872 | OpenMPIRBuilder::InsertPointTy getBodyIP() const { |
2873 | assert(isValid() && "Requires a valid canonical loop" ); |
2874 | BasicBlock *Body = getBody(); |
2875 | return {Body, Body->begin()}; |
2876 | }; |
2877 | |
2878 | /// Return the insertion point for user code after the loop. |
2879 | OpenMPIRBuilder::InsertPointTy getAfterIP() const { |
2880 | assert(isValid() && "Requires a valid canonical loop" ); |
2881 | BasicBlock *After = getAfter(); |
2882 | return {After, After->begin()}; |
2883 | }; |
2884 | |
2885 | Function *getFunction() const { |
2886 | assert(isValid() && "Requires a valid canonical loop" ); |
2887 | return Header->getParent(); |
2888 | } |
2889 | |
2890 | /// Consistency self-check. |
2891 | void assertOK() const; |
2892 | |
2893 | /// Invalidate this loop. That is, the underlying IR does not fulfill the |
2894 | /// requirements of an OpenMP canonical loop anymore. |
2895 | void invalidate(); |
2896 | }; |
2897 | |
2898 | } // end namespace llvm |
2899 | |
2900 | #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H |
2901 | |