1//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the OpenMPIRBuilder class and helpers used as a convenient
10// way to create LLVM instructions for OpenMP directives.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Allocator.h"
#include <cassert>
#include <forward_list>
#include <utility>
22
23namespace llvm {
24class CanonicalLoopInfo;
25
26/// An interface to create LLVM-IR for OpenMP directives.
27///
28/// Each OpenMP directive has a corresponding public generator method.
29class OpenMPIRBuilder {
30public:
31 /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
32 /// not have an effect on \p M (see initialize).
33 OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
34 ~OpenMPIRBuilder();
35
/// Initialize the internal state; this will put structure types and
/// potentially other helpers into the underlying module. Must be called
/// before any other method and only once!
39 void initialize();
40
41 /// Finalize the underlying module, e.g., by outlining regions.
42 /// \param Fn The function to be finalized. If not used,
43 /// all functions are finalized.
44 /// \param AllowExtractorSinking Flag to include sinking instructions,
45 /// emitted by CodeExtractor, in the
46 /// outlined region. Default is false.
47 void finalize(Function *Fn = nullptr, bool AllowExtractorSinking = false);
48
49 /// Add attributes known for \p FnID to \p Fn.
50 void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
51
52 /// Type used throughout for insertion points.
53 using InsertPointTy = IRBuilder<>::InsertPoint;
54
55 /// Callback type for variable finalization (think destructors).
56 ///
57 /// \param CodeGenIP is the insertion point at which the finalization code
58 /// should be placed.
59 ///
60 /// A finalize callback knows about all objects that need finalization, e.g.
61 /// destruction, when the scope of the currently generated construct is left
62 /// at the time, and location, the callback is invoked.
63 using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
64
65 struct FinalizationInfo {
66 /// The finalization callback provided by the last in-flight invocation of
67 /// createXXXX for the directive of kind DK.
68 FinalizeCallbackTy FiniCB;
69
70 /// The directive kind of the innermost directive that has an associated
71 /// region which might require finalization when it is left.
72 omp::Directive DK;
73
74 /// Flag to indicate if the directive is cancellable.
75 bool IsCancellable;
76 };
77
78 /// Push a finalization callback on the finalization stack.
79 ///
80 /// NOTE: Temporary solution until Clang CG is gone.
81 void pushFinalizationCB(const FinalizationInfo &FI) {
82 FinalizationStack.push_back(FI);
83 }
84
85 /// Pop the last finalization callback from the finalization stack.
86 ///
87 /// NOTE: Temporary solution until Clang CG is gone.
88 void popFinalizationCB() { FinalizationStack.pop_back(); }
89
90 /// Callback type for body (=inner region) code generation
91 ///
92 /// The callback takes code locations as arguments, each describing a
93 /// location at which code might need to be generated or a location that is
94 /// the target of control transfer.
95 ///
96 /// \param AllocaIP is the insertion point at which new alloca instructions
97 /// should be placed.
98 /// \param CodeGenIP is the insertion point at which the body code should be
99 /// placed.
100 /// \param ContinuationBB is the basic block target to leave the body.
101 ///
102 /// Note that all blocks pointed to by the arguments have terminators.
103 using BodyGenCallbackTy =
104 function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
105 BasicBlock &ContinuationBB)>;
106
// This is created primarily for the sections construct, as
// llvm::function_ref (BodyGenCallbackTy) is not storable: as described in
// the comments of the function_ref class, a function_ref contains a
// non-owning reference to the callable.
111 using StorableBodyGenCallbackTy =
112 std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
113 BasicBlock &ContinuationBB)>;
114
115 /// Callback type for loop body code generation.
116 ///
117 /// \param CodeGenIP is the insertion point where the loop's body code must be
118 /// placed. This will be a dedicated BasicBlock with a
119 /// conditional branch from the loop condition check and
120 /// terminated with an unconditional branch to the loop
121 /// latch.
122 /// \param IndVar is the induction variable usable at the insertion point.
123 using LoopBodyGenCallbackTy =
124 function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
125
126 /// Callback type for variable privatization (think copy & default
127 /// constructor).
128 ///
129 /// \param AllocaIP is the insertion point at which new alloca instructions
130 /// should be placed.
131 /// \param CodeGenIP is the insertion point at which the privatization code
132 /// should be placed.
133 /// \param Original The value being copied/created, should not be used in the
134 /// generated IR.
135 /// \param Inner The equivalent of \p Original that should be used in the
136 /// generated IR; this is equal to \p Original if the value is
137 /// a pointer and can thus be passed directly, otherwise it is
138 /// an equivalent but different value.
139 /// \param ReplVal The replacement value, thus a copy or new created version
140 /// of \p Inner.
141 ///
142 /// \returns The new insertion point where code generation continues and
143 /// \p ReplVal the replacement value.
144 using PrivatizeCallbackTy = function_ref<InsertPointTy(
145 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
146 Value &Inner, Value *&ReplVal)>;
147
148 /// Description of a LLVM-IR insertion point (IP) and a debug/source location
149 /// (filename, line, column, ...).
150 struct LocationDescription {
151 template <typename T, typename U>
152 LocationDescription(const IRBuilder<T, U> &IRB)
153 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
154 LocationDescription(const InsertPointTy &IP) : IP(IP) {}
155 LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
156 : IP(IP), DL(DL) {}
157 InsertPointTy IP;
158 DebugLoc DL;
159 };
160
161 /// Emitter methods for OpenMP directives.
162 ///
163 ///{
164
165 /// Generator for '#omp barrier'
166 ///
167 /// \param Loc The location where the barrier directive was encountered.
168 /// \param DK The kind of directive that caused the barrier.
169 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
170 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
171 /// should be checked and acted upon.
172 ///
173 /// \returns The insertion point after the barrier.
174 InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
175 bool ForceSimpleCall = false,
176 bool CheckCancelFlag = true);
177
178 /// Generator for '#omp cancel'
179 ///
180 /// \param Loc The location where the directive was encountered.
181 /// \param IfCondition The evaluated 'if' clause expression, if any.
/// \param CanceledDirective The kind of directive that is canceled.
183 ///
184 /// \returns The insertion point after the barrier.
185 InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
186 omp::Directive CanceledDirective);
187
188 /// Generator for '#omp parallel'
189 ///
190 /// \param Loc The insert and source location description.
191 /// \param AllocaIP The insertion points to be used for alloca instructions.
192 /// \param BodyGenCB Callback that will generate the region code.
193 /// \param PrivCB Callback to copy a given variable (think copy constructor).
194 /// \param FiniCB Callback to finalize variable copies.
195 /// \param IfCondition The evaluated 'if' clause expression, if any.
196 /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
197 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
198 /// \param IsCancellable Flag to indicate a cancellable parallel region.
199 ///
200 /// \returns The insertion position *after* the parallel.
201 IRBuilder<>::InsertPoint
202 createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
203 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
204 FinalizeCallbackTy FiniCB, Value *IfCondition,
205 Value *NumThreads, omp::ProcBindKind ProcBind,
206 bool IsCancellable);
207
208 /// Generator for the control flow structure of an OpenMP canonical loop.
209 ///
210 /// This generator operates on the logical iteration space of the loop, i.e.
211 /// the caller only has to provide a loop trip count of the loop as defined by
212 /// base language semantics. The trip count is interpreted as an unsigned
213 /// integer. The induction variable passed to \p BodyGenCB will be of the same
214 /// type and run from 0 to \p TripCount - 1. It is up to the callback to
215 /// convert the logical iteration variable to the loop counter variable in the
216 /// loop body.
217 ///
218 /// \param Loc The insert and source location description. The insert
219 /// location can be between two instructions or the end of a
220 /// degenerate block (e.g. a BB under construction).
221 /// \param BodyGenCB Callback that will generate the loop body code.
222 /// \param TripCount Number of iterations the loop body is executed.
223 /// \param Name Base name used to derive BB and instruction names.
224 ///
225 /// \returns An object representing the created control flow structure which
226 /// can be used for loop-associated directives.
227 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
228 LoopBodyGenCallbackTy BodyGenCB,
229 Value *TripCount,
230 const Twine &Name = "loop");
231
232 /// Generator for the control flow structure of an OpenMP canonical loop.
233 ///
234 /// Instead of a logical iteration space, this allows specifying user-defined
235 /// loop counter values using increment, upper- and lower bounds. To
236 /// disambiguate the terminology when counting downwards, instead of lower
237 /// bounds we use \p Start for the loop counter value in the first body
238 /// iteration.
239 ///
240 /// Consider the following limitations:
241 ///
/// * A loop counter space over all integer values of its bit-width cannot be
///   represented. E.g. using uint8_t, the loop trip count of 256 of the
///   following loop cannot be stored into an 8 bit integer:
245 ///
246 /// DO I = 0, 255, 1
247 ///
248 /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
249 /// effectively counting downwards:
250 ///
251 /// for (uint8_t i = 100u; i > 0; i += 127u)
252 ///
253 ///
254 /// TODO: May need to add additional parameters to represent:
255 ///
256 /// * Allow representing downcounting with unsigned integers.
257 ///
258 /// * Sign of the step and the comparison operator might disagree:
259 ///
260 /// for (int i = 0; i < 42; --i)
261 ///
///
/// \param Loc       The insert and source location description.
/// \param BodyGenCB Callback that will generate the loop body code.
/// \param Start     Value of the loop counter for the first iteration.
/// \param Stop      Loop counter values past this will stop the
///                  iterations.
/// \param Step      Loop counter increment after each iteration; negative
///                  means counting down.
/// \param IsSigned  Whether Start, Stop and Step are signed integers.
271 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
272 /// counter.
273 /// \param ComputeIP Insertion point for instructions computing the trip
274 /// count. Can be used to ensure the trip count is available
275 /// at the outermost loop of a loop nest. If not set,
276 /// defaults to the preheader of the generated loop.
277 /// \param Name Base name used to derive BB and instruction names.
278 ///
279 /// \returns An object representing the created control flow structure which
280 /// can be used for loop-associated directives.
281 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
282 LoopBodyGenCallbackTy BodyGenCB,
283 Value *Start, Value *Stop, Value *Step,
284 bool IsSigned, bool InclusiveStop,
285 InsertPointTy ComputeIP = {},
286 const Twine &Name = "loop");
287
288 /// Collapse a loop nest into a single loop.
289 ///
290 /// Merges loops of a loop nest into a single CanonicalLoopNest representation
291 /// that has the same number of innermost loop iterations as the origin loop
292 /// nest. The induction variables of the input loops are derived from the
293 /// collapsed loop's induction variable. This is intended to be used to
294 /// implement OpenMP's collapse clause. Before applying a directive,
295 /// collapseLoops normalizes a loop nest to contain only a single loop and the
296 /// directive's implementation does not need to handle multiple loops itself.
297 /// This does not remove the need to handle all loop nest handling by
298 /// directives, such as the ordered(<n>) clause or the simd schedule-clause
299 /// modifier of the worksharing-loop directive.
300 ///
301 /// Example:
302 /// \code
303 /// for (int i = 0; i < 7; ++i) // Canonical loop "i"
304 /// for (int j = 0; j < 9; ++j) // Canonical loop "j"
305 /// body(i, j);
306 /// \endcode
307 ///
308 /// After collapsing with Loops={i,j}, the loop is changed to
309 /// \code
310 /// for (int ij = 0; ij < 63; ++ij) {
311 /// int i = ij / 9;
312 /// int j = ij % 9;
313 /// body(i, j);
314 /// }
315 /// \endcode
316 ///
317 /// In the current implementation, the following limitations apply:
318 ///
319 /// * All input loops have an induction variable of the same type.
320 ///
321 /// * The collapsed loop will have the same trip count integer type as the
322 /// input loops. Therefore it is possible that the collapsed loop cannot
323 /// represent all iterations of the input loops. For instance, assuming a
324 /// 32 bit integer type, and two input loops both iterating 2^16 times, the
///   theoretical trip count of the collapsed loop would be 2^32 iterations,
///   which cannot be represented in a 32-bit integer. Behavior is undefined
327 /// in this case.
328 ///
329 /// * The trip counts of every input loop must be available at \p ComputeIP.
330 /// Non-rectangular loops are not yet supported.
331 ///
332 /// * At each nest level, code between a surrounding loop and its nested loop
333 /// is hoisted into the loop body, and such code will be executed more
334 /// often than before collapsing (or not at all if any inner loop iteration
335 /// has a trip count of 0). This is permitted by the OpenMP specification.
336 ///
/// \param DL        Debug location for instructions added for collapsing,
///                  such as instructions to compute/derive the input loop's
///                  induction variables.
340 /// \param Loops Loops in the loop nest to collapse. Loops are specified
341 /// from outermost-to-innermost and every control flow of a
342 /// loop's body must pass through its directly nested loop.
/// \param ComputeIP Where additional instructions that compute the collapsed
///                  trip count are inserted. If not set, defaults to before
///                  the generated loop.
346 ///
347 /// \returns The CanonicalLoopInfo object representing the collapsed loop.
348 CanonicalLoopInfo *collapseLoops(DebugLoc DL,
349 ArrayRef<CanonicalLoopInfo *> Loops,
350 InsertPointTy ComputeIP);
351
352 /// Modifies the canonical loop to be a statically-scheduled workshare loop.
353 ///
/// This takes \p CLI, which represents a canonical loop, such as the one
355 /// created by \p createCanonicalLoop and emits additional instructions to
356 /// turn it into a workshare loop. In particular, it calls to an OpenMP
357 /// runtime function in the preheader to obtain the loop bounds to be used in
358 /// the current thread, updates the relevant instructions in the canonical
359 /// loop and calls to an OpenMP runtime finalization function after the loop.
360 ///
361 /// \param Loc The source location description, the insertion location
362 /// is not used.
363 /// \param CLI A descriptor of the canonical loop to workshare.
364 /// \param AllocaIP An insertion point for Alloca instructions usable in the
365 /// preheader of the loop.
366 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
367 /// the loop.
368 /// \param Chunk The size of loop chunk considered as a unit when
369 /// scheduling. If \p nullptr, defaults to 1.
370 ///
371 /// \returns Updated CanonicalLoopInfo.
372 CanonicalLoopInfo *createStaticWorkshareLoop(const LocationDescription &Loc,
373 CanonicalLoopInfo *CLI,
374 InsertPointTy AllocaIP,
375 bool NeedsBarrier,
376 Value *Chunk = nullptr);
377
378 /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
379 ///
/// This takes \p CLI, which represents a canonical loop, such as the one
381 /// created by \p createCanonicalLoop and emits additional instructions to
382 /// turn it into a workshare loop. In particular, it calls to an OpenMP
383 /// runtime function in the preheader to obtain, and then in each iteration
384 /// to update the loop counter.
385 /// \param Loc The source location description, the insertion location
386 /// is not used.
387 /// \param CLI A descriptor of the canonical loop to workshare.
388 /// \param AllocaIP An insertion point for Alloca instructions usable in the
389 /// preheader of the loop.
/// \param NeedsBarrier Indicates whether a barrier must be inserted after
391 /// the loop.
392 /// \param Chunk The size of loop chunk considered as a unit when
393 /// scheduling. If \p nullptr, defaults to 1.
394 ///
395 /// \returns Point where to insert code after the loop.
396 InsertPointTy createDynamicWorkshareLoop(const LocationDescription &Loc,
397 CanonicalLoopInfo *CLI,
398 InsertPointTy AllocaIP,
399 bool NeedsBarrier,
400 Value *Chunk = nullptr);
401
402 /// Modifies the canonical loop to be a workshare loop.
403 ///
/// This takes \p CLI, which represents a canonical loop, such as the one
405 /// created by \p createCanonicalLoop and emits additional instructions to
406 /// turn it into a workshare loop. In particular, it calls to an OpenMP
407 /// runtime function in the preheader to obtain the loop bounds to be used in
408 /// the current thread, updates the relevant instructions in the canonical
409 /// loop and calls to an OpenMP runtime finalization function after the loop.
410 ///
411 /// \param Loc The source location description, the insertion location
412 /// is not used.
413 /// \param CLI A descriptor of the canonical loop to workshare.
414 /// \param AllocaIP An insertion point for Alloca instructions usable in the
415 /// preheader of the loop.
/// \param NeedsBarrier Indicates whether a barrier must be inserted after
417 /// the loop.
418 ///
419 /// \returns Updated CanonicalLoopInfo.
420 CanonicalLoopInfo *createWorkshareLoop(const LocationDescription &Loc,
421 CanonicalLoopInfo *CLI,
422 InsertPointTy AllocaIP,
423 bool NeedsBarrier);
424
425 /// Tile a loop nest.
426 ///
427 /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
/// \p Loops must be perfectly nested, from outermost to innermost loop
429 /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
430 /// of every loop and every tile sizes must be usable in the outermost
431 /// loop's preheader. This implies that the loop nest is rectangular.
432 ///
433 /// Example:
434 /// \code
435 /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
436 /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
437 /// body(i, j);
438 /// \endcode
439 ///
440 /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
441 /// \code
442 /// for (int i1 = 0; i1 < 3; ++i1)
443 /// for (int j1 = 0; j1 < 2; ++j1)
444 /// for (int i2 = 0; i2 < 5; ++i2)
445 /// for (int j2 = 0; j2 < 7; ++j2)
///         body(i1*5+i2, j1*7+j2);
447 /// \endcode
448 ///
/// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
/// are referred to as the floor loops, and the loops i2 and j2 as the tile
/// loops. Tiling also handles non-constant trip counts, non-constant tile
/// sizes and trip counts that are not multiples of the tile size. In the
/// latter case the tile loop of the last floor-loop iteration will have fewer
/// iterations than specified as its tile size.
455 ///
456 ///
/// \param DL        Debug location for instructions added by tiling, for
///                  instance the floor- and tile trip count computation.
/// \param Loops     Loops to tile. The CanonicalLoopInfo objects are
///                  invalidated by this method, i.e. should not be used
///                  after tiling.
/// \param TileSizes For each loop in \p Loops, the tile size for that
///                  dimension.
464 ///
465 /// \returns A list of generated loops. Contains twice as many loops as the
466 /// input loop nest; the first half are the floor loops and the
467 /// second half are the tile loops.
468 std::vector<CanonicalLoopInfo *>
469 tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
470 ArrayRef<Value *> TileSizes);
471
472 /// Generator for '#omp flush'
473 ///
474 /// \param Loc The location where the flush directive was encountered
475 void createFlush(const LocationDescription &Loc);
476
477 /// Generator for '#omp taskwait'
478 ///
479 /// \param Loc The location where the taskwait directive was encountered.
480 void createTaskwait(const LocationDescription &Loc);
481
482 /// Generator for '#omp taskyield'
483 ///
484 /// \param Loc The location where the taskyield directive was encountered.
485 void createTaskyield(const LocationDescription &Loc);
486
487 ///}
488
489 /// Return the insertion point used by the underlying IRBuilder.
490 InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
491
492 /// Update the internal location to \p Loc.
493 bool updateToLocation(const LocationDescription &Loc) {
494 Builder.restoreIP(Loc.IP);
495 Builder.SetCurrentDebugLocation(Loc.DL);
496 return Loc.IP.getBlock() != nullptr;
497 }
498
499 /// Return the function declaration for the runtime function with \p FnID.
500 FunctionCallee getOrCreateRuntimeFunction(Module &M,
501 omp::RuntimeFunction FnID);
502
503 Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
504
505 /// Return the (LLVM-IR) string describing the source location \p LocStr.
506 Constant *getOrCreateSrcLocStr(StringRef LocStr);
507
508 /// Return the (LLVM-IR) string describing the default source location.
509 Constant *getOrCreateDefaultSrcLocStr();
510
511 /// Return the (LLVM-IR) string describing the source location identified by
512 /// the arguments.
513 Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
514 unsigned Line, unsigned Column);
515
516 /// Return the (LLVM-IR) string describing the source location \p Loc.
517 Constant *getOrCreateSrcLocStr(const LocationDescription &Loc);
518
519 /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
520 /// TODO: Create a enum class for the Reserve2Flags
521 Value *getOrCreateIdent(Constant *SrcLocStr,
522 omp::IdentFlag Flags = omp::IdentFlag(0),
523 unsigned Reserve2Flags = 0);
524
525 // Get the type corresponding to __kmpc_impl_lanemask_t from the deviceRTL
526 Type *getLanemaskType();
527
528 /// Generate control flow and cleanup for cancellation.
529 ///
530 /// \param CancelFlag Flag indicating if the cancellation is performed.
/// \param CanceledDirective The kind of directive that is canceled.
532 void emitCancelationCheckImpl(Value *CancelFlag,
533 omp::Directive CanceledDirective);
534
535 /// Generate a barrier runtime call.
536 ///
537 /// \param Loc The location at which the request originated and is fulfilled.
538 /// \param DK The directive which caused the barrier
539 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
540 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
541 /// should be checked and acted upon.
542 ///
543 /// \returns The insertion point after the barrier.
544 InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
545 omp::Directive DK, bool ForceSimpleCall,
546 bool CheckCancelFlag);
547
548 /// Generate a flush runtime call.
549 ///
550 /// \param Loc The location at which the request originated and is fulfilled.
551 void emitFlush(const LocationDescription &Loc);
552
553 /// The finalization stack made up of finalize callbacks currently in-flight,
554 /// wrapped into FinalizationInfo objects that reference also the finalization
555 /// target block and the kind of cancellable directive.
556 SmallVector<FinalizationInfo, 8> FinalizationStack;
557
558 /// Return true if the last entry in the finalization stack is of kind \p DK
559 /// and cancellable.
560 bool isLastFinalizationInfoCancellable(omp::Directive DK) {
561 return !FinalizationStack.empty() &&
562 FinalizationStack.back().IsCancellable &&
563 FinalizationStack.back().DK == DK;
564 }
565
566 /// Generate a taskwait runtime call.
567 ///
568 /// \param Loc The location at which the request originated and is fulfilled.
569 void emitTaskwaitImpl(const LocationDescription &Loc);
570
571 /// Generate a taskyield runtime call.
572 ///
573 /// \param Loc The location at which the request originated and is fulfilled.
574 void emitTaskyieldImpl(const LocationDescription &Loc);
575
576 /// Return the current thread ID.
577 ///
578 /// \param Ident The ident (ident_t*) describing the query origin.
579 Value *getOrCreateThreadID(Value *Ident);
580
581 /// The underlying LLVM-IR module
582 Module &M;
583
584 /// The LLVM-IR Builder used to create IR.
585 IRBuilder<> Builder;
586
587 /// Map to remember source location strings
588 StringMap<Constant *> SrcLocStrMap;
589
590 /// Map to remember existing ident_t*.
591 DenseMap<std::pair<Constant *, uint64_t>, Value *> IdentMap;
592
593 /// Helper that contains information about regions we need to outline
594 /// during finalization.
595 struct OutlineInfo {
596 using PostOutlineCBTy = std::function<void(Function &)>;
597 PostOutlineCBTy PostOutlineCB;
598 BasicBlock *EntryBB, *ExitBB;
599
600 /// Collect all blocks in between EntryBB and ExitBB in both the given
601 /// vector and set.
602 void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
603 SmallVectorImpl<BasicBlock *> &BlockVector);
604
605 /// Return the function that contains the region to be outlined.
606 Function *getFunction() const { return EntryBB->getParent(); }
607 };
608
609 /// Collection of regions that need to be outlined during finalization.
610 SmallVector<OutlineInfo, 16> OutlineInfos;
611
612 /// Collection of owned canonical loop objects that eventually need to be
613 /// free'd.
614 std::forward_list<CanonicalLoopInfo> LoopInfos;
615
616 /// Add a new region that will be outlined later.
617 void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
618
619 /// An ordered map of auto-generated variables to their unique names.
620 /// It stores variables with the following names: 1) ".gomp_critical_user_" +
621 /// <critical_section_name> + ".var" for "omp critical" directives; 2)
622 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
623 /// variables.
624 StringMap<AssertingVH<Constant>, BumpPtrAllocator> InternalVars;
625
626 /// Create the global variable holding the offload mappings information.
627 GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
628 std::string VarName);
629
630 /// Create the global variable holding the offload names information.
631 GlobalVariable *
632 createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
633 std::string VarName);
634
635public:
636 /// Generator for __kmpc_copyprivate
637 ///
638 /// \param Loc The source location description.
639 /// \param BufSize Number of elements in the buffer.
640 /// \param CpyBuf List of pointers to data to be copied.
641 /// \param CpyFn function to call for copying data.
642 /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
643 ///
644 /// \return The insertion position *after* the CopyPrivate call.
645
646 InsertPointTy createCopyPrivate(const LocationDescription &Loc,
647 llvm::Value *BufSize, llvm::Value *CpyBuf,
648 llvm::Value *CpyFn, llvm::Value *DidIt);
649
650 /// Generator for '#omp single'
651 ///
652 /// \param Loc The source location description.
653 /// \param BodyGenCB Callback that will generate the region code.
654 /// \param FiniCB Callback to finalize variable copies.
655 /// \param DidIt Local variable used as a flag to indicate 'single' thread
656 ///
657 /// \returns The insertion position *after* the single call.
658 InsertPointTy createSingle(const LocationDescription &Loc,
659 BodyGenCallbackTy BodyGenCB,
660 FinalizeCallbackTy FiniCB, llvm::Value *DidIt);
661
662 /// Generator for '#omp master'
663 ///
664 /// \param Loc The insert and source location description.
665 /// \param BodyGenCB Callback that will generate the region code.
666 /// \param FiniCB Callback to finalize variable copies.
667 ///
668 /// \returns The insertion position *after* the master.
669 InsertPointTy createMaster(const LocationDescription &Loc,
670 BodyGenCallbackTy BodyGenCB,
671 FinalizeCallbackTy FiniCB);
672
673 /// Generator for '#omp masked'
674 ///
675 /// \param Loc The insert and source location description.
676 /// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
///
/// \returns The insertion position *after* the masked.
680 InsertPointTy createMasked(const LocationDescription &Loc,
681 BodyGenCallbackTy BodyGenCB,
682 FinalizeCallbackTy FiniCB, Value *Filter);
683
684 /// Generator for '#omp critical'
685 ///
686 /// \param Loc The insert and source location description.
687 /// \param BodyGenCB Callback that will generate the region body code.
688 /// \param FiniCB Callback to finalize variable copies.
689 /// \param CriticalName name of the lock used by the critical directive
690 /// \param HintInst Hint Instruction for hint clause associated with critical
691 ///
/// \returns The insertion position *after* the critical.
693 InsertPointTy createCritical(const LocationDescription &Loc,
694 BodyGenCallbackTy BodyGenCB,
695 FinalizeCallbackTy FiniCB,
696 StringRef CriticalName, Value *HintInst);
697
698 /// Generator for '#omp sections'
699 ///
700 /// \param Loc The insert and source location description.
701 /// \param AllocaIP The insertion points to be used for alloca instructions.
702 /// \param SectionCBs Callbacks that will generate body of each section.
703 /// \param PrivCB Callback to copy a given variable (think copy constructor).
704 /// \param FiniCB Callback to finalize variable copies.
705 /// \param IsCancellable Flag to indicate a cancellable parallel region.
706 /// \param IsNowait If true, barrier - to ensure all sections are executed
707 /// before moving forward will not be generated.
708 /// \returns The insertion position *after* the sections.
709 InsertPointTy createSections(const LocationDescription &Loc,
710 InsertPointTy AllocaIP,
711 ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
712 PrivatizeCallbackTy PrivCB,
713 FinalizeCallbackTy FiniCB, bool IsCancellable,
714 bool IsNowait);
715
716 /// Generator for '#omp section'
717 ///
718 /// \param Loc The insert and source location description.
719 /// \param BodyGenCB Callback that will generate the body code of the section.
720 /// \param FiniCB Callback to finalize variable copies.
721 /// \returns The insertion position *after* the section.
722 InsertPointTy createSection(const LocationDescription &Loc,
723 BodyGenCallbackTy BodyGenCB,
724 FinalizeCallbackTy FiniCB);
725
726 /// Generate conditional branch and relevant BasicBlocks through which private
727 /// threads copy the 'copyin' variables from Master copy to threadprivate
728 /// copies.
729 ///
730 /// \param IP Insertion block for the copyin conditional.
731 /// \param MasterAddr A pointer to the master variable.
732 /// \param PrivateAddr A pointer to the threadprivate variable.
733 /// \param IntPtrTy Pointer size type.
734 /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
735 /// and copy.in.end block. Defaults to true.
736 ///
737 /// \returns The insertion point where the copying operation is to be emitted.
738 InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
739 Value *PrivateAddr,
740 llvm::IntegerType *IntPtrTy,
741 bool BranchtoEnd = true);
742
743 /// Create a runtime call for kmpc_Alloc
744 ///
745 /// \param Loc The insert and source location description.
746 /// \param Size Size of the memory space to be allocated.
747 /// \param Allocator Allocator information instruction.
748 /// \param Name Name of the call instruction for OMP_alloc (empty by default).
749 ///
750 /// \returns CallInst to the OMP_Alloc call
751 CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
752 Value *Allocator, std::string Name = "");
753
754 /// Create a runtime call for kmpc_free
755 ///
756 /// \param Loc The insert and source location description.
757 /// \param Addr Address of the memory space to be freed.
758 /// \param Allocator Allocator information instruction.
759 /// \param Name Name of the call instruction for OMP_Free (empty by default).
760 ///
761 /// \returns CallInst to the OMP_Free call
762 CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
763 Value *Allocator, std::string Name = "");
764
765 /// Create a runtime call for kmpc_threadprivate_cached
766 ///
767 /// \param Loc The insert and source location description.
768 /// \param Pointer Pointer to the data to be cached.
769 /// \param Size Size of the data to be cached.
770 /// \param Name Name of the created call instruction (empty by default).
771 ///
772 /// \returns CallInst to the thread private cache call.
773 CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
774 llvm::Value *Pointer,
775 llvm::ConstantInt *Size,
776 const llvm::Twine &Name = Twine(""));
777
778 /// Declarations for LLVM-IR types (simple, array, function and structure) are
779 /// generated below. Their names are defined and used in OMPKinds.def. Here
780 /// we provide the declarations, the initializeTypes function will provide the
781 /// values. Every member declared here starts out as nullptr.
782 ///
783 ///{
784#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
785#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
786 ArrayType *VarName##Ty = nullptr; \
787 PointerType *VarName##PtrTy = nullptr;
788#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
789 FunctionType *VarName = nullptr; \
790 PointerType *VarName##Ptr = nullptr;
791#define OMP_STRUCT_TYPE(VarName, StrName, ...) \
792 StructType *VarName = nullptr; \
793 PointerType *VarName##Ptr = nullptr;
794#include "llvm/Frontend/OpenMP/OMPKinds.def"
795
796 ///}
797
798private:
799 /// Create all simple and struct types exposed by the runtime and remember
800 /// their llvm::PointerTypes for easy access later.
801 void initializeTypes(Module &M);
802
803 /// Common interface for generating entry calls for OMP directives.
804 /// If the directive has a region/body, it will set the insertion
805 /// point to the body.
806 ///
807 /// \param OMPD Directive to generate entry blocks for.
808 /// \param EntryCall Call to the entry OMP runtime function.
809 /// \param ExitBB Block where the region ends.
810 /// \param Conditional Indicates if the entry call result will be used
811 /// to evaluate a conditional of whether a thread will execute
812 /// body code or not. Defaults to false.
813 ///
814 /// \return The insertion position in the exit block.
815 InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
816 BasicBlock *ExitBB,
817 bool Conditional = false);
818
819 /// Common interface to finalize the region.
820 ///
821 /// \param OMPD Directive to generate exiting code for.
822 /// \param FinIP Insertion point for emitting finalization code and exit call.
823 /// \param ExitCall Call to the ending OMP runtime function.
824 /// \param HasFinalize Indicates if the directive will require finalization
825 /// and has a finalization callback in the stack that
826 /// should be called. Defaults to true.
827 ///
828 /// \return The insertion position in the exit block.
829 InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
830 InsertPointTy FinIP,
831 Instruction *ExitCall,
832 bool HasFinalize = true);
833
834 /// Common interface to generate OMP inlined regions.
835 ///
836 /// \param OMPD Directive to generate inlined region for.
837 /// \param EntryCall Call to the entry OMP runtime function.
838 /// \param ExitCall Call to the ending OMP runtime function.
839 /// \param BodyGenCB Body code generation callback.
840 /// \param FiniCB Finalization callback. Will be called when finalizing region.
841 /// \param Conditional Indicates if the entry call result will be used
842 /// to evaluate a conditional of whether a thread will execute
843 /// body code or not. Defaults to false.
844 /// \param HasFinalize Indicates if the directive will require finalization
845 /// and has a finalization callback in the stack that
846 /// should be called. Defaults to true.
847 /// \param IsCancellable If HasFinalize is set to true, indicates if the
848 /// directive should be cancellable. Defaults to false.
849 /// \return The insertion point after the region.
850

851 InsertPointTy
852 EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
853 Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
854 FinalizeCallbackTy FiniCB, bool Conditional = false,
855 bool HasFinalize = true, bool IsCancellable = false);
856
857 /// Get the name formed from \p Parts joined with platform-specific separators.
858 /// \param Parts Different parts of the final name that need separation.
859 /// \param FirstSeparator First separator used between the initial two
860 /// parts of the name.
861 /// \param Separator Separator used between all of the remaining consecutive
862 /// parts of the name.
863 static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
864 StringRef FirstSeparator,
865 StringRef Separator);
866
867 /// Gets (if a variable with the given name already exists) or creates an
868 /// internal global variable with the specified Name. The created variable has
869 /// linkage CommonLinkage by default and is initialized by null value.
870 /// \param Ty Type of the global variable. If it already exists, the type
871 /// must be the same.
872 /// \param Name Name of the variable.
/// \param AddressSpace Defaults to 0. NOTE(review): presumably the address
/// space the global variable is created in -- confirm against the definition.
873 Constant *getOrCreateOMPInternalVariable(Type *Ty, const Twine &Name,
874 unsigned AddressSpace = 0);
875
876 /// Returns the corresponding lock object for the specified critical region
877 /// name. If the lock object does not exist it is created, otherwise the
878 /// reference to the existing copy is returned.
879 /// \param CriticalName Name of the critical region.
880 /// \returns The lock object associated with \p CriticalName.
881 Value *getOMPCriticalRegionLock(StringRef CriticalName);
882
883 /// Create the control flow structure of a canonical OpenMP loop.
884 ///
885 /// The emitted loop will be disconnected, i.e. no edge to the loop's
886 /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
887 /// IRBuilder location is not preserved.
888 ///
889 /// \param DL DebugLoc used for the instructions in the skeleton.
890 /// \param TripCount Value to be used for the trip count.
891 /// \param F Function in which to insert the BasicBlocks.
892 /// \param PreInsertBefore Where to insert BBs that execute before the body,
893 /// typically the body itself.
894 /// \param PostInsertBefore Where to insert BBs that execute after the body.
895 /// \param Name Base name used to derive BB and instruction names
896 /// (empty by default).
897 ///
898 /// \returns The CanonicalLoopInfo that represents the emitted loop.
899 CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
900 Function *F,
901 BasicBlock *PreInsertBefore,
902 BasicBlock *PostInsertBefore,
903 const Twine &Name = {});
904};
905
906/// Class to represent the control flow structure of an OpenMP canonical loop.
907///
908/// The control-flow structure is standardized for easy consumption by
909/// directives associated with loops. For instance, the worksharing-loop
910/// construct may change this control flow such that each loop iteration is
911/// executed on only one thread.
912///
913/// The control flow can be described as follows:
914///
915///     Preheader
916///        |
917///  /-> Header
918///  |     |
919///  |    Cond---\
920///  |     |     |
921///  |    Body   |
922///  |    | |    |
923///  |   <...>   |
924///  |    | |    |
925///   \--Latch   |
926///              |
927///             Exit
928///              |
929///            After
930///
931/// Code in the header, condition block, latch and exit block must not have any
932/// side-effect. The body block is the single entry point into the loop body,
933/// which may contain arbitrary control flow as long as all control paths
934/// eventually branch to the latch block.
935///
936/// Defined outside OpenMPIRBuilder because one cannot forward-declare nested
937/// classes.
938class CanonicalLoopInfo {
939 friend class OpenMPIRBuilder;
940
941private:
942 /// Whether this object currently represents a loop.
943 bool IsValid = false;
944
 /// Control blocks of the loop (see the accessors below for the role of each).
 /// Initialized to null so a default-constructed object never exposes
 /// indeterminate pointers through the getters.
945 BasicBlock *Preheader = nullptr;
946 BasicBlock *Header = nullptr;
947 BasicBlock *Cond = nullptr;
948 BasicBlock *Body = nullptr;
949 BasicBlock *Latch = nullptr;
950 BasicBlock *Exit = nullptr;
951 BasicBlock *After = nullptr;
952
953 /// Add the control blocks of this loop to \p BBs.
954 ///
955 /// This does not include any block from the body, including the one returned
956 /// by getBody().
957 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
958
959public:
960 /// The preheader ensures that there is only a single edge entering the loop.
961 /// Code that must be executed before any loop iteration can be emitted here,
962 /// such as computing the loop trip count and begin lifetime markers. Code in
963 /// the preheader is not considered part of the canonical loop.
964 BasicBlock *getPreheader() const { return Preheader; }
965
966 /// The header is the entry for each iteration. In the canonical control flow,
967 /// it only contains the PHINode for the induction variable.
968 BasicBlock *getHeader() const { return Header; }
969
970 /// The condition block computes whether there is another loop iteration. If
971 /// yes, branches to the body; otherwise to the exit block.
972 BasicBlock *getCond() const { return Cond; }
973
974 /// The body block is the single entry for a loop iteration and not controlled
975 /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
976 /// eventually branch to the \p Latch block.
977 BasicBlock *getBody() const { return Body; }
978
979 /// Reaching the latch indicates the end of the loop body code. In the
980 /// canonical control flow, it only contains the increment of the induction
981 /// variable.
982 BasicBlock *getLatch() const { return Latch; }
983
984 /// Reaching the exit indicates no more iterations are being executed.
985 BasicBlock *getExit() const { return Exit; }
986
987 /// The after block is intended for clean-up code such as lifetime end
988 /// markers. It is separate from the exit block to ensure, analogous to the
989 /// preheader, it having just a single entry edge and being free from PHI
990 /// nodes should there be multiple loop exits (such as from break
991 /// statements/cancellations).
992 BasicBlock *getAfter() const { return After; }
993
994 /// Returns the llvm::Value containing the number of loop iterations. It must
995 /// be valid in the preheader and always interpreted as an unsigned integer of
996 /// any bit-width. It is read from operand 1 of the first instruction of the
 /// condition block, which is asserted to be a compare instruction.
997 Value *getTripCount() const {
998 Instruction *CmpI = &Cond->front();
999 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
1000 return CmpI->getOperand(1);
1001 }
1002
1003 /// Returns the instruction representing the current logical induction
1004 /// variable. Always unsigned, always starting at 0 with an increment of one.
 /// It is the first instruction of the header, asserted to be a PHINode.
1005 Instruction *getIndVar() const {
1006 Instruction *IndVarPHI = &Header->front();
1007 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
1008 return IndVarPHI;
1009 }
1010
1011 /// Return the type of the induction variable (and the trip count).
1012 Type *getIndVarType() const { return getIndVar()->getType(); }
1013
1014 /// Return the insertion point for user code before the loop, i.e. right
 /// before the last instruction of the preheader.
1015 OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
1016 return {Preheader, std::prev(Preheader->end())};
1017 }
1018
1019 /// Return the insertion point for user code in the body (start of the body
 /// block).
1020 OpenMPIRBuilder::InsertPointTy getBodyIP() const {
1021 return {Body, Body->begin()};
1022 }
1023
1024 /// Return the insertion point for user code after the loop (start of the
 /// after block).
1025 OpenMPIRBuilder::InsertPointTy getAfterIP() const {
1026 return {After, After->begin()};
1027 }
1028
 /// Return the function that contains the loop's header block.
1029 Function *getFunction() const { return Header->getParent(); }
1030
1031 /// Consistency self-check.
1032 void assertOK() const;
1033};
1034
1035} // end namespace llvm
1036
1037#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
1038