1//===-------- interface.cpp - Target independent OpenMP target RTL --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation of the interface to be used by Clang during the codegen of a
10// target region.
11//
12//===----------------------------------------------------------------------===//
13
14#include "OpenMP/OMPT/Interface.h"
15#include "OpenMP/OMPT/Callback.h"
16#include "PluginManager.h"
17#include "private.h"
18
19#include "Shared/EnvironmentVar.h"
20#include "Shared/Profile.h"
21
22#include "Utils/ExponentialBackoff.h"
23
24#include <cassert>
25#include <cstdint>
26#include <cstdio>
27#include <cstdlib>
28
29#ifdef OMPT_SUPPORT
30using namespace llvm::omp::target::ompt;
31#endif
32
33////////////////////////////////////////////////////////////////////////////////
34/// adds requires flags
35EXTERN void __tgt_register_requires(int64_t Flags) {
36 PM->addRequirements(Flags);
37}
38
39////////////////////////////////////////////////////////////////////////////////
40/// adds a target shared library to the target execution image
41EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) {
42 if (PM->delayRegisterLib(Desc))
43 return;
44
45 PM->registerLib(Desc);
46}
47
48////////////////////////////////////////////////////////////////////////////////
49/// Initialize all available devices without registering any image
50EXTERN void __tgt_init_all_rtls() { PM->initAllPlugins(); }
51
52////////////////////////////////////////////////////////////////////////////////
53/// unloads a target shared library
54EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) {
55 PM->unregisterLib(Desc);
56}
57
58template <typename TargetAsyncInfoTy>
59static inline void
60targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
61 void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
62 map_var_info_t *ArgNames, void **ArgMappers,
63 TargetDataFuncPtrTy TargetDataFunction, const char *RegionTypeMsg,
64 const char *RegionName) {
65 static_assert(std::is_convertible_v<TargetAsyncInfoTy, AsyncInfoTy>,
66 "TargetAsyncInfoTy must be convertible to AsyncInfoTy.");
67
68 TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy",
69 "NumArgs=" + std::to_string(ArgNum), Loc);
70
71 DP("Entering data %s region for device %" PRId64 " with %d mappings\n",
72 RegionName, DeviceId, ArgNum);
73
74 if (checkDeviceAndCtors(DeviceID&: DeviceId, Loc)) {
75 DP("Not offloading to device %" PRId64 "\n", DeviceId);
76 return;
77 }
78
79 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
80 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
81 RegionType: RegionTypeMsg);
82#ifdef OMPTARGET_DEBUG
83 for (int I = 0; I < ArgNum; ++I) {
84 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
85 ", Type=0x%" PRIx64 ", Name=%s\n",
86 I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
87 (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
88 }
89#endif
90
91 auto DeviceOrErr = PM->getDevice(DeviceNo: DeviceId);
92 if (!DeviceOrErr)
93 FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
94
95 TargetAsyncInfoTy TargetAsyncInfo(*DeviceOrErr);
96 AsyncInfoTy &AsyncInfo = TargetAsyncInfo;
97
98 /// RAII to establish tool anchors before and after data begin / end / update
99 OMPT_IF_BUILT(assert((TargetDataFunction == targetDataBegin ||
100 TargetDataFunction == targetDataEnd ||
101 TargetDataFunction == targetDataUpdate) &&
102 "Encountered unexpected TargetDataFunction during "
103 "execution of targetData");
104 auto CallbackFunctions =
105 (TargetDataFunction == targetDataBegin)
106 ? RegionInterface.getCallbacks<ompt_target_enter_data>()
107 : (TargetDataFunction == targetDataEnd)
108 ? RegionInterface.getCallbacks<ompt_target_exit_data>()
109 : RegionInterface.getCallbacks<ompt_target_update>();
110 InterfaceRAII TargetDataRAII(CallbackFunctions, DeviceId,
111 OMPT_GET_RETURN_ADDRESS);)
112
113 int Rc = OFFLOAD_SUCCESS;
114 Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes,
115 ArgTypes, ArgNames, ArgMappers, AsyncInfo,
116 false /*FromMapper=*/);
117
118 if (Rc == OFFLOAD_SUCCESS)
119 Rc = AsyncInfo.synchronize();
120
121 handleTargetOutcome(Success: Rc == OFFLOAD_SUCCESS, Loc);
122}
123
124/// creates host-to-target data mapping, stores it in the
125/// libomptarget.so internal structure (an entry in a stack of data maps)
126/// and passes the data to the device.
127EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
128 int32_t ArgNum, void **ArgsBase,
129 void **Args, int64_t *ArgSizes,
130 int64_t *ArgTypes,
131 map_var_info_t *ArgNames,
132 void **ArgMappers) {
133 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
134 targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
135 ArgTypes, ArgNames, ArgMappers, TargetDataFunction: targetDataBegin,
136 RegionTypeMsg: "Entering OpenMP data region with being_mapper",
137 RegionName: "begin");
138}
139
140EXTERN void __tgt_target_data_begin_nowait_mapper(
141 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
142 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
143 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
144 void *NoAliasDepList) {
145 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
146 targetData<TaskAsyncInfoWrapperTy>(
147 Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
148 ArgMappers, TargetDataFunction: targetDataBegin,
149 RegionTypeMsg: "Entering OpenMP data region with being_nowait_mapper", RegionName: "begin");
150}
151
152/// passes data from the target, releases target memory and destroys
153/// the host-target mapping (top entry from the stack of data maps)
154/// created by the last __tgt_target_data_begin.
155EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
156 int32_t ArgNum, void **ArgsBase,
157 void **Args, int64_t *ArgSizes,
158 int64_t *ArgTypes,
159 map_var_info_t *ArgNames,
160 void **ArgMappers) {
161 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
162 targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
163 ArgTypes, ArgNames, ArgMappers, TargetDataFunction: targetDataEnd,
164 RegionTypeMsg: "Exiting OpenMP data region with end_mapper", RegionName: "end");
165}
166
167EXTERN void __tgt_target_data_end_nowait_mapper(
168 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
169 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
170 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
171 void *NoAliasDepList) {
172 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
173 targetData<TaskAsyncInfoWrapperTy>(
174 Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
175 ArgMappers, TargetDataFunction: targetDataEnd,
176 RegionTypeMsg: "Exiting OpenMP data region with end_nowait_mapper", RegionName: "end");
177}
178
179EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
180 int32_t ArgNum, void **ArgsBase,
181 void **Args, int64_t *ArgSizes,
182 int64_t *ArgTypes,
183 map_var_info_t *ArgNames,
184 void **ArgMappers) {
185 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
186 targetData<AsyncInfoTy>(
187 Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
188 ArgMappers, TargetDataFunction: targetDataUpdate,
189 RegionTypeMsg: "Updating data within the OpenMP data region with update_mapper",
190 RegionName: "update");
191}
192
193EXTERN void __tgt_target_data_update_nowait_mapper(
194 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
195 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
196 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
197 void *NoAliasDepList) {
198 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
199 targetData<TaskAsyncInfoWrapperTy>(
200 Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
201 ArgMappers, TargetDataFunction: targetDataUpdate,
202 RegionTypeMsg: "Updating data within the OpenMP data region with update_nowait_mapper",
203 RegionName: "update");
204}
205
206static KernelArgsTy *upgradeKernelArgs(KernelArgsTy *KernelArgs,
207 KernelArgsTy &LocalKernelArgs,
208 int32_t NumTeams, int32_t ThreadLimit) {
209 if (KernelArgs->Version > 2)
210 DP("Unexpected ABI version: %u\n", KernelArgs->Version);
211
212 if (KernelArgs->Version == 1) {
213 LocalKernelArgs.Version = 2;
214 LocalKernelArgs.NumArgs = KernelArgs->NumArgs;
215 LocalKernelArgs.ArgBasePtrs = KernelArgs->ArgBasePtrs;
216 LocalKernelArgs.ArgPtrs = KernelArgs->ArgPtrs;
217 LocalKernelArgs.ArgSizes = KernelArgs->ArgSizes;
218 LocalKernelArgs.ArgTypes = KernelArgs->ArgTypes;
219 LocalKernelArgs.ArgNames = KernelArgs->ArgNames;
220 LocalKernelArgs.ArgMappers = KernelArgs->ArgMappers;
221 LocalKernelArgs.Tripcount = KernelArgs->Tripcount;
222 LocalKernelArgs.Flags = KernelArgs->Flags;
223 LocalKernelArgs.DynCGroupMem = 0;
224 LocalKernelArgs.NumTeams[0] = NumTeams;
225 LocalKernelArgs.NumTeams[1] = 0;
226 LocalKernelArgs.NumTeams[2] = 0;
227 LocalKernelArgs.ThreadLimit[0] = ThreadLimit;
228 LocalKernelArgs.ThreadLimit[1] = 0;
229 LocalKernelArgs.ThreadLimit[2] = 0;
230 return &LocalKernelArgs;
231 }
232
233 return KernelArgs;
234}
235
236template <typename TargetAsyncInfoTy>
237static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
238 int32_t ThreadLimit, void *HostPtr,
239 KernelArgsTy *KernelArgs) {
240 static_assert(std::is_convertible_v<TargetAsyncInfoTy, AsyncInfoTy>,
241 "Target AsyncInfoTy must be convertible to AsyncInfoTy.");
242 DP("Entering target region for device %" PRId64 " with entry point " DPxMOD
243 "\n",
244 DeviceId, DPxPTR(HostPtr));
245
246 if (checkDeviceAndCtors(DeviceID&: DeviceId, Loc)) {
247 DP("Not offloading to device %" PRId64 "\n", DeviceId);
248 return OMP_TGT_FAIL;
249 }
250
251 bool IsTeams = NumTeams != -1;
252 if (!IsTeams)
253 KernelArgs->NumTeams[0] = NumTeams = 1;
254
255 // Auto-upgrade kernel args version 1 to 2.
256 KernelArgsTy LocalKernelArgs;
257 KernelArgs =
258 upgradeKernelArgs(KernelArgs, LocalKernelArgs, NumTeams, ThreadLimit);
259
260 assert(KernelArgs->NumTeams[0] == static_cast<uint32_t>(NumTeams) &&
261 !KernelArgs->NumTeams[1] && !KernelArgs->NumTeams[2] &&
262 "OpenMP interface should not use multiple dimensions");
263 assert(KernelArgs->ThreadLimit[0] == static_cast<uint32_t>(ThreadLimit) &&
264 !KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] &&
265 "OpenMP interface should not use multiple dimensions");
266 TIMESCOPE_WITH_DETAILS_AND_IDENT(
267 "Runtime: target exe",
268 "NumTeams=" + std::to_string(NumTeams) +
269 ";NumArgs=" + std::to_string(KernelArgs->NumArgs),
270 Loc);
271
272 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
273 printKernelArguments(Loc, DeviceId, ArgNum: KernelArgs->NumArgs,
274 ArgSizes: KernelArgs->ArgSizes, ArgTypes: KernelArgs->ArgTypes,
275 ArgNames: KernelArgs->ArgNames, RegionType: "Entering OpenMP kernel");
276#ifdef OMPTARGET_DEBUG
277 for (uint32_t I = 0; I < KernelArgs->NumArgs; ++I) {
278 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
279 ", Type=0x%" PRIx64 ", Name=%s\n",
280 I, DPxPTR(KernelArgs->ArgBasePtrs[I]), DPxPTR(KernelArgs->ArgPtrs[I]),
281 KernelArgs->ArgSizes[I], KernelArgs->ArgTypes[I],
282 (KernelArgs->ArgNames)
283 ? getNameFromMapping(KernelArgs->ArgNames[I]).c_str()
284 : "unknown");
285 }
286#endif
287
288 auto DeviceOrErr = PM->getDevice(DeviceNo: DeviceId);
289 if (!DeviceOrErr)
290 FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
291
292 TargetAsyncInfoTy TargetAsyncInfo(*DeviceOrErr);
293 AsyncInfoTy &AsyncInfo = TargetAsyncInfo;
294 /// RAII to establish tool anchors before and after target region
295 OMPT_IF_BUILT(InterfaceRAII TargetRAII(
296 RegionInterface.getCallbacks<ompt_target>(), DeviceId,
297 /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
298
299 int Rc = OFFLOAD_SUCCESS;
300 Rc = target(Loc, Device&: *DeviceOrErr, HostPtr, KernelArgs&: *KernelArgs, AsyncInfo);
301 { // required to show syncronization
302 TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: syncronize", "", Loc);
303 if (Rc == OFFLOAD_SUCCESS)
304 Rc = AsyncInfo.synchronize();
305
306 handleTargetOutcome(Success: Rc == OFFLOAD_SUCCESS, Loc);
307 assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!");
308 }
309 return OMP_TGT_SUCCESS;
310}
311
312/// Implements a kernel entry that executes the target region on the specified
313/// device.
314///
315/// \param Loc Source location associated with this target region.
316/// \param DeviceId The device to execute this region, -1 indicated the default.
317/// \param NumTeams Number of teams to launch the region with, -1 indicates a
318/// non-teams region and 0 indicates it was unspecified.
319/// \param ThreadLimit Limit to the number of threads to use in the kernel
320/// launch, 0 indicates it was unspecified.
321/// \param HostPtr The pointer to the host function registered with the kernel.
322/// \param Args All arguments to this kernel launch (see struct definition).
323EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
324 int32_t ThreadLimit, void *HostPtr,
325 KernelArgsTy *KernelArgs) {
326 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
327 if (KernelArgs->Flags.NoWait)
328 return targetKernel<TaskAsyncInfoWrapperTy>(
329 Loc, DeviceId, NumTeams, ThreadLimit, HostPtr, KernelArgs);
330 return targetKernel<AsyncInfoTy>(Loc, DeviceId, NumTeams, ThreadLimit,
331 HostPtr, KernelArgs);
332}
333
334/// Activates the record replay mechanism.
335/// \param DeviceId The device identifier to execute the target region.
336/// \param MemorySize The number of bytes to be (pre-)allocated
337/// by the bump allocator
338/// /param IsRecord Activates the record replay mechanism in
339/// 'record' mode or 'replay' mode.
340/// /param SaveOutput Store the device memory after kernel
341/// execution on persistent storage
342EXTERN int __tgt_activate_record_replay(int64_t DeviceId, uint64_t MemorySize,
343 void *VAddr, bool IsRecord,
344 bool SaveOutput,
345 uint64_t &ReqPtrArgOffset) {
346 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
347 auto DeviceOrErr = PM->getDevice(DeviceNo: DeviceId);
348 if (!DeviceOrErr)
349 FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
350
351 [[maybe_unused]] int Rc = target_activate_rr(
352 Device&: *DeviceOrErr, MemorySize, ReqAddr: VAddr, isRecord: IsRecord, SaveOutput, ReqPtrArgOffset);
353 assert(Rc == OFFLOAD_SUCCESS &&
354 "__tgt_activate_record_replay unexpected failure!");
355 return OMP_TGT_SUCCESS;
356}
357
358/// Implements a target kernel entry that replays a pre-recorded kernel.
359/// \param Loc Source location associated with this target region (unused).
360/// \param DeviceId The device identifier to execute the target region.
361/// \param HostPtr A pointer to an address that uniquely identifies the kernel.
362/// \param DeviceMemory A pointer to an array storing device memory data to move
363/// prior to kernel execution.
364/// \param DeviceMemorySize The size of the above device memory data in bytes.
365/// \param TgtArgs An array of pointers of the pre-recorded target kernel
366/// arguments.
367/// \param TgtOffsets An array of pointers of the pre-recorded target kernel
368/// argument offsets.
369/// \param NumArgs The number of kernel arguments.
370/// \param NumTeams Number of teams to launch the target region with.
371/// \param ThreadLimit Limit to the number of threads to use in kernel
372/// execution.
373/// \param LoopTripCount The pre-recorded value of the loop tripcount, if any.
374/// \return OMP_TGT_SUCCESS on success, OMP_TGT_FAIL on failure.
375EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId,
376 void *HostPtr, void *DeviceMemory,
377 int64_t DeviceMemorySize, void **TgtArgs,
378 ptrdiff_t *TgtOffsets, int32_t NumArgs,
379 int32_t NumTeams, int32_t ThreadLimit,
380 uint64_t LoopTripCount) {
381
382 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
383 if (checkDeviceAndCtors(DeviceID&: DeviceId, Loc)) {
384 DP("Not offloading to device %" PRId64 "\n", DeviceId);
385 return OMP_TGT_FAIL;
386 }
387 auto DeviceOrErr = PM->getDevice(DeviceNo: DeviceId);
388 if (!DeviceOrErr)
389 FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
390
391 /// RAII to establish tool anchors before and after target region
392 OMPT_IF_BUILT(InterfaceRAII TargetRAII(
393 RegionInterface.getCallbacks<ompt_target>(), DeviceId,
394 /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
395
396 AsyncInfoTy AsyncInfo(*DeviceOrErr);
397 int Rc = target_replay(Loc, Device&: *DeviceOrErr, HostPtr, DeviceMemory,
398 DeviceMemorySize, TgtArgs, TgtOffsets, NumArgs,
399 NumTeams, ThreadLimit, LoopTripCount, AsyncInfo);
400 if (Rc == OFFLOAD_SUCCESS)
401 Rc = AsyncInfo.synchronize();
402 handleTargetOutcome(Success: Rc == OFFLOAD_SUCCESS, Loc);
403 assert(Rc == OFFLOAD_SUCCESS &&
404 "__tgt_target_kernel_replay unexpected failure!");
405 return OMP_TGT_SUCCESS;
406}
407
408// Get the current number of components for a user-defined mapper.
409EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
410 auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
411 int64_t Size = MapperComponentsPtr->Components.size();
412 DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
413 DPxPTR(RtMapperHandle), Size);
414 return Size;
415}
416
417// Push back one component for a user-defined mapper.
418EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base,
419 void *Begin, int64_t Size, int64_t Type,
420 void *Name) {
421 DP("__tgt_push_mapper_component(Handle=" DPxMOD
422 ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
423 ", Type=0x%" PRIx64 ", Name=%s).\n",
424 DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type,
425 (Name) ? getNameFromMapping(Name).c_str() : "unknown");
426 auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
427 MapperComponentsPtr->Components.push_back(
428 Elt: MapComponentInfoTy(Base, Begin, Size, Type, Name));
429}
430
431EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
432 std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
433 InfoLevel.store(i: NewInfoLevel);
434 for (auto &R : PM->pluginAdaptors()) {
435 if (R.set_info_flag)
436 R.set_info_flag(NewInfoLevel);
437 }
438}
439
440EXTERN int __tgt_print_device_info(int64_t DeviceId) {
441 auto DeviceOrErr = PM->getDevice(DeviceNo: DeviceId);
442 if (!DeviceOrErr)
443 FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
444
445 return DeviceOrErr->printDeviceInfo();
446}
447
448EXTERN void __tgt_target_nowait_query(void **AsyncHandle) {
449 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
450 if (!AsyncHandle || !*AsyncHandle) {
451 FATAL_MESSAGE0(
452 1, "Receive an invalid async handle from the current OpenMP task. Is "
453 "this a target nowait region?\n");
454 }
455
456 // Exponential backoff tries to optimally decide if a thread should just query
457 // for the device operations (work/spin wait on them) or block until they are
458 // completed (use device side blocking mechanism). This allows the runtime to
459 // adapt itself when there are a lot of long-running target regions in-flight.
460 static thread_local utils::ExponentialBackoff QueryCounter(
461 Int64Envar("OMPTARGET_QUERY_COUNT_MAX", 10),
462 Int64Envar("OMPTARGET_QUERY_COUNT_THRESHOLD", 5),
463 Envar<float>("OMPTARGET_QUERY_COUNT_BACKOFF_FACTOR", 0.5f));
464
465 auto *AsyncInfo = (AsyncInfoTy *)*AsyncHandle;
466
467 // If the thread is actively waiting on too many target nowait regions, we
468 // should use the blocking sync type.
469 if (QueryCounter.isAboveThreshold())
470 AsyncInfo->SyncType = AsyncInfoTy::SyncTy::BLOCKING;
471
472 if (const int Rc = AsyncInfo->synchronize())
473 FATAL_MESSAGE0(1, "Error while querying the async queue for completion.\n");
474 // If there are device operations still pending, return immediately without
475 // deallocating the handle and increase the current thread query count.
476 if (!AsyncInfo->isDone()) {
477 QueryCounter.increment();
478 return;
479 }
480
481 // When a thread successfully completes a target nowait region, we
482 // exponentially backoff its query counter by the query factor.
483 QueryCounter.decrement();
484
485 // Delete the handle and unset it from the OpenMP task data.
486 delete AsyncInfo;
487 *AsyncHandle = nullptr;
488}
489

// Source: openmp/libomptarget/src/interface.cpp