1 | //===-------- interface.cpp - Target independent OpenMP target RTL --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implementation of the interface to be used by Clang during the codegen of a |
10 | // target region. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "OpenMP/OMPT/Interface.h" |
15 | #include "OpenMP/OMPT/Callback.h" |
16 | #include "PluginManager.h" |
17 | #include "private.h" |
18 | |
19 | #include "Shared/EnvironmentVar.h" |
20 | #include "Shared/Profile.h" |
21 | |
22 | #include "Utils/ExponentialBackoff.h" |
23 | |
24 | #include <cassert> |
25 | #include <cstdint> |
26 | #include <cstdio> |
27 | #include <cstdlib> |
28 | |
29 | #ifdef OMPT_SUPPORT |
30 | using namespace llvm::omp::target::ompt; |
31 | #endif |
32 | |
33 | //////////////////////////////////////////////////////////////////////////////// |
34 | /// adds requires flags |
35 | EXTERN void __tgt_register_requires(int64_t Flags) { |
36 | PM->addRequirements(Flags); |
37 | } |
38 | |
39 | //////////////////////////////////////////////////////////////////////////////// |
40 | /// adds a target shared library to the target execution image |
41 | EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) { |
42 | if (PM->delayRegisterLib(Desc)) |
43 | return; |
44 | |
45 | PM->registerLib(Desc); |
46 | } |
47 | |
48 | //////////////////////////////////////////////////////////////////////////////// |
49 | /// Initialize all available devices without registering any image |
50 | EXTERN void __tgt_init_all_rtls() { PM->initAllPlugins(); } |
51 | |
52 | //////////////////////////////////////////////////////////////////////////////// |
53 | /// unloads a target shared library |
54 | EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) { |
55 | PM->unregisterLib(Desc); |
56 | } |
57 | |
58 | template <typename TargetAsyncInfoTy> |
59 | static inline void |
60 | targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, |
61 | void **Args, int64_t *ArgSizes, int64_t *ArgTypes, |
62 | map_var_info_t *ArgNames, void **ArgMappers, |
63 | TargetDataFuncPtrTy TargetDataFunction, const char *RegionTypeMsg, |
64 | const char *RegionName) { |
65 | static_assert(std::is_convertible_v<TargetAsyncInfoTy, AsyncInfoTy>, |
66 | "TargetAsyncInfoTy must be convertible to AsyncInfoTy." ); |
67 | |
68 | TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy" , |
69 | "NumArgs=" + std::to_string(ArgNum), Loc); |
70 | |
71 | DP("Entering data %s region for device %" PRId64 " with %d mappings\n" , |
72 | RegionName, DeviceId, ArgNum); |
73 | |
74 | if (checkDeviceAndCtors(DeviceID&: DeviceId, Loc)) { |
75 | DP("Not offloading to device %" PRId64 "\n" , DeviceId); |
76 | return; |
77 | } |
78 | |
79 | if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) |
80 | printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, |
81 | RegionType: RegionTypeMsg); |
82 | #ifdef OMPTARGET_DEBUG |
83 | for (int I = 0; I < ArgNum; ++I) { |
84 | DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 |
85 | ", Type=0x%" PRIx64 ", Name=%s\n" , |
86 | I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], |
87 | (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown" ); |
88 | } |
89 | #endif |
90 | |
91 | auto DeviceOrErr = PM->getDevice(DeviceNo: DeviceId); |
92 | if (!DeviceOrErr) |
93 | FATAL_MESSAGE(DeviceId, "%s" , toString(DeviceOrErr.takeError()).c_str()); |
94 | |
95 | TargetAsyncInfoTy TargetAsyncInfo(*DeviceOrErr); |
96 | AsyncInfoTy &AsyncInfo = TargetAsyncInfo; |
97 | |
98 | /// RAII to establish tool anchors before and after data begin / end / update |
99 | OMPT_IF_BUILT(assert((TargetDataFunction == targetDataBegin || |
100 | TargetDataFunction == targetDataEnd || |
101 | TargetDataFunction == targetDataUpdate) && |
102 | "Encountered unexpected TargetDataFunction during " |
103 | "execution of targetData" ); |
104 | auto CallbackFunctions = |
105 | (TargetDataFunction == targetDataBegin) |
106 | ? RegionInterface.getCallbacks<ompt_target_enter_data>() |
107 | : (TargetDataFunction == targetDataEnd) |
108 | ? RegionInterface.getCallbacks<ompt_target_exit_data>() |
109 | : RegionInterface.getCallbacks<ompt_target_update>(); |
110 | InterfaceRAII TargetDataRAII(CallbackFunctions, DeviceId, |
111 | OMPT_GET_RETURN_ADDRESS);) |
112 | |
113 | int Rc = OFFLOAD_SUCCESS; |
114 | Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes, |
115 | ArgTypes, ArgNames, ArgMappers, AsyncInfo, |
116 | false /*FromMapper=*/); |
117 | |
118 | if (Rc == OFFLOAD_SUCCESS) |
119 | Rc = AsyncInfo.synchronize(); |
120 | |
121 | handleTargetOutcome(Success: Rc == OFFLOAD_SUCCESS, Loc); |
122 | } |
123 | |
124 | /// creates host-to-target data mapping, stores it in the |
125 | /// libomptarget.so internal structure (an entry in a stack of data maps) |
126 | /// and passes the data to the device. |
127 | EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId, |
128 | int32_t ArgNum, void **ArgsBase, |
129 | void **Args, int64_t *ArgSizes, |
130 | int64_t *ArgTypes, |
131 | map_var_info_t *ArgNames, |
132 | void **ArgMappers) { |
133 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
134 | targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, |
135 | ArgTypes, ArgNames, ArgMappers, TargetDataFunction: targetDataBegin, |
136 | RegionTypeMsg: "Entering OpenMP data region with being_mapper" , |
137 | RegionName: "begin" ); |
138 | } |
139 | |
140 | EXTERN void __tgt_target_data_begin_nowait_mapper( |
141 | ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, |
142 | void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, |
143 | void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, |
144 | void *NoAliasDepList) { |
145 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
146 | targetData<TaskAsyncInfoWrapperTy>( |
147 | Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, |
148 | ArgMappers, TargetDataFunction: targetDataBegin, |
149 | RegionTypeMsg: "Entering OpenMP data region with being_nowait_mapper" , RegionName: "begin" ); |
150 | } |
151 | |
152 | /// passes data from the target, releases target memory and destroys |
153 | /// the host-target mapping (top entry from the stack of data maps) |
154 | /// created by the last __tgt_target_data_begin. |
155 | EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId, |
156 | int32_t ArgNum, void **ArgsBase, |
157 | void **Args, int64_t *ArgSizes, |
158 | int64_t *ArgTypes, |
159 | map_var_info_t *ArgNames, |
160 | void **ArgMappers) { |
161 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
162 | targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, |
163 | ArgTypes, ArgNames, ArgMappers, TargetDataFunction: targetDataEnd, |
164 | RegionTypeMsg: "Exiting OpenMP data region with end_mapper" , RegionName: "end" ); |
165 | } |
166 | |
167 | EXTERN void __tgt_target_data_end_nowait_mapper( |
168 | ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, |
169 | void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, |
170 | void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, |
171 | void *NoAliasDepList) { |
172 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
173 | targetData<TaskAsyncInfoWrapperTy>( |
174 | Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, |
175 | ArgMappers, TargetDataFunction: targetDataEnd, |
176 | RegionTypeMsg: "Exiting OpenMP data region with end_nowait_mapper" , RegionName: "end" ); |
177 | } |
178 | |
179 | EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId, |
180 | int32_t ArgNum, void **ArgsBase, |
181 | void **Args, int64_t *ArgSizes, |
182 | int64_t *ArgTypes, |
183 | map_var_info_t *ArgNames, |
184 | void **ArgMappers) { |
185 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
186 | targetData<AsyncInfoTy>( |
187 | Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, |
188 | ArgMappers, TargetDataFunction: targetDataUpdate, |
189 | RegionTypeMsg: "Updating data within the OpenMP data region with update_mapper" , |
190 | RegionName: "update" ); |
191 | } |
192 | |
193 | EXTERN void __tgt_target_data_update_nowait_mapper( |
194 | ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, |
195 | void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, |
196 | void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, |
197 | void *NoAliasDepList) { |
198 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
199 | targetData<TaskAsyncInfoWrapperTy>( |
200 | Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, |
201 | ArgMappers, TargetDataFunction: targetDataUpdate, |
202 | RegionTypeMsg: "Updating data within the OpenMP data region with update_nowait_mapper" , |
203 | RegionName: "update" ); |
204 | } |
205 | |
206 | static KernelArgsTy *upgradeKernelArgs(KernelArgsTy *KernelArgs, |
207 | KernelArgsTy &LocalKernelArgs, |
208 | int32_t NumTeams, int32_t ThreadLimit) { |
209 | if (KernelArgs->Version > 2) |
210 | DP("Unexpected ABI version: %u\n" , KernelArgs->Version); |
211 | |
212 | if (KernelArgs->Version == 1) { |
213 | LocalKernelArgs.Version = 2; |
214 | LocalKernelArgs.NumArgs = KernelArgs->NumArgs; |
215 | LocalKernelArgs.ArgBasePtrs = KernelArgs->ArgBasePtrs; |
216 | LocalKernelArgs.ArgPtrs = KernelArgs->ArgPtrs; |
217 | LocalKernelArgs.ArgSizes = KernelArgs->ArgSizes; |
218 | LocalKernelArgs.ArgTypes = KernelArgs->ArgTypes; |
219 | LocalKernelArgs.ArgNames = KernelArgs->ArgNames; |
220 | LocalKernelArgs.ArgMappers = KernelArgs->ArgMappers; |
221 | LocalKernelArgs.Tripcount = KernelArgs->Tripcount; |
222 | LocalKernelArgs.Flags = KernelArgs->Flags; |
223 | LocalKernelArgs.DynCGroupMem = 0; |
224 | LocalKernelArgs.NumTeams[0] = NumTeams; |
225 | LocalKernelArgs.NumTeams[1] = 0; |
226 | LocalKernelArgs.NumTeams[2] = 0; |
227 | LocalKernelArgs.ThreadLimit[0] = ThreadLimit; |
228 | LocalKernelArgs.ThreadLimit[1] = 0; |
229 | LocalKernelArgs.ThreadLimit[2] = 0; |
230 | return &LocalKernelArgs; |
231 | } |
232 | |
233 | return KernelArgs; |
234 | } |
235 | |
236 | template <typename TargetAsyncInfoTy> |
237 | static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, |
238 | int32_t ThreadLimit, void *HostPtr, |
239 | KernelArgsTy *KernelArgs) { |
240 | static_assert(std::is_convertible_v<TargetAsyncInfoTy, AsyncInfoTy>, |
241 | "Target AsyncInfoTy must be convertible to AsyncInfoTy." ); |
242 | DP("Entering target region for device %" PRId64 " with entry point " DPxMOD |
243 | "\n" , |
244 | DeviceId, DPxPTR(HostPtr)); |
245 | |
246 | if (checkDeviceAndCtors(DeviceID&: DeviceId, Loc)) { |
247 | DP("Not offloading to device %" PRId64 "\n" , DeviceId); |
248 | return OMP_TGT_FAIL; |
249 | } |
250 | |
251 | bool IsTeams = NumTeams != -1; |
252 | if (!IsTeams) |
253 | KernelArgs->NumTeams[0] = NumTeams = 1; |
254 | |
255 | // Auto-upgrade kernel args version 1 to 2. |
256 | KernelArgsTy LocalKernelArgs; |
257 | KernelArgs = |
258 | upgradeKernelArgs(KernelArgs, LocalKernelArgs, NumTeams, ThreadLimit); |
259 | |
260 | assert(KernelArgs->NumTeams[0] == static_cast<uint32_t>(NumTeams) && |
261 | !KernelArgs->NumTeams[1] && !KernelArgs->NumTeams[2] && |
262 | "OpenMP interface should not use multiple dimensions" ); |
263 | assert(KernelArgs->ThreadLimit[0] == static_cast<uint32_t>(ThreadLimit) && |
264 | !KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] && |
265 | "OpenMP interface should not use multiple dimensions" ); |
266 | TIMESCOPE_WITH_DETAILS_AND_IDENT( |
267 | "Runtime: target exe" , |
268 | "NumTeams=" + std::to_string(NumTeams) + |
269 | ";NumArgs=" + std::to_string(KernelArgs->NumArgs), |
270 | Loc); |
271 | |
272 | if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) |
273 | printKernelArguments(Loc, DeviceId, ArgNum: KernelArgs->NumArgs, |
274 | ArgSizes: KernelArgs->ArgSizes, ArgTypes: KernelArgs->ArgTypes, |
275 | ArgNames: KernelArgs->ArgNames, RegionType: "Entering OpenMP kernel" ); |
276 | #ifdef OMPTARGET_DEBUG |
277 | for (uint32_t I = 0; I < KernelArgs->NumArgs; ++I) { |
278 | DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 |
279 | ", Type=0x%" PRIx64 ", Name=%s\n" , |
280 | I, DPxPTR(KernelArgs->ArgBasePtrs[I]), DPxPTR(KernelArgs->ArgPtrs[I]), |
281 | KernelArgs->ArgSizes[I], KernelArgs->ArgTypes[I], |
282 | (KernelArgs->ArgNames) |
283 | ? getNameFromMapping(KernelArgs->ArgNames[I]).c_str() |
284 | : "unknown" ); |
285 | } |
286 | #endif |
287 | |
288 | auto DeviceOrErr = PM->getDevice(DeviceNo: DeviceId); |
289 | if (!DeviceOrErr) |
290 | FATAL_MESSAGE(DeviceId, "%s" , toString(DeviceOrErr.takeError()).c_str()); |
291 | |
292 | TargetAsyncInfoTy TargetAsyncInfo(*DeviceOrErr); |
293 | AsyncInfoTy &AsyncInfo = TargetAsyncInfo; |
294 | /// RAII to establish tool anchors before and after target region |
295 | OMPT_IF_BUILT(InterfaceRAII TargetRAII( |
296 | RegionInterface.getCallbacks<ompt_target>(), DeviceId, |
297 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
298 | |
299 | int Rc = OFFLOAD_SUCCESS; |
300 | Rc = target(Loc, Device&: *DeviceOrErr, HostPtr, KernelArgs&: *KernelArgs, AsyncInfo); |
301 | { // required to show syncronization |
302 | TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: syncronize" , "" , Loc); |
303 | if (Rc == OFFLOAD_SUCCESS) |
304 | Rc = AsyncInfo.synchronize(); |
305 | |
306 | handleTargetOutcome(Success: Rc == OFFLOAD_SUCCESS, Loc); |
307 | assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!" ); |
308 | } |
309 | return OMP_TGT_SUCCESS; |
310 | } |
311 | |
312 | /// Implements a kernel entry that executes the target region on the specified |
313 | /// device. |
314 | /// |
315 | /// \param Loc Source location associated with this target region. |
316 | /// \param DeviceId The device to execute this region, -1 indicated the default. |
317 | /// \param NumTeams Number of teams to launch the region with, -1 indicates a |
318 | /// non-teams region and 0 indicates it was unspecified. |
319 | /// \param ThreadLimit Limit to the number of threads to use in the kernel |
320 | /// launch, 0 indicates it was unspecified. |
321 | /// \param HostPtr The pointer to the host function registered with the kernel. |
322 | /// \param Args All arguments to this kernel launch (see struct definition). |
323 | EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, |
324 | int32_t ThreadLimit, void *HostPtr, |
325 | KernelArgsTy *KernelArgs) { |
326 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
327 | if (KernelArgs->Flags.NoWait) |
328 | return targetKernel<TaskAsyncInfoWrapperTy>( |
329 | Loc, DeviceId, NumTeams, ThreadLimit, HostPtr, KernelArgs); |
330 | return targetKernel<AsyncInfoTy>(Loc, DeviceId, NumTeams, ThreadLimit, |
331 | HostPtr, KernelArgs); |
332 | } |
333 | |
334 | /// Activates the record replay mechanism. |
335 | /// \param DeviceId The device identifier to execute the target region. |
336 | /// \param MemorySize The number of bytes to be (pre-)allocated |
337 | /// by the bump allocator |
338 | /// /param IsRecord Activates the record replay mechanism in |
339 | /// 'record' mode or 'replay' mode. |
340 | /// /param SaveOutput Store the device memory after kernel |
341 | /// execution on persistent storage |
342 | EXTERN int __tgt_activate_record_replay(int64_t DeviceId, uint64_t MemorySize, |
343 | void *VAddr, bool IsRecord, |
344 | bool SaveOutput, |
345 | uint64_t &ReqPtrArgOffset) { |
346 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
347 | auto DeviceOrErr = PM->getDevice(DeviceNo: DeviceId); |
348 | if (!DeviceOrErr) |
349 | FATAL_MESSAGE(DeviceId, "%s" , toString(DeviceOrErr.takeError()).c_str()); |
350 | |
351 | [[maybe_unused]] int Rc = target_activate_rr( |
352 | Device&: *DeviceOrErr, MemorySize, ReqAddr: VAddr, isRecord: IsRecord, SaveOutput, ReqPtrArgOffset); |
353 | assert(Rc == OFFLOAD_SUCCESS && |
354 | "__tgt_activate_record_replay unexpected failure!" ); |
355 | return OMP_TGT_SUCCESS; |
356 | } |
357 | |
358 | /// Implements a target kernel entry that replays a pre-recorded kernel. |
359 | /// \param Loc Source location associated with this target region (unused). |
360 | /// \param DeviceId The device identifier to execute the target region. |
361 | /// \param HostPtr A pointer to an address that uniquely identifies the kernel. |
362 | /// \param DeviceMemory A pointer to an array storing device memory data to move |
363 | /// prior to kernel execution. |
364 | /// \param DeviceMemorySize The size of the above device memory data in bytes. |
365 | /// \param TgtArgs An array of pointers of the pre-recorded target kernel |
366 | /// arguments. |
367 | /// \param TgtOffsets An array of pointers of the pre-recorded target kernel |
368 | /// argument offsets. |
369 | /// \param NumArgs The number of kernel arguments. |
370 | /// \param NumTeams Number of teams to launch the target region with. |
371 | /// \param ThreadLimit Limit to the number of threads to use in kernel |
372 | /// execution. |
373 | /// \param LoopTripCount The pre-recorded value of the loop tripcount, if any. |
374 | /// \return OMP_TGT_SUCCESS on success, OMP_TGT_FAIL on failure. |
375 | EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId, |
376 | void *HostPtr, void *DeviceMemory, |
377 | int64_t DeviceMemorySize, void **TgtArgs, |
378 | ptrdiff_t *TgtOffsets, int32_t NumArgs, |
379 | int32_t NumTeams, int32_t ThreadLimit, |
380 | uint64_t LoopTripCount) { |
381 | |
382 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
383 | if (checkDeviceAndCtors(DeviceID&: DeviceId, Loc)) { |
384 | DP("Not offloading to device %" PRId64 "\n" , DeviceId); |
385 | return OMP_TGT_FAIL; |
386 | } |
387 | auto DeviceOrErr = PM->getDevice(DeviceNo: DeviceId); |
388 | if (!DeviceOrErr) |
389 | FATAL_MESSAGE(DeviceId, "%s" , toString(DeviceOrErr.takeError()).c_str()); |
390 | |
391 | /// RAII to establish tool anchors before and after target region |
392 | OMPT_IF_BUILT(InterfaceRAII TargetRAII( |
393 | RegionInterface.getCallbacks<ompt_target>(), DeviceId, |
394 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
395 | |
396 | AsyncInfoTy AsyncInfo(*DeviceOrErr); |
397 | int Rc = target_replay(Loc, Device&: *DeviceOrErr, HostPtr, DeviceMemory, |
398 | DeviceMemorySize, TgtArgs, TgtOffsets, NumArgs, |
399 | NumTeams, ThreadLimit, LoopTripCount, AsyncInfo); |
400 | if (Rc == OFFLOAD_SUCCESS) |
401 | Rc = AsyncInfo.synchronize(); |
402 | handleTargetOutcome(Success: Rc == OFFLOAD_SUCCESS, Loc); |
403 | assert(Rc == OFFLOAD_SUCCESS && |
404 | "__tgt_target_kernel_replay unexpected failure!" ); |
405 | return OMP_TGT_SUCCESS; |
406 | } |
407 | |
408 | // Get the current number of components for a user-defined mapper. |
409 | EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) { |
410 | auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle; |
411 | int64_t Size = MapperComponentsPtr->Components.size(); |
412 | DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n" , |
413 | DPxPTR(RtMapperHandle), Size); |
414 | return Size; |
415 | } |
416 | |
417 | // Push back one component for a user-defined mapper. |
418 | EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base, |
419 | void *Begin, int64_t Size, int64_t Type, |
420 | void *Name) { |
421 | DP("__tgt_push_mapper_component(Handle=" DPxMOD |
422 | ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 |
423 | ", Type=0x%" PRIx64 ", Name=%s).\n" , |
424 | DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type, |
425 | (Name) ? getNameFromMapping(Name).c_str() : "unknown" ); |
426 | auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle; |
427 | MapperComponentsPtr->Components.push_back( |
428 | Elt: MapComponentInfoTy(Base, Begin, Size, Type, Name)); |
429 | } |
430 | |
431 | EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) { |
432 | std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal(); |
433 | InfoLevel.store(i: NewInfoLevel); |
434 | for (auto &R : PM->pluginAdaptors()) { |
435 | if (R.set_info_flag) |
436 | R.set_info_flag(NewInfoLevel); |
437 | } |
438 | } |
439 | |
440 | EXTERN int __tgt_print_device_info(int64_t DeviceId) { |
441 | auto DeviceOrErr = PM->getDevice(DeviceNo: DeviceId); |
442 | if (!DeviceOrErr) |
443 | FATAL_MESSAGE(DeviceId, "%s" , toString(DeviceOrErr.takeError()).c_str()); |
444 | |
445 | return DeviceOrErr->printDeviceInfo(); |
446 | } |
447 | |
448 | EXTERN void __tgt_target_nowait_query(void **AsyncHandle) { |
449 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
450 | if (!AsyncHandle || !*AsyncHandle) { |
451 | FATAL_MESSAGE0( |
452 | 1, "Receive an invalid async handle from the current OpenMP task. Is " |
453 | "this a target nowait region?\n" ); |
454 | } |
455 | |
456 | // Exponential backoff tries to optimally decide if a thread should just query |
457 | // for the device operations (work/spin wait on them) or block until they are |
458 | // completed (use device side blocking mechanism). This allows the runtime to |
459 | // adapt itself when there are a lot of long-running target regions in-flight. |
460 | static thread_local utils::ExponentialBackoff QueryCounter( |
461 | Int64Envar("OMPTARGET_QUERY_COUNT_MAX" , 10), |
462 | Int64Envar("OMPTARGET_QUERY_COUNT_THRESHOLD" , 5), |
463 | Envar<float>("OMPTARGET_QUERY_COUNT_BACKOFF_FACTOR" , 0.5f)); |
464 | |
465 | auto *AsyncInfo = (AsyncInfoTy *)*AsyncHandle; |
466 | |
467 | // If the thread is actively waiting on too many target nowait regions, we |
468 | // should use the blocking sync type. |
469 | if (QueryCounter.isAboveThreshold()) |
470 | AsyncInfo->SyncType = AsyncInfoTy::SyncTy::BLOCKING; |
471 | |
472 | if (const int Rc = AsyncInfo->synchronize()) |
473 | FATAL_MESSAGE0(1, "Error while querying the async queue for completion.\n" ); |
474 | // If there are device operations still pending, return immediately without |
475 | // deallocating the handle and increase the current thread query count. |
476 | if (!AsyncInfo->isDone()) { |
477 | QueryCounter.increment(); |
478 | return; |
479 | } |
480 | |
481 | // When a thread successfully completes a target nowait region, we |
482 | // exponentially backoff its query counter by the query factor. |
483 | QueryCounter.decrement(); |
484 | |
485 | // Delete the handle and unset it from the OpenMP task data. |
486 | delete AsyncInfo; |
487 | *AsyncHandle = nullptr; |
488 | } |
489 | |