1 | //===--------- device.cpp - Target independent OpenMP target RTL ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Functionality for managing devices that are handled by RTL plugins. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "device.h" |
14 | #include "OffloadEntry.h" |
15 | #include "OpenMP/Mapping.h" |
16 | #include "OpenMP/OMPT/Callback.h" |
17 | #include "OpenMP/OMPT/Interface.h" |
18 | #include "PluginManager.h" |
19 | #include "Shared/APITypes.h" |
20 | #include "Shared/Debug.h" |
21 | #include "omptarget.h" |
22 | #include "private.h" |
23 | #include "rtl.h" |
24 | |
25 | #include "Shared/EnvironmentVar.h" |
26 | #include "llvm/Support/Error.h" |
27 | |
28 | #include <cassert> |
29 | #include <climits> |
30 | #include <cstdint> |
31 | #include <cstdio> |
32 | #include <mutex> |
33 | #include <string> |
34 | #include <thread> |
35 | |
36 | #ifdef OMPT_SUPPORT |
37 | using namespace llvm::omp::target::ompt; |
38 | #endif |
39 | |
40 | int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device, |
41 | AsyncInfoTy &AsyncInfo) const { |
42 | // First, check if the user disabled atomic map transfer/malloc/dealloc. |
43 | if (!MappingConfig::get().UseEventsForAtomicTransfers) |
44 | return OFFLOAD_SUCCESS; |
45 | |
46 | void *Event = getEvent(); |
47 | bool NeedNewEvent = Event == nullptr; |
48 | if (NeedNewEvent && Device.createEvent(Event: &Event) != OFFLOAD_SUCCESS) { |
49 | REPORT("Failed to create event\n" ); |
50 | return OFFLOAD_FAIL; |
51 | } |
52 | |
53 | // We cannot assume the event should not be nullptr because we don't |
54 | // know if the target support event. But if a target doesn't, |
55 | // recordEvent should always return success. |
56 | if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { |
57 | REPORT("Failed to set dependence on event " DPxMOD "\n" , DPxPTR(Event)); |
58 | return OFFLOAD_FAIL; |
59 | } |
60 | |
61 | if (NeedNewEvent) |
62 | setEvent(Event); |
63 | |
64 | return OFFLOAD_SUCCESS; |
65 | } |
66 | |
67 | DeviceTy::DeviceTy(PluginAdaptorTy *RTL, int32_t DeviceID, int32_t RTLDeviceID) |
68 | : DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID), |
69 | MappingInfo(*this) {} |
70 | |
71 | DeviceTy::~DeviceTy() { |
72 | if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)) |
73 | return; |
74 | |
75 | ident_t Loc = {.reserved_1: 0, .flags: 0, .reserved_2: 0, .reserved_3: 0, .psource: ";libomptarget;libomptarget;0;0;;" }; |
76 | dumpTargetPointerMappings(Loc: &Loc, Device&: *this); |
77 | } |
78 | |
79 | llvm::Error DeviceTy::init() { |
80 | // Make call to init_requires if it exists for this plugin. |
81 | int32_t Ret = 0; |
82 | if (RTL->init_requires) |
83 | Ret = RTL->init_requires(PM->getRequirements()); |
84 | if (Ret != OFFLOAD_SUCCESS) |
85 | return llvm::createStringError( |
86 | EC: llvm::inconvertibleErrorCode(), |
87 | Fmt: "Failed to initialize requirements for device %d\n" , Vals: DeviceID); |
88 | |
89 | Ret = RTL->init_device(RTLDeviceID); |
90 | if (Ret != OFFLOAD_SUCCESS) |
91 | return llvm::createStringError(EC: llvm::inconvertibleErrorCode(), |
92 | Fmt: "Failed to initialize device %d\n" , |
93 | Vals: DeviceID); |
94 | |
95 | // Enables recording kernels if set. |
96 | BoolEnvar OMPX_RecordKernel("LIBOMPTARGET_RECORD" , false); |
97 | if (OMPX_RecordKernel) { |
98 | // Enables saving the device memory kernel output post execution if set. |
99 | BoolEnvar OMPX_ReplaySaveOutput("LIBOMPTARGET_RR_SAVE_OUTPUT" , false); |
100 | |
101 | uint64_t ReqPtrArgOffset; |
102 | RTL->initialize_record_replay(RTLDeviceID, 0, nullptr, true, |
103 | OMPX_ReplaySaveOutput, ReqPtrArgOffset); |
104 | } |
105 | |
106 | return llvm::Error::success(); |
107 | } |
108 | |
109 | // Load binary to device. |
110 | llvm::Expected<__tgt_device_binary> |
111 | DeviceTy::loadBinary(__tgt_device_image *Img) { |
112 | __tgt_device_binary Binary; |
113 | |
114 | if (RTL->load_binary(RTLDeviceID, Img, &Binary) != OFFLOAD_SUCCESS) |
115 | return llvm::createStringError(EC: llvm::inconvertibleErrorCode(), |
116 | Fmt: "Failed to load binary %p" , Vals: Img); |
117 | return Binary; |
118 | } |
119 | |
120 | void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { |
121 | /// RAII to establish tool anchors before and after data allocation |
122 | void *TargetPtr = nullptr; |
123 | OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII( |
124 | RegionInterface.getCallbacks<ompt_target_data_alloc>(), |
125 | DeviceID, HstPtr, &TargetPtr, Size, |
126 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
127 | |
128 | TargetPtr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); |
129 | return TargetPtr; |
130 | } |
131 | |
132 | int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) { |
133 | /// RAII to establish tool anchors before and after data deletion |
134 | OMPT_IF_BUILT(InterfaceRAII TargetDataDeleteRAII( |
135 | RegionInterface.getCallbacks<ompt_target_data_delete>(), |
136 | DeviceID, TgtAllocBegin, |
137 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
138 | |
139 | return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind); |
140 | } |
141 | |
142 | // Submit data to device |
143 | int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, |
144 | AsyncInfoTy &AsyncInfo, HostDataToTargetTy *Entry, |
145 | MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) { |
146 | if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) |
147 | MappingInfo.printCopyInfo(TgtPtr: TgtPtrBegin, HstPtr: HstPtrBegin, Size, /*H2D=*/true, |
148 | Entry, HDTTMapPtr); |
149 | |
150 | /// RAII to establish tool anchors before and after data submit |
151 | OMPT_IF_BUILT( |
152 | InterfaceRAII TargetDataSubmitRAII( |
153 | RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(), |
154 | DeviceID, TgtPtrBegin, HstPtrBegin, Size, |
155 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
156 | |
157 | if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) |
158 | return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); |
159 | return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, |
160 | AsyncInfo); |
161 | } |
162 | |
163 | // Retrieve data from device |
164 | int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, |
165 | int64_t Size, AsyncInfoTy &AsyncInfo, |
166 | HostDataToTargetTy *Entry, |
167 | MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) { |
168 | if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) |
169 | MappingInfo.printCopyInfo(TgtPtr: TgtPtrBegin, HstPtr: HstPtrBegin, Size, /*H2D=*/false, |
170 | Entry, HDTTMapPtr); |
171 | |
172 | /// RAII to establish tool anchors before and after data retrieval |
173 | OMPT_IF_BUILT( |
174 | InterfaceRAII TargetDataRetrieveRAII( |
175 | RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(), |
176 | DeviceID, HstPtrBegin, TgtPtrBegin, Size, |
177 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
178 | |
179 | if (!RTL->data_retrieve_async || !RTL->synchronize) |
180 | return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); |
181 | return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, |
182 | AsyncInfo); |
183 | } |
184 | |
185 | // Copy data from current device to destination device directly |
186 | int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, |
187 | int64_t Size, AsyncInfoTy &AsyncInfo) { |
188 | if (!AsyncInfo || !RTL->data_exchange_async || !RTL->synchronize) { |
189 | assert(RTL->data_exchange && "RTL->data_exchange is nullptr" ); |
190 | return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, |
191 | Size); |
192 | } |
193 | return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, |
194 | DstPtr, Size, AsyncInfo); |
195 | } |
196 | |
197 | int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) { |
198 | if (!RTL->data_notify_mapped) |
199 | return OFFLOAD_SUCCESS; |
200 | |
201 | DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n" , |
202 | DPxPTR(HstPtr), Size); |
203 | |
204 | if (RTL->data_notify_mapped(RTLDeviceID, HstPtr, Size)) { |
205 | REPORT("Notifiying about data mapping failed.\n" ); |
206 | return OFFLOAD_FAIL; |
207 | } |
208 | return OFFLOAD_SUCCESS; |
209 | } |
210 | |
211 | int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) { |
212 | if (!RTL->data_notify_unmapped) |
213 | return OFFLOAD_SUCCESS; |
214 | |
215 | DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n" , DPxPTR(HstPtr)); |
216 | |
217 | if (RTL->data_notify_unmapped(RTLDeviceID, HstPtr)) { |
218 | REPORT("Notifiying about data unmapping failed.\n" ); |
219 | return OFFLOAD_FAIL; |
220 | } |
221 | return OFFLOAD_SUCCESS; |
222 | } |
223 | |
224 | // Run region on device |
225 | int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr, |
226 | ptrdiff_t *TgtOffsets, KernelArgsTy &KernelArgs, |
227 | AsyncInfoTy &AsyncInfo) { |
228 | return RTL->launch_kernel(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets, |
229 | &KernelArgs, AsyncInfo); |
230 | } |
231 | |
232 | // Run region on device |
233 | bool DeviceTy::printDeviceInfo() { |
234 | if (!RTL->print_device_info) |
235 | return false; |
236 | RTL->print_device_info(RTLDeviceID); |
237 | return true; |
238 | } |
239 | |
240 | // Whether data can be copied to DstDevice directly |
241 | bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) { |
242 | if (RTL != DstDevice.RTL || !RTL->is_data_exchangable) |
243 | return false; |
244 | |
245 | if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID)) |
246 | return (RTL->data_exchange != nullptr) || |
247 | (RTL->data_exchange_async != nullptr); |
248 | |
249 | return false; |
250 | } |
251 | |
252 | int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) { |
253 | if (RTL->synchronize) |
254 | return RTL->synchronize(RTLDeviceID, AsyncInfo); |
255 | return OFFLOAD_SUCCESS; |
256 | } |
257 | |
258 | int32_t DeviceTy::queryAsync(AsyncInfoTy &AsyncInfo) { |
259 | if (RTL->query_async) |
260 | return RTL->query_async(RTLDeviceID, AsyncInfo); |
261 | |
262 | return synchronize(AsyncInfo); |
263 | } |
264 | |
265 | int32_t DeviceTy::createEvent(void **Event) { |
266 | if (RTL->create_event) |
267 | return RTL->create_event(RTLDeviceID, Event); |
268 | |
269 | return OFFLOAD_SUCCESS; |
270 | } |
271 | |
272 | int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) { |
273 | if (RTL->record_event) |
274 | return RTL->record_event(RTLDeviceID, Event, AsyncInfo); |
275 | |
276 | return OFFLOAD_SUCCESS; |
277 | } |
278 | |
279 | int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) { |
280 | if (RTL->wait_event) |
281 | return RTL->wait_event(RTLDeviceID, Event, AsyncInfo); |
282 | |
283 | return OFFLOAD_SUCCESS; |
284 | } |
285 | |
286 | int32_t DeviceTy::syncEvent(void *Event) { |
287 | if (RTL->sync_event) |
288 | return RTL->sync_event(RTLDeviceID, Event); |
289 | |
290 | return OFFLOAD_SUCCESS; |
291 | } |
292 | |
293 | int32_t DeviceTy::destroyEvent(void *Event) { |
294 | if (RTL->create_event) |
295 | return RTL->destroy_event(RTLDeviceID, Event); |
296 | |
297 | return OFFLOAD_SUCCESS; |
298 | } |
299 | |
300 | void DeviceTy::dumpOffloadEntries() { |
301 | fprintf(stderr, format: "Device %i offload entries:\n" , DeviceID); |
302 | for (auto &It : *DeviceOffloadEntries.getExclusiveAccessor()) { |
303 | const char *Kind = "kernel" ; |
304 | if (It.second.isLink()) |
305 | Kind = "link" ; |
306 | else if (It.second.isGlobal()) |
307 | Kind = "global var." ; |
308 | fprintf(stderr, format: " %11s: %s\n" , Kind, It.second.getNameAsCStr()); |
309 | } |
310 | } |
311 | |
312 | bool DeviceTy::useAutoZeroCopy() { |
313 | if (RTL->use_auto_zero_copy) |
314 | return RTL->use_auto_zero_copy(RTLDeviceID); |
315 | return false; |
316 | } |
317 | |