/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * based on nouveau_prime.c
 *
 * Authors: Alex Deucher
 */

/**
 * DOC: PRIME Buffer Sharing
 *
 * The following callback implementations are used for :ref:`sharing GEM buffer
 * objects between different devices via PRIME <prime_buffer_sharing>`.
 */
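
/*
 * For orientation, a minimal userspace sketch of the round trip these
 * callbacks serve, using the generic libdrm PRIME helpers. fd_a, fd_b and
 * handle_a are illustrative names for two already opened DRM devices and a
 * GEM handle on the first one; error handling is omitted:
 *
 *	uint32_t handle_b;
 *	int prime_fd;
 *
 *	drmPrimeHandleToFD(fd_a, handle_a, DRM_CLOEXEC | DRM_RDWR, &prime_fd);
 *	drmPrimeFDToHandle(fd_b, prime_fd, &handle_b);
 *	close(prime_fd);
 *
 * The export side ends up in amdgpu_gem_prime_export() and the import side in
 * amdgpu_gem_prime_import() below.
 */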

#include <drm/drmP.h>

#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_gem.h"
#include <drm/amdgpu_drm.h>
#include <linux/dma-buf.h>
#include <linux/dma-fence-array.h>

/**
 * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
 * implementation
 * @obj: GEM buffer object (BO)
 *
 * Returns:
 * A scatter/gather table for the pinned pages of the BO's memory.
 */
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	int npages = bo->tbo.num_pages;

	return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
}

/**
 * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
 * @obj: GEM BO
 *
 * Sets up an in-kernel virtual mapping of the BO's memory.
 *
 * Returns:
 * The virtual address of the mapping or an error pointer.
 */
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	int ret;

	ret = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages,
			  &bo->dma_buf_vmap);
	if (ret)
		return ERR_PTR(ret);

	return bo->dma_buf_vmap.virtual;
}

/**
 * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation
 * @obj: GEM BO
 * @vaddr: Virtual address (unused)
 *
 * Tears down the in-kernel virtual mapping of the BO's memory.
 */
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);

	ttm_bo_kunmap(&bo->dma_buf_vmap);
}
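
/*
 * Note that importers do not call the two helpers above directly; they go
 * through the dma-buf interface instead. A minimal kernel-side sketch of that
 * path, assuming "buf" is a &struct dma_buf imported from this driver and
 * with error handling omitted:
 *
 *	void *vaddr = dma_buf_vmap(buf);
 *
 *	... access the buffer contents through vaddr ...
 *
 *	dma_buf_vunmap(buf, vaddr);
 *
 * which reaches amdgpu_gem_prime_vmap()/amdgpu_gem_prime_vunmap() via the
 * drm_gem_dmabuf_vmap()/drm_gem_dmabuf_vunmap() wrappers in &amdgpu_dmabuf_ops.
 */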

/**
 * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation
 * @obj: GEM BO
 * @vma: Virtual memory area
 *
 * Sets up a userspace mapping of the BO's memory in the given
 * virtual memory area.
 *
 * Returns:
 * 0 on success or a negative error code on failure.
 */
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	unsigned long asize = amdgpu_bo_size(bo);
	int ret;

	if (!vma->vm_file)
		return -ENODEV;

	if (adev == NULL)
		return -ENODEV;

	/* Check for valid size. */
	if (asize < vma->vm_end - vma->vm_start)
		return -EINVAL;

	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
	    (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
		return -EPERM;
	}
	vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;

	/* prime mmap does not need to check access, so allow here */
	ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data);
	if (ret)
		return ret;

	ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
	drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data);

	return ret;
}
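
/*
 * The callback above is not reached through an mmap() of the DRM device file
 * but through an mmap() of the exported dma-buf itself. A minimal userspace
 * sketch, assuming "prime_fd" is a dma-buf file descriptor obtained via PRIME
 * and "size" matches the exported buffer (error handling omitted):
 *
 *	void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			 prime_fd, 0);
 *
 * drm_gem_dmabuf_mmap() in &amdgpu_dmabuf_ops forwards such requests here.
 */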

/**
 * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
 * implementation
 * @dev: DRM device
 * @attach: DMA-buf attachment
 * @sg: Scatter/gather table
 *
 * Imports shared DMA buffer memory exported by another device.
 *
 * Returns:
 * A new GEM BO of the given DRM device, representing the memory
 * described by the given DMA-buf attachment and scatter/gather table.
 */
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
				 struct dma_buf_attachment *attach,
				 struct sg_table *sg)
{
	struct reservation_object *resv = attach->dmabuf->resv;
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_bo *bo;
	struct amdgpu_bo_param bp;
	int ret;

	memset(&bp, 0, sizeof(bp));
	bp.size = attach->dmabuf->size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_CPU;
	bp.flags = 0;
	bp.type = ttm_bo_type_sg;
	bp.resv = resv;
	ww_mutex_lock(&resv->lock, NULL);
	ret = amdgpu_bo_create(adev, &bp, &bo);
	if (ret)
		goto error;

	bo->tbo.sg = sg;
	bo->tbo.ttm->sg = sg;
	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
	if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
		bo->prime_shared_count = 1;

	ww_mutex_unlock(&resv->lock);
	return &bo->gem_base;

error:
	ww_mutex_unlock(&resv->lock);
	return ERR_PTR(ret);
}

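/**
 * __reservation_object_make_exclusive - convert shared fences to an exclusive
 * fence
 * @obj: reservation object to convert
 *
 * Collects all fences currently attached to @obj and installs them as a single
 * exclusive fence, either the only fence found or a &dma_fence_array wrapping
 * all of them, so that importers which only look at the exclusive fence cannot
 * miss a pending write.
 *
 * Returns:
 * 0 on success or a negative error code on failure.
 */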
static int
__reservation_object_make_exclusive(struct reservation_object *obj)
{
	struct dma_fence **fences;
	unsigned int count;
	int r;

	if (!reservation_object_get_list(obj)) /* no shared fences to convert */
		return 0;

	r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences);
	if (r)
		return r;

	if (count == 0) {
		/* Now that was unexpected. */
	} else if (count == 1) {
		reservation_object_add_excl_fence(obj, fences[0]);
		dma_fence_put(fences[0]);
		kfree(fences);
	} else {
		struct dma_fence_array *array;

		array = dma_fence_array_create(count, fences,
					       dma_fence_context_alloc(1), 0,
					       false);
		if (!array)
			goto err_fences_put;

		reservation_object_add_excl_fence(obj, &array->base);
		dma_fence_put(&array->base);
	}

	return 0;

err_fences_put:
	while (count--)
		dma_fence_put(fences[count]);
	kfree(fences);
	return -ENOMEM;
}

/**
 * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
 * @dma_buf: Shared DMA buffer
 * @attach: DMA-buf attachment
 *
 * Makes sure that the shared DMA buffer can be accessed by the target device.
 * For now, simply pins it to the GTT domain, where it should be accessible by
 * all DMA devices.
 *
 * Returns:
 * 0 on success or a negative error code on failure.
 */
static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
				 struct dma_buf_attachment *attach)
{
	struct drm_gem_object *obj = dma_buf->priv;
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	long r;

	r = drm_gem_map_attach(dma_buf, attach);
	if (r)
		return r;

	r = amdgpu_bo_reserve(bo, false);
	if (unlikely(r != 0))
		goto error_detach;

	if (attach->dev->driver != adev->dev->driver) {
		/*
		 * We only create shared fences for internal use, but importers
		 * of the dmabuf rely on exclusive fences for implicitly
		 * tracking write hazards. As any of the current fences may
		 * correspond to a write, we need to convert all existing
		 * fences on the reservation object into a single exclusive
		 * fence.
		 */
		r = __reservation_object_make_exclusive(bo->tbo.resv);
		if (r)
			goto error_unreserve;
	}

	/* pin buffer into GTT */
	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (r)
		goto error_unreserve;

	if (attach->dev->driver != adev->dev->driver)
		bo->prime_shared_count++;

error_unreserve:
	amdgpu_bo_unreserve(bo);

error_detach:
	if (r)
		drm_gem_map_detach(dma_buf, attach);
	return r;
}
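
/*
 * For reference, this callback (and amdgpu_gem_map_detach() below) is driven
 * by the generic dma-buf attach/detach sequence on the importer side. A
 * minimal kernel-side sketch, assuming "buf" is the shared &struct dma_buf and
 * "dev" the importing &struct device, with error handling omitted:
 *
 *	struct dma_buf_attachment *attach = dma_buf_attach(buf, dev);
 *	struct sg_table *sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
 *
 *	... program the importing device with the addresses in sgt ...
 *
 *	dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
 *	dma_buf_detach(buf, attach);
 */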

/**
 * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
 * @dma_buf: Shared DMA buffer
 * @attach: DMA-buf attachment
 *
 * This is called when a shared DMA buffer no longer needs to be accessible by
 * another device. For now, simply unpins the buffer from GTT.
 */
static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
				  struct dma_buf_attachment *attach)
{
	struct drm_gem_object *obj = dma_buf->priv;
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	int ret = 0;

	ret = amdgpu_bo_reserve(bo, true);
	if (unlikely(ret != 0))
		goto error;

	amdgpu_bo_unpin(bo);
	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
		bo->prime_shared_count--;
	amdgpu_bo_unreserve(bo);

error:
	drm_gem_map_detach(dma_buf, attach);
}

/**
 * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
 * @obj: GEM BO
 *
 * Returns:
 * The BO's reservation object.
 */
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);

	return bo->tbo.resv;
}

/**
 * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
 * @dma_buf: Shared DMA buffer
 * @direction: Direction of DMA transfer
 *
 * This is called before CPU access to the shared DMA buffer's memory. If it's
 * a read access, the buffer is moved to the GTT domain if possible, for optimal
 * CPU read performance.
 *
 * Returns:
 * 0 on success or a negative error code on failure.
 */
static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
				       enum dma_data_direction direction)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_operation_ctx ctx = { true, false };
	u32 domain = amdgpu_display_supported_domains(adev);
	int ret;
	bool reads = (direction == DMA_BIDIRECTIONAL ||
		      direction == DMA_FROM_DEVICE);

	if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
		return 0;

	/* move to gtt */
	ret = amdgpu_bo_reserve(bo, false);
	if (unlikely(ret != 0))
		return ret;

	if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	}

	amdgpu_bo_unreserve(bo);
	return ret;
}
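
/*
 * Userspace triggers this callback by bracketing its CPU accesses with the
 * DMA_BUF_IOCTL_SYNC ioctl on the dma-buf file descriptor. A minimal sketch,
 * assuming "prime_fd" refers to a buffer exported by this driver (error
 * handling omitted):
 *
 *	struct dma_buf_sync sync = {
 *		.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ,
 *	};
 *
 *	ioctl(prime_fd, DMA_BUF_IOCTL_SYNC, &sync);
 *
 *	... read through the CPU mapping ...
 *
 *	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ;
 *	ioctl(prime_fd, DMA_BUF_IOCTL_SYNC, &sync);
 *
 * Kernel-side importers reach it through dma_buf_begin_cpu_access() instead.
 */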

const struct dma_buf_ops amdgpu_dmabuf_ops = {
	.attach = amdgpu_gem_map_attach,
	.detach = amdgpu_gem_map_detach,
	.map_dma_buf = drm_gem_map_dma_buf,
	.unmap_dma_buf = drm_gem_unmap_dma_buf,
	.release = drm_gem_dmabuf_release,
	.begin_cpu_access = amdgpu_gem_begin_cpu_access,
	.mmap = drm_gem_dmabuf_mmap,
	.vmap = drm_gem_dmabuf_vmap,
	.vunmap = drm_gem_dmabuf_vunmap,
};

/**
 * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
 * @dev: DRM device
 * @gobj: GEM BO
 * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
 *
 * The main work is done by the &drm_gem_prime_export helper, which in turn
 * uses &amdgpu_gem_prime_res_obj.
 *
 * Returns:
 * Shared DMA buffer representing the GEM BO from the given device.
 */
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
					struct drm_gem_object *gobj,
					int flags)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
	struct dma_buf *buf;

	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
	    bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
		return ERR_PTR(-EPERM);

	buf = drm_gem_prime_export(dev, gobj, flags);
	if (!IS_ERR(buf)) {
		buf->file->f_mapping = dev->anon_inode->i_mapping;
		buf->ops = &amdgpu_dmabuf_ops;
	}

	return buf;
}

/**
 * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
 * @dev: DRM device
 * @dma_buf: Shared DMA buffer
 *
 * The main work is done by the &drm_gem_prime_import helper, which in turn
 * uses &amdgpu_gem_prime_import_sg_table.
 *
 * Returns:
 * GEM BO representing the shared DMA buffer for the given device.
 */
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
					       struct dma_buf *dma_buf)
{
	struct drm_gem_object *obj;

	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
		obj = dma_buf->priv;
		if (obj->dev == dev) {
			/*
			 * Importing a dma-buf exported from our own GEM
			 * increases the refcount on the GEM object itself
			 * instead of the dma-buf's f_count.
			 */
			drm_gem_object_get(obj);
			return obj;
		}
	}

	return drm_gem_prime_import(dev, dma_buf);
}