/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include <linux/pci.h>
#include <linux/vmalloc.h>

#include <drm/amdgpu_drm.h>
#ifdef CONFIG_X86
#include <asm/set_memory.h>
#endif
#include "amdgpu.h"
#include <drm/drm_drv.h>
#include <drm/ttm/ttm_tt.h>

/*
 * GART
 * The GART (Graphics Aperture Remapping Table) is an aperture
 * in the GPU's address space. System pages can be mapped into
 * the aperture and look like contiguous pages from the GPU's
 * perspective. A page table maps the pages in the aperture
 * to the actual backing pages in system memory.
 *
 * Radeon GPUs support both an internal GART, as described above,
 * and AGP. AGP works similarly, but the GART table is configured
 * and maintained by the northbridge rather than the driver.
 * Radeon hw has a separate AGP aperture that is programmed to
 * point to the AGP aperture provided by the northbridge and the
 * requests are passed through to the northbridge aperture.
 * Both AGP and internal GART can be used at the same time, however
 * that is not currently supported by the driver.
 *
 * This file handles the common internal GART management.
 */
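
/*
 * A worked example of the aperture page math used throughout this file
 * (illustrative only; the constants come from amdgpu.h): with the 4 KiB
 * AMDGPU_GPU_PAGE_SIZE and a 4 KiB CPU PAGE_SIZE, one CPU page backs
 * exactly one GART entry. On a kernel built with 64 KiB pages,
 * AMDGPU_GPU_PAGES_IN_CPU_PAGE is 16, so binding a single CPU page
 * fills 16 consecutive GART entries, each offset by AMDGPU_GPU_PAGE_SIZE
 * from the page's DMA address.
 */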

/*
 * Common GART table functions.
 */

/**
 * amdgpu_gart_dummy_page_init - init dummy page used by the driver
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate the dummy page used by the driver (all asics).
 * This dummy page is used by the driver as a filler for GART entries
 * when pages are taken out of the GART.
 * Returns 0 on success, -ENOMEM on failure.
 */
static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
{
	struct page *dummy_page = ttm_glob.dummy_read_page;

	if (adev->dummy_page_addr)
		return 0;
	adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0,
					     PAGE_SIZE, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) {
		dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
		adev->dummy_page_addr = 0;
		return -ENOMEM;
	}
	return 0;
}

/**
 * amdgpu_gart_dummy_page_fini - free dummy page used by the driver
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the dummy page used by the driver (all asics).
 */
void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
{
	if (!adev->dummy_page_addr)
		return;
	dma_unmap_page(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE,
		       DMA_BIDIRECTIONAL);
	adev->dummy_page_addr = 0;
}

/**
 * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate system memory for the GART page table on ASICs that don't have
 * dedicated VRAM.
 * Returns 0 for success, error for failure.
 */
int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
{
	unsigned int order = get_order(adev->gart.table_size);
	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
	struct amdgpu_bo *bo = NULL;
	struct sg_table *sg = NULL;
	struct amdgpu_bo_param bp;
	dma_addr_t dma_addr;
	struct page *p;
	int ret;

	if (adev->gart.bo != NULL)
		return 0;

	p = alloc_pages(gfp_flags, order);
	if (!p)
		return -ENOMEM;

	/* If the hardware does not support UTCL2 snooping of the CPU caches
	 * then set_memory_wc() could be used as a workaround to mark the pages
	 * as write combine memory.
	 */
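	/*
	 * A minimal sketch of that workaround, assuming an x86 build where
	 * set_memory_wc()/set_memory_wb() from <asm/set_memory.h> (included
	 * above) are available:
	 *
	 *	#ifdef CONFIG_X86
	 *		ret = set_memory_wc((unsigned long)page_address(p),
	 *				    1 << order);
	 *		if (ret) {
	 *			__free_pages(p, order);
	 *			return ret;
	 *		}
	 *	#endif
	 *
	 * The call would also need to be undone with set_memory_wb() before
	 * the pages are freed.
	 */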
	dma_addr = dma_map_page(&adev->pdev->dev, p, 0, adev->gart.table_size,
				DMA_BIDIRECTIONAL);
	if (dma_mapping_error(&adev->pdev->dev, dma_addr)) {
		dev_err(&adev->pdev->dev, "Failed to DMA MAP the GART BO page\n");
		__free_pages(p, order);
		p = NULL;
		return -EFAULT;
	}

	dev_info(adev->dev, "%s dma_addr:%pad\n", __func__, &dma_addr);
	/* Create SG table */
	sg = kmalloc(sizeof(*sg), GFP_KERNEL);
	if (!sg) {
		ret = -ENOMEM;
		goto error;
	}
	ret = sg_alloc_table(sg, 1, GFP_KERNEL);
	if (ret)
		goto error;

	sg_dma_address(sg->sgl) = dma_addr;
	sg->sgl->length = adev->gart.table_size;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
	sg->sgl->dma_length = adev->gart.table_size;
#endif
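	/*
	 * The whole table was mapped with a single dma_map_page() call above,
	 * so one scatterlist entry covering table_size bytes is enough to
	 * describe it.
	 */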
	/* Create SG BO */
	memset(&bp, 0, sizeof(bp));
	bp.size = adev->gart.table_size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_CPU;
	bp.type = ttm_bo_type_sg;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
	bp.flags = 0;
	ret = amdgpu_bo_create(adev, &bp, &bo);
	if (ret)
		goto error;

	bo->tbo.sg = sg;
	bo->tbo.ttm->sg = sg;
	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;

	ret = amdgpu_bo_reserve(bo, true);
	if (ret) {
		dev_err(adev->dev, "(%d) failed to reserve bo for GART system bo\n", ret);
		goto error;
	}

	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	WARN(ret, "Pinning the GART table failed");
	if (ret)
		goto error_resv;

	adev->gart.bo = bo;
	adev->gart.ptr = page_to_virt(p);
	/* Make GART table accessible in VMID0 */
	ret = amdgpu_ttm_alloc_gart(&adev->gart.bo->tbo);
	if (ret)
		amdgpu_gart_table_ram_free(adev);
	amdgpu_bo_unreserve(bo);

	return 0;

error_resv:
	amdgpu_bo_unreserve(bo);
error:
	amdgpu_bo_unref(&bo);
	if (sg) {
		sg_free_table(sg);
		kfree(sg);
	}
	__free_pages(p, order);
	return ret;
}

/**
 * amdgpu_gart_table_ram_free - free gart page table system ram
 *
 * @adev: amdgpu_device pointer
 *
 * Free the system memory used for the GART page table on ASICs that don't
 * have dedicated VRAM.
 */
void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
{
	unsigned int order = get_order(adev->gart.table_size);
	struct sg_table *sg = adev->gart.bo->tbo.sg;
	struct page *p;
	int ret;

	ret = amdgpu_bo_reserve(adev->gart.bo, false);
	if (!ret) {
		amdgpu_bo_unpin(adev->gart.bo);
		amdgpu_bo_unreserve(adev->gart.bo);
	}
	amdgpu_bo_unref(&adev->gart.bo);
	sg_free_table(sg);
	kfree(sg);
	p = virt_to_page(adev->gart.ptr);
	__free_pages(p, order);

	adev->gart.ptr = NULL;
}

/**
 * amdgpu_gart_table_vram_alloc - allocate vram for gart page table
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate video memory for GART page table
 * (pcie r4xx, r5xx+). These asics require the
 * gart table to be in video memory.
 * Returns 0 for success, error for failure.
 */
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
{
	if (adev->gart.bo != NULL)
		return 0;

	return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE,
				       AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo,
				       NULL, (void *)&adev->gart.ptr);
}

/**
 * amdgpu_gart_table_vram_free - free gart page table vram
 *
 * @adev: amdgpu_device pointer
 *
 * Free the video memory used for the GART page table
 * (pcie r4xx, r5xx+). These asics require the gart table to
 * be in video memory.
 */
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gart.bo, NULL, (void *)&adev->gart.ptr);
}

/*
 * Common gart functions.
 */
/**
 * amdgpu_gart_unbind - unbind pages from the gart page table
 *
 * @adev: amdgpu_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of pages to unbind
 *
 * Unbinds the requested pages from the gart page table and
 * replaces them with the dummy page (all asics).
 */
void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
			int pages)
{
	unsigned t;
	unsigned p;
	int i, j;
	u64 page_base;
	/* Starting from VEGA10, system bit must be 0 to mean invalid. */
	uint64_t flags = 0;
	int idx;

	if (!adev->gart.ptr)
		return;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	t = offset / AMDGPU_GPU_PAGE_SIZE;
	p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
	for (i = 0; i < pages; i++, p++) {
		page_base = adev->dummy_page_addr;
		if (!adev->gart.ptr)
			continue;

		for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
			amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
					       t, page_base, flags);
			page_base += AMDGPU_GPU_PAGE_SIZE;
		}
	}
	mb();
	amdgpu_device_flush_hdp(adev, NULL);
	for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);

	drm_dev_exit(idx);
}
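
/*
 * Illustrative usage sketch (hypothetical caller, names are placeholders):
 * a TTM backend releasing a buffer would point the buffer's aperture range
 * back at the dummy page:
 *
 *	amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
 *
 * The HDP flush and TLB invalidation at the end of this function make the
 * dummy-page mapping visible to the GPU immediately.
 */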

/**
 * amdgpu_gart_map - map dma_addresses into GART entries
 *
 * @adev: amdgpu_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of pages to bind
 * @dma_addr: DMA addresses of pages
 * @flags: page table entry flags
 * @dst: CPU address of the gart table
 *
 * Map the dma_addresses into GART entries (all asics).
 */
void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
		     int pages, dma_addr_t *dma_addr, uint64_t flags,
		     void *dst)
{
	uint64_t page_base;
	unsigned i, j, t;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	t = offset / AMDGPU_GPU_PAGE_SIZE;

	for (i = 0; i < pages; i++) {
		page_base = dma_addr[i];
		for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
			amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
			page_base += AMDGPU_GPU_PAGE_SIZE;
		}
	}
	drm_dev_exit(idx);
}

/**
 * amdgpu_gart_bind - bind pages into the gart page table
 *
 * @adev: amdgpu_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of pages to bind
 * @dma_addr: DMA addresses of pages
 * @flags: page table entry flags
 *
 * Binds the requested pages to the gart page table
 * (all asics).
 */
void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
		      int pages, dma_addr_t *dma_addr,
		      uint64_t flags)
{
	if (!adev->gart.ptr)
		return;

	amdgpu_gart_map(adev, offset, pages, dma_addr, flags, adev->gart.ptr);
}
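
/*
 * Illustrative usage sketch (hypothetical caller, names are placeholders):
 * a caller holding an array of DMA addresses would typically bind them with
 * valid/system PTE flags and then flush so the GPU sees the update:
 *
 *	uint64_t flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM |
 *			 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE;
 *
 *	amdgpu_gart_bind(adev, offset, num_pages, dma_addrs, flags);
 *	amdgpu_gart_invalidate_tlb(adev);
 */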

/**
 * amdgpu_gart_invalidate_tlb - invalidate gart TLB
 *
 * @adev: amdgpu_device pointer
 *
 * Invalidate the GART TLB, which can be used as a way to flush
 * GART changes.
 */
void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
{
	int i;

	if (!adev->gart.ptr)
		return;

	mb();
	amdgpu_device_flush_hdp(adev, NULL);
	for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
}

/**
 * amdgpu_gart_init - init the driver info for managing the gart
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate the dummy page and init the gart driver info (all asics).
 * Returns 0 for success, error for failure.
 */
int amdgpu_gart_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->dummy_page_addr)
		return 0;

	/* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */
	if (PAGE_SIZE < AMDGPU_GPU_PAGE_SIZE) {
		DRM_ERROR("Page size is smaller than GPU page size!\n");
		return -EINVAL;
	}
	r = amdgpu_gart_dummy_page_init(adev);
	if (r)
		return r;
	/* Compute table size */
	adev->gart.num_cpu_pages = adev->gmc.gart_size / PAGE_SIZE;
	adev->gart.num_gpu_pages = adev->gmc.gart_size / AMDGPU_GPU_PAGE_SIZE;
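	/*
	 * Worked example (illustrative): for a 512 MiB GART on a kernel with
	 * 4 KiB pages, both counts are 512 MiB / 4 KiB = 131072; with 64 KiB
	 * CPU pages, num_cpu_pages drops to 8192 while num_gpu_pages stays
	 * at 131072.
	 */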
	DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
		 adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);

	return 0;
}