1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright 2010 |
4 | * by Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> |
5 | * |
 * This code provides an IOMMU for Xen PV guests with PCI passthrough.
 *
 * PV guests under Xen run on a non-contiguous memory architecture.
 *
 * When PCI pass-through is utilized, this necessitates an IOMMU for
 * translating bus (DMA) addresses to virtual addresses and vice-versa, and
 * also a mechanism to provide contiguous pages for device driver operations
 * (say DMA operations).
14 | * |
15 | * Specifically, under Xen the Linux idea of pages is an illusion. It |
16 | * assumes that pages start at zero and go up to the available memory. To |
17 | * help with that, the Linux Xen MMU provides a lookup mechanism to |
18 | * translate the page frame numbers (PFN) to machine frame numbers (MFN) |
 * and vice-versa. The MFNs are the "real" frame numbers. Furthermore,
 * memory is not contiguous: the Xen hypervisor stitches memory for guests
 * from different pools, which means there is no guarantee that PFN==MFN
 * and PFN+1==MFN+1. Lastly, with Xen 4.0, pages (in debug mode) are
 * allocated in descending order (high to low), meaning the guest might
 * never get any MFNs under the 4GB mark.
25 | */ |
26 | |
27 | #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt |
28 | |
29 | #include <linux/memblock.h> |
30 | #include <linux/dma-direct.h> |
31 | #include <linux/dma-map-ops.h> |
32 | #include <linux/export.h> |
33 | #include <xen/swiotlb-xen.h> |
34 | #include <xen/page.h> |
35 | #include <xen/xen-ops.h> |
36 | #include <xen/hvc-console.h> |
37 | |
38 | #include <asm/dma-mapping.h> |
39 | |
40 | #include <trace/events/swiotlb.h> |
41 | #define MAX_DMA_BITS 32 |
42 | |
43 | /* |
44 | * Quick lookup value of the bus address of the IOTLB. |
45 | */ |
46 | |
47 | static inline phys_addr_t xen_phys_to_bus(struct device *dev, phys_addr_t paddr) |
48 | { |
49 | unsigned long bfn = pfn_to_bfn(XEN_PFN_DOWN(paddr)); |
50 | phys_addr_t baddr = (phys_addr_t)bfn << XEN_PAGE_SHIFT; |
51 | |
52 | baddr |= paddr & ~XEN_PAGE_MASK; |
53 | return baddr; |
54 | } |
55 | |
56 | static inline dma_addr_t xen_phys_to_dma(struct device *dev, phys_addr_t paddr) |
57 | { |
	return phys_to_dma(dev, xen_phys_to_bus(dev, paddr));
59 | } |
60 | |
61 | static inline phys_addr_t xen_bus_to_phys(struct device *dev, |
62 | phys_addr_t baddr) |
63 | { |
64 | unsigned long xen_pfn = bfn_to_pfn(XEN_PFN_DOWN(baddr)); |
65 | phys_addr_t paddr = (xen_pfn << XEN_PAGE_SHIFT) | |
66 | (baddr & ~XEN_PAGE_MASK); |
67 | |
68 | return paddr; |
69 | } |
70 | |
71 | static inline phys_addr_t xen_dma_to_phys(struct device *dev, |
72 | dma_addr_t dma_addr) |
73 | { |
	return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr));
75 | } |
76 | |
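/*
 * Return non-zero if the buffer [p, p + size) spans Xen pages whose
 * underlying machine frames are not contiguous, i.e. if it cannot be
 * handed to a device as a single DMA region.
 */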
77 | static inline int range_straddles_page_boundary(phys_addr_t p, size_t size) |
78 | { |
79 | unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p); |
80 | unsigned int i, nr_pages = XEN_PFN_UP(xen_offset_in_page(p) + size); |
81 | |
82 | next_bfn = pfn_to_bfn(xen_pfn); |
83 | |
84 | for (i = 1; i < nr_pages; i++) |
85 | if (pfn_to_bfn(++xen_pfn) != ++next_bfn) |
86 | return 1; |
87 | |
88 | return 0; |
89 | } |
90 | |
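/*
 * Check whether a DMA address points into the local swiotlb bounce buffer.
 * The bus address is translated back to a local PFN first, so that only
 * addresses belonging to this domain are checked against the swiotlb pool.
 */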
91 | static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr) |
92 | { |
93 | unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr)); |
	unsigned long xen_pfn = bfn_to_local_pfn(bfn);
95 | phys_addr_t paddr = (phys_addr_t)xen_pfn << XEN_PAGE_SHIFT; |
96 | |
97 | /* If the address is outside our domain, it CAN |
98 | * have the same virtual address as another address |
99 | * in our domain. Therefore _only_ check address within our domain. |
100 | */ |
101 | if (pfn_valid(PFN_DOWN(paddr))) |
102 | return is_swiotlb_buffer(dev, paddr); |
103 | return 0; |
104 | } |
105 | |
106 | #ifdef CONFIG_X86 |
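/*
 * Make each IO_TLB_SEGSIZE-slot segment of the swiotlb pool machine
 * contiguous, retrying with a progressively wider address limit (up to
 * MAX_DMA_BITS) if the hypervisor cannot satisfy the narrower one.
 */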
107 | int xen_swiotlb_fixup(void *buf, unsigned long nslabs) |
108 | { |
109 | int rc; |
110 | unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT); |
111 | unsigned int i, dma_bits = order + PAGE_SHIFT; |
112 | dma_addr_t dma_handle; |
	phys_addr_t p = virt_to_phys(buf);
114 | |
115 | BUILD_BUG_ON(IO_TLB_SEGSIZE & (IO_TLB_SEGSIZE - 1)); |
116 | BUG_ON(nslabs % IO_TLB_SEGSIZE); |
117 | |
118 | i = 0; |
119 | do { |
120 | do { |
			rc = xen_create_contiguous_region(
				p + (i << IO_TLB_SHIFT), order,
				dma_bits, &dma_handle);
124 | } while (rc && dma_bits++ < MAX_DMA_BITS); |
125 | if (rc) |
126 | return rc; |
127 | |
128 | i += IO_TLB_SEGSIZE; |
129 | } while (i < nslabs); |
130 | return 0; |
131 | } |
132 | |
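/*
 * Allocate coherent memory.  A regular page allocation is attempted first;
 * if the resulting machine addresses do not fit the device's coherent DMA
 * mask or are not machine-contiguous, the region is exchanged with the
 * hypervisor for one that is, and the page is marked as Xen-remapped so
 * the free path can undo the exchange.
 */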
133 | static void * |
134 | xen_swiotlb_alloc_coherent(struct device *dev, size_t size, |
135 | dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) |
136 | { |
137 | u64 dma_mask = dev->coherent_dma_mask; |
138 | int order = get_order(size); |
139 | phys_addr_t phys; |
140 | void *ret; |
141 | |
142 | /* Align the allocation to the Xen page size */ |
143 | size = 1UL << (order + XEN_PAGE_SHIFT); |
144 | |
	ret = (void *)__get_free_pages(flags, get_order(size));
146 | if (!ret) |
147 | return ret; |
	phys = virt_to_phys(ret);
149 | |
	*dma_handle = xen_phys_to_dma(dev, phys);
	if (*dma_handle + size - 1 > dma_mask ||
	    range_straddles_page_boundary(phys, size)) {
		if (xen_create_contiguous_region(phys, order, fls64(dma_mask),
				dma_handle) != 0)
155 | goto out_free_pages; |
156 | SetPageXenRemapped(virt_to_page(ret)); |
157 | } |
158 | |
159 | memset(ret, 0, size); |
160 | return ret; |
161 | |
162 | out_free_pages: |
	free_pages((unsigned long)ret, get_order(size));
164 | return NULL; |
165 | } |
166 | |
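/*
 * Free memory obtained from xen_swiotlb_alloc_coherent, handing any
 * hypervisor-provided contiguous region back before releasing the pages.
 */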
167 | static void |
168 | xen_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, |
169 | dma_addr_t dma_handle, unsigned long attrs) |
170 | { |
	phys_addr_t phys = virt_to_phys(vaddr);
172 | int order = get_order(size); |
173 | |
174 | /* Convert the size to actually allocated. */ |
175 | size = 1UL << (order + XEN_PAGE_SHIFT); |
176 | |
177 | if (WARN_ON_ONCE(dma_handle + size - 1 > dev->coherent_dma_mask) || |
178 | WARN_ON_ONCE(range_straddles_page_boundary(phys, size))) |
179 | return; |
180 | |
181 | if (TestClearPageXenRemapped(virt_to_page(vaddr))) |
		xen_destroy_contiguous_region(phys, order);
	free_pages((unsigned long)vaddr, get_order(size));
184 | } |
185 | #endif /* CONFIG_X86 */ |
186 | |
187 | /* |
188 | * Map a single buffer of the indicated size for DMA in streaming mode. The |
189 | * physical address to use is returned. |
190 | * |
191 | * Once the device is given the dma address, the device owns this memory until |
192 | * either xen_swiotlb_unmap_page or xen_swiotlb_dma_sync_single is performed. |
193 | */ |
194 | static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, |
195 | unsigned long offset, size_t size, |
196 | enum dma_data_direction dir, |
197 | unsigned long attrs) |
198 | { |
199 | phys_addr_t map, phys = page_to_phys(page) + offset; |
	dma_addr_t dev_addr = xen_phys_to_dma(dev, phys);
201 | |
202 | BUG_ON(dir == DMA_NONE); |
203 | /* |
204 | * If the address happens to be in the device's DMA window, |
205 | * we can safely return the device addr and not worry about bounce |
206 | * buffering it. |
207 | */ |
	if (dma_capable(dev, dev_addr, size, true) &&
	    !range_straddles_page_boundary(phys, size) &&
210 | !xen_arch_need_swiotlb(dev, phys, dev_addr) && |
211 | !is_swiotlb_force_bounce(dev)) |
212 | goto done; |
213 | |
214 | /* |
215 | * Oh well, have to allocate and map a bounce buffer. |
216 | */ |
217 | trace_swiotlb_bounced(dev, dev_addr, size); |
218 | |
	map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
220 | if (map == (phys_addr_t)DMA_MAPPING_ERROR) |
221 | return DMA_MAPPING_ERROR; |
222 | |
223 | phys = map; |
	dev_addr = xen_phys_to_dma(dev, map);
225 | |
226 | /* |
227 | * Ensure that the address returned is DMA'ble |
228 | */ |
229 | if (unlikely(!dma_capable(dev, dev_addr, size, true))) { |
		swiotlb_tbl_unmap_single(dev, map, size, dir,
				attrs | DMA_ATTR_SKIP_CPU_SYNC);
232 | return DMA_MAPPING_ERROR; |
233 | } |
234 | |
235 | done: |
236 | if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { |
237 | if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr)))) |
			arch_sync_dma_for_device(phys, size, dir);
		else
			xen_dma_sync_for_device(dev, dev_addr, size, dir);
241 | } |
242 | return dev_addr; |
243 | } |
244 | |
245 | /* |
246 | * Unmap a single streaming mode DMA translation. The dma_addr and size must |
247 | * match what was provided for in a previous xen_swiotlb_map_page call. All |
248 | * other usages are undefined. |
249 | * |
250 | * After this call, reads by the cpu to the buffer are guaranteed to see |
251 | * whatever the device wrote there. |
252 | */ |
253 | static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, |
254 | size_t size, enum dma_data_direction dir, unsigned long attrs) |
255 | { |
	phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr);
257 | |
258 | BUG_ON(dir == DMA_NONE); |
259 | |
	if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
261 | if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr)))) |
262 | arch_sync_dma_for_cpu(paddr, size, dir); |
263 | else |
			xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
265 | } |
266 | |
267 | /* NOTE: We use dev_addr here, not paddr! */ |
	if (is_xen_swiotlb_buffer(hwdev, dev_addr))
		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
270 | } |
271 | |
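/*
 * Sync a single streaming mapping for the CPU: perform any architectural
 * cache maintenance and, if the buffer was bounced, copy the data back
 * out of the swiotlb pool.
 */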
272 | static void |
273 | xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, |
274 | size_t size, enum dma_data_direction dir) |
275 | { |
276 | phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr); |
277 | |
278 | if (!dev_is_dma_coherent(dev)) { |
279 | if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) |
280 | arch_sync_dma_for_cpu(paddr, size, dir); |
281 | else |
			xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
283 | } |
284 | |
285 | if (is_xen_swiotlb_buffer(dev, dma_addr)) |
		swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
287 | } |
288 | |
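/*
 * Sync a single streaming mapping for the device: copy the data into the
 * swiotlb pool first (if the buffer was bounced), then perform any
 * architectural cache maintenance.
 */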
289 | static void |
290 | xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr, |
291 | size_t size, enum dma_data_direction dir) |
292 | { |
293 | phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr); |
294 | |
295 | if (is_xen_swiotlb_buffer(dev, dma_addr)) |
		swiotlb_sync_single_for_device(dev, paddr, size, dir);
297 | |
298 | if (!dev_is_dma_coherent(dev)) { |
299 | if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) |
300 | arch_sync_dma_for_device(paddr, size, dir); |
301 | else |
			xen_dma_sync_for_device(dev, dma_addr, size, dir);
303 | } |
304 | } |
305 | |
306 | /* |
307 | * Unmap a set of streaming mode DMA translations. Again, cpu read rules |
 * concerning calls here are the same as for xen_swiotlb_unmap_page() above.
309 | */ |
310 | static void |
311 | xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, |
312 | enum dma_data_direction dir, unsigned long attrs) |
313 | { |
314 | struct scatterlist *sg; |
315 | int i; |
316 | |
317 | BUG_ON(dir == DMA_NONE); |
318 | |
319 | for_each_sg(sgl, sg, nelems, i) |
		xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg),
321 | dir, attrs); |
322 | |
323 | } |
324 | |
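/*
 * Map a scatter-gather list by mapping each entry individually via
 * xen_swiotlb_map_page.  On failure, the entries mapped so far are
 * unmapped again and -EIO is returned.
 */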
325 | static int |
326 | xen_swiotlb_map_sg(struct device *dev, struct scatterlist *sgl, int nelems, |
327 | enum dma_data_direction dir, unsigned long attrs) |
328 | { |
329 | struct scatterlist *sg; |
330 | int i; |
331 | |
332 | BUG_ON(dir == DMA_NONE); |
333 | |
334 | for_each_sg(sgl, sg, nelems, i) { |
		sg->dma_address = xen_swiotlb_map_page(dev, sg_page(sg),
				sg->offset, sg->length, dir, attrs);
337 | if (sg->dma_address == DMA_MAPPING_ERROR) |
338 | goto out_unmap; |
339 | sg_dma_len(sg) = sg->length; |
340 | } |
341 | |
342 | return nelems; |
343 | out_unmap: |
	xen_swiotlb_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
345 | sg_dma_len(sgl) = 0; |
346 | return -EIO; |
347 | } |
348 | |
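/*
 * The scatter-gather sync operations simply iterate over the list and
 * reuse the single-entry sync helpers above.
 */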
349 | static void |
350 | xen_swiotlb_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, |
351 | int nelems, enum dma_data_direction dir) |
352 | { |
353 | struct scatterlist *sg; |
354 | int i; |
355 | |
356 | for_each_sg(sgl, sg, nelems, i) { |
		xen_swiotlb_sync_single_for_cpu(dev, sg->dma_address,
				sg->length, dir);
359 | } |
360 | } |
361 | |
362 | static void |
363 | xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, |
364 | int nelems, enum dma_data_direction dir) |
365 | { |
366 | struct scatterlist *sg; |
367 | int i; |
368 | |
369 | for_each_sg(sgl, sg, nelems, i) { |
		xen_swiotlb_sync_single_for_device(dev, sg->dma_address,
				sg->length, dir);
372 | } |
373 | } |
374 | |
375 | /* |
376 | * Return whether the given device DMA address mask can be supported |
377 | * properly. For example, if your device can only drive the low 24-bits |
378 | * during bus mastering, then you would pass 0x00ffffff as the mask to |
379 | * this function. |
380 | */ |
381 | static int |
382 | xen_swiotlb_dma_supported(struct device *hwdev, u64 mask) |
383 | { |
	return xen_phys_to_dma(hwdev, default_swiotlb_limit()) <= mask;
385 | } |
386 | |
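/*
 * On non-x86 (Arm) builds coherent allocations fall back to the generic
 * dma-direct implementation; the Xen-specific alloc/free paths above are
 * only built for x86.
 */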
387 | const struct dma_map_ops xen_swiotlb_dma_ops = { |
388 | #ifdef CONFIG_X86 |
389 | .alloc = xen_swiotlb_alloc_coherent, |
390 | .free = xen_swiotlb_free_coherent, |
391 | #else |
392 | .alloc = dma_direct_alloc, |
393 | .free = dma_direct_free, |
394 | #endif |
395 | .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, |
396 | .sync_single_for_device = xen_swiotlb_sync_single_for_device, |
397 | .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu, |
398 | .sync_sg_for_device = xen_swiotlb_sync_sg_for_device, |
399 | .map_sg = xen_swiotlb_map_sg, |
400 | .unmap_sg = xen_swiotlb_unmap_sg, |
401 | .map_page = xen_swiotlb_map_page, |
402 | .unmap_page = xen_swiotlb_unmap_page, |
403 | .dma_supported = xen_swiotlb_dma_supported, |
404 | .mmap = dma_common_mmap, |
405 | .get_sgtable = dma_common_get_sgtable, |
406 | .alloc_pages = dma_common_alloc_pages, |
407 | .free_pages = dma_common_free_pages, |
408 | }; |
409 | |