// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2023 Advanced Micro Devices, Inc. */

#include <linux/interval_tree.h>
#include <linux/vfio.h>
#include <linux/vmalloc.h>

#include <linux/pds/pds_common.h>
#include <linux/pds/pds_core_if.h>
#include <linux/pds/pds_adminq.h>

#include "vfio_dev.h"
#include "cmds.h"
#include "dirty.h"

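/*
 * The device exposes dirty state as a pair of bitmaps per region: a "seq"
 * bitmap the host reads from the device and an "ack" bitmap the host writes
 * back.  Pages whose seq and ack bits differ have been dirtied since the
 * last ack.  These flags select the direction of a pds_vfio_dirty_seq_ack()
 * transfer.
 */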
#define READ_SEQ true
#define WRITE_ACK false

bool pds_vfio_dirty_is_enabled(struct pds_vfio_pci_device *pds_vfio)
{
	return pds_vfio->dirty.is_enabled;
}

void pds_vfio_dirty_set_enabled(struct pds_vfio_pci_device *pds_vfio)
{
	pds_vfio->dirty.is_enabled = true;
}

void pds_vfio_dirty_set_disabled(struct pds_vfio_pci_device *pds_vfio)
{
	pds_vfio->dirty.is_enabled = false;
}

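/*
 * Debug helper: query the device for its current dirty region layout and
 * log each region's DMA base, page count, and page size.
 */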
static void
pds_vfio_print_guest_region_info(struct pds_vfio_pci_device *pds_vfio,
				 u8 max_regions)
{
	int len = max_regions * sizeof(struct pds_lm_dirty_region_info);
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	struct pds_lm_dirty_region_info *region_info;
	dma_addr_t regions_dma;
	u8 num_regions;
	int err;

	region_info = kcalloc(max_regions,
			      sizeof(struct pds_lm_dirty_region_info),
			      GFP_KERNEL);
	if (!region_info)
		return;

	regions_dma =
		dma_map_single(pdsc_dev, region_info, len, DMA_FROM_DEVICE);
	if (dma_mapping_error(pdsc_dev, regions_dma))
		goto out_free_region_info;

	err = pds_vfio_dirty_status_cmd(pds_vfio, regions_dma, &max_regions,
					&num_regions);
	dma_unmap_single(pdsc_dev, regions_dma, len, DMA_FROM_DEVICE);
	if (err)
		goto out_free_region_info;

	for (unsigned int i = 0; i < num_regions; i++)
		dev_dbg(&pdev->dev,
			"region_info[%d]: dma_base 0x%llx page_count %u page_size_log2 %u\n",
			i, le64_to_cpu(region_info[i].dma_base),
			le32_to_cpu(region_info[i].page_count),
			region_info[i].page_size_log2);

out_free_region_info:
	kfree(region_info);
}

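/*
 * Allocate the host-side seq and ack shadow bitmaps for one region.  These
 * can be large (one bit per region page), so they come from vzalloc().
 */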
static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_region *region,
					unsigned long bytes)
{
	unsigned long *host_seq_bmp, *host_ack_bmp;

	host_seq_bmp = vzalloc(bytes);
	if (!host_seq_bmp)
		return -ENOMEM;

	host_ack_bmp = vzalloc(bytes);
	if (!host_ack_bmp) {
		/* host_seq_bmp came from vzalloc(), so undo it with vfree() */
		vfree(host_seq_bmp);
		return -ENOMEM;
	}

	region->host_seq = host_seq_bmp;
	region->host_ack = host_ack_bmp;
	region->bmp_bytes = bytes;

	return 0;
}

static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty)
{
	if (!dirty->regions)
		return;

	for (int i = 0; i < dirty->num_regions; i++) {
		struct pds_vfio_region *region = &dirty->regions[i];

		vfree(region->host_seq);
		vfree(region->host_ack);
		region->host_seq = NULL;
		region->host_ack = NULL;
		region->bmp_bytes = 0;
	}
}

static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio,
				      struct pds_vfio_region *region)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;

	dma_unmap_single(pdsc_dev, region->sgl_addr,
			 region->num_sge * sizeof(struct pds_lm_sg_elem),
			 DMA_BIDIRECTIONAL);
	kfree(region->sgl);

	region->num_sge = 0;
	region->sgl = NULL;
	region->sgl_addr = 0;
}

static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio)
{
	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;

	if (!dirty->regions)
		return;

	for (int i = 0; i < dirty->num_regions; i++) {
		struct pds_vfio_region *region = &dirty->regions[i];

		if (region->sgl)
			__pds_vfio_dirty_free_sgl(pds_vfio, region);
	}
}

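/*
 * Size and map the scatter-gather list used to transfer one region's
 * bitmap: each SG element describes at most one page of bitmap, and one
 * bitmap page covers PAGE_SIZE * 8 region pages (one bit per page).
 */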
static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio,
				    struct pds_vfio_region *region,
				    u32 page_count)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	struct pds_lm_sg_elem *sgl;
	dma_addr_t sgl_addr;
	size_t sgl_size;
	u32 max_sge;

	max_sge = DIV_ROUND_UP(page_count, PAGE_SIZE * 8);
	sgl_size = max_sge * sizeof(struct pds_lm_sg_elem);

	sgl = kzalloc(sgl_size, GFP_KERNEL);
	if (!sgl)
		return -ENOMEM;

	sgl_addr = dma_map_single(pdsc_dev, sgl, sgl_size, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(pdsc_dev, sgl_addr)) {
		kfree(sgl);
		return -EIO;
	}

	region->sgl = sgl;
	region->num_sge = max_sge;
	region->sgl_addr = sgl_addr;

	return 0;
}

static void pds_vfio_dirty_free_regions(struct pds_vfio_dirty *dirty)
{
	vfree(dirty->regions);
	dirty->regions = NULL;
	dirty->num_regions = 0;
}

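/*
 * Build the driver's per-region tracking state from the region_info the
 * device returned: shadow bitmaps, SG lists, and each region's byte offset
 * into the device's single logical dirty bitmap.
 */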
static int pds_vfio_dirty_alloc_regions(struct pds_vfio_pci_device *pds_vfio,
					struct pds_lm_dirty_region_info *region_info,
					u64 region_page_size, u8 num_regions)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
	u32 dev_bmp_offset_byte = 0;
	int err;

	dirty->regions = vcalloc(num_regions, sizeof(struct pds_vfio_region));
	if (!dirty->regions)
		return -ENOMEM;
	dirty->num_regions = num_regions;

	for (int i = 0; i < num_regions; i++) {
		struct pds_lm_dirty_region_info *ri = &region_info[i];
		struct pds_vfio_region *region = &dirty->regions[i];
		u64 region_size, region_start;
		u32 page_count;

		/* page_count might be adjusted by the device */
		page_count = le32_to_cpu(ri->page_count);
		region_start = le64_to_cpu(ri->dma_base);
		region_size = page_count * region_page_size;

		err = pds_vfio_dirty_alloc_bitmaps(region,
						   page_count / BITS_PER_BYTE);
		if (err) {
			dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n",
				ERR_PTR(err));
			goto out_free_regions;
		}

		err = pds_vfio_dirty_alloc_sgl(pds_vfio, region, page_count);
		if (err) {
			dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n",
				ERR_PTR(err));
			goto out_free_regions;
		}

		region->size = region_size;
		region->start = region_start;
		region->page_size = region_page_size;
		region->dev_bmp_offset_start_byte = dev_bmp_offset_byte;

		dev_bmp_offset_byte += page_count / BITS_PER_BYTE;
		if (dev_bmp_offset_byte % BITS_PER_BYTE) {
			dev_err(&pdev->dev, "Device bitmap offset is mis-aligned\n");
			err = -EINVAL;
			goto out_free_regions;
		}
	}

	return 0;

out_free_regions:
	pds_vfio_dirty_free_bitmaps(dirty);
	pds_vfio_dirty_free_sgl(pds_vfio);
	pds_vfio_dirty_free_regions(dirty);

	return err;
}

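/*
 * Enable dirty page tracking: verify the device supports it and is not
 * already tracking, collapse the requested IOVA ranges down to the
 * device's max_regions limit, describe each range to the device, and
 * allocate the host-side tracking state.
 */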
static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio,
				 struct rb_root_cached *ranges, u32 nnodes,
				 u64 *page_size)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	struct pds_lm_dirty_region_info *region_info;
	struct interval_tree_node *node = NULL;
	u64 region_page_size = *page_size;
	u8 max_regions = 0, num_regions;
	dma_addr_t regions_dma = 0;
	u32 num_ranges = nnodes;
	int err;
	u16 len;

	dev_dbg(&pdev->dev, "vf%u: Start dirty page tracking\n",
		pds_vfio->vf_id);

	if (pds_vfio_dirty_is_enabled(pds_vfio))
		return -EINVAL;

	/* find if dirty tracking is disabled, i.e. num_regions == 0 */
	err = pds_vfio_dirty_status_cmd(pds_vfio, 0, &max_regions,
					&num_regions);
	if (err < 0) {
		dev_err(&pdev->dev, "Failed to get dirty status, err %pe\n",
			ERR_PTR(err));
		return err;
	} else if (num_regions) {
		dev_err(&pdev->dev,
			"Dirty tracking already enabled for %d regions\n",
			num_regions);
		return -EEXIST;
	} else if (!max_regions) {
		dev_err(&pdev->dev,
			"Device doesn't support dirty tracking, max_regions %d\n",
			max_regions);
		return -EOPNOTSUPP;
	}

	if (num_ranges > max_regions) {
		vfio_combine_iova_ranges(ranges, nnodes, max_regions);
		num_ranges = max_regions;
	}

	region_info = kcalloc(num_ranges, sizeof(*region_info), GFP_KERNEL);
	if (!region_info)
		return -ENOMEM;
	len = num_ranges * sizeof(*region_info);

	node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
	if (!node) {
		err = -EINVAL;
		goto out_free_region_info;
	}
	for (int i = 0; i < num_ranges; i++) {
		struct pds_lm_dirty_region_info *ri = &region_info[i];
		u64 region_size = node->last - node->start + 1;
		u64 region_start = node->start;
		u32 page_count;

		page_count = DIV_ROUND_UP(region_size, region_page_size);

		ri->dma_base = cpu_to_le64(region_start);
		ri->page_count = cpu_to_le32(page_count);
		ri->page_size_log2 = ilog2(region_page_size);

		dev_dbg(&pdev->dev,
			"region_info[%d]: region_start 0x%llx region_end 0x%lx region_size 0x%llx page_count %u page_size %llu\n",
			i, region_start, node->last, region_size, page_count,
			region_page_size);

		node = interval_tree_iter_next(node, 0, ULONG_MAX);
	}

	regions_dma = dma_map_single(pdsc_dev, (void *)region_info, len,
				     DMA_BIDIRECTIONAL);
	if (dma_mapping_error(pdsc_dev, regions_dma)) {
		err = -ENOMEM;
		goto out_free_region_info;
	}

	err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, num_ranges);
	dma_unmap_single(pdsc_dev, regions_dma, len, DMA_BIDIRECTIONAL);
	if (err)
		goto out_free_region_info;

	err = pds_vfio_dirty_alloc_regions(pds_vfio, region_info,
					   region_page_size, num_ranges);
	if (err) {
		dev_err(&pdev->dev,
			"Failed to allocate %d regions for tracking dirty regions: %pe\n",
			num_ranges, ERR_PTR(err));
		goto out_dirty_disable;
	}

	pds_vfio_dirty_set_enabled(pds_vfio);

	pds_vfio_print_guest_region_info(pds_vfio, max_regions);

	kfree(region_info);

	return 0;

out_dirty_disable:
	pds_vfio_dirty_disable_cmd(pds_vfio);
out_free_region_info:
	kfree(region_info);
	return err;
}

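/*
 * Tear down dirty tracking.  send_cmd controls whether the device is also
 * told to stop tracking and to clear the LM status.
 */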
void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd)
{
	if (pds_vfio_dirty_is_enabled(pds_vfio)) {
		pds_vfio_dirty_set_disabled(pds_vfio);
		if (send_cmd)
			pds_vfio_dirty_disable_cmd(pds_vfio);
		pds_vfio_dirty_free_sgl(pds_vfio);
		pds_vfio_dirty_free_bitmaps(&pds_vfio->dirty);
		pds_vfio_dirty_free_regions(&pds_vfio->dirty);
	}

	if (send_cmd)
		pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_NONE);
}

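/*
 * Transfer one window of a region's seq or ack bitmap between the host
 * shadow copy and the device: translate the vmalloc'd bitmap window page
 * by page into an SG table, copy it into the region's SG list, and issue
 * the seq/ack admin command.
 */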
static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio,
				  struct pds_vfio_region *region,
				  unsigned long *seq_ack_bmp, u32 offset,
				  u32 bmp_bytes, bool read_seq)
{
	const char *bmp_type_str = read_seq ? "read_seq" : "write_ack";
	u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	unsigned long long npages;
	struct sg_table sg_table;
	struct scatterlist *sg;
	struct page **pages;
	u32 page_offset;
	const void *bmp;
	size_t size;
	u16 num_sge;
	int err;
	int i;

	bmp = (void *)((u64)seq_ack_bmp + offset);
	page_offset = offset_in_page(bmp);
	bmp -= page_offset;

	/*
	 * Start and end of bitmap section to seq/ack might not be page
	 * aligned, so use the page_offset to account for that so there
	 * will be enough pages to represent the bmp_bytes
	 */
	npages = DIV_ROUND_UP_ULL(bmp_bytes + page_offset, PAGE_SIZE);
	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	for (unsigned long long i = 0; i < npages; i++) {
		struct page *page = vmalloc_to_page(bmp);

		if (!page) {
			err = -EFAULT;
			goto out_free_pages;
		}

		pages[i] = page;
		bmp += PAGE_SIZE;
	}

	err = sg_alloc_table_from_pages(&sg_table, pages, npages, page_offset,
					bmp_bytes, GFP_KERNEL);
	if (err)
		goto out_free_pages;

	err = dma_map_sgtable(pdsc_dev, &sg_table, dma_dir, 0);
	if (err)
		goto out_free_sg_table;

	for_each_sgtable_dma_sg(&sg_table, sg, i) {
		struct pds_lm_sg_elem *sg_elem = &region->sgl[i];

		sg_elem->addr = cpu_to_le64(sg_dma_address(sg));
		sg_elem->len = cpu_to_le32(sg_dma_len(sg));
	}

	num_sge = sg_table.nents;
	size = num_sge * sizeof(struct pds_lm_sg_elem);
	offset += region->dev_bmp_offset_start_byte;
	dma_sync_single_for_device(pdsc_dev, region->sgl_addr, size, dma_dir);
	err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, region->sgl_addr, num_sge,
					 offset, bmp_bytes, read_seq);
	if (err)
		dev_err(&pdev->dev,
			"Dirty bitmap %s failed offset %u bmp_bytes %u num_sge %u DMA 0x%llx: %pe\n",
			bmp_type_str, offset, bmp_bytes,
			num_sge, region->sgl_addr, ERR_PTR(err));
	dma_sync_single_for_cpu(pdsc_dev, region->sgl_addr, size, dma_dir);

	dma_unmap_sgtable(pdsc_dev, &sg_table, dma_dir, 0);
out_free_sg_table:
	sg_free_table(&sg_table);
out_free_pages:
	kfree(pages);

	return err;
}

static int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio,
				    struct pds_vfio_region *region,
				    u32 offset, u32 len)
{
	return pds_vfio_dirty_seq_ack(pds_vfio, region, region->host_ack,
				      offset, len, WRITE_ACK);
}

static int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio,
				   struct pds_vfio_region *region,
				   u32 offset, u32 len)
{
	return pds_vfio_dirty_seq_ack(pds_vfio, region, region->host_seq,
				      offset, len, READ_SEQ);
}

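/*
 * Walk the seq and ack bitmaps 64 bits at a time; any bit that differs
 * marks a page dirtied since the last ack.  Report each such page into
 * the caller's IOVA bitmap and copy seq into ack so the next write_ack
 * acknowledges it to the device.
 */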
static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio,
					  struct pds_vfio_region *region,
					  struct iova_bitmap *dirty_bitmap,
					  u32 bmp_offset, u32 len_bytes)
{
	u64 page_size = region->page_size;
	u64 region_start = region->start;
	u32 bmp_offset_bit;
	__le64 *seq, *ack;
	int dword_count;

	dword_count = len_bytes / sizeof(u64);
	seq = (__le64 *)((u64)region->host_seq + bmp_offset);
	ack = (__le64 *)((u64)region->host_ack + bmp_offset);
	bmp_offset_bit = bmp_offset * 8;

	for (int i = 0; i < dword_count; i++) {
		u64 xor = le64_to_cpu(seq[i]) ^ le64_to_cpu(ack[i]);

		/* prepare for next write_ack call */
		ack[i] = seq[i];

		for (u8 bit_i = 0; bit_i < BITS_PER_TYPE(u64); ++bit_i) {
			if (xor & BIT(bit_i)) {
				u64 abs_bit_i = bmp_offset_bit +
						i * BITS_PER_TYPE(u64) + bit_i;
				u64 addr = abs_bit_i * page_size + region_start;

				iova_bitmap_set(dirty_bitmap, addr, page_size);
			}
		}
	}

	return 0;
}

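/* Find the tracked region containing the given IOVA, if any. */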
static struct pds_vfio_region *
pds_vfio_get_region(struct pds_vfio_pci_device *pds_vfio, unsigned long iova)
{
	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;

	for (int i = 0; i < dirty->num_regions; i++) {
		struct pds_vfio_region *region = &dirty->regions[i];

		if (iova >= region->start &&
		    iova < (region->start + region->size))
			return region;
	}

	return NULL;
}

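/*
 * Sync one IOVA range's dirty state: read the seq bitmap window from the
 * device, diff it against the ack bitmap into the caller's IOVA bitmap,
 * then write the updated ack window back to the device.
 */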
static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio,
			       struct iova_bitmap *dirty_bitmap,
			       unsigned long iova, unsigned long length)
{
	struct device *dev = &pds_vfio->vfio_coredev.pdev->dev;
	struct pds_vfio_region *region;
	u64 bmp_offset, bmp_bytes;
	u64 bitmap_size, pages;
	int err;

	dev_dbg(dev, "vf%u: Get dirty page bitmap\n", pds_vfio->vf_id);

	if (!pds_vfio_dirty_is_enabled(pds_vfio)) {
		dev_err(dev, "vf%u: Sync failed, dirty tracking is disabled\n",
			pds_vfio->vf_id);
		return -EINVAL;
	}

	region = pds_vfio_get_region(pds_vfio, iova);
	if (!region) {
		dev_err(dev, "vf%u: Failed to find region that contains iova 0x%lx length 0x%lx\n",
			pds_vfio->vf_id, iova, length);
		return -EINVAL;
	}

	pages = DIV_ROUND_UP(length, region->page_size);
	bitmap_size =
		round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE;

	dev_dbg(dev,
		"vf%u: iova 0x%lx length %lu page_size %llu pages %llu bitmap_size %llu\n",
		pds_vfio->vf_id, iova, length, region->page_size,
		pages, bitmap_size);

	if (!length || ((iova - region->start + length) > region->size)) {
		dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n",
			iova, length);
		return -EINVAL;
	}

	/* bitmap is modified in 64 bit chunks */
	bmp_bytes = ALIGN(DIV_ROUND_UP(length / region->page_size,
				       sizeof(u64)), sizeof(u64));
	if (bmp_bytes != bitmap_size) {
		dev_err(dev,
			"Calculated bitmap bytes %llu not equal to bitmap size %llu\n",
			bmp_bytes, bitmap_size);
		return -EINVAL;
	}

	if (bmp_bytes > region->bmp_bytes) {
		dev_err(dev,
			"Calculated bitmap bytes %llu larger than region's cached bmp_bytes %llu\n",
			bmp_bytes, region->bmp_bytes);
		return -EINVAL;
	}

	bmp_offset = DIV_ROUND_UP((iova - region->start) /
				  region->page_size, sizeof(u64));

	dev_dbg(dev,
		"Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n",
		iova, length, bmp_offset, bmp_bytes);

	err = pds_vfio_dirty_read_seq(pds_vfio, region, bmp_offset, bmp_bytes);
	if (err)
		return err;

	err = pds_vfio_dirty_process_bitmaps(pds_vfio, region, dirty_bitmap,
					     bmp_offset, bmp_bytes);
	if (err)
		return err;

	err = pds_vfio_dirty_write_ack(pds_vfio, region, bmp_offset, bmp_bytes);
	if (err)
		return err;

	return 0;
}

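/*
 * VFIO DMA logging callbacks.  Each takes the state_mutex to serialize
 * with migration state transitions.
 */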
int pds_vfio_dma_logging_report(struct vfio_device *vdev, unsigned long iova,
				unsigned long length, struct iova_bitmap *dirty)
{
	struct pds_vfio_pci_device *pds_vfio =
		container_of(vdev, struct pds_vfio_pci_device,
			     vfio_coredev.vdev);
	int err;

	mutex_lock(&pds_vfio->state_mutex);
	err = pds_vfio_dirty_sync(pds_vfio, dirty, iova, length);
	mutex_unlock(&pds_vfio->state_mutex);

	return err;
}

int pds_vfio_dma_logging_start(struct vfio_device *vdev,
			       struct rb_root_cached *ranges, u32 nnodes,
			       u64 *page_size)
{
	struct pds_vfio_pci_device *pds_vfio =
		container_of(vdev, struct pds_vfio_pci_device,
			     vfio_coredev.vdev);
	int err;

	mutex_lock(&pds_vfio->state_mutex);
	pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_IN_PROGRESS);
	err = pds_vfio_dirty_enable(pds_vfio, ranges, nnodes, page_size);
	mutex_unlock(&pds_vfio->state_mutex);

	return err;
}

int pds_vfio_dma_logging_stop(struct vfio_device *vdev)
{
	struct pds_vfio_pci_device *pds_vfio =
		container_of(vdev, struct pds_vfio_pci_device,
			     vfio_coredev.vdev);

	mutex_lock(&pds_vfio->state_mutex);
	pds_vfio_dirty_disable(pds_vfio, true);
	mutex_unlock(&pds_vfio->state_mutex);

	return 0;
}