1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Copyright(c) 2023 Advanced Micro Devices, Inc. */ |
3 | |
4 | #include <linux/anon_inodes.h> |
5 | #include <linux/file.h> |
6 | #include <linux/fs.h> |
7 | #include <linux/highmem.h> |
8 | #include <linux/vfio.h> |
9 | #include <linux/vfio_pci_core.h> |
10 | |
11 | #include "vfio_dev.h" |
12 | #include "cmds.h" |
13 | |
14 | static struct pds_vfio_lm_file * |
15 | pds_vfio_get_lm_file(const struct file_operations *fops, int flags, u64 size) |
16 | { |
17 | struct pds_vfio_lm_file *lm_file = NULL; |
18 | unsigned long long npages; |
19 | struct page **pages; |
20 | void *page_mem; |
21 | const void *p; |
22 | |
23 | if (!size) |
24 | return NULL; |
25 | |
26 | /* Alloc file structure */ |
27 | lm_file = kzalloc(size: sizeof(*lm_file), GFP_KERNEL); |
28 | if (!lm_file) |
29 | return NULL; |
30 | |
31 | /* Create file */ |
32 | lm_file->filep = |
33 | anon_inode_getfile(name: "pds_vfio_lm" , fops, priv: lm_file, flags); |
34 | if (IS_ERR(ptr: lm_file->filep)) |
35 | goto out_free_file; |
36 | |
37 | stream_open(inode: lm_file->filep->f_inode, filp: lm_file->filep); |
38 | mutex_init(&lm_file->lock); |
39 | |
40 | /* prevent file from being released before we are done with it */ |
41 | get_file(f: lm_file->filep); |
42 | |
43 | /* Allocate memory for file pages */ |
44 | npages = DIV_ROUND_UP_ULL(size, PAGE_SIZE); |
45 | pages = kmalloc_array(n: npages, size: sizeof(*pages), GFP_KERNEL); |
46 | if (!pages) |
47 | goto out_put_file; |
48 | |
49 | page_mem = kvzalloc(ALIGN(size, PAGE_SIZE), GFP_KERNEL); |
50 | if (!page_mem) |
51 | goto out_free_pages_array; |
52 | |
53 | p = page_mem - offset_in_page(page_mem); |
54 | for (unsigned long long i = 0; i < npages; i++) { |
55 | if (is_vmalloc_addr(x: p)) |
56 | pages[i] = vmalloc_to_page(addr: p); |
57 | else |
58 | pages[i] = kmap_to_page(addr: (void *)p); |
59 | if (!pages[i]) |
60 | goto out_free_page_mem; |
61 | |
62 | p += PAGE_SIZE; |
63 | } |
64 | |
65 | /* Create scatterlist of file pages to use for DMA mapping later */ |
66 | if (sg_alloc_table_from_pages(sgt: &lm_file->sg_table, pages, n_pages: npages, offset: 0, |
67 | size, GFP_KERNEL)) |
68 | goto out_free_page_mem; |
69 | |
70 | lm_file->size = size; |
71 | lm_file->pages = pages; |
72 | lm_file->npages = npages; |
73 | lm_file->page_mem = page_mem; |
74 | lm_file->alloc_size = npages * PAGE_SIZE; |
75 | |
76 | return lm_file; |
77 | |
78 | out_free_page_mem: |
79 | kvfree(addr: page_mem); |
80 | out_free_pages_array: |
81 | kfree(objp: pages); |
82 | out_put_file: |
83 | fput(lm_file->filep); |
84 | mutex_destroy(lock: &lm_file->lock); |
85 | out_free_file: |
86 | kfree(objp: lm_file); |
87 | |
88 | return NULL; |
89 | } |
90 | |
91 | static void pds_vfio_put_lm_file(struct pds_vfio_lm_file *lm_file) |
92 | { |
93 | mutex_lock(&lm_file->lock); |
94 | |
95 | lm_file->disabled = true; |
96 | lm_file->size = 0; |
97 | lm_file->alloc_size = 0; |
98 | lm_file->filep->f_pos = 0; |
99 | |
100 | /* Free scatter list of file pages */ |
101 | sg_free_table(&lm_file->sg_table); |
102 | |
103 | kvfree(addr: lm_file->page_mem); |
104 | lm_file->page_mem = NULL; |
105 | kfree(objp: lm_file->pages); |
106 | lm_file->pages = NULL; |
107 | |
108 | mutex_unlock(lock: &lm_file->lock); |
109 | |
110 | /* allow file to be released since we are done with it */ |
111 | fput(lm_file->filep); |
112 | } |
113 | |
114 | void pds_vfio_put_save_file(struct pds_vfio_pci_device *pds_vfio) |
115 | { |
116 | if (!pds_vfio->save_file) |
117 | return; |
118 | |
119 | pds_vfio_put_lm_file(lm_file: pds_vfio->save_file); |
120 | pds_vfio->save_file = NULL; |
121 | } |
122 | |
123 | void pds_vfio_put_restore_file(struct pds_vfio_pci_device *pds_vfio) |
124 | { |
125 | if (!pds_vfio->restore_file) |
126 | return; |
127 | |
128 | pds_vfio_put_lm_file(lm_file: pds_vfio->restore_file); |
129 | pds_vfio->restore_file = NULL; |
130 | } |
131 | |
132 | static struct page *pds_vfio_get_file_page(struct pds_vfio_lm_file *lm_file, |
133 | unsigned long offset) |
134 | { |
135 | unsigned long cur_offset = 0; |
136 | struct scatterlist *sg; |
137 | unsigned int i; |
138 | |
139 | /* All accesses are sequential */ |
140 | if (offset < lm_file->last_offset || !lm_file->last_offset_sg) { |
141 | lm_file->last_offset = 0; |
142 | lm_file->last_offset_sg = lm_file->sg_table.sgl; |
143 | lm_file->sg_last_entry = 0; |
144 | } |
145 | |
146 | cur_offset = lm_file->last_offset; |
147 | |
148 | for_each_sg(lm_file->last_offset_sg, sg, |
149 | lm_file->sg_table.orig_nents - lm_file->sg_last_entry, i) { |
150 | if (offset < sg->length + cur_offset) { |
151 | lm_file->last_offset_sg = sg; |
152 | lm_file->sg_last_entry += i; |
153 | lm_file->last_offset = cur_offset; |
154 | return nth_page(sg_page(sg), |
155 | (offset - cur_offset) / PAGE_SIZE); |
156 | } |
157 | cur_offset += sg->length; |
158 | } |
159 | |
160 | return NULL; |
161 | } |
162 | |
163 | static int pds_vfio_release_file(struct inode *inode, struct file *filp) |
164 | { |
165 | struct pds_vfio_lm_file *lm_file = filp->private_data; |
166 | |
167 | mutex_lock(&lm_file->lock); |
168 | lm_file->filep->f_pos = 0; |
169 | lm_file->size = 0; |
170 | mutex_unlock(lock: &lm_file->lock); |
171 | mutex_destroy(lock: &lm_file->lock); |
172 | kfree(objp: lm_file); |
173 | |
174 | return 0; |
175 | } |
176 | |
177 | static ssize_t pds_vfio_save_read(struct file *filp, char __user *buf, |
178 | size_t len, loff_t *pos) |
179 | { |
180 | struct pds_vfio_lm_file *lm_file = filp->private_data; |
181 | ssize_t done = 0; |
182 | |
183 | if (pos) |
184 | return -ESPIPE; |
185 | pos = &filp->f_pos; |
186 | |
187 | mutex_lock(&lm_file->lock); |
188 | |
189 | if (lm_file->disabled) { |
190 | done = -ENODEV; |
191 | goto out_unlock; |
192 | } |
193 | |
194 | if (*pos > lm_file->size) { |
195 | done = -EINVAL; |
196 | goto out_unlock; |
197 | } |
198 | |
199 | len = min_t(size_t, lm_file->size - *pos, len); |
200 | while (len) { |
201 | size_t page_offset; |
202 | struct page *page; |
203 | size_t page_len; |
204 | u8 *from_buff; |
205 | int err; |
206 | |
207 | page_offset = (*pos) % PAGE_SIZE; |
208 | page = pds_vfio_get_file_page(lm_file, offset: *pos - page_offset); |
209 | if (!page) { |
210 | if (done == 0) |
211 | done = -EINVAL; |
212 | goto out_unlock; |
213 | } |
214 | |
215 | page_len = min_t(size_t, len, PAGE_SIZE - page_offset); |
216 | from_buff = kmap_local_page(page); |
217 | err = copy_to_user(to: buf, from: from_buff + page_offset, n: page_len); |
218 | kunmap_local(from_buff); |
219 | if (err) { |
220 | done = -EFAULT; |
221 | goto out_unlock; |
222 | } |
223 | *pos += page_len; |
224 | len -= page_len; |
225 | done += page_len; |
226 | buf += page_len; |
227 | } |
228 | |
229 | out_unlock: |
230 | mutex_unlock(lock: &lm_file->lock); |
231 | return done; |
232 | } |
233 | |
234 | static const struct file_operations pds_vfio_save_fops = { |
235 | .owner = THIS_MODULE, |
236 | .read = pds_vfio_save_read, |
237 | .release = pds_vfio_release_file, |
238 | .llseek = no_llseek, |
239 | }; |
240 | |
241 | static int pds_vfio_get_save_file(struct pds_vfio_pci_device *pds_vfio) |
242 | { |
243 | struct device *dev = &pds_vfio->vfio_coredev.pdev->dev; |
244 | struct pds_vfio_lm_file *lm_file; |
245 | u64 size; |
246 | int err; |
247 | |
248 | /* Get live migration state size in this state */ |
249 | err = pds_vfio_get_lm_state_size_cmd(pds_vfio, size: &size); |
250 | if (err) { |
251 | dev_err(dev, "failed to get save status: %pe\n" , ERR_PTR(err)); |
252 | return err; |
253 | } |
254 | |
255 | dev_dbg(dev, "save status, size = %lld\n" , size); |
256 | |
257 | if (!size) { |
258 | dev_err(dev, "invalid state size\n" ); |
259 | return -EIO; |
260 | } |
261 | |
262 | lm_file = pds_vfio_get_lm_file(fops: &pds_vfio_save_fops, O_RDONLY, size); |
263 | if (!lm_file) { |
264 | dev_err(dev, "failed to create save file\n" ); |
265 | return -ENOENT; |
266 | } |
267 | |
268 | dev_dbg(dev, "size = %lld, alloc_size = %lld, npages = %lld\n" , |
269 | lm_file->size, lm_file->alloc_size, lm_file->npages); |
270 | |
271 | pds_vfio->save_file = lm_file; |
272 | |
273 | return 0; |
274 | } |
275 | |
276 | static ssize_t pds_vfio_restore_write(struct file *filp, const char __user *buf, |
277 | size_t len, loff_t *pos) |
278 | { |
279 | struct pds_vfio_lm_file *lm_file = filp->private_data; |
280 | loff_t requested_length; |
281 | ssize_t done = 0; |
282 | |
283 | if (pos) |
284 | return -ESPIPE; |
285 | |
286 | pos = &filp->f_pos; |
287 | |
288 | if (*pos < 0 || |
289 | check_add_overflow((loff_t)len, *pos, &requested_length)) |
290 | return -EINVAL; |
291 | |
292 | mutex_lock(&lm_file->lock); |
293 | |
294 | if (lm_file->disabled) { |
295 | done = -ENODEV; |
296 | goto out_unlock; |
297 | } |
298 | |
299 | while (len) { |
300 | size_t page_offset; |
301 | struct page *page; |
302 | size_t page_len; |
303 | u8 *to_buff; |
304 | int err; |
305 | |
306 | page_offset = (*pos) % PAGE_SIZE; |
307 | page = pds_vfio_get_file_page(lm_file, offset: *pos - page_offset); |
308 | if (!page) { |
309 | if (done == 0) |
310 | done = -EINVAL; |
311 | goto out_unlock; |
312 | } |
313 | |
314 | page_len = min_t(size_t, len, PAGE_SIZE - page_offset); |
315 | to_buff = kmap_local_page(page); |
316 | err = copy_from_user(to: to_buff + page_offset, from: buf, n: page_len); |
317 | kunmap_local(to_buff); |
318 | if (err) { |
319 | done = -EFAULT; |
320 | goto out_unlock; |
321 | } |
322 | *pos += page_len; |
323 | len -= page_len; |
324 | done += page_len; |
325 | buf += page_len; |
326 | lm_file->size += page_len; |
327 | } |
328 | out_unlock: |
329 | mutex_unlock(lock: &lm_file->lock); |
330 | return done; |
331 | } |
332 | |
333 | static const struct file_operations pds_vfio_restore_fops = { |
334 | .owner = THIS_MODULE, |
335 | .write = pds_vfio_restore_write, |
336 | .release = pds_vfio_release_file, |
337 | .llseek = no_llseek, |
338 | }; |
339 | |
340 | static int pds_vfio_get_restore_file(struct pds_vfio_pci_device *pds_vfio) |
341 | { |
342 | struct device *dev = &pds_vfio->vfio_coredev.pdev->dev; |
343 | struct pds_vfio_lm_file *lm_file; |
344 | u64 size; |
345 | |
346 | size = sizeof(union pds_lm_dev_state); |
347 | dev_dbg(dev, "restore status, size = %lld\n" , size); |
348 | |
349 | if (!size) { |
350 | dev_err(dev, "invalid state size" ); |
351 | return -EIO; |
352 | } |
353 | |
354 | lm_file = pds_vfio_get_lm_file(fops: &pds_vfio_restore_fops, O_WRONLY, size); |
355 | if (!lm_file) { |
356 | dev_err(dev, "failed to create restore file" ); |
357 | return -ENOENT; |
358 | } |
359 | pds_vfio->restore_file = lm_file; |
360 | |
361 | return 0; |
362 | } |
363 | |
364 | struct file * |
365 | pds_vfio_step_device_state_locked(struct pds_vfio_pci_device *pds_vfio, |
366 | enum vfio_device_mig_state next) |
367 | { |
368 | enum vfio_device_mig_state cur = pds_vfio->state; |
369 | int err; |
370 | |
371 | if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_STOP_COPY) { |
372 | err = pds_vfio_get_save_file(pds_vfio); |
373 | if (err) |
374 | return ERR_PTR(error: err); |
375 | |
376 | err = pds_vfio_get_lm_state_cmd(pds_vfio); |
377 | if (err) { |
378 | pds_vfio_put_save_file(pds_vfio); |
379 | return ERR_PTR(error: err); |
380 | } |
381 | |
382 | return pds_vfio->save_file->filep; |
383 | } |
384 | |
385 | if (cur == VFIO_DEVICE_STATE_STOP_COPY && next == VFIO_DEVICE_STATE_STOP) { |
386 | pds_vfio_put_save_file(pds_vfio); |
387 | pds_vfio_dirty_disable(pds_vfio, send_cmd: true); |
388 | return NULL; |
389 | } |
390 | |
391 | if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_RESUMING) { |
392 | err = pds_vfio_get_restore_file(pds_vfio); |
393 | if (err) |
394 | return ERR_PTR(error: err); |
395 | |
396 | return pds_vfio->restore_file->filep; |
397 | } |
398 | |
399 | if (cur == VFIO_DEVICE_STATE_RESUMING && next == VFIO_DEVICE_STATE_STOP) { |
400 | err = pds_vfio_set_lm_state_cmd(pds_vfio); |
401 | if (err) |
402 | return ERR_PTR(error: err); |
403 | |
404 | pds_vfio_put_restore_file(pds_vfio); |
405 | return NULL; |
406 | } |
407 | |
408 | if (cur == VFIO_DEVICE_STATE_RUNNING && next == VFIO_DEVICE_STATE_RUNNING_P2P) { |
409 | pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, |
410 | vf_status: PDS_LM_STA_IN_PROGRESS); |
411 | err = pds_vfio_suspend_device_cmd(pds_vfio, |
412 | type: PDS_LM_SUSPEND_RESUME_TYPE_P2P); |
413 | if (err) |
414 | return ERR_PTR(error: err); |
415 | |
416 | return NULL; |
417 | } |
418 | |
419 | if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && next == VFIO_DEVICE_STATE_RUNNING) { |
420 | err = pds_vfio_resume_device_cmd(pds_vfio, |
421 | type: PDS_LM_SUSPEND_RESUME_TYPE_FULL); |
422 | if (err) |
423 | return ERR_PTR(error: err); |
424 | |
425 | pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, vf_status: PDS_LM_STA_NONE); |
426 | return NULL; |
427 | } |
428 | |
429 | if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_RUNNING_P2P) { |
430 | err = pds_vfio_resume_device_cmd(pds_vfio, |
431 | type: PDS_LM_SUSPEND_RESUME_TYPE_P2P); |
432 | if (err) |
433 | return ERR_PTR(error: err); |
434 | |
435 | return NULL; |
436 | } |
437 | |
438 | if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && next == VFIO_DEVICE_STATE_STOP) { |
439 | err = pds_vfio_suspend_device_cmd(pds_vfio, |
440 | type: PDS_LM_SUSPEND_RESUME_TYPE_FULL); |
441 | if (err) |
442 | return ERR_PTR(error: err); |
443 | return NULL; |
444 | } |
445 | |
446 | return ERR_PTR(error: -EINVAL); |
447 | } |
448 | |