// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
 */

#include "cmd.h"

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

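/*
 * Check whether the other function (the VF, addressed by func_id)
 * exposes the 'migratable' HCA capability.
 */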
static int mlx5vf_is_migratable(struct mlx5_core_dev *mdev, u16 func_id)
{
	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	void *query_cap = NULL, *cap;
	int ret;

	query_cap = kzalloc(query_sz, GFP_KERNEL);
	if (!query_cap)
		return -ENOMEM;

	ret = mlx5_vport_get_other_func_cap(mdev, func_id, query_cap,
					    MLX5_CAP_GENERAL_2);
	if (ret)
		goto out;

	cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability);
	if (!MLX5_GET(cmd_hca_cap_2, cap, migratable))
		ret = -EOPNOTSUPP;
out:
	kfree(query_cap);
	return ret;
}

static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
				  u16 *vhca_id);
static void
_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev);

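/* Issue SUSPEND_VHCA for this VF's vhca_id with the requested op_mod. */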
int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
	struct mlx5_vf_migration_file *migf = mvdev->saving_migf;
	u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
	u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
	int err;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	/*
	 * In case PRE_COPY is used, saving_migf is exposed while the device is
	 * running. Make sure to run only once there is no active save command.
	 * Running both in parallel might end up with a failure in the save
	 * command once it tries to turn on 'tracking' on a suspended device.
	 */
	if (migf) {
		err = wait_for_completion_interruptible(&migf->save_comp);
		if (err)
			return err;
	}

	MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
	MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);

	err = mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
	if (migf)
		complete(&migf->save_comp);

	return err;
}

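/* Issue RESUME_VHCA for this VF's vhca_id with the requested op_mod. */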
int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
	u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
	u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
	MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(resume_vhca_in, in, op_mod, op_mod);

	return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out);
}

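/*
 * Query the size of the next migration image and, in chunk mode, the
 * remaining total size of the device state.
 */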
int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
					  size_t *state_size, u64 *total_size,
					  u8 query_flags)
{
	u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
	bool inc = query_flags & MLX5VF_QUERY_INC;
	int ret;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	/*
	 * In case PRE_COPY is used, saving_migf is exposed while the device is
	 * running. Make sure to run only once there is no active save command.
	 * Running both in parallel might end up with a failure in the
	 * incremental query command on an un-tracked vhca.
	 */
	if (inc) {
		ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp);
		if (ret)
			return ret;
		/* Upon cleanup, ignore the previous pre_copy error state */
		if (mvdev->saving_migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR &&
		    !(query_flags & MLX5VF_QUERY_CLEANUP)) {
			/*
			 * In case we had a PRE_COPY error, query the full
			 * image only for the final image
			 */
			if (!(query_flags & MLX5VF_QUERY_FINAL)) {
				*state_size = 0;
				complete(&mvdev->saving_migf->save_comp);
				return 0;
			}
			query_flags &= ~MLX5VF_QUERY_INC;
		}
		/* Block the incremental query, which is state-dependent */
		if (mvdev->saving_migf->state == MLX5_MIGF_STATE_ERROR) {
			complete(&mvdev->saving_migf->save_comp);
			return -ENODEV;
		}
	}

	MLX5_SET(query_vhca_migration_state_in, in, opcode,
		 MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
	MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
	MLX5_SET(query_vhca_migration_state_in, in, incremental,
		 query_flags & MLX5VF_QUERY_INC);
	MLX5_SET(query_vhca_migration_state_in, in, chunk, mvdev->chunk_mode);

	ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
				  out);
	if (inc)
		complete(&mvdev->saving_migf->save_comp);

	if (ret)
		return ret;

	*state_size = MLX5_GET(query_vhca_migration_state_out, out,
			       required_umem_size);
	if (total_size)
		*total_size = mvdev->chunk_mode ?
			MLX5_GET64(query_vhca_migration_state_out, out,
				   remaining_total_size) : *state_size;

	return 0;
}

static void set_tracker_change_event(struct mlx5vf_pci_core_device *mvdev)
{
	mvdev->tracker.object_changed = true;
	complete(&mvdev->tracker_comp);
}

static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev)
{
	/* Mark the tracker under an error and wake it up if it's running */
	mvdev->tracker.is_err = true;
	complete(&mvdev->tracker_comp);
}

static int mlx5fv_vf_event(struct notifier_block *nb,
			   unsigned long event, void *data)
{
	struct mlx5vf_pci_core_device *mvdev =
		container_of(nb, struct mlx5vf_pci_core_device, nb);

	switch (event) {
	case MLX5_PF_NOTIFY_ENABLE_VF:
		mutex_lock(&mvdev->state_mutex);
		mvdev->mdev_detach = false;
		mlx5vf_state_mutex_unlock(mvdev);
		break;
	case MLX5_PF_NOTIFY_DISABLE_VF:
		mlx5vf_cmd_close_migratable(mvdev);
		mutex_lock(&mvdev->state_mutex);
		mvdev->mdev_detach = true;
		mlx5vf_state_mutex_unlock(mvdev);
		break;
	default:
		break;
	}

	return 0;
}

void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
{
	if (!mvdev->migrate_cap)
		return;

	/* Must be done outside the lock to let it progress */
	set_tracker_error(mvdev);
	mutex_lock(&mvdev->state_mutex);
	mlx5vf_disable_fds(mvdev, NULL);
	_mlx5vf_free_page_tracker_resources(mvdev);
	mlx5vf_state_mutex_unlock(mvdev);
}

void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
{
	if (!mvdev->migrate_cap)
		return;

	mlx5_sriov_blocking_notifier_unregister(mvdev->mdev, mvdev->vf_id,
						&mvdev->nb);
	destroy_workqueue(mvdev->cb_wq);
}

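/*
 * Probe the VF's migration capabilities and, when all required features
 * are present, register the SR-IOV notifier, set up the callback
 * workqueue and expose the VFIO migration (and optionally logging) ops.
 */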
void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
			       const struct vfio_migration_ops *mig_ops,
			       const struct vfio_log_ops *log_ops)
{
	struct pci_dev *pdev = mvdev->core_device.pdev;
	int ret;

	if (!pdev->is_virtfn)
		return;

	mvdev->mdev = mlx5_vf_get_core_dev(pdev);
	if (!mvdev->mdev)
		return;

	if (!MLX5_CAP_GEN(mvdev->mdev, migration))
		goto end;

	if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
	      MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)))
		goto end;

	mvdev->vf_id = pci_iov_vf_id(pdev);
	if (mvdev->vf_id < 0)
		goto end;

	ret = mlx5vf_is_migratable(mvdev->mdev, mvdev->vf_id + 1);
	if (ret)
		goto end;

	if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1,
				   &mvdev->vhca_id))
		goto end;

	mvdev->cb_wq = alloc_ordered_workqueue("mlx5vf_wq", 0);
	if (!mvdev->cb_wq)
		goto end;

	mutex_init(&mvdev->state_mutex);
	spin_lock_init(&mvdev->reset_lock);
	mvdev->nb.notifier_call = mlx5fv_vf_event;
	ret = mlx5_sriov_blocking_notifier_register(mvdev->mdev, mvdev->vf_id,
						    &mvdev->nb);
	if (ret) {
		destroy_workqueue(mvdev->cb_wq);
		goto end;
	}

	mvdev->migrate_cap = 1;
	mvdev->core_device.vdev.migration_flags =
		VFIO_MIGRATION_STOP_COPY |
		VFIO_MIGRATION_P2P |
		VFIO_MIGRATION_PRE_COPY;

	mvdev->core_device.vdev.mig_ops = mig_ops;
	init_completion(&mvdev->tracker_comp);
	if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
		mvdev->core_device.vdev.log_ops = log_ops;

	if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
		mvdev->chunk_mode = 1;

end:
	mlx5_vf_put_core_dev(mvdev->mdev);
}

static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
				  u16 *vhca_id)
{
	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
	int out_size;
	void *out;
	int ret;

	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	out = kzalloc(out_size, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, other_function, 1);
	MLX5_SET(query_hca_cap_in, in, function_id, function_id);
	MLX5_SET(query_hca_cap_in, in, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
		 HCA_CAP_OPMOD_GET_CUR);

	ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
	if (ret)
		goto err_exec;

	*vhca_id = MLX5_GET(query_hca_cap_out, out,
			    capability.cmd_hca_cap.vhca_id);

err_exec:
	kfree(out);
	return ret;
}

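/*
 * Create an MTT mkey covering either a migration data buffer (buf) or
 * the page tracker's receive buffer (recv_buf); exactly one of the two
 * is expected to be non-NULL.
 */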
static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
			struct mlx5_vhca_data_buffer *buf,
			struct mlx5_vhca_recv_buf *recv_buf,
			u32 *mkey)
{
	size_t npages = buf ? DIV_ROUND_UP(buf->allocated_length, PAGE_SIZE) :
				recv_buf->npages;
	int err = 0, inlen;
	__be64 *mtt;
	void *mkc;
	u32 *in;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
		sizeof(*mtt) * round_up(npages, 2);

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 DIV_ROUND_UP(npages, 2));
	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);

	if (buf) {
		struct sg_dma_page_iter dma_iter;

		for_each_sgtable_dma_page(&buf->table.sgt, &dma_iter, 0)
			*mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
	} else {
		int i;

		for (i = 0; i < npages; i++)
			*mtt++ = cpu_to_be64(recv_buf->dma_addrs[i]);
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, rr, 1);
	MLX5_SET(mkc, mkc, rw, 1);
	MLX5_SET(mkc, mkc, pd, pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
	MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
	MLX5_SET64(mkc, mkc, len, npages * PAGE_SIZE);
	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
	kvfree(in);
	return err;
}

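/* DMA-map the buffer's sg table and create an mkey covering it. */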
static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf)
{
	struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev;
	struct mlx5_core_dev *mdev = mvdev->mdev;
	int ret;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	if (buf->dmaed || !buf->allocated_length)
		return -EINVAL;

	ret = dma_map_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
	if (ret)
		return ret;

	ret = _create_mkey(mdev, buf->migf->pdn, buf, NULL, &buf->mkey);
	if (ret)
		goto err;

	buf->dmaed = true;

	return 0;
err:
	dma_unmap_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
	return ret;
}

void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
{
	struct mlx5_vf_migration_file *migf = buf->migf;
	struct sg_page_iter sg_iter;

	lockdep_assert_held(&migf->mvdev->state_mutex);
	WARN_ON(migf->mvdev->mdev_detach);

	if (buf->dmaed) {
		mlx5_core_destroy_mkey(migf->mvdev->mdev, buf->mkey);
		dma_unmap_sgtable(migf->mvdev->mdev->device, &buf->table.sgt,
				  buf->dma_dir, 0);
	}

	/* Undo alloc_pages_bulk_array() */
	for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
		__free_page(sg_page_iter_page(&sg_iter));
	sg_free_append_table(&buf->table);
	kfree(buf);
}

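/*
 * Grow the buffer by npages, allocating pages in bulk (up to one page's
 * worth of pointers at a time) and appending them to the sg table.
 */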
static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
				      unsigned int npages)
{
	unsigned int to_alloc = npages;
	struct page **page_list;
	unsigned long filled;
	unsigned int to_fill;
	int ret;

	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
	page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
	if (!page_list)
		return -ENOMEM;

	do {
		filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
						page_list);
		if (!filled) {
			ret = -ENOMEM;
			goto err;
		}
		to_alloc -= filled;
		ret = sg_alloc_append_table_from_pages(
			&buf->table, page_list, filled, 0,
			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
			GFP_KERNEL_ACCOUNT);

		if (ret)
			goto err;
		buf->allocated_length += filled * PAGE_SIZE;
		/* clean the input for another bulk allocation */
		memset(page_list, 0, filled * sizeof(*page_list));
		to_fill = min_t(unsigned int, to_alloc,
				PAGE_SIZE / sizeof(*page_list));
	} while (to_alloc > 0);

	kvfree(page_list);
	return 0;

err:
	kvfree(page_list);
	return ret;
}

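/*
 * Allocate a migration data buffer of the given length and, unless
 * dma_dir is DMA_NONE, DMA-map it and create its mkey.
 */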
struct mlx5_vhca_data_buffer *
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
			 size_t length,
			 enum dma_data_direction dma_dir)
{
	struct mlx5_vhca_data_buffer *buf;
	int ret;

	buf = kzalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
	if (!buf)
		return ERR_PTR(-ENOMEM);

	buf->dma_dir = dma_dir;
	buf->migf = migf;
	if (length) {
		ret = mlx5vf_add_migration_pages(buf,
				DIV_ROUND_UP_ULL(length, PAGE_SIZE));
		if (ret)
			goto end;

		if (dma_dir != DMA_NONE) {
			ret = mlx5vf_dma_data_buffer(buf);
			if (ret)
				goto end;
		}
	}

	return buf;
end:
	mlx5vf_free_data_buffer(buf);
	return ERR_PTR(ret);
}

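/* Return a buffer to the migration file's avail_list for later reuse. */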
void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf)
{
	spin_lock_irq(&buf->migf->list_lock);
	buf->stop_copy_chunk_num = 0;
	list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
	spin_unlock_irq(&buf->migf->list_lock);
}

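/*
 * Get a buffer of at least 'length' bytes for the given DMA direction,
 * reusing one from avail_list when possible; smaller same-direction
 * buffers encountered on the way are freed outside the spin lock.
 */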
struct mlx5_vhca_data_buffer *
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
		       size_t length, enum dma_data_direction dma_dir)
{
	struct mlx5_vhca_data_buffer *buf, *temp_buf;
	struct list_head free_list;

	lockdep_assert_held(&migf->mvdev->state_mutex);
	if (migf->mvdev->mdev_detach)
		return ERR_PTR(-ENOTCONN);

	INIT_LIST_HEAD(&free_list);

	spin_lock_irq(&migf->list_lock);
	list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
		if (buf->dma_dir == dma_dir) {
			list_del_init(&buf->buf_elm);
			if (buf->allocated_length >= length) {
				spin_unlock_irq(&migf->list_lock);
				goto found;
			}
			/*
			 * Prevent holding redundant buffers. Put them on a
			 * free list and call mlx5vf_free_data_buffer() on
			 * them at the end, outside the spin lock
			 * (&migf->list_lock), as it might sleep.
			 */
			list_add(&buf->buf_elm, &free_list);
		}
	}
	spin_unlock_irq(&migf->list_lock);
	buf = mlx5vf_alloc_data_buffer(migf, length, dma_dir);

found:
	while ((temp_buf = list_first_entry_or_null(&free_list,
				struct mlx5_vhca_data_buffer, buf_elm))) {
		list_del(&temp_buf->buf_elm);
		mlx5vf_free_data_buffer(temp_buf);
	}

	return buf;
}

static void
mlx5vf_save_callback_complete(struct mlx5_vf_migration_file *migf,
			      struct mlx5vf_async_data *async_data)
{
	kvfree(async_data->out);
	complete(&migf->save_comp);
	fput(migf->filp);
}

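/*
 * Error path of the asynchronous save, running from the callback
 * workqueue: return the data (and header) buffers and move the
 * migration file into an error state.
 */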
void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
{
	struct mlx5vf_async_data *async_data = container_of(_work,
		struct mlx5vf_async_data, work);
	struct mlx5_vf_migration_file *migf = container_of(async_data,
		struct mlx5_vf_migration_file, async_data);

	mutex_lock(&migf->lock);
	if (async_data->status) {
		mlx5vf_put_data_buffer(async_data->buf);
		if (async_data->header_buf)
			mlx5vf_put_data_buffer(async_data->header_buf);
		if (!async_data->stop_copy_chunk &&
		    async_data->status == MLX5_CMD_STAT_BAD_RES_STATE_ERR)
			migf->state = MLX5_MIGF_STATE_PRE_COPY_ERROR;
		else
			migf->state = MLX5_MIGF_STATE_ERROR;
		wake_up_interruptible(&migf->poll_wait);
	}
	mutex_unlock(&migf->lock);
	mlx5vf_save_callback_complete(migf, async_data);
}

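/*
 * Write the migration stream header that precedes a device image into
 * the first page of header_buf and queue it on the file's buf_list.
 */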
static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
			  size_t image_size, bool initial_pre_copy)
{
	struct mlx5_vf_migration_file *migf = header_buf->migf;
	struct mlx5_vf_migration_header header = {};
	unsigned long flags;
	struct page *page;
	u8 *to_buff;

	header.record_size = cpu_to_le64(image_size);
	header.flags = cpu_to_le32(MLX5_MIGF_HEADER_FLAGS_TAG_MANDATORY);
	header.tag = cpu_to_le32(MLX5_MIGF_HEADER_TAG_FW_DATA);
	page = mlx5vf_get_migration_page(header_buf, 0);
	if (!page)
		return -EINVAL;
	to_buff = kmap_local_page(page);
	memcpy(to_buff, &header, sizeof(header));
	kunmap_local(to_buff);
	header_buf->length = sizeof(header);
	header_buf->start_pos = header_buf->migf->max_pos;
	migf->max_pos += header_buf->length;
	spin_lock_irqsave(&migf->list_lock, flags);
	list_add_tail(&header_buf->buf_elm, &migf->buf_list);
	spin_unlock_irqrestore(&migf->list_lock, flags);
	if (initial_pre_copy)
		migf->pre_copy_initial_bytes += sizeof(header);
	return 0;
}

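/*
 * Completion handler of the asynchronous SAVE_VHCA_STATE command: on
 * success, publish the saved image (header + data) on buf_list, update
 * the pre-copy/stop-copy state and, in chunk mode, schedule the next
 * chunk; on failure, defer to mlx5vf_mig_file_cleanup_cb().
 */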
static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
{
	struct mlx5vf_async_data *async_data = container_of(context,
		struct mlx5vf_async_data, cb_work);
	struct mlx5_vf_migration_file *migf = container_of(async_data,
		struct mlx5_vf_migration_file, async_data);

	if (!status) {
		size_t next_required_umem_size = 0;
		bool stop_copy_last_chunk;
		size_t image_size;
		unsigned long flags;
		bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY &&
				!async_data->stop_copy_chunk;

		image_size = MLX5_GET(save_vhca_state_out, async_data->out,
				      actual_image_size);
		if (async_data->buf->stop_copy_chunk_num)
			next_required_umem_size = MLX5_GET(save_vhca_state_out,
					async_data->out, next_required_umem_size);
		stop_copy_last_chunk = async_data->stop_copy_chunk &&
				!next_required_umem_size;
		if (async_data->header_buf) {
			status = add_buf_header(async_data->header_buf, image_size,
						initial_pre_copy);
			if (status)
				goto err;
		}
		async_data->buf->length = image_size;
		async_data->buf->start_pos = migf->max_pos;
		migf->max_pos += async_data->buf->length;
		spin_lock_irqsave(&migf->list_lock, flags);
		list_add_tail(&async_data->buf->buf_elm, &migf->buf_list);
		if (async_data->buf->stop_copy_chunk_num) {
			migf->num_ready_chunks++;
			if (next_required_umem_size &&
			    migf->num_ready_chunks >= MAX_NUM_CHUNKS) {
				/* Delay the next SAVE till one chunk is consumed */
				migf->next_required_umem_size = next_required_umem_size;
				next_required_umem_size = 0;
			}
		}
		spin_unlock_irqrestore(&migf->list_lock, flags);
		if (initial_pre_copy) {
			migf->pre_copy_initial_bytes += image_size;
			migf->state = MLX5_MIGF_STATE_PRE_COPY;
		}
		if (stop_copy_last_chunk)
			migf->state = MLX5_MIGF_STATE_COMPLETE;
		wake_up_interruptible(&migf->poll_wait);
		if (next_required_umem_size)
			mlx5vf_mig_file_set_save_work(migf,
				/* Picking up the next chunk num */
				(async_data->buf->stop_copy_chunk_num % MAX_NUM_CHUNKS) + 1,
				next_required_umem_size);
		mlx5vf_save_callback_complete(migf, async_data);
		return;
	}

err:
	/* The error flow can't run from an interrupt context */
	if (status == -EREMOTEIO) {
		status = MLX5_GET(save_vhca_state_out, async_data->out, status);
		/* Failed in FW, print cmd out failure details */
		mlx5_cmd_out_err(migf->mvdev->mdev, MLX5_CMD_OP_SAVE_VHCA_STATE, 0,
				 async_data->out);
	}

	async_data->status = status;
	queue_work(migf->mvdev->cb_wq, &async_data->work);
}

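/*
 * Trigger an asynchronous SAVE_VHCA_STATE command into 'buf'; completion
 * is handled by mlx5vf_save_callback(). save_comp serializes the command
 * against suspend/query and is completed on any failure here.
 */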
int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
			       struct mlx5_vf_migration_file *migf,
			       struct mlx5_vhca_data_buffer *buf, bool inc,
			       bool track)
{
	u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out);
	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
	struct mlx5_vhca_data_buffer *header_buf = NULL;
	struct mlx5vf_async_data *async_data;
	bool pre_copy_cleanup = false;
	int err;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	err = wait_for_completion_interruptible(&migf->save_comp);
	if (err)
		return err;

	if ((migf->state == MLX5_MIGF_STATE_PRE_COPY ||
	     migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) && !track && !inc)
		pre_copy_cleanup = true;

	if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)
		/*
		 * In case we had a PRE_COPY error, SAVE is triggered only for
		 * the final image; read the device's full image.
		 */
		inc = false;

	MLX5_SET(save_vhca_state_in, in, opcode,
		 MLX5_CMD_OP_SAVE_VHCA_STATE);
	MLX5_SET(save_vhca_state_in, in, op_mod, 0);
	MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(save_vhca_state_in, in, mkey, buf->mkey);
	MLX5_SET(save_vhca_state_in, in, size, buf->allocated_length);
	MLX5_SET(save_vhca_state_in, in, incremental, inc);
	MLX5_SET(save_vhca_state_in, in, set_track, track);

	async_data = &migf->async_data;
	async_data->buf = buf;
	async_data->stop_copy_chunk = (!track && !pre_copy_cleanup);
	async_data->out = kvzalloc(out_size, GFP_KERNEL);
	if (!async_data->out) {
		err = -ENOMEM;
		goto err_out;
	}

	if (async_data->stop_copy_chunk) {
		u8 header_idx = buf->stop_copy_chunk_num ?
			buf->stop_copy_chunk_num - 1 : 0;

		header_buf = migf->buf_header[header_idx];
		migf->buf_header[header_idx] = NULL;
	}

	if (!header_buf) {
		header_buf = mlx5vf_get_data_buffer(migf,
			sizeof(struct mlx5_vf_migration_header), DMA_NONE);
		if (IS_ERR(header_buf)) {
			err = PTR_ERR(header_buf);
			goto err_free;
		}
	}

	if (async_data->stop_copy_chunk)
		migf->state = MLX5_MIGF_STATE_SAVE_STOP_COPY_CHUNK;

	async_data->header_buf = header_buf;
	get_file(migf->filp);
	err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
			       async_data->out,
			       out_size, mlx5vf_save_callback,
			       &async_data->cb_work);
	if (err)
		goto err_exec;

	return 0;

err_exec:
	if (header_buf)
		mlx5vf_put_data_buffer(header_buf);
	fput(migf->filp);
err_free:
	kvfree(async_data->out);
err_out:
	complete(&migf->save_comp);
	return err;
}

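/* Load a previously saved device image from 'buf' into the vhca. */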
int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
			       struct mlx5_vf_migration_file *migf,
			       struct mlx5_vhca_data_buffer *buf)
{
	u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {};
	int err;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	if (!buf->dmaed) {
		err = mlx5vf_dma_data_buffer(buf);
		if (err)
			return err;
	}

	MLX5_SET(load_vhca_state_in, in, opcode,
		 MLX5_CMD_OP_LOAD_VHCA_STATE);
	MLX5_SET(load_vhca_state_in, in, op_mod, 0);
	MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(load_vhca_state_in, in, mkey, buf->mkey);
	MLX5_SET(load_vhca_state_in, in, size, buf->length);
	return mlx5_cmd_exec_inout(mvdev->mdev, load_vhca_state, in, out);
}

int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf)
{
	int err;

	lockdep_assert_held(&migf->mvdev->state_mutex);
	if (migf->mvdev->mdev_detach)
		return -ENOTCONN;

	err = mlx5_core_alloc_pd(migf->mvdev->mdev, &migf->pdn);
	return err;
}

void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf)
{
	lockdep_assert_held(&migf->mvdev->state_mutex);
	if (migf->mvdev->mdev_detach)
		return;

	mlx5_core_dealloc_pd(migf->mvdev->mdev, migf->pdn);
}

void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf)
{
	struct mlx5_vhca_data_buffer *entry;
	int i;

	lockdep_assert_held(&migf->mvdev->state_mutex);
	WARN_ON(migf->mvdev->mdev_detach);

	for (i = 0; i < MAX_NUM_CHUNKS; i++) {
		if (migf->buf[i]) {
			mlx5vf_free_data_buffer(migf->buf[i]);
			migf->buf[i] = NULL;
		}

		if (migf->buf_header[i]) {
			mlx5vf_free_data_buffer(migf->buf_header[i]);
			migf->buf_header[i] = NULL;
		}
	}

	list_splice(&migf->avail_list, &migf->buf_list);

	while ((entry = list_first_entry_or_null(&migf->buf_list,
				struct mlx5_vhca_data_buffer, buf_elm))) {
		list_del(&entry->buf_elm);
		mlx5vf_free_data_buffer(entry);
	}

	mlx5vf_cmd_dealloc_pd(migf);
}

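/*
 * Create the firmware page-tracker object over the given IOVA ranges,
 * first combining ranges if they exceed the device's supported maximum.
 */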
static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev,
				 struct mlx5vf_pci_core_device *mvdev,
				 struct rb_root_cached *ranges, u32 nnodes)
{
	int max_num_range =
		MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_max_num_range);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	int record_size = MLX5_ST_SZ_BYTES(page_track_range);
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
	struct interval_tree_node *node = NULL;
	u64 total_ranges_len = 0;
	u32 num_ranges = nnodes;
	u8 log_addr_space_size;
	void *range_list_ptr;
	void *obj_context;
	void *cmd_hdr;
	int inlen;
	void *in;
	int err;
	int i;

	if (num_ranges > max_num_range) {
		vfio_combine_iova_ranges(ranges, nnodes, max_num_range);
		num_ranges = max_num_range;
	}

	inlen = MLX5_ST_SZ_BYTES(create_page_track_obj_in) +
		record_size * num_ranges;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(create_page_track_obj_in, in,
			       general_obj_in_cmd_hdr);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode,
		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type,
		 MLX5_OBJ_TYPE_PAGE_TRACK);
	obj_context = MLX5_ADDR_OF(create_page_track_obj_in, in, obj_context);
	MLX5_SET(page_track, obj_context, vhca_id, mvdev->vhca_id);
	MLX5_SET(page_track, obj_context, track_type, 1);
	MLX5_SET(page_track, obj_context, log_page_size,
		 ilog2(tracker->host_qp->tracked_page_size));
	MLX5_SET(page_track, obj_context, log_msg_size,
		 ilog2(tracker->host_qp->max_msg_size));
	MLX5_SET(page_track, obj_context, reporting_qpn, tracker->fw_qp->qpn);
	MLX5_SET(page_track, obj_context, num_ranges, num_ranges);

	range_list_ptr = MLX5_ADDR_OF(page_track, obj_context, track_range);
	node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
	for (i = 0; i < num_ranges; i++) {
		void *addr_range_i_base = range_list_ptr + record_size * i;
		unsigned long length = node->last - node->start + 1;

		MLX5_SET64(page_track_range, addr_range_i_base, start_address,
			   node->start);
		MLX5_SET64(page_track_range, addr_range_i_base, length, length);
		total_ranges_len += length;
		node = interval_tree_iter_next(node, 0, ULONG_MAX);
	}

	WARN_ON(node);
	log_addr_space_size = ilog2(roundup_pow_of_two(total_ranges_len));
	if (log_addr_space_size <
	    (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) ||
	    log_addr_space_size >
	    (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_addr_space))) {
		err = -EOPNOTSUPP;
		goto out;
	}

	MLX5_SET(page_track, obj_context, log_addr_space_size,
		 log_addr_space_size);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	if (err)
		goto out;

	tracker->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
out:
	kfree(in);
	return err;
}

static int mlx5vf_cmd_destroy_tracker(struct mlx5_core_dev *mdev,
				      u32 tracker_id)
{
	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};

	MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, tracker_id);

	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}

static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev,
				     u32 tracker_id, unsigned long iova,
				     unsigned long length, u32 tracker_state)
{
	u32 in[MLX5_ST_SZ_DW(modify_page_track_obj_in)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
	void *obj_context;
	void *cmd_hdr;

	cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker_id);

	obj_context = MLX5_ADDR_OF(modify_page_track_obj_in, in, obj_context);
	MLX5_SET64(page_track, obj_context, modify_field_select, 0x3);
	MLX5_SET64(page_track, obj_context, range_start_address, iova);
	MLX5_SET64(page_track, obj_context, length, length);
	MLX5_SET(page_track, obj_context, state, tracker_state);

	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}

static int mlx5vf_cmd_query_tracker(struct mlx5_core_dev *mdev,
				    struct mlx5_vhca_page_tracker *tracker)
{
	u32 out[MLX5_ST_SZ_DW(query_page_track_obj_out)] = {};
	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
	void *obj_context;
	void *cmd_hdr;
	int err;

	cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker->id);

	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	obj_context = MLX5_ADDR_OF(query_page_track_obj_out, out, obj_context);
	tracker->status = MLX5_GET(page_track, obj_context, state);
	return 0;
}

static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev,
			     struct mlx5_vhca_cq_buf *buf, int nent,
			     int cqe_size)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
	u8 log_wq_sz = ilog2(cqe_size);
	int err;

	err = mlx5_frag_buf_alloc_node(mdev, nent * cqe_size, frag_buf,
				       mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
	buf->cqe_size = cqe_size;
	buf->nent = nent;
	return 0;
}

static void init_cq_frag_buf(struct mlx5_vhca_cq_buf *buf)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int i;

	for (i = 0; i < buf->nent; i++) {
		cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static void mlx5vf_destroy_cq(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_cq *cq)
{
	mlx5_core_destroy_cq(mdev, &cq->mcq);
	mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
	mlx5_db_free(mdev, &cq->db);
}

static void mlx5vf_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
{
	if (type != MLX5_EVENT_TYPE_CQ_ERROR)
		return;

	set_tracker_error(container_of(mcq, struct mlx5vf_pci_core_device,
				       tracker.cq.mcq));
}

static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type,
				 void *data)
{
	struct mlx5_vhca_page_tracker *tracker =
		mlx5_nb_cof(nb, struct mlx5_vhca_page_tracker, nb);
	struct mlx5vf_pci_core_device *mvdev = container_of(
		tracker, struct mlx5vf_pci_core_device, tracker);
	struct mlx5_eqe_obj_change *object;
	struct mlx5_eqe *eqe = data;
	u8 event_type = (u8)type;
	u8 queue_type;
	u32 obj_id;
	int qp_num;

	switch (event_type) {
	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
		queue_type = eqe->data.qp_srq.type;
		if (queue_type != MLX5_EVENT_QUEUE_TYPE_QP)
			break;
		qp_num = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
		if (qp_num != tracker->host_qp->qpn &&
		    qp_num != tracker->fw_qp->qpn)
			break;
		set_tracker_error(mvdev);
		break;
	case MLX5_EVENT_TYPE_OBJECT_CHANGE:
		object = &eqe->data.obj_change;
		obj_id = be32_to_cpu(object->obj_id);
		if (obj_id == tracker->id)
			set_tracker_change_event(mvdev);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static void mlx5vf_cq_complete(struct mlx5_core_cq *mcq,
			       struct mlx5_eqe *eqe)
{
	struct mlx5vf_pci_core_device *mvdev =
		container_of(mcq, struct mlx5vf_pci_core_device,
			     tracker.cq.mcq);

	complete(&mvdev->tracker_comp);
}

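/*
 * Create the completion queue on which the firmware QP reports dirty
 * pages; the CQE size follows the CPU cache line size (64 or 128 bytes).
 */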
static int mlx5vf_create_cq(struct mlx5_core_dev *mdev,
			    struct mlx5_vhca_page_tracker *tracker,
			    size_t ncqe)
{
	int cqe_size = cache_line_size() == 128 ? 128 : 64;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vhca_cq *cq;
	int inlen, err, eqn;
	void *cqc, *in;
	__be64 *pas;
	int vector;

	cq = &tracker->cq;
	ncqe = roundup_pow_of_two(ncqe);
	err = mlx5_db_alloc_node(mdev, &cq->db, mdev->priv.numa_node);
	if (err)
		return err;

	cq->ncqe = ncqe;
	cq->mcq.set_ci_db = cq->db.db;
	cq->mcq.arm_db = cq->db.db + 1;
	cq->mcq.cqe_sz = cqe_size;
	err = alloc_cq_frag_buf(mdev, &cq->buf, ncqe, cqe_size);
	if (err)
		goto err_db_free;

	init_cq_frag_buf(&cq->buf);
	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
		cq->buf.frag_buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_buff;
	}

	vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev);
	err = mlx5_comp_eqn_get(mdev, vector, &eqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET(cqc, cqc, uar_page, tracker->uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->buf.frag_buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);
	cq->mcq.comp = mlx5vf_cq_complete;
	cq->mcq.event = mlx5vf_cq_event;
	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
		    cq->mcq.cons_index);
	kvfree(in);
	return 0;

err_vec:
	kvfree(in);
err_buff:
	mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
err_db_free:
	mlx5_db_free(mdev, &cq->db);
	return err;
}

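/*
 * Create an RC QP: with a receive queue of max_recv_wr entries for the
 * host-side QP, or with a zero-length RQ (max_recv_wr == 0) for the
 * firmware-side QP.
 */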
static struct mlx5_vhca_qp *
mlx5vf_create_rc_qp(struct mlx5_core_dev *mdev,
		    struct mlx5_vhca_page_tracker *tracker, u32 max_recv_wr)
{
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	struct mlx5_vhca_qp *qp;
	u8 log_rq_stride;
	u8 log_rq_sz;
	void *qpc;
	int inlen;
	void *in;
	int err;

	qp = kzalloc(sizeof(*qp), GFP_KERNEL_ACCOUNT);
	if (!qp)
		return ERR_PTR(-ENOMEM);

	err = mlx5_db_alloc_node(mdev, &qp->db, mdev->priv.numa_node);
	if (err)
		goto err_free;

	if (max_recv_wr) {
		qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr);
		log_rq_stride = ilog2(MLX5_SEND_WQE_DS);
		log_rq_sz = ilog2(qp->rq.wqe_cnt);
		err = mlx5_frag_buf_alloc_node(mdev,
			wq_get_byte_sz(log_rq_sz, log_rq_stride),
			&qp->buf, mdev->priv.numa_node);
		if (err)
			goto err_db_free;
		mlx5_init_fbc(qp->buf.frags, log_rq_stride, log_rq_sz, &qp->rq.fbc);
	}

	qp->rq.db = &qp->db.db[MLX5_RCV_DBR];
	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
		qp->buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, tracker->pdn);
	MLX5_SET(qpc, qpc, uar_page, tracker->uar->index);
	MLX5_SET(qpc, qpc, log_page_size,
		 qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
	MLX5_SET(qpc, qpc, no_sq, 1);
	if (max_recv_wr) {
		MLX5_SET(qpc, qpc, cqn_rcv, tracker->cq.mcq.cqn);
		MLX5_SET(qpc, qpc, log_rq_stride, log_rq_stride - 4);
		MLX5_SET(qpc, qpc, log_rq_size, log_rq_sz);
		MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
		MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
		mlx5_fill_page_frag_array(&qp->buf,
					  (__be64 *)MLX5_ADDR_OF(create_qp_in,
								 in, pas));
	} else {
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
	}

	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kvfree(in);
	if (err)
		goto err_in;

	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
	return qp;

err_in:
	if (max_recv_wr)
		mlx5_frag_buf_free(mdev, &qp->buf);
err_db_free:
	mlx5_db_free(mdev, &qp->db);
err_free:
	kfree(qp);
	return ERR_PTR(err);
}

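/* Post a single receive WQE pointing at the next chunk of recv_buf. */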
static void mlx5vf_post_recv(struct mlx5_vhca_qp *qp)
{
	struct mlx5_wqe_data_seg *data;
	unsigned int ix;

	WARN_ON(qp->rq.pc - qp->rq.cc >= qp->rq.wqe_cnt);
	ix = qp->rq.pc & (qp->rq.wqe_cnt - 1);
	data = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ix);
	data->byte_count = cpu_to_be32(qp->max_msg_size);
	data->lkey = cpu_to_be32(qp->recv_buf.mkey);
	data->addr = cpu_to_be64(qp->recv_buf.next_rq_offset);
	qp->rq.pc++;
	/* Make sure that descriptors are written before doorbell record. */
	dma_wmb();
	*qp->rq.db = cpu_to_be32(qp->rq.pc & 0xffff);
}

static int mlx5vf_activate_qp(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_qp *qp, u32 remote_qpn,
			      bool host_qp)
{
	u32 init_in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
	u32 rtr_in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
	u32 rts_in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
	void *qpc;
	int ret;

	/* Init */
	qpc = MLX5_ADDR_OF(rst2init_qp_in, init_in, qpc);
	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
	MLX5_SET(qpc, qpc, rre, 1);
	MLX5_SET(qpc, qpc, rwe, 1);
	MLX5_SET(rst2init_qp_in, init_in, opcode, MLX5_CMD_OP_RST2INIT_QP);
	MLX5_SET(rst2init_qp_in, init_in, qpn, qp->qpn);
	ret = mlx5_cmd_exec_in(mdev, rst2init_qp, init_in);
	if (ret)
		return ret;

	if (host_qp) {
		struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
		int i;

		for (i = 0; i < qp->rq.wqe_cnt; i++) {
			mlx5vf_post_recv(qp);
			recv_buf->next_rq_offset += qp->max_msg_size;
		}
	}

	/* RTR */
	qpc = MLX5_ADDR_OF(init2rtr_qp_in, rtr_in, qpc);
	MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
	MLX5_SET(qpc, qpc, mtu, IB_MTU_4096);
	MLX5_SET(qpc, qpc, log_msg_max, MLX5_CAP_GEN(mdev, log_max_msg));
	MLX5_SET(qpc, qpc, remote_qpn, remote_qpn);
	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
	MLX5_SET(qpc, qpc, primary_address_path.fl, 1);
	MLX5_SET(qpc, qpc, min_rnr_nak, 1);
	MLX5_SET(init2rtr_qp_in, rtr_in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
	MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
	ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, rtr_in);
	if (ret || host_qp)
		return ret;

	/* RTS */
	qpc = MLX5_ADDR_OF(rtr2rts_qp_in, rts_in, qpc);
	MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);
	MLX5_SET(qpc, qpc, retry_count, 7);
	MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
	MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
	MLX5_SET(rtr2rts_qp_in, rts_in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
	MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);

	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, rts_in);
}

static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_qp *qp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
	mlx5_cmd_exec_in(mdev, destroy_qp, in);

	mlx5_frag_buf_free(mdev, &qp->buf);
	mlx5_db_free(mdev, &qp->db);
	kfree(qp);
}

static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf)
{
	int i;

	/* Undo alloc_pages_bulk_array() */
	for (i = 0; i < recv_buf->npages; i++)
		__free_page(recv_buf->page_list[i]);

	kvfree(recv_buf->page_list);
}

static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
			    unsigned int npages)
{
	unsigned int filled = 0, done = 0;
	int i;

	recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list),
				       GFP_KERNEL_ACCOUNT);
	if (!recv_buf->page_list)
		return -ENOMEM;

	for (;;) {
		filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT,
						npages - done,
						recv_buf->page_list + done);
		if (!filled)
			goto err;

		done += filled;
		if (done == npages)
			break;
	}

	recv_buf->npages = npages;
	return 0;

err:
	for (i = 0; i < npages; i++) {
		if (recv_buf->page_list[i])
			__free_page(recv_buf->page_list[i]);
	}

	kvfree(recv_buf->page_list);
	return -ENOMEM;
}

static int register_dma_recv_pages(struct mlx5_core_dev *mdev,
				   struct mlx5_vhca_recv_buf *recv_buf)
{
	int i, j;

	recv_buf->dma_addrs = kvcalloc(recv_buf->npages,
				       sizeof(*recv_buf->dma_addrs),
				       GFP_KERNEL_ACCOUNT);
	if (!recv_buf->dma_addrs)
		return -ENOMEM;

	for (i = 0; i < recv_buf->npages; i++) {
		recv_buf->dma_addrs[i] = dma_map_page(mdev->device,
						      recv_buf->page_list[i],
						      0, PAGE_SIZE,
						      DMA_FROM_DEVICE);
		if (dma_mapping_error(mdev->device, recv_buf->dma_addrs[i]))
			goto error;
	}
	return 0;

error:
	for (j = 0; j < i; j++)
		dma_unmap_single(mdev->device, recv_buf->dma_addrs[j],
				 PAGE_SIZE, DMA_FROM_DEVICE);

	kvfree(recv_buf->dma_addrs);
	return -ENOMEM;
}

static void unregister_dma_recv_pages(struct mlx5_core_dev *mdev,
				      struct mlx5_vhca_recv_buf *recv_buf)
{
	int i;

	for (i = 0; i < recv_buf->npages; i++)
		dma_unmap_single(mdev->device, recv_buf->dma_addrs[i],
				 PAGE_SIZE, DMA_FROM_DEVICE);

	kvfree(recv_buf->dma_addrs);
}

static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
					  struct mlx5_vhca_qp *qp)
{
	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;

	mlx5_core_destroy_mkey(mdev, recv_buf->mkey);
	unregister_dma_recv_pages(mdev, recv_buf);
	free_recv_pages(&qp->recv_buf);
}

static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
					  struct mlx5_vhca_qp *qp, u32 pdn,
					  u64 rq_size)
{
	unsigned int npages = DIV_ROUND_UP_ULL(rq_size, PAGE_SIZE);
	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
	int err;

	err = alloc_recv_pages(recv_buf, npages);
	if (err < 0)
		return err;

	err = register_dma_recv_pages(mdev, recv_buf);
	if (err)
		goto end;

	err = _create_mkey(mdev, pdn, NULL, recv_buf, &recv_buf->mkey);
	if (err)
		goto err_create_mkey;

	return 0;

err_create_mkey:
	unregister_dma_recv_pages(mdev, recv_buf);
end:
	free_recv_pages(recv_buf);
	return err;
}

static void
_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev)
{
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	struct mlx5_core_dev *mdev = mvdev->mdev;

	lockdep_assert_held(&mvdev->state_mutex);

	if (!mvdev->log_active)
		return;

	WARN_ON(mvdev->mdev_detach);

	mlx5_eq_notifier_unregister(mdev, &tracker->nb);
	mlx5vf_cmd_destroy_tracker(mdev, tracker->id);
	mlx5vf_destroy_qp(mdev, tracker->fw_qp);
	mlx5vf_free_qp_recv_resources(mdev, tracker->host_qp);
	mlx5vf_destroy_qp(mdev, tracker->host_qp);
	mlx5vf_destroy_cq(mdev, &tracker->cq);
	mlx5_core_dealloc_pd(mdev, tracker->pdn);
	mlx5_put_uars_page(mdev, tracker->uar);
	mvdev->log_active = false;
}

int mlx5vf_stop_page_tracker(struct vfio_device *vdev)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);

	mutex_lock(&mvdev->state_mutex);
	if (!mvdev->log_active)
		goto end;

	_mlx5vf_free_page_tracker_resources(mvdev);
	mvdev->log_active = false;
end:
	mlx5vf_state_mutex_unlock(mvdev);
	return 0;
}

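/*
 * Bring up the dirty-page tracker: UAR, PD, CQ, host and firmware QPs,
 * the receive buffer and the firmware tracker object. The tracked page
 * size is clamped to the device's supported range and reported back via
 * *page_size. Resources are torn down in reverse order on failure.
 */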
int mlx5vf_start_page_tracker(struct vfio_device *vdev,
			      struct rb_root_cached *ranges, u32 nnodes,
			      u64 *page_size)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	u8 log_tracked_page = ilog2(*page_size);
	struct mlx5_vhca_qp *host_qp;
	struct mlx5_vhca_qp *fw_qp;
	struct mlx5_core_dev *mdev;
	u32 max_msg_size = PAGE_SIZE;
	u64 rq_size = SZ_2M;
	u32 max_recv_wr;
	int err;

	mutex_lock(&mvdev->state_mutex);
	if (mvdev->mdev_detach) {
		err = -ENOTCONN;
		goto end;
	}

	if (mvdev->log_active) {
		err = -EINVAL;
		goto end;
	}

	mdev = mvdev->mdev;
	memset(tracker, 0, sizeof(*tracker));
	tracker->uar = mlx5_get_uars_page(mdev);
	if (IS_ERR(tracker->uar)) {
		err = PTR_ERR(tracker->uar);
		goto end;
	}

	err = mlx5_core_alloc_pd(mdev, &tracker->pdn);
	if (err)
		goto err_uar;

	max_recv_wr = DIV_ROUND_UP_ULL(rq_size, max_msg_size);
	err = mlx5vf_create_cq(mdev, tracker, max_recv_wr);
	if (err)
		goto err_dealloc_pd;

	host_qp = mlx5vf_create_rc_qp(mdev, tracker, max_recv_wr);
	if (IS_ERR(host_qp)) {
		err = PTR_ERR(host_qp);
		goto err_cq;
	}

	host_qp->max_msg_size = max_msg_size;
	if (log_tracked_page < MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_min_page_size)) {
		log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_min_page_size);
	} else if (log_tracked_page > MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_max_page_size)) {
		log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_max_page_size);
	}

	host_qp->tracked_page_size = (1ULL << log_tracked_page);
	err = mlx5vf_alloc_qp_recv_resources(mdev, host_qp, tracker->pdn,
					     rq_size);
	if (err)
		goto err_host_qp;

	fw_qp = mlx5vf_create_rc_qp(mdev, tracker, 0);
	if (IS_ERR(fw_qp)) {
		err = PTR_ERR(fw_qp);
		goto err_recv_resources;
	}

	err = mlx5vf_activate_qp(mdev, host_qp, fw_qp->qpn, true);
	if (err)
		goto err_activate;

	err = mlx5vf_activate_qp(mdev, fw_qp, host_qp->qpn, false);
	if (err)
		goto err_activate;

	tracker->host_qp = host_qp;
	tracker->fw_qp = fw_qp;
	err = mlx5vf_create_tracker(mdev, mvdev, ranges, nnodes);
	if (err)
		goto err_activate;

	MLX5_NB_INIT(&tracker->nb, mlx5vf_event_notifier, NOTIFY_ANY);
	mlx5_eq_notifier_register(mdev, &tracker->nb);
	*page_size = host_qp->tracked_page_size;
	mvdev->log_active = true;
	mlx5vf_state_mutex_unlock(mvdev);
	return 0;

err_activate:
	mlx5vf_destroy_qp(mdev, fw_qp);
err_recv_resources:
	mlx5vf_free_qp_recv_resources(mdev, host_qp);
err_host_qp:
	mlx5vf_destroy_qp(mdev, host_qp);
err_cq:
	mlx5vf_destroy_cq(mdev, &tracker->cq);
err_dealloc_pd:
	mlx5_core_dealloc_pd(mdev, tracker->pdn);
err_uar:
	mlx5_put_uars_page(mdev, tracker->uar);
end:
	mlx5vf_state_mutex_unlock(mvdev);
	return err;
}

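/*
 * Decode one dirty-page report message: each entry carries a 64-bit IOVA
 * split into low/high halves, which is marked in the dirty bitmap at the
 * tracker's page granularity.
 */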
static void
set_report_output(u32 size, int index, struct mlx5_vhca_qp *qp,
		  struct iova_bitmap *dirty)
{
	u32 entry_size = MLX5_ST_SZ_BYTES(page_track_report_entry);
	u32 nent = size / entry_size;
	struct page *page;
	u64 addr;
	u64 *buf;
	int i;

	if (WARN_ON(index >= qp->recv_buf.npages ||
		    (nent > qp->max_msg_size / entry_size)))
		return;

	page = qp->recv_buf.page_list[index];
	buf = kmap_local_page(page);
	for (i = 0; i < nent; i++) {
		addr = MLX5_GET(page_track_report_entry, buf + i,
				dirty_address_low);
		addr |= (u64)MLX5_GET(page_track_report_entry, buf + i,
				      dirty_address_high) << 32;
		iova_bitmap_set(dirty, addr, qp->tracked_page_size);
	}
	kunmap_local(buf);
}

static void
mlx5vf_rq_cqe(struct mlx5_vhca_qp *qp, struct mlx5_cqe64 *cqe,
	      struct iova_bitmap *dirty, int *tracker_status)
{
	u32 size;
	int ix;

	qp->rq.cc++;
	*tracker_status = be32_to_cpu(cqe->immediate) >> 28;
	size = be32_to_cpu(cqe->byte_cnt);
	ix = be16_to_cpu(cqe->wqe_counter) & (qp->rq.wqe_cnt - 1);

	/* zero length CQE, no data */
	WARN_ON(!size && *tracker_status == MLX5_PAGE_TRACK_STATE_REPORTING);
	if (size)
		set_report_output(size, ix, qp, dirty);

	qp->recv_buf.next_rq_offset = ix * qp->max_msg_size;
	mlx5vf_post_recv(qp);
}

static void *get_cqe(struct mlx5_vhca_cq *cq, int n)
{
	return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}

static struct mlx5_cqe64 *get_sw_cqe(struct mlx5_vhca_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & (cq->ncqe - 1));
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ncqe)))) {
		return cqe64;
	} else {
		return NULL;
	}
}

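/* Poll a single CQE; returns CQ_OK, CQ_EMPTY or CQ_POLL_ERR. */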
static int
mlx5vf_cq_poll_one(struct mlx5_vhca_cq *cq, struct mlx5_vhca_qp *qp,
		   struct iova_bitmap *dirty, int *tracker_status)
{
	struct mlx5_cqe64 *cqe;
	u8 opcode;

	cqe = get_sw_cqe(cq, cq->mcq.cons_index);
	if (!cqe)
		return CQ_EMPTY;

	++cq->mcq.cons_index;
	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();
	opcode = get_cqe_opcode(cqe);
	switch (opcode) {
	case MLX5_CQE_RESP_SEND_IMM:
		mlx5vf_rq_cqe(qp, cqe, dirty, tracker_status);
		return CQ_OK;
	default:
		return CQ_POLL_ERR;
	}
}

int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova,
				  unsigned long length,
				  struct iova_bitmap *dirty)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	struct mlx5_vhca_cq *cq = &tracker->cq;
	struct mlx5_core_dev *mdev;
	int poll_err, err;

	mutex_lock(&mvdev->state_mutex);
	if (!mvdev->log_active) {
		err = -EINVAL;
		goto end;
	}

	if (mvdev->mdev_detach) {
		err = -ENOTCONN;
		goto end;
	}

	if (tracker->is_err) {
		err = -EIO;
		goto end;
	}

	mdev = mvdev->mdev;
	err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length,
					MLX5_PAGE_TRACK_STATE_REPORTING);
	if (err)
		goto end;

	tracker->status = MLX5_PAGE_TRACK_STATE_REPORTING;
	while (tracker->status == MLX5_PAGE_TRACK_STATE_REPORTING &&
	       !tracker->is_err) {
		poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp, dirty,
					      &tracker->status);
		if (poll_err == CQ_EMPTY) {
			mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
				    cq->mcq.cons_index);
			poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp,
						      dirty, &tracker->status);
			if (poll_err == CQ_EMPTY) {
				wait_for_completion(&mvdev->tracker_comp);
				if (tracker->object_changed) {
					tracker->object_changed = false;
					err = mlx5vf_cmd_query_tracker(mdev, tracker);
					if (err)
						goto end;
				}
				continue;
			}
		}
		if (poll_err == CQ_POLL_ERR) {
			err = -EIO;
			goto end;
		}
		mlx5_cq_set_ci(&cq->mcq);
	}

	if (tracker->status == MLX5_PAGE_TRACK_STATE_ERROR)
		tracker->is_err = true;

	if (tracker->is_err)
		err = -EIO;
end:
	mlx5vf_state_mutex_unlock(mvdev);
	return err;
}