1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* |
3 | * Adjunct processor matrix VFIO device driver callbacks. |
4 | * |
5 | * Copyright IBM Corp. 2018 |
6 | * |
7 | * Author(s): Tony Krowiak <akrowiak@linux.ibm.com> |
8 | * Halil Pasic <pasic@linux.ibm.com> |
9 | * Pierre Morel <pmorel@linux.ibm.com> |
10 | */ |
11 | #include <linux/string.h> |
12 | #include <linux/vfio.h> |
13 | #include <linux/device.h> |
14 | #include <linux/list.h> |
15 | #include <linux/ctype.h> |
16 | #include <linux/bitops.h> |
17 | #include <linux/kvm_host.h> |
18 | #include <linux/module.h> |
19 | #include <linux/uuid.h> |
20 | #include <asm/kvm.h> |
21 | #include <asm/zcrypt.h> |
22 | |
23 | #include "vfio_ap_private.h" |
24 | #include "vfio_ap_debug.h" |
25 | |
26 | #define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough" |
27 | #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device" |
28 | |
29 | #define AP_QUEUE_ASSIGNED "assigned" |
30 | #define AP_QUEUE_UNASSIGNED "unassigned" |
31 | #define AP_QUEUE_IN_USE "in use" |
32 | |
33 | #define AP_RESET_INTERVAL 20 /* Reset sleep interval (20ms) */ |
34 | |
35 | static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable); |
36 | static struct vfio_ap_queue *vfio_ap_find_queue(int apqn); |
37 | static const struct vfio_device_ops vfio_ap_matrix_dev_ops; |
38 | static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q); |
39 | |
40 | /** |
41 | * get_update_locks_for_kvm: Acquire the locks required to dynamically update a |
42 | * KVM guest's APCB in the proper order. |
43 | * |
44 | * @kvm: a pointer to a struct kvm object containing the KVM guest's APCB. |
45 | * |
46 | * The proper locking order is: |
47 | * 1. matrix_dev->guests_lock: required to use the KVM pointer to update a KVM |
48 | * guest's APCB. |
49 | * 2. kvm->lock: required to update a guest's APCB |
50 | * 3. matrix_dev->mdevs_lock: required to access data stored in a matrix_mdev |
51 | * |
52 | * Note: If @kvm is NULL, the KVM lock will not be taken. |
53 | */ |
static inline void get_update_locks_for_kvm(struct kvm *kvm)
{
	/*
	 * Lock order is fixed: guests_lock -> kvm->lock -> mdevs_lock.
	 * release_update_locks_for_kvm() must unlock in the reverse order.
	 */
	mutex_lock(&matrix_dev->guests_lock);
	if (kvm)
		mutex_lock(&kvm->lock);
	mutex_lock(&matrix_dev->mdevs_lock);
}
61 | |
62 | /** |
63 | * release_update_locks_for_kvm: Release the locks used to dynamically update a |
64 | * KVM guest's APCB in the proper order. |
65 | * |
66 | * @kvm: a pointer to a struct kvm object containing the KVM guest's APCB. |
67 | * |
68 | * The proper unlocking order is: |
69 | * 1. matrix_dev->mdevs_lock |
70 | * 2. kvm->lock |
71 | * 3. matrix_dev->guests_lock |
72 | * |
73 | * Note: If @kvm is NULL, the KVM lock will not be released. |
74 | */ |
75 | static inline void release_update_locks_for_kvm(struct kvm *kvm) |
76 | { |
77 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
78 | if (kvm) |
79 | mutex_unlock(lock: &kvm->lock); |
80 | mutex_unlock(lock: &matrix_dev->guests_lock); |
81 | } |
82 | |
83 | /** |
84 | * get_update_locks_for_mdev: Acquire the locks required to dynamically update a |
85 | * KVM guest's APCB in the proper order. |
86 | * |
87 | * @matrix_mdev: a pointer to a struct ap_matrix_mdev object containing the AP |
88 | * configuration data to use to update a KVM guest's APCB. |
89 | * |
90 | * The proper locking order is: |
91 | * 1. matrix_dev->guests_lock: required to use the KVM pointer to update a KVM |
92 | * guest's APCB. |
93 | * 2. matrix_mdev->kvm->lock: required to update a guest's APCB |
94 | * 3. matrix_dev->mdevs_lock: required to access data stored in a matrix_mdev |
95 | * |
96 | * Note: If @matrix_mdev is NULL or is not attached to a KVM guest, the KVM |
97 | * lock will not be taken. |
98 | */ |
static inline void get_update_locks_for_mdev(struct ap_matrix_mdev *matrix_mdev)
{
	/*
	 * Lock order is fixed: guests_lock -> kvm->lock -> mdevs_lock.
	 * The KVM lock is only taken when the mdev is attached to a guest.
	 */
	mutex_lock(&matrix_dev->guests_lock);
	if (matrix_mdev && matrix_mdev->kvm)
		mutex_lock(&matrix_mdev->kvm->lock);
	mutex_lock(&matrix_dev->mdevs_lock);
}
106 | |
107 | /** |
108 | * release_update_locks_for_mdev: Release the locks used to dynamically update a |
109 | * KVM guest's APCB in the proper order. |
110 | * |
111 | * @matrix_mdev: a pointer to a struct ap_matrix_mdev object containing the AP |
112 | * configuration data to use to update a KVM guest's APCB. |
113 | * |
114 | * The proper unlocking order is: |
115 | * 1. matrix_dev->mdevs_lock |
116 | * 2. matrix_mdev->kvm->lock |
117 | * 3. matrix_dev->guests_lock |
118 | * |
119 | * Note: If @matrix_mdev is NULL or is not attached to a KVM guest, the KVM |
120 | * lock will not be released. |
121 | */ |
122 | static inline void release_update_locks_for_mdev(struct ap_matrix_mdev *matrix_mdev) |
123 | { |
124 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
125 | if (matrix_mdev && matrix_mdev->kvm) |
126 | mutex_unlock(lock: &matrix_mdev->kvm->lock); |
127 | mutex_unlock(lock: &matrix_dev->guests_lock); |
128 | } |
129 | |
130 | /** |
131 | * get_update_locks_by_apqn: Find the mdev to which an APQN is assigned and |
132 | * acquire the locks required to update the APCB of |
133 | * the KVM guest to which the mdev is attached. |
134 | * |
135 | * @apqn: the APQN of a queue device. |
136 | * |
137 | * The proper locking order is: |
138 | * 1. matrix_dev->guests_lock: required to use the KVM pointer to update a KVM |
139 | * guest's APCB. |
140 | * 2. matrix_mdev->kvm->lock: required to update a guest's APCB |
141 | * 3. matrix_dev->mdevs_lock: required to access data stored in a matrix_mdev |
142 | * |
143 | * Note: If @apqn is not assigned to a matrix_mdev, the matrix_mdev->kvm->lock |
144 | * will not be taken. |
145 | * |
146 | * Return: the ap_matrix_mdev object to which @apqn is assigned or NULL if @apqn |
147 | * is not assigned to an ap_matrix_mdev. |
148 | */ |
static struct ap_matrix_mdev *get_update_locks_by_apqn(int apqn)
{
	struct ap_matrix_mdev *matrix_mdev;

	/* guests_lock protects the mdev_list traversal and the KVM pointer */
	mutex_lock(&matrix_dev->guests_lock);

	list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
		/* An APQN belongs to an mdev if both its APID and APQI are set */
		if (test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm) &&
		    test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm)) {
			if (matrix_mdev->kvm)
				mutex_lock(&matrix_mdev->kvm->lock);

			mutex_lock(&matrix_dev->mdevs_lock);

			return matrix_mdev;
		}
	}

	/* No owner found: still take mdevs_lock so callers unlock uniformly */
	mutex_lock(&matrix_dev->mdevs_lock);

	return NULL;
}
171 | |
172 | /** |
173 | * get_update_locks_for_queue: get the locks required to update the APCB of the |
174 | * KVM guest to which the matrix mdev linked to a |
175 | * vfio_ap_queue object is attached. |
176 | * |
177 | * @q: a pointer to a vfio_ap_queue object. |
178 | * |
179 | * The proper locking order is: |
180 | * 1. q->matrix_dev->guests_lock: required to use the KVM pointer to update a |
181 | * KVM guest's APCB. |
182 | * 2. q->matrix_mdev->kvm->lock: required to update a guest's APCB |
183 | * 3. matrix_dev->mdevs_lock: required to access data stored in matrix_mdev |
184 | * |
185 | * Note: if @queue is not linked to an ap_matrix_mdev object, the KVM lock |
186 | * will not be taken. |
187 | */ |
static inline void get_update_locks_for_queue(struct vfio_ap_queue *q)
{
	/*
	 * Lock order is fixed: guests_lock -> kvm->lock -> mdevs_lock.
	 * The KVM lock is only taken when @q is linked to an mdev that is
	 * attached to a guest.
	 */
	mutex_lock(&matrix_dev->guests_lock);
	if (q->matrix_mdev && q->matrix_mdev->kvm)
		mutex_lock(&q->matrix_mdev->kvm->lock);
	mutex_lock(&matrix_dev->mdevs_lock);
}
195 | |
196 | /** |
197 | * vfio_ap_mdev_get_queue - retrieve a queue with a specific APQN from a |
198 | * hash table of queues assigned to a matrix mdev |
199 | * @matrix_mdev: the matrix mdev |
200 | * @apqn: The APQN of a queue device |
201 | * |
202 | * Return: the pointer to the vfio_ap_queue struct representing the queue or |
203 | * NULL if the queue is not assigned to @matrix_mdev |
204 | */ |
205 | static struct vfio_ap_queue *vfio_ap_mdev_get_queue( |
206 | struct ap_matrix_mdev *matrix_mdev, |
207 | int apqn) |
208 | { |
209 | struct vfio_ap_queue *q; |
210 | |
211 | hash_for_each_possible(matrix_mdev->qtable.queues, q, mdev_qnode, |
212 | apqn) { |
213 | if (q && q->apqn == apqn) |
214 | return q; |
215 | } |
216 | |
217 | return NULL; |
218 | } |
219 | |
220 | /** |
221 | * vfio_ap_wait_for_irqclear - clears the IR bit or gives up after 5 tries |
222 | * @apqn: The AP Queue number |
223 | * |
224 | * Checks the IRQ bit for the status of this APQN using ap_tapq. |
225 | * Returns if the ap_tapq function succeeded and the bit is clear. |
226 | * Returns if ap_tapq function failed with invalid, deconfigured or |
227 | * checkstopped AP. |
228 | * Otherwise retries up to 5 times after waiting 20ms. |
229 | */ |
230 | static void vfio_ap_wait_for_irqclear(int apqn) |
231 | { |
232 | struct ap_queue_status status; |
233 | int retry = 5; |
234 | |
235 | do { |
236 | status = ap_tapq(apqn, NULL); |
237 | switch (status.response_code) { |
238 | case AP_RESPONSE_NORMAL: |
239 | case AP_RESPONSE_RESET_IN_PROGRESS: |
240 | if (!status.irq_enabled) |
241 | return; |
242 | fallthrough; |
243 | case AP_RESPONSE_BUSY: |
244 | msleep(msecs: 20); |
245 | break; |
246 | case AP_RESPONSE_Q_NOT_AVAIL: |
247 | case AP_RESPONSE_DECONFIGURED: |
248 | case AP_RESPONSE_CHECKSTOPPED: |
249 | default: |
250 | WARN_ONCE(1, "%s: tapq rc %02x: %04x\n" , __func__, |
251 | status.response_code, apqn); |
252 | return; |
253 | } |
254 | } while (--retry); |
255 | |
256 | WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n" , |
257 | __func__, status.response_code, apqn); |
258 | } |
259 | |
260 | /** |
261 | * vfio_ap_free_aqic_resources - free vfio_ap_queue resources |
262 | * @q: The vfio_ap_queue |
263 | * |
264 | * Unregisters the ISC in the GIB when the saved ISC not invalid. |
265 | * Unpins the guest's page holding the NIB when it exists. |
266 | * Resets the saved_iova and saved_isc to invalid values. |
267 | */ |
268 | static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) |
269 | { |
270 | if (!q) |
271 | return; |
272 | if (q->saved_isc != VFIO_AP_ISC_INVALID && |
273 | !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) { |
274 | kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc); |
275 | q->saved_isc = VFIO_AP_ISC_INVALID; |
276 | } |
277 | if (q->saved_iova && !WARN_ON(!q->matrix_mdev)) { |
278 | vfio_unpin_pages(device: &q->matrix_mdev->vdev, iova: q->saved_iova, npage: 1); |
279 | q->saved_iova = 0; |
280 | } |
281 | } |
282 | |
283 | /** |
284 | * vfio_ap_irq_disable - disables and clears an ap_queue interrupt |
285 | * @q: The vfio_ap_queue |
286 | * |
287 | * Uses ap_aqic to disable the interruption and in case of success, reset |
288 | * in progress or IRQ disable command already proceeded: calls |
289 | * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear |
290 | * and calls vfio_ap_free_aqic_resources() to free the resources associated |
291 | * with the AP interrupt handling. |
292 | * |
293 | * In the case the AP is busy, or a reset is in progress, |
294 | * retries after 20ms, up to 5 times. |
295 | * |
296 | * Returns if ap_aqic function failed with invalid, deconfigured or |
297 | * checkstopped AP. |
298 | * |
299 | * Return: &struct ap_queue_status |
300 | */ |
301 | static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q) |
302 | { |
303 | union ap_qirq_ctrl aqic_gisa = { .value = 0 }; |
304 | struct ap_queue_status status; |
305 | int retries = 5; |
306 | |
307 | do { |
308 | status = ap_aqic(q->apqn, aqic_gisa, 0); |
309 | switch (status.response_code) { |
310 | case AP_RESPONSE_OTHERWISE_CHANGED: |
311 | case AP_RESPONSE_NORMAL: |
312 | vfio_ap_wait_for_irqclear(apqn: q->apqn); |
313 | goto end_free; |
314 | case AP_RESPONSE_RESET_IN_PROGRESS: |
315 | case AP_RESPONSE_BUSY: |
316 | msleep(msecs: 20); |
317 | break; |
318 | case AP_RESPONSE_Q_NOT_AVAIL: |
319 | case AP_RESPONSE_DECONFIGURED: |
320 | case AP_RESPONSE_CHECKSTOPPED: |
321 | case AP_RESPONSE_INVALID_ADDRESS: |
322 | default: |
323 | /* All cases in default means AP not operational */ |
324 | WARN_ONCE(1, "%s: ap_aqic status %d\n" , __func__, |
325 | status.response_code); |
326 | goto end_free; |
327 | } |
328 | } while (retries--); |
329 | |
330 | WARN_ONCE(1, "%s: ap_aqic status %d\n" , __func__, |
331 | status.response_code); |
332 | end_free: |
333 | vfio_ap_free_aqic_resources(q); |
334 | return status; |
335 | } |
336 | |
337 | /** |
338 | * vfio_ap_validate_nib - validate a notification indicator byte (nib) address. |
339 | * |
340 | * @vcpu: the object representing the vcpu executing the PQAP(AQIC) instruction. |
341 | * @nib: the location for storing the nib address. |
342 | * |
343 | * When the PQAP(AQIC) instruction is executed, general register 2 contains the |
344 | * address of the notification indicator byte (nib) used for IRQ notification. |
345 | * This function parses and validates the nib from gr2. |
346 | * |
347 | * Return: returns zero if the nib address is a valid; otherwise, returns |
348 | * -EINVAL. |
349 | */ |
350 | static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, dma_addr_t *nib) |
351 | { |
352 | *nib = vcpu->run->s.regs.gprs[2]; |
353 | |
354 | if (!*nib) |
355 | return -EINVAL; |
356 | if (kvm_is_error_hva(addr: gfn_to_hva(kvm: vcpu->kvm, gfn: *nib >> PAGE_SHIFT))) |
357 | return -EINVAL; |
358 | |
359 | return 0; |
360 | } |
361 | |
static int ensure_nib_shared(unsigned long addr, struct gmap *gmap)
{
	/*
	 * The nib has to be located in shared storage since guest and
	 * host access it. vfio_pin_pages() will do a pin shared and
	 * if that fails (possibly because it's not a shared page) it
	 * calls export. We try to do a second pin shared here so that
	 * the UV gives us an error code if we try to pin a non-shared
	 * page.
	 *
	 * If the page is already pinned shared the UV will return a success.
	 */
	int ret = uv_pin_shared(addr);

	if (!ret)
		return 0;

	/* vfio_pin_pages() likely exported the page so let's re-import */
	gmap_convert_to_secure(gmap, addr);
	return ret;
}
383 | |
384 | /** |
385 | * vfio_ap_irq_enable - Enable Interruption for a APQN |
386 | * |
387 | * @q: the vfio_ap_queue holding AQIC parameters |
388 | * @isc: the guest ISC to register with the GIB interface |
389 | * @vcpu: the vcpu object containing the registers specifying the parameters |
390 | * passed to the PQAP(AQIC) instruction. |
391 | * |
392 | * Pin the NIB saved in *q |
393 | * Register the guest ISC to GIB interface and retrieve the |
394 | * host ISC to issue the host side PQAP/AQIC |
395 | * |
396 | * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the |
397 | * vfio_pin_pages failed. |
398 | * |
399 | * Otherwise return the ap_queue_status returned by the ap_aqic(), |
400 | * all retry handling will be done by the guest. |
401 | * |
402 | * Return: &struct ap_queue_status |
403 | */ |
404 | static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, |
405 | int isc, |
406 | struct kvm_vcpu *vcpu) |
407 | { |
408 | union ap_qirq_ctrl aqic_gisa = { .value = 0 }; |
409 | struct ap_queue_status status = {}; |
410 | struct kvm_s390_gisa *gisa; |
411 | struct page *h_page; |
412 | int nisc; |
413 | struct kvm *kvm; |
414 | phys_addr_t h_nib; |
415 | dma_addr_t nib; |
416 | int ret; |
417 | |
418 | /* Verify that the notification indicator byte address is valid */ |
419 | if (vfio_ap_validate_nib(vcpu, nib: &nib)) { |
420 | VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%pad, apqn=%#04x\n" , |
421 | __func__, &nib, q->apqn); |
422 | |
423 | status.response_code = AP_RESPONSE_INVALID_ADDRESS; |
424 | return status; |
425 | } |
426 | |
427 | ret = vfio_pin_pages(device: &q->matrix_mdev->vdev, iova: nib, npage: 1, |
428 | IOMMU_READ | IOMMU_WRITE, pages: &h_page); |
429 | switch (ret) { |
430 | case 1: |
431 | break; |
432 | default: |
433 | VFIO_AP_DBF_WARN("%s: vfio_pin_pages failed: rc=%d," |
434 | "nib=%pad, apqn=%#04x\n" , |
435 | __func__, ret, &nib, q->apqn); |
436 | |
437 | status.response_code = AP_RESPONSE_INVALID_ADDRESS; |
438 | return status; |
439 | } |
440 | |
441 | kvm = q->matrix_mdev->kvm; |
442 | gisa = kvm->arch.gisa_int.origin; |
443 | |
444 | h_nib = page_to_phys(h_page) | (nib & ~PAGE_MASK); |
445 | aqic_gisa.gisc = isc; |
446 | |
447 | /* NIB in non-shared storage is a rc 6 for PV guests */ |
448 | if (kvm_s390_pv_cpu_is_protected(vcpu) && |
449 | ensure_nib_shared(addr: h_nib & PAGE_MASK, gmap: kvm->arch.gmap)) { |
450 | vfio_unpin_pages(device: &q->matrix_mdev->vdev, iova: nib, npage: 1); |
451 | status.response_code = AP_RESPONSE_INVALID_ADDRESS; |
452 | return status; |
453 | } |
454 | |
455 | nisc = kvm_s390_gisc_register(kvm, isc); |
456 | if (nisc < 0) { |
457 | VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n" , |
458 | __func__, nisc, isc, q->apqn); |
459 | |
460 | status.response_code = AP_RESPONSE_INVALID_GISA; |
461 | return status; |
462 | } |
463 | |
464 | aqic_gisa.isc = nisc; |
465 | aqic_gisa.ir = 1; |
466 | aqic_gisa.gisa = virt_to_phys(address: gisa) >> 4; |
467 | |
468 | status = ap_aqic(q->apqn, aqic_gisa, h_nib); |
469 | switch (status.response_code) { |
470 | case AP_RESPONSE_NORMAL: |
471 | /* See if we did clear older IRQ configuration */ |
472 | vfio_ap_free_aqic_resources(q); |
473 | q->saved_iova = nib; |
474 | q->saved_isc = isc; |
475 | break; |
476 | case AP_RESPONSE_OTHERWISE_CHANGED: |
477 | /* We could not modify IRQ settings: clear new configuration */ |
478 | vfio_unpin_pages(device: &q->matrix_mdev->vdev, iova: nib, npage: 1); |
479 | kvm_s390_gisc_unregister(kvm, isc); |
480 | break; |
481 | default: |
482 | pr_warn("%s: apqn %04x: response: %02x\n" , __func__, q->apqn, |
483 | status.response_code); |
484 | vfio_ap_irq_disable(q); |
485 | break; |
486 | } |
487 | |
488 | if (status.response_code != AP_RESPONSE_NORMAL) { |
489 | VFIO_AP_DBF_WARN("%s: PQAP(AQIC) failed with status=%#02x: " |
490 | "zone=%#x, ir=%#x, gisc=%#x, f=%#x," |
491 | "gisa=%#x, isc=%#x, apqn=%#04x\n" , |
492 | __func__, status.response_code, |
493 | aqic_gisa.zone, aqic_gisa.ir, aqic_gisa.gisc, |
494 | aqic_gisa.gf, aqic_gisa.gisa, aqic_gisa.isc, |
495 | q->apqn); |
496 | } |
497 | |
498 | return status; |
499 | } |
500 | |
501 | /** |
502 | * vfio_ap_le_guid_to_be_uuid - convert a little endian guid array into an array |
503 | * of big endian elements that can be passed by |
504 | * value to an s390dbf sprintf event function to |
505 | * format a UUID string. |
506 | * |
507 | * @guid: the object containing the little endian guid |
508 | * @uuid: a six-element array of long values that can be passed by value as |
509 | * arguments for a formatting string specifying a UUID. |
510 | * |
511 | * The S390 Debug Feature (s390dbf) allows the use of "%s" in the sprintf |
512 | * event functions if the memory for the passed string is available as long as |
513 | * the debug feature exists. Since a mediated device can be removed at any |
514 | * time, it's name can not be used because %s passes the reference to the string |
515 | * in memory and the reference will go stale once the device is removed . |
516 | * |
517 | * The s390dbf string formatting function allows a maximum of 9 arguments for a |
518 | * message to be displayed in the 'sprintf' view. In order to use the bytes |
519 | * comprising the mediated device's UUID to display the mediated device name, |
520 | * they will have to be converted into an array whose elements can be passed by |
521 | * value to sprintf. For example: |
522 | * |
523 | * guid array: { 83, 78, 17, 62, bb, f1, f0, 47, 91, 4d, 32, a2, 2e, 3a, 88, 04 } |
524 | * mdev name: 62177883-f1bb-47f0-914d-32a22e3a8804 |
525 | * array returned: { 62177883, f1bb, 47f0, 914d, 32a2, 2e3a8804 } |
526 | * formatting string: "%08lx-%04lx-%04lx-%04lx-%02lx%04lx" |
527 | */ |
528 | static void vfio_ap_le_guid_to_be_uuid(guid_t *guid, unsigned long *uuid) |
529 | { |
530 | /* |
531 | * The input guid is ordered in little endian, so it needs to be |
532 | * reordered for displaying a UUID as a string. This specifies the |
533 | * guid indices in proper order. |
534 | */ |
535 | uuid[0] = le32_to_cpup(p: (__le32 *)guid); |
536 | uuid[1] = le16_to_cpup(p: (__le16 *)&guid->b[4]); |
537 | uuid[2] = le16_to_cpup(p: (__le16 *)&guid->b[6]); |
538 | uuid[3] = *((__u16 *)&guid->b[8]); |
539 | uuid[4] = *((__u16 *)&guid->b[10]); |
540 | uuid[5] = *((__u32 *)&guid->b[12]); |
541 | } |
542 | |
543 | /** |
544 | * handle_pqap - PQAP instruction callback |
545 | * |
546 | * @vcpu: The vcpu on which we received the PQAP instruction |
547 | * |
548 | * Get the general register contents to initialize internal variables. |
549 | * REG[0]: APQN |
550 | * REG[1]: IR and ISC |
551 | * REG[2]: NIB |
552 | * |
553 | * Response.status may be set to following Response Code: |
554 | * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available |
555 | * - AP_RESPONSE_DECONFIGURED: if the queue is not configured |
556 | * - AP_RESPONSE_NORMAL (0) : in case of success |
557 | * Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC. |
558 | * We take the matrix_dev lock to ensure serialization on queues and |
559 | * mediated device access. |
560 | * |
561 | * Return: 0 if we could handle the request inside KVM. |
562 | * Otherwise, returns -EOPNOTSUPP to let QEMU handle the fault. |
563 | */ |
564 | static int handle_pqap(struct kvm_vcpu *vcpu) |
565 | { |
566 | uint64_t status; |
567 | uint16_t apqn; |
568 | unsigned long uuid[6]; |
569 | struct vfio_ap_queue *q; |
570 | struct ap_queue_status qstatus = { |
571 | .response_code = AP_RESPONSE_Q_NOT_AVAIL, }; |
572 | struct ap_matrix_mdev *matrix_mdev; |
573 | |
574 | apqn = vcpu->run->s.regs.gprs[0] & 0xffff; |
575 | |
576 | /* If we do not use the AIV facility just go to userland */ |
577 | if (!(vcpu->arch.sie_block->eca & ECA_AIV)) { |
578 | VFIO_AP_DBF_WARN("%s: AIV facility not installed: apqn=0x%04x, eca=0x%04x\n" , |
579 | __func__, apqn, vcpu->arch.sie_block->eca); |
580 | |
581 | return -EOPNOTSUPP; |
582 | } |
583 | |
584 | mutex_lock(&matrix_dev->mdevs_lock); |
585 | |
586 | if (!vcpu->kvm->arch.crypto.pqap_hook) { |
587 | VFIO_AP_DBF_WARN("%s: PQAP(AQIC) hook not registered with the vfio_ap driver: apqn=0x%04x\n" , |
588 | __func__, apqn); |
589 | |
590 | goto out_unlock; |
591 | } |
592 | |
593 | matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook, |
594 | struct ap_matrix_mdev, pqap_hook); |
595 | |
596 | /* If the there is no guest using the mdev, there is nothing to do */ |
597 | if (!matrix_mdev->kvm) { |
598 | vfio_ap_le_guid_to_be_uuid(guid: &matrix_mdev->mdev->uuid, uuid); |
599 | VFIO_AP_DBF_WARN("%s: mdev %08lx-%04lx-%04lx-%04lx-%04lx%08lx not in use: apqn=0x%04x\n" , |
600 | __func__, uuid[0], uuid[1], uuid[2], |
601 | uuid[3], uuid[4], uuid[5], apqn); |
602 | goto out_unlock; |
603 | } |
604 | |
605 | q = vfio_ap_mdev_get_queue(matrix_mdev, apqn); |
606 | if (!q) { |
607 | VFIO_AP_DBF_WARN("%s: Queue %02x.%04x not bound to the vfio_ap driver\n" , |
608 | __func__, AP_QID_CARD(apqn), |
609 | AP_QID_QUEUE(apqn)); |
610 | goto out_unlock; |
611 | } |
612 | |
613 | status = vcpu->run->s.regs.gprs[1]; |
614 | |
615 | /* If IR bit(16) is set we enable the interrupt */ |
616 | if ((status >> (63 - 16)) & 0x01) |
617 | qstatus = vfio_ap_irq_enable(q, status & 0x07, vcpu); |
618 | else |
619 | qstatus = vfio_ap_irq_disable(q); |
620 | |
621 | out_unlock: |
622 | memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus)); |
623 | vcpu->run->s.regs.gprs[1] >>= 32; |
624 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
625 | return 0; |
626 | } |
627 | |
628 | static void vfio_ap_matrix_init(struct ap_config_info *info, |
629 | struct ap_matrix *matrix) |
630 | { |
631 | matrix->apm_max = info->apxa ? info->na : 63; |
632 | matrix->aqm_max = info->apxa ? info->nd : 15; |
633 | matrix->adm_max = info->apxa ? info->nd : 15; |
634 | } |
635 | |
636 | static void vfio_ap_mdev_update_guest_apcb(struct ap_matrix_mdev *matrix_mdev) |
637 | { |
638 | if (matrix_mdev->kvm) |
639 | kvm_arch_crypto_set_masks(matrix_mdev->kvm, |
640 | matrix_mdev->shadow_apcb.apm, |
641 | matrix_mdev->shadow_apcb.aqm, |
642 | matrix_mdev->shadow_apcb.adm); |
643 | } |
644 | |
645 | static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev) |
646 | { |
647 | DECLARE_BITMAP(prev_shadow_adm, AP_DOMAINS); |
648 | |
649 | bitmap_copy(dst: prev_shadow_adm, src: matrix_mdev->shadow_apcb.adm, AP_DOMAINS); |
650 | bitmap_and(dst: matrix_mdev->shadow_apcb.adm, src1: matrix_mdev->matrix.adm, |
651 | src2: (unsigned long *)matrix_dev->info.adm, AP_DOMAINS); |
652 | |
653 | return !bitmap_equal(src1: prev_shadow_adm, src2: matrix_mdev->shadow_apcb.adm, |
654 | AP_DOMAINS); |
655 | } |
656 | |
657 | /* |
658 | * vfio_ap_mdev_filter_matrix - filter the APQNs assigned to the matrix mdev |
659 | * to ensure no queue devices are passed through to |
660 | * the guest that are not bound to the vfio_ap |
661 | * device driver. |
662 | * |
663 | * @matrix_mdev: the matrix mdev whose matrix is to be filtered. |
664 | * |
665 | * Note: If an APQN referencing a queue device that is not bound to the vfio_ap |
666 | * driver, its APID will be filtered from the guest's APCB. The matrix |
667 | * structure precludes filtering an individual APQN, so its APID will be |
668 | * filtered. |
669 | * |
670 | * Return: a boolean value indicating whether the KVM guest's APCB was changed |
671 | * by the filtering or not. |
672 | */ |
673 | static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, |
674 | struct ap_matrix_mdev *matrix_mdev) |
675 | { |
676 | unsigned long apid, apqi, apqn; |
677 | DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES); |
678 | DECLARE_BITMAP(prev_shadow_aqm, AP_DOMAINS); |
679 | struct vfio_ap_queue *q; |
680 | |
681 | bitmap_copy(dst: prev_shadow_apm, src: matrix_mdev->shadow_apcb.apm, AP_DEVICES); |
682 | bitmap_copy(dst: prev_shadow_aqm, src: matrix_mdev->shadow_apcb.aqm, AP_DOMAINS); |
683 | vfio_ap_matrix_init(info: &matrix_dev->info, matrix: &matrix_mdev->shadow_apcb); |
684 | |
685 | /* |
686 | * Copy the adapters, domains and control domains to the shadow_apcb |
687 | * from the matrix mdev, but only those that are assigned to the host's |
688 | * AP configuration. |
689 | */ |
690 | bitmap_and(dst: matrix_mdev->shadow_apcb.apm, src1: matrix_mdev->matrix.apm, |
691 | src2: (unsigned long *)matrix_dev->info.apm, AP_DEVICES); |
692 | bitmap_and(dst: matrix_mdev->shadow_apcb.aqm, src1: matrix_mdev->matrix.aqm, |
693 | src2: (unsigned long *)matrix_dev->info.aqm, AP_DOMAINS); |
694 | |
695 | for_each_set_bit_inv(apid, apm, AP_DEVICES) { |
696 | for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) { |
697 | /* |
698 | * If the APQN is not bound to the vfio_ap device |
699 | * driver, then we can't assign it to the guest's |
700 | * AP configuration. The AP architecture won't |
701 | * allow filtering of a single APQN, so let's filter |
702 | * the APID since an adapter represents a physical |
703 | * hardware device. |
704 | */ |
705 | apqn = AP_MKQID(apid, apqi); |
706 | q = vfio_ap_mdev_get_queue(matrix_mdev, apqn); |
707 | if (!q || q->reset_status.response_code) { |
708 | clear_bit_inv(apid, |
709 | matrix_mdev->shadow_apcb.apm); |
710 | break; |
711 | } |
712 | } |
713 | } |
714 | |
715 | return !bitmap_equal(src1: prev_shadow_apm, src2: matrix_mdev->shadow_apcb.apm, |
716 | AP_DEVICES) || |
717 | !bitmap_equal(src1: prev_shadow_aqm, src2: matrix_mdev->shadow_apcb.aqm, |
718 | AP_DOMAINS); |
719 | } |
720 | |
721 | static int vfio_ap_mdev_init_dev(struct vfio_device *vdev) |
722 | { |
723 | struct ap_matrix_mdev *matrix_mdev = |
724 | container_of(vdev, struct ap_matrix_mdev, vdev); |
725 | |
726 | matrix_mdev->mdev = to_mdev_device(dev: vdev->dev); |
727 | vfio_ap_matrix_init(info: &matrix_dev->info, matrix: &matrix_mdev->matrix); |
728 | matrix_mdev->pqap_hook = handle_pqap; |
729 | vfio_ap_matrix_init(info: &matrix_dev->info, matrix: &matrix_mdev->shadow_apcb); |
730 | hash_init(matrix_mdev->qtable.queues); |
731 | |
732 | return 0; |
733 | } |
734 | |
735 | static int vfio_ap_mdev_probe(struct mdev_device *mdev) |
736 | { |
737 | struct ap_matrix_mdev *matrix_mdev; |
738 | int ret; |
739 | |
740 | matrix_mdev = vfio_alloc_device(ap_matrix_mdev, vdev, &mdev->dev, |
741 | &vfio_ap_matrix_dev_ops); |
742 | if (IS_ERR(ptr: matrix_mdev)) |
743 | return PTR_ERR(ptr: matrix_mdev); |
744 | |
745 | ret = vfio_register_emulated_iommu_dev(device: &matrix_mdev->vdev); |
746 | if (ret) |
747 | goto err_put_vdev; |
748 | matrix_mdev->req_trigger = NULL; |
749 | dev_set_drvdata(dev: &mdev->dev, data: matrix_mdev); |
750 | mutex_lock(&matrix_dev->mdevs_lock); |
751 | list_add(new: &matrix_mdev->node, head: &matrix_dev->mdev_list); |
752 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
753 | return 0; |
754 | |
755 | err_put_vdev: |
756 | vfio_put_device(device: &matrix_mdev->vdev); |
757 | return ret; |
758 | } |
759 | |
760 | static void vfio_ap_mdev_link_queue(struct ap_matrix_mdev *matrix_mdev, |
761 | struct vfio_ap_queue *q) |
762 | { |
763 | if (q) { |
764 | q->matrix_mdev = matrix_mdev; |
765 | hash_add(matrix_mdev->qtable.queues, &q->mdev_qnode, q->apqn); |
766 | } |
767 | } |
768 | |
static void vfio_ap_mdev_link_apqn(struct ap_matrix_mdev *matrix_mdev, int apqn)
{
	/* vfio_ap_mdev_link_queue() tolerates a NULL lookup result */
	vfio_ap_mdev_link_queue(matrix_mdev, vfio_ap_find_queue(apqn));
}
776 | |
777 | static void vfio_ap_unlink_queue_fr_mdev(struct vfio_ap_queue *q) |
778 | { |
779 | hash_del(node: &q->mdev_qnode); |
780 | } |
781 | |
static void vfio_ap_unlink_mdev_fr_queue(struct vfio_ap_queue *q)
{
	/* Sever the queue's back-reference to its mdev */
	q->matrix_mdev = NULL;
}
786 | |
787 | static void vfio_ap_mdev_unlink_fr_queues(struct ap_matrix_mdev *matrix_mdev) |
788 | { |
789 | struct vfio_ap_queue *q; |
790 | unsigned long apid, apqi; |
791 | |
792 | for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) { |
793 | for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, |
794 | AP_DOMAINS) { |
795 | q = vfio_ap_mdev_get_queue(matrix_mdev, |
796 | apqn: AP_MKQID(apid, apqi)); |
797 | if (q) |
798 | q->matrix_mdev = NULL; |
799 | } |
800 | } |
801 | } |
802 | |
803 | static void vfio_ap_mdev_remove(struct mdev_device *mdev) |
804 | { |
805 | struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev: &mdev->dev); |
806 | |
807 | vfio_unregister_group_dev(device: &matrix_mdev->vdev); |
808 | |
809 | mutex_lock(&matrix_dev->guests_lock); |
810 | mutex_lock(&matrix_dev->mdevs_lock); |
811 | vfio_ap_mdev_reset_queues(qtable: &matrix_mdev->qtable); |
812 | vfio_ap_mdev_unlink_fr_queues(matrix_mdev); |
813 | list_del(entry: &matrix_mdev->node); |
814 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
815 | mutex_unlock(lock: &matrix_dev->guests_lock); |
816 | vfio_put_device(device: &matrix_mdev->vdev); |
817 | } |
818 | |
819 | #define MDEV_SHARING_ERR "Userspace may not re-assign queue %02lx.%04lx " \ |
820 | "already assigned to %s" |
821 | |
822 | static void vfio_ap_mdev_log_sharing_err(struct ap_matrix_mdev *matrix_mdev, |
823 | unsigned long *apm, |
824 | unsigned long *aqm) |
825 | { |
826 | unsigned long apid, apqi; |
827 | const struct device *dev = mdev_dev(mdev: matrix_mdev->mdev); |
828 | const char *mdev_name = dev_name(dev); |
829 | |
830 | for_each_set_bit_inv(apid, apm, AP_DEVICES) |
831 | for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) |
832 | dev_warn(dev, MDEV_SHARING_ERR, apid, apqi, mdev_name); |
833 | } |
834 | |
835 | /** |
836 | * vfio_ap_mdev_verify_no_sharing - verify APQNs are not shared by matrix mdevs |
837 | * |
838 | * @mdev_apm: mask indicating the APIDs of the APQNs to be verified |
839 | * @mdev_aqm: mask indicating the APQIs of the APQNs to be verified |
840 | * |
841 | * Verifies that each APQN derived from the Cartesian product of a bitmap of |
842 | * AP adapter IDs and AP queue indexes is not configured for any matrix |
843 | * mediated device. AP queue sharing is not allowed. |
844 | * |
845 | * Return: 0 if the APQNs are not shared; otherwise return -EADDRINUSE. |
846 | */ |
847 | static int vfio_ap_mdev_verify_no_sharing(unsigned long *mdev_apm, |
848 | unsigned long *mdev_aqm) |
849 | { |
850 | struct ap_matrix_mdev *matrix_mdev; |
851 | DECLARE_BITMAP(apm, AP_DEVICES); |
852 | DECLARE_BITMAP(aqm, AP_DOMAINS); |
853 | |
854 | list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) { |
855 | /* |
856 | * If the input apm and aqm are fields of the matrix_mdev |
857 | * object, then move on to the next matrix_mdev. |
858 | */ |
859 | if (mdev_apm == matrix_mdev->matrix.apm && |
860 | mdev_aqm == matrix_mdev->matrix.aqm) |
861 | continue; |
862 | |
863 | memset(apm, 0, sizeof(apm)); |
864 | memset(aqm, 0, sizeof(aqm)); |
865 | |
866 | /* |
867 | * We work on full longs, as we can only exclude the leftover |
868 | * bits in non-inverse order. The leftover is all zeros. |
869 | */ |
870 | if (!bitmap_and(dst: apm, src1: mdev_apm, src2: matrix_mdev->matrix.apm, |
871 | AP_DEVICES)) |
872 | continue; |
873 | |
874 | if (!bitmap_and(dst: aqm, src1: mdev_aqm, src2: matrix_mdev->matrix.aqm, |
875 | AP_DOMAINS)) |
876 | continue; |
877 | |
878 | vfio_ap_mdev_log_sharing_err(matrix_mdev, apm, aqm); |
879 | |
880 | return -EADDRINUSE; |
881 | } |
882 | |
883 | return 0; |
884 | } |
885 | |
886 | /** |
887 | * vfio_ap_mdev_validate_masks - verify that the APQNs assigned to the mdev are |
888 | * not reserved for the default zcrypt driver and |
889 | * are not assigned to another mdev. |
890 | * |
891 | * @matrix_mdev: the mdev to which the APQNs being validated are assigned. |
892 | * |
893 | * Return: One of the following values: |
894 | * o the error returned from the ap_apqn_in_matrix_owned_by_def_drv() function, |
895 | * most likely -EBUSY indicating the ap_perms_mutex lock is already held. |
896 | * o EADDRNOTAVAIL if an APQN assigned to @matrix_mdev is reserved for the |
897 | * zcrypt default driver. |
898 | * o EADDRINUSE if an APQN assigned to @matrix_mdev is assigned to another mdev |
899 | * o A zero indicating validation succeeded. |
900 | */ |
901 | static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *matrix_mdev) |
902 | { |
903 | if (ap_apqn_in_matrix_owned_by_def_drv(apm: matrix_mdev->matrix.apm, |
904 | aqm: matrix_mdev->matrix.aqm)) |
905 | return -EADDRNOTAVAIL; |
906 | |
907 | return vfio_ap_mdev_verify_no_sharing(mdev_apm: matrix_mdev->matrix.apm, |
908 | mdev_aqm: matrix_mdev->matrix.aqm); |
909 | } |
910 | |
911 | static void vfio_ap_mdev_link_adapter(struct ap_matrix_mdev *matrix_mdev, |
912 | unsigned long apid) |
913 | { |
914 | unsigned long apqi; |
915 | |
916 | for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) |
917 | vfio_ap_mdev_link_apqn(matrix_mdev, |
918 | apqn: AP_MKQID(apid, apqi)); |
919 | } |
920 | |
921 | /** |
922 | * assign_adapter_store - parses the APID from @buf and sets the |
923 | * corresponding bit in the mediated matrix device's APM |
924 | * |
925 | * @dev: the matrix device |
926 | * @attr: the mediated matrix device's assign_adapter attribute |
927 | * @buf: a buffer containing the AP adapter number (APID) to |
928 | * be assigned |
929 | * @count: the number of bytes in @buf |
930 | * |
931 | * Return: the number of bytes processed if the APID is valid; otherwise, |
932 | * returns one of the following errors: |
933 | * |
934 | * 1. -EINVAL |
935 | * The APID is not a valid number |
936 | * |
937 | * 2. -ENODEV |
938 | * The APID exceeds the maximum value configured for the system |
939 | * |
940 | * 3. -EADDRNOTAVAIL |
941 | * An APQN derived from the cross product of the APID being assigned |
942 | * and the APQIs previously assigned is not bound to the vfio_ap device |
943 | * driver; or, if no APQIs have yet been assigned, the APID is not |
944 | * contained in an APQN bound to the vfio_ap device driver. |
945 | * |
946 | * 4. -EADDRINUSE |
947 | * An APQN derived from the cross product of the APID being assigned |
948 | * and the APQIs previously assigned is being used by another mediated |
949 | * matrix device |
950 | * |
951 | * 5. -EAGAIN |
952 | * A lock required to validate the mdev's AP configuration could not |
953 | * be obtained. |
954 | */ |
955 | static ssize_t assign_adapter_store(struct device *dev, |
956 | struct device_attribute *attr, |
957 | const char *buf, size_t count) |
958 | { |
959 | int ret; |
960 | unsigned long apid; |
961 | DECLARE_BITMAP(apm_delta, AP_DEVICES); |
962 | struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); |
963 | |
964 | mutex_lock(&ap_perms_mutex); |
965 | get_update_locks_for_mdev(matrix_mdev); |
966 | |
967 | ret = kstrtoul(s: buf, base: 0, res: &apid); |
968 | if (ret) |
969 | goto done; |
970 | |
971 | if (apid > matrix_mdev->matrix.apm_max) { |
972 | ret = -ENODEV; |
973 | goto done; |
974 | } |
975 | |
976 | if (test_bit_inv(apid, matrix_mdev->matrix.apm)) { |
977 | ret = count; |
978 | goto done; |
979 | } |
980 | |
981 | set_bit_inv(apid, matrix_mdev->matrix.apm); |
982 | |
983 | ret = vfio_ap_mdev_validate_masks(matrix_mdev); |
984 | if (ret) { |
985 | clear_bit_inv(apid, matrix_mdev->matrix.apm); |
986 | goto done; |
987 | } |
988 | |
989 | vfio_ap_mdev_link_adapter(matrix_mdev, apid); |
990 | memset(apm_delta, 0, sizeof(apm_delta)); |
991 | set_bit_inv(apid, apm_delta); |
992 | |
993 | if (vfio_ap_mdev_filter_matrix(apm: apm_delta, |
994 | aqm: matrix_mdev->matrix.aqm, matrix_mdev)) |
995 | vfio_ap_mdev_update_guest_apcb(matrix_mdev); |
996 | |
997 | ret = count; |
998 | done: |
999 | release_update_locks_for_mdev(matrix_mdev); |
1000 | mutex_unlock(lock: &ap_perms_mutex); |
1001 | |
1002 | return ret; |
1003 | } |
1004 | static DEVICE_ATTR_WO(assign_adapter); |
1005 | |
/*
 * Look up the queue with APQN derived from @apid and @apqi; if it is linked
 * to @matrix_mdev, unlink it. Returns the queue, or NULL if it is not linked.
 */
static struct vfio_ap_queue
*vfio_ap_unlink_apqn_fr_mdev(struct ap_matrix_mdev *matrix_mdev,
			     unsigned long apid, unsigned long apqi)
{
	struct vfio_ap_queue *q;

	q = vfio_ap_mdev_get_queue(matrix_mdev, AP_MKQID(apid, apqi));
	if (q)
		vfio_ap_unlink_queue_fr_mdev(q);

	return q;
}
1019 | |
1020 | /** |
1021 | * vfio_ap_mdev_unlink_adapter - unlink all queues associated with unassigned |
1022 | * adapter from the matrix mdev to which the |
1023 | * adapter was assigned. |
1024 | * @matrix_mdev: the matrix mediated device to which the adapter was assigned. |
1025 | * @apid: the APID of the unassigned adapter. |
1026 | * @qtable: table for storing queues associated with unassigned adapter. |
1027 | */ |
1028 | static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev, |
1029 | unsigned long apid, |
1030 | struct ap_queue_table *qtable) |
1031 | { |
1032 | unsigned long apqi; |
1033 | struct vfio_ap_queue *q; |
1034 | |
1035 | for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) { |
1036 | q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi); |
1037 | |
1038 | if (q && qtable) { |
1039 | if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && |
1040 | test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) |
1041 | hash_add(qtable->queues, &q->mdev_qnode, |
1042 | q->apqn); |
1043 | } |
1044 | } |
1045 | } |
1046 | |
1047 | static void vfio_ap_mdev_hot_unplug_adapter(struct ap_matrix_mdev *matrix_mdev, |
1048 | unsigned long apid) |
1049 | { |
1050 | int loop_cursor; |
1051 | struct vfio_ap_queue *q; |
1052 | struct ap_queue_table *qtable = kzalloc(size: sizeof(*qtable), GFP_KERNEL); |
1053 | |
1054 | hash_init(qtable->queues); |
1055 | vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, qtable); |
1056 | |
1057 | if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) { |
1058 | clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); |
1059 | vfio_ap_mdev_update_guest_apcb(matrix_mdev); |
1060 | } |
1061 | |
1062 | vfio_ap_mdev_reset_queues(qtable); |
1063 | |
1064 | hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { |
1065 | vfio_ap_unlink_mdev_fr_queue(q); |
1066 | hash_del(node: &q->mdev_qnode); |
1067 | } |
1068 | |
1069 | kfree(objp: qtable); |
1070 | } |
1071 | |
1072 | /** |
1073 | * unassign_adapter_store - parses the APID from @buf and clears the |
1074 | * corresponding bit in the mediated matrix device's APM |
1075 | * |
1076 | * @dev: the matrix device |
1077 | * @attr: the mediated matrix device's unassign_adapter attribute |
1078 | * @buf: a buffer containing the adapter number (APID) to be unassigned |
1079 | * @count: the number of bytes in @buf |
1080 | * |
1081 | * Return: the number of bytes processed if the APID is valid; otherwise, |
1082 | * returns one of the following errors: |
1083 | * -EINVAL if the APID is not a number |
1084 | * -ENODEV if the APID it exceeds the maximum value configured for the |
1085 | * system |
1086 | */ |
1087 | static ssize_t unassign_adapter_store(struct device *dev, |
1088 | struct device_attribute *attr, |
1089 | const char *buf, size_t count) |
1090 | { |
1091 | int ret; |
1092 | unsigned long apid; |
1093 | struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); |
1094 | |
1095 | get_update_locks_for_mdev(matrix_mdev); |
1096 | |
1097 | ret = kstrtoul(s: buf, base: 0, res: &apid); |
1098 | if (ret) |
1099 | goto done; |
1100 | |
1101 | if (apid > matrix_mdev->matrix.apm_max) { |
1102 | ret = -ENODEV; |
1103 | goto done; |
1104 | } |
1105 | |
1106 | if (!test_bit_inv(apid, matrix_mdev->matrix.apm)) { |
1107 | ret = count; |
1108 | goto done; |
1109 | } |
1110 | |
1111 | clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm); |
1112 | vfio_ap_mdev_hot_unplug_adapter(matrix_mdev, apid); |
1113 | ret = count; |
1114 | done: |
1115 | release_update_locks_for_mdev(matrix_mdev); |
1116 | return ret; |
1117 | } |
1118 | static DEVICE_ATTR_WO(unassign_adapter); |
1119 | |
1120 | static void vfio_ap_mdev_link_domain(struct ap_matrix_mdev *matrix_mdev, |
1121 | unsigned long apqi) |
1122 | { |
1123 | unsigned long apid; |
1124 | |
1125 | for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) |
1126 | vfio_ap_mdev_link_apqn(matrix_mdev, |
1127 | apqn: AP_MKQID(apid, apqi)); |
1128 | } |
1129 | |
1130 | /** |
1131 | * assign_domain_store - parses the APQI from @buf and sets the |
1132 | * corresponding bit in the mediated matrix device's AQM |
1133 | * |
1134 | * @dev: the matrix device |
1135 | * @attr: the mediated matrix device's assign_domain attribute |
1136 | * @buf: a buffer containing the AP queue index (APQI) of the domain to |
1137 | * be assigned |
1138 | * @count: the number of bytes in @buf |
1139 | * |
1140 | * Return: the number of bytes processed if the APQI is valid; otherwise returns |
1141 | * one of the following errors: |
1142 | * |
1143 | * 1. -EINVAL |
1144 | * The APQI is not a valid number |
1145 | * |
1146 | * 2. -ENODEV |
1147 | * The APQI exceeds the maximum value configured for the system |
1148 | * |
1149 | * 3. -EADDRNOTAVAIL |
1150 | * An APQN derived from the cross product of the APQI being assigned |
1151 | * and the APIDs previously assigned is not bound to the vfio_ap device |
1152 | * driver; or, if no APIDs have yet been assigned, the APQI is not |
1153 | * contained in an APQN bound to the vfio_ap device driver. |
1154 | * |
1155 | * 4. -EADDRINUSE |
1156 | * An APQN derived from the cross product of the APQI being assigned |
1157 | * and the APIDs previously assigned is being used by another mediated |
1158 | * matrix device |
1159 | * |
1160 | * 5. -EAGAIN |
1161 | * The lock required to validate the mdev's AP configuration could not |
1162 | * be obtained. |
1163 | */ |
1164 | static ssize_t assign_domain_store(struct device *dev, |
1165 | struct device_attribute *attr, |
1166 | const char *buf, size_t count) |
1167 | { |
1168 | int ret; |
1169 | unsigned long apqi; |
1170 | DECLARE_BITMAP(aqm_delta, AP_DOMAINS); |
1171 | struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); |
1172 | |
1173 | mutex_lock(&ap_perms_mutex); |
1174 | get_update_locks_for_mdev(matrix_mdev); |
1175 | |
1176 | ret = kstrtoul(s: buf, base: 0, res: &apqi); |
1177 | if (ret) |
1178 | goto done; |
1179 | |
1180 | if (apqi > matrix_mdev->matrix.aqm_max) { |
1181 | ret = -ENODEV; |
1182 | goto done; |
1183 | } |
1184 | |
1185 | if (test_bit_inv(apqi, matrix_mdev->matrix.aqm)) { |
1186 | ret = count; |
1187 | goto done; |
1188 | } |
1189 | |
1190 | set_bit_inv(apqi, matrix_mdev->matrix.aqm); |
1191 | |
1192 | ret = vfio_ap_mdev_validate_masks(matrix_mdev); |
1193 | if (ret) { |
1194 | clear_bit_inv(apqi, matrix_mdev->matrix.aqm); |
1195 | goto done; |
1196 | } |
1197 | |
1198 | vfio_ap_mdev_link_domain(matrix_mdev, apqi); |
1199 | memset(aqm_delta, 0, sizeof(aqm_delta)); |
1200 | set_bit_inv(apqi, aqm_delta); |
1201 | |
1202 | if (vfio_ap_mdev_filter_matrix(apm: matrix_mdev->matrix.apm, aqm: aqm_delta, |
1203 | matrix_mdev)) |
1204 | vfio_ap_mdev_update_guest_apcb(matrix_mdev); |
1205 | |
1206 | ret = count; |
1207 | done: |
1208 | release_update_locks_for_mdev(matrix_mdev); |
1209 | mutex_unlock(lock: &ap_perms_mutex); |
1210 | |
1211 | return ret; |
1212 | } |
1213 | static DEVICE_ATTR_WO(assign_domain); |
1214 | |
1215 | static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev, |
1216 | unsigned long apqi, |
1217 | struct ap_queue_table *qtable) |
1218 | { |
1219 | unsigned long apid; |
1220 | struct vfio_ap_queue *q; |
1221 | |
1222 | for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) { |
1223 | q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi); |
1224 | |
1225 | if (q && qtable) { |
1226 | if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && |
1227 | test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) |
1228 | hash_add(qtable->queues, &q->mdev_qnode, |
1229 | q->apqn); |
1230 | } |
1231 | } |
1232 | } |
1233 | |
1234 | static void vfio_ap_mdev_hot_unplug_domain(struct ap_matrix_mdev *matrix_mdev, |
1235 | unsigned long apqi) |
1236 | { |
1237 | int loop_cursor; |
1238 | struct vfio_ap_queue *q; |
1239 | struct ap_queue_table *qtable = kzalloc(size: sizeof(*qtable), GFP_KERNEL); |
1240 | |
1241 | hash_init(qtable->queues); |
1242 | vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, qtable); |
1243 | |
1244 | if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) { |
1245 | clear_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm); |
1246 | vfio_ap_mdev_update_guest_apcb(matrix_mdev); |
1247 | } |
1248 | |
1249 | vfio_ap_mdev_reset_queues(qtable); |
1250 | |
1251 | hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { |
1252 | vfio_ap_unlink_mdev_fr_queue(q); |
1253 | hash_del(node: &q->mdev_qnode); |
1254 | } |
1255 | |
1256 | kfree(objp: qtable); |
1257 | } |
1258 | |
1259 | /** |
1260 | * unassign_domain_store - parses the APQI from @buf and clears the |
1261 | * corresponding bit in the mediated matrix device's AQM |
1262 | * |
1263 | * @dev: the matrix device |
1264 | * @attr: the mediated matrix device's unassign_domain attribute |
1265 | * @buf: a buffer containing the AP queue index (APQI) of the domain to |
1266 | * be unassigned |
1267 | * @count: the number of bytes in @buf |
1268 | * |
1269 | * Return: the number of bytes processed if the APQI is valid; otherwise, |
1270 | * returns one of the following errors: |
1271 | * -EINVAL if the APQI is not a number |
1272 | * -ENODEV if the APQI exceeds the maximum value configured for the system |
1273 | */ |
1274 | static ssize_t unassign_domain_store(struct device *dev, |
1275 | struct device_attribute *attr, |
1276 | const char *buf, size_t count) |
1277 | { |
1278 | int ret; |
1279 | unsigned long apqi; |
1280 | struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); |
1281 | |
1282 | get_update_locks_for_mdev(matrix_mdev); |
1283 | |
1284 | ret = kstrtoul(s: buf, base: 0, res: &apqi); |
1285 | if (ret) |
1286 | goto done; |
1287 | |
1288 | if (apqi > matrix_mdev->matrix.aqm_max) { |
1289 | ret = -ENODEV; |
1290 | goto done; |
1291 | } |
1292 | |
1293 | if (!test_bit_inv(apqi, matrix_mdev->matrix.aqm)) { |
1294 | ret = count; |
1295 | goto done; |
1296 | } |
1297 | |
1298 | clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm); |
1299 | vfio_ap_mdev_hot_unplug_domain(matrix_mdev, apqi); |
1300 | ret = count; |
1301 | |
1302 | done: |
1303 | release_update_locks_for_mdev(matrix_mdev); |
1304 | return ret; |
1305 | } |
1306 | static DEVICE_ATTR_WO(unassign_domain); |
1307 | |
1308 | /** |
1309 | * assign_control_domain_store - parses the domain ID from @buf and sets |
1310 | * the corresponding bit in the mediated matrix device's ADM |
1311 | * |
1312 | * @dev: the matrix device |
1313 | * @attr: the mediated matrix device's assign_control_domain attribute |
1314 | * @buf: a buffer containing the domain ID to be assigned |
1315 | * @count: the number of bytes in @buf |
1316 | * |
1317 | * Return: the number of bytes processed if the domain ID is valid; otherwise, |
1318 | * returns one of the following errors: |
1319 | * -EINVAL if the ID is not a number |
1320 | * -ENODEV if the ID exceeds the maximum value configured for the system |
1321 | */ |
1322 | static ssize_t assign_control_domain_store(struct device *dev, |
1323 | struct device_attribute *attr, |
1324 | const char *buf, size_t count) |
1325 | { |
1326 | int ret; |
1327 | unsigned long id; |
1328 | struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); |
1329 | |
1330 | get_update_locks_for_mdev(matrix_mdev); |
1331 | |
1332 | ret = kstrtoul(s: buf, base: 0, res: &id); |
1333 | if (ret) |
1334 | goto done; |
1335 | |
1336 | if (id > matrix_mdev->matrix.adm_max) { |
1337 | ret = -ENODEV; |
1338 | goto done; |
1339 | } |
1340 | |
1341 | if (test_bit_inv(id, matrix_mdev->matrix.adm)) { |
1342 | ret = count; |
1343 | goto done; |
1344 | } |
1345 | |
1346 | /* Set the bit in the ADM (bitmask) corresponding to the AP control |
1347 | * domain number (id). The bits in the mask, from most significant to |
1348 | * least significant, correspond to IDs 0 up to the one less than the |
1349 | * number of control domains that can be assigned. |
1350 | */ |
1351 | set_bit_inv(id, matrix_mdev->matrix.adm); |
1352 | if (vfio_ap_mdev_filter_cdoms(matrix_mdev)) |
1353 | vfio_ap_mdev_update_guest_apcb(matrix_mdev); |
1354 | |
1355 | ret = count; |
1356 | done: |
1357 | release_update_locks_for_mdev(matrix_mdev); |
1358 | return ret; |
1359 | } |
1360 | static DEVICE_ATTR_WO(assign_control_domain); |
1361 | |
1362 | /** |
1363 | * unassign_control_domain_store - parses the domain ID from @buf and |
1364 | * clears the corresponding bit in the mediated matrix device's ADM |
1365 | * |
1366 | * @dev: the matrix device |
1367 | * @attr: the mediated matrix device's unassign_control_domain attribute |
1368 | * @buf: a buffer containing the domain ID to be unassigned |
1369 | * @count: the number of bytes in @buf |
1370 | * |
1371 | * Return: the number of bytes processed if the domain ID is valid; otherwise, |
1372 | * returns one of the following errors: |
1373 | * -EINVAL if the ID is not a number |
1374 | * -ENODEV if the ID exceeds the maximum value configured for the system |
1375 | */ |
1376 | static ssize_t unassign_control_domain_store(struct device *dev, |
1377 | struct device_attribute *attr, |
1378 | const char *buf, size_t count) |
1379 | { |
1380 | int ret; |
1381 | unsigned long domid; |
1382 | struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); |
1383 | |
1384 | get_update_locks_for_mdev(matrix_mdev); |
1385 | |
1386 | ret = kstrtoul(s: buf, base: 0, res: &domid); |
1387 | if (ret) |
1388 | goto done; |
1389 | |
1390 | if (domid > matrix_mdev->matrix.adm_max) { |
1391 | ret = -ENODEV; |
1392 | goto done; |
1393 | } |
1394 | |
1395 | if (!test_bit_inv(domid, matrix_mdev->matrix.adm)) { |
1396 | ret = count; |
1397 | goto done; |
1398 | } |
1399 | |
1400 | clear_bit_inv(domid, matrix_mdev->matrix.adm); |
1401 | |
1402 | if (test_bit_inv(domid, matrix_mdev->shadow_apcb.adm)) { |
1403 | clear_bit_inv(domid, matrix_mdev->shadow_apcb.adm); |
1404 | vfio_ap_mdev_update_guest_apcb(matrix_mdev); |
1405 | } |
1406 | |
1407 | ret = count; |
1408 | done: |
1409 | release_update_locks_for_mdev(matrix_mdev); |
1410 | return ret; |
1411 | } |
1412 | static DEVICE_ATTR_WO(unassign_control_domain); |
1413 | |
1414 | static ssize_t control_domains_show(struct device *dev, |
1415 | struct device_attribute *dev_attr, |
1416 | char *buf) |
1417 | { |
1418 | unsigned long id; |
1419 | int nchars = 0; |
1420 | int n; |
1421 | char *bufpos = buf; |
1422 | struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); |
1423 | unsigned long max_domid = matrix_mdev->matrix.adm_max; |
1424 | |
1425 | mutex_lock(&matrix_dev->mdevs_lock); |
1426 | for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) { |
1427 | n = sprintf(buf: bufpos, fmt: "%04lx\n" , id); |
1428 | bufpos += n; |
1429 | nchars += n; |
1430 | } |
1431 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
1432 | |
1433 | return nchars; |
1434 | } |
1435 | static DEVICE_ATTR_RO(control_domains); |
1436 | |
1437 | static ssize_t vfio_ap_mdev_matrix_show(struct ap_matrix *matrix, char *buf) |
1438 | { |
1439 | char *bufpos = buf; |
1440 | unsigned long apid; |
1441 | unsigned long apqi; |
1442 | unsigned long apid1; |
1443 | unsigned long apqi1; |
1444 | unsigned long napm_bits = matrix->apm_max + 1; |
1445 | unsigned long naqm_bits = matrix->aqm_max + 1; |
1446 | int nchars = 0; |
1447 | int n; |
1448 | |
1449 | apid1 = find_first_bit_inv(matrix->apm, napm_bits); |
1450 | apqi1 = find_first_bit_inv(matrix->aqm, naqm_bits); |
1451 | |
1452 | if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) { |
1453 | for_each_set_bit_inv(apid, matrix->apm, napm_bits) { |
1454 | for_each_set_bit_inv(apqi, matrix->aqm, |
1455 | naqm_bits) { |
1456 | n = sprintf(buf: bufpos, fmt: "%02lx.%04lx\n" , apid, |
1457 | apqi); |
1458 | bufpos += n; |
1459 | nchars += n; |
1460 | } |
1461 | } |
1462 | } else if (apid1 < napm_bits) { |
1463 | for_each_set_bit_inv(apid, matrix->apm, napm_bits) { |
1464 | n = sprintf(buf: bufpos, fmt: "%02lx.\n" , apid); |
1465 | bufpos += n; |
1466 | nchars += n; |
1467 | } |
1468 | } else if (apqi1 < naqm_bits) { |
1469 | for_each_set_bit_inv(apqi, matrix->aqm, naqm_bits) { |
1470 | n = sprintf(buf: bufpos, fmt: ".%04lx\n" , apqi); |
1471 | bufpos += n; |
1472 | nchars += n; |
1473 | } |
1474 | } |
1475 | |
1476 | return nchars; |
1477 | } |
1478 | |
1479 | static ssize_t matrix_show(struct device *dev, struct device_attribute *attr, |
1480 | char *buf) |
1481 | { |
1482 | ssize_t nchars; |
1483 | struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); |
1484 | |
1485 | mutex_lock(&matrix_dev->mdevs_lock); |
1486 | nchars = vfio_ap_mdev_matrix_show(matrix: &matrix_mdev->matrix, buf); |
1487 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
1488 | |
1489 | return nchars; |
1490 | } |
1491 | static DEVICE_ATTR_RO(matrix); |
1492 | |
1493 | static ssize_t guest_matrix_show(struct device *dev, |
1494 | struct device_attribute *attr, char *buf) |
1495 | { |
1496 | ssize_t nchars; |
1497 | struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); |
1498 | |
1499 | mutex_lock(&matrix_dev->mdevs_lock); |
1500 | nchars = vfio_ap_mdev_matrix_show(matrix: &matrix_mdev->shadow_apcb, buf); |
1501 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
1502 | |
1503 | return nchars; |
1504 | } |
1505 | static DEVICE_ATTR_RO(guest_matrix); |
1506 | |
/* sysfs attributes exposed under each matrix mdev's device directory. */
static struct attribute *vfio_ap_mdev_attrs[] = {
	&dev_attr_assign_adapter.attr,
	&dev_attr_unassign_adapter.attr,
	&dev_attr_assign_domain.attr,
	&dev_attr_unassign_domain.attr,
	&dev_attr_assign_control_domain.attr,
	&dev_attr_unassign_control_domain.attr,
	&dev_attr_control_domains.attr,
	&dev_attr_matrix.attr,
	&dev_attr_guest_matrix.attr,
	NULL,
};
1519 | |
/* Group the attributes above for registration with the vfio device. */
static struct attribute_group vfio_ap_mdev_attr_group = {
	.attrs = vfio_ap_mdev_attrs
};

static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
	&vfio_ap_mdev_attr_group,
	NULL
};
1528 | |
1529 | /** |
1530 | * vfio_ap_mdev_set_kvm - sets all data for @matrix_mdev that are needed |
1531 | * to manage AP resources for the guest whose state is represented by @kvm |
1532 | * |
1533 | * @matrix_mdev: a mediated matrix device |
1534 | * @kvm: reference to KVM instance |
1535 | * |
1536 | * Return: 0 if no other mediated matrix device has a reference to @kvm; |
1537 | * otherwise, returns an -EPERM. |
1538 | */ |
static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
				struct kvm *kvm)
{
	struct ap_matrix_mdev *m;

	/* Nothing to do until KVM has set up the guest's CRYCB. */
	if (kvm->arch.crypto.crycbd) {
		/* Install the PQAP(AQIC) interception hook for this mdev. */
		down_write(sem: &kvm->arch.crypto.pqap_hook_rwsem);
		kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
		up_write(sem: &kvm->arch.crypto.pqap_hook_rwsem);

		get_update_locks_for_kvm(kvm);

		/* A KVM guest may be served by at most one matrix mdev. */
		list_for_each_entry(m, &matrix_dev->mdev_list, node) {
			if (m != matrix_mdev && m->kvm == kvm) {
				release_update_locks_for_kvm(kvm);
				return -EPERM;
			}
		}

		/* Hold a reference to the guest for the life of this mdev. */
		kvm_get_kvm(kvm);
		matrix_mdev->kvm = kvm;
		vfio_ap_mdev_update_guest_apcb(matrix_mdev);

		release_update_locks_for_kvm(kvm);
	}

	return 0;
}
1567 | |
1568 | static void unmap_iova(struct ap_matrix_mdev *matrix_mdev, u64 iova, u64 length) |
1569 | { |
1570 | struct ap_queue_table *qtable = &matrix_mdev->qtable; |
1571 | struct vfio_ap_queue *q; |
1572 | int loop_cursor; |
1573 | |
1574 | hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { |
1575 | if (q->saved_iova >= iova && q->saved_iova < iova + length) |
1576 | vfio_ap_irq_disable(q); |
1577 | } |
1578 | } |
1579 | |
1580 | static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova, |
1581 | u64 length) |
1582 | { |
1583 | struct ap_matrix_mdev *matrix_mdev = |
1584 | container_of(vdev, struct ap_matrix_mdev, vdev); |
1585 | |
1586 | mutex_lock(&matrix_dev->mdevs_lock); |
1587 | |
1588 | unmap_iova(matrix_mdev, iova, length); |
1589 | |
1590 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
1591 | } |
1592 | |
1593 | /** |
1594 | * vfio_ap_mdev_unset_kvm - performs clean-up of resources no longer needed |
1595 | * by @matrix_mdev. |
1596 | * |
1597 | * @matrix_mdev: a matrix mediated device |
1598 | */ |
static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
{
	struct kvm *kvm = matrix_mdev->kvm;

	if (kvm && kvm->arch.crypto.crycbd) {
		/*
		 * Remove the PQAP(AQIC) hook first so no new interceptions
		 * reference this mdev while it is being torn down.
		 */
		down_write(sem: &kvm->arch.crypto.pqap_hook_rwsem);
		kvm->arch.crypto.pqap_hook = NULL;
		up_write(sem: &kvm->arch.crypto.pqap_hook_rwsem);

		get_update_locks_for_kvm(kvm);

		/* Clear the guest's APCB, then quiesce the mdev's queues. */
		kvm_arch_crypto_clear_masks(kvm);
		vfio_ap_mdev_reset_queues(qtable: &matrix_mdev->qtable);
		/* Drop the reference taken in vfio_ap_mdev_set_kvm(). */
		kvm_put_kvm(kvm);
		matrix_mdev->kvm = NULL;

		release_update_locks_for_kvm(kvm);
	}
}
1618 | |
1619 | static struct vfio_ap_queue *vfio_ap_find_queue(int apqn) |
1620 | { |
1621 | struct ap_queue *queue; |
1622 | struct vfio_ap_queue *q = NULL; |
1623 | |
1624 | queue = ap_get_qdev(apqn); |
1625 | if (!queue) |
1626 | return NULL; |
1627 | |
1628 | if (queue->ap_dev.device.driver == &matrix_dev->vfio_ap_drv->driver) |
1629 | q = dev_get_drvdata(dev: &queue->ap_dev.device); |
1630 | |
1631 | put_device(dev: &queue->ap_dev.device); |
1632 | |
1633 | return q; |
1634 | } |
1635 | |
/*
 * Map the TAPQ response code in @status for queue @apqn to an errno telling
 * the caller whether the queue reset completed (0), is still in progress
 * (-EBUSY), must be retried with another ZAPQ (-EAGAIN), or failed with an
 * unexpected response code (-EIO).
 */
static int apq_status_check(int apqn, struct ap_queue_status *status)
{
	switch (status->response_code) {
	case AP_RESPONSE_NORMAL:
	case AP_RESPONSE_DECONFIGURED:
		return 0;
	case AP_RESPONSE_RESET_IN_PROGRESS:
	case AP_RESPONSE_BUSY:
		return -EBUSY;
	case AP_RESPONSE_ASSOC_SECRET_NOT_UNIQUE:
	case AP_RESPONSE_ASSOC_FAILED:
		/*
		 * These asynchronous response codes indicate a PQAP(AAPQ)
		 * instruction to associate a secret with the guest failed. All
		 * subsequent AP instructions will end with the asynchronous
		 * response code until the AP queue is reset; so, let's return
		 * a value indicating a reset needs to be performed again.
		 */
		return -EAGAIN;
	default:
		WARN(true,
		     "failed to verify reset of queue %02x.%04x: TAPQ rc=%u\n" ,
		     AP_QID_CARD(apqn), AP_QID_QUEUE(apqn),
		     status->response_code);
		return -EIO;
	}
}
1663 | |
1664 | #define WAIT_MSG "Waited %dms for reset of queue %02x.%04x (%u, %u, %u)" |
1665 | |
/*
 * Work function that polls (via TAPQ) until the ZAPQ issued for a queue by
 * vfio_ap_mdev_reset_queue() has completed, re-issuing the ZAPQ when the
 * status indicates the reset must be retried. The final status is left in
 * q->reset_status for vfio_ap_mdev_reset_queues() to inspect.
 */
static void apq_reset_check(struct work_struct *reset_work)
{
	int ret = -EBUSY, elapsed = 0;
	struct ap_queue_status status;
	struct vfio_ap_queue *q;

	q = container_of(reset_work, struct vfio_ap_queue, reset_work);
	memcpy(&status, &q->reset_status, sizeof(status));
	while (true) {
		/* Poll every AP_RESET_INTERVAL (20ms) until done. */
		msleep(AP_RESET_INTERVAL);
		elapsed += AP_RESET_INTERVAL;
		status = ap_tapq(q->apqn, NULL);
		ret = apq_status_check(apqn: q->apqn, status: &status);
		if (ret == -EIO)
			return;
		if (ret == -EBUSY) {
			/* Reset still running; log how long we have waited. */
			pr_notice_ratelimited(WAIT_MSG, elapsed,
					      AP_QID_CARD(q->apqn),
					      AP_QID_QUEUE(q->apqn),
					      status.response_code,
					      status.queue_empty,
					      status.irq_enabled);
		} else {
			/*
			 * Re-issue the ZAPQ if the original one raced with a
			 * reset/busy/state-change condition, or if the status
			 * check asked for a retry (-EAGAIN).
			 */
			if (q->reset_status.response_code == AP_RESPONSE_RESET_IN_PROGRESS ||
			    q->reset_status.response_code == AP_RESPONSE_BUSY ||
			    q->reset_status.response_code == AP_RESPONSE_STATE_CHANGE_IN_PROGRESS ||
			    ret == -EAGAIN) {
				status = ap_zapq(q->apqn, 0);
				memcpy(&q->reset_status, &status, sizeof(status));
				continue;
			}
			/*
			 * When an AP adapter is deconfigured, the
			 * associated queues are reset, so let's set the
			 * status response code to 0 so the queue may be
			 * passed through (i.e., not filtered)
			 */
			if (status.response_code == AP_RESPONSE_DECONFIGURED)
				q->reset_status.response_code = 0;
			if (q->saved_isc != VFIO_AP_ISC_INVALID)
				vfio_ap_free_aqic_resources(q);
			break;
		}
	}
}
1711 | |
/*
 * Issue a ZAPQ (zeroize queue) for @q and record the status in
 * q->reset_status. When the reset is asynchronous, completion is verified on
 * a work queue (apq_reset_check); callers wait with flush_work().
 */
static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
{
	struct ap_queue_status status;

	if (!q)
		return;
	status = ap_zapq(q->apqn, 0);
	memcpy(&q->reset_status, &status, sizeof(status));
	switch (status.response_code) {
	case AP_RESPONSE_NORMAL:
	case AP_RESPONSE_RESET_IN_PROGRESS:
	case AP_RESPONSE_BUSY:
	case AP_RESPONSE_STATE_CHANGE_IN_PROGRESS:
		/*
		 * Let's verify whether the ZAPQ completed successfully on a work queue.
		 */
		queue_work(wq: system_long_wq, work: &q->reset_work);
		break;
	case AP_RESPONSE_DECONFIGURED:
		/*
		 * When an AP adapter is deconfigured, the associated
		 * queues are reset, so let's set the status response code to 0
		 * so the queue may be passed through (i.e., not filtered).
		 */
		q->reset_status.response_code = 0;
		vfio_ap_free_aqic_resources(q);
		break;
	default:
		WARN(true,
		     "PQAP/ZAPQ for %02x.%04x failed with invalid rc=%u\n" ,
		     AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
		     status.response_code);
	}
}
1746 | |
1747 | static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable) |
1748 | { |
1749 | int ret = 0, loop_cursor; |
1750 | struct vfio_ap_queue *q; |
1751 | |
1752 | hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) |
1753 | vfio_ap_mdev_reset_queue(q); |
1754 | |
1755 | hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { |
1756 | flush_work(work: &q->reset_work); |
1757 | |
1758 | if (q->reset_status.response_code) |
1759 | ret = -EIO; |
1760 | } |
1761 | |
1762 | return ret; |
1763 | } |
1764 | |
1765 | static int vfio_ap_mdev_open_device(struct vfio_device *vdev) |
1766 | { |
1767 | struct ap_matrix_mdev *matrix_mdev = |
1768 | container_of(vdev, struct ap_matrix_mdev, vdev); |
1769 | |
1770 | if (!vdev->kvm) |
1771 | return -EINVAL; |
1772 | |
1773 | return vfio_ap_mdev_set_kvm(matrix_mdev, kvm: vdev->kvm); |
1774 | } |
1775 | |
1776 | static void vfio_ap_mdev_close_device(struct vfio_device *vdev) |
1777 | { |
1778 | struct ap_matrix_mdev *matrix_mdev = |
1779 | container_of(vdev, struct ap_matrix_mdev, vdev); |
1780 | |
1781 | vfio_ap_mdev_unset_kvm(matrix_mdev); |
1782 | } |
1783 | |
1784 | static void vfio_ap_mdev_request(struct vfio_device *vdev, unsigned int count) |
1785 | { |
1786 | struct device *dev = vdev->dev; |
1787 | struct ap_matrix_mdev *matrix_mdev; |
1788 | |
1789 | matrix_mdev = container_of(vdev, struct ap_matrix_mdev, vdev); |
1790 | |
1791 | if (matrix_mdev->req_trigger) { |
1792 | if (!(count % 10)) |
1793 | dev_notice_ratelimited(dev, |
1794 | "Relaying device request to user (#%u)\n" , |
1795 | count); |
1796 | |
1797 | eventfd_signal(ctx: matrix_mdev->req_trigger, n: 1); |
1798 | } else if (count == 0) { |
1799 | dev_notice(dev, |
1800 | "No device request registered, blocked until released by user\n" ); |
1801 | } |
1802 | } |
1803 | |
1804 | static int vfio_ap_mdev_get_device_info(unsigned long arg) |
1805 | { |
1806 | unsigned long minsz; |
1807 | struct vfio_device_info info; |
1808 | |
1809 | minsz = offsetofend(struct vfio_device_info, num_irqs); |
1810 | |
1811 | if (copy_from_user(to: &info, from: (void __user *)arg, n: minsz)) |
1812 | return -EFAULT; |
1813 | |
1814 | if (info.argsz < minsz) |
1815 | return -EINVAL; |
1816 | |
1817 | info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET; |
1818 | info.num_regions = 0; |
1819 | info.num_irqs = VFIO_AP_NUM_IRQS; |
1820 | |
1821 | return copy_to_user(to: (void __user *)arg, from: &info, n: minsz) ? -EFAULT : 0; |
1822 | } |
1823 | |
1824 | static ssize_t vfio_ap_get_irq_info(unsigned long arg) |
1825 | { |
1826 | unsigned long minsz; |
1827 | struct vfio_irq_info info; |
1828 | |
1829 | minsz = offsetofend(struct vfio_irq_info, count); |
1830 | |
1831 | if (copy_from_user(to: &info, from: (void __user *)arg, n: minsz)) |
1832 | return -EFAULT; |
1833 | |
1834 | if (info.argsz < minsz || info.index >= VFIO_AP_NUM_IRQS) |
1835 | return -EINVAL; |
1836 | |
1837 | switch (info.index) { |
1838 | case VFIO_AP_REQ_IRQ_INDEX: |
1839 | info.count = 1; |
1840 | info.flags = VFIO_IRQ_INFO_EVENTFD; |
1841 | break; |
1842 | default: |
1843 | return -EINVAL; |
1844 | } |
1845 | |
1846 | return copy_to_user(to: (void __user *)arg, from: &info, n: minsz) ? -EFAULT : 0; |
1847 | } |
1848 | |
1849 | static int vfio_ap_irq_set_init(struct vfio_irq_set *irq_set, unsigned long arg) |
1850 | { |
1851 | int ret; |
1852 | size_t data_size; |
1853 | unsigned long minsz; |
1854 | |
1855 | minsz = offsetofend(struct vfio_irq_set, count); |
1856 | |
1857 | if (copy_from_user(to: irq_set, from: (void __user *)arg, n: minsz)) |
1858 | return -EFAULT; |
1859 | |
1860 | ret = vfio_set_irqs_validate_and_prepare(hdr: irq_set, num_irqs: 1, max_irq_type: VFIO_AP_NUM_IRQS, |
1861 | data_size: &data_size); |
1862 | if (ret) |
1863 | return ret; |
1864 | |
1865 | if (!(irq_set->flags & VFIO_IRQ_SET_ACTION_TRIGGER)) |
1866 | return -EINVAL; |
1867 | |
1868 | return 0; |
1869 | } |
1870 | |
1871 | static int vfio_ap_set_request_irq(struct ap_matrix_mdev *matrix_mdev, |
1872 | unsigned long arg) |
1873 | { |
1874 | s32 fd; |
1875 | void __user *data; |
1876 | unsigned long minsz; |
1877 | struct eventfd_ctx *req_trigger; |
1878 | |
1879 | minsz = offsetofend(struct vfio_irq_set, count); |
1880 | data = (void __user *)(arg + minsz); |
1881 | |
1882 | if (get_user(fd, (s32 __user *)data)) |
1883 | return -EFAULT; |
1884 | |
1885 | if (fd == -1) { |
1886 | if (matrix_mdev->req_trigger) |
1887 | eventfd_ctx_put(ctx: matrix_mdev->req_trigger); |
1888 | matrix_mdev->req_trigger = NULL; |
1889 | } else if (fd >= 0) { |
1890 | req_trigger = eventfd_ctx_fdget(fd); |
1891 | if (IS_ERR(ptr: req_trigger)) |
1892 | return PTR_ERR(ptr: req_trigger); |
1893 | |
1894 | if (matrix_mdev->req_trigger) |
1895 | eventfd_ctx_put(ctx: matrix_mdev->req_trigger); |
1896 | |
1897 | matrix_mdev->req_trigger = req_trigger; |
1898 | } else { |
1899 | return -EINVAL; |
1900 | } |
1901 | |
1902 | return 0; |
1903 | } |
1904 | |
1905 | static int vfio_ap_set_irqs(struct ap_matrix_mdev *matrix_mdev, |
1906 | unsigned long arg) |
1907 | { |
1908 | int ret; |
1909 | struct vfio_irq_set irq_set; |
1910 | |
1911 | ret = vfio_ap_irq_set_init(irq_set: &irq_set, arg); |
1912 | if (ret) |
1913 | return ret; |
1914 | |
1915 | switch (irq_set.flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { |
1916 | case VFIO_IRQ_SET_DATA_EVENTFD: |
1917 | switch (irq_set.index) { |
1918 | case VFIO_AP_REQ_IRQ_INDEX: |
1919 | return vfio_ap_set_request_irq(matrix_mdev, arg); |
1920 | default: |
1921 | return -EINVAL; |
1922 | } |
1923 | default: |
1924 | return -EINVAL; |
1925 | } |
1926 | } |
1927 | |
1928 | static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev, |
1929 | unsigned int cmd, unsigned long arg) |
1930 | { |
1931 | struct ap_matrix_mdev *matrix_mdev = |
1932 | container_of(vdev, struct ap_matrix_mdev, vdev); |
1933 | int ret; |
1934 | |
1935 | mutex_lock(&matrix_dev->mdevs_lock); |
1936 | switch (cmd) { |
1937 | case VFIO_DEVICE_GET_INFO: |
1938 | ret = vfio_ap_mdev_get_device_info(arg); |
1939 | break; |
1940 | case VFIO_DEVICE_RESET: |
1941 | ret = vfio_ap_mdev_reset_queues(qtable: &matrix_mdev->qtable); |
1942 | break; |
1943 | case VFIO_DEVICE_GET_IRQ_INFO: |
1944 | ret = vfio_ap_get_irq_info(arg); |
1945 | break; |
1946 | case VFIO_DEVICE_SET_IRQS: |
1947 | ret = vfio_ap_set_irqs(matrix_mdev, arg); |
1948 | break; |
1949 | default: |
1950 | ret = -EOPNOTSUPP; |
1951 | break; |
1952 | } |
1953 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
1954 | |
1955 | return ret; |
1956 | } |
1957 | |
1958 | static struct ap_matrix_mdev *vfio_ap_mdev_for_queue(struct vfio_ap_queue *q) |
1959 | { |
1960 | struct ap_matrix_mdev *matrix_mdev; |
1961 | unsigned long apid = AP_QID_CARD(q->apqn); |
1962 | unsigned long apqi = AP_QID_QUEUE(q->apqn); |
1963 | |
1964 | list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) { |
1965 | if (test_bit_inv(apid, matrix_mdev->matrix.apm) && |
1966 | test_bit_inv(apqi, matrix_mdev->matrix.aqm)) |
1967 | return matrix_mdev; |
1968 | } |
1969 | |
1970 | return NULL; |
1971 | } |
1972 | |
1973 | static ssize_t status_show(struct device *dev, |
1974 | struct device_attribute *attr, |
1975 | char *buf) |
1976 | { |
1977 | ssize_t nchars = 0; |
1978 | struct vfio_ap_queue *q; |
1979 | struct ap_matrix_mdev *matrix_mdev; |
1980 | struct ap_device *apdev = to_ap_dev(dev); |
1981 | |
1982 | mutex_lock(&matrix_dev->mdevs_lock); |
1983 | q = dev_get_drvdata(dev: &apdev->device); |
1984 | matrix_mdev = vfio_ap_mdev_for_queue(q); |
1985 | |
1986 | if (matrix_mdev) { |
1987 | if (matrix_mdev->kvm) |
1988 | nchars = scnprintf(buf, PAGE_SIZE, fmt: "%s\n" , |
1989 | AP_QUEUE_IN_USE); |
1990 | else |
1991 | nchars = scnprintf(buf, PAGE_SIZE, fmt: "%s\n" , |
1992 | AP_QUEUE_ASSIGNED); |
1993 | } else { |
1994 | nchars = scnprintf(buf, PAGE_SIZE, fmt: "%s\n" , |
1995 | AP_QUEUE_UNASSIGNED); |
1996 | } |
1997 | |
1998 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
1999 | |
2000 | return nchars; |
2001 | } |
2002 | |
static DEVICE_ATTR_RO(status);

/* Sysfs attributes created on each AP queue device bound to this driver. */
static struct attribute *vfio_queue_attrs[] = {
	&dev_attr_status.attr,
	NULL,
};

static const struct attribute_group vfio_queue_attr_group = {
	.attrs = vfio_queue_attrs,
};
2013 | |
/* vfio_device callbacks for the AP matrix mediated device. */
static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
	.init = vfio_ap_mdev_init_dev,
	.open_device = vfio_ap_mdev_open_device,
	.close_device = vfio_ap_mdev_close_device,
	.ioctl = vfio_ap_mdev_ioctl,
	.dma_unmap = vfio_ap_mdev_dma_unmap,
	.bind_iommufd = vfio_iommufd_emulated_bind,
	.unbind_iommufd = vfio_iommufd_emulated_unbind,
	.attach_ioas = vfio_iommufd_emulated_attach_ioas,
	.detach_ioas = vfio_iommufd_emulated_detach_ioas,
	.request = vfio_ap_mdev_request
};
2026 | |
/* mdev driver registered with the mediated-device core for AP passthrough. */
static struct mdev_driver vfio_ap_matrix_driver = {
	.device_api = VFIO_DEVICE_API_AP_STRING,
	.max_instances = MAX_ZDEV_ENTRIES_EXT,
	.driver = {
		.name = "vfio_ap_mdev" ,
		.owner = THIS_MODULE,
		.mod_name = KBUILD_MODNAME,
		.dev_groups = vfio_ap_mdev_attr_groups,
	},
	.probe = vfio_ap_mdev_probe,
	.remove = vfio_ap_mdev_remove,
};
2039 | |
2040 | int vfio_ap_mdev_register(void) |
2041 | { |
2042 | int ret; |
2043 | |
2044 | ret = mdev_register_driver(drv: &vfio_ap_matrix_driver); |
2045 | if (ret) |
2046 | return ret; |
2047 | |
2048 | matrix_dev->mdev_type.sysfs_name = VFIO_AP_MDEV_TYPE_HWVIRT; |
2049 | matrix_dev->mdev_type.pretty_name = VFIO_AP_MDEV_NAME_HWVIRT; |
2050 | matrix_dev->mdev_types[0] = &matrix_dev->mdev_type; |
2051 | ret = mdev_register_parent(parent: &matrix_dev->parent, dev: &matrix_dev->device, |
2052 | mdev_driver: &vfio_ap_matrix_driver, |
2053 | types: matrix_dev->mdev_types, nr_types: 1); |
2054 | if (ret) |
2055 | goto err_driver; |
2056 | return 0; |
2057 | |
2058 | err_driver: |
2059 | mdev_unregister_driver(drv: &vfio_ap_matrix_driver); |
2060 | return ret; |
2061 | } |
2062 | |
2063 | void vfio_ap_mdev_unregister(void) |
2064 | { |
2065 | mdev_unregister_parent(parent: &matrix_dev->parent); |
2066 | mdev_unregister_driver(drv: &vfio_ap_matrix_driver); |
2067 | } |
2068 | |
2069 | int vfio_ap_mdev_probe_queue(struct ap_device *apdev) |
2070 | { |
2071 | int ret; |
2072 | struct vfio_ap_queue *q; |
2073 | struct ap_matrix_mdev *matrix_mdev; |
2074 | |
2075 | ret = sysfs_create_group(kobj: &apdev->device.kobj, grp: &vfio_queue_attr_group); |
2076 | if (ret) |
2077 | return ret; |
2078 | |
2079 | q = kzalloc(size: sizeof(*q), GFP_KERNEL); |
2080 | if (!q) { |
2081 | ret = -ENOMEM; |
2082 | goto err_remove_group; |
2083 | } |
2084 | |
2085 | q->apqn = to_ap_queue(&apdev->device)->qid; |
2086 | q->saved_isc = VFIO_AP_ISC_INVALID; |
2087 | memset(&q->reset_status, 0, sizeof(q->reset_status)); |
2088 | INIT_WORK(&q->reset_work, apq_reset_check); |
2089 | matrix_mdev = get_update_locks_by_apqn(apqn: q->apqn); |
2090 | |
2091 | if (matrix_mdev) { |
2092 | vfio_ap_mdev_link_queue(matrix_mdev, q); |
2093 | |
2094 | if (vfio_ap_mdev_filter_matrix(apm: matrix_mdev->matrix.apm, |
2095 | aqm: matrix_mdev->matrix.aqm, |
2096 | matrix_mdev)) |
2097 | vfio_ap_mdev_update_guest_apcb(matrix_mdev); |
2098 | } |
2099 | dev_set_drvdata(dev: &apdev->device, data: q); |
2100 | release_update_locks_for_mdev(matrix_mdev); |
2101 | |
2102 | return 0; |
2103 | |
2104 | err_remove_group: |
2105 | sysfs_remove_group(kobj: &apdev->device.kobj, grp: &vfio_queue_attr_group); |
2106 | return ret; |
2107 | } |
2108 | |
2109 | void vfio_ap_mdev_remove_queue(struct ap_device *apdev) |
2110 | { |
2111 | unsigned long apid, apqi; |
2112 | struct vfio_ap_queue *q; |
2113 | struct ap_matrix_mdev *matrix_mdev; |
2114 | |
2115 | sysfs_remove_group(kobj: &apdev->device.kobj, grp: &vfio_queue_attr_group); |
2116 | q = dev_get_drvdata(dev: &apdev->device); |
2117 | get_update_locks_for_queue(q); |
2118 | matrix_mdev = q->matrix_mdev; |
2119 | |
2120 | if (matrix_mdev) { |
2121 | vfio_ap_unlink_queue_fr_mdev(q); |
2122 | |
2123 | apid = AP_QID_CARD(q->apqn); |
2124 | apqi = AP_QID_QUEUE(q->apqn); |
2125 | |
2126 | /* |
2127 | * If the queue is assigned to the guest's APCB, then remove |
2128 | * the adapter's APID from the APCB and hot it into the guest. |
2129 | */ |
2130 | if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && |
2131 | test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) { |
2132 | clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); |
2133 | vfio_ap_mdev_update_guest_apcb(matrix_mdev); |
2134 | } |
2135 | } |
2136 | |
2137 | vfio_ap_mdev_reset_queue(q); |
2138 | flush_work(work: &q->reset_work); |
2139 | dev_set_drvdata(dev: &apdev->device, NULL); |
2140 | kfree(objp: q); |
2141 | release_update_locks_for_mdev(matrix_mdev); |
2142 | } |
2143 | |
2144 | /** |
2145 | * vfio_ap_mdev_resource_in_use: check whether any of a set of APQNs is |
2146 | * assigned to a mediated device under the control |
2147 | * of the vfio_ap device driver. |
2148 | * |
2149 | * @apm: a bitmap specifying a set of APIDs comprising the APQNs to check. |
2150 | * @aqm: a bitmap specifying a set of APQIs comprising the APQNs to check. |
2151 | * |
2152 | * Return: |
2153 | * * -EADDRINUSE if one or more of the APQNs specified via @apm/@aqm are |
2154 | * assigned to a mediated device under the control of the vfio_ap |
2155 | * device driver. |
2156 | * * Otherwise, return 0. |
2157 | */ |
2158 | int vfio_ap_mdev_resource_in_use(unsigned long *apm, unsigned long *aqm) |
2159 | { |
2160 | int ret; |
2161 | |
2162 | mutex_lock(&matrix_dev->guests_lock); |
2163 | mutex_lock(&matrix_dev->mdevs_lock); |
2164 | ret = vfio_ap_mdev_verify_no_sharing(mdev_apm: apm, mdev_aqm: aqm); |
2165 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
2166 | mutex_unlock(lock: &matrix_dev->guests_lock); |
2167 | |
2168 | return ret; |
2169 | } |
2170 | |
2171 | /** |
2172 | * vfio_ap_mdev_hot_unplug_cfg - hot unplug the adapters, domains and control |
2173 | * domains that have been removed from the host's |
2174 | * AP configuration from a guest. |
2175 | * |
2176 | * @matrix_mdev: an ap_matrix_mdev object attached to a KVM guest. |
2177 | * @aprem: the adapters that have been removed from the host's AP configuration |
2178 | * @aqrem: the domains that have been removed from the host's AP configuration |
2179 | * @cdrem: the control domains that have been removed from the host's AP |
2180 | * configuration. |
2181 | */ |
2182 | static void vfio_ap_mdev_hot_unplug_cfg(struct ap_matrix_mdev *matrix_mdev, |
2183 | unsigned long *aprem, |
2184 | unsigned long *aqrem, |
2185 | unsigned long *cdrem) |
2186 | { |
2187 | int do_hotplug = 0; |
2188 | |
2189 | if (!bitmap_empty(src: aprem, AP_DEVICES)) { |
2190 | do_hotplug |= bitmap_andnot(dst: matrix_mdev->shadow_apcb.apm, |
2191 | src1: matrix_mdev->shadow_apcb.apm, |
2192 | src2: aprem, AP_DEVICES); |
2193 | } |
2194 | |
2195 | if (!bitmap_empty(src: aqrem, AP_DOMAINS)) { |
2196 | do_hotplug |= bitmap_andnot(dst: matrix_mdev->shadow_apcb.aqm, |
2197 | src1: matrix_mdev->shadow_apcb.aqm, |
2198 | src2: aqrem, AP_DEVICES); |
2199 | } |
2200 | |
2201 | if (!bitmap_empty(src: cdrem, AP_DOMAINS)) |
2202 | do_hotplug |= bitmap_andnot(dst: matrix_mdev->shadow_apcb.adm, |
2203 | src1: matrix_mdev->shadow_apcb.adm, |
2204 | src2: cdrem, AP_DOMAINS); |
2205 | |
2206 | if (do_hotplug) |
2207 | vfio_ap_mdev_update_guest_apcb(matrix_mdev); |
2208 | } |
2209 | |
2210 | /** |
2211 | * vfio_ap_mdev_cfg_remove - determines which guests are using the adapters, |
2212 | * domains and control domains that have been removed |
2213 | * from the host AP configuration and unplugs them |
2214 | * from those guests. |
2215 | * |
2216 | * @ap_remove: bitmap specifying which adapters have been removed from the host |
2217 | * config. |
2218 | * @aq_remove: bitmap specifying which domains have been removed from the host |
2219 | * config. |
2220 | * @cd_remove: bitmap specifying which control domains have been removed from |
2221 | * the host config. |
2222 | */ |
2223 | static void vfio_ap_mdev_cfg_remove(unsigned long *ap_remove, |
2224 | unsigned long *aq_remove, |
2225 | unsigned long *cd_remove) |
2226 | { |
2227 | struct ap_matrix_mdev *matrix_mdev; |
2228 | DECLARE_BITMAP(aprem, AP_DEVICES); |
2229 | DECLARE_BITMAP(aqrem, AP_DOMAINS); |
2230 | DECLARE_BITMAP(cdrem, AP_DOMAINS); |
2231 | int do_remove = 0; |
2232 | |
2233 | list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) { |
2234 | mutex_lock(&matrix_mdev->kvm->lock); |
2235 | mutex_lock(&matrix_dev->mdevs_lock); |
2236 | |
2237 | do_remove |= bitmap_and(dst: aprem, src1: ap_remove, |
2238 | src2: matrix_mdev->matrix.apm, |
2239 | AP_DEVICES); |
2240 | do_remove |= bitmap_and(dst: aqrem, src1: aq_remove, |
2241 | src2: matrix_mdev->matrix.aqm, |
2242 | AP_DOMAINS); |
2243 | do_remove |= bitmap_andnot(dst: cdrem, src1: cd_remove, |
2244 | src2: matrix_mdev->matrix.adm, |
2245 | AP_DOMAINS); |
2246 | |
2247 | if (do_remove) |
2248 | vfio_ap_mdev_hot_unplug_cfg(matrix_mdev, aprem, aqrem, |
2249 | cdrem); |
2250 | |
2251 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
2252 | mutex_unlock(lock: &matrix_mdev->kvm->lock); |
2253 | } |
2254 | } |
2255 | |
2256 | /** |
2257 | * vfio_ap_mdev_on_cfg_remove - responds to the removal of adapters, domains and |
2258 | * control domains from the host AP configuration |
2259 | * by unplugging them from the guests that are |
2260 | * using them. |
2261 | * @cur_config_info: the current host AP configuration information |
2262 | * @prev_config_info: the previous host AP configuration information |
2263 | */ |
2264 | static void vfio_ap_mdev_on_cfg_remove(struct ap_config_info *cur_config_info, |
2265 | struct ap_config_info *prev_config_info) |
2266 | { |
2267 | int do_remove; |
2268 | DECLARE_BITMAP(aprem, AP_DEVICES); |
2269 | DECLARE_BITMAP(aqrem, AP_DOMAINS); |
2270 | DECLARE_BITMAP(cdrem, AP_DOMAINS); |
2271 | |
2272 | do_remove = bitmap_andnot(dst: aprem, |
2273 | src1: (unsigned long *)prev_config_info->apm, |
2274 | src2: (unsigned long *)cur_config_info->apm, |
2275 | AP_DEVICES); |
2276 | do_remove |= bitmap_andnot(dst: aqrem, |
2277 | src1: (unsigned long *)prev_config_info->aqm, |
2278 | src2: (unsigned long *)cur_config_info->aqm, |
2279 | AP_DEVICES); |
2280 | do_remove |= bitmap_andnot(dst: cdrem, |
2281 | src1: (unsigned long *)prev_config_info->adm, |
2282 | src2: (unsigned long *)cur_config_info->adm, |
2283 | AP_DEVICES); |
2284 | |
2285 | if (do_remove) |
2286 | vfio_ap_mdev_cfg_remove(ap_remove: aprem, aq_remove: aqrem, cd_remove: cdrem); |
2287 | } |
2288 | |
2289 | /** |
2290 | * vfio_ap_filter_apid_by_qtype: filter APIDs from an AP mask for adapters that |
2291 | * are older than AP type 10 (CEX4). |
2292 | * @apm: a bitmap of the APIDs to examine |
2293 | * @aqm: a bitmap of the APQIs of the queues to query for the AP type. |
2294 | */ |
2295 | static void vfio_ap_filter_apid_by_qtype(unsigned long *apm, unsigned long *aqm) |
2296 | { |
2297 | bool apid_cleared; |
2298 | struct ap_queue_status status; |
2299 | unsigned long apid, apqi; |
2300 | struct ap_tapq_gr2 info; |
2301 | |
2302 | for_each_set_bit_inv(apid, apm, AP_DEVICES) { |
2303 | apid_cleared = false; |
2304 | |
2305 | for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) { |
2306 | status = ap_test_queue(AP_MKQID(apid, apqi), 1, &info); |
2307 | switch (status.response_code) { |
2308 | /* |
2309 | * According to the architecture in each case |
2310 | * below, the queue's info should be filled. |
2311 | */ |
2312 | case AP_RESPONSE_NORMAL: |
2313 | case AP_RESPONSE_RESET_IN_PROGRESS: |
2314 | case AP_RESPONSE_DECONFIGURED: |
2315 | case AP_RESPONSE_CHECKSTOPPED: |
2316 | case AP_RESPONSE_BUSY: |
2317 | /* |
2318 | * The vfio_ap device driver only |
2319 | * supports CEX4 and newer adapters, so |
2320 | * remove the APID if the adapter is |
2321 | * older than a CEX4. |
2322 | */ |
2323 | if (info.at < AP_DEVICE_TYPE_CEX4) { |
2324 | clear_bit_inv(apid, apm); |
2325 | apid_cleared = true; |
2326 | } |
2327 | |
2328 | break; |
2329 | |
2330 | default: |
2331 | /* |
2332 | * If we don't know the adapter type, |
2333 | * clear its APID since it can't be |
2334 | * determined whether the vfio_ap |
2335 | * device driver supports it. |
2336 | */ |
2337 | clear_bit_inv(apid, apm); |
2338 | apid_cleared = true; |
2339 | break; |
2340 | } |
2341 | |
2342 | /* |
2343 | * If we've already cleared the APID from the apm, there |
2344 | * is no need to continue examining the remainin AP |
2345 | * queues to determine the type of the adapter. |
2346 | */ |
2347 | if (apid_cleared) |
2348 | continue; |
2349 | } |
2350 | } |
2351 | } |
2352 | |
2353 | /** |
2354 | * vfio_ap_mdev_cfg_add - store bitmaps specifying the adapters, domains and |
2355 | * control domains that have been added to the host's |
2356 | * AP configuration for each matrix mdev to which they |
2357 | * are assigned. |
2358 | * |
2359 | * @apm_add: a bitmap specifying the adapters that have been added to the AP |
2360 | * configuration. |
2361 | * @aqm_add: a bitmap specifying the domains that have been added to the AP |
2362 | * configuration. |
2363 | * @adm_add: a bitmap specifying the control domains that have been added to the |
2364 | * AP configuration. |
2365 | */ |
2366 | static void vfio_ap_mdev_cfg_add(unsigned long *apm_add, unsigned long *aqm_add, |
2367 | unsigned long *adm_add) |
2368 | { |
2369 | struct ap_matrix_mdev *matrix_mdev; |
2370 | |
2371 | if (list_empty(head: &matrix_dev->mdev_list)) |
2372 | return; |
2373 | |
2374 | vfio_ap_filter_apid_by_qtype(apm: apm_add, aqm: aqm_add); |
2375 | |
2376 | list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) { |
2377 | bitmap_and(dst: matrix_mdev->apm_add, |
2378 | src1: matrix_mdev->matrix.apm, src2: apm_add, AP_DEVICES); |
2379 | bitmap_and(dst: matrix_mdev->aqm_add, |
2380 | src1: matrix_mdev->matrix.aqm, src2: aqm_add, AP_DOMAINS); |
2381 | bitmap_and(dst: matrix_mdev->adm_add, |
2382 | src1: matrix_mdev->matrix.adm, src2: adm_add, AP_DEVICES); |
2383 | } |
2384 | } |
2385 | |
2386 | /** |
2387 | * vfio_ap_mdev_on_cfg_add - responds to the addition of adapters, domains and |
2388 | * control domains to the host AP configuration |
2389 | * by updating the bitmaps that specify what adapters, |
2390 | * domains and control domains have been added so they |
2391 | * can be hot plugged into the guest when the AP bus |
2392 | * scan completes (see vfio_ap_on_scan_complete |
2393 | * function). |
2394 | * @cur_config_info: the current AP configuration information |
2395 | * @prev_config_info: the previous AP configuration information |
2396 | */ |
2397 | static void vfio_ap_mdev_on_cfg_add(struct ap_config_info *cur_config_info, |
2398 | struct ap_config_info *prev_config_info) |
2399 | { |
2400 | bool do_add; |
2401 | DECLARE_BITMAP(apm_add, AP_DEVICES); |
2402 | DECLARE_BITMAP(aqm_add, AP_DOMAINS); |
2403 | DECLARE_BITMAP(adm_add, AP_DOMAINS); |
2404 | |
2405 | do_add = bitmap_andnot(dst: apm_add, |
2406 | src1: (unsigned long *)cur_config_info->apm, |
2407 | src2: (unsigned long *)prev_config_info->apm, |
2408 | AP_DEVICES); |
2409 | do_add |= bitmap_andnot(dst: aqm_add, |
2410 | src1: (unsigned long *)cur_config_info->aqm, |
2411 | src2: (unsigned long *)prev_config_info->aqm, |
2412 | AP_DOMAINS); |
2413 | do_add |= bitmap_andnot(dst: adm_add, |
2414 | src1: (unsigned long *)cur_config_info->adm, |
2415 | src2: (unsigned long *)prev_config_info->adm, |
2416 | AP_DOMAINS); |
2417 | |
2418 | if (do_add) |
2419 | vfio_ap_mdev_cfg_add(apm_add, aqm_add, adm_add); |
2420 | } |
2421 | |
2422 | /** |
2423 | * vfio_ap_on_cfg_changed - handles notification of changes to the host AP |
2424 | * configuration. |
2425 | * |
2426 | * @cur_cfg_info: the current host AP configuration |
2427 | * @prev_cfg_info: the previous host AP configuration |
2428 | */ |
2429 | void vfio_ap_on_cfg_changed(struct ap_config_info *cur_cfg_info, |
2430 | struct ap_config_info *prev_cfg_info) |
2431 | { |
2432 | if (!cur_cfg_info || !prev_cfg_info) |
2433 | return; |
2434 | |
2435 | mutex_lock(&matrix_dev->guests_lock); |
2436 | |
2437 | vfio_ap_mdev_on_cfg_remove(cur_config_info: cur_cfg_info, prev_config_info: prev_cfg_info); |
2438 | vfio_ap_mdev_on_cfg_add(cur_config_info: cur_cfg_info, prev_config_info: prev_cfg_info); |
2439 | memcpy(&matrix_dev->info, cur_cfg_info, sizeof(*cur_cfg_info)); |
2440 | |
2441 | mutex_unlock(lock: &matrix_dev->guests_lock); |
2442 | } |
2443 | |
2444 | static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev) |
2445 | { |
2446 | bool do_hotplug = false; |
2447 | int filter_domains = 0; |
2448 | int filter_adapters = 0; |
2449 | DECLARE_BITMAP(apm, AP_DEVICES); |
2450 | DECLARE_BITMAP(aqm, AP_DOMAINS); |
2451 | |
2452 | mutex_lock(&matrix_mdev->kvm->lock); |
2453 | mutex_lock(&matrix_dev->mdevs_lock); |
2454 | |
2455 | filter_adapters = bitmap_and(dst: apm, src1: matrix_mdev->matrix.apm, |
2456 | src2: matrix_mdev->apm_add, AP_DEVICES); |
2457 | filter_domains = bitmap_and(dst: aqm, src1: matrix_mdev->matrix.aqm, |
2458 | src2: matrix_mdev->aqm_add, AP_DOMAINS); |
2459 | |
2460 | if (filter_adapters && filter_domains) |
2461 | do_hotplug |= vfio_ap_mdev_filter_matrix(apm, aqm, matrix_mdev); |
2462 | else if (filter_adapters) |
2463 | do_hotplug |= |
2464 | vfio_ap_mdev_filter_matrix(apm, |
2465 | aqm: matrix_mdev->shadow_apcb.aqm, |
2466 | matrix_mdev); |
2467 | else |
2468 | do_hotplug |= |
2469 | vfio_ap_mdev_filter_matrix(apm: matrix_mdev->shadow_apcb.apm, |
2470 | aqm, matrix_mdev); |
2471 | |
2472 | if (bitmap_intersects(src1: matrix_mdev->matrix.adm, src2: matrix_mdev->adm_add, |
2473 | AP_DOMAINS)) |
2474 | do_hotplug |= vfio_ap_mdev_filter_cdoms(matrix_mdev); |
2475 | |
2476 | if (do_hotplug) |
2477 | vfio_ap_mdev_update_guest_apcb(matrix_mdev); |
2478 | |
2479 | mutex_unlock(lock: &matrix_dev->mdevs_lock); |
2480 | mutex_unlock(lock: &matrix_mdev->kvm->lock); |
2481 | } |
2482 | |
2483 | void vfio_ap_on_scan_complete(struct ap_config_info *new_config_info, |
2484 | struct ap_config_info *old_config_info) |
2485 | { |
2486 | struct ap_matrix_mdev *matrix_mdev; |
2487 | |
2488 | mutex_lock(&matrix_dev->guests_lock); |
2489 | |
2490 | list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) { |
2491 | if (bitmap_empty(src: matrix_mdev->apm_add, AP_DEVICES) && |
2492 | bitmap_empty(src: matrix_mdev->aqm_add, AP_DOMAINS) && |
2493 | bitmap_empty(src: matrix_mdev->adm_add, AP_DOMAINS)) |
2494 | continue; |
2495 | |
2496 | vfio_ap_mdev_hot_plug_cfg(matrix_mdev); |
2497 | bitmap_clear(map: matrix_mdev->apm_add, start: 0, AP_DEVICES); |
2498 | bitmap_clear(map: matrix_mdev->aqm_add, start: 0, AP_DOMAINS); |
2499 | bitmap_clear(map: matrix_mdev->adm_add, start: 0, AP_DOMAINS); |
2500 | } |
2501 | |
2502 | mutex_unlock(lock: &matrix_dev->guests_lock); |
2503 | } |
2504 | |