1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * channel program interfaces |
4 | * |
5 | * Copyright IBM Corp. 2017 |
6 | * |
7 | * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> |
8 | * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> |
9 | */ |
10 | |
11 | #include <linux/ratelimit.h> |
12 | #include <linux/mm.h> |
13 | #include <linux/slab.h> |
14 | #include <linux/highmem.h> |
15 | #include <linux/iommu.h> |
16 | #include <linux/vfio.h> |
17 | #include <asm/idals.h> |
18 | |
19 | #include "vfio_ccw_cp.h" |
20 | #include "vfio_ccw_private.h" |
21 | |
22 | struct page_array { |
	/* Array of iovas for the pages that need to be pinned. */
24 | dma_addr_t *pa_iova; |
25 | /* Array that receives the pinned pages. */ |
26 | struct page **pa_page; |
27 | /* Number of pages pinned from @pa_iova. */ |
28 | int pa_nr; |
29 | }; |
30 | |
31 | struct ccwchain { |
32 | struct list_head next; |
33 | struct ccw1 *ch_ccw; |
34 | /* Guest physical address of the current chain. */ |
35 | u64 ch_iova; |
36 | /* Count of the valid ccws in chain. */ |
37 | int ch_len; |
38 | /* Pinned PAGEs for the original data. */ |
39 | struct page_array *ch_pa; |
40 | }; |
41 | |
42 | /* |
43 | * page_array_alloc() - alloc memory for page array |
44 | * @pa: page_array on which to perform the operation |
 * @len: number of pages that should be pinned
46 | * |
47 | * Attempt to allocate memory for page array. |
48 | * |
49 | * Usage of page_array: |
 * We expect (pa_nr == 0) and (pa_iova == NULL); all fields in
 * this structure will be filled in by this function.
52 | * |
53 | * Returns: |
54 | * 0 if page array is allocated |
 *   -EINVAL if pa->pa_nr is not initially zero, pa->pa_iova is not NULL,
 *           or @len is zero
56 | * -ENOMEM if alloc failed |
57 | */ |
58 | static int page_array_alloc(struct page_array *pa, unsigned int len) |
59 | { |
60 | if (pa->pa_nr || pa->pa_iova) |
61 | return -EINVAL; |
62 | |
63 | if (len == 0) |
64 | return -EINVAL; |
65 | |
66 | pa->pa_nr = len; |
67 | |
	pa->pa_iova = kcalloc(len, sizeof(*pa->pa_iova), GFP_KERNEL);
69 | if (!pa->pa_iova) |
70 | return -ENOMEM; |
71 | |
	pa->pa_page = kcalloc(len, sizeof(*pa->pa_page), GFP_KERNEL);
73 | if (!pa->pa_page) { |
		kfree(pa->pa_iova);
75 | return -ENOMEM; |
76 | } |
77 | |
78 | return 0; |
79 | } |
80 | |
81 | /* |
82 | * page_array_unpin() - Unpin user pages in memory |
83 | * @pa: page_array on which to perform the operation |
84 | * @vdev: the vfio device to perform the operation |
85 | * @pa_nr: number of user pages to unpin |
86 | * @unaligned: were pages unaligned on the pin request |
87 | * |
88 | * Only unpin if any pages were pinned to begin with, i.e. pa_nr > 0, |
89 | * otherwise only clear pa->pa_nr |
90 | */ |
91 | static void page_array_unpin(struct page_array *pa, |
92 | struct vfio_device *vdev, int pa_nr, bool unaligned) |
93 | { |
94 | int unpinned = 0, npage = 1; |
95 | |
96 | while (unpinned < pa_nr) { |
97 | dma_addr_t *first = &pa->pa_iova[unpinned]; |
98 | dma_addr_t *last = &first[npage]; |
99 | |
100 | if (unpinned + npage < pa_nr && |
101 | *first + npage * PAGE_SIZE == *last && |
102 | !unaligned) { |
103 | npage++; |
104 | continue; |
105 | } |
106 | |
		vfio_unpin_pages(vdev, *first, npage);
108 | unpinned += npage; |
109 | npage = 1; |
110 | } |
111 | |
112 | pa->pa_nr = 0; |
113 | } |
114 | |
115 | /* |
116 | * page_array_pin() - Pin user pages in memory |
117 | * @pa: page_array on which to perform the operation |
118 | * @vdev: the vfio device to perform pin operations |
 * @unaligned: true if the pages are not aligned to a 4K boundary
120 | * |
121 | * Returns number of pages pinned upon success. |
122 | * If the pin request partially succeeds, or fails completely, |
123 | * all pages are left unpinned and a negative error value is returned. |
124 | * |
125 | * Requests to pin "aligned" pages can be coalesced into a single |
126 | * vfio_pin_pages request for the sake of efficiency, based on the |
127 | * expectation of 4K page requests. Unaligned requests are probably |
128 | * dealing with 2K "pages", and cannot be coalesced without |
129 | * reworking this logic to incorporate that math. |
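 *
 * For example (illustration only): with an aligned request and guest
 * iovas 0x1000, 0x2000, 0x3000 and 0x5000, the first three contiguous
 * pages are pinned with a single vfio_pin_pages() call and the last
 * page with a second call.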
130 | */ |
131 | static int page_array_pin(struct page_array *pa, struct vfio_device *vdev, bool unaligned) |
132 | { |
133 | int pinned = 0, npage = 1; |
134 | int ret = 0; |
135 | |
136 | while (pinned < pa->pa_nr) { |
137 | dma_addr_t *first = &pa->pa_iova[pinned]; |
138 | dma_addr_t *last = &first[npage]; |
139 | |
140 | if (pinned + npage < pa->pa_nr && |
141 | *first + npage * PAGE_SIZE == *last && |
142 | !unaligned) { |
143 | npage++; |
144 | continue; |
145 | } |
146 | |
		ret = vfio_pin_pages(vdev, *first, npage,
				     IOMMU_READ | IOMMU_WRITE,
				     &pa->pa_page[pinned]);
150 | if (ret < 0) { |
151 | goto err_out; |
152 | } else if (ret > 0 && ret != npage) { |
153 | pinned += ret; |
154 | ret = -EINVAL; |
155 | goto err_out; |
156 | } |
157 | pinned += npage; |
158 | npage = 1; |
159 | } |
160 | |
161 | return ret; |
162 | |
163 | err_out: |
	page_array_unpin(pa, vdev, pinned, unaligned);
165 | return ret; |
166 | } |
167 | |
168 | /* Unpin the pages before releasing the memory. */ |
169 | static void page_array_unpin_free(struct page_array *pa, struct vfio_device *vdev, bool unaligned) |
170 | { |
	page_array_unpin(pa, vdev, pa->pa_nr, unaligned);
	kfree(pa->pa_page);
	kfree(pa->pa_iova);
174 | } |
175 | |
176 | static bool page_array_iova_pinned(struct page_array *pa, u64 iova, u64 length) |
177 | { |
178 | u64 iova_pfn_start = iova >> PAGE_SHIFT; |
179 | u64 iova_pfn_end = (iova + length - 1) >> PAGE_SHIFT; |
180 | u64 pfn; |
181 | int i; |
182 | |
183 | for (i = 0; i < pa->pa_nr; i++) { |
184 | pfn = pa->pa_iova[i] >> PAGE_SHIFT; |
185 | if (pfn >= iova_pfn_start && pfn <= iova_pfn_end) |
186 | return true; |
187 | } |
188 | |
189 | return false; |
190 | } |

/* Create the list of IDAL words for a page_array. */
192 | static inline void page_array_idal_create_words(struct page_array *pa, |
193 | unsigned long *idaws) |
194 | { |
195 | int i; |
196 | |
197 | /* |
	 * Idal words (except the first one) rely on the memory being 4k
	 * aligned. If a user virtual address is 4K aligned, then its
	 * corresponding kernel physical address will also be 4K aligned. Thus
201 | * there will be no problem here to simply use the phys to create an |
202 | * idaw. |
203 | */ |
204 | |
205 | for (i = 0; i < pa->pa_nr; i++) { |
206 | idaws[i] = page_to_phys(pa->pa_page[i]); |
207 | |
208 | /* Incorporate any offset from each starting address */ |
209 | idaws[i] += pa->pa_iova[i] & (PAGE_SIZE - 1); |
210 | } |
211 | } |
212 | |
213 | static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len) |
214 | { |
215 | struct ccw0 ccw0; |
216 | struct ccw1 *pccw1 = source; |
217 | int i; |
218 | |
219 | for (i = 0; i < len; i++) { |
220 | ccw0 = *(struct ccw0 *)pccw1; |
221 | if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) { |
222 | pccw1->cmd_code = CCW_CMD_TIC; |
223 | pccw1->flags = 0; |
224 | pccw1->count = 0; |
225 | } else { |
226 | pccw1->cmd_code = ccw0.cmd_code; |
227 | pccw1->flags = ccw0.flags; |
228 | pccw1->count = ccw0.count; |
229 | } |
230 | pccw1->cda = ccw0.cda; |
231 | pccw1++; |
232 | } |
233 | } |
234 | |
235 | #define idal_is_2k(_cp) (!(_cp)->orb.cmd.c64 || (_cp)->orb.cmd.i2k) |
236 | |
237 | /* |
238 | * Helpers to operate ccwchain. |
239 | */ |
240 | #define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02) |
241 | #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C) |
242 | #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE) |
243 | |
244 | #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) |
245 | |
246 | #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) |
247 | |
248 | #define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA) |
249 | #define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP) |
250 | |
251 | #define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC)) |
252 | |
253 | /* |
254 | * ccw_does_data_transfer() |
255 | * |
256 | * Determine whether a CCW will move any data, such that the guest pages |
257 | * would need to be pinned before performing the I/O. |
258 | * |
259 | * Returns 1 if yes, 0 if no. |
260 | */ |
261 | static inline int ccw_does_data_transfer(struct ccw1 *ccw) |
262 | { |
263 | /* If the count field is zero, then no data will be transferred */ |
264 | if (ccw->count == 0) |
265 | return 0; |
266 | |
267 | /* If the command is a NOP, then no data will be transferred */ |
268 | if (ccw_is_noop(ccw)) |
269 | return 0; |
270 | |
271 | /* If the skip flag is off, then data will be transferred */ |
272 | if (!ccw_is_skip(ccw)) |
273 | return 1; |
274 | |
275 | /* |
276 | * If the skip flag is on, it is only meaningful if the command |
277 | * code is a read, read backward, sense, or sense ID. In those |
278 | * cases, no data will be transferred. |
279 | */ |
280 | if (ccw_is_read(ccw) || ccw_is_read_backward(ccw)) |
281 | return 0; |
282 | |
283 | if (ccw_is_sense(ccw)) |
284 | return 0; |
285 | |
286 | /* The skip flag is on, but it is ignored for this command code. */ |
287 | return 1; |
288 | } |
289 | |
290 | /* |
291 | * is_cpa_within_range() |
292 | * |
293 | * @cpa: channel program address being questioned |
294 | * @head: address of the beginning of a CCW chain |
295 | * @len: number of CCWs within the chain |
296 | * |
297 | * Determine whether the address of a CCW (whether a new chain, |
298 | * or the target of a TIC) falls within a range (including the end points). |
299 | * |
300 | * Returns 1 if yes, 0 if no. |
301 | */ |
302 | static inline int is_cpa_within_range(u32 cpa, u32 head, int len) |
303 | { |
304 | u32 tail = head + (len - 1) * sizeof(struct ccw1); |
305 | |
306 | return (head <= cpa && cpa <= tail); |
307 | } |
308 | |
309 | static inline int is_tic_within_range(struct ccw1 *ccw, u32 head, int len) |
310 | { |
311 | if (!ccw_is_tic(ccw)) |
312 | return 0; |
313 | |
	return is_cpa_within_range(ccw->cda, head, len);
315 | } |
316 | |
317 | static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len) |
318 | { |
319 | struct ccwchain *chain; |
320 | |
	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
322 | if (!chain) |
323 | return NULL; |
324 | |
325 | chain->ch_ccw = kcalloc(len, sizeof(*chain->ch_ccw), GFP_DMA | GFP_KERNEL); |
326 | if (!chain->ch_ccw) |
327 | goto out_err; |
328 | |
	chain->ch_pa = kcalloc(len, sizeof(*chain->ch_pa), GFP_KERNEL);
330 | if (!chain->ch_pa) |
331 | goto out_err; |
332 | |
	list_add_tail(&chain->next, &cp->ccwchain_list);
334 | |
335 | return chain; |
336 | |
337 | out_err: |
	kfree(chain->ch_ccw);
	kfree(chain);
340 | return NULL; |
341 | } |
342 | |
343 | static void ccwchain_free(struct ccwchain *chain) |
344 | { |
	list_del(&chain->next);
	kfree(chain->ch_pa);
	kfree(chain->ch_ccw);
	kfree(chain);
349 | } |
350 | |
351 | /* Free resource for a ccw that allocated memory for its cda. */ |
352 | static void ccwchain_cda_free(struct ccwchain *chain, int idx) |
353 | { |
354 | struct ccw1 *ccw = &chain->ch_ccw[idx]; |
355 | |
356 | if (ccw_is_tic(ccw)) |
357 | return; |
358 | |
	kfree(phys_to_virt(ccw->cda));
360 | } |
361 | |
362 | /** |
363 | * ccwchain_calc_length - calculate the length of the ccw chain. |
364 | * @iova: guest physical address of the target ccw chain |
365 | * @cp: channel_program on which to perform the operation |
366 | * |
367 | * This is the chain length not considering any TICs. |
368 | * You need to do a new round for each TIC target. |
369 | * |
370 | * The program is also validated for absence of not yet supported |
371 | * indirect data addressing scenarios. |
372 | * |
373 | * Returns: the length of the ccw chain or -errno. |
374 | */ |
375 | static int ccwchain_calc_length(u64 iova, struct channel_program *cp) |
376 | { |
377 | struct ccw1 *ccw = cp->guest_cp; |
378 | int cnt = 0; |
379 | |
380 | do { |
381 | cnt++; |
382 | |
383 | /* |
384 | * We want to keep counting if the current CCW has the |
385 | * command-chaining flag enabled, or if it is a TIC CCW |
386 | * that loops back into the current chain. The latter |
387 | * is used for device orientation, where the CCW PRIOR to |
388 | * the TIC can either jump to the TIC or a CCW immediately |
389 | * after the TIC, depending on the results of its operation. |
390 | */ |
391 | if (!ccw_is_chain(ccw) && !is_tic_within_range(ccw, iova, cnt)) |
392 | break; |
393 | |
394 | ccw++; |
395 | } while (cnt < CCWCHAIN_LEN_MAX + 1); |
396 | |
397 | if (cnt == CCWCHAIN_LEN_MAX + 1) |
398 | cnt = -EINVAL; |
399 | |
400 | return cnt; |
401 | } |
402 | |
403 | static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp) |
404 | { |
405 | struct ccwchain *chain; |
406 | u32 ccw_head; |
407 | |
408 | list_for_each_entry(chain, &cp->ccwchain_list, next) { |
409 | ccw_head = chain->ch_iova; |
		if (is_cpa_within_range(tic->cda, ccw_head, chain->ch_len))
411 | return 1; |
412 | } |
413 | |
414 | return 0; |
415 | } |
416 | |
417 | static int ccwchain_loop_tic(struct ccwchain *chain, |
418 | struct channel_program *cp); |
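
/*
 * ccwchain_handle_ccw() and ccwchain_loop_tic() recurse into each other:
 * handling a chain scans it for TICs, and every TIC that targets an
 * address not yet covered by an existing chain triggers another
 * ccwchain_handle_ccw() for that target.
 */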
419 | |
420 | static int ccwchain_handle_ccw(u32 cda, struct channel_program *cp) |
421 | { |
422 | struct vfio_device *vdev = |
423 | &container_of(cp, struct vfio_ccw_private, cp)->vdev; |
424 | struct ccwchain *chain; |
425 | int len, ret; |
426 | |
427 | /* Copy 2K (the most we support today) of possible CCWs */ |
428 | ret = vfio_dma_rw(vdev, cda, cp->guest_cp, CCWCHAIN_LEN_MAX * sizeof(struct ccw1), false); |
429 | if (ret) |
430 | return ret; |
431 | |
432 | /* Convert any Format-0 CCWs to Format-1 */ |
433 | if (!cp->orb.cmd.fmt) |
		convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);
435 | |
436 | /* Count the CCWs in the current chain */ |
	len = ccwchain_calc_length(cda, cp);
438 | if (len < 0) |
439 | return len; |
440 | |
	/* Need to allocate a new chain for this one. */
442 | chain = ccwchain_alloc(cp, len); |
443 | if (!chain) |
444 | return -ENOMEM; |
445 | |
446 | chain->ch_len = len; |
447 | chain->ch_iova = cda; |
448 | |
449 | /* Copy the actual CCWs into the new chain */ |
450 | memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1)); |
451 | |
452 | /* Loop for tics on this new chain. */ |
453 | ret = ccwchain_loop_tic(chain, cp); |
454 | |
455 | if (ret) |
456 | ccwchain_free(chain); |
457 | |
458 | return ret; |
459 | } |
460 | |
461 | /* Loop for TICs. */ |
462 | static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp) |
463 | { |
464 | struct ccw1 *tic; |
465 | int i, ret; |
466 | |
467 | for (i = 0; i < chain->ch_len; i++) { |
468 | tic = &chain->ch_ccw[i]; |
469 | |
470 | if (!ccw_is_tic(tic)) |
471 | continue; |
472 | |
473 | /* May transfer to an existing chain. */ |
474 | if (tic_target_chain_exists(tic, cp)) |
475 | continue; |
476 | |
477 | /* Build a ccwchain for the next segment */ |
		ret = ccwchain_handle_ccw(tic->cda, cp);
479 | if (ret) |
480 | return ret; |
481 | } |
482 | |
483 | return 0; |
484 | } |
485 | |
486 | static int ccwchain_fetch_tic(struct ccw1 *ccw, |
487 | struct channel_program *cp) |
488 | { |
489 | struct ccwchain *iter; |
490 | u32 ccw_head; |
491 | |
492 | list_for_each_entry(iter, &cp->ccwchain_list, next) { |
493 | ccw_head = iter->ch_iova; |
		if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) {
			ccw->cda = (__u32) (addr_t) (((char *)iter->ch_ccw) +
496 | (ccw->cda - ccw_head)); |
497 | return 0; |
498 | } |
499 | } |
500 | |
501 | return -EFAULT; |
502 | } |
503 | |
504 | static unsigned long *get_guest_idal(struct ccw1 *ccw, |
505 | struct channel_program *cp, |
506 | int idaw_nr) |
507 | { |
508 | struct vfio_device *vdev = |
509 | &container_of(cp, struct vfio_ccw_private, cp)->vdev; |
510 | unsigned long *idaws; |
511 | unsigned int *idaws_f1; |
512 | int idal_len = idaw_nr * sizeof(*idaws); |
513 | int idaw_size = idal_is_2k(cp) ? PAGE_SIZE / 2 : PAGE_SIZE; |
514 | int idaw_mask = ~(idaw_size - 1); |
515 | int i, ret; |
516 | |
	idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
	if (!idaws)
		return ERR_PTR(-ENOMEM);
520 | |
521 | if (ccw_is_idal(ccw)) { |
522 | /* Copy IDAL from guest */ |
		ret = vfio_dma_rw(vdev, ccw->cda, idaws, idal_len, false);
524 | if (ret) { |
			kfree(idaws);
			return ERR_PTR(ret);
527 | } |
528 | } else { |
529 | /* Fabricate an IDAL based off CCW data address */ |
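		/*
		 * For example (illustration only): a direct CCW with cda 0x2010
		 * that needs three 4K IDAWs gets 0x2010, 0x3000 and 0x4000 here;
		 * the first IDAW keeps the byte offset, the later ones are
		 * page aligned.
		 */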
530 | if (cp->orb.cmd.c64) { |
531 | idaws[0] = ccw->cda; |
532 | for (i = 1; i < idaw_nr; i++) |
533 | idaws[i] = (idaws[i - 1] + idaw_size) & idaw_mask; |
534 | } else { |
535 | idaws_f1 = (unsigned int *)idaws; |
536 | idaws_f1[0] = ccw->cda; |
537 | for (i = 1; i < idaw_nr; i++) |
538 | idaws_f1[i] = (idaws_f1[i - 1] + idaw_size) & idaw_mask; |
539 | } |
540 | } |
541 | |
542 | return idaws; |
543 | } |
544 | |
545 | /* |
546 | * ccw_count_idaws() - Calculate the number of IDAWs needed to transfer |
547 | * a specified amount of data |
548 | * |
549 | * @ccw: The Channel Command Word being translated |
550 | * @cp: Channel Program being processed |
551 | * |
552 | * The ORB is examined, since it specifies what IDAWs could actually be |
553 | * used by any CCW in the channel program, regardless of whether or not |
554 | * the CCW actually does. An ORB that does not specify Format-2-IDAW |
555 | * Control could still contain a CCW with an IDAL, which would be |
556 | * Format-1 and thus only move 2K with each IDAW. Thus all CCWs within |
557 | * the channel program must follow the same size requirements. |
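 *
 * For example (illustration only): a CCW with cda 0x10FF8 and count 0x20
 * crosses a 4K boundary, so two 4K Format-2 IDAWs are needed; described
 * with 2K IDAWs, the same transfer also needs two.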
558 | */ |
559 | static int ccw_count_idaws(struct ccw1 *ccw, |
560 | struct channel_program *cp) |
561 | { |
562 | struct vfio_device *vdev = |
563 | &container_of(cp, struct vfio_ccw_private, cp)->vdev; |
564 | u64 iova; |
565 | int size = cp->orb.cmd.c64 ? sizeof(u64) : sizeof(u32); |
566 | int ret; |
567 | int bytes = 1; |
568 | |
569 | if (ccw->count) |
570 | bytes = ccw->count; |
571 | |
572 | if (ccw_is_idal(ccw)) { |
573 | /* Read first IDAW to check its starting address. */ |
574 | /* All subsequent IDAWs will be 2K- or 4K-aligned. */ |
		ret = vfio_dma_rw(vdev, ccw->cda, &iova, size, false);
576 | if (ret) |
577 | return ret; |
578 | |
579 | /* |
580 | * Format-1 IDAWs only occupy the first 32 bits, |
581 | * and bit 0 is always off. |
582 | */ |
583 | if (!cp->orb.cmd.c64) |
584 | iova = iova >> 32; |
585 | } else { |
586 | iova = ccw->cda; |
587 | } |
588 | |
589 | /* Format-1 IDAWs operate on 2K each */ |
590 | if (!cp->orb.cmd.c64) |
591 | return idal_2k_nr_words((void *)iova, bytes); |
592 | |
593 | /* Using the 2K variant of Format-2 IDAWs? */ |
594 | if (cp->orb.cmd.i2k) |
595 | return idal_2k_nr_words((void *)iova, bytes); |
596 | |
597 | /* The 'usual' case is 4K Format-2 IDAWs */ |
598 | return idal_nr_words((void *)iova, bytes); |
599 | } |
600 | |
601 | static int ccwchain_fetch_ccw(struct ccw1 *ccw, |
602 | struct page_array *pa, |
603 | struct channel_program *cp) |
604 | { |
605 | struct vfio_device *vdev = |
606 | &container_of(cp, struct vfio_ccw_private, cp)->vdev; |
607 | unsigned long *idaws; |
608 | unsigned int *idaws_f1; |
609 | int ret; |
610 | int idaw_nr; |
611 | int i; |
612 | |
613 | /* Calculate size of IDAL */ |
614 | idaw_nr = ccw_count_idaws(ccw, cp); |
615 | if (idaw_nr < 0) |
616 | return idaw_nr; |
617 | |
618 | /* Allocate an IDAL from host storage */ |
619 | idaws = get_guest_idal(ccw, cp, idaw_nr); |
	if (IS_ERR(idaws)) {
		ret = PTR_ERR(idaws);
622 | goto out_init; |
623 | } |
624 | |
625 | /* |
626 | * Allocate an array of pages to pin/translate. |
627 | * The number of pages is actually the count of the idaws |
	 * required for the data transfer, since we only support
629 | * 4K IDAWs today. |
630 | */ |
	ret = page_array_alloc(pa, idaw_nr);
632 | if (ret < 0) |
633 | goto out_free_idaws; |
634 | |
635 | /* |
636 | * Copy guest IDAWs into page_array, in case the memory they |
637 | * occupy is not contiguous. |
638 | */ |
639 | idaws_f1 = (unsigned int *)idaws; |
640 | for (i = 0; i < idaw_nr; i++) { |
641 | if (cp->orb.cmd.c64) |
642 | pa->pa_iova[i] = idaws[i]; |
643 | else |
644 | pa->pa_iova[i] = idaws_f1[i]; |
645 | } |
646 | |
647 | if (ccw_does_data_transfer(ccw)) { |
648 | ret = page_array_pin(pa, vdev, idal_is_2k(cp)); |
649 | if (ret < 0) |
650 | goto out_unpin; |
651 | } else { |
652 | pa->pa_nr = 0; |
653 | } |
654 | |
	ccw->cda = (__u32) virt_to_phys(idaws);
656 | ccw->flags |= CCW_FLAG_IDA; |
657 | |
	/* Populate the IDAL with pinned/translated addresses from the page_array */
659 | page_array_idal_create_words(pa, idaws); |
660 | |
661 | return 0; |
662 | |
663 | out_unpin: |
664 | page_array_unpin_free(pa, vdev, idal_is_2k(cp)); |
665 | out_free_idaws: |
	kfree(idaws);
667 | out_init: |
668 | ccw->cda = 0; |
669 | return ret; |
670 | } |
671 | |
672 | /* |
673 | * Fetch one ccw. |
674 | * To reduce memory copy, we'll pin the cda page in memory, |
675 | * and to get rid of the cda 2G limitation of ccw1, we'll translate |
676 | * direct ccws to idal ccws. |
677 | */ |
678 | static int ccwchain_fetch_one(struct ccw1 *ccw, |
679 | struct page_array *pa, |
			      struct channel_program *cp)
{
683 | if (ccw_is_tic(ccw)) |
684 | return ccwchain_fetch_tic(ccw, cp); |
685 | |
686 | return ccwchain_fetch_ccw(ccw, pa, cp); |
687 | } |
688 | |
689 | /** |
690 | * cp_init() - allocate ccwchains for a channel program. |
691 | * @cp: channel_program on which to perform the operation |
692 | * @orb: control block for the channel program from the guest |
693 | * |
694 | * This creates one or more ccwchain(s), and copies the raw data of |
695 | * the target channel program from @orb->cmd.iova to the new ccwchain(s). |
696 | * |
697 | * Limitations: |
698 | * 1. Supports idal(c64) ccw chaining. |
699 | * 2. Supports 4k idaw. |
700 | * |
701 | * Returns: |
702 | * %0 on success and a negative error value on failure. |
703 | */ |
704 | int cp_init(struct channel_program *cp, union orb *orb) |
705 | { |
706 | struct vfio_device *vdev = |
707 | &container_of(cp, struct vfio_ccw_private, cp)->vdev; |
708 | /* custom ratelimit used to avoid flood during guest IPL */ |
709 | static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1); |
710 | int ret; |
711 | |
712 | /* this is an error in the caller */ |
713 | if (cp->initialized) |
714 | return -EBUSY; |
715 | |
716 | /* |
717 | * We only support prefetching the channel program. We assume all channel |
718 | * programs executed by supported guests likewise support prefetching. |
719 | * Executing a channel program that does not specify prefetching will |
720 | * typically not cause an error, but a warning is issued to help identify |
721 | * the problem if something does break. |
722 | */ |
723 | if (!orb->cmd.pfch && __ratelimit(&ratelimit_state)) |
		dev_warn(
			vdev->dev,
			"Prefetching channel program even though prefetch not specified in ORB");
727 | |
	INIT_LIST_HEAD(&cp->ccwchain_list);
729 | memcpy(&cp->orb, orb, sizeof(*orb)); |
730 | |
731 | /* Build a ccwchain for the first CCW segment */ |
	ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);
733 | |
734 | if (!ret) |
735 | cp->initialized = true; |
736 | |
737 | return ret; |
738 | } |
739 | |
740 | |
741 | /** |
742 | * cp_free() - free resources for channel program. |
743 | * @cp: channel_program on which to perform the operation |
744 | * |
745 | * This unpins the memory pages and frees the memory space occupied by |
746 | * @cp, which must have been returned by a previous call to cp_init(). |
747 | * Otherwise, undefined behavior occurs. |
748 | */ |
749 | void cp_free(struct channel_program *cp) |
750 | { |
751 | struct vfio_device *vdev = |
752 | &container_of(cp, struct vfio_ccw_private, cp)->vdev; |
753 | struct ccwchain *chain, *temp; |
754 | int i; |
755 | |
756 | if (!cp->initialized) |
757 | return; |
758 | |
759 | cp->initialized = false; |
760 | list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) { |
761 | for (i = 0; i < chain->ch_len; i++) { |
			page_array_unpin_free(&chain->ch_pa[i], vdev, idal_is_2k(cp));
			ccwchain_cda_free(chain, i);
764 | } |
765 | ccwchain_free(chain); |
766 | } |
767 | } |
768 | |
769 | /** |
770 | * cp_prefetch() - translate a guest physical address channel program to |
771 | * a real-device runnable channel program. |
772 | * @cp: channel_program on which to perform the operation |
773 | * |
774 | * This function translates the guest-physical-address channel program |
775 | * and stores the result to ccwchain list. @cp must have been |
776 | * initialized by a previous call with cp_init(). Otherwise, undefined |
777 | * behavior occurs. |
778 | * For each chain composing the channel program: |
779 | * - On entry ch_len holds the count of CCWs to be translated. |
780 | * - On exit ch_len is adjusted to the count of successfully translated CCWs. |
781 | * This allows cp_free to find in ch_len the count of CCWs to free in a chain. |
782 | * |
 * The S/390 CCW Translation APIs (prefixed by 'cp_') are introduced
784 | * as helpers to do ccw chain translation inside the kernel. Basically |
785 | * they accept a channel program issued by a virtual machine, and |
786 | * translate the channel program to a real-device runnable channel |
787 | * program. |
788 | * |
789 | * These APIs will copy the ccws into kernel-space buffers, and update |
790 | * the guest physical addresses with their corresponding host physical |
791 | * addresses. Then channel I/O device drivers could issue the |
792 | * translated channel program to real devices to perform an I/O |
793 | * operation. |
794 | * |
795 | * These interfaces are designed to support translation only for |
796 | * channel programs, which are generated and formatted by a |
797 | * guest. Thus this will make it possible for things like VFIO to |
798 | * leverage the interfaces to passthrough a channel I/O mediated |
799 | * device in QEMU. |
800 | * |
801 | * We support direct ccw chaining by translating them to idal ccws. |
802 | * |
803 | * Returns: |
804 | * %0 on success and a negative error value on failure. |
805 | */ |
806 | int cp_prefetch(struct channel_program *cp) |
807 | { |
808 | struct ccwchain *chain; |
809 | struct ccw1 *ccw; |
810 | struct page_array *pa; |
811 | int len, idx, ret; |
812 | |
813 | /* this is an error in the caller */ |
814 | if (!cp->initialized) |
815 | return -EINVAL; |
816 | |
817 | list_for_each_entry(chain, &cp->ccwchain_list, next) { |
818 | len = chain->ch_len; |
819 | for (idx = 0; idx < len; idx++) { |
820 | ccw = &chain->ch_ccw[idx]; |
821 | pa = &chain->ch_pa[idx]; |
822 | |
823 | ret = ccwchain_fetch_one(ccw, pa, cp); |
824 | if (ret) |
825 | goto out_err; |
826 | } |
827 | } |
828 | |
829 | return 0; |
830 | out_err: |
831 | /* Only cleanup the chain elements that were actually translated. */ |
832 | chain->ch_len = idx; |
833 | list_for_each_entry_continue(chain, &cp->ccwchain_list, next) { |
834 | chain->ch_len = 0; |
835 | } |
836 | return ret; |
837 | } |
838 | |
839 | /** |
840 | * cp_get_orb() - get the orb of the channel program |
841 | * @cp: channel_program on which to perform the operation |
842 | * @sch: subchannel the operation will be performed against |
843 | * |
844 | * This function returns the address of the updated orb of the channel |
845 | * program. Channel I/O device drivers could use this orb to issue a |
846 | * ssch. |
847 | */ |
848 | union orb *cp_get_orb(struct channel_program *cp, struct subchannel *sch) |
849 | { |
850 | union orb *orb; |
851 | struct ccwchain *chain; |
852 | struct ccw1 *cpa; |
853 | |
854 | /* this is an error in the caller */ |
855 | if (!cp->initialized) |
856 | return NULL; |
857 | |
858 | orb = &cp->orb; |
859 | |
	orb->cmd.intparm = (u32)virt_to_phys(sch);
861 | orb->cmd.fmt = 1; |
862 | |
863 | /* |
864 | * Everything built by vfio-ccw is a Format-2 IDAL. |
865 | * If the input was a Format-1 IDAL, indicate that |
866 | * 2K Format-2 IDAWs were created here. |
867 | */ |
868 | if (!orb->cmd.c64) |
869 | orb->cmd.i2k = 1; |
870 | orb->cmd.c64 = 1; |
871 | |
872 | if (orb->cmd.lpm == 0) |
873 | orb->cmd.lpm = sch->lpm; |
874 | |
875 | chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next); |
876 | cpa = chain->ch_ccw; |
	orb->cmd.cpa = (__u32)virt_to_phys(cpa);
878 | |
879 | return orb; |
880 | } |
881 | |
882 | /** |
883 | * cp_update_scsw() - update scsw for a channel program. |
884 | * @cp: channel_program on which to perform the operation |
885 | * @scsw: I/O results of the channel program and also the target to be |
886 | * updated |
887 | * |
 * @scsw contains the I/O results of the channel program pointed to by
 * @cp. However, what @scsw->cpa stores is a host physical address, which
 * is meaningless to the guest that is waiting for the I/O results.
892 | * |
 * This function updates @scsw->cpa to its corresponding guest physical
894 | * address. |
895 | */ |
896 | void cp_update_scsw(struct channel_program *cp, union scsw *scsw) |
897 | { |
898 | struct ccwchain *chain; |
899 | u32 cpa = scsw->cmd.cpa; |
900 | u32 ccw_head; |
901 | |
902 | if (!cp->initialized) |
903 | return; |
904 | |
905 | /* |
906 | * LATER: |
907 | * For now, only update the cmd.cpa part. We may need to deal with |
908 | * other portions of the schib as well, even if we don't return them |
909 | * in the ioctl directly. Path status changes etc. |
910 | */ |
911 | list_for_each_entry(chain, &cp->ccwchain_list, next) { |
912 | ccw_head = (u32)(u64)chain->ch_ccw; |
913 | /* |
914 | * On successful execution, cpa points just beyond the end |
915 | * of the chain. |
916 | */ |
		if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) {
918 | /* |
919 | * (cpa - ccw_head) is the offset value of the host |
920 | * physical ccw to its chain head. |
921 | * Adding this value to the guest physical ccw chain |
922 | * head gets us the guest cpa. |
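			 *
			 * For example (illustration only): if cpa stops three
			 * CCWs (24 bytes) past ccw_head, the guest is reported
			 * ch_iova + 24.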
923 | */ |
924 | cpa = chain->ch_iova + (cpa - ccw_head); |
925 | break; |
926 | } |
927 | } |
928 | |
929 | scsw->cmd.cpa = cpa; |
930 | } |
931 | |
932 | /** |
933 | * cp_iova_pinned() - check if an iova is pinned for a ccw chain. |
934 | * @cp: channel_program on which to perform the operation |
935 | * @iova: the iova to check |
936 | * @length: the length to check from @iova |
937 | * |
938 | * If the @iova is currently pinned for the ccw chain, return true; |
939 | * else return false. |
940 | */ |
941 | bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length) |
942 | { |
943 | struct ccwchain *chain; |
944 | int i; |
945 | |
946 | if (!cp->initialized) |
947 | return false; |
948 | |
949 | list_for_each_entry(chain, &cp->ccwchain_list, next) { |
950 | for (i = 0; i < chain->ch_len; i++) |
			if (page_array_iova_pinned(&chain->ch_pa[i], iova, length))
952 | return true; |
953 | } |
954 | |
955 | return false; |
956 | } |
957 | |