1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * VFIO PCI I/O Port & MMIO access |
4 | * |
5 | * Copyright (C) 2012 Red Hat, Inc. All rights reserved. |
6 | * Author: Alex Williamson <alex.williamson@redhat.com> |
7 | * |
8 | * Derived from original vfio: |
9 | * Copyright 2010 Cisco Systems, Inc. All rights reserved. |
10 | * Author: Tom Lyon, pugs@cisco.com |
11 | */ |
12 | |
13 | #include <linux/fs.h> |
14 | #include <linux/pci.h> |
15 | #include <linux/uaccess.h> |
16 | #include <linux/io.h> |
17 | #include <linux/vfio.h> |
18 | #include <linux/vgaarb.h> |
19 | |
20 | #include "vfio_pci_priv.h" |
21 | |
/*
 * Accessor aliases used by the VFIO_IOREAD()/VFIO_IOWRITE() generators below.
 *
 * Single-byte accesses always map to the plain ioread8()/iowrite8() helpers.
 * Multi-byte accesses map to the plain helpers when the kernel is built
 * little-endian and to the "be" flavoured helpers otherwise.
 */
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

#ifdef __LITTLE_ENDIAN
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#else
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#endif
39 | |
40 | #define VFIO_IOWRITE(size) \ |
41 | int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev, \ |
42 | bool test_mem, u##size val, void __iomem *io) \ |
43 | { \ |
44 | if (test_mem) { \ |
45 | down_read(&vdev->memory_lock); \ |
46 | if (!__vfio_pci_memory_enabled(vdev)) { \ |
47 | up_read(&vdev->memory_lock); \ |
48 | return -EIO; \ |
49 | } \ |
50 | } \ |
51 | \ |
52 | vfio_iowrite##size(val, io); \ |
53 | \ |
54 | if (test_mem) \ |
55 | up_read(&vdev->memory_lock); \ |
56 | \ |
57 | return 0; \ |
58 | } \ |
59 | EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size); |
60 | |
61 | VFIO_IOWRITE(8) |
62 | VFIO_IOWRITE(16) |
63 | VFIO_IOWRITE(32) |
64 | #ifdef iowrite64 |
65 | VFIO_IOWRITE(64) |
66 | #endif |
67 | |
68 | #define VFIO_IOREAD(size) \ |
69 | int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \ |
70 | bool test_mem, u##size *val, void __iomem *io) \ |
71 | { \ |
72 | if (test_mem) { \ |
73 | down_read(&vdev->memory_lock); \ |
74 | if (!__vfio_pci_memory_enabled(vdev)) { \ |
75 | up_read(&vdev->memory_lock); \ |
76 | return -EIO; \ |
77 | } \ |
78 | } \ |
79 | \ |
80 | *val = vfio_ioread##size(io); \ |
81 | \ |
82 | if (test_mem) \ |
83 | up_read(&vdev->memory_lock); \ |
84 | \ |
85 | return 0; \ |
86 | } \ |
87 | EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size); |
88 | |
89 | VFIO_IOREAD(8) |
90 | VFIO_IOREAD(16) |
91 | VFIO_IOREAD(32) |
92 | |
93 | /* |
94 | * Read or write from an __iomem region (MMIO or I/O port) with an excluded |
95 | * range which is inaccessible. The excluded range drops writes and fills |
96 | * reads with -1. This is intended for handling MSI-X vector tables and |
97 | * leftover space for ROM BARs. |
98 | */ |
99 | ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, |
100 | void __iomem *io, char __user *buf, |
101 | loff_t off, size_t count, size_t x_start, |
102 | size_t x_end, bool iswrite) |
103 | { |
104 | ssize_t done = 0; |
105 | int ret; |
106 | |
107 | while (count) { |
108 | size_t fillable, filled; |
109 | |
110 | if (off < x_start) |
111 | fillable = min(count, (size_t)(x_start - off)); |
112 | else if (off >= x_end) |
113 | fillable = count; |
114 | else |
115 | fillable = 0; |
116 | |
117 | if (fillable >= 4 && !(off % 4)) { |
118 | u32 val; |
119 | |
120 | if (iswrite) { |
121 | if (copy_from_user(to: &val, from: buf, n: 4)) |
122 | return -EFAULT; |
123 | |
124 | ret = vfio_pci_core_iowrite32(vdev, test_mem, |
125 | val, io + off); |
126 | if (ret) |
127 | return ret; |
128 | } else { |
129 | ret = vfio_pci_core_ioread32(vdev, test_mem, |
130 | &val, io + off); |
131 | if (ret) |
132 | return ret; |
133 | |
134 | if (copy_to_user(to: buf, from: &val, n: 4)) |
135 | return -EFAULT; |
136 | } |
137 | |
138 | filled = 4; |
139 | } else if (fillable >= 2 && !(off % 2)) { |
140 | u16 val; |
141 | |
142 | if (iswrite) { |
143 | if (copy_from_user(to: &val, from: buf, n: 2)) |
144 | return -EFAULT; |
145 | |
146 | ret = vfio_pci_core_iowrite16(vdev, test_mem, |
147 | val, io + off); |
148 | if (ret) |
149 | return ret; |
150 | } else { |
151 | ret = vfio_pci_core_ioread16(vdev, test_mem, |
152 | &val, io + off); |
153 | if (ret) |
154 | return ret; |
155 | |
156 | if (copy_to_user(to: buf, from: &val, n: 2)) |
157 | return -EFAULT; |
158 | } |
159 | |
160 | filled = 2; |
161 | } else if (fillable) { |
162 | u8 val; |
163 | |
164 | if (iswrite) { |
165 | if (copy_from_user(to: &val, from: buf, n: 1)) |
166 | return -EFAULT; |
167 | |
168 | ret = vfio_pci_core_iowrite8(vdev, test_mem, |
169 | val, io + off); |
170 | if (ret) |
171 | return ret; |
172 | } else { |
173 | ret = vfio_pci_core_ioread8(vdev, test_mem, |
174 | &val, io + off); |
175 | if (ret) |
176 | return ret; |
177 | |
178 | if (copy_to_user(to: buf, from: &val, n: 1)) |
179 | return -EFAULT; |
180 | } |
181 | |
182 | filled = 1; |
183 | } else { |
184 | /* Fill reads with -1, drop writes */ |
185 | filled = min(count, (size_t)(x_end - off)); |
186 | if (!iswrite) { |
187 | u8 val = 0xFF; |
188 | size_t i; |
189 | |
190 | for (i = 0; i < filled; i++) |
191 | if (copy_to_user(to: buf + i, from: &val, n: 1)) |
192 | return -EFAULT; |
193 | } |
194 | } |
195 | |
196 | count -= filled; |
197 | done += filled; |
198 | off += filled; |
199 | buf += filled; |
200 | } |
201 | |
202 | return done; |
203 | } |
204 | EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw); |
205 | |
206 | int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar) |
207 | { |
208 | struct pci_dev *pdev = vdev->pdev; |
209 | int ret; |
210 | void __iomem *io; |
211 | |
212 | if (vdev->barmap[bar]) |
213 | return 0; |
214 | |
215 | ret = pci_request_selected_regions(pdev, 1 << bar, "vfio" ); |
216 | if (ret) |
217 | return ret; |
218 | |
219 | io = pci_iomap(dev: pdev, bar, max: 0); |
220 | if (!io) { |
221 | pci_release_selected_regions(pdev, 1 << bar); |
222 | return -ENOMEM; |
223 | } |
224 | |
225 | vdev->barmap[bar] = io; |
226 | |
227 | return 0; |
228 | } |
229 | EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap); |
230 | |
231 | ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, |
232 | size_t count, loff_t *ppos, bool iswrite) |
233 | { |
234 | struct pci_dev *pdev = vdev->pdev; |
235 | loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; |
236 | int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos); |
237 | size_t x_start = 0, x_end = 0; |
238 | resource_size_t end; |
239 | void __iomem *io; |
240 | struct resource *res = &vdev->pdev->resource[bar]; |
241 | ssize_t done; |
242 | |
243 | if (pci_resource_start(pdev, bar)) |
244 | end = pci_resource_len(pdev, bar); |
245 | else if (bar == PCI_ROM_RESOURCE && |
246 | pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW) |
247 | end = 0x20000; |
248 | else |
249 | return -EINVAL; |
250 | |
251 | if (pos >= end) |
252 | return -EINVAL; |
253 | |
254 | count = min(count, (size_t)(end - pos)); |
255 | |
256 | if (bar == PCI_ROM_RESOURCE) { |
257 | /* |
258 | * The ROM can fill less space than the BAR, so we start the |
259 | * excluded range at the end of the actual ROM. This makes |
260 | * filling large ROM BARs much faster. |
261 | */ |
262 | io = pci_map_rom(pdev, size: &x_start); |
263 | if (!io) { |
264 | done = -ENOMEM; |
265 | goto out; |
266 | } |
267 | x_end = end; |
268 | } else { |
269 | int ret = vfio_pci_core_setup_barmap(vdev, bar); |
270 | if (ret) { |
271 | done = ret; |
272 | goto out; |
273 | } |
274 | |
275 | io = vdev->barmap[bar]; |
276 | } |
277 | |
278 | if (bar == vdev->msix_bar) { |
279 | x_start = vdev->msix_offset; |
280 | x_end = vdev->msix_offset + vdev->msix_size; |
281 | } |
282 | |
283 | done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos, |
284 | count, x_start, x_end, iswrite); |
285 | |
286 | if (done >= 0) |
287 | *ppos += done; |
288 | |
289 | if (bar == PCI_ROM_RESOURCE) |
290 | pci_unmap_rom(pdev, rom: io); |
291 | out: |
292 | return done; |
293 | } |
294 | |
#ifdef CONFIG_VFIO_PCI_VGA
/*
 * Read from or write to the legacy VGA ranges on behalf of userspace.
 *
 * @vdev:    device being accessed; must have has_vga set
 * @buf:     user buffer: data source for writes, destination for reads
 * @count:   number of bytes requested; clamped to the end of the range hit
 * @ppos:    region offset; the part selected by VFIO_PCI_OFFSET_MASK is
 *           taken as the legacy address, and *@ppos is advanced by the
 *           number of bytes processed on success
 * @iswrite: true for a write to the device, false for a read
 *
 * Three windows are accepted: legacy memory 0xa0000-0xbffff and the I/O port
 * ranges 0x3b0-0x3bb and 0x3c0-0x3df.  The window is mapped for the duration
 * of the call and the matching VGA resource is held via
 * vga_get_interruptible()/vga_put() around the access.
 *
 * Return: number of bytes processed, -EINVAL if the device has no VGA or the
 * address is outside the accepted windows, -ENOMEM if the window cannot be
 * mapped, or the error from vga_get_interruptible() /
 * vfio_pci_core_do_io_rw().
 */
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	int ret;
	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;

	if (!vdev->has_vga)
		return -EINVAL;

	/* Bound pos first so that the u32 cast below cannot truncate it */
	if (pos > 0xbfffful)
		return -EINVAL;

	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		if (is_ioport)
			ioport_unmap(iomem);
		else
			iounmap(iomem);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
				      0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	if (is_ioport)
		ioport_unmap(iomem);
	else
		iounmap(iomem);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif
365 | |
366 | static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd, |
367 | bool test_mem) |
368 | { |
369 | switch (ioeventfd->count) { |
370 | case 1: |
371 | vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem, |
372 | ioeventfd->data, ioeventfd->addr); |
373 | break; |
374 | case 2: |
375 | vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem, |
376 | ioeventfd->data, ioeventfd->addr); |
377 | break; |
378 | case 4: |
379 | vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem, |
380 | ioeventfd->data, ioeventfd->addr); |
381 | break; |
382 | #ifdef iowrite64 |
383 | case 8: |
384 | vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem, |
385 | ioeventfd->data, ioeventfd->addr); |
386 | break; |
387 | #endif |
388 | } |
389 | } |
390 | |
391 | static int vfio_pci_ioeventfd_handler(void *opaque, void *unused) |
392 | { |
393 | struct vfio_pci_ioeventfd *ioeventfd = opaque; |
394 | struct vfio_pci_core_device *vdev = ioeventfd->vdev; |
395 | |
396 | if (ioeventfd->test_mem) { |
397 | if (!down_read_trylock(sem: &vdev->memory_lock)) |
398 | return 1; /* Lock contended, use thread */ |
399 | if (!__vfio_pci_memory_enabled(vdev)) { |
400 | up_read(sem: &vdev->memory_lock); |
401 | return 0; |
402 | } |
403 | } |
404 | |
405 | vfio_pci_ioeventfd_do_write(ioeventfd, test_mem: false); |
406 | |
407 | if (ioeventfd->test_mem) |
408 | up_read(sem: &vdev->memory_lock); |
409 | |
410 | return 0; |
411 | } |
412 | |
413 | static void vfio_pci_ioeventfd_thread(void *opaque, void *unused) |
414 | { |
415 | struct vfio_pci_ioeventfd *ioeventfd = opaque; |
416 | |
417 | vfio_pci_ioeventfd_do_write(ioeventfd, test_mem: ioeventfd->test_mem); |
418 | } |
419 | |
420 | int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, |
421 | uint64_t data, int count, int fd) |
422 | { |
423 | struct pci_dev *pdev = vdev->pdev; |
424 | loff_t pos = offset & VFIO_PCI_OFFSET_MASK; |
425 | int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset); |
426 | struct vfio_pci_ioeventfd *ioeventfd; |
427 | |
428 | /* Only support ioeventfds into BARs */ |
429 | if (bar > VFIO_PCI_BAR5_REGION_INDEX) |
430 | return -EINVAL; |
431 | |
432 | if (pos + count > pci_resource_len(pdev, bar)) |
433 | return -EINVAL; |
434 | |
435 | /* Disallow ioeventfds working around MSI-X table writes */ |
436 | if (bar == vdev->msix_bar && |
437 | !(pos + count <= vdev->msix_offset || |
438 | pos >= vdev->msix_offset + vdev->msix_size)) |
439 | return -EINVAL; |
440 | |
441 | #ifndef iowrite64 |
442 | if (count == 8) |
443 | return -EINVAL; |
444 | #endif |
445 | |
446 | ret = vfio_pci_core_setup_barmap(vdev, bar); |
447 | if (ret) |
448 | return ret; |
449 | |
450 | mutex_lock(&vdev->ioeventfds_lock); |
451 | |
452 | list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) { |
453 | if (ioeventfd->pos == pos && ioeventfd->bar == bar && |
454 | ioeventfd->data == data && ioeventfd->count == count) { |
455 | if (fd == -1) { |
456 | vfio_virqfd_disable(pvirqfd: &ioeventfd->virqfd); |
457 | list_del(entry: &ioeventfd->next); |
458 | vdev->ioeventfds_nr--; |
459 | kfree(objp: ioeventfd); |
460 | ret = 0; |
461 | } else |
462 | ret = -EEXIST; |
463 | |
464 | goto out_unlock; |
465 | } |
466 | } |
467 | |
468 | if (fd < 0) { |
469 | ret = -ENODEV; |
470 | goto out_unlock; |
471 | } |
472 | |
473 | if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) { |
474 | ret = -ENOSPC; |
475 | goto out_unlock; |
476 | } |
477 | |
478 | ioeventfd = kzalloc(size: sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT); |
479 | if (!ioeventfd) { |
480 | ret = -ENOMEM; |
481 | goto out_unlock; |
482 | } |
483 | |
484 | ioeventfd->vdev = vdev; |
485 | ioeventfd->addr = vdev->barmap[bar] + pos; |
486 | ioeventfd->data = data; |
487 | ioeventfd->pos = pos; |
488 | ioeventfd->bar = bar; |
489 | ioeventfd->count = count; |
490 | ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM; |
491 | |
492 | ret = vfio_virqfd_enable(opaque: ioeventfd, handler: vfio_pci_ioeventfd_handler, |
493 | thread: vfio_pci_ioeventfd_thread, NULL, |
494 | pvirqfd: &ioeventfd->virqfd, fd); |
495 | if (ret) { |
496 | kfree(objp: ioeventfd); |
497 | goto out_unlock; |
498 | } |
499 | |
500 | list_add(new: &ioeventfd->next, head: &vdev->ioeventfds_list); |
501 | vdev->ioeventfds_nr++; |
502 | |
503 | out_unlock: |
504 | mutex_unlock(lock: &vdev->ioeventfds_lock); |
505 | |
506 | return ret; |
507 | } |
508 | |