1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Xen PCI Frontend |
4 | * |
5 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> |
6 | */ |
7 | #include <linux/module.h> |
8 | #include <linux/init.h> |
9 | #include <linux/mm.h> |
10 | #include <xen/xenbus.h> |
11 | #include <xen/events.h> |
12 | #include <xen/grant_table.h> |
13 | #include <xen/page.h> |
14 | #include <linux/spinlock.h> |
15 | #include <linux/pci.h> |
16 | #include <linux/msi.h> |
17 | #include <xen/interface/io/pciif.h> |
18 | #include <asm/xen/pci.h> |
19 | #include <linux/interrupt.h> |
20 | #include <linux/atomic.h> |
21 | #include <linux/workqueue.h> |
22 | #include <linux/bitops.h> |
23 | #include <linux/time.h> |
24 | #include <linux/ktime.h> |
25 | #include <xen/platform_pci.h> |
26 | |
27 | #include <asm/xen/swiotlb-xen.h> |
28 | |
/* Placeholder kept in pcifront_device.evtchn while no event channel is allocated. */
#define INVALID_EVTCHN	(-1)
30 | |
31 | struct pci_bus_entry { |
32 | struct list_head list; |
33 | struct pci_bus *bus; |
34 | }; |
35 | |
/*
 * Bit in pcifront_device.flags that is set while the AER work item is
 * queued or running: bit number first, then the corresponding mask.
 */
#define _PDEVB_op_active	(0)
#define PDEVB_op_active		(1 << (_PDEVB_op_active))
38 | |
39 | struct pcifront_device { |
40 | struct xenbus_device *xdev; |
41 | struct list_head root_buses; |
42 | |
43 | int evtchn; |
44 | grant_ref_t gnt_ref; |
45 | |
46 | int irq; |
47 | |
48 | /* Lock this when doing any operations in sh_info */ |
49 | spinlock_t sh_info_lock; |
50 | struct xen_pci_sharedinfo *sh_info; |
51 | struct work_struct op_work; |
52 | unsigned long flags; |
53 | |
54 | }; |
55 | |
56 | struct pcifront_sd { |
57 | struct pci_sysdata sd; |
58 | struct pcifront_device *pdev; |
59 | }; |
60 | |
61 | static inline struct pcifront_device * |
62 | pcifront_get_pdev(struct pcifront_sd *sd) |
63 | { |
64 | return sd->pdev; |
65 | } |
66 | |
67 | static inline void pcifront_init_sd(struct pcifront_sd *sd, |
68 | unsigned int domain, unsigned int bus, |
69 | struct pcifront_device *pdev) |
70 | { |
71 | /* Because we do not expose that information via XenBus. */ |
72 | sd->sd.node = first_online_node; |
73 | sd->sd.domain = domain; |
74 | sd->pdev = pdev; |
75 | } |
76 | |
/* pcifront_dev is the single installed frontend; pcifront_dev_lock guards it. */
static DEFINE_SPINLOCK(pcifront_dev_lock);
static struct pcifront_device *pcifront_dev;
79 | |
80 | static int errno_to_pcibios_err(int errno) |
81 | { |
82 | switch (errno) { |
83 | case XEN_PCI_ERR_success: |
84 | return PCIBIOS_SUCCESSFUL; |
85 | |
86 | case XEN_PCI_ERR_dev_not_found: |
87 | return PCIBIOS_DEVICE_NOT_FOUND; |
88 | |
89 | case XEN_PCI_ERR_invalid_offset: |
90 | case XEN_PCI_ERR_op_failed: |
91 | return PCIBIOS_BAD_REGISTER_NUMBER; |
92 | |
93 | case XEN_PCI_ERR_not_implemented: |
94 | return PCIBIOS_FUNC_NOT_SUPPORTED; |
95 | |
96 | case XEN_PCI_ERR_access_denied: |
97 | return PCIBIOS_SET_FAILED; |
98 | } |
99 | return errno; |
100 | } |
101 | |
102 | static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev) |
103 | { |
104 | if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) |
105 | && !test_and_set_bit(_PDEVB_op_active, addr: &pdev->flags)) { |
106 | dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n" ); |
107 | schedule_work(work: &pdev->op_work); |
108 | } |
109 | } |
110 | |
111 | static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) |
112 | { |
113 | int err = 0; |
114 | struct xen_pci_op *active_op = &pdev->sh_info->op; |
115 | unsigned long irq_flags; |
116 | evtchn_port_t port = pdev->evtchn; |
117 | unsigned int irq = pdev->irq; |
118 | s64 ns, ns_timeout; |
119 | |
120 | spin_lock_irqsave(&pdev->sh_info_lock, irq_flags); |
121 | |
122 | memcpy(active_op, op, sizeof(struct xen_pci_op)); |
123 | |
124 | /* Go */ |
125 | wmb(); |
126 | set_bit(_XEN_PCIF_active, addr: (unsigned long *)&pdev->sh_info->flags); |
127 | notify_remote_via_evtchn(port); |
128 | |
129 | /* |
130 | * We set a poll timeout of 3 seconds but give up on return after |
131 | * 2 seconds. It is better to time out too late rather than too early |
132 | * (in the latter case we end up continually re-executing poll() with a |
133 | * timeout in the past). 1s difference gives plenty of slack for error. |
134 | */ |
135 | ns_timeout = ktime_get_ns() + 2 * (s64)NSEC_PER_SEC; |
136 | |
137 | xen_clear_irq_pending(irq); |
138 | |
139 | while (test_bit(_XEN_PCIF_active, |
140 | (unsigned long *)&pdev->sh_info->flags)) { |
141 | xen_poll_irq_timeout(irq, timeout: jiffies + 3*HZ); |
142 | xen_clear_irq_pending(irq); |
143 | ns = ktime_get_ns(); |
144 | if (ns > ns_timeout) { |
145 | dev_err(&pdev->xdev->dev, |
146 | "pciback not responding!!!\n" ); |
147 | clear_bit(_XEN_PCIF_active, |
148 | addr: (unsigned long *)&pdev->sh_info->flags); |
149 | err = XEN_PCI_ERR_dev_not_found; |
150 | goto out; |
151 | } |
152 | } |
153 | |
154 | /* |
155 | * We might lose backend service request since we |
156 | * reuse same evtchn with pci_conf backend response. So re-schedule |
157 | * aer pcifront service. |
158 | */ |
159 | if (test_bit(_XEN_PCIB_active, |
160 | (unsigned long *)&pdev->sh_info->flags)) { |
161 | dev_err(&pdev->xdev->dev, |
162 | "schedule aer pcifront service\n" ); |
163 | schedule_pcifront_aer_op(pdev); |
164 | } |
165 | |
166 | memcpy(op, active_op, sizeof(struct xen_pci_op)); |
167 | |
168 | err = op->err; |
169 | out: |
170 | spin_unlock_irqrestore(lock: &pdev->sh_info_lock, flags: irq_flags); |
171 | return err; |
172 | } |
173 | |
174 | /* Access to this function is spinlocked in drivers/pci/access.c */ |
175 | static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn, |
176 | int where, int size, u32 *val) |
177 | { |
178 | int err = 0; |
179 | struct xen_pci_op op = { |
180 | .cmd = XEN_PCI_OP_conf_read, |
181 | .domain = pci_domain_nr(bus), |
182 | .bus = bus->number, |
183 | .devfn = devfn, |
184 | .offset = where, |
185 | .size = size, |
186 | }; |
187 | struct pcifront_sd *sd = bus->sysdata; |
188 | struct pcifront_device *pdev = pcifront_get_pdev(sd); |
189 | |
190 | dev_dbg(&pdev->xdev->dev, |
191 | "read dev=%04x:%02x:%02x.%d - offset %x size %d\n" , |
192 | pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), |
193 | PCI_FUNC(devfn), where, size); |
194 | |
195 | err = do_pci_op(pdev, op: &op); |
196 | |
197 | if (likely(!err)) { |
198 | dev_dbg(&pdev->xdev->dev, "read got back value %x\n" , |
199 | op.value); |
200 | |
201 | *val = op.value; |
202 | } else if (err == -ENODEV) { |
203 | /* No device here, pretend that it just returned 0 */ |
204 | err = 0; |
205 | *val = 0; |
206 | } |
207 | |
208 | return errno_to_pcibios_err(errno: err); |
209 | } |
210 | |
211 | /* Access to this function is spinlocked in drivers/pci/access.c */ |
212 | static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn, |
213 | int where, int size, u32 val) |
214 | { |
215 | struct xen_pci_op op = { |
216 | .cmd = XEN_PCI_OP_conf_write, |
217 | .domain = pci_domain_nr(bus), |
218 | .bus = bus->number, |
219 | .devfn = devfn, |
220 | .offset = where, |
221 | .size = size, |
222 | .value = val, |
223 | }; |
224 | struct pcifront_sd *sd = bus->sysdata; |
225 | struct pcifront_device *pdev = pcifront_get_pdev(sd); |
226 | |
227 | dev_dbg(&pdev->xdev->dev, |
228 | "write dev=%04x:%02x:%02x.%d - offset %x size %d val %x\n" , |
229 | pci_domain_nr(bus), bus->number, |
230 | PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val); |
231 | |
232 | return errno_to_pcibios_err(errno: do_pci_op(pdev, op: &op)); |
233 | } |
234 | |
235 | static struct pci_ops pcifront_bus_ops = { |
236 | .read = pcifront_bus_read, |
237 | .write = pcifront_bus_write, |
238 | }; |
239 | |
240 | #ifdef CONFIG_PCI_MSI |
241 | static int pci_frontend_enable_msix(struct pci_dev *dev, |
242 | int vector[], int nvec) |
243 | { |
244 | int err; |
245 | int i; |
246 | struct xen_pci_op op = { |
247 | .cmd = XEN_PCI_OP_enable_msix, |
248 | .domain = pci_domain_nr(bus: dev->bus), |
249 | .bus = dev->bus->number, |
250 | .devfn = dev->devfn, |
251 | .value = nvec, |
252 | }; |
253 | struct pcifront_sd *sd = dev->bus->sysdata; |
254 | struct pcifront_device *pdev = pcifront_get_pdev(sd); |
255 | struct msi_desc *entry; |
256 | |
257 | if (nvec > SH_INFO_MAX_VEC) { |
258 | pci_err(dev, "too many vectors (0x%x) for PCI frontend:" |
259 | " Increase SH_INFO_MAX_VEC\n" , nvec); |
260 | return -EINVAL; |
261 | } |
262 | |
263 | i = 0; |
264 | msi_for_each_desc(entry, &dev->dev, MSI_DESC_NOTASSOCIATED) { |
265 | op.msix_entries[i].entry = entry->msi_index; |
266 | /* Vector is useless at this point. */ |
267 | op.msix_entries[i].vector = -1; |
268 | i++; |
269 | } |
270 | |
271 | err = do_pci_op(pdev, op: &op); |
272 | |
273 | if (likely(!err)) { |
274 | if (likely(!op.value)) { |
275 | /* we get the result */ |
276 | for (i = 0; i < nvec; i++) { |
277 | if (op.msix_entries[i].vector <= 0) { |
278 | pci_warn(dev, "MSI-X entry %d is invalid: %d!\n" , |
279 | i, op.msix_entries[i].vector); |
280 | err = -EINVAL; |
281 | vector[i] = -1; |
282 | continue; |
283 | } |
284 | vector[i] = op.msix_entries[i].vector; |
285 | } |
286 | } else { |
287 | pr_info("enable msix get value %x\n" , op.value); |
288 | err = op.value; |
289 | } |
290 | } else { |
291 | pci_err(dev, "enable msix get err %x\n" , err); |
292 | } |
293 | return err; |
294 | } |
295 | |
296 | static void pci_frontend_disable_msix(struct pci_dev *dev) |
297 | { |
298 | int err; |
299 | struct xen_pci_op op = { |
300 | .cmd = XEN_PCI_OP_disable_msix, |
301 | .domain = pci_domain_nr(bus: dev->bus), |
302 | .bus = dev->bus->number, |
303 | .devfn = dev->devfn, |
304 | }; |
305 | struct pcifront_sd *sd = dev->bus->sysdata; |
306 | struct pcifront_device *pdev = pcifront_get_pdev(sd); |
307 | |
308 | err = do_pci_op(pdev, op: &op); |
309 | |
310 | /* What should do for error ? */ |
311 | if (err) |
312 | pci_err(dev, "pci_disable_msix get err %x\n" , err); |
313 | } |
314 | |
315 | static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[]) |
316 | { |
317 | int err; |
318 | struct xen_pci_op op = { |
319 | .cmd = XEN_PCI_OP_enable_msi, |
320 | .domain = pci_domain_nr(bus: dev->bus), |
321 | .bus = dev->bus->number, |
322 | .devfn = dev->devfn, |
323 | }; |
324 | struct pcifront_sd *sd = dev->bus->sysdata; |
325 | struct pcifront_device *pdev = pcifront_get_pdev(sd); |
326 | |
327 | err = do_pci_op(pdev, op: &op); |
328 | if (likely(!err)) { |
329 | vector[0] = op.value; |
330 | if (op.value <= 0) { |
331 | pci_warn(dev, "MSI entry is invalid: %d!\n" , |
332 | op.value); |
333 | err = -EINVAL; |
334 | vector[0] = -1; |
335 | } |
336 | } else { |
337 | pci_err(dev, "pci frontend enable msi failed for dev " |
338 | "%x:%x\n" , op.bus, op.devfn); |
339 | err = -EINVAL; |
340 | } |
341 | return err; |
342 | } |
343 | |
344 | static void pci_frontend_disable_msi(struct pci_dev *dev) |
345 | { |
346 | int err; |
347 | struct xen_pci_op op = { |
348 | .cmd = XEN_PCI_OP_disable_msi, |
349 | .domain = pci_domain_nr(bus: dev->bus), |
350 | .bus = dev->bus->number, |
351 | .devfn = dev->devfn, |
352 | }; |
353 | struct pcifront_sd *sd = dev->bus->sysdata; |
354 | struct pcifront_device *pdev = pcifront_get_pdev(sd); |
355 | |
356 | err = do_pci_op(pdev, op: &op); |
357 | if (err == XEN_PCI_ERR_dev_not_found) { |
358 | /* XXX No response from backend, what shall we do? */ |
359 | pr_info("get no response from backend for disable MSI\n" ); |
360 | return; |
361 | } |
362 | if (err) |
363 | /* how can pciback notify us fail? */ |
364 | pr_info("get fake response from backend\n" ); |
365 | } |
366 | |
367 | static struct xen_pci_frontend_ops pci_frontend_ops = { |
368 | .enable_msi = pci_frontend_enable_msi, |
369 | .disable_msi = pci_frontend_disable_msi, |
370 | .enable_msix = pci_frontend_enable_msix, |
371 | .disable_msix = pci_frontend_disable_msix, |
372 | }; |
373 | |
374 | static void pci_frontend_registrar(int enable) |
375 | { |
376 | if (enable) |
377 | xen_pci_frontend = &pci_frontend_ops; |
378 | else |
379 | xen_pci_frontend = NULL; |
380 | }; |
381 | #else |
/* Without CONFIG_PCI_MSI there are no hooks to publish. */
static inline void pci_frontend_registrar(int enable)
{
}
383 | #endif /* CONFIG_PCI_MSI */ |
384 | |
385 | /* Claim resources for the PCI frontend as-is, backend won't allow changes */ |
386 | static int pcifront_claim_resource(struct pci_dev *dev, void *data) |
387 | { |
388 | struct pcifront_device *pdev = data; |
389 | int i; |
390 | struct resource *r; |
391 | |
392 | pci_dev_for_each_resource(dev, r, i) { |
393 | if (!r->parent && r->start && r->flags) { |
394 | dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n" , |
395 | pci_name(dev), i); |
396 | if (pci_claim_resource(dev, i)) { |
397 | dev_err(&pdev->xdev->dev, "Could not claim resource %s/%d! " |
398 | "Device offline. Try using e820_host=1 in the guest config.\n" , |
399 | pci_name(dev), i); |
400 | } |
401 | } |
402 | } |
403 | |
404 | return 0; |
405 | } |
406 | |
407 | static int pcifront_scan_bus(struct pcifront_device *pdev, |
408 | unsigned int domain, unsigned int bus, |
409 | struct pci_bus *b) |
410 | { |
411 | struct pci_dev *d; |
412 | unsigned int devfn; |
413 | |
414 | /* |
415 | * Scan the bus for functions and add. |
416 | * We omit handling of PCI bridge attachment because pciback prevents |
417 | * bridges from being exported. |
418 | */ |
419 | for (devfn = 0; devfn < 0x100; devfn++) { |
420 | d = pci_get_slot(bus: b, devfn); |
421 | if (d) { |
422 | /* Device is already known. */ |
423 | pci_dev_put(dev: d); |
424 | continue; |
425 | } |
426 | |
427 | d = pci_scan_single_device(bus: b, devfn); |
428 | if (d) |
429 | dev_info(&pdev->xdev->dev, "New device on " |
430 | "%04x:%02x:%02x.%d found.\n" , domain, bus, |
431 | PCI_SLOT(devfn), PCI_FUNC(devfn)); |
432 | } |
433 | |
434 | return 0; |
435 | } |
436 | |
437 | static int pcifront_scan_root(struct pcifront_device *pdev, |
438 | unsigned int domain, unsigned int bus) |
439 | { |
440 | struct pci_bus *b; |
441 | LIST_HEAD(resources); |
442 | struct pcifront_sd *sd = NULL; |
443 | struct pci_bus_entry *bus_entry = NULL; |
444 | int err = 0; |
445 | static struct resource busn_res = { |
446 | .start = 0, |
447 | .end = 255, |
448 | .flags = IORESOURCE_BUS, |
449 | }; |
450 | |
451 | #ifndef CONFIG_PCI_DOMAINS |
452 | if (domain != 0) { |
453 | dev_err(&pdev->xdev->dev, |
454 | "PCI Root in non-zero PCI Domain! domain=%d\n" , domain); |
455 | dev_err(&pdev->xdev->dev, |
456 | "Please compile with CONFIG_PCI_DOMAINS\n" ); |
457 | err = -EINVAL; |
458 | goto err_out; |
459 | } |
460 | #endif |
461 | |
462 | dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n" , |
463 | domain, bus); |
464 | |
465 | bus_entry = kzalloc(size: sizeof(*bus_entry), GFP_KERNEL); |
466 | sd = kzalloc(size: sizeof(*sd), GFP_KERNEL); |
467 | if (!bus_entry || !sd) { |
468 | err = -ENOMEM; |
469 | goto err_out; |
470 | } |
471 | pci_add_resource(resources: &resources, res: &ioport_resource); |
472 | pci_add_resource(resources: &resources, res: &iomem_resource); |
473 | pci_add_resource(resources: &resources, res: &busn_res); |
474 | pcifront_init_sd(sd, domain, bus, pdev); |
475 | |
476 | pci_lock_rescan_remove(); |
477 | |
478 | b = pci_scan_root_bus(parent: &pdev->xdev->dev, bus, |
479 | ops: &pcifront_bus_ops, sysdata: sd, resources: &resources); |
480 | if (!b) { |
481 | dev_err(&pdev->xdev->dev, |
482 | "Error creating PCI Frontend Bus!\n" ); |
483 | err = -ENOMEM; |
484 | pci_unlock_rescan_remove(); |
485 | pci_free_resource_list(resources: &resources); |
486 | goto err_out; |
487 | } |
488 | |
489 | bus_entry->bus = b; |
490 | |
491 | list_add(new: &bus_entry->list, head: &pdev->root_buses); |
492 | |
493 | /* |
494 | * pci_scan_root_bus skips devices which do not have a |
495 | * devfn==0. The pcifront_scan_bus enumerates all devfn. |
496 | */ |
497 | err = pcifront_scan_bus(pdev, domain, bus, b); |
498 | |
499 | /* Claim resources before going "live" with our devices */ |
500 | pci_walk_bus(top: b, cb: pcifront_claim_resource, userdata: pdev); |
501 | |
502 | /* Create SysFS and notify udev of the devices. Aka: "going live" */ |
503 | pci_bus_add_devices(bus: b); |
504 | |
505 | pci_unlock_rescan_remove(); |
506 | return err; |
507 | |
508 | err_out: |
509 | kfree(objp: bus_entry); |
510 | kfree(objp: sd); |
511 | |
512 | return err; |
513 | } |
514 | |
515 | static int pcifront_rescan_root(struct pcifront_device *pdev, |
516 | unsigned int domain, unsigned int bus) |
517 | { |
518 | int err; |
519 | struct pci_bus *b; |
520 | |
521 | b = pci_find_bus(domain, busnr: bus); |
522 | if (!b) |
523 | /* If the bus is unknown, create it. */ |
524 | return pcifront_scan_root(pdev, domain, bus); |
525 | |
526 | dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n" , |
527 | domain, bus); |
528 | |
529 | err = pcifront_scan_bus(pdev, domain, bus, b); |
530 | |
531 | /* Claim resources before going "live" with our devices */ |
532 | pci_walk_bus(top: b, cb: pcifront_claim_resource, userdata: pdev); |
533 | |
534 | /* Create SysFS and notify udev of the devices. Aka: "going live" */ |
535 | pci_bus_add_devices(bus: b); |
536 | |
537 | return err; |
538 | } |
539 | |
540 | static void free_root_bus_devs(struct pci_bus *bus) |
541 | { |
542 | struct pci_dev *dev; |
543 | |
544 | while (!list_empty(head: &bus->devices)) { |
545 | dev = container_of(bus->devices.next, struct pci_dev, |
546 | bus_list); |
547 | pci_dbg(dev, "removing device\n" ); |
548 | pci_stop_and_remove_bus_device(dev); |
549 | } |
550 | } |
551 | |
552 | static void pcifront_free_roots(struct pcifront_device *pdev) |
553 | { |
554 | struct pci_bus_entry *bus_entry, *t; |
555 | |
556 | dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n" ); |
557 | |
558 | pci_lock_rescan_remove(); |
559 | list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) { |
560 | list_del(entry: &bus_entry->list); |
561 | |
562 | free_root_bus_devs(bus: bus_entry->bus); |
563 | |
564 | kfree(objp: bus_entry->bus->sysdata); |
565 | |
566 | device_unregister(dev: bus_entry->bus->bridge); |
567 | pci_remove_bus(b: bus_entry->bus); |
568 | |
569 | kfree(objp: bus_entry); |
570 | } |
571 | pci_unlock_rescan_remove(); |
572 | } |
573 | |
574 | static pci_ers_result_t pcifront_common_process(int cmd, |
575 | struct pcifront_device *pdev, |
576 | pci_channel_state_t state) |
577 | { |
578 | struct pci_driver *pdrv; |
579 | int bus = pdev->sh_info->aer_op.bus; |
580 | int devfn = pdev->sh_info->aer_op.devfn; |
581 | int domain = pdev->sh_info->aer_op.domain; |
582 | struct pci_dev *pcidev; |
583 | |
584 | dev_dbg(&pdev->xdev->dev, |
585 | "pcifront AER process: cmd %x (bus:%x, devfn%x)" , |
586 | cmd, bus, devfn); |
587 | |
588 | pcidev = pci_get_domain_bus_and_slot(domain, bus, devfn); |
589 | if (!pcidev || !pcidev->dev.driver) { |
590 | dev_err(&pdev->xdev->dev, "device or AER driver is NULL\n" ); |
591 | pci_dev_put(dev: pcidev); |
592 | return PCI_ERS_RESULT_NONE; |
593 | } |
594 | pdrv = to_pci_driver(drv: pcidev->dev.driver); |
595 | |
596 | if (pdrv->err_handler && pdrv->err_handler->error_detected) { |
597 | pci_dbg(pcidev, "trying to call AER service\n" ); |
598 | switch (cmd) { |
599 | case XEN_PCI_OP_aer_detected: |
600 | return pdrv->err_handler->error_detected(pcidev, state); |
601 | case XEN_PCI_OP_aer_mmio: |
602 | return pdrv->err_handler->mmio_enabled(pcidev); |
603 | case XEN_PCI_OP_aer_slotreset: |
604 | return pdrv->err_handler->slot_reset(pcidev); |
605 | case XEN_PCI_OP_aer_resume: |
606 | pdrv->err_handler->resume(pcidev); |
607 | return PCI_ERS_RESULT_NONE; |
608 | default: |
609 | dev_err(&pdev->xdev->dev, |
610 | "bad request in aer recovery operation!\n" ); |
611 | } |
612 | } |
613 | |
614 | return PCI_ERS_RESULT_NONE; |
615 | } |
616 | |
617 | |
618 | static void pcifront_do_aer(struct work_struct *data) |
619 | { |
620 | struct pcifront_device *pdev = |
621 | container_of(data, struct pcifront_device, op_work); |
622 | int cmd = pdev->sh_info->aer_op.cmd; |
623 | pci_channel_state_t state = |
624 | (pci_channel_state_t)pdev->sh_info->aer_op.err; |
625 | |
626 | /* |
627 | * If a pci_conf op is in progress, we have to wait until it is done |
628 | * before service aer op |
629 | */ |
630 | dev_dbg(&pdev->xdev->dev, |
631 | "pcifront service aer bus %x devfn %x\n" , |
632 | pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn); |
633 | |
634 | pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state); |
635 | |
636 | /* Post the operation to the guest. */ |
637 | wmb(); |
638 | clear_bit(_XEN_PCIB_active, addr: (unsigned long *)&pdev->sh_info->flags); |
639 | notify_remote_via_evtchn(port: pdev->evtchn); |
640 | |
641 | /*in case of we lost an aer request in four lines time_window*/ |
642 | smp_mb__before_atomic(); |
643 | clear_bit(_PDEVB_op_active, addr: &pdev->flags); |
644 | smp_mb__after_atomic(); |
645 | |
646 | schedule_pcifront_aer_op(pdev); |
647 | |
648 | } |
649 | |
650 | static irqreturn_t pcifront_handler_aer(int irq, void *dev) |
651 | { |
652 | struct pcifront_device *pdev = dev; |
653 | |
654 | schedule_pcifront_aer_op(pdev); |
655 | return IRQ_HANDLED; |
656 | } |
657 | static int pcifront_connect_and_init_dma(struct pcifront_device *pdev) |
658 | { |
659 | int err = 0; |
660 | |
661 | spin_lock(lock: &pcifront_dev_lock); |
662 | |
663 | if (!pcifront_dev) { |
664 | dev_info(&pdev->xdev->dev, "Installing PCI frontend\n" ); |
665 | pcifront_dev = pdev; |
666 | } else |
667 | err = -EEXIST; |
668 | |
669 | spin_unlock(lock: &pcifront_dev_lock); |
670 | |
671 | return err; |
672 | } |
673 | |
674 | static void pcifront_disconnect(struct pcifront_device *pdev) |
675 | { |
676 | spin_lock(lock: &pcifront_dev_lock); |
677 | |
678 | if (pdev == pcifront_dev) { |
679 | dev_info(&pdev->xdev->dev, |
680 | "Disconnecting PCI Frontend Buses\n" ); |
681 | pcifront_dev = NULL; |
682 | } |
683 | |
684 | spin_unlock(lock: &pcifront_dev_lock); |
685 | } |
686 | static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev) |
687 | { |
688 | struct pcifront_device *pdev; |
689 | |
690 | pdev = kzalloc(size: sizeof(struct pcifront_device), GFP_KERNEL); |
691 | if (pdev == NULL) |
692 | goto out; |
693 | |
694 | if (xenbus_setup_ring(dev: xdev, GFP_KERNEL, vaddr: (void **)&pdev->sh_info, nr_pages: 1, |
695 | grefs: &pdev->gnt_ref)) { |
696 | kfree(objp: pdev); |
697 | pdev = NULL; |
698 | goto out; |
699 | } |
700 | pdev->sh_info->flags = 0; |
701 | |
702 | /*Flag for registering PV AER handler*/ |
703 | set_bit(_XEN_PCIB_AERHANDLER, addr: (void *)&pdev->sh_info->flags); |
704 | |
705 | dev_set_drvdata(dev: &xdev->dev, data: pdev); |
706 | pdev->xdev = xdev; |
707 | |
708 | INIT_LIST_HEAD(list: &pdev->root_buses); |
709 | |
710 | spin_lock_init(&pdev->sh_info_lock); |
711 | |
712 | pdev->evtchn = INVALID_EVTCHN; |
713 | pdev->irq = -1; |
714 | |
715 | INIT_WORK(&pdev->op_work, pcifront_do_aer); |
716 | |
717 | dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n" , |
718 | pdev, pdev->sh_info); |
719 | out: |
720 | return pdev; |
721 | } |
722 | |
723 | static void free_pdev(struct pcifront_device *pdev) |
724 | { |
725 | dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n" , pdev); |
726 | |
727 | pcifront_free_roots(pdev); |
728 | |
729 | cancel_work_sync(work: &pdev->op_work); |
730 | |
731 | if (pdev->irq >= 0) |
732 | unbind_from_irqhandler(irq: pdev->irq, dev_id: pdev); |
733 | |
734 | if (pdev->evtchn != INVALID_EVTCHN) |
735 | xenbus_free_evtchn(dev: pdev->xdev, port: pdev->evtchn); |
736 | |
737 | xenbus_teardown_ring(vaddr: (void **)&pdev->sh_info, nr_pages: 1, grefs: &pdev->gnt_ref); |
738 | |
739 | dev_set_drvdata(dev: &pdev->xdev->dev, NULL); |
740 | |
741 | kfree(objp: pdev); |
742 | } |
743 | |
744 | static int pcifront_publish_info(struct pcifront_device *pdev) |
745 | { |
746 | int err = 0; |
747 | struct xenbus_transaction trans; |
748 | |
749 | err = xenbus_alloc_evtchn(dev: pdev->xdev, port: &pdev->evtchn); |
750 | if (err) |
751 | goto out; |
752 | |
753 | err = bind_evtchn_to_irqhandler(evtchn: pdev->evtchn, handler: pcifront_handler_aer, |
754 | irqflags: 0, devname: "pcifront" , dev_id: pdev); |
755 | |
756 | if (err < 0) |
757 | return err; |
758 | |
759 | pdev->irq = err; |
760 | |
761 | do_publish: |
762 | err = xenbus_transaction_start(t: &trans); |
763 | if (err) { |
764 | xenbus_dev_fatal(dev: pdev->xdev, err, |
765 | fmt: "Error writing configuration for backend " |
766 | "(start transaction)" ); |
767 | goto out; |
768 | } |
769 | |
770 | err = xenbus_printf(t: trans, dir: pdev->xdev->nodename, |
771 | node: "pci-op-ref" , fmt: "%u" , pdev->gnt_ref); |
772 | if (!err) |
773 | err = xenbus_printf(t: trans, dir: pdev->xdev->nodename, |
774 | node: "event-channel" , fmt: "%u" , pdev->evtchn); |
775 | if (!err) |
776 | err = xenbus_printf(t: trans, dir: pdev->xdev->nodename, |
777 | node: "magic" , XEN_PCI_MAGIC); |
778 | |
779 | if (err) { |
780 | xenbus_transaction_end(t: trans, abort: 1); |
781 | xenbus_dev_fatal(dev: pdev->xdev, err, |
782 | fmt: "Error writing configuration for backend" ); |
783 | goto out; |
784 | } else { |
785 | err = xenbus_transaction_end(t: trans, abort: 0); |
786 | if (err == -EAGAIN) |
787 | goto do_publish; |
788 | else if (err) { |
789 | xenbus_dev_fatal(dev: pdev->xdev, err, |
790 | fmt: "Error completing transaction " |
791 | "for backend" ); |
792 | goto out; |
793 | } |
794 | } |
795 | |
796 | xenbus_switch_state(dev: pdev->xdev, new_state: XenbusStateInitialised); |
797 | |
798 | dev_dbg(&pdev->xdev->dev, "publishing successful!\n" ); |
799 | |
800 | out: |
801 | return err; |
802 | } |
803 | |
804 | static void pcifront_connect(struct pcifront_device *pdev) |
805 | { |
806 | int err; |
807 | int i, num_roots, len; |
808 | char str[64]; |
809 | unsigned int domain, bus; |
810 | |
811 | err = xenbus_scanf(XBT_NIL, dir: pdev->xdev->otherend, |
812 | node: "root_num" , fmt: "%d" , &num_roots); |
813 | if (err == -ENOENT) { |
814 | xenbus_dev_error(pdev->xdev, err, |
815 | "No PCI Roots found, trying 0000:00" ); |
816 | err = pcifront_rescan_root(pdev, 0, 0); |
817 | if (err) { |
818 | xenbus_dev_fatal(pdev->xdev, err, |
819 | "Error scanning PCI root 0000:00" ); |
820 | return; |
821 | } |
822 | num_roots = 0; |
823 | } else if (err != 1) { |
824 | xenbus_dev_fatal(pdev->xdev, err >= 0 ? -EINVAL : err, |
825 | "Error reading number of PCI roots" ); |
826 | return; |
827 | } |
828 | |
829 | for (i = 0; i < num_roots; i++) { |
830 | len = snprintf(str, sizeof(str), "root-%d" , i); |
831 | if (unlikely(len >= (sizeof(str) - 1))) |
832 | return; |
833 | |
834 | err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, |
835 | "%x:%x" , &domain, &bus); |
836 | if (err != 2) { |
837 | xenbus_dev_fatal(pdev->xdev, err >= 0 ? -EINVAL : err, |
838 | "Error reading PCI root %d" , i); |
839 | return; |
840 | } |
841 | |
842 | err = pcifront_rescan_root(pdev, domain, bus); |
843 | if (err) { |
844 | xenbus_dev_fatal(pdev->xdev, err, |
845 | "Error scanning PCI root %04x:%02x" , |
846 | domain, bus); |
847 | return; |
848 | } |
849 | } |
850 | |
851 | xenbus_switch_state(pdev->xdev, XenbusStateConnected); |
852 | } |
853 | |
854 | static void pcifront_try_connect(struct pcifront_device *pdev) |
855 | { |
856 | int err; |
857 | |
858 | /* Only connect once */ |
859 | if (xenbus_read_driver_state(path: pdev->xdev->nodename) != |
860 | XenbusStateInitialised) |
861 | return; |
862 | |
863 | err = pcifront_connect_and_init_dma(pdev); |
864 | if (err && err != -EEXIST) { |
865 | xenbus_dev_fatal(dev: pdev->xdev, err, |
866 | fmt: "Error setting up PCI Frontend" ); |
867 | return; |
868 | } |
869 | |
870 | pcifront_connect(pdev); |
871 | } |
872 | |
873 | static int pcifront_try_disconnect(struct pcifront_device *pdev) |
874 | { |
875 | int err = 0; |
876 | enum xenbus_state prev_state; |
877 | |
878 | |
879 | prev_state = xenbus_read_driver_state(path: pdev->xdev->nodename); |
880 | |
881 | if (prev_state >= XenbusStateClosing) |
882 | goto out; |
883 | |
884 | if (prev_state == XenbusStateConnected) { |
885 | pcifront_free_roots(pdev); |
886 | pcifront_disconnect(pdev); |
887 | } |
888 | |
889 | err = xenbus_switch_state(dev: pdev->xdev, new_state: XenbusStateClosed); |
890 | |
891 | out: |
892 | |
893 | return err; |
894 | } |
895 | |
896 | static void pcifront_attach_devices(struct pcifront_device *pdev) |
897 | { |
898 | if (xenbus_read_driver_state(path: pdev->xdev->nodename) == |
899 | XenbusStateReconfiguring) |
900 | pcifront_connect(pdev); |
901 | } |
902 | |
903 | static int pcifront_detach_devices(struct pcifront_device *pdev) |
904 | { |
905 | int err = 0; |
906 | int i, num_devs; |
907 | enum xenbus_state state; |
908 | unsigned int domain, bus, slot, func; |
909 | struct pci_dev *pci_dev; |
910 | char str[64]; |
911 | |
912 | state = xenbus_read_driver_state(path: pdev->xdev->nodename); |
913 | if (state == XenbusStateInitialised) { |
914 | dev_dbg(&pdev->xdev->dev, "Handle skipped connect.\n" ); |
915 | /* We missed Connected and need to initialize. */ |
916 | err = pcifront_connect_and_init_dma(pdev); |
917 | if (err && err != -EEXIST) { |
918 | xenbus_dev_fatal(dev: pdev->xdev, err, |
919 | fmt: "Error setting up PCI Frontend" ); |
920 | goto out; |
921 | } |
922 | |
923 | goto out_switch_state; |
924 | } else if (state != XenbusStateConnected) { |
925 | goto out; |
926 | } |
927 | |
928 | err = xenbus_scanf(XBT_NIL, dir: pdev->xdev->otherend, node: "num_devs" , fmt: "%d" , |
929 | &num_devs); |
930 | if (err != 1) { |
931 | if (err >= 0) |
932 | err = -EINVAL; |
933 | xenbus_dev_fatal(pdev->xdev, err, |
934 | "Error reading number of PCI devices" ); |
935 | goto out; |
936 | } |
937 | |
938 | /* Find devices being detached and remove them. */ |
939 | for (i = 0; i < num_devs; i++) { |
940 | int l, state; |
941 | |
942 | l = snprintf(str, sizeof(str), "state-%d" , i); |
943 | if (unlikely(l >= (sizeof(str) - 1))) { |
944 | err = -ENOMEM; |
945 | goto out; |
946 | } |
947 | state = xenbus_read_unsigned(pdev->xdev->otherend, str, |
948 | XenbusStateUnknown); |
949 | |
950 | if (state != XenbusStateClosing) |
951 | continue; |
952 | |
953 | /* Remove device. */ |
954 | l = snprintf(str, sizeof(str), "vdev-%d" , i); |
955 | if (unlikely(l >= (sizeof(str) - 1))) { |
956 | err = -ENOMEM; |
957 | goto out; |
958 | } |
959 | err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, |
960 | "%x:%x:%x.%x" , &domain, &bus, &slot, &func); |
961 | if (err != 4) { |
962 | if (err >= 0) |
963 | err = -EINVAL; |
964 | xenbus_dev_fatal(pdev->xdev, err, |
965 | "Error reading PCI device %d" , i); |
966 | goto out; |
967 | } |
968 | |
969 | pci_dev = pci_get_domain_bus_and_slot(domain, bus, |
970 | PCI_DEVFN(slot, func)); |
971 | if (!pci_dev) { |
972 | dev_dbg(&pdev->xdev->dev, |
973 | "Cannot get PCI device %04x:%02x:%02x.%d\n" , |
974 | domain, bus, slot, func); |
975 | continue; |
976 | } |
977 | pci_lock_rescan_remove(); |
978 | pci_stop_and_remove_bus_device(pci_dev); |
979 | pci_dev_put(pci_dev); |
980 | pci_unlock_rescan_remove(); |
981 | |
982 | dev_dbg(&pdev->xdev->dev, |
983 | "PCI device %04x:%02x:%02x.%d removed.\n" , |
984 | domain, bus, slot, func); |
985 | } |
986 | |
987 | out_switch_state: |
988 | err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring); |
989 | |
990 | out: |
991 | return err; |
992 | } |
993 | |
994 | static void pcifront_backend_changed(struct xenbus_device *xdev, |
995 | enum xenbus_state be_state) |
996 | { |
997 | struct pcifront_device *pdev = dev_get_drvdata(dev: &xdev->dev); |
998 | |
999 | switch (be_state) { |
1000 | case XenbusStateUnknown: |
1001 | case XenbusStateInitialising: |
1002 | case XenbusStateInitWait: |
1003 | case XenbusStateInitialised: |
1004 | break; |
1005 | |
1006 | case XenbusStateConnected: |
1007 | pcifront_try_connect(pdev); |
1008 | break; |
1009 | |
1010 | case XenbusStateClosed: |
1011 | if (xdev->state == XenbusStateClosed) |
1012 | break; |
1013 | fallthrough; /* Missed the backend's CLOSING state */ |
1014 | case XenbusStateClosing: |
1015 | dev_warn(&xdev->dev, "backend going away!\n" ); |
1016 | pcifront_try_disconnect(pdev); |
1017 | break; |
1018 | |
1019 | case XenbusStateReconfiguring: |
1020 | pcifront_detach_devices(pdev); |
1021 | break; |
1022 | |
1023 | case XenbusStateReconfigured: |
1024 | pcifront_attach_devices(pdev); |
1025 | break; |
1026 | } |
1027 | } |
1028 | |
1029 | static int pcifront_xenbus_probe(struct xenbus_device *xdev, |
1030 | const struct xenbus_device_id *id) |
1031 | { |
1032 | int err = 0; |
1033 | struct pcifront_device *pdev = alloc_pdev(xdev); |
1034 | |
1035 | if (pdev == NULL) { |
1036 | err = -ENOMEM; |
1037 | xenbus_dev_fatal(dev: xdev, err, |
1038 | fmt: "Error allocating pcifront_device struct" ); |
1039 | goto out; |
1040 | } |
1041 | |
1042 | err = pcifront_publish_info(pdev); |
1043 | if (err) |
1044 | free_pdev(pdev); |
1045 | |
1046 | out: |
1047 | return err; |
1048 | } |
1049 | |
1050 | static void pcifront_xenbus_remove(struct xenbus_device *xdev) |
1051 | { |
1052 | struct pcifront_device *pdev = dev_get_drvdata(dev: &xdev->dev); |
1053 | |
1054 | if (pdev) |
1055 | free_pdev(pdev); |
1056 | } |
1057 | |
1058 | static const struct xenbus_device_id xenpci_ids[] = { |
1059 | {"pci" }, |
1060 | {"" }, |
1061 | }; |
1062 | |
1063 | static struct xenbus_driver xenpci_driver = { |
1064 | .name = "pcifront" , |
1065 | .ids = xenpci_ids, |
1066 | .probe = pcifront_xenbus_probe, |
1067 | .remove = pcifront_xenbus_remove, |
1068 | .otherend_changed = pcifront_backend_changed, |
1069 | }; |
1070 | |
1071 | static int __init pcifront_init(void) |
1072 | { |
1073 | if (!xen_pv_domain() || xen_initial_domain()) |
1074 | return -ENODEV; |
1075 | |
1076 | if (!xen_has_pv_devices()) |
1077 | return -ENODEV; |
1078 | |
1079 | pci_frontend_registrar(enable: 1 /* enable */); |
1080 | |
1081 | return xenbus_register_frontend(&xenpci_driver); |
1082 | } |
1083 | |
1084 | static void __exit pcifront_cleanup(void) |
1085 | { |
1086 | xenbus_unregister_driver(drv: &xenpci_driver); |
1087 | pci_frontend_registrar(enable: 0 /* disable */); |
1088 | } |
1089 | module_init(pcifront_init); |
1090 | module_exit(pcifront_cleanup); |
1091 | |
1092 | MODULE_DESCRIPTION("Xen PCI passthrough frontend." ); |
1093 | MODULE_LICENSE("GPL" ); |
1094 | MODULE_ALIAS("xen:pci" ); |
1095 | |