/*
 * Xen PCI - handle PCI (INTx) and MSI infrastructure calls for PV, HVM and
 * initial domain support. We also handle the DSDT _PRT callbacks for GSIs
 * used in HVM and initial domain mode (PV does not parse ACPI, so it has no
 * concept of GSIs). Under PV we hook under the pcibios API for IRQs and
 * 0xcf8 PCI configuration read/write.
 *
 *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
 *           Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 *           Stefano Stabellini <stefano.stabellini@eu.citrix.com>
 */
#include <linux/export.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/acpi.h>

#include <linux/io.h>
#include <asm/io_apic.h>
#include <asm/pci_x86.h>

#include <asm/xen/hypervisor.h>

#include <xen/features.h>
#include <xen/events.h>
#include <asm/xen/pci.h>
#include <asm/xen/cpuid.h>
#include <asm/apic.h>
#include <asm/i8259.h>

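/*
 * PV pcifront path: the backend has already mapped the device's legacy
 * interrupt to a pirq and stored that pirq in PCI_INTERRUPT_LINE, so all
 * we need to do here is bind that pirq to a Linux IRQ.
 */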
static int xen_pcifront_enable_irq(struct pci_dev *dev)
{
	int rc;
	int share = 1;
	int pirq;
	u8 gsi;

	rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
	if (rc < 0) {
		dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
			 rc);
		return rc;
	}
	/* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line. */
	pirq = gsi;

	if (gsi < nr_legacy_irqs())
		share = 0;

	rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront");
	if (rc < 0) {
		dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n",
			 gsi, pirq, rc);
		return rc;
	}

	dev->irq = rc;
	dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
	return 0;
}

#ifdef CONFIG_ACPI
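/*
 * Map a GSI to a pirq with PHYSDEVOP_map_pirq and bind the result to a
 * Linux IRQ.  If the GSI is already known to the event channel layer the
 * existing IRQ is returned.  When @set_pirq is true the pirq number is
 * forced to equal the GSI (used by the initial domain so that pirq == gsi).
 */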
static int xen_register_pirq(u32 gsi, int gsi_override, int triggering,
			     bool set_pirq)
{
	int rc, pirq = -1, irq = -1;
	struct physdev_map_pirq map_irq;
	int shareable = 0;
	char *name;

	irq = xen_irq_from_gsi(gsi);
	if (irq > 0)
		return irq;

	if (set_pirq)
		pirq = gsi;

	map_irq.domid = DOMID_SELF;
	map_irq.type = MAP_PIRQ_TYPE_GSI;
	map_irq.index = gsi;
	map_irq.pirq = pirq;

	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
	if (rc) {
		printk(KERN_WARNING "xen map irq failed %d\n", rc);
		return -1;
	}

	if (triggering == ACPI_EDGE_SENSITIVE) {
		shareable = 0;
		name = "ioapic-edge";
	} else {
		shareable = 1;
		name = "ioapic-level";
	}

	if (gsi_override >= 0)
		gsi = gsi_override;

	irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name);
	if (irq < 0)
		goto out;

	printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", map_irq.pirq, irq, gsi);
out:
	return irq;
}

static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
				     int trigger, int polarity)
{
	if (!xen_hvm_domain())
		return -1;

	return xen_register_pirq(gsi, -1 /* no GSI override */, trigger,
				 false /* no mapping of GSI to PIRQ */);
}

#ifdef CONFIG_XEN_DOM0
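/*
 * Dom0 only: besides mapping the GSI to a pirq and IRQ, tell Xen the
 * trigger mode and polarity with PHYSDEVOP_setup_gsi so the hypervisor
 * can configure the interrupt accordingly.
 */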
static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity)
{
	int rc, irq;
	struct physdev_setup_gsi setup_gsi;

	if (!xen_pv_domain())
		return -1;

	printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
	       gsi, triggering, polarity);

	irq = xen_register_pirq(gsi, gsi_override, triggering, true);

	setup_gsi.gsi = gsi;
	setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
	setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);

	rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
	if (rc == -EEXIST)
		printk(KERN_INFO "GSI %d already set up\n", gsi);
	else if (rc) {
		printk(KERN_ERR "Failed to set up GSI %d, err_code: %d\n",
		       gsi, rc);
	}

	return irq;
}

static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
				 int trigger, int polarity)
{
	return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity);
}
#endif
#endif

#if defined(CONFIG_PCI_MSI)
#include <linux/msi.h>
#include <asm/msidef.h>

struct xen_pci_frontend_ops *xen_pci_frontend;
EXPORT_SYMBOL_GPL(xen_pci_frontend);

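/*
 * PV frontend MSI/MSI-X setup: ask the pcifront backend to enable MSI(-X)
 * and hand back one pirq per vector, then bind each pirq to an
 * event-channel backed Linux IRQ.  Multi-vector MSI is not supported on
 * this path, so return 1 to make the caller retry with a single vector.
 */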
static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	int irq, ret, i;
	struct msi_desc *msidesc;
	int *v;

	if (type == PCI_CAP_ID_MSI && nvec > 1)
		return 1;

	v = kcalloc(max(1, nvec), sizeof(int), GFP_KERNEL);
	if (!v)
		return -ENOMEM;

	if (type == PCI_CAP_ID_MSIX)
		ret = xen_pci_frontend_enable_msix(dev, v, nvec);
	else
		ret = xen_pci_frontend_enable_msi(dev, v);
	if (ret)
		goto error;
	i = 0;
	for_each_pci_msi_entry(msidesc, dev) {
		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i],
					       (type == PCI_CAP_ID_MSI) ? nvec : 1,
					       (type == PCI_CAP_ID_MSIX) ?
					       "pcifront-msi-x" :
					       "pcifront-msi",
					       DOMID_SELF);
		if (irq < 0) {
			ret = irq;
			goto free;
		}
		i++;
	}
	kfree(v);
	return 0;

error:
	if (ret == -ENOSYS)
		dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
	else if (ret)
		dev_err(&dev->dev, "Xen PCI frontend error: %d!\n", ret);
free:
	kfree(v);
	return ret;
}

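/*
 * MSI data template used when the message carries a pirq rather than a
 * real vector; see xen_msi_compose_msg() below.
 */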
#define XEN_PIRQ_MSI_DATA  (MSI_DATA_TRIGGER_EDGE | \
		MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0))

static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
				struct msi_msg *msg)
{
	/*
	 * We set vector == 0 to tell the hypervisor we don't care about it,
	 * but we want a pirq setup instead.
	 * We use the dest_id field to pass the pirq that we want.
	 */
	msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(pirq);
	msg->address_lo =
		MSI_ADDR_BASE_LO |
		MSI_ADDR_DEST_MODE_PHYSICAL |
		MSI_ADDR_REDIRECTION_CPU |
		MSI_ADDR_DEST_ID(pirq);

	msg->data = XEN_PIRQ_MSI_DATA;
}

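/*
 * HVM MSI/MSI-X setup: allocate a pirq from Xen, encode it in the MSI
 * message (see above) so the hypervisor routes the interrupt through the
 * pirq, then bind the pirq to a Linux IRQ.
 */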
static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	int irq, pirq;
	struct msi_desc *msidesc;
	struct msi_msg msg;

	if (type == PCI_CAP_ID_MSI && nvec > 1)
		return 1;

	for_each_pci_msi_entry(msidesc, dev) {
		pirq = xen_allocate_pirq_msi(dev, msidesc);
		if (pirq < 0) {
			irq = -ENODEV;
			goto error;
		}
		xen_msi_compose_msg(dev, pirq, &msg);
		__pci_write_msi_msg(msidesc, &msg);
		dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
					       (type == PCI_CAP_ID_MSI) ? nvec : 1,
					       (type == PCI_CAP_ID_MSIX) ?
					       "msi-x" : "msi",
					       DOMID_SELF);
		if (irq < 0)
			goto error;
		dev_dbg(&dev->dev,
			"xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
	}
	return 0;

error:
	dev_err(&dev->dev, "Failed to create MSI%s! ret=%d!\n",
		type == PCI_CAP_ID_MSI ? "" : "-X", irq);
	return irq;
}

#ifdef CONFIG_XEN_DOM0
static bool __read_mostly pci_seg_supported = true;

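/*
 * Initial domain MSI/MSI-X setup: ask Xen to map the device's MSI(s) to
 * pirqs with PHYSDEVOP_map_pirq.  The target domain is the domain that
 * owns the device (for devices assigned to another domain), falling back
 * to DOMID_SELF.  If the hypervisor does not understand segment-aware
 * mappings (MAP_PIRQ_TYPE_MSI_SEG) we retry with the legacy MSI type for
 * segment 0 and remember that segments are unsupported.
 */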
static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	int ret = 0;
	struct msi_desc *msidesc;

	for_each_pci_msi_entry(msidesc, dev) {
		struct physdev_map_pirq map_irq;
		domid_t domid;

		domid = ret = xen_find_device_domain_owner(dev);
		/* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED,
		 * hence check ret value for < 0. */
		if (ret < 0)
			domid = DOMID_SELF;

		memset(&map_irq, 0, sizeof(map_irq));
		map_irq.domid = domid;
		map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
		map_irq.index = -1;
		map_irq.pirq = -1;
		map_irq.bus = dev->bus->number |
			      (pci_domain_nr(dev->bus) << 16);
		map_irq.devfn = dev->devfn;

		if (type == PCI_CAP_ID_MSI && nvec > 1) {
			map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI;
			map_irq.entry_nr = nvec;
		} else if (type == PCI_CAP_ID_MSIX) {
			int pos;
			unsigned long flags;
			u32 table_offset, bir;

			pos = dev->msix_cap;
			pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
					      &table_offset);
			bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
			flags = pci_resource_flags(dev, bir);
			if (!flags || (flags & IORESOURCE_UNSET))
				return -EINVAL;

			map_irq.table_base = pci_resource_start(dev, bir);
			map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
		}

		ret = -EINVAL;
		if (pci_seg_supported)
			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
						    &map_irq);
		if (type == PCI_CAP_ID_MSI && nvec > 1 && ret) {
			/*
			 * If MAP_PIRQ_TYPE_MULTI_MSI is not available
			 * there's nothing else we can do in this case.
			 * Just set ret > 0 so driver can retry with
			 * single MSI.
			 */
			ret = 1;
			goto out;
		}
		if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
			map_irq.type = MAP_PIRQ_TYPE_MSI;
			map_irq.index = -1;
			map_irq.pirq = -1;
			map_irq.bus = dev->bus->number;
			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
						    &map_irq);
			if (ret != -EINVAL)
				pci_seg_supported = false;
		}
		if (ret) {
			dev_warn(&dev->dev, "xen map irq failed %d for domain %d\n",
				 ret, domid);
			goto out;
		}

		ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq,
					       (type == PCI_CAP_ID_MSI) ? nvec : 1,
					       (type == PCI_CAP_ID_MSIX) ? "msi-x" : "msi",
					       domid);
		if (ret < 0)
			goto out;
	}
	ret = 0;
out:
	return ret;
}

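/*
 * Re-program the MSI state Xen keeps for this device (called when MSI
 * state is restored, e.g. after resume), preferring the segment-aware
 * hypercall and falling back to the legacy one if the hypervisor does
 * not implement it.
 */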
static void xen_initdom_restore_msi_irqs(struct pci_dev *dev)
{
	int ret = 0;

	if (pci_seg_supported) {
		struct physdev_pci_device restore_ext;

		restore_ext.seg = pci_domain_nr(dev->bus);
		restore_ext.bus = dev->bus->number;
		restore_ext.devfn = dev->devfn;
		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext,
					    &restore_ext);
		if (ret == -ENOSYS)
			pci_seg_supported = false;
		WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret);
	}
	if (!pci_seg_supported) {
		struct physdev_restore_msi restore;

		restore.bus = dev->bus->number;
		restore.devfn = dev->devfn;
		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore);
		WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
	}
}
#endif

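/*
 * Frontend teardown: tell the pcifront backend to disable MSI(-X) on the
 * device, then let the generic code free the IRQs and msi_desc entries.
 */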
static void xen_teardown_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *msidesc;

	msidesc = first_pci_msi_entry(dev);
	if (msidesc->msi_attrib.is_msix)
		xen_pci_frontend_disable_msix(dev);
	else
		xen_pci_frontend_disable_msi(dev);

	/* Free the IRQs and the msidesc using the generic code. */
	default_teardown_msi_irqs(dev);
}

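/* Common per-IRQ teardown: hand the IRQ back to the Xen event-channel core. */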
static void xen_teardown_msi_irq(unsigned int irq)
{
	xen_destroy_irq(irq);
}

#endif

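/*
 * Entry point for PV domUs: register the pcifront IRQ enable hook, keep
 * ACPI out of IRQ routing, and install the frontend MSI ops.
 */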
int __init pci_xen_init(void)
{
	if (!xen_pv_domain() || xen_initial_domain())
		return -ENODEV;

	printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n");

	pcibios_set_cache_line_size();

	pcibios_enable_irq = xen_pcifront_enable_irq;
	pcibios_disable_irq = NULL;

	/* Keep ACPI out of the picture */
	acpi_noirq_set();

#ifdef CONFIG_PCI_MSI
	x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
	x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs;
	pci_msi_ignore_mask = 1;
#endif
	return 0;
}

#ifdef CONFIG_PCI_MSI
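/*
 * Called from x86_platform.apic_post_init on HVM guests, after the APIC
 * mode has been settled; only then do we know whether pirq-based MSI
 * handling is actually needed.
 */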
void __init xen_msi_init(void)
{
	if (!disable_apic) {
		/*
		 * If hardware supports (x2)APIC virtualization (as indicated
		 * by hypervisor's leaf 4) then we don't need to use pirqs/
		 * event channels for MSI handling and instead use regular
		 * APIC processing.
		 */
		uint32_t eax = cpuid_eax(xen_cpuid_base() + 4);

		if (((eax & XEN_HVM_CPUID_X2APIC_VIRT) && x2apic_mode) ||
		    ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && boot_cpu_has(X86_FEATURE_APIC)))
			return;
	}

	x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
}
#endif

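/*
 * Entry point for HVM guests: only override GSI registration and (later)
 * the MSI ops when event-channel backed pirqs are actually available.
 */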
int __init pci_xen_hvm_init(void)
{
	if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
		return 0;

#ifdef CONFIG_ACPI
	/*
	 * We don't want to change the actual ACPI delivery model,
	 * just how GSIs get registered.
	 */
	__acpi_register_gsi = acpi_register_gsi_xen_hvm;
	__acpi_unregister_gsi = NULL;
#endif

#ifdef CONFIG_PCI_MSI
	/*
	 * We need to wait until after x2apic is initialized
	 * before we can set MSI IRQ ops.
	 */
	x86_platform.apic_post_init = xen_msi_init;
#endif
	return 0;
}

#ifdef CONFIG_XEN_DOM0
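/*
 * Entry point for the initial domain: install the Dom0 MSI ops, route ACPI
 * GSI registration through Xen, and pre-register the legacy IRQs (binding
 * them 1:1 to 8259-style pirqs when no IO-APIC is present).
 */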
int __init pci_xen_initial_domain(void)
{
	int irq;

#ifdef CONFIG_PCI_MSI
	x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
	x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
	pci_msi_ignore_mask = 1;
#endif
	__acpi_register_gsi = acpi_register_gsi_xen;
	__acpi_unregister_gsi = NULL;
	/*
	 * Pre-allocate the legacy IRQs. Use NR_IRQS_LEGACY here
	 * because we don't have a PIC and thus nr_legacy_irqs() is zero.
	 */
	for (irq = 0; irq < NR_IRQS_LEGACY; irq++) {
		int trigger, polarity;

		if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
			continue;

		xen_register_pirq(irq, -1 /* no GSI override */,
			trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE,
			true /* Map GSI to PIRQ */);
	}
	if (nr_ioapics == 0) {
		for (irq = 0; irq < nr_legacy_irqs(); irq++)
			xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic");
	}
	return 0;
}

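/*
 * Bookkeeping for which domain currently owns a passed-through PCI device.
 * The Xen PCI backend registers/unregisters entries here so that the MSI
 * mapping above can target the owning domain instead of DOMID_SELF.
 */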
struct xen_device_domain_owner {
	domid_t domain;
	struct pci_dev *dev;
	struct list_head list;
};

static DEFINE_SPINLOCK(dev_domain_list_spinlock);
static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);

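/* Caller must hold dev_domain_list_spinlock. */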
static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
{
	struct xen_device_domain_owner *owner;

	list_for_each_entry(owner, &dev_domain_list, list) {
		if (owner->dev == dev)
			return owner;
	}
	return NULL;
}

int xen_find_device_domain_owner(struct pci_dev *dev)
{
	struct xen_device_domain_owner *owner;
	int domain = -ENODEV;

	spin_lock(&dev_domain_list_spinlock);
	owner = find_device(dev);
	if (owner)
		domain = owner->domain;
	spin_unlock(&dev_domain_list_spinlock);
	return domain;
}
EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);

int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
{
	struct xen_device_domain_owner *owner;

	owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
	if (!owner)
		return -ENOMEM;

	spin_lock(&dev_domain_list_spinlock);
	if (find_device(dev)) {
		spin_unlock(&dev_domain_list_spinlock);
		kfree(owner);
		return -EEXIST;
	}
	owner->domain = domain;
	owner->dev = dev;
	list_add_tail(&owner->list, &dev_domain_list);
	spin_unlock(&dev_domain_list_spinlock);
	return 0;
}
EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);

int xen_unregister_device_domain_owner(struct pci_dev *dev)
{
	struct xen_device_domain_owner *owner;

	spin_lock(&dev_domain_list_spinlock);
	owner = find_device(dev);
	if (!owner) {
		spin_unlock(&dev_domain_list_spinlock);
		return -ENODEV;
	}
	list_del(&owner->list);
	spin_unlock(&dev_domain_list_spinlock);
	kfree(owner);
	return 0;
}
EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
#endif