// SPDX-License-Identifier: GPL-2.0-only
/*
 * Support of MSI, HPET and DMAR interrupts.
 *
 * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
 * Moved from arch/x86/kernel/apic/io_apic.c.
 * Jiang Liu <jiang.liu@linux.intel.com>
 *	Convert to hierarchical irqdomain
 */
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/hpet.h>
#include <linux/msi.h>
#include <asm/irqdomain.h>
#include <asm/hpet.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/xen/hypervisor.h>

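/* The default irq domain assigned to PCI/MSI interrupts of PCI devices */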
struct irq_domain *x86_pci_msi_default_domain __ro_after_init;

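/*
 * Compose the MSI message for the vector/destination in @cfg and let the
 * irq chip of @irqd write it out to the device.
 */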
static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg)
{
	struct msi_msg msg[2] = { [1] = { }, };

	__irq_msi_compose_msg(cfg, msg, false);
	irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg);
}

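/*
 * Update the affinity of an MSI interrupt. When a direct update of the
 * address/data pair would be unsafe, the interrupt is moved in two steps
 * (new vector on the local CPU first, then the new destination CPU); see
 * the comments below for the details.
 */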
static int
msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
{
	struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd);
	struct irq_data *parent = irqd->parent_data;
	unsigned int cpu;
	int ret;

	/* Save the current configuration */
	cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd));
	old_cfg = *cfg;

	/* Allocate a new target vector */
	ret = parent->chip->irq_set_affinity(parent, mask, force);
	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
		return ret;

	/*
	 * For non-maskable and non-remapped MSI interrupts the migration
	 * to a different destination CPU and a different vector has to be
	 * done carefully to handle the possible stray interrupt which can
	 * be caused by the non-atomic update of the address/data pair.
	 *
	 * Direct update is possible when:
	 * - The MSI is maskable (remapped MSI does not use this code path).
	 *   The reservation mode bit is set in this case.
	 * - The new vector is the same as the old vector
	 * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)
	 * - The interrupt is not yet started up
	 * - The new destination CPU is the same as the old destination CPU
	 */
	if (!irqd_can_reserve(irqd) ||
	    cfg->vector == old_cfg.vector ||
	    old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||
	    !irqd_is_started(irqd) ||
	    cfg->dest_apicid == old_cfg.dest_apicid) {
		irq_msi_update_msg(irqd, cfg);
		return ret;
	}

	/*
	 * Paranoia: Validate that the interrupt target is the local CPU.
	 */
	if (WARN_ON_ONCE(cpu != smp_processor_id())) {
		irq_msi_update_msg(irqd, cfg);
		return ret;
	}

	/*
	 * Redirect the interrupt to the new vector on the current CPU
	 * first. This might cause a spurious interrupt on this vector if
	 * the device raises an interrupt right between this update and the
	 * update to the final destination CPU.
	 *
	 * If the vector is in use then the installed device handler will
	 * denote it as spurious, which is no harm as this is a rare event
	 * and interrupt handlers have to cope with spurious interrupts
	 * anyway. If the vector is unused, then it is marked so it won't
	 * trigger the 'No irq handler for vector' warning in
	 * common_interrupt().
	 *
	 * This requires holding the vector lock to prevent concurrent
	 * updates to the affected vector.
	 */
	lock_vector_lock();

	/*
	 * Mark the new target vector on the local CPU if it is currently
	 * unused. Reuse the VECTOR_RETRIGGERED state which is also used in
	 * the CPU hotplug path for a similar purpose. This cannot be
	 * undone here as the current CPU has interrupts disabled and
	 * cannot handle the interrupt before the whole set_affinity()
	 * section is done. In the CPU unplug case, the current CPU is
	 * about to vanish and will not handle any interrupts anymore. The
	 * vector is cleaned up when the CPU comes online again.
	 */
	if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector])))
		this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED);

	/* Redirect it to the new vector on the local CPU temporarily */
	old_cfg.vector = cfg->vector;
	irq_msi_update_msg(irqd, &old_cfg);

	/* Now transition it to the target CPU */
	irq_msi_update_msg(irqd, cfg);

	/*
	 * All interrupts after this point are now targeted at the new
	 * vector/CPU.
	 *
	 * Drop the vector lock before testing whether the temporary
	 * assignment to the local CPU was hit by an interrupt raised in
	 * the device, because the retrigger function acquires the vector
	 * lock again.
	 */
	unlock_vector_lock();

	/*
	 * Check whether the transition raced with a device interrupt and
	 * is pending in the local APIC's IRR. It is safe to do this outside
	 * of the vector lock as the irq_desc::lock of this interrupt is
	 * still held and interrupts are disabled: The check is not
	 * accessing the underlying vector store. It's just checking the
	 * local APIC's IRR.
	 */
	if (lapic_vector_set_in_irr(cfg->vector))
		irq_data_get_irq_chip(irqd)->irq_retrigger(irqd);

	return ret;
}

/**
 * pci_dev_has_default_msi_parent_domain - Check whether the device has the
 *					   default MSI parent domain associated
 * @dev:	Pointer to the PCI device
 */
bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev)
{
	struct irq_domain *domain = dev_get_msi_domain(&dev->dev);

	if (!domain)
		domain = dev_get_msi_domain(&dev->bus->dev);
	if (!domain)
		return false;

	return domain == x86_vector_domain;
}

/**
 * x86_msi_prepare - Setup of msi_alloc_info_t for allocations
 * @domain:	The domain for which this setup happens
 * @dev:	The device for which interrupts are allocated
 * @nvec:	The number of vectors to allocate
 * @alloc:	The allocation info structure to initialize
 *
 * This function is to be used for all types of MSI domains above the x86
 * vector domain and any intermediates. It is always invoked from the
 * top level interrupt domain. The domain specific allocation
 * functionality is determined via the @domain's bus token, which allows
 * mapping to the x86-specific allocation type.
 */
static int x86_msi_prepare(struct irq_domain *domain, struct device *dev,
			   int nvec, msi_alloc_info_t *alloc)
{
	struct msi_domain_info *info = domain->host_data;

	init_irq_alloc_info(alloc, NULL);

	switch (info->bus_token) {
	case DOMAIN_BUS_PCI_DEVICE_MSI:
		alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
		return 0;
	case DOMAIN_BUS_PCI_DEVICE_MSIX:
	case DOMAIN_BUS_PCI_DEVICE_IMS:
		alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
		return 0;
	default:
		return -EINVAL;
	}
}

/**
 * x86_init_dev_msi_info - Domain info setup for MSI domains
 * @dev:		The device for which the domain should be created
 * @domain:		The (root) domain providing this callback
 * @real_parent:	The real parent domain of the domain to be initialized
 * @info:		The domain info for the domain to be initialized
 *
 * This function is to be used for all types of MSI domains above the x86
 * vector domain and any intermediates. The domain specific functionality
 * is determined via the @real_parent.
 */
static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
				  struct irq_domain *real_parent,
				  struct msi_domain_info *info)
{
	const struct msi_parent_ops *pops = real_parent->msi_parent_ops;

	/* MSI parent domain specific settings */
	switch (real_parent->bus_token) {
	case DOMAIN_BUS_ANY:
		/* Only the vector domain can have the ANY token */
		if (WARN_ON_ONCE(domain != real_parent))
			return false;
		info->chip->irq_set_affinity = msi_set_affinity;
		break;
	case DOMAIN_BUS_DMAR:
	case DOMAIN_BUS_AMDVI:
		break;
	default:
		WARN_ON_ONCE(1);
		return false;
	}

	/* Is the target supported? */
	switch (info->bus_token) {
	case DOMAIN_BUS_PCI_DEVICE_MSI:
	case DOMAIN_BUS_PCI_DEVICE_MSIX:
		break;
	case DOMAIN_BUS_PCI_DEVICE_IMS:
		if (!(pops->supported_flags & MSI_FLAG_PCI_IMS))
			return false;
		break;
	default:
		WARN_ON_ONCE(1);
		return false;
	}

	/*
	 * Mask out the domain specific MSI feature flags which are not
	 * supported by the real parent.
	 */
	info->flags &= pops->supported_flags;
	/* Enforce the required flags */
	info->flags |= X86_VECTOR_MSI_FLAGS_REQUIRED;

	/* This is always invoked from the top level MSI domain! */
	info->ops->msi_prepare = x86_msi_prepare;

	info->chip->irq_ack = irq_chip_ack_parent;
	info->chip->irq_retrigger = irq_chip_retrigger_hierarchy;
	info->chip->flags |= IRQCHIP_SKIP_SET_WAKE |
			     IRQCHIP_AFFINITY_PRE_STARTUP;

	info->handler = handle_edge_irq;
	info->handler_name = "edge";

	return true;
}

static const struct msi_parent_ops x86_vector_msi_parent_ops = {
	.supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED,
	.init_dev_msi_info = x86_init_dev_msi_info,
};

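/*
 * In the native case the x86 vector domain itself acts as the MSI parent
 * domain; device specific PCI/MSI domains are then created as children
 * of it.
 */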
struct irq_domain * __init native_create_pci_msi_domain(void)
{
	if (apic_is_disabled)
		return NULL;

	x86_vector_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
	x86_vector_domain->msi_parent_ops = &x86_vector_msi_parent_ops;
	return x86_vector_domain;
}

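/*
 * The indirection via x86_init allows hypervisor specific code (e.g. Xen)
 * to provide its own PCI/MSI parent domain instead of the native one.
 */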
void __init x86_create_pci_msi_domain(void)
{
	x86_pci_msi_default_domain = x86_init.irqs.create_pci_msi_domain();
}

/* Keep around for Hyper-V */
int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
		    msi_alloc_info_t *arg)
{
	init_irq_alloc_info(arg, NULL);

	if (to_pci_dev(dev)->msix_enabled)
		arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
	else
		arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
	return 0;
}
EXPORT_SYMBOL_GPL(pci_msi_prepare);

#ifdef CONFIG_DMAR_TABLE
/*
 * The Intel IOMMU (ab)uses the high bits of the MSI address to contain the
 * high bits of the destination APIC ID. This can't be done in the general
 * case for MSIs as it would be targeting real memory above 4GiB, not the
 * APIC.
 */
static void dmar_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
{
	__irq_msi_compose_msg(irqd_cfg(data), msg, true);
}

static void dmar_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
{
	dmar_msi_write(data->irq, msg);
}

static struct irq_chip dmar_msi_controller = {
	.name = "DMAR-MSI",
	.irq_unmask = dmar_msi_unmask,
	.irq_mask = dmar_msi_mask,
	.irq_ack = irq_chip_ack_parent,
	.irq_set_affinity = msi_domain_set_affinity,
	.irq_retrigger = irq_chip_retrigger_hierarchy,
	.irq_compose_msi_msg = dmar_msi_compose_msg,
	.irq_write_msi_msg = dmar_msi_write_msg,
	.flags = IRQCHIP_SKIP_SET_WAKE |
		 IRQCHIP_AFFINITY_PRE_STARTUP,
};

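/*
 * Initialize a freshly allocated DMAR interrupt: the DMAR unit id serves
 * as the hardware interrupt number and the interrupt is handled as edge
 * triggered.
 */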
static int dmar_msi_init(struct irq_domain *domain,
			 struct msi_domain_info *info, unsigned int virq,
			 irq_hw_number_t hwirq, msi_alloc_info_t *arg)
{
	irq_domain_set_info(domain, virq, arg->devid, info->chip, NULL,
			    handle_edge_irq, arg->data, "edge");

	return 0;
}

static struct msi_domain_ops dmar_msi_domain_ops = {
	.msi_init = dmar_msi_init,
};

static struct msi_domain_info dmar_msi_domain_info = {
	.ops = &dmar_msi_domain_ops,
	.chip = &dmar_msi_controller,
	.flags = MSI_FLAG_USE_DEF_DOM_OPS,
};

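/*
 * Create the DMAR-MSI irq domain on first use and cache it for subsequent
 * callers. The mutex serializes concurrent creation attempts.
 */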
static struct irq_domain *dmar_get_irq_domain(void)
{
	static struct irq_domain *dmar_domain;
	static DEFINE_MUTEX(dmar_lock);
	struct fwnode_handle *fn;

	mutex_lock(&dmar_lock);
	if (dmar_domain)
		goto out;

	fn = irq_domain_alloc_named_fwnode("DMAR-MSI");
	if (fn) {
		dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info,
						    x86_vector_domain);
		if (!dmar_domain)
			irq_domain_free_fwnode(fn);
	}
out:
	mutex_unlock(&dmar_lock);
	return dmar_domain;
}

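/*
 * Allocate an interrupt for a DMAR unit. @id is used both as the unit id
 * and the hardware interrupt number; @arg is stored as the handler data.
 * Returns the Linux interrupt number on success or a negative value on
 * failure.
 */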
int dmar_alloc_hwirq(int id, int node, void *arg)
{
	struct irq_domain *domain = dmar_get_irq_domain();
	struct irq_alloc_info info;

	if (!domain)
		return -1;

	init_irq_alloc_info(&info, NULL);
	info.type = X86_IRQ_ALLOC_TYPE_DMAR;
	info.devid = id;
	info.hwirq = id;
	info.data = arg;

	return irq_domain_alloc_irqs(domain, 1, node, &info);
}

void dmar_free_hwirq(int irq)
{
	irq_domain_free_irqs(irq, 1);
}
#endif

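/*
 * Invoked when the MSI state of a device has to be restored, e.g. on
 * resume. The actual work is only done when running as the Xen initial
 * domain; see xen_initdom_restore_msi().
 */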
bool arch_restore_msi_irqs(struct pci_dev *dev)
{
	return xen_initdom_restore_msi(dev);
}