1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2014 Intel Corp.
4 * Author: Jiang Liu <jiang.liu@linux.intel.com>
5 *
6 * This file is licensed under GPLv2.
7 *
8 * This file contains common code to support Message Signaled Interrupts for
9 * PCI compatible and non PCI compatible devices.
10 */
11#include <linux/types.h>
12#include <linux/device.h>
13#include <linux/irq.h>
14#include <linux/irqdomain.h>
15#include <linux/msi.h>
16#include <linux/slab.h>
17#include <linux/sysfs.h>
18#include <linux/pci.h>
19
20#include "internals.h"
21
/**
 * struct msi_ctrl - Control block describing one internal MSI management operation
 * @domid:	ID of the domain the operation acts on
 * @first:	First (hardware) slot index covered by the operation
 * @last:	Last (hardware) slot index covered by the operation (inclusive)
 * @nirqs:	Number of Linux interrupts to allocate. This may exceed the
 *		@first..@last range because of PCI/multi-MSI.
 */
struct msi_ctrl {
	unsigned int	domid;
	unsigned int	first;
	unsigned int	last;
	unsigned int	nirqs;
};
36
/* Xarray index which lies outside of any searchable range, i.e. "invalid" */
#define MSI_XA_MAX_INDEX	(ULONG_MAX - 1)
/* Upper bound for the size of a MSI domain */
#define MSI_XA_DOMAIN_SIZE	(MSI_MAX_INDEX + 1)

/* Forward declarations */
static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl);
static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid);
static inline int msi_sysfs_create_group(struct device *dev);
45
46
47/**
48 * msi_alloc_desc - Allocate an initialized msi_desc
49 * @dev: Pointer to the device for which this is allocated
50 * @nvec: The number of vectors used in this entry
51 * @affinity: Optional pointer to an affinity mask array size of @nvec
52 *
53 * If @affinity is not %NULL then an affinity array[@nvec] is allocated
54 * and the affinity masks and flags from @affinity are copied.
55 *
56 * Return: pointer to allocated &msi_desc on success or %NULL on failure
57 */
58static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec,
59 const struct irq_affinity_desc *affinity)
60{
61 struct msi_desc *desc = kzalloc(size: sizeof(*desc), GFP_KERNEL);
62
63 if (!desc)
64 return NULL;
65
66 desc->dev = dev;
67 desc->nvec_used = nvec;
68 if (affinity) {
69 desc->affinity = kmemdup(p: affinity, size: nvec * sizeof(*desc->affinity), GFP_KERNEL);
70 if (!desc->affinity) {
71 kfree(objp: desc);
72 return NULL;
73 }
74 }
75 return desc;
76}
77
78static void msi_free_desc(struct msi_desc *desc)
79{
80 kfree(objp: desc->affinity);
81 kfree(objp: desc);
82}
83
84static int msi_insert_desc(struct device *dev, struct msi_desc *desc,
85 unsigned int domid, unsigned int index)
86{
87 struct msi_device_data *md = dev->msi.data;
88 struct xarray *xa = &md->__domains[domid].store;
89 unsigned int hwsize;
90 int ret;
91
92 hwsize = msi_domain_get_hwsize(dev, domid);
93
94 if (index == MSI_ANY_INDEX) {
95 struct xa_limit limit = { .min = 0, .max = hwsize - 1 };
96 unsigned int index;
97
98 /* Let the xarray allocate a free index within the limit */
99 ret = xa_alloc(xa, id: &index, entry: desc, limit, GFP_KERNEL);
100 if (ret)
101 goto fail;
102
103 desc->msi_index = index;
104 return 0;
105 } else {
106 if (index >= hwsize) {
107 ret = -ERANGE;
108 goto fail;
109 }
110
111 desc->msi_index = index;
112 ret = xa_insert(xa, index, entry: desc, GFP_KERNEL);
113 if (ret)
114 goto fail;
115 return 0;
116 }
117fail:
118 msi_free_desc(desc);
119 return ret;
120}
121
122/**
123 * msi_domain_insert_msi_desc - Allocate and initialize a MSI descriptor and
124 * insert it at @init_desc->msi_index
125 *
126 * @dev: Pointer to the device for which the descriptor is allocated
127 * @domid: The id of the interrupt domain to which the desriptor is added
128 * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor
129 *
130 * Return: 0 on success or an appropriate failure code.
131 */
132int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid,
133 struct msi_desc *init_desc)
134{
135 struct msi_desc *desc;
136
137 lockdep_assert_held(&dev->msi.data->mutex);
138
139 desc = msi_alloc_desc(dev, nvec: init_desc->nvec_used, affinity: init_desc->affinity);
140 if (!desc)
141 return -ENOMEM;
142
143 /* Copy type specific data to the new descriptor. */
144 desc->pci = init_desc->pci;
145
146 return msi_insert_desc(dev, desc, domid, index: init_desc->msi_index);
147}
148
149static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
150{
151 switch (filter) {
152 case MSI_DESC_ALL:
153 return true;
154 case MSI_DESC_NOTASSOCIATED:
155 return !desc->irq;
156 case MSI_DESC_ASSOCIATED:
157 return !!desc->irq;
158 }
159 WARN_ON_ONCE(1);
160 return false;
161}
162
163static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl)
164{
165 unsigned int hwsize;
166
167 if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS ||
168 (dev->msi.domain &&
169 !dev->msi.data->__domains[ctrl->domid].domain)))
170 return false;
171
172 hwsize = msi_domain_get_hwsize(dev, domid: ctrl->domid);
173 if (WARN_ON_ONCE(ctrl->first > ctrl->last ||
174 ctrl->first >= hwsize ||
175 ctrl->last >= hwsize))
176 return false;
177 return true;
178}
179
180static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl)
181{
182 struct msi_desc *desc;
183 struct xarray *xa;
184 unsigned long idx;
185
186 lockdep_assert_held(&dev->msi.data->mutex);
187
188 if (!msi_ctrl_valid(dev, ctrl))
189 return;
190
191 xa = &dev->msi.data->__domains[ctrl->domid].store;
192 xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
193 xa_erase(xa, index: idx);
194
195 /* Leak the descriptor when it is still referenced */
196 if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
197 continue;
198 msi_free_desc(desc);
199 }
200}
201
202/**
203 * msi_domain_free_msi_descs_range - Free a range of MSI descriptors of a device in an irqdomain
204 * @dev: Device for which to free the descriptors
205 * @domid: Id of the domain to operate on
206 * @first: Index to start freeing from (inclusive)
207 * @last: Last index to be freed (inclusive)
208 */
209void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid,
210 unsigned int first, unsigned int last)
211{
212 struct msi_ctrl ctrl = {
213 .domid = domid,
214 .first = first,
215 .last = last,
216 };
217
218 msi_domain_free_descs(dev, ctrl: &ctrl);
219}
220
221/**
222 * msi_domain_add_simple_msi_descs - Allocate and initialize MSI descriptors
223 * @dev: Pointer to the device for which the descriptors are allocated
224 * @ctrl: Allocation control struct
225 *
226 * Return: 0 on success or an appropriate failure code.
227 */
228static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *ctrl)
229{
230 struct msi_desc *desc;
231 unsigned int idx;
232 int ret;
233
234 lockdep_assert_held(&dev->msi.data->mutex);
235
236 if (!msi_ctrl_valid(dev, ctrl))
237 return -EINVAL;
238
239 for (idx = ctrl->first; idx <= ctrl->last; idx++) {
240 desc = msi_alloc_desc(dev, nvec: 1, NULL);
241 if (!desc)
242 goto fail_mem;
243 ret = msi_insert_desc(dev, desc, domid: ctrl->domid, index: idx);
244 if (ret)
245 goto fail;
246 }
247 return 0;
248
249fail_mem:
250 ret = -ENOMEM;
251fail:
252 msi_domain_free_descs(dev, ctrl);
253 return ret;
254}
255
256void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
257{
258 *msg = entry->msg;
259}
260
/*
 * Copy the cached MSI message of the descriptor which irq_get_msi_desc()
 * returns for Linux interrupt @irq into @msg.
 */
void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__get_cached_msi_msg(irq_get_msi_desc(irq), msg);
}
EXPORT_SYMBOL_GPL(get_cached_msi_msg);
268
269static void msi_device_data_release(struct device *dev, void *res)
270{
271 struct msi_device_data *md = res;
272 int i;
273
274 for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) {
275 msi_remove_device_irq_domain(dev, domid: i);
276 WARN_ON_ONCE(!xa_empty(&md->__domains[i].store));
277 xa_destroy(&md->__domains[i].store);
278 }
279 dev->msi.data = NULL;
280}
281
282/**
283 * msi_setup_device_data - Setup MSI device data
284 * @dev: Device for which MSI device data should be set up
285 *
286 * Return: 0 on success, appropriate error code otherwise
287 *
288 * This can be called more than once for @dev. If the MSI device data is
289 * already allocated the call succeeds. The allocated memory is
290 * automatically released when the device is destroyed.
291 */
292int msi_setup_device_data(struct device *dev)
293{
294 struct msi_device_data *md;
295 int ret, i;
296
297 if (dev->msi.data)
298 return 0;
299
300 md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL);
301 if (!md)
302 return -ENOMEM;
303
304 ret = msi_sysfs_create_group(dev);
305 if (ret) {
306 devres_free(res: md);
307 return ret;
308 }
309
310 for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++)
311 xa_init_flags(xa: &md->__domains[i].store, XA_FLAGS_ALLOC);
312
313 /*
314 * If @dev::msi::domain is set and is a global MSI domain, copy the
315 * pointer into the domain array so all code can operate on domain
316 * ids. The NULL pointer check is required to keep the legacy
317 * architecture specific PCI/MSI support working.
318 */
319 if (dev->msi.domain && !irq_domain_is_msi_parent(domain: dev->msi.domain))
320 md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain;
321
322 mutex_init(&md->mutex);
323 dev->msi.data = md;
324 devres_add(dev, res: md);
325 return 0;
326}
327
328/**
329 * msi_lock_descs - Lock the MSI descriptor storage of a device
330 * @dev: Device to operate on
331 */
332void msi_lock_descs(struct device *dev)
333{
334 mutex_lock(&dev->msi.data->mutex);
335}
336EXPORT_SYMBOL_GPL(msi_lock_descs);
337
338/**
339 * msi_unlock_descs - Unlock the MSI descriptor storage of a device
340 * @dev: Device to operate on
341 */
342void msi_unlock_descs(struct device *dev)
343{
344 /* Invalidate the index which was cached by the iterator */
345 dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX;
346 mutex_unlock(lock: &dev->msi.data->mutex);
347}
348EXPORT_SYMBOL_GPL(msi_unlock_descs);
349
350static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid,
351 enum msi_desc_filter filter)
352{
353 struct xarray *xa = &md->__domains[domid].store;
354 struct msi_desc *desc;
355
356 xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) {
357 if (msi_desc_match(desc, filter))
358 return desc;
359 }
360 md->__iter_idx = MSI_XA_MAX_INDEX;
361 return NULL;
362}
363
364/**
365 * msi_domain_first_desc - Get the first MSI descriptor of an irqdomain associated to a device
366 * @dev: Device to operate on
367 * @domid: The id of the interrupt domain which should be walked.
368 * @filter: Descriptor state filter
369 *
370 * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs()
371 * must be invoked before the call.
372 *
373 * Return: Pointer to the first MSI descriptor matching the search
374 * criteria, NULL if none found.
375 */
376struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
377 enum msi_desc_filter filter)
378{
379 struct msi_device_data *md = dev->msi.data;
380
381 if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
382 return NULL;
383
384 lockdep_assert_held(&md->mutex);
385
386 md->__iter_idx = 0;
387 return msi_find_desc(md, domid, filter);
388}
389EXPORT_SYMBOL_GPL(msi_domain_first_desc);
390
391/**
392 * msi_next_desc - Get the next MSI descriptor of a device
393 * @dev: Device to operate on
394 * @domid: The id of the interrupt domain which should be walked.
395 * @filter: Descriptor state filter
396 *
397 * The first invocation of msi_next_desc() has to be preceeded by a
398 * successful invocation of __msi_first_desc(). Consecutive invocations are
399 * only valid if the previous one was successful. All these operations have
400 * to be done within the same MSI mutex held region.
401 *
402 * Return: Pointer to the next MSI descriptor matching the search
403 * criteria, NULL if none found.
404 */
405struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid,
406 enum msi_desc_filter filter)
407{
408 struct msi_device_data *md = dev->msi.data;
409
410 if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
411 return NULL;
412
413 lockdep_assert_held(&md->mutex);
414
415 if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX)
416 return NULL;
417
418 md->__iter_idx++;
419 return msi_find_desc(md, domid, filter);
420}
421EXPORT_SYMBOL_GPL(msi_next_desc);
422
423/**
424 * msi_domain_get_virq - Lookup the Linux interrupt number for a MSI index on a interrupt domain
425 * @dev: Device to operate on
426 * @domid: Domain ID of the interrupt domain associated to the device
427 * @index: MSI interrupt index to look for (0-based)
428 *
429 * Return: The Linux interrupt number on success (> 0), 0 if not found
430 */
431unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index)
432{
433 struct msi_desc *desc;
434 unsigned int ret = 0;
435 bool pcimsi = false;
436 struct xarray *xa;
437
438 if (!dev->msi.data)
439 return 0;
440
441 if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
442 return 0;
443
444 /* This check is only valid for the PCI default MSI domain */
445 if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN)
446 pcimsi = to_pci_dev(dev)->msi_enabled;
447
448 msi_lock_descs(dev);
449 xa = &dev->msi.data->__domains[domid].store;
450 desc = xa_load(xa, index: pcimsi ? 0 : index);
451 if (desc && desc->irq) {
452 /*
453 * PCI-MSI has only one descriptor for multiple interrupts.
454 * PCI-MSIX and platform MSI use a descriptor per
455 * interrupt.
456 */
457 if (pcimsi) {
458 if (index < desc->nvec_used)
459 ret = desc->irq + index;
460 } else {
461 ret = desc->irq;
462 }
463 }
464
465 msi_unlock_descs(dev);
466 return ret;
467}
468EXPORT_SYMBOL_GPL(msi_domain_get_virq);
469
470#ifdef CONFIG_SYSFS
471static struct attribute *msi_dev_attrs[] = {
472 NULL
473};
474
475static const struct attribute_group msi_irqs_group = {
476 .name = "msi_irqs",
477 .attrs = msi_dev_attrs,
478};
479
480static inline int msi_sysfs_create_group(struct device *dev)
481{
482 return devm_device_add_group(dev, grp: &msi_irqs_group);
483}
484
485static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
486 char *buf)
487{
488 /* MSI vs. MSIX is per device not per interrupt */
489 bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false;
490
491 return sysfs_emit(buf, fmt: "%s\n", is_msix ? "msix" : "msi");
492}
493
494static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
495{
496 struct device_attribute *attrs = desc->sysfs_attrs;
497 int i;
498
499 if (!attrs)
500 return;
501
502 desc->sysfs_attrs = NULL;
503 for (i = 0; i < desc->nvec_used; i++) {
504 if (attrs[i].show)
505 sysfs_remove_file_from_group(kobj: &dev->kobj, attr: &attrs[i].attr, group: msi_irqs_group.name);
506 kfree(objp: attrs[i].attr.name);
507 }
508 kfree(objp: attrs);
509}
510
511static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
512{
513 struct device_attribute *attrs;
514 int ret, i;
515
516 attrs = kcalloc(n: desc->nvec_used, size: sizeof(*attrs), GFP_KERNEL);
517 if (!attrs)
518 return -ENOMEM;
519
520 desc->sysfs_attrs = attrs;
521 for (i = 0; i < desc->nvec_used; i++) {
522 sysfs_attr_init(&attrs[i].attr);
523 attrs[i].attr.name = kasprintf(GFP_KERNEL, fmt: "%d", desc->irq + i);
524 if (!attrs[i].attr.name) {
525 ret = -ENOMEM;
526 goto fail;
527 }
528
529 attrs[i].attr.mode = 0444;
530 attrs[i].show = msi_mode_show;
531
532 ret = sysfs_add_file_to_group(kobj: &dev->kobj, attr: &attrs[i].attr, group: msi_irqs_group.name);
533 if (ret) {
534 attrs[i].show = NULL;
535 goto fail;
536 }
537 }
538 return 0;
539
540fail:
541 msi_sysfs_remove_desc(dev, desc);
542 return ret;
543}
544
545#if defined(CONFIG_PCI_MSI_ARCH_FALLBACKS) || defined(CONFIG_PCI_XEN)
546/**
547 * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
548 * @dev: The device (PCI, platform etc) which will get sysfs entries
549 */
550int msi_device_populate_sysfs(struct device *dev)
551{
552 struct msi_desc *desc;
553 int ret;
554
555 msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) {
556 if (desc->sysfs_attrs)
557 continue;
558 ret = msi_sysfs_populate_desc(dev, desc);
559 if (ret)
560 return ret;
561 }
562 return 0;
563}
564
565/**
566 * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device
567 * @dev: The device (PCI, platform etc) for which to remove
568 * sysfs entries
569 */
570void msi_device_destroy_sysfs(struct device *dev)
571{
572 struct msi_desc *desc;
573
574 msi_for_each_desc(desc, dev, MSI_DESC_ALL)
575 msi_sysfs_remove_desc(dev, desc);
576}
577#endif /* CONFIG_PCI_MSI_ARCH_FALLBACK || CONFIG_PCI_XEN */
578#else /* CONFIG_SYSFS */
/* Without sysfs support the helpers degrade to successful no-ops */
static inline int msi_sysfs_create_group(struct device *dev)
{
	return 0;
}

static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
{
	return 0;
}

static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
{
}
582#endif /* !CONFIG_SYSFS */
583
584static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid)
585{
586 struct irq_domain *domain;
587
588 lockdep_assert_held(&dev->msi.data->mutex);
589
590 if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS))
591 return NULL;
592
593 domain = dev->msi.data->__domains[domid].domain;
594 if (!domain)
595 return NULL;
596
597 if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain)))
598 return NULL;
599
600 return domain;
601}
602
603static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid)
604{
605 struct msi_domain_info *info;
606 struct irq_domain *domain;
607
608 domain = msi_get_device_domain(dev, domid);
609 if (domain) {
610 info = domain->host_data;
611 return info->hwsize;
612 }
613 /* No domain, default to MSI_XA_DOMAIN_SIZE */
614 return MSI_XA_DOMAIN_SIZE;
615}
616
617static inline void irq_chip_write_msi_msg(struct irq_data *data,
618 struct msi_msg *msg)
619{
620 data->chip->irq_write_msi_msg(data, msg);
621}
622
623static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
624{
625 struct msi_domain_info *info = domain->host_data;
626
627 /*
628 * If the MSI provider has messed with the second message and
629 * not advertized that it is level-capable, signal the breakage.
630 */
631 WARN_ON(!((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
632 (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)) &&
633 (msg[1].address_lo || msg[1].address_hi || msg[1].data));
634}
635
636/**
637 * msi_domain_set_affinity - Generic affinity setter function for MSI domains
638 * @irq_data: The irq data associated to the interrupt
639 * @mask: The affinity mask to set
640 * @force: Flag to enforce setting (disable online checks)
641 *
642 * Intended to be used by MSI interrupt controllers which are
643 * implemented with hierarchical domains.
644 *
645 * Return: IRQ_SET_MASK_* result code
646 */
647int msi_domain_set_affinity(struct irq_data *irq_data,
648 const struct cpumask *mask, bool force)
649{
650 struct irq_data *parent = irq_data->parent_data;
651 struct msi_msg msg[2] = { [1] = { }, };
652 int ret;
653
654 ret = parent->chip->irq_set_affinity(parent, mask, force);
655 if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
656 BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
657 msi_check_level(domain: irq_data->domain, msg);
658 irq_chip_write_msi_msg(data: irq_data, msg);
659 }
660
661 return ret;
662}
663
664static int msi_domain_activate(struct irq_domain *domain,
665 struct irq_data *irq_data, bool early)
666{
667 struct msi_msg msg[2] = { [1] = { }, };
668
669 BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
670 msi_check_level(domain: irq_data->domain, msg);
671 irq_chip_write_msi_msg(data: irq_data, msg);
672 return 0;
673}
674
675static void msi_domain_deactivate(struct irq_domain *domain,
676 struct irq_data *irq_data)
677{
678 struct msi_msg msg[2];
679
680 memset(msg, 0, sizeof(msg));
681 irq_chip_write_msi_msg(data: irq_data, msg);
682}
683
684static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
685 unsigned int nr_irqs, void *arg)
686{
687 struct msi_domain_info *info = domain->host_data;
688 struct msi_domain_ops *ops = info->ops;
689 irq_hw_number_t hwirq = ops->get_hwirq(info, arg);
690 int i, ret;
691
692 if (irq_find_mapping(domain, hwirq) > 0)
693 return -EEXIST;
694
695 if (domain->parent) {
696 ret = irq_domain_alloc_irqs_parent(domain, irq_base: virq, nr_irqs, arg);
697 if (ret < 0)
698 return ret;
699 }
700
701 for (i = 0; i < nr_irqs; i++) {
702 ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
703 if (ret < 0) {
704 if (ops->msi_free) {
705 for (i--; i > 0; i--)
706 ops->msi_free(domain, info, virq + i);
707 }
708 irq_domain_free_irqs_top(domain, virq, nr_irqs);
709 return ret;
710 }
711 }
712
713 return 0;
714}
715
716static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
717 unsigned int nr_irqs)
718{
719 struct msi_domain_info *info = domain->host_data;
720 int i;
721
722 if (info->ops->msi_free) {
723 for (i = 0; i < nr_irqs; i++)
724 info->ops->msi_free(domain, info, virq + i);
725 }
726 irq_domain_free_irqs_top(domain, virq, nr_irqs);
727}
728
729static int msi_domain_translate(struct irq_domain *domain, struct irq_fwspec *fwspec,
730 irq_hw_number_t *hwirq, unsigned int *type)
731{
732 struct msi_domain_info *info = domain->host_data;
733
734 /*
735 * This will catch allocations through the regular irqdomain path except
736 * for MSI domains which really support this, e.g. MBIGEN.
737 */
738 if (!info->ops->msi_translate)
739 return -ENOTSUPP;
740 return info->ops->msi_translate(domain, fwspec, hwirq, type);
741}
742
743static const struct irq_domain_ops msi_domain_ops = {
744 .alloc = msi_domain_alloc,
745 .free = msi_domain_free,
746 .activate = msi_domain_activate,
747 .deactivate = msi_domain_deactivate,
748 .translate = msi_domain_translate,
749};
750
751static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
752 msi_alloc_info_t *arg)
753{
754 return arg->hwirq;
755}
756
757static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev,
758 int nvec, msi_alloc_info_t *arg)
759{
760 memset(arg, 0, sizeof(*arg));
761 return 0;
762}
763
764static void msi_domain_ops_set_desc(msi_alloc_info_t *arg,
765 struct msi_desc *desc)
766{
767 arg->desc = desc;
768}
769
770static int msi_domain_ops_init(struct irq_domain *domain,
771 struct msi_domain_info *info,
772 unsigned int virq, irq_hw_number_t hwirq,
773 msi_alloc_info_t *arg)
774{
775 irq_domain_set_hwirq_and_chip(domain, virq, hwirq, chip: info->chip,
776 chip_data: info->chip_data);
777 if (info->handler && info->handler_name) {
778 __irq_set_handler(irq: virq, handle: info->handler, is_chained: 0, name: info->handler_name);
779 if (info->handler_data)
780 irq_set_handler_data(irq: virq, data: info->handler_data);
781 }
782 return 0;
783}
784
785static struct msi_domain_ops msi_domain_ops_default = {
786 .get_hwirq = msi_domain_ops_get_hwirq,
787 .msi_init = msi_domain_ops_init,
788 .msi_prepare = msi_domain_ops_prepare,
789 .set_desc = msi_domain_ops_set_desc,
790};
791
792static void msi_domain_update_dom_ops(struct msi_domain_info *info)
793{
794 struct msi_domain_ops *ops = info->ops;
795
796 if (ops == NULL) {
797 info->ops = &msi_domain_ops_default;
798 return;
799 }
800
801 if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS))
802 return;
803
804 if (ops->get_hwirq == NULL)
805 ops->get_hwirq = msi_domain_ops_default.get_hwirq;
806 if (ops->msi_init == NULL)
807 ops->msi_init = msi_domain_ops_default.msi_init;
808 if (ops->msi_prepare == NULL)
809 ops->msi_prepare = msi_domain_ops_default.msi_prepare;
810 if (ops->set_desc == NULL)
811 ops->set_desc = msi_domain_ops_default.set_desc;
812}
813
814static void msi_domain_update_chip_ops(struct msi_domain_info *info)
815{
816 struct irq_chip *chip = info->chip;
817
818 BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask);
819 if (!chip->irq_set_affinity)
820 chip->irq_set_affinity = msi_domain_set_affinity;
821}
822
823static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode,
824 struct msi_domain_info *info,
825 unsigned int flags,
826 struct irq_domain *parent)
827{
828 struct irq_domain *domain;
829
830 if (info->hwsize > MSI_XA_DOMAIN_SIZE)
831 return NULL;
832
833 /*
834 * Hardware size 0 is valid for backwards compatibility and for
835 * domains which are not backed by a hardware table. Grant the
836 * maximum index space.
837 */
838 if (!info->hwsize)
839 info->hwsize = MSI_XA_DOMAIN_SIZE;
840
841 msi_domain_update_dom_ops(info);
842 if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
843 msi_domain_update_chip_ops(info);
844
845 domain = irq_domain_create_hierarchy(parent, flags: flags | IRQ_DOMAIN_FLAG_MSI, size: 0,
846 fwnode, ops: &msi_domain_ops, host_data: info);
847
848 if (domain) {
849 irq_domain_update_bus_token(domain, bus_token: info->bus_token);
850 if (info->flags & MSI_FLAG_PARENT_PM_DEV)
851 domain->pm_dev = parent->pm_dev;
852 }
853
854 return domain;
855}
856
/**
 * msi_create_irq_domain - Create an MSI interrupt domain
 * @fwnode:	Optional fwnode of the interrupt controller
 * @info:	MSI domain info
 * @parent:	Parent irq domain
 *
 * No extra irq domain flags are requested for the new domain.
 *
 * Return: pointer to the created &struct irq_domain or %NULL on failure
 */
struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
					 struct msi_domain_info *info,
					 struct irq_domain *parent)
{
	const unsigned int no_flags = 0;

	return __msi_create_irq_domain(fwnode, info, no_flags, parent);
}
871
872/**
873 * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down
874 * in the domain hierarchy
875 * @dev: The device for which the domain should be created
876 * @domain: The domain in the hierarchy this op is being called on
877 * @msi_parent_domain: The IRQ_DOMAIN_FLAG_MSI_PARENT domain for the child to
878 * be created
879 * @msi_child_info: The MSI domain info of the IRQ_DOMAIN_FLAG_MSI_DEVICE
880 * domain to be created
881 *
882 * Return: true on success, false otherwise
883 *
884 * This is the most complex problem of per device MSI domains and the
885 * underlying interrupt domain hierarchy:
886 *
887 * The device domain to be initialized requests the broadest feature set
888 * possible and the underlying domain hierarchy puts restrictions on it.
889 *
890 * That's trivial for a simple parent->child relationship, but it gets
891 * interesting with an intermediate domain: root->parent->child. The
892 * intermediate 'parent' can expand the capabilities which the 'root'
893 * domain is providing. So that creates a classic hen and egg problem:
894 * Which entity is doing the restrictions/expansions?
895 *
896 * One solution is to let the root domain handle the initialization that's
897 * why there is the @domain and the @msi_parent_domain pointer.
898 */
899bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
900 struct irq_domain *msi_parent_domain,
901 struct msi_domain_info *msi_child_info)
902{
903 struct irq_domain *parent = domain->parent;
904
905 if (WARN_ON_ONCE(!parent || !parent->msi_parent_ops ||
906 !parent->msi_parent_ops->init_dev_msi_info))
907 return false;
908
909 return parent->msi_parent_ops->init_dev_msi_info(dev, parent, msi_parent_domain,
910 msi_child_info);
911}
912
913/**
914 * msi_create_device_irq_domain - Create a device MSI interrupt domain
915 * @dev: Pointer to the device
916 * @domid: Domain id
917 * @template: MSI domain info bundle used as template
918 * @hwsize: Maximum number of MSI table entries (0 if unknown or unlimited)
919 * @domain_data: Optional pointer to domain specific data which is set in
920 * msi_domain_info::data
921 * @chip_data: Optional pointer to chip specific data which is set in
922 * msi_domain_info::chip_data
923 *
924 * Return: True on success, false otherwise
925 *
926 * There is no firmware node required for this interface because the per
927 * device domains are software constructs which are actually closer to the
928 * hardware reality than any firmware can describe them.
929 *
930 * The domain name and the irq chip name for a MSI device domain are
931 * composed by: "$(PREFIX)$(CHIPNAME)-$(DEVNAME)"
932 *
933 * $PREFIX: Optional prefix provided by the underlying MSI parent domain
934 * via msi_parent_ops::prefix. If that pointer is NULL the prefix
935 * is empty.
936 * $CHIPNAME: The name of the irq_chip in @template
937 * $DEVNAME: The name of the device
938 *
939 * This results in understandable chip names and hardware interrupt numbers
940 * in e.g. /proc/interrupts
941 *
942 * PCI-MSI-0000:00:1c.0 0-edge Parent domain has no prefix
943 * IR-PCI-MSI-0000:00:1c.4 0-edge Same with interrupt remapping prefix 'IR-'
944 *
945 * IR-PCI-MSIX-0000:3d:00.0 0-edge Hardware interrupt numbers reflect
946 * IR-PCI-MSIX-0000:3d:00.0 1-edge the real MSI-X index on that device
947 * IR-PCI-MSIX-0000:3d:00.0 2-edge
948 *
949 * On IMS domains the hardware interrupt number is either a table entry
950 * index or a purely software managed index but it is guaranteed to be
951 * unique.
952 *
953 * The domain pointer is stored in @dev::msi::data::__irqdomains[]. All
954 * subsequent operations on the domain depend on the domain id.
955 *
956 * The domain is automatically freed when the device is removed via devres
957 * in the context of @dev::msi::data freeing, but it can also be
958 * independently removed via @msi_remove_device_irq_domain().
959 */
960bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
961 const struct msi_domain_template *template,
962 unsigned int hwsize, void *domain_data,
963 void *chip_data)
964{
965 struct irq_domain *domain, *parent = dev->msi.domain;
966 struct fwnode_handle *fwnode, *fwnalloced = NULL;
967 struct msi_domain_template *bundle;
968 const struct msi_parent_ops *pops;
969
970 if (!irq_domain_is_msi_parent(domain: parent))
971 return false;
972
973 if (domid >= MSI_MAX_DEVICE_IRQDOMAINS)
974 return false;
975
976 bundle = kmemdup(p: template, size: sizeof(*bundle), GFP_KERNEL);
977 if (!bundle)
978 return false;
979
980 bundle->info.hwsize = hwsize;
981 bundle->info.chip = &bundle->chip;
982 bundle->info.ops = &bundle->ops;
983 bundle->info.data = domain_data;
984 bundle->info.chip_data = chip_data;
985
986 pops = parent->msi_parent_ops;
987 snprintf(buf: bundle->name, size: sizeof(bundle->name), fmt: "%s%s-%s",
988 pops->prefix ? : "", bundle->chip.name, dev_name(dev));
989 bundle->chip.name = bundle->name;
990
991 /*
992 * Using the device firmware node is required for wire to MSI
993 * device domains so that the existing firmware results in a domain
994 * match.
995 * All other device domains like PCI/MSI use the named firmware
996 * node as they are not guaranteed to have a fwnode. They are never
997 * looked up and always handled in the context of the device.
998 */
999 if (bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE)
1000 fwnode = dev->fwnode;
1001 else
1002 fwnode = fwnalloced = irq_domain_alloc_named_fwnode(name: bundle->name);
1003
1004 if (!fwnode)
1005 goto free_bundle;
1006
1007 if (msi_setup_device_data(dev))
1008 goto free_fwnode;
1009
1010 msi_lock_descs(dev);
1011
1012 if (WARN_ON_ONCE(msi_get_device_domain(dev, domid)))
1013 goto fail;
1014
1015 if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info))
1016 goto fail;
1017
1018 domain = __msi_create_irq_domain(fwnode, info: &bundle->info, flags: IRQ_DOMAIN_FLAG_MSI_DEVICE, parent);
1019 if (!domain)
1020 goto fail;
1021
1022 domain->dev = dev;
1023 dev->msi.data->__domains[domid].domain = domain;
1024 msi_unlock_descs(dev);
1025 return true;
1026
1027fail:
1028 msi_unlock_descs(dev);
1029free_fwnode:
1030 irq_domain_free_fwnode(fwnode: fwnalloced);
1031free_bundle:
1032 kfree(objp: bundle);
1033 return false;
1034}
1035
1036/**
1037 * msi_remove_device_irq_domain - Free a device MSI interrupt domain
1038 * @dev: Pointer to the device
1039 * @domid: Domain id
1040 */
1041void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
1042{
1043 struct fwnode_handle *fwnode = NULL;
1044 struct msi_domain_info *info;
1045 struct irq_domain *domain;
1046
1047 msi_lock_descs(dev);
1048
1049 domain = msi_get_device_domain(dev, domid);
1050
1051 if (!domain || !irq_domain_is_msi_device(domain))
1052 goto unlock;
1053
1054 dev->msi.data->__domains[domid].domain = NULL;
1055 info = domain->host_data;
1056 if (irq_domain_is_msi_device(domain))
1057 fwnode = domain->fwnode;
1058 irq_domain_remove(host: domain);
1059 irq_domain_free_fwnode(fwnode);
1060 kfree(container_of(info, struct msi_domain_template, info));
1061
1062unlock:
1063 msi_unlock_descs(dev);
1064}
1065
1066/**
1067 * msi_match_device_irq_domain - Match a device irq domain against a bus token
1068 * @dev: Pointer to the device
1069 * @domid: Domain id
1070 * @bus_token: Bus token to match against the domain bus token
1071 *
1072 * Return: True if device domain exists and bus tokens match.
1073 */
1074bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
1075 enum irq_domain_bus_token bus_token)
1076{
1077 struct msi_domain_info *info;
1078 struct irq_domain *domain;
1079 bool ret = false;
1080
1081 msi_lock_descs(dev);
1082 domain = msi_get_device_domain(dev, domid);
1083 if (domain && irq_domain_is_msi_device(domain)) {
1084 info = domain->host_data;
1085 ret = info->bus_token == bus_token;
1086 }
1087 msi_unlock_descs(dev);
1088 return ret;
1089}
1090
1091int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
1092 int nvec, msi_alloc_info_t *arg)
1093{
1094 struct msi_domain_info *info = domain->host_data;
1095 struct msi_domain_ops *ops = info->ops;
1096
1097 return ops->msi_prepare(domain, dev, nvec, arg);
1098}
1099
1100int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
1101 int virq_base, int nvec, msi_alloc_info_t *arg)
1102{
1103 struct msi_domain_info *info = domain->host_data;
1104 struct msi_domain_ops *ops = info->ops;
1105 struct msi_ctrl ctrl = {
1106 .domid = MSI_DEFAULT_DOMAIN,
1107 .first = virq_base,
1108 .last = virq_base + nvec - 1,
1109 };
1110 struct msi_desc *desc;
1111 struct xarray *xa;
1112 int ret, virq;
1113
1114 msi_lock_descs(dev);
1115
1116 if (!msi_ctrl_valid(dev, ctrl: &ctrl)) {
1117 ret = -EINVAL;
1118 goto unlock;
1119 }
1120
1121 ret = msi_domain_add_simple_msi_descs(dev, ctrl: &ctrl);
1122 if (ret)
1123 goto unlock;
1124
1125 xa = &dev->msi.data->__domains[ctrl.domid].store;
1126
1127 for (virq = virq_base; virq < virq_base + nvec; virq++) {
1128 desc = xa_load(xa, index: virq);
1129 desc->irq = virq;
1130
1131 ops->set_desc(arg, desc);
1132 ret = irq_domain_alloc_irqs_hierarchy(domain, irq_base: virq, nr_irqs: 1, arg);
1133 if (ret)
1134 goto fail;
1135
1136 irq_set_msi_desc(irq: virq, entry: desc);
1137 }
1138 msi_unlock_descs(dev);
1139 return 0;
1140
1141fail:
1142 for (--virq; virq >= virq_base; virq--) {
1143 msi_domain_depopulate_descs(dev, virq, nvec: 1);
1144 irq_domain_free_irqs_common(domain, virq, nr_irqs: 1);
1145 }
1146 msi_domain_free_descs(dev, ctrl: &ctrl);
1147unlock:
1148 msi_unlock_descs(dev);
1149 return ret;
1150}
1151
1152void msi_domain_depopulate_descs(struct device *dev, int virq_base, int nvec)
1153{
1154 struct msi_ctrl ctrl = {
1155 .domid = MSI_DEFAULT_DOMAIN,
1156 .first = virq_base,
1157 .last = virq_base + nvec - 1,
1158 };
1159 struct msi_desc *desc;
1160 struct xarray *xa;
1161 unsigned long idx;
1162
1163 if (!msi_ctrl_valid(dev, ctrl: &ctrl))
1164 return;
1165
1166 xa = &dev->msi.data->__domains[ctrl.domid].store;
1167 xa_for_each_range(xa, idx, desc, ctrl.first, ctrl.last)
1168 desc->irq = 0;
1169}
1170
1171/*
1172 * Carefully check whether the device can use reservation mode. If
1173 * reservation mode is enabled then the early activation will assign a
1174 * dummy vector to the device. If the PCI/MSI device does not support
1175 * masking of the entry then this can result in spurious interrupts when
1176 * the device driver is not absolutely careful. But even then a malfunction
1177 * of the hardware could result in a spurious interrupt on the dummy vector
1178 * and render the device unusable. If the entry can be masked then the core
1179 * logic will prevent the spurious interrupt and reservation mode can be
1180 * used. For now reservation mode is restricted to PCI/MSI.
1181 */
1182static bool msi_check_reservation_mode(struct irq_domain *domain,
1183 struct msi_domain_info *info,
1184 struct device *dev)
1185{
1186 struct msi_desc *desc;
1187
1188 switch(domain->bus_token) {
1189 case DOMAIN_BUS_PCI_MSI:
1190 case DOMAIN_BUS_PCI_DEVICE_MSI:
1191 case DOMAIN_BUS_PCI_DEVICE_MSIX:
1192 case DOMAIN_BUS_VMD_MSI:
1193 break;
1194 default:
1195 return false;
1196 }
1197
1198 if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
1199 return false;
1200
1201 if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
1202 return false;
1203
1204 /*
1205 * Checking the first MSI descriptor is sufficient. MSIX supports
1206 * masking and MSI does so when the can_mask attribute is set.
1207 */
1208 desc = msi_first_desc(dev, filter: MSI_DESC_ALL);
1209 return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask;
1210}
1211
1212static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc,
1213 int allocated)
1214{
1215 switch(domain->bus_token) {
1216 case DOMAIN_BUS_PCI_MSI:
1217 case DOMAIN_BUS_PCI_DEVICE_MSI:
1218 case DOMAIN_BUS_PCI_DEVICE_MSIX:
1219 case DOMAIN_BUS_VMD_MSI:
1220 if (IS_ENABLED(CONFIG_PCI_MSI))
1221 break;
1222 fallthrough;
1223 default:
1224 return -ENOSPC;
1225 }
1226
1227 /* Let a failed PCI multi MSI allocation retry */
1228 if (desc->nvec_used > 1)
1229 return 1;
1230
1231 /* If there was a successful allocation let the caller know */
1232 return allocated ? allocated : -ENOSPC;
1233}
1234
1235#define VIRQ_CAN_RESERVE 0x01
1236#define VIRQ_ACTIVATE 0x02
1237
1238static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags)
1239{
1240 struct irq_data *irqd = irq_domain_get_irq_data(domain, virq);
1241 int ret;
1242
1243 if (!(vflags & VIRQ_CAN_RESERVE)) {
1244 irqd_clr_can_reserve(d: irqd);
1245
1246 /*
1247 * If the interrupt is managed but no CPU is available to
1248 * service it, shut it down until better times. Note that
1249 * we only do this on the !RESERVE path as x86 (the only
1250 * architecture using this flag) deals with this in a
1251 * different way by using a catch-all vector.
1252 */
1253 if ((vflags & VIRQ_ACTIVATE) &&
1254 irqd_affinity_is_managed(d: irqd) &&
1255 !cpumask_intersects(src1p: irq_data_get_affinity_mask(d: irqd),
1256 cpu_online_mask)) {
1257 irqd_set_managed_shutdown(d: irqd);
1258 return 0;
1259 }
1260 }
1261
1262 if (!(vflags & VIRQ_ACTIVATE))
1263 return 0;
1264
1265 ret = irq_domain_activate_irq(irq_data: irqd, early: vflags & VIRQ_CAN_RESERVE);
1266 if (ret)
1267 return ret;
1268 /*
1269 * If the interrupt uses reservation mode, clear the activated bit
1270 * so request_irq() will assign the final vector.
1271 */
1272 if (vflags & VIRQ_CAN_RESERVE)
1273 irqd_clr_activated(d: irqd);
1274 return 0;
1275}
1276
1277static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain,
1278 struct msi_ctrl *ctrl)
1279{
1280 struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1281 struct msi_domain_info *info = domain->host_data;
1282 struct msi_domain_ops *ops = info->ops;
1283 unsigned int vflags = 0, allocated = 0;
1284 msi_alloc_info_t arg = { };
1285 struct msi_desc *desc;
1286 unsigned long idx;
1287 int i, ret, virq;
1288
1289 ret = msi_domain_prepare_irqs(domain, dev, nvec: ctrl->nirqs, arg: &arg);
1290 if (ret)
1291 return ret;
1292
1293 /*
1294 * This flag is set by the PCI layer as we need to activate
1295 * the MSI entries before the PCI layer enables MSI in the
1296 * card. Otherwise the card latches a random msi message.
1297 */
1298 if (info->flags & MSI_FLAG_ACTIVATE_EARLY)
1299 vflags |= VIRQ_ACTIVATE;
1300
1301 /*
1302 * Interrupt can use a reserved vector and will not occupy
1303 * a real device vector until the interrupt is requested.
1304 */
1305 if (msi_check_reservation_mode(domain, info, dev))
1306 vflags |= VIRQ_CAN_RESERVE;
1307
1308 xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1309 if (!msi_desc_match(desc, filter: MSI_DESC_NOTASSOCIATED))
1310 continue;
1311
1312 /* This should return -ECONFUSED... */
1313 if (WARN_ON_ONCE(allocated >= ctrl->nirqs))
1314 return -EINVAL;
1315
1316 if (ops->prepare_desc)
1317 ops->prepare_desc(domain, &arg, desc);
1318
1319 ops->set_desc(&arg, desc);
1320
1321 virq = __irq_domain_alloc_irqs(domain, irq_base: -1, nr_irqs: desc->nvec_used,
1322 node: dev_to_node(dev), arg: &arg, realloc: false,
1323 affinity: desc->affinity);
1324 if (virq < 0)
1325 return msi_handle_pci_fail(domain, desc, allocated);
1326
1327 for (i = 0; i < desc->nvec_used; i++) {
1328 irq_set_msi_desc_off(irq_base: virq, irq_offset: i, entry: desc);
1329 irq_debugfs_copy_devname(irq: virq + i, dev);
1330 ret = msi_init_virq(domain, virq: virq + i, vflags);
1331 if (ret)
1332 return ret;
1333 }
1334 if (info->flags & MSI_FLAG_DEV_SYSFS) {
1335 ret = msi_sysfs_populate_desc(dev, desc);
1336 if (ret)
1337 return ret;
1338 }
1339 allocated++;
1340 }
1341 return 0;
1342}
1343
1344static int msi_domain_alloc_simple_msi_descs(struct device *dev,
1345 struct msi_domain_info *info,
1346 struct msi_ctrl *ctrl)
1347{
1348 if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS))
1349 return 0;
1350
1351 return msi_domain_add_simple_msi_descs(dev, ctrl);
1352}
1353
1354static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
1355{
1356 struct msi_domain_info *info;
1357 struct msi_domain_ops *ops;
1358 struct irq_domain *domain;
1359 int ret;
1360
1361 if (!msi_ctrl_valid(dev, ctrl))
1362 return -EINVAL;
1363
1364 domain = msi_get_device_domain(dev, domid: ctrl->domid);
1365 if (!domain)
1366 return -ENODEV;
1367
1368 info = domain->host_data;
1369
1370 ret = msi_domain_alloc_simple_msi_descs(dev, info, ctrl);
1371 if (ret)
1372 return ret;
1373
1374 ops = info->ops;
1375 if (ops->domain_alloc_irqs)
1376 return ops->domain_alloc_irqs(domain, dev, ctrl->nirqs);
1377
1378 return __msi_domain_alloc_irqs(dev, domain, ctrl);
1379}
1380
/*
 * Allocate the interrupts described by @ctrl and release everything in
 * the range of @ctrl again if the allocation did not return 0.
 */
static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
{
	int rc;

	rc = __msi_domain_alloc_locked(dev, ctrl);
	if (!rc)
		return 0;

	msi_domain_free_locked(dev, ctrl);
	return rc;
}
1389
1390/**
1391 * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain
1392 * @dev: Pointer to device struct of the device for which the interrupts
1393 * are allocated
1394 * @domid: Id of the interrupt domain to operate on
1395 * @first: First index to allocate (inclusive)
1396 * @last: Last index to allocate (inclusive)
1397 *
1398 * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
1399 * pair. Use this for MSI irqdomains which implement their own descriptor
1400 * allocation/free.
1401 *
1402 * Return: %0 on success or an error code.
1403 */
1404int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid,
1405 unsigned int first, unsigned int last)
1406{
1407 struct msi_ctrl ctrl = {
1408 .domid = domid,
1409 .first = first,
1410 .last = last,
1411 .nirqs = last + 1 - first,
1412 };
1413
1414 return msi_domain_alloc_locked(dev, ctrl: &ctrl);
1415}
1416
/**
 * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are allocated
 * @domid:	Id of the interrupt domain to operate on
 * @first:	First index to allocate (inclusive)
 * @last:	Last index to allocate (inclusive)
 *
 * Takes the MSI descriptor lock of @dev around
 * msi_domain_alloc_irqs_range_locked().
 *
 * Return: %0 on success or an error code.
 */
int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
				unsigned int first, unsigned int last)
{
	int rc;

	msi_lock_descs(dev);
	rc = msi_domain_alloc_irqs_range_locked(dev, domid, first, last);
	msi_unlock_descs(dev);

	return rc;
}
1437
1438/**
1439 * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain
1440 *
1441 * @dev: Pointer to device struct of the device for which the interrupts
1442 * are allocated
1443 * @domid: Id of the interrupt domain to operate on
1444 * @nirqs: The number of interrupts to allocate
1445 *
1446 * This function scans all MSI descriptors of the MSI domain and allocates interrupts
1447 * for all unassigned ones. That function is to be used for MSI domain usage where
1448 * the descriptor allocation is handled at the call site, e.g. PCI/MSI[X].
1449 *
1450 * Return: %0 on success or an error code.
1451 */
1452int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs)
1453{
1454 struct msi_ctrl ctrl = {
1455 .domid = domid,
1456 .first = 0,
1457 .last = msi_domain_get_hwsize(dev, domid) - 1,
1458 .nirqs = nirqs,
1459 };
1460
1461 return msi_domain_alloc_locked(dev, ctrl: &ctrl);
1462}
1463
1464static struct msi_map __msi_domain_alloc_irq_at(struct device *dev, unsigned int domid,
1465 unsigned int index,
1466 const struct irq_affinity_desc *affdesc,
1467 union msi_instance_cookie *icookie)
1468{
1469 struct msi_ctrl ctrl = { .domid = domid, .nirqs = 1, };
1470 struct irq_domain *domain;
1471 struct msi_map map = { };
1472 struct msi_desc *desc;
1473 int ret;
1474
1475 domain = msi_get_device_domain(dev, domid);
1476 if (!domain) {
1477 map.index = -ENODEV;
1478 return map;
1479 }
1480
1481 desc = msi_alloc_desc(dev, nvec: 1, affinity: affdesc);
1482 if (!desc) {
1483 map.index = -ENOMEM;
1484 return map;
1485 }
1486
1487 if (icookie)
1488 desc->data.icookie = *icookie;
1489
1490 ret = msi_insert_desc(dev, desc, domid, index);
1491 if (ret) {
1492 map.index = ret;
1493 return map;
1494 }
1495
1496 ctrl.first = ctrl.last = desc->msi_index;
1497
1498 ret = __msi_domain_alloc_irqs(dev, domain, ctrl: &ctrl);
1499 if (ret) {
1500 map.index = ret;
1501 msi_domain_free_locked(dev, ctrl: &ctrl);
1502 } else {
1503 map.index = desc->msi_index;
1504 map.virq = desc->irq;
1505 }
1506 return map;
1507}
1508
1509/**
1510 * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at
1511 * a given index - or at the next free index
1512 *
1513 * @dev: Pointer to device struct of the device for which the interrupts
1514 * are allocated
1515 * @domid: Id of the interrupt domain to operate on
1516 * @index: Index for allocation. If @index == %MSI_ANY_INDEX the allocation
1517 * uses the next free index.
1518 * @affdesc: Optional pointer to an interrupt affinity descriptor structure
1519 * @icookie: Optional pointer to a domain specific per instance cookie. If
1520 * non-NULL the content of the cookie is stored in msi_desc::data.
1521 * Must be NULL for MSI-X allocations
1522 *
1523 * This requires a MSI interrupt domain which lets the core code manage the
1524 * MSI descriptors.
1525 *
1526 * Return: struct msi_map
1527 *
1528 * On success msi_map::index contains the allocated index number and
1529 * msi_map::virq the corresponding Linux interrupt number
1530 *
1531 * On failure msi_map::index contains the error code and msi_map::virq
1532 * is %0.
1533 */
1534struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index,
1535 const struct irq_affinity_desc *affdesc,
1536 union msi_instance_cookie *icookie)
1537{
1538 struct msi_map map;
1539
1540 msi_lock_descs(dev);
1541 map = __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie);
1542 msi_unlock_descs(dev);
1543 return map;
1544}
1545
1546/**
1547 * msi_device_domain_alloc_wired - Allocate a "wired" interrupt on @domain
1548 * @domain: The domain to allocate on
1549 * @hwirq: The hardware interrupt number to allocate for
1550 * @type: The interrupt type
1551 *
1552 * This weirdness supports wire to MSI controllers like MBIGEN.
1553 *
1554 * @hwirq is the hardware interrupt number which is handed in from
1555 * irq_create_fwspec_mapping(). As the wire to MSI domain is sparse, but
1556 * sized in firmware, the hardware interrupt number cannot be used as MSI
1557 * index. For the underlying irq chip the MSI index is irrelevant and
1558 * all it needs is the hardware interrupt number.
1559 *
1560 * To handle this the MSI index is allocated with MSI_ANY_INDEX and the
1561 * hardware interrupt number is stored along with the type information in
1562 * msi_desc::cookie so the underlying interrupt chip and domain code can
1563 * retrieve it.
1564 *
1565 * Return: The Linux interrupt number (> 0) or an error code
1566 */
1567int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
1568 unsigned int type)
1569{
1570 unsigned int domid = MSI_DEFAULT_DOMAIN;
1571 union msi_instance_cookie icookie = { };
1572 struct device *dev = domain->dev;
1573 struct msi_map map = { };
1574
1575 if (WARN_ON_ONCE(!dev || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
1576 return -EINVAL;
1577
1578 icookie.value = ((u64)type << 32) | hwirq;
1579
1580 msi_lock_descs(dev);
1581 if (WARN_ON_ONCE(msi_get_device_domain(dev, domid) != domain))
1582 map.index = -EINVAL;
1583 else
1584 map = __msi_domain_alloc_irq_at(dev, domid, MSI_ANY_INDEX, NULL, icookie: &icookie);
1585 msi_unlock_descs(dev);
1586
1587 return map.index >= 0 ? map.virq : map.index;
1588}
1589
1590static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain,
1591 struct msi_ctrl *ctrl)
1592{
1593 struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1594 struct msi_domain_info *info = domain->host_data;
1595 struct irq_data *irqd;
1596 struct msi_desc *desc;
1597 unsigned long idx;
1598 int i;
1599
1600 xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1601 /* Only handle MSI entries which have an interrupt associated */
1602 if (!msi_desc_match(desc, filter: MSI_DESC_ASSOCIATED))
1603 continue;
1604
1605 /* Make sure all interrupts are deactivated */
1606 for (i = 0; i < desc->nvec_used; i++) {
1607 irqd = irq_domain_get_irq_data(domain, virq: desc->irq + i);
1608 if (irqd && irqd_is_activated(d: irqd))
1609 irq_domain_deactivate_irq(irq_data: irqd);
1610 }
1611
1612 irq_domain_free_irqs(virq: desc->irq, nr_irqs: desc->nvec_used);
1613 if (info->flags & MSI_FLAG_DEV_SYSFS)
1614 msi_sysfs_remove_desc(dev, desc);
1615 desc->irq = 0;
1616 }
1617}
1618
1619static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl)
1620{
1621 struct msi_domain_info *info;
1622 struct msi_domain_ops *ops;
1623 struct irq_domain *domain;
1624
1625 if (!msi_ctrl_valid(dev, ctrl))
1626 return;
1627
1628 domain = msi_get_device_domain(dev, domid: ctrl->domid);
1629 if (!domain)
1630 return;
1631
1632 info = domain->host_data;
1633 ops = info->ops;
1634
1635 if (ops->domain_free_irqs)
1636 ops->domain_free_irqs(domain, dev);
1637 else
1638 __msi_domain_free_irqs(dev, domain, ctrl);
1639
1640 if (ops->msi_post_free)
1641 ops->msi_post_free(domain, dev);
1642
1643 if (info->flags & MSI_FLAG_FREE_MSI_DESCS)
1644 msi_domain_free_descs(dev, ctrl);
1645}
1646
1647/**
1648 * msi_domain_free_irqs_range_locked - Free a range of interrupts from a MSI interrupt domain
1649 * associated to @dev with msi_lock held
1650 * @dev: Pointer to device struct of the device for which the interrupts
1651 * are freed
1652 * @domid: Id of the interrupt domain to operate on
1653 * @first: First index to free (inclusive)
1654 * @last: Last index to free (inclusive)
1655 */
1656void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
1657 unsigned int first, unsigned int last)
1658{
1659 struct msi_ctrl ctrl = {
1660 .domid = domid,
1661 .first = first,
1662 .last = last,
1663 };
1664 msi_domain_free_locked(dev, ctrl: &ctrl);
1665}
1666
/**
 * msi_domain_free_irqs_range - Free a range of interrupts from a MSI interrupt domain
 *				associated to @dev
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	Id of the interrupt domain to operate on
 * @first:	First index to free (inclusive)
 * @last:	Last index to free (inclusive)
 *
 * Takes the MSI descriptor lock of @dev around
 * msi_domain_free_irqs_range_locked().
 */
void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
				unsigned int first, unsigned int last)
{
	msi_lock_descs(dev);

	msi_domain_free_irqs_range_locked(dev, domid, first, last);

	msi_unlock_descs(dev);
}
1683
/**
 * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain
 *				     associated to a device
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	The id of the domain to operate on
 *
 * Frees the indices from 0 up to the hardware size of the domain minus one.
 *
 * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
 * pair. Use this for MSI irqdomains which implement their own vector
 * allocation.
 */
void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid)
{
	unsigned int hwsize = msi_domain_get_hwsize(dev, domid);

	msi_domain_free_irqs_range_locked(dev, domid, 0, hwsize - 1);
}
1700
/**
 * msi_domain_free_irqs_all - Free all interrupts from a MSI interrupt domain
 *			      associated to a device
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	The id of the domain to operate on
 *
 * Takes the MSI descriptor lock of @dev around
 * msi_domain_free_irqs_all_locked().
 */
void msi_domain_free_irqs_all(struct device *dev, unsigned int domid)
{
	msi_lock_descs(dev);

	msi_domain_free_irqs_all_locked(dev, domid);

	msi_unlock_descs(dev);
}
1714
1715/**
1716 * msi_device_domain_free_wired - Free a wired interrupt in @domain
1717 * @domain: The domain to free the interrupt on
1718 * @virq: The Linux interrupt number to free
1719 *
1720 * This is the counterpart of msi_device_domain_alloc_wired() for the
1721 * weird wired to MSI converting domains.
1722 */
1723void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq)
1724{
1725 struct msi_desc *desc = irq_get_msi_desc(irq: virq);
1726 struct device *dev = domain->dev;
1727
1728 if (WARN_ON_ONCE(!dev || !desc || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
1729 return;
1730
1731 msi_lock_descs(dev);
1732 if (!WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain)) {
1733 msi_domain_free_irqs_range_locked(dev, domid: MSI_DEFAULT_DOMAIN, first: desc->msi_index,
1734 last: desc->msi_index);
1735 }
1736 msi_unlock_descs(dev);
1737}
1738
1739/**
1740 * msi_get_domain_info - Get the MSI interrupt domain info for @domain
1741 * @domain: The interrupt domain to retrieve data from
1742 *
1743 * Return: the pointer to the msi_domain_info stored in @domain->host_data.
1744 */
1745struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
1746{
1747 return (struct msi_domain_info *)domain->host_data;
1748}
1749
1750/**
1751 * msi_device_has_isolated_msi - True if the device has isolated MSI
1752 * @dev: The device to check
1753 *
1754 * Isolated MSI means that HW modeled by an irq_domain on the path from the
1755 * initiating device to the CPU will validate that the MSI message specifies an
1756 * interrupt number that the device is authorized to trigger. This must block
1757 * devices from triggering interrupts they are not authorized to trigger.
1758 * Currently authorization means the MSI vector is one assigned to the device.
1759 *
1760 * This is interesting for securing VFIO use cases where a rouge MSI (eg created
1761 * by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to
1762 * impact outside its security domain, eg userspace triggering interrupts on
1763 * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM
1764 * triggering interrupts on another VM.
1765 */
1766bool msi_device_has_isolated_msi(struct device *dev)
1767{
1768 struct irq_domain *domain = dev_get_msi_domain(dev);
1769
1770 for (; domain; domain = domain->parent)
1771 if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI)
1772 return true;
1773 return arch_is_isolated_msi();
1774}
1775EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi);
1776

source code of linux/kernel/irq/msi.c